Skip to content

Commit

Permalink
feat!: Use HashStreamValidator for Data Validation (googleapis#1951)
Browse files Browse the repository at this point in the history
* feat!: Use `HashStreamValidator` for stream validation

- Removes `fast-crc32c` support

* fix: remove logs

* test: `crc32cGenerator` option

* test: improve `compression` test reliability

* chore: add temp debug logs

* chore: more debug logs

* fix: use static gzip content to remove test variance

* chore: remove debug logs

* chore: lint
  • Loading branch information
danielbankhead authored and ddelgrosso1 committed May 23, 2022
1 parent 7a13ccf commit 14f287c
Show file tree
Hide file tree
Showing 10 changed files with 284 additions and 173 deletions.
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
"extend": "^3.0.2",
"gaxios": "^5.0.0",
"google-auth-library": "^8.0.1",
"hash-stream-validation": "^0.2.2",
"mime": "^3.0.0",
"mime-types": "^2.0.8",
"p-limit": "^3.0.1",
Expand Down
10 changes: 5 additions & 5 deletions src/bucket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import {
Query,
} from './signer';
import {Readable} from 'stream';
import {CRC32CValidatorGenerator} from './crc32c';

interface SourceObject {
name: string;
Expand Down Expand Up @@ -623,6 +624,7 @@ class Bucket extends ServiceObject {

acl: Acl;
iam: Iam;
crc32cGenerator: CRC32CValidatorGenerator;

// eslint-disable-next-line @typescript-eslint/no-unused-vars
getFilesStream(query?: GetFilesOptions): Readable {
Expand Down Expand Up @@ -1035,6 +1037,9 @@ class Bucket extends ServiceObject {
pathPrefix: '/defaultObjectAcl',
});

this.crc32cGenerator =
options.crc32cGenerator || this.storage.crc32cGenerator;

this.iam = new Iam(this);

this.getFilesStream = paginator.streamify('getFiles');
Expand Down Expand Up @@ -3718,11 +3723,6 @@ class Bucket extends ServiceObject {
*
* Resumable uploads are enabled by default
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* See {@link https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload#uploads| Upload Options (Simple or Resumable)}
* See {@link https://cloud.google.com/storage/docs/json_api/v1/objects/insert| Objects: insert API Documentation}
*
Expand Down
4 changes: 4 additions & 0 deletions src/crc32c.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ interface CRC32CValidatorGenerator {
(): CRC32CValidator;
}

const CRC32C_DEFAULT_VALIDATOR_GENERATOR: CRC32CValidatorGenerator = () =>
new CRC32C();

const CRC32C_EXCEPTION_MESSAGES = {
INVALID_INIT_BASE64_RANGE: (l: number) =>
`base64-encoded data expected to equal 4 bytes, not ${l}`,
Expand Down Expand Up @@ -305,6 +308,7 @@ class CRC32C implements CRC32CValidator {

export {
CRC32C,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
CRC32C_EXCEPTION_MESSAGES,
CRC32C_EXTENSIONS,
CRC32C_EXTENSION_TABLE,
Expand Down
66 changes: 30 additions & 36 deletions src/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ import compressible = require('compressible');
import * as crypto from 'crypto';
import * as extend from 'extend';
import * as fs from 'fs';
// eslint-disable-next-line @typescript-eslint/no-var-requires
const hashStreamValidation = require('hash-stream-validation');
import * as mime from 'mime';
// eslint-disable-next-line @typescript-eslint/no-var-requires
const pumpify = require('pumpify');
Expand Down Expand Up @@ -68,6 +66,9 @@ import {
unicodeJSONStringify,
formatAsUTCISO,
} from './util';
import {CRC32CValidatorGenerator} from './crc32c';
import {HashStreamValidator} from './hash-stream-validator';

import retry = require('async-retry');

export type GetExpirationDateResponse = [Date];
Expand Down Expand Up @@ -288,11 +289,12 @@ export const STORAGE_POST_POLICY_BASE_URL = 'https://storage.googleapis.com';
const GS_URL_REGEXP = /^gs:\/\/([a-z0-9_.-]+)\/(.+)$/;

export interface FileOptions {
crc32cGenerator?: CRC32CValidatorGenerator;
encryptionKey?: string | Buffer;
generation?: number | string;
kmsKeyName?: string;
userProject?: string;
preconditionOpts?: PreconditionOptions;
userProject?: string;
}

export interface CopyOptions {
Expand Down Expand Up @@ -419,7 +421,7 @@ export enum FileExceptionMessages {
*/
class File extends ServiceObject<File> {
acl: Acl;

crc32cGenerator: CRC32CValidatorGenerator;
bucket: Bucket;
storage: Storage;
kmsKeyName?: string;
Expand Down Expand Up @@ -874,6 +876,9 @@ class File extends ServiceObject<File> {
pathPrefix: '/acl',
});

this.crc32cGenerator =
options.crc32cGenerator || this.bucket.crc32cGenerator;

this.instanceRetryValue = this.storage?.retryOptions?.autoRetry;
this.instancePreconditionOpts = options?.preconditionOpts;
}
Expand Down Expand Up @@ -1209,11 +1214,6 @@ class File extends ServiceObject<File> {
* code "CONTENT_DOWNLOAD_MISMATCH". If you receive this error, the best
* recourse is to try downloading the file again.
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* NOTE: Readable streams will emit the `end` event when the file is fully
* downloaded.
*
Expand Down Expand Up @@ -1277,8 +1277,7 @@ class File extends ServiceObject<File> {
typeof options.start === 'number' || typeof options.end === 'number';
const tailRequest = options.end! < 0;

// eslint-disable-next-line @typescript-eslint/no-explicit-any
let validateStream: any; // Created later, if necessary.
let validateStream: HashStreamValidator | undefined = undefined;

const throughStream = streamEvents(new PassThrough());

Expand All @@ -1287,12 +1286,10 @@ class File extends ServiceObject<File> {
let md5 = false;

if (typeof options.validation === 'string') {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(options as any).validation = (
options.validation as string
).toLowerCase();
crc32c = options.validation === 'crc32c';
md5 = options.validation === 'md5';
const value = options.validation.toLowerCase().trim();

crc32c = value === 'crc32c';
md5 = value === 'md5';
} else if (options.validation === false) {
crc32c = false;
}
Expand Down Expand Up @@ -1406,7 +1403,12 @@ class File extends ServiceObject<File> {
});
}

validateStream = hashStreamValidation({crc32c, md5});
validateStream = new HashStreamValidator({
crc32c,
md5,
crc32cGenerator: this.crc32cGenerator,
});

throughStreams.push(validateStream);
}

Expand Down Expand Up @@ -1474,15 +1476,14 @@ class File extends ServiceObject<File> {
// the best.
let failed = crc32c || md5;

if (crc32c && hashes.crc32c) {
// We must remove the first four bytes from the returned checksum.
// http://stackoverflow.com/questions/25096737/
// base64-encoding-of-crc32c-long-value
failed = !validateStream.test('crc32c', hashes.crc32c.substr(4));
}
if (validateStream) {
if (crc32c && hashes.crc32c) {
failed = !validateStream.test('crc32c', hashes.crc32c);
}

if (md5 && hashes.md5) {
failed = !validateStream.test('md5', hashes.md5);
if (md5 && hashes.md5) {
failed = !validateStream.test('md5', hashes.md5);
}
}

if (md5 && !hashes.md5) {
Expand Down Expand Up @@ -1730,11 +1731,6 @@ class File extends ServiceObject<File> {
* resumable feature is disabled.
* </p>
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* NOTE: Writable streams will emit the `finish` event when the file is fully
* uploaded.
*
Expand Down Expand Up @@ -1846,9 +1842,10 @@ class File extends ServiceObject<File> {

// Collect data as it comes in to store in a hash. This is compared to the
// checksum value on the returned metadata from the API.
const validateStream = hashStreamValidation({
const validateStream = new HashStreamValidator({
crc32c,
md5,
crc32cGenerator: this.crc32cGenerator,
});

const fileWriteStream = duplexify();
Expand Down Expand Up @@ -1896,10 +1893,7 @@ class File extends ServiceObject<File> {
let failed = crc32c || md5;

if (crc32c && metadata.crc32c) {
// We must remove the first four bytes from the returned checksum.
// http://stackoverflow.com/questions/25096737/
// base64-encoding-of-crc32c-long-value
failed = !validateStream.test('crc32c', metadata.crc32c.substr(4));
failed = !validateStream.test('crc32c', metadata.crc32c);
}

if (md5 && metadata.md5Hash) {
Expand Down
96 changes: 96 additions & 0 deletions src/hash-stream-validator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {createHash, Hash} from 'crypto';
import {Transform} from 'stream';

import {
CRC32CValidatorGenerator,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
CRC32CValidator,
} from './crc32c';

interface HashStreamValidatorOptions {
crc32c: boolean;
md5: boolean;
crc32cGenerator: CRC32CValidatorGenerator;
}

class HashStreamValidator extends Transform {
readonly crc32cEnabled: boolean;
readonly md5Enabled: boolean;

#crc32cHash?: CRC32CValidator = undefined;
#md5Hash?: Hash = undefined;

#md5Digest = '';

constructor(options: Partial<HashStreamValidatorOptions> = {}) {
super();

this.crc32cEnabled = !!options.crc32c;
this.md5Enabled = !!options.md5;

if (this.crc32cEnabled) {
const crc32cGenerator =
options.crc32cGenerator || CRC32C_DEFAULT_VALIDATOR_GENERATOR;

this.#crc32cHash = crc32cGenerator();
}

if (this.md5Enabled) {
this.#md5Hash = createHash('md5');
}
}

_flush(callback: () => void) {
if (this.#md5Hash) {
this.#md5Digest = this.#md5Hash.digest('base64');
}

callback();
}

_transform(
chunk: Buffer,
encoding: BufferEncoding,
callback: (e?: Error) => void
) {
this.push(chunk, encoding);

try {
if (this.#crc32cHash) this.#crc32cHash.update(chunk);
if (this.#md5Hash) this.#md5Hash.update(chunk);
callback();
} catch (e) {
callback(e as Error);
}
}

test(hash: 'crc32c' | 'md5', sum: Buffer | string): boolean {
const check = Buffer.isBuffer(sum) ? sum.toString('base64') : sum;

if (hash === 'crc32c' && this.#crc32cHash) {
return this.#crc32cHash.validate(check);
}

if (hash === 'md5' && this.#md5Hash) {
return this.#md5Digest === check;
}

return false;
}
}

export {HashStreamValidator, HashStreamValidatorOptions};
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ export {
SetStorageClassResponse,
SignedPostPolicyV4Output,
} from './file';
export * from './hash-stream-validator';
export {
HmacKey,
HmacKeyMetadata,
Expand Down
14 changes: 12 additions & 2 deletions src/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ import {Channel} from './channel';
import {File} from './file';
import {normalize} from './util';
import {HmacKey, HmacKeyMetadata, HmacKeyOptions} from './hmacKey';
import {
CRC32CValidatorGenerator,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
} from './crc32c';

export interface GetServiceAccountOptions {
userProject?: string;
Expand Down Expand Up @@ -68,18 +72,20 @@ export interface PreconditionOptions {
}

export interface StorageOptions extends ServiceOptions {
retryOptions?: RetryOptions;
/**
* The API endpoint of the service used to make requests.
* Defaults to `storage.googleapis.com`.
*/
apiEndpoint?: string;
crc32cGenerator?: CRC32CValidatorGenerator;
retryOptions?: RetryOptions;
}

export interface BucketOptions {
crc32cGenerator?: CRC32CValidatorGenerator;
kmsKeyName?: string;
userProject?: string;
preconditionOpts?: PreconditionOptions;
userProject?: string;
}

export interface Cors {
Expand Down Expand Up @@ -457,6 +463,8 @@ export class Storage extends Service {
*/
acl: typeof Storage.acl;

crc32cGenerator: CRC32CValidatorGenerator;

getBucketsStream(): Readable {
// placeholder body, overwritten in constructor
return new Readable();
Expand Down Expand Up @@ -624,6 +632,8 @@ export class Storage extends Service {
* @see Storage.acl
*/
this.acl = Storage.acl;
this.crc32cGenerator =
options.crc32cGenerator || CRC32C_DEFAULT_VALIDATOR_GENERATOR;

this.retryOptions = config.retryOptions;

Expand Down
Loading

0 comments on commit 14f287c

Please sign in to comment.