Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Use HashStreamValidator for Data Validation #1951

Merged
merged 9 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
"extend": "^3.0.2",
"gaxios": "^5.0.0",
"google-auth-library": "^8.0.1",
"hash-stream-validation": "^0.2.2",
"mime": "^3.0.0",
"mime-types": "^2.0.8",
"p-limit": "^3.0.1",
Expand Down
10 changes: 5 additions & 5 deletions src/bucket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import {
Query,
} from './signer';
import {Readable} from 'stream';
import {CRC32CValidatorGenerator} from './crc32c';

interface SourceObject {
name: string;
Expand Down Expand Up @@ -623,6 +624,7 @@ class Bucket extends ServiceObject {

acl: Acl;
iam: Iam;
crc32cGenerator: CRC32CValidatorGenerator;

// eslint-disable-next-line @typescript-eslint/no-unused-vars
getFilesStream(query?: GetFilesOptions): Readable {
Expand Down Expand Up @@ -1035,6 +1037,9 @@ class Bucket extends ServiceObject {
pathPrefix: '/defaultObjectAcl',
});

this.crc32cGenerator =
options.crc32cGenerator || this.storage.crc32cGenerator;

this.iam = new Iam(this);

this.getFilesStream = paginator.streamify('getFiles');
Expand Down Expand Up @@ -3718,11 +3723,6 @@ class Bucket extends ServiceObject {
*
* Resumable uploads are enabled by default
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* See {@link https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload#uploads| Upload Options (Simple or Resumable)}
* See {@link https://cloud.google.com/storage/docs/json_api/v1/objects/insert| Objects: insert API Documentation}
*
Expand Down
4 changes: 4 additions & 0 deletions src/crc32c.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ interface CRC32CValidatorGenerator {
(): CRC32CValidator;
}

const CRC32C_DEFAULT_VALIDATOR_GENERATOR: CRC32CValidatorGenerator = () =>
new CRC32C();

const CRC32C_EXCEPTION_MESSAGES = {
INVALID_INIT_BASE64_RANGE: (l: number) =>
`base64-encoded data expected to equal 4 bytes, not ${l}`,
Expand Down Expand Up @@ -305,6 +308,7 @@ class CRC32C implements CRC32CValidator {

export {
CRC32C,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
CRC32C_EXCEPTION_MESSAGES,
CRC32C_EXTENSIONS,
CRC32C_EXTENSION_TABLE,
Expand Down
66 changes: 30 additions & 36 deletions src/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ import compressible = require('compressible');
import * as crypto from 'crypto';
import * as extend from 'extend';
import * as fs from 'fs';
// eslint-disable-next-line @typescript-eslint/no-var-requires
const hashStreamValidation = require('hash-stream-validation');
import * as mime from 'mime';
// eslint-disable-next-line @typescript-eslint/no-var-requires
const pumpify = require('pumpify');
Expand Down Expand Up @@ -68,6 +66,9 @@ import {
unicodeJSONStringify,
formatAsUTCISO,
} from './util';
import {CRC32CValidatorGenerator} from './crc32c';
import {HashStreamValidator} from './hash-stream-validator';

import retry = require('async-retry');

export type GetExpirationDateResponse = [Date];
Expand Down Expand Up @@ -288,11 +289,12 @@ export const STORAGE_POST_POLICY_BASE_URL = 'https://storage.googleapis.com';
const GS_URL_REGEXP = /^gs:\/\/([a-z0-9_.-]+)\/(.+)$/;

export interface FileOptions {
crc32cGenerator?: CRC32CValidatorGenerator;
encryptionKey?: string | Buffer;
generation?: number | string;
kmsKeyName?: string;
userProject?: string;
preconditionOpts?: PreconditionOptions;
userProject?: string;
}

export interface CopyOptions {
Expand Down Expand Up @@ -419,7 +421,7 @@ export enum FileExceptionMessages {
*/
class File extends ServiceObject<File> {
acl: Acl;

crc32cGenerator: CRC32CValidatorGenerator;
bucket: Bucket;
storage: Storage;
kmsKeyName?: string;
Expand Down Expand Up @@ -874,6 +876,9 @@ class File extends ServiceObject<File> {
pathPrefix: '/acl',
});

this.crc32cGenerator =
options.crc32cGenerator || this.bucket.crc32cGenerator;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to look to the bucket if one is not passed as an option? Should there be a default within the File class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears all Files must have a bucket (looking at the construction) and all buckets have a generator (whether custom or the default) - this way customers can set things at a storage, bucket, or per-object level


this.instanceRetryValue = this.storage?.retryOptions?.autoRetry;
this.instancePreconditionOpts = options?.preconditionOpts;
}
Expand Down Expand Up @@ -1209,11 +1214,6 @@ class File extends ServiceObject<File> {
* code "CONTENT_DOWNLOAD_MISMATCH". If you receive this error, the best
* recourse is to try downloading the file again.
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* NOTE: Readable streams will emit the `end` event when the file is fully
* downloaded.
*
Expand Down Expand Up @@ -1277,8 +1277,7 @@ class File extends ServiceObject<File> {
typeof options.start === 'number' || typeof options.end === 'number';
const tailRequest = options.end! < 0;

// eslint-disable-next-line @typescript-eslint/no-explicit-any
let validateStream: any; // Created later, if necessary.
let validateStream: HashStreamValidator | undefined = undefined;

const throughStream = streamEvents(new PassThrough());

Expand All @@ -1287,12 +1286,10 @@ class File extends ServiceObject<File> {
let md5 = false;

if (typeof options.validation === 'string') {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(options as any).validation = (
options.validation as string
).toLowerCase();
crc32c = options.validation === 'crc32c';
md5 = options.validation === 'md5';
const value = options.validation.toLowerCase().trim();

crc32c = value === 'crc32c';
md5 = value === 'md5';
} else if (options.validation === false) {
crc32c = false;
}
Expand Down Expand Up @@ -1406,7 +1403,12 @@ class File extends ServiceObject<File> {
});
}

validateStream = hashStreamValidation({crc32c, md5});
validateStream = new HashStreamValidator({
crc32c,
md5,
crc32cGenerator: this.crc32cGenerator,
});

throughStreams.push(validateStream);
}

Expand Down Expand Up @@ -1474,15 +1476,14 @@ class File extends ServiceObject<File> {
// the best.
let failed = crc32c || md5;

if (crc32c && hashes.crc32c) {
// We must remove the first four bytes from the returned checksum.
// http://stackoverflow.com/questions/25096737/
// base64-encoding-of-crc32c-long-value
failed = !validateStream.test('crc32c', hashes.crc32c.substr(4));
}
if (validateStream) {
if (crc32c && hashes.crc32c) {
failed = !validateStream.test('crc32c', hashes.crc32c);
}

if (md5 && hashes.md5) {
failed = !validateStream.test('md5', hashes.md5);
if (md5 && hashes.md5) {
failed = !validateStream.test('md5', hashes.md5);
}
}

if (md5 && !hashes.md5) {
Expand Down Expand Up @@ -1730,11 +1731,6 @@ class File extends ServiceObject<File> {
* resumable feature is disabled.
* </p>
*
* For faster crc32c computation, you must manually install
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}:
*
* $ npm install --save fast-crc32c
*
* NOTE: Writable streams will emit the `finish` event when the file is fully
* uploaded.
*
Expand Down Expand Up @@ -1846,9 +1842,10 @@ class File extends ServiceObject<File> {

// Collect data as it comes in to store in a hash. This is compared to the
// checksum value on the returned metadata from the API.
const validateStream = hashStreamValidation({
const validateStream = new HashStreamValidator({
crc32c,
md5,
crc32cGenerator: this.crc32cGenerator,
});

const fileWriteStream = duplexify();
Expand Down Expand Up @@ -1896,10 +1893,7 @@ class File extends ServiceObject<File> {
let failed = crc32c || md5;

if (crc32c && metadata.crc32c) {
// We must remove the first four bytes from the returned checksum.
// http://stackoverflow.com/questions/25096737/
// base64-encoding-of-crc32c-long-value
failed = !validateStream.test('crc32c', metadata.crc32c.substr(4));
failed = !validateStream.test('crc32c', metadata.crc32c);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason you know of that this was previously removing the first 4 bytes? I think you mentioned it was only checking those bytes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea… unfortunately we weren’t checking the complete hash before - just the last byte

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any idea why? Also any idea on the performance impact of now checking the entire range of bytes?

Copy link
Contributor Author

@danielbankhead danielbankhead May 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In short, invalid implementation - see the link from the removed comment:

This would result in inaccurate validation. E.g.:
Before:

crc32c: {crc32c: '####wA=='},

After:

crc32c: {crc32c: 'hqBywA=='},

The perf for validation is the same - the update method is the sensitive portion for performance

}

if (md5 && metadata.md5Hash) {
Expand Down
96 changes: 96 additions & 0 deletions src/hash-stream-validator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {createHash, Hash} from 'crypto';
import {Transform} from 'stream';

import {
CRC32CValidatorGenerator,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
CRC32CValidator,
} from './crc32c';

interface HashStreamValidatorOptions {
crc32c: boolean;
md5: boolean;
crc32cGenerator: CRC32CValidatorGenerator;
}

class HashStreamValidator extends Transform {
readonly crc32cEnabled: boolean;
readonly md5Enabled: boolean;

#crc32cHash?: CRC32CValidator = undefined;
#md5Hash?: Hash = undefined;

#md5Digest = '';

constructor(options: Partial<HashStreamValidatorOptions> = {}) {
super();

this.crc32cEnabled = !!options.crc32c;
this.md5Enabled = !!options.md5;

if (this.crc32cEnabled) {
const crc32cGenerator =
options.crc32cGenerator || CRC32C_DEFAULT_VALIDATOR_GENERATOR;

this.#crc32cHash = crc32cGenerator();
}

if (this.md5Enabled) {
this.#md5Hash = createHash('md5');
}
}

_flush(callback: () => void) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be good to also just mark these functions with private.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wanted to, but TS doesn’t like it as the parent class has it public

if (this.#md5Hash) {
this.#md5Digest = this.#md5Hash.digest('base64');
}

callback();
}

_transform(
chunk: Buffer,
encoding: BufferEncoding,
callback: (e?: Error) => void
) {
this.push(chunk, encoding);

try {
if (this.#crc32cHash) this.#crc32cHash.update(chunk);
if (this.#md5Hash) this.#md5Hash.update(chunk);
callback();
} catch (e) {
callback(e as Error);
}
}

test(hash: 'crc32c' | 'md5', sum: Buffer | string): boolean {
const check = Buffer.isBuffer(sum) ? sum.toString('base64') : sum;

if (hash === 'crc32c' && this.#crc32cHash) {
return this.#crc32cHash.validate(check);
}

if (hash === 'md5' && this.#md5Hash) {
return this.#md5Digest === check;
}

return false;
}
}

export {HashStreamValidator, HashStreamValidatorOptions};
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ export {
SetStorageClassResponse,
SignedPostPolicyV4Output,
} from './file';
export * from './hash-stream-validator';
export {
HmacKey,
HmacKeyMetadata,
Expand Down
14 changes: 12 additions & 2 deletions src/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ import {Channel} from './channel';
import {File} from './file';
import {normalize} from './util';
import {HmacKey, HmacKeyMetadata, HmacKeyOptions} from './hmacKey';
import {
CRC32CValidatorGenerator,
CRC32C_DEFAULT_VALIDATOR_GENERATOR,
} from './crc32c';

export interface GetServiceAccountOptions {
userProject?: string;
Expand Down Expand Up @@ -68,18 +72,20 @@ export interface PreconditionOptions {
}

export interface StorageOptions extends ServiceOptions {
retryOptions?: RetryOptions;
/**
* The API endpoint of the service used to make requests.
* Defaults to `storage.googleapis.com`.
*/
apiEndpoint?: string;
crc32cGenerator?: CRC32CValidatorGenerator;
retryOptions?: RetryOptions;
}

export interface BucketOptions {
crc32cGenerator?: CRC32CValidatorGenerator;
kmsKeyName?: string;
userProject?: string;
preconditionOpts?: PreconditionOptions;
userProject?: string;
}

export interface Cors {
Expand Down Expand Up @@ -457,6 +463,8 @@ export class Storage extends Service {
*/
acl: typeof Storage.acl;

crc32cGenerator: CRC32CValidatorGenerator;

getBucketsStream(): Readable {
// placeholder body, overwritten in constructor
return new Readable();
Expand Down Expand Up @@ -624,6 +632,8 @@ export class Storage extends Service {
* @see Storage.acl
*/
this.acl = Storage.acl;
this.crc32cGenerator =
options.crc32cGenerator || CRC32C_DEFAULT_VALIDATOR_GENERATOR;

this.retryOptions = config.retryOptions;

Expand Down
Loading