-
Notifications
You must be signed in to change notification settings - Fork 370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat!: Use `HashStreamValidator` for Data Validation
#1951
Changes from all commits
790e47f
36d76b9
87b79c0
0372958
dca1f94
373c5b6
2e69684
e7370b6
06e92ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -27,8 +27,6 @@ import compressible = require('compressible'); | |||||
import * as crypto from 'crypto'; | ||||||
import * as extend from 'extend'; | ||||||
import * as fs from 'fs'; | ||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||||||
const hashStreamValidation = require('hash-stream-validation'); | ||||||
import * as mime from 'mime'; | ||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||||||
const pumpify = require('pumpify'); | ||||||
|
@@ -68,6 +66,9 @@ import { | |||||
unicodeJSONStringify, | ||||||
formatAsUTCISO, | ||||||
} from './util'; | ||||||
import {CRC32CValidatorGenerator} from './crc32c'; | ||||||
import {HashStreamValidator} from './hash-stream-validator'; | ||||||
|
||||||
import retry = require('async-retry'); | ||||||
|
||||||
export type GetExpirationDateResponse = [Date]; | ||||||
|
@@ -288,11 +289,12 @@ export const STORAGE_POST_POLICY_BASE_URL = 'https://storage.googleapis.com'; | |||||
const GS_URL_REGEXP = /^gs:\/\/([a-z0-9_.-]+)\/(.+)$/; | ||||||
|
||||||
export interface FileOptions { | ||||||
crc32cGenerator?: CRC32CValidatorGenerator; | ||||||
encryptionKey?: string | Buffer; | ||||||
generation?: number | string; | ||||||
kmsKeyName?: string; | ||||||
userProject?: string; | ||||||
preconditionOpts?: PreconditionOptions; | ||||||
userProject?: string; | ||||||
} | ||||||
|
||||||
export interface CopyOptions { | ||||||
|
@@ -419,7 +421,7 @@ export enum FileExceptionMessages { | |||||
*/ | ||||||
class File extends ServiceObject<File> { | ||||||
acl: Acl; | ||||||
|
||||||
crc32cGenerator: CRC32CValidatorGenerator; | ||||||
bucket: Bucket; | ||||||
storage: Storage; | ||||||
kmsKeyName?: string; | ||||||
|
@@ -874,6 +876,9 @@ class File extends ServiceObject<File> { | |||||
pathPrefix: '/acl', | ||||||
}); | ||||||
|
||||||
this.crc32cGenerator = | ||||||
options.crc32cGenerator || this.bucket.crc32cGenerator; | ||||||
|
||||||
this.instanceRetryValue = this.storage?.retryOptions?.autoRetry; | ||||||
this.instancePreconditionOpts = options?.preconditionOpts; | ||||||
} | ||||||
|
@@ -1209,11 +1214,6 @@ class File extends ServiceObject<File> { | |||||
* code "CONTENT_DOWNLOAD_MISMATCH". If you receive this error, the best | ||||||
* recourse is to try downloading the file again. | ||||||
* | ||||||
* For faster crc32c computation, you must manually install | ||||||
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}: | ||||||
* | ||||||
* $ npm install --save fast-crc32c | ||||||
* | ||||||
* NOTE: Readable streams will emit the `end` event when the file is fully | ||||||
* downloaded. | ||||||
* | ||||||
|
@@ -1277,8 +1277,7 @@ class File extends ServiceObject<File> { | |||||
typeof options.start === 'number' || typeof options.end === 'number'; | ||||||
const tailRequest = options.end! < 0; | ||||||
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||||||
let validateStream: any; // Created later, if necessary. | ||||||
let validateStream: HashStreamValidator | undefined = undefined; | ||||||
|
||||||
const throughStream = streamEvents(new PassThrough()); | ||||||
|
||||||
|
@@ -1287,12 +1286,10 @@ class File extends ServiceObject<File> { | |||||
let md5 = false; | ||||||
|
||||||
if (typeof options.validation === 'string') { | ||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||||||
(options as any).validation = ( | ||||||
options.validation as string | ||||||
).toLowerCase(); | ||||||
crc32c = options.validation === 'crc32c'; | ||||||
md5 = options.validation === 'md5'; | ||||||
const value = options.validation.toLowerCase().trim(); | ||||||
|
||||||
crc32c = value === 'crc32c'; | ||||||
md5 = value === 'md5'; | ||||||
} else if (options.validation === false) { | ||||||
crc32c = false; | ||||||
} | ||||||
|
@@ -1406,7 +1403,12 @@ class File extends ServiceObject<File> { | |||||
}); | ||||||
} | ||||||
|
||||||
validateStream = hashStreamValidation({crc32c, md5}); | ||||||
validateStream = new HashStreamValidator({ | ||||||
crc32c, | ||||||
md5, | ||||||
crc32cGenerator: this.crc32cGenerator, | ||||||
}); | ||||||
|
||||||
throughStreams.push(validateStream); | ||||||
} | ||||||
|
||||||
|
@@ -1474,15 +1476,14 @@ class File extends ServiceObject<File> { | |||||
// the best. | ||||||
let failed = crc32c || md5; | ||||||
|
||||||
if (crc32c && hashes.crc32c) { | ||||||
// We must remove the first four bytes from the returned checksum. | ||||||
// http://stackoverflow.com/questions/25096737/ | ||||||
// base64-encoding-of-crc32c-long-value | ||||||
failed = !validateStream.test('crc32c', hashes.crc32c.substr(4)); | ||||||
} | ||||||
if (validateStream) { | ||||||
if (crc32c && hashes.crc32c) { | ||||||
failed = !validateStream.test('crc32c', hashes.crc32c); | ||||||
} | ||||||
|
||||||
if (md5 && hashes.md5) { | ||||||
failed = !validateStream.test('md5', hashes.md5); | ||||||
if (md5 && hashes.md5) { | ||||||
failed = !validateStream.test('md5', hashes.md5); | ||||||
} | ||||||
} | ||||||
|
||||||
if (md5 && !hashes.md5) { | ||||||
|
@@ -1730,11 +1731,6 @@ class File extends ServiceObject<File> { | |||||
* resumable feature is disabled. | ||||||
* </p> | ||||||
* | ||||||
* For faster crc32c computation, you must manually install | ||||||
* {@link https://www.npmjs.com/package/fast-crc32c| `fast-crc32c`}: | ||||||
* | ||||||
* $ npm install --save fast-crc32c | ||||||
* | ||||||
* NOTE: Writable streams will emit the `finish` event when the file is fully | ||||||
* uploaded. | ||||||
* | ||||||
|
@@ -1846,9 +1842,10 @@ class File extends ServiceObject<File> { | |||||
|
||||||
// Collect data as it comes in to store in a hash. This is compared to the | ||||||
// checksum value on the returned metadata from the API. | ||||||
const validateStream = hashStreamValidation({ | ||||||
const validateStream = new HashStreamValidator({ | ||||||
crc32c, | ||||||
md5, | ||||||
crc32cGenerator: this.crc32cGenerator, | ||||||
}); | ||||||
|
||||||
const fileWriteStream = duplexify(); | ||||||
|
@@ -1896,10 +1893,7 @@ class File extends ServiceObject<File> { | |||||
let failed = crc32c || md5; | ||||||
|
||||||
if (crc32c && metadata.crc32c) { | ||||||
// We must remove the first four bytes from the returned checksum. | ||||||
// http://stackoverflow.com/questions/25096737/ | ||||||
// base64-encoding-of-crc32c-long-value | ||||||
failed = !validateStream.test('crc32c', metadata.crc32c.substr(4)); | ||||||
failed = !validateStream.test('crc32c', metadata.crc32c); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Any reason you know of that this was previously removing the first 4 bytes? I think you mentioned it was only checking those bytes?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yeah… unfortunately we weren’t checking the complete hash before - just the last byte.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Any idea why? Also, any idea on the performance impact of now checking the entire range of bytes?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
In short, invalid implementation - see the link from the removed comment. This would result in inaccurate validation. E.g., before: Line 2373 in a9c4c18
After: Line 2194 in 06e92ac
The perf for validation is the same - the |
||||||
} | ||||||
|
||||||
if (md5 && metadata.md5Hash) { | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Copyright 2022 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
import {createHash, Hash} from 'crypto'; | ||
import {Transform} from 'stream'; | ||
|
||
import { | ||
CRC32CValidatorGenerator, | ||
CRC32C_DEFAULT_VALIDATOR_GENERATOR, | ||
CRC32CValidator, | ||
} from './crc32c'; | ||
|
||
/**
 * Options accepted by the `HashStreamValidator` constructor.
 */
interface HashStreamValidatorOptions {
  /** When truthy, a CRC32C validator is created and fed every chunk. */
  crc32c: boolean;
  /** When truthy, an MD5 hash is created and fed every chunk. */
  md5: boolean;
  /**
   * Factory used to create the CRC32C validator. Only consulted when
   * `crc32c` is enabled; `CRC32C_DEFAULT_VALIDATOR_GENERATOR` is used when
   * it is omitted.
   */
  crc32cGenerator: CRC32CValidatorGenerator;
}

/**
 * A `Transform` stream that forwards every chunk unchanged while feeding it
 * to the enabled hash implementations (CRC32C and/or MD5), so the streamed
 * data can afterwards be compared against an expected checksum with
 * {@link HashStreamValidator.test}.
 *
 * NOTE(review): `_flush` and `_transform` carry no narrower access modifier.
 * Per the review discussion on this change, TypeScript rejects one because
 * the parent class already declares these hooks.
 */
class HashStreamValidator extends Transform {
  /** Whether CRC32C tracking was requested at construction time. */
  readonly crc32cEnabled: boolean;
  /** Whether MD5 tracking was requested at construction time. */
  readonly md5Enabled: boolean;

  // Hash state; each stays `undefined` when its algorithm is disabled.
  #crc32cHash?: CRC32CValidator = undefined;
  #md5Hash?: Hash = undefined;

  // Base64 MD5 digest, populated in `_flush`. Empty string until then.
  #md5Digest = '';

  /**
   * @param options Which hashes to compute and, optionally, the CRC32C
   *     validator factory. All fields are optional; a missing flag is
   *     treated as disabled.
   */
  constructor(options: Partial<HashStreamValidatorOptions> = {}) {
    super();

    this.crc32cEnabled = !!options.crc32c;
    this.md5Enabled = !!options.md5;

    if (this.crc32cEnabled) {
      const crc32cGenerator =
        options.crc32cGenerator || CRC32C_DEFAULT_VALIDATOR_GENERATOR;

      this.#crc32cHash = crc32cGenerator();
    }

    if (this.md5Enabled) {
      this.#md5Hash = createHash('md5');
    }
  }

  /**
   * Stream hook invoked once all chunks have been transformed. Finalizes the
   * MD5 digest (base64) so that `test('md5', ...)` has a value to compare
   * against.
   *
   * @param callback Invoked when finalization is complete.
   */
  _flush(callback: () => void) {
    if (this.#md5Hash) {
      this.#md5Digest = this.#md5Hash.digest('base64');
    }

    callback();
  }

  /**
   * Stream hook invoked for each chunk. The chunk is pushed downstream
   * as-is and then fed to every enabled hash.
   *
   * @param chunk The data being streamed.
   * @param encoding Encoding forwarded to `push` together with the chunk.
   * @param callback Invoked with no argument on success, or with the error
   *     thrown by a hash `update` call.
   */
  _transform(
    chunk: Buffer,
    encoding: BufferEncoding,
    callback: (e?: Error) => void
  ) {
    this.push(chunk, encoding);

    try {
      if (this.#crc32cHash) this.#crc32cHash.update(chunk);
      if (this.#md5Hash) this.#md5Hash.update(chunk);
      callback();
    } catch (e) {
      // Report hashing failures through the stream's error channel rather
      // than letting them escape synchronously.
      callback(e as Error);
    }
  }

  /**
   * Compares an expected checksum with the one computed from the streamed
   * data.
   *
   * @param hash Which algorithm to check.
   * @param sum Expected checksum; a `Buffer` is converted to its base64
   *     string before comparison, a string is used as-is.
   * @returns For `'crc32c'`, the result of the CRC32C validator's
   *     `validate` call. For `'md5'`, whether `sum` equals the digest
   *     captured in `_flush` (that digest is the empty string until the
   *     stream has flushed). `false` when the requested algorithm was not
   *     enabled.
   */
  test(hash: 'crc32c' | 'md5', sum: Buffer | string): boolean {
    const check = Buffer.isBuffer(sum) ? sum.toString('base64') : sum;

    if (hash === 'crc32c' && this.#crc32cHash) {
      return this.#crc32cHash.validate(check);
    }

    if (hash === 'md5' && this.#md5Hash) {
      return this.#md5Digest === check;
    }

    return false;
  }
}
|
||
export {HashStreamValidator, HashStreamValidatorOptions}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason to look to the bucket if one is not passed as an option? Should there be a default within the File class?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It appears all `File`s must have a bucket (looking at the construction), and all buckets have a generator (whether custom or the default). This way, customers can set things at a storage, bucket, or per-object level.