From 2ac17ec1e3c53b280efa298d137d96b2176bf046 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Thu, 7 Mar 2024 18:02:17 -0500 Subject: [PATCH] feat(NODE-5957): add BSON indexing API (#654) --- rollup.config.mjs | 2 + src/bson.ts | 1 + src/error.ts | 22 ++ src/parser/on_demand/index.ts | 28 ++ src/parser/on_demand/parse_to_elements.ts | 174 ++++++++++ test/node/error.test.ts | 28 +- test/node/exports.test.ts | 1 + .../on_demand/parse_to_elements.test.ts | 300 ++++++++++++++++++ test/node/release.test.ts | 2 + 9 files changed, 557 insertions(+), 1 deletion(-) create mode 100644 src/parser/on_demand/index.ts create mode 100644 src/parser/on_demand/parse_to_elements.ts create mode 100644 test/node/parser/on_demand/parse_to_elements.test.ts diff --git a/rollup.config.mjs b/rollup.config.mjs index 17c402d8a..16a0376d2 100644 --- a/rollup.config.mjs +++ b/rollup.config.mjs @@ -17,6 +17,8 @@ const tsConfig = { importHelpers: false, noEmitHelpers: false, noEmitOnError: true, + // preserveConstEnums: false is the default, but we explicitly set it here to ensure we do not mistakenly generate objects where we expect literals + preserveConstEnums: false, // Generate separate source maps files with sourceContent included sourceMap: true, inlineSourceMap: false, diff --git a/src/bson.ts b/src/bson.ts index 5b079291c..5475e08e2 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -54,6 +54,7 @@ export { BSONValue } from './bson_value'; export { BSONError, BSONVersionError, BSONRuntimeError } from './error'; export { BSONType } from './constants'; export { EJSON } from './extended_json'; +export { onDemand } from './parser/on_demand/index'; /** @public */ export interface Document { diff --git a/src/error.ts b/src/error.ts index 3f2711e9f..7203f46b3 100644 --- a/src/error.ts +++ b/src/error.ts @@ -81,3 +81,25 @@ export class BSONRuntimeError extends BSONError { super(message); } } + +/** + * @public + * @category Error + * + * @experimental + * + * An error generated when BSON bytes are 
invalid. + * Reports the offset the parser was able to reach before encountering the error. + */ +export class BSONOffsetError extends BSONError { + public get name(): 'BSONOffsetError' { + return 'BSONOffsetError'; + } + + public offset: number; + + constructor(message: string, offset: number) { + super(`${message}. offset: ${offset}`); + this.offset = offset; + } +} diff --git a/src/parser/on_demand/index.ts b/src/parser/on_demand/index.ts new file mode 100644 index 000000000..bd08cdb8e --- /dev/null +++ b/src/parser/on_demand/index.ts @@ -0,0 +1,28 @@ +import { type BSONError, BSONOffsetError } from '../../error'; +import { type BSONElement, parseToElements } from './parse_to_elements'; +/** + * @experimental + * @public + * + * A new set of BSON APIs that are currently experimental and not intended for production use. + */ +export type OnDemand = { + BSONOffsetError: { + new (message: string, offset: number): BSONOffsetError; + isBSONError(value: unknown): value is BSONError; + }; + parseToElements: (this: void, bytes: Uint8Array, startOffset?: number) => Iterable<BSONElement>; +}; + +/** + * @experimental + * @public + */ +const onDemand: OnDemand = Object.create(null); + +onDemand.parseToElements = parseToElements; +onDemand.BSONOffsetError = BSONOffsetError; + +Object.freeze(onDemand); + +export { onDemand }; diff --git a/src/parser/on_demand/parse_to_elements.ts b/src/parser/on_demand/parse_to_elements.ts new file mode 100644 index 000000000..bc3c107d6 --- /dev/null +++ b/src/parser/on_demand/parse_to_elements.ts @@ -0,0 +1,174 @@ +/* eslint-disable @typescript-eslint/no-unsafe-enum-comparison */ +import { BSONOffsetError } from '../../error'; + +/** + * @internal + * + * @remarks + * - This enum is const so the code we produce will inline the numbers + * - `minKey` is set to 255 so unsigned comparisons succeed + * - Modify with caution, double check the bundle contains literals + */ +const enum t { + double = 1, + string = 2, + object = 3, + array = 4, + binData = 5, + 
undefined = 6, + objectId = 7, + bool = 8, + date = 9, + null = 10, + regex = 11, + dbPointer = 12, + javascript = 13, + symbol = 14, + javascriptWithScope = 15, + int = 16, + timestamp = 17, + long = 18, + decimal = 19, + minKey = 255, + maxKey = 127 +} + +/** + * @public + * @experimental + */ +export type BSONElement = [ + type: number, + nameOffset: number, + nameLength: number, + offset: number, + length: number +]; + +/** Parses a int32 little-endian at offset, throws if it is negative */ +function getSize(source: Uint8Array, offset: number): number { + if (source[offset + 3] > 127) { + throw new BSONOffsetError('BSON size cannot be negative', offset); + } + return ( + source[offset] | + (source[offset + 1] << 8) | + (source[offset + 2] << 16) | + (source[offset + 3] << 24) + ); +} + +/** + * Searches for null terminator of a BSON element's value (Never the document null terminator) + * The scan is bounded by `bytes.length`: parseToElements only asserts that the document's last byte is `0x00`, and the starting offset may already be past it. + * So this will at most iterate to the end of `bytes` and error if the last byte, or the end of the input, is the offset reached. 
+ */ +function findNull(bytes: Uint8Array, offset: number): number { + let nullTerminatorOffset = offset; + + for (; nullTerminatorOffset < bytes.length && bytes[nullTerminatorOffset] !== 0x00; nullTerminatorOffset++); + + if (nullTerminatorOffset >= bytes.length - 1) { + // We reached the end of the input (at best the null terminator of the document), not a value's + throw new BSONOffsetError('Null terminator not found', offset); + } + + return nullTerminatorOffset; +} + +/** + * @public + * @experimental + */ +export function parseToElements(bytes: Uint8Array, startOffset = 0): Iterable<BSONElement> { + if (bytes.length < 5) { + throw new BSONOffsetError( + `Input must be at least 5 bytes, got ${bytes.length} bytes`, + startOffset + ); + } + + const documentSize = getSize(bytes, startOffset); + + if (documentSize > bytes.length - startOffset) { + throw new BSONOffsetError( + `Parsed documentSize (${documentSize} bytes) does not match input length (${bytes.length} bytes)`, + startOffset + ); + } + + if (bytes[startOffset + documentSize - 1] !== 0x00) { + throw new BSONOffsetError('BSON documents must end in 0x00', startOffset + documentSize); + } + + const elements: BSONElement[] = []; + let offset = startOffset + 4; + + while (offset <= documentSize + startOffset) { + const type = bytes[offset]; + offset += 1; + + if (type === 0) { + if (offset - startOffset !== documentSize) { + throw new BSONOffsetError(`Invalid 0x00 type byte`, offset); + } + break; + } + + const nameOffset = offset; + const nameLength = findNull(bytes, offset) - nameOffset; + offset += nameLength + 1; + + let length: number; + + if (type === t.double || type === t.long || type === t.date || type === t.timestamp) { + length = 8; + } else if (type === t.int) { + length = 4; + } else if (type === t.objectId) { + length = 12; + } else if (type === t.decimal) { + length = 16; + } else if (type === t.bool) { + length = 1; + } else if (type === t.null || type === t.undefined || type === t.maxKey || 
type === t.minKey) { + length = 0; + } + // Needs a size calculation + else if 
(type === t.regex) { + length = findNull(bytes, findNull(bytes, offset) + 1) + 1 - offset; + } else if (type === t.object || type === t.array || type === t.javascriptWithScope) { + length = getSize(bytes, offset); + } else if ( + type === t.string || + type === t.binData || + type === t.dbPointer || + type === t.javascript || + type === t.symbol + ) { + length = getSize(bytes, offset) + 4; + if (type === t.binData) { + // binary subtype + length += 1; + } + if (type === t.dbPointer) { + // dbPointer's objectId + length += 12; + } + } else { + throw new BSONOffsetError( + `Invalid 0x${type.toString(16).padStart(2, '0')} type byte`, + offset + ); + } + + if (length > documentSize) { + throw new BSONOffsetError('value reports length larger than document', offset); + } + + elements.push([type, nameOffset, nameLength, offset, length]); + offset += length; + } + + return elements; +} diff --git a/test/node/error.test.ts b/test/node/error.test.ts index ee8543688..5a126d076 100644 --- a/test/node/error.test.ts +++ b/test/node/error.test.ts @@ -1,7 +1,13 @@ import { expect } from 'chai'; import { loadESModuleBSON } from '../load_bson'; -import { __isWeb__, BSONError, BSONVersionError, BSONRuntimeError } from '../register-bson'; +import { + __isWeb__, + BSONError, + BSONVersionError, + BSONRuntimeError, + onDemand +} from '../register-bson'; const instanceOfChecksWork = !__isWeb__; @@ -102,4 +108,24 @@ describe('BSONError', function () { expect(new BSONRuntimeError('Woops!')).to.have.property('name', 'BSONRuntimeError'); }); }); + + describe('class BSONOffsetError', () => { + it('is a BSONError instance', function () { + expect(BSONError.isBSONError(new onDemand.BSONOffsetError('Oopsie', 3))).to.be.true; + }); + + it('has a name property equal to "BSONOffsetError"', function () { + expect(new onDemand.BSONOffsetError('Woops!', 3)).to.have.property('name', 'BSONOffsetError'); + }); + + it('sets the offset property', function () { + expect(new 
onDemand.BSONOffsetError('Woops!', 3)).to.have.property('offset', 3); + }); + + it('includes the offset in the message', function () { + expect(new onDemand.BSONOffsetError('Woops!', 3)) + .to.have.property('message') + .that.matches(/offset: 3/i); + }); + }); }); diff --git a/test/node/exports.test.ts b/test/node/exports.test.ts index c23fd1f12..953e88727 100644 --- a/test/node/exports.test.ts +++ b/test/node/exports.test.ts @@ -18,6 +18,7 @@ const EXPECTED_EXPORTS = [ 'DBRef', 'Binary', 'ObjectId', + 'onDemand', 'UUID', 'Long', 'Timestamp', diff --git a/test/node/parser/on_demand/parse_to_elements.test.ts b/test/node/parser/on_demand/parse_to_elements.test.ts new file mode 100644 index 000000000..068641fc6 --- /dev/null +++ b/test/node/parser/on_demand/parse_to_elements.test.ts @@ -0,0 +1,300 @@ +import { expect } from 'chai'; + +import * as BSON from '../../../register-bson'; + +import { bufferFromHexArray, stringToUTF8HexBytes, int32LEToHex } from '../../tools/utils'; + +const parseToElements = BSON.onDemand.parseToElements; +const BSONOffsetError = BSON.onDemand.BSONOffsetError; + +describe('parseToElements()', () => { + context('when given less than 5 bytes', () => { + it('throws an error indicating minimum required size', () => { + const test = () => parseToElements(new Uint8Array(0)); + expect(test).to.throw(/at least 5 bytes/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + + context('when a document has a size smaller than the input', () => { + it('throws an error that it found a 0 type byte at an offset before document end', () => { + const test = () => parseToElements(new Uint8Array([6, 0, 0, 0, 0, 0, 0])); // given 7 bytes, but says 6 + expect(test).to.throw(/Invalid 0x00 type byte. 
offset: 5/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + + context('when given a document that does not end with a null terminator', () => { + it('throws an error that documents must end in 0x00', () => { + for (const test of [ + () => parseToElements(new Uint8Array([5, 0, 0, 0, 1])), + () => parseToElements(new Uint8Array([0, 5, 0, 0, 0, 1]), 1) + ]) { + expect(test).to.throw(/documents must end in 0x00/i); + expect(test).to.throw(BSONOffsetError); + } + }); + }); + + context('when given a document that has a size larger than the input', () => { + it('throws an error that the size and length do not match', () => { + const test = () => parseToElements(new Uint8Array([6, 0, 0, 0, 0])); // only 5 bytes, but says 6 + expect(test).to.throw(/does not match input length/i); + expect(test).to.throw(BSONOffsetError); + }); + + context('and an offset is provided', () => { + it('throws an error that the size and length do not match', () => { + const test = () => parseToElements(new Uint8Array([0, 6, 0, 0, 0, 0]), 1); // is 6 bytes, but offset is 1 + expect(test).to.throw(/does not match input length/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + }); + + context('when an element name has no null terminator', () => { + it('throws an error indicating null terminator not found', () => { + const test = () => + parseToElements(bufferFromHexArray(['10', '61', int32LEToHex(0x7fff_ffff)])); + expect(test).to.throw(/Null terminator not found/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + + context('when given a negative size', () => { + context('in a document', () => { + it('throws an error that a size cannot be negative', () => { + const testNegativeMax = () => parseToElements(new Uint8Array([0, 0, 0, 0x80, 0])); + const testNegative1 = () => parseToElements(new Uint8Array([0xff, 0xff, 0xff, 0xff, 0])); + expect(testNegativeMax).to.throw(/BSON size cannot be negative/i); + expect(testNegativeMax).to.throw(BSONOffsetError); + 
expect(testNegative1).to.throw(/BSON size cannot be negative/i); + expect(testNegative1).to.throw(BSONOffsetError); + }); + }); + + const sizedTypes = [ + // The array is in order of [TypeByte, ElementName, Int32Size, ElementValue] + { type: 'string', input: ['02', '6100', '00000080', '6100'] }, + { type: 'binary', input: ['05', '6100', '00000080', '01'] }, + { type: 'dbpointer', input: ['0C', '6100', '00000080', '6100', '00'.repeat(12)] }, + { type: 'code', input: ['0D', '6100', '00000080', '6100'] }, + { type: 'symbol', input: ['0E', '6100', '00000080', '6100'] }, + { type: 'object', input: ['03', '6100', '00000080', '00'] }, + { type: 'array', input: ['04', '6100', '00000080', '00'] }, + { type: 'code_w_scope', input: ['0F', '6100', '00000080', '010000006100', '05000000'] } + ]; + + for (const sizedType of sizedTypes) { + context(`in a ${sizedType.type}`, () => { + it('throws an error that a size cannot be negative', () => { + const test = () => parseToElements(bufferFromHexArray(sizedType.input)); + expect(test).to.throw(/BSON size cannot be negative/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + } + }); + + context('when an sized element reports a size larger than document', () => { + const sizedTypes = [ + // The array is in order of [TypeByte, ElementName, Int32Size, ElementValue] + { type: 'string', input: ['02', '6100', '00000070', '6100'] }, + { type: 'binary', input: ['05', '6100', '00000070', '01'] }, + { type: 'dbpointer', input: ['0C', '6100', '00000070', '6100', '00'.repeat(12)] }, + { type: 'code', input: ['0D', '6100', '00000070', '6100'] }, + { type: 'symbol', input: ['0E', '6100', '00000070', '6100'] }, + { type: 'object', input: ['03', '6100', '00000070', '00'] }, + { type: 'array', input: ['04', '6100', '00000070', '00'] }, + { type: 'code_w_scope', input: ['0F', '6100', '00000070', '010000006100', '05000000'] } + ]; + + for (const sizedType of sizedTypes) { + context(`for ${sizedType.type}`, () => { + it('throws an 
error that a 
size cannot be larger than the document', () => { + const test = () => parseToElements(bufferFromHexArray(sizedType.input)); + expect(test).to.throw(/larger than document/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + } + }); + + context('when given an empty bson document', () => { + it('returns no elements', () => { + expect(parseToElements(bufferFromHexArray([]))).to.deep.equal([]); + }); + }); + + context('when given a document with an invalid type', () => { + it('throws an error that there is an invalid type', () => { + const test = () => parseToElements(bufferFromHexArray(['14', '6100'])); + expect(test).to.throw(/Invalid 0x14 type byte/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + + context('when given a regexp', () => { + context('with no null terminator for the pattern', () => { + it('throws an error', () => { + const regexp = [ + Buffer.from('abc').toString('hex'), + // '00', + Buffer.from('imx').toString('hex'), + '00' + ].join(''); + const test = () => parseToElements(bufferFromHexArray(['0B', '6100', regexp])); + expect(test).to.throw(/Null terminator not found/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + + context('with no null terminator for the flags', () => { + it('throws an error', () => { + const regexp = [ + Buffer.from('abc').toString('hex'), + '00', + Buffer.from('imx').toString('hex') + // '00' + ].join(''); + const test = () => parseToElements(bufferFromHexArray(['0B', '6100', regexp])); + expect(test).to.throw(/Null terminator not found/i); + expect(test).to.throw(BSONOffsetError); + }); + }); + }); + + const common = { nameOffset: 5, nameLength: 1, offset: 7 }; + const regexp = [ + Buffer.from('abc').toString('hex'), + '00', + Buffer.from('imx').toString('hex'), + '00' + ].join(''); + const code_w_scope = [ + int32LEToHex(13 + 5 + 4), // code is 13, document is 5, 4 for leading int + stringToUTF8HexBytes('() => {}'), + int32LEToHex(5), + '00' + ].join(''); + const tableTest = [ + { + name: 'double', 
+ input: ['01', '6100', '0100000000000000'], + output: { type: 1, length: 8 } + }, + { + name: 'string', + input: ['02', '6100', stringToUTF8HexBytes('hello')], + output: { type: 2, length: 'hello'.length + 4 + 1 } // 4 for the size, 1 for the null + }, + { + name: 'object', + input: ['03', '6100', int32LEToHex(5), '00'], + output: { type: 3, length: 5 } + }, + { + name: 'array', + input: ['04', '6100', int32LEToHex(5), '00'], + output: { type: 4, length: 5 } + }, + { + name: 'binary', + input: ['05', '6100', int32LEToHex(5), '23', '00'], + output: { type: 5, length: 10 } + }, + { + name: 'undefined', + input: ['06', '6100'], + output: { type: 6, length: 0 } + }, + { + name: 'objectId', + input: ['07', '6100', '00'.repeat(12)], + output: { type: 7, length: 12 } + }, + { + name: 'boolean', + input: ['08', '6100', '45'], + output: { type: 8, length: 1 } + }, + { + name: 'date', + input: ['09', '6100', '00'.repeat(8)], + output: { type: 9, length: 8 } + }, + { + name: 'null', + input: ['0A', '6100'], + output: { type: 10, length: 0 } + }, + { + name: 'regexp', + input: ['0B', '6100', regexp], + output: { type: 11, length: 8 } + }, + { + name: 'dbpointer', + input: ['0C', '6100', stringToUTF8HexBytes('db.coll'), '00'.repeat(12)], + output: { type: 12, length: 'db.coll'.length + 4 + 1 + 12 } + }, + { + name: 'code', + input: ['0D', '6100', stringToUTF8HexBytes('() => {}')], + output: { type: 13, length: '() => {}'.length + 4 + 1 } + }, + { + name: 'symbol', + input: ['0E', '6100', stringToUTF8HexBytes('symbol')], + output: { type: 14, length: 'symbol'.length + 4 + 1 } + }, + { + name: 'code_w_scope', + input: ['0F', '6100', code_w_scope], + output: { type: 15, length: '() => {}'.length + 4 + 1 + 5 + 4 } + }, + { + name: 'int', + input: ['10', '6100', int32LEToHex(320)], + output: { type: 16, length: 4 } + }, + { + name: 'timestamp', + input: ['11', '6100', '00'.repeat(8)], + output: { type: 17, length: 8 } + }, + { + name: 'long', + input: ['12', '6100', 
'00'.repeat(8)], + output: { type: 18, length: 8 } + }, + { + name: 'decimal128', + input: ['13', '6100', '00'.repeat(16)], + output: { type: 19, length: 16 } + }, + { + name: 'minkey', + input: ['FF', '6100'], + output: { type: 255, length: 0 } + }, + { + name: 'maxkey', + input: ['7F', '6100'], + output: { type: 127, length: 0 } + } + ]; + + context(`when given a bson document`, () => { + for (const test of tableTest) { + context(`with one ${test.name} element`, () => { + it(`returns one element with type=${test.output.type} and length=${test.output.length}`, () => { + const output = { ...common, ...test.output }; + expect(parseToElements(bufferFromHexArray(test.input))).to.deep.equal([ + [output.type, output.nameOffset, output.nameLength, output.offset, output.length] + ]); + }); + }); + } + }); +}); diff --git a/test/node/release.test.ts b/test/node/release.test.ts index c04b5124f..da69230df 100644 --- a/test/node/release.test.ts +++ b/test/node/release.test.ts @@ -40,6 +40,8 @@ const REQUIRED_FILES = [ 'src/parser/deserializer.ts', 'src/parser/serializer.ts', 'src/parser/utils.ts', + 'src/parser/on_demand/index.ts', + 'src/parser/on_demand/parse_to_elements.ts', 'src/regexp.ts', 'src/symbol.ts', 'src/timestamp.ts',