Skip to content

Commit

Permalink
fix(NODE-6123): utf8 validation is not strict enough
Browse files Browse the repository at this point in the history
  • Loading branch information
aditi-khare-mongoDB committed Apr 24, 2024
1 parent da9de3a commit 3768409
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 304 deletions.
2 changes: 1 addition & 1 deletion etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export class RequireVendor {
*/
transform(code, id) {
// TODO(NODE-4930)
if (!id.includes('web_byte_utils')) {
if (!id.includes('validate_utf8')) {
return;
}

Expand Down
10 changes: 0 additions & 10 deletions src/error.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,3 @@ export class BSONOffsetError extends BSONError {
this.offset = offset;
}
}

/**
 * Error type for invalid UTF-8 encountered while decoding BSON strings.
 *
 * Behaves exactly like `BSONError` except that `name` reports
 * `'BSONUTF8Error'`, which lets callers distinguish it by name.
 */
export class BSONUTF8Error extends BSONError {
  /**
   * @param message - Human readable description of the failure
   * @param options - Optional bag forwarded to `BSONError`; `cause` carries the underlying error
   */
  constructor(message: string, options?: { cause?: unknown }) {
    super(message, options);
  }

  /** Literal-typed error name. */
  public get name(): 'BSONUTF8Error' {
    return 'BSONUTF8Error';
  }
}
14 changes: 6 additions & 8 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import { BSONSymbol } from '../symbol';
import { Timestamp } from '../timestamp';
import { ByteUtils } from '../utils/byte_utils';
import { NumberUtils } from '../utils/number_utils';
import { validateUtf8 } from '../validate_utf8';

/** @public */
export interface DeserializeOptions {
Expand Down Expand Up @@ -603,13 +602,12 @@ function deserializeObject(
buffer[index + stringSize - 1] !== 0
)
throw new BSONError('bad string length in bson');
// Namespace
if (validation != null && validation.utf8) {
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false);
const namespace = ByteUtils.toUTF8(
buffer,
index,
index + stringSize - 1,
validation != null && (validation.utf8 as boolean)
);
// Update parse index position
index = index + stringSize;

Expand Down
9 changes: 0 additions & 9 deletions src/test.ts

This file was deleted.

26 changes: 2 additions & 24 deletions src/utils/node_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { BSONError, BSONUTF8Error } from '../error';
import { BSONError } from '../error';
import { validateUtf8 } from '../validate_utf8';
import { tryReadBasicLatin, tryWriteBasicLatin } from './latin';

Expand Down Expand Up @@ -27,28 +27,6 @@ type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
declare const Buffer: NodeJsBufferConstructor;
declare const require: (mod: 'crypto') => { randomBytes: (byteLength: number) => Uint8Array };

/** Minimal local description of a `TextDecoder` instance. */
interface TextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;
  decode(input?: Uint8Array): string;
}

/** Constructor shape for `TextDecoder`; only the `'utf8'` label is permitted here. */
interface TextDecoderConstructor {
  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
}

/** Minimal local description of a `TextEncoder` instance. */
interface TextEncoder {
  readonly encoding: string;
  encode(input?: string): Uint8Array;
}

/** Constructor shape for `TextEncoder`; it takes no arguments. */
interface TextEncoderConstructor {
  new (): TextEncoder;
}

// Ambient bindings: these emit no code, so the values must be supplied by the
// runtime as globals. Only their types are described locally.
declare const TextDecoder: TextDecoderConstructor;
declare const TextEncoder: TextEncoderConstructor;

/** @internal */
export function nodejsMathRandomBytes(byteLength: number) {
return nodeJsByteUtils.fromNumberArray(
Expand Down Expand Up @@ -161,7 +139,7 @@ export const nodeJsByteUtils = {
// TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
for (let i = 0; i < string.length; i++) {
if (string.charCodeAt(i) === 0xfffd) {
if (!validateUtf8(buffer, start, end)) {
if (!validateUtf8(buffer, start, end, fatal)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
break;
Expand Down
12 changes: 3 additions & 9 deletions src/utils/web_byte_utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BSONError, BSONUTF8Error } from '../error';
import { BSONError } from '../error';
import { validateUtf8 } from '../validate_utf8';
import { tryReadBasicLatin } from './latin';

type TextDecoder = {
Expand Down Expand Up @@ -179,14 +180,7 @@ export const webByteUtils = {
return basicLatin;
}

if (fatal) {
try {
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
} catch (cause) {
throw new BSONUTF8Error('Invalid UTF-8 string in BSON document', { cause });
}
}
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
return validateUtf8(uint8array, start, end, fatal);
},

utf8ByteLength(input: string): number {
Expand Down
89 changes: 33 additions & 56 deletions src/validate_utf8.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
import { NumberUtils } from "./utils/number_utils";

// Bit masks selecting the leading 1..5 bits of a byte.
const FIRST_BIT = 0x80;
const FIRST_TWO_BITS = 0xc0;
const FIRST_THREE_BITS = 0xe0;
const FIRST_FOUR_BITS = 0xf0;
const FIRST_FIVE_BITS = 0xf8;

// Expected value of the masked leading bits for each kind of UTF-8 byte.
// NOTE: despite the `_BIT_CHAR` names, these identify the lead byte of a
// two-, three- and four-BYTE sequence respectively (110xxxxx, 1110xxxx, 11110xxx).
const TWO_BIT_CHAR = 0xc0;
const THREE_BIT_CHAR = 0xe0;
const FOUR_BIT_CHAR = 0xf0;
// Continuation bytes have the form 10xxxxxx.
const CONTINUING_CHAR = 0x80;

// Largest code point representable in the given number of UTF-8 bytes.
const ONE_BYTE_MAX = 0x7f; // 7 payload bits
const TWO_BYTE_MAX = 0x7ff; // 5 + 6 = 11 payload bits
// 4 + 6 + 6 = 16 payload bits. The previous value 0xf7ff was too small and let
// overlong four-byte encodings of U+F800..U+FFFF pass a `> THREE_BYTE_MAX` check.
const THREE_BYTE_MAX = 0xffff;

import { BSONError } from './error';

/** Minimal local description of a `TextDecoder` instance. */
interface TextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;
  decode(input?: Uint8Array): string;
}

/** Constructor shape for `TextDecoder`; only the `'utf8'` label is permitted here. */
interface TextDecoderConstructor {
  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
}

/** Minimal local description of a `TextEncoder` instance. */
interface TextEncoder {
  readonly encoding: string;
  encode(input?: string): Uint8Array;
}

/** Constructor shape for `TextEncoder`; it takes no arguments. */
interface TextEncoderConstructor {
  new (): TextEncoder;
}

// Ambient bindings: these emit no code, so the values must be supplied by the
// runtime as globals. Only their types are described locally.
declare const TextDecoder: TextDecoderConstructor;
declare const TextEncoder: TextEncoderConstructor;

/**
* Determines if the passed in bytes are valid utf8
Expand All @@ -24,45 +29,17 @@ const THREE_BYTE_MAX = 0xf7ff;
* @param end - The index to end validating
*/
export function validateUtf8(
bytes: { [index: number]: number },
buffer: Uint8Array,
start: number,
end: number
): boolean {
let continuation = 0;

for (let i = start; i < end; i += 1) {
const byte = bytes[i];

if (continuation) {
if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR) {
return false;
}
continuation -= 1;
} else if (byte & FIRST_BIT &&
parseUtf8Bytes([byte, bytes[i+1]]) > ONE_BYTE_MAX) {
if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
continuation = 1;
} else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR &&
parseUtf8Bytes([byte, bytes[i+1], bytes[i+2]]) > TWO_BYTE_MAX) {
continuation = 2;
} else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR &&
parseUtf8Bytes([byte, bytes[i+1], bytes[i+2], bytes[i+3]]) > THREE_BYTE_MAX) {
continuation = 3;
} else {
return false;
}
end: number,
fatal: boolean
): string {
if (fatal) {
try {
return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
} catch (cause) {
throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
}
}

return !continuation;
return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
}

/**
 * Decodes the code point carried by one UTF-8 sequence.
 *
 * `arr[0]` is treated as the lead byte and every following entry as a
 * continuation byte; `arr.length` decides how many prefix bits are stripped
 * from the lead byte. The bytes are NOT validated here, so callers compare the
 * result against the per-length maxima to detect overlong encodings.
 *
 * Unlike the previous implementation this does not modify `arr`, and it uses
 * the actual UTF-8 layout (6 payload bits per continuation byte) instead of
 * shifting whole bytes by multiples of 8.
 *
 * @param arr - Lead byte followed by its continuation bytes
 * @returns The decoded numeric value; 0 for an empty array
 */
function parseUtf8Bytes(arr: number[]): number {
  const length = arr.length;
  if (length === 0) {
    return 0;
  }
  if (length === 1) {
    // Single-byte sequences carry their value directly.
    return arr[0];
  }

  // A lead byte of an n-byte sequence has n one-bits and a zero as prefix,
  // leaving 8 - (n + 1) payload bits: 0x1f, 0x0f, 0x07 for n = 2, 3, 4.
  let codePoint = arr[0] & (0xff >> (length + 1));
  for (let i = 1; i < length; i++) {
    // Continuation bytes are 10xxxxxx: keep the low 6 bits. An entry that is
    // undefined at runtime (read past the end of the source) contributes 0.
    codePoint = (codePoint << 6) | (arr[i] & 0x3f);
  }
  return codePoint;
}
Loading

0 comments on commit 3768409

Please sign in to comment.