diff --git a/benchmark/buffers/buffer-atob.js b/benchmark/buffers/buffer-atob.js new file mode 100644 index 00000000000000..8a0b355bebbf1f --- /dev/null +++ b/benchmark/buffers/buffer-atob.js @@ -0,0 +1,23 @@ +'use strict'; +const buffer = require('buffer'); +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + len: [64 * 1024 ], + n: [32] +}, { + test: { len: 256 } +}); + +function main({ n, len }) { + let s = ''; + let large = ''; + let i; + for (i = 0; i < 256; ++i) s += String.fromCharCode(i); + for (i = 0; i < len; i += 256) large += s; + const b64 = btoa(large); + + bench.start(); + for (i = 0; i < n; ++i) buffer.atob(b64); + bench.end(n); +} diff --git a/benchmark/buffers/buffer-btoa.js b/benchmark/buffers/buffer-btoa.js new file mode 100644 index 00000000000000..5288cb6411a13a --- /dev/null +++ b/benchmark/buffers/buffer-btoa.js @@ -0,0 +1,21 @@ +'use strict'; +const buffer = require('buffer'); +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + len: [64 * 1024 * 1024], + n: [32] +}, { + test: { len: 256 } +}); + +function main({ n, len }) { + let s = ''; + let large = ''; + let i; + for (i = 0; i < 256; ++i) s += String.fromCharCode(i); + for (i = 0; i < len; i += 256) large += s; + bench.start(); + for (i = 0; i < n; ++i) buffer.btoa(large); + bench.end(n); +} diff --git a/lib/buffer.js b/lib/buffer.js index 951805870c5870..ab66f8a9d6a98c 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -63,7 +63,9 @@ const { swap32: _swap32, swap64: _swap64, kMaxLength, - kStringMaxLength + kStringMaxLength, + btoa: _btoa, + atob: _atob, } = internalBinding('buffer'); const { getOwnNonIndexProperties, @@ -1213,36 +1215,34 @@ if (internalBinding('config').hasIntl) { let DOMException; -const lazyInvalidCharError = hideStackFrames((message, name) => { - if (DOMException === undefined) - DOMException = internalBinding('messaging').DOMException; - throw new DOMException('Invalid character', 'InvalidCharacterError'); -}); - function btoa(input) { - // TODO(@jasnell): The implementation here has not been performance - // optimized in any way. input = `${input}`; - for (let n = 0; n < input.length; n++) { - if (input[n].charCodeAt(0) > 0xff) - lazyInvalidCharError(); + let ret; + try { + ret = _btoa(input); + } catch (e) { + if (DOMException === undefined) + DOMException = internalBinding('messaging').DOMException; + let code = 'Error'; + if (e.code === 'ERR_INVALID_CHARACTER') code = 'InvalidCharacterError'; + throw new DOMException(e.message, code); } - const buf = Buffer.from(input, 'latin1'); - return buf.toString('base64'); + return ret; } -const kBase64Digits = - 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; - function atob(input) { - // TODO(@jasnell): The implementation here has not been performance - // optimized in any way. input = `${input}`; - for (let n = 0; n < input.length; n++) { - if (!kBase64Digits.includes(input[n])) - lazyInvalidCharError(); + let ret; + try { + ret = _atob(input); + } catch (e) { + if (DOMException === undefined) + DOMException = internalBinding('messaging').DOMException; + let code = 'Error'; + if (e.code === 'ERR_INVALID_CHARACTER') code = 'InvalidCharacterError'; + throw new DOMException(e.message, code); } - return Buffer.from(input, 'base64').toString('latin1'); + return ret; } module.exports = { diff --git a/src/base64-inl.h b/src/base64-inl.h index 1b6cdd93f002a4..38daccb3ff1c23 100644 --- a/src/base64-inl.h +++ b/src/base64-inl.h @@ -60,9 +60,21 @@ bool base64_decode_group_slow(char* const dst, const size_t dstlen, #endif template -size_t base64_decode_fast(char* const dst, const size_t dstlen, - const TypeName* const src, const size_t srclen, - const size_t decoded_size) { +size_t base64_decode_fast(char* const dst, + const size_t dstlen, + const TypeName* const src, + size_t srclen, + const size_t decoded_size, + bool* succ, + const bool strict) { + CHECK(!strict || succ != nullptr); + + if (static_cast(src[srclen - 1]) == '=') { + srclen--; + if (static_cast(src[srclen - 1]) == '=') srclen--; + } + + if (succ != nullptr) *succ = true; const size_t available = dstlen < decoded_size ? dstlen : decoded_size; const size_t max_k = available / 3 * 3; size_t max_i = srclen / 4 * 4; @@ -79,6 +91,10 @@ size_t base64_decode_fast(char* const dst, const size_t dstlen, const uint32_t v = ReadUint32BE(txt); // If MSB is set, input contains whitespace or is not valid base64. if (v & 0x80808080) { + if (strict) { + *succ = false; + return 0; + } if (!base64_decode_group_slow(dst, dstlen, src, srclen, &i, &k)) return k; max_i = i + (srclen - i) / 4 * 4; // Align max_i again. @@ -90,9 +106,50 @@ size_t base64_decode_fast(char* const dst, const size_t dstlen, k += 3; } } + if (i < srclen && k < dstlen) { +#define CHECK_INVALID(x) \ + if (txt[(x)] & 0x80) { \ + if (strict) { \ + *succ = false; \ + return 0; \ + } else { \ + break; \ + } \ + } + + int leftover = srclen - i; + unsigned char txt[] = {0, 0, 0, 0}; + switch (leftover) { + case 4: + txt[3] = static_cast( + unbase64(static_cast(src[i + 3]))); + CHECK_INVALID(3); + // DO NOT break; + + case 3: + txt[2] = static_cast( + unbase64(static_cast(src[i + 2]))); + CHECK_INVALID(2); + // DO NOT break; + + case 2: + txt[1] = static_cast( + unbase64(static_cast(src[i + 1]))); + CHECK_INVALID(1); + // DO NOT break; + + default: + txt[0] = static_cast( + unbase64(static_cast(src[i + 0]))); + CHECK_INVALID(0); + } + +#undef CHECK_INVALID + base64_decode_group_slow(dst, dstlen, src, srclen, &i, &k); } + return k; } @@ -113,10 +170,15 @@ size_t base64_decoded_size(const TypeName* src, size_t size) { template -size_t base64_decode(char* const dst, const size_t dstlen, - const TypeName* const src, const size_t srclen) { +size_t base64_decode(char* const dst, + const size_t dstlen, + const TypeName* const src, + const size_t srclen, + bool* succ, + const bool strict) { const size_t decoded_size = base64_decoded_size(src, srclen); - return base64_decode_fast(dst, dstlen, src, srclen, decoded_size); + return base64_decode_fast( + dst, dstlen, src, srclen, decoded_size, succ, strict); } diff --git a/src/base64.h b/src/base64.h index 0db096810cd4a8..c8578ffbe5614d 100644 --- a/src/base64.h +++ b/src/base64.h @@ -53,8 +53,12 @@ template size_t base64_decoded_size(const TypeName* src, size_t size); template -size_t base64_decode(char* const dst, const size_t dstlen, - const TypeName* const src, const size_t srclen); +size_t base64_decode(char* const dst, + const size_t dstlen, + const TypeName* const src, + size_t srclen, + bool* succ = nullptr, + const bool strict = false); inline size_t base64_encode(const char* src, size_t slen, diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 9006c1de767533..71a824d543716c 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1158,6 +1158,82 @@ void GetZeroFillToggle(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(Uint32Array::New(ab, 0, 1)); } +void AToB(const FunctionCallbackInfo& args) { + CHECK(args[0]->IsString()); + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + Local input = args[0].As(); + + // Check latin + if (!input->IsOneByte()) { + env->isolate()->ThrowException(ERR_INVALID_CHARACTER(env->isolate())); + return; + } + + node::Utf8Value input_utf8_value(isolate, input); + + std::string input_str = input_utf8_value.ToString(); + + size_t max_dest_len = base64_decoded_size_fast(input_str.length()); + char* dest = static_cast(malloc(max_dest_len * sizeof(char))); + bool succ; + int dest_len = base64_decode(dest, + max_dest_len, + input_str.c_str(), + input_str.length(), + &succ, + true /* strict */); + + if (!succ) { + free(dest); + env->isolate()->ThrowException(ERR_INVALID_CHARACTER(env->isolate())); + return; + } + + MaybeLocal maybe_dest = + String::NewFromOneByte(isolate, + reinterpret_cast(dest), + v8::NewStringType::kNormal, + dest_len); + CHECK(!maybe_dest.IsEmpty()); + args.GetReturnValue().Set(maybe_dest.ToLocalChecked()); + + free(dest); +} + +void BToA(const FunctionCallbackInfo& args) { + CHECK(args[0]->IsString()); + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + Local input = args[0].As(); + + // Check latin + if (!input->IsOneByte()) { + env->isolate()->ThrowException(ERR_INVALID_CHARACTER(env->isolate())); + return; + } + + node::Utf8Value input_utf8_value(isolate, input); + + std::string input_str = input_utf8_value.ToString(); + + size_t max_dest_len = base64_encoded_size(input_str.length()); + char* dest = static_cast(malloc(max_dest_len * sizeof(char))); + + int dest_len = + base64_encode(input_str.c_str(), input_str.length(), dest, max_dest_len); + + MaybeLocal maybe_dest = + String::NewFromOneByte(isolate, + reinterpret_cast(dest), + v8::NewStringType::kNormal, + dest_len); + CHECK(!maybe_dest.IsEmpty()); + args.GetReturnValue().Set(maybe_dest.ToLocalChecked()); + + free(dest); +} + void Initialize(Local target, Local unused, Local context, @@ -1209,6 +1285,9 @@ void Initialize(Local target, env->SetMethod(target, "getZeroFillToggle", GetZeroFillToggle); + env->SetMethod(target, "atob", AToB); + env->SetMethod(target, "btoa", BToA); + Blob::Initialize(env, target); } @@ -1251,6 +1330,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(StringWrite); registry->Register(GetZeroFillToggle); + registry->Register(AToB); + registry->Register(BToA); + Blob::RegisterExternalReferences(registry); FixedSizeBlobCopyJob::RegisterExternalReferences(registry); } diff --git a/src/node_errors.h b/src/node_errors.h index 291365fa3b4dc9..cea34010c8d6c0 100644 --- a/src/node_errors.h +++ b/src/node_errors.h @@ -60,6 +60,7 @@ void OnFatalError(const char* location, const char* message); V(ERR_INVALID_ARG_VALUE, TypeError) \ V(ERR_OSSL_EVP_INVALID_DIGEST, Error) \ V(ERR_INVALID_ARG_TYPE, TypeError) \ + V(ERR_INVALID_CHARACTER, RangeError) \ V(ERR_INVALID_MODULE, Error) \ V(ERR_INVALID_THIS, TypeError) \ V(ERR_INVALID_TRANSFER_OBJECT, TypeError) \ @@ -145,6 +146,7 @@ ERRORS_WITH_CODE(V) V(ERR_EXECUTION_ENVIRONMENT_NOT_AVAILABLE, \ "Context not associated with Node.js environment") \ V(ERR_INVALID_ADDRESS, "Invalid socket address") \ + V(ERR_INVALID_CHARACTER, "Invalid character") \ V(ERR_INVALID_MODULE, "No such module") \ V(ERR_INVALID_THIS, "Value of \"this\" is the wrong type") \ V(ERR_INVALID_TRANSFER_OBJECT, "Found invalid object in transferList") \ diff --git a/test/parallel/test-btoa-atob.js b/test/parallel/test-btoa-atob.js new file mode 100644 index 00000000000000..120f98e36f8a2b --- /dev/null +++ b/test/parallel/test-btoa-atob.js @@ -0,0 +1,35 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); + +let s1 = ''; +for (let i = 0; i < 256; ++i) s1 += String.fromCharCode(i); +const s1B64 = Buffer.from(s1).toString('base64'); + +assert.strictEqual(btoa(s1), s1B64); + +const s2 = 'hello world'; +const s2B64 = Buffer.from(s2).toString('base64'); +assert.strictEqual(btoa(s2), s2B64); + +const s3 = 'BlingBling...'; +const s3B64 = Buffer.from(s3).toString('base64'); +assert.strictEqual(btoa(s3), s3B64); + +const s4 = '哇咔咔'; +const s4B64 = Buffer.from(s4).toString('base64'); +assert.throws(() => { btoa(s4); }, { + name: 'InvalidCharacterError', + message: 'Invalid character', + code: 5, +}); + +assert.strictEqual(atob(s1B64), + Buffer.from(s1B64, 'base64').toString('latin1')); +assert.strictEqual(atob(s2B64), + Buffer.from(s2B64, 'base64').toString('latin1')); +assert.strictEqual(atob(s3B64), + Buffer.from(s3B64, 'base64').toString('latin1')); +assert.strictEqual(atob(s4B64), + Buffer.from(s4B64, 'base64').toString('latin1'));