Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src,lib,buffer: improve atob / btoa performance #38433

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions benchmark/buffers/buffer-atob.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
'use strict';
const buffer = require('buffer');
const common = require('../common.js');

// Benchmark matrix for buffer.atob(): `len` is the number of characters in
// the pre-encoded payload and `n` is how many times it gets decoded. The
// second options object supplies a much smaller `len` for test runs.
const bench = common.createBenchmark(main, {
  len: [64 * 1024],
  n: [32],
}, {
  test: { len: 256 },
});

// Benchmark body: builds a payload out of repeated 256-character alphabets
// (code points 0..255), base64-encodes it once up front, and then measures
// `n` calls to buffer.atob() on the encoded string.
function main({ n, len }) {
  // One copy of every code point from 0 to 255.
  let alphabet = '';
  for (let code = 0; code < 256; code++) {
    alphabet += String.fromCharCode(code);
  }

  // Append whole alphabets until at least `len` characters were produced.
  let payload = '';
  for (let produced = 0; produced < len; produced += 256) {
    payload += alphabet;
  }

  // Encoding happens outside of the timed section.
  const encoded = btoa(payload);

  bench.start();
  for (let round = 0; round < n; round++) {
    buffer.atob(encoded);
  }
  bench.end(n);
}
21 changes: 21 additions & 0 deletions benchmark/buffers/buffer-btoa.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
'use strict';
const buffer = require('buffer');
const common = require('../common.js');

// Benchmark matrix for buffer.btoa(): `len` is the number of characters in
// the input string and `n` is how many times it gets encoded. The second
// options object supplies a much smaller `len` for test runs.
const bench = common.createBenchmark(main, {
  len: [64 * 1024 * 1024],
  n: [32],
}, {
  test: { len: 256 },
});

// Benchmark body: builds a payload out of repeated 256-character alphabets
// (code points 0..255) and then measures `n` calls to buffer.btoa() on it.
function main({ n, len }) {
  // One copy of every code point from 0 to 255.
  let alphabet = '';
  for (let code = 0; code < 256; code++) {
    alphabet += String.fromCharCode(code);
  }

  // Append whole alphabets until at least `len` characters were produced.
  let payload = '';
  for (let produced = 0; produced < len; produced += 256) {
    payload += alphabet;
  }

  bench.start();
  for (let round = 0; round < n; round++) {
    buffer.btoa(payload);
  }
  bench.end(n);
}
46 changes: 23 additions & 23 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ const {
swap32: _swap32,
swap64: _swap64,
kMaxLength,
kStringMaxLength
kStringMaxLength,
btoa: _btoa,
atob: _atob,
} = internalBinding('buffer');
const {
getOwnNonIndexProperties,
Expand Down Expand Up @@ -1213,36 +1215,34 @@ if (internalBinding('config').hasIntl) {

let DOMException;

const lazyInvalidCharError = hideStackFrames((message, name) => {
if (DOMException === undefined)
DOMException = internalBinding('messaging').DOMException;
throw new DOMException('Invalid character', 'InvalidCharacterError');
});

function btoa(input) {
// TODO(@jasnell): The implementation here has not been performance
// optimized in any way.
input = `${input}`;
for (let n = 0; n < input.length; n++) {
if (input[n].charCodeAt(0) > 0xff)
lazyInvalidCharError();
let ret;
try {
ret = _btoa(input);
} catch (e) {
if (DOMException === undefined)
DOMException = internalBinding('messaging').DOMException;
let code = 'Error';
if (e.code === 'ERR_INVALID_CHARACTER') code = 'InvalidCharacterError';
throw new DOMException(e.message, code);
}
const buf = Buffer.from(input, 'latin1');
return buf.toString('base64');
return ret;
}

const kBase64Digits =
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';

function atob(input) {
// TODO(@jasnell): The implementation here has not been performance
// optimized in any way.
input = `${input}`;
for (let n = 0; n < input.length; n++) {
if (!kBase64Digits.includes(input[n]))
lazyInvalidCharError();
let ret;
try {
ret = _atob(input);
} catch (e) {
if (DOMException === undefined)
DOMException = internalBinding('messaging').DOMException;
let code = 'Error';
if (e.code === 'ERR_INVALID_CHARACTER') code = 'InvalidCharacterError';
throw new DOMException(e.message, code);
}
return Buffer.from(input, 'base64').toString('latin1');
return ret;
}

module.exports = {
Expand Down
74 changes: 68 additions & 6 deletions src/base64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,21 @@ bool base64_decode_group_slow(char* const dst, const size_t dstlen,
#endif

template <typename TypeName>
size_t base64_decode_fast(char* const dst, const size_t dstlen,
const TypeName* const src, const size_t srclen,
const size_t decoded_size) {
size_t base64_decode_fast(char* const dst,
const size_t dstlen,
const TypeName* const src,
size_t srclen,
const size_t decoded_size,
bool* succ,
const bool strict) {
CHECK(!strict || succ != nullptr);

if (static_cast<unsigned char>(src[srclen - 1]) == '=') {
srclen--;
if (static_cast<unsigned char>(src[srclen - 1]) == '=') srclen--;
}

if (succ != nullptr) *succ = true;
const size_t available = dstlen < decoded_size ? dstlen : decoded_size;
const size_t max_k = available / 3 * 3;
size_t max_i = srclen / 4 * 4;
Expand All @@ -79,6 +91,10 @@ size_t base64_decode_fast(char* const dst, const size_t dstlen,
const uint32_t v = ReadUint32BE(txt);
// If MSB is set, input contains whitespace or is not valid base64.
if (v & 0x80808080) {
if (strict) {
*succ = false;
return 0;
}
if (!base64_decode_group_slow(dst, dstlen, src, srclen, &i, &k))
return k;
max_i = i + (srclen - i) / 4 * 4; // Align max_i again.
Expand All @@ -90,9 +106,50 @@ size_t base64_decode_fast(char* const dst, const size_t dstlen,
k += 3;
}
}

if (i < srclen && k < dstlen) {
#define CHECK_INVALID(x) \
if (txt[(x)] & 0x80) { \
if (strict) { \
*succ = false; \
return 0; \
} else { \
break; \
} \
}

int leftover = srclen - i;
unsigned char txt[] = {0, 0, 0, 0};
switch (leftover) {
case 4:
txt[3] = static_cast<unsigned char>(
unbase64(static_cast<uint8_t>(src[i + 3])));
CHECK_INVALID(3);
// DO NOT break;

case 3:
txt[2] = static_cast<unsigned char>(
unbase64(static_cast<uint8_t>(src[i + 2])));
CHECK_INVALID(2);
// DO NOT break;

case 2:
txt[1] = static_cast<unsigned char>(
unbase64(static_cast<uint8_t>(src[i + 1])));
CHECK_INVALID(1);
// DO NOT break;

default:
txt[0] = static_cast<unsigned char>(
unbase64(static_cast<uint8_t>(src[i + 0])));
CHECK_INVALID(0);
}

#undef CHECK_INVALID

base64_decode_group_slow(dst, dstlen, src, srclen, &i, &k);
}

return k;
}

Expand All @@ -113,10 +170,15 @@ size_t base64_decoded_size(const TypeName* src, size_t size) {


template <typename TypeName>
size_t base64_decode(char* const dst, const size_t dstlen,
const TypeName* const src, const size_t srclen) {
size_t base64_decode(char* const dst,
const size_t dstlen,
const TypeName* const src,
const size_t srclen,
bool* succ,
const bool strict) {
const size_t decoded_size = base64_decoded_size(src, srclen);
return base64_decode_fast(dst, dstlen, src, srclen, decoded_size);
return base64_decode_fast(
dst, dstlen, src, srclen, decoded_size, succ, strict);
}


Expand Down
8 changes: 6 additions & 2 deletions src/base64.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,12 @@ template <typename TypeName>
size_t base64_decoded_size(const TypeName* src, size_t size);

template <typename TypeName>
size_t base64_decode(char* const dst, const size_t dstlen,
const TypeName* const src, const size_t srclen);
size_t base64_decode(char* const dst,
const size_t dstlen,
const TypeName* const src,
size_t srclen,
bool* succ = nullptr,
const bool strict = false);

inline size_t base64_encode(const char* src,
size_t slen,
Expand Down
82 changes: 82 additions & 0 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,82 @@ void GetZeroFillToggle(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(Uint32Array::New(ab, 0, 1));
}

// Native backend for atob(): strictly decodes a base64 string.
//
// args[0] must be a string (enforced with CHECK). On success the return value
// is set to a string holding one character per decoded byte. If the input
// contains a character outside the one-byte (Latin-1) range, or the strict
// decoder reports a failure, ERR_INVALID_CHARACTER is thrown instead and no
// return value is set.
void AToB(const FunctionCallbackInfo<Value>& args) {
  CHECK(args[0]->IsString());
  Environment* env = Environment::GetCurrent(args);
  Isolate* isolate = env->isolate();
  Local<String> input = args[0].As<String>();

  // IsOneByte() only describes the string's internal representation and may
  // return false for a string whose characters all fit into one byte.
  // ContainsOnlyOneByte() inspects the actual contents.
  if (!input->ContainsOnlyOneByte()) {
    isolate->ThrowException(ERR_INVALID_CHARACTER(isolate));
    return;
  }

  const int input_len = input->Length();
  if (input_len == 0) {
    // base64_decode_fast() reads src[srclen - 1], so it must never be handed
    // an empty input. An empty string decodes to an empty string.
    args.GetReturnValue().SetEmptyString();
    return;
  }

  // Copy the characters out as raw one-byte values; no UTF-8 round-trip.
  std::string encoded(static_cast<size_t>(input_len), '\0');
  input->WriteOneByte(isolate,
                      reinterpret_cast<uint8_t*>(&encoded[0]),
                      0,
                      input_len,
                      String::NO_NULL_TERMINATION);

  // std::string owns the scratch memory, so every exit path releases it and
  // there is no unchecked malloc() result.
  std::string decoded(base64_decoded_size_fast(encoded.size()), '\0');
  bool succ = false;
  const size_t decoded_len = base64_decode(&decoded[0],
                                           decoded.size(),
                                           encoded.data(),
                                           encoded.size(),
                                           &succ,
                                           true /* strict */);

  if (!succ) {
    isolate->ThrowException(ERR_INVALID_CHARACTER(isolate));
    return;
  }

  MaybeLocal<String> maybe_dest =
      String::NewFromOneByte(isolate,
                             reinterpret_cast<const uint8_t*>(decoded.data()),
                             v8::NewStringType::kNormal,
                             static_cast<int>(decoded_len));
  CHECK(!maybe_dest.IsEmpty());
  args.GetReturnValue().Set(maybe_dest.ToLocalChecked());
}

// Native backend for btoa(): base64-encodes a string, treating every
// character as a single byte.
//
// args[0] must be a string (enforced with CHECK). On success the return value
// is set to the base64 text. If the input contains a character outside the
// one-byte (Latin-1) range, ERR_INVALID_CHARACTER is thrown instead and no
// return value is set.
void BToA(const FunctionCallbackInfo<Value>& args) {
  CHECK(args[0]->IsString());
  Environment* env = Environment::GetCurrent(args);
  Isolate* isolate = env->isolate();
  Local<String> input = args[0].As<String>();

  // IsOneByte() only describes the string's internal representation and may
  // return false for a string whose characters all fit into one byte.
  // ContainsOnlyOneByte() inspects the actual contents.
  if (!input->ContainsOnlyOneByte()) {
    isolate->ThrowException(ERR_INVALID_CHARACTER(isolate));
    return;
  }

  // Extract the characters as raw one-byte values. Converting through UTF-8
  // would turn every character in 0x80..0xFF into two bytes and therefore
  // produce the base64 of the wrong byte sequence.
  const int input_len = input->Length();
  std::string latin1(static_cast<size_t>(input_len), '\0');
  if (input_len > 0) {
    input->WriteOneByte(isolate,
                        reinterpret_cast<uint8_t*>(&latin1[0]),
                        0,
                        input_len,
                        String::NO_NULL_TERMINATION);
  }

  // std::string owns the scratch memory, so there is no unchecked malloc()
  // result and nothing to free by hand.
  std::string encoded(base64_encoded_size(latin1.size()), '\0');
  const size_t encoded_len = base64_encode(
      latin1.data(), latin1.size(), &encoded[0], encoded.size());

  MaybeLocal<String> maybe_dest =
      String::NewFromOneByte(isolate,
                             reinterpret_cast<const uint8_t*>(encoded.data()),
                             v8::NewStringType::kNormal,
                             static_cast<int>(encoded_len));
  CHECK(!maybe_dest.IsEmpty());
  args.GetReturnValue().Set(maybe_dest.ToLocalChecked());
}

void Initialize(Local<Object> target,
Local<Value> unused,
Local<Context> context,
Expand Down Expand Up @@ -1209,6 +1285,9 @@ void Initialize(Local<Object> target,

env->SetMethod(target, "getZeroFillToggle", GetZeroFillToggle);

env->SetMethod(target, "atob", AToB);
env->SetMethod(target, "btoa", BToA);

Blob::Initialize(env, target);
}

Expand Down Expand Up @@ -1251,6 +1330,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(StringWrite<UTF8>);
registry->Register(GetZeroFillToggle);

registry->Register(AToB);
registry->Register(BToA);

Blob::RegisterExternalReferences(registry);
FixedSizeBlobCopyJob::RegisterExternalReferences(registry);
}
Expand Down
2 changes: 2 additions & 0 deletions src/node_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void OnFatalError(const char* location, const char* message);
V(ERR_INVALID_ARG_VALUE, TypeError) \
V(ERR_OSSL_EVP_INVALID_DIGEST, Error) \
V(ERR_INVALID_ARG_TYPE, TypeError) \
V(ERR_INVALID_CHARACTER, RangeError) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

atob/btoa are expected to throw a DOMException, per https://html.spec.whatwg.org/multipage/webappapis.html#dom-atob

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error is wrapped in a DOMException on the JavaScript side.

By the way, it seems this PR won't be merged, per @addaleax's suggestion.

V(ERR_INVALID_MODULE, Error) \
V(ERR_INVALID_THIS, TypeError) \
V(ERR_INVALID_TRANSFER_OBJECT, TypeError) \
Expand Down Expand Up @@ -145,6 +146,7 @@ ERRORS_WITH_CODE(V)
V(ERR_EXECUTION_ENVIRONMENT_NOT_AVAILABLE, \
"Context not associated with Node.js environment") \
V(ERR_INVALID_ADDRESS, "Invalid socket address") \
V(ERR_INVALID_CHARACTER, "Invalid character") \
V(ERR_INVALID_MODULE, "No such module") \
V(ERR_INVALID_THIS, "Value of \"this\" is the wrong type") \
V(ERR_INVALID_TRANSFER_OBJECT, "Found invalid object in transferList") \
Expand Down
35 changes: 35 additions & 0 deletions test/parallel/test-btoa-atob.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
'use strict';

require('../common');
const assert = require('assert');

// btoa() treats every input character as one byte (code points 0..255), so
// the expected base64 must be computed from the 'latin1' bytes of the input.
// Using the default UTF-8 encoding would expand 0x80..0xFF to two bytes each
// and yield a different (incorrect) expectation.

// Every code point that btoa() accepts, exactly once.
let s1 = '';
for (let i = 0; i < 256; ++i) s1 += String.fromCharCode(i);
const s1B64 = Buffer.from(s1, 'latin1').toString('base64');

assert.strictEqual(btoa(s1), s1B64);

const s2 = 'hello world';
const s2B64 = Buffer.from(s2, 'latin1').toString('base64');
assert.strictEqual(btoa(s2), s2B64);

const s3 = 'BlingBling...';
const s3B64 = Buffer.from(s3, 'latin1').toString('base64');
assert.strictEqual(btoa(s3), s3B64);

// Characters above 0xFF cannot be represented as a single byte and must be
// rejected. The UTF-8 based base64 of this string is still valid atob() input.
const s4 = '哇咔咔';
const s4B64 = Buffer.from(s4).toString('base64');
assert.throws(() => { btoa(s4); }, {
  name: 'InvalidCharacterError',
  message: 'Invalid character',
  code: 5,
});

assert.strictEqual(atob(s1B64),
                   Buffer.from(s1B64, 'base64').toString('latin1'));
assert.strictEqual(atob(s2B64),
                   Buffer.from(s2B64, 'base64').toString('latin1'));
assert.strictEqual(atob(s3B64),
                   Buffer.from(s3B64, 'base64').toString('latin1'));
assert.strictEqual(atob(s4B64),
                   Buffer.from(s4B64, 'base64').toString('latin1'));

// Decoding what was encoded must give back the original string.
assert.strictEqual(atob(s1B64), s1);
assert.strictEqual(atob(s2B64), s2);
assert.strictEqual(atob(s3B64), s3);