Skip to content

Commit

Permalink
⚡ 优化了encoding-hex的性能,现在比 @std/encoding 还快
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaubee committed Oct 10, 2024
1 parent a07e437 commit fa9f14a
Show file tree
Hide file tree
Showing 7 changed files with 338 additions and 43 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

###### 简介

1. 个人项目经验,主要特点在于对原型链的扩展,以及对异步的简化。
1. 个人推荐优先学习 jsr 上,deno 官方推出的 @std 系列标准库。
> 这个库中与 @std 系列有一些交集,交集的部分通常是我个人觉得我自己的实现更好。
1. 个人项目经验,涵盖大量常用函数集合,建议与 deno 的 [`@std/*`](https://jsr.io/@std) 互为补充。
> 这个库中与 @std 系列有一些交集,交集的部分通常是我个人觉得我自己的实现更好:
>
> - 可能是性能上更好
> - 可能是使用体验上更易用
> - 可能是条件边界覆盖更全。
1. 这个库不会包含垫片(polyfill)相关的内容:只要是 esnext 范畴内的特性我都会直接使用,所以请自行处理垫片相关的问题

###### 关于 @gaubee/util/global
Expand Down
2 changes: 1 addition & 1 deletion deno.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@gaubee/util",
"version": "0.7.1",
"version": "0.7.2",
"tasks": {
"build": "deno run -A ./dnt.ts",
"npm": "deno task build && deno task pub-npm",
Expand Down
81 changes: 81 additions & 0 deletions src/encoding-base64.bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Benchmark input shared by every decoder variant. The text is the hex spelling of
// "hello world"; all of its characters are also part of the base64 alphabet, so
// `atob` accepts it as base64 input.
const base64_string = "68656c6c6f20776f726c64";

// One benchmark per decoder variant. Each body is a closure, so the decoder
// constants declared further down are only read when the benchmark runs.
Deno.bench({
  name: "str_to_base64_binary_v1",
  fn() {
    str_to_base64_binary_v1(base64_string);
  },
});
Deno.bench({
  name: "str_to_base64_binary_v2",
  fn() {
    str_to_base64_binary_v2(base64_string);
  },
});
Deno.bench({
  name: "str_to_base64_binary_v3",
  fn() {
    str_to_base64_binary_v3(base64_string);
  },
});
Deno.bench({
  name: "str_to_base64_binary_v4",
  fn() {
    str_to_base64_binary_v4(base64_string);
  },
});

/**
 * Decodes a base64 string into bytes (variant 1: plain `charCodeAt` copy).
 *
 * `atob` returns a "binary string" whose characters each carry one byte as their
 * char code, so every char code is copied straight into the result.
 *
 * @param str base64 text accepted by `atob`
 * @returns the decoded bytes
 */
const str_to_base64_binary_v1 = (str: string): Uint8Array => {
  const decoded = atob(str);
  const byteCount = decoded.length;
  const bytes = new Uint8Array(byteCount);
  for (let offset = 0; offset < byteCount; offset += 1) {
    bytes[offset] = decoded.charCodeAt(offset);
  }
  return bytes;
};
// Shared UTF-8 encoder reused by the v3 and v4 decoders in this file.
const encoder = new TextEncoder();
/**
 * Frozen lookup table mapping each one-character string with char code 0–255
 * to that char code.
 */
const base64_binary_table = (() => {
  const table: Record<string, number> = {};
  for (let code = 0; code < 256; code++) {
    table[String.fromCharCode(code)] = code;
  }
  return Object.freeze(table);
})();

/**
 * Decodes a base64 string into bytes (variant 2: character-keyed table lookup).
 *
 * Same approach as variant 1, except each character of the `atob` result is
 * translated through `base64_binary_table` instead of calling `charCodeAt`.
 *
 * @param str base64 text accepted by `atob`
 * @returns the decoded bytes
 */
const str_to_base64_binary_v2 = (str: string): Uint8Array => {
  const decoded = atob(str);
  const bytes = new Uint8Array(decoded.length);
  for (let offset = 0; offset < bytes.length; offset++) {
    bytes[offset] = base64_binary_table[decoded.charAt(offset)];
  }
  return bytes;
};

/**
 * Decodes a base64 string into bytes (variant 3: via UTF-8 re-encoding).
 *
 * The `atob` result is UTF-8 encoded with the shared `encoder`, then the original
 * byte values are recovered from the UTF-8 sequence:
 * - a byte <= 127 stands for itself;
 * - lead byte 194 (0xC2): the following byte is the original value (0x80–0xBF);
 * - lead byte 195 (0xC3): the following byte plus 64 is the original value (0xC0–0xFF).
 *
 * @param str base64 text accepted by `atob`
 * @returns the decoded bytes
 */
const str_to_base64_binary_v3 = (str: string): Uint8Array => {
  const utf8Bytes = encoder.encode(atob(str));
  const decoded: number[] = [];
  let outLen = 0;
  let cursor = 0;
  while (cursor < utf8Bytes.length) {
    const lead = utf8Bytes[cursor++];
    if (lead <= 127) {
      decoded[outLen++] = lead;
    } else if (lead === 194) {
      decoded[outLen++] = utf8Bytes[cursor++];
    } else if (lead === 195) {
      decoded[outLen++] = utf8Bytes[cursor++] + 64;
    }
  }
  return new Uint8Array(decoded);
};

/**
 * Reusable scratch buffer that receives the UTF-8 form of the `atob` result.
 * It starts at 1000 bytes and is replaced by a larger buffer whenever an input
 * needs more room; it is never shrunk.
 */
let utf8_arr_v4_cache = new Uint8Array(1000);

/**
 * Decodes a base64 string into bytes (variant 4: UTF-8 re-encoding into a reused buffer).
 *
 * Works like variant 3 but encodes with `encodeInto` into a shared scratch buffer
 * and writes the result directly into a pre-sized `Uint8Array`.
 *
 * Previously the scratch buffer had a fixed size of 1000 bytes, so `encodeInto`
 * stopped early on larger inputs and the result was silently truncated. The
 * buffer is now grown before encoding so the whole input is always processed.
 *
 * @param str base64 text accepted by `atob`
 * @returns the decoded bytes
 */
const str_to_base64_binary_v4 = (str: string): Uint8Array => {
  const binaryString = atob(str);
  // Every character of an `atob` result is <= 0xFF, which takes at most two UTF-8 bytes.
  const requiredCapacity = binaryString.length * 2;
  if (utf8_arr_v4_cache.length < requiredCapacity) {
    utf8_arr_v4_cache = new Uint8Array(requiredCapacity);
  }
  const { written } = encoder.encodeInto(binaryString, utf8_arr_v4_cache);
  const base64_arr = new Uint8Array(binaryString.length);
  for (let i = 0, pos = 0; i < written; i++) {
    const c = utf8_arr_v4_cache[i];
    if (c <= 127) {
      // Single-byte sequence: the byte stands for itself.
      base64_arr[pos++] = c;
    } else if (c === 194) {
      // Lead byte 0xC2: the continuation byte is the original value (0x80–0xBF).
      base64_arr[pos++] = utf8_arr_v4_cache[++i];
    } else if (c === 195) {
      // Lead byte 0xC3: original value is the continuation byte + 0x40 (0xC0–0xFF).
      base64_arr[pos++] = utf8_arr_v4_cache[++i] + 64;
    }
  }
  return base64_arr;
};

// console.log(Object.keys(base64_binary_table));
// for (const key in base64_binary_table) {
// console.log(key, base64_binary_table[key], Array.from(encoder.encode(key)));
// }
// console.log(encoder.encode(Object.keys(base64_binary_table).join("")));

// console.log(str_to_base64_binary_v1(base64_string));
// console.log(str_to_base64_binary_v2(base64_string));
// console.log(str_to_base64_binary_v3(base64_string));
// console.log(str_to_base64_binary_v4(base64_string));
188 changes: 188 additions & 0 deletions src/encoding-hex.bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/**
* v1 是 最简单的版本,但已经使用上了缓存技术
* v2 是 deno 的 @std/encoding 的版本
* v3 是 针对性优化版本,优化了缓存访问速度;同时增大了缓存的量
*
* CPU | 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
* Runtime | Deno 1.46.3 (x86_64-pc-windows-msvc)
*
* file:///D:/dev/GitHub/gaubee-util/src/encoding-hex.bench.ts
*
* benchmark time/iter (avg) iter/s (min … max) p75 p99 p995
* ------------------------- ----------------------------- --------------------- --------------------------
* binary_to_hex_string_v1 728.7 ns 1,372,000 (636.3 ns … 1.0 µs) 749.1 ns 1.0 µs 1.0 µs
* binary_to_hex_string_v2 705.8 ns 1,417,000 (622.7 ns … 882.4 ns) 724.0 ns 882.4 ns 882.4 ns
* binary_to_hex_string_v3 352.9 ns 2,834,000 (305.8 ns … 697.5 ns) 355.2 ns 658.5 ns 697.5 ns
* str_to_hex_binary_v1 3.1 µs 317,600 ( 2.9 µs … 3.7 µs) 3.2 µs 3.7 µs 3.7 µs
* str_to_hex_binary_v2 1.2 µs 825,100 ( 1.1 µs … 1.7 µs) 1.2 µs 1.7 µs 1.7 µs
* str_to_hex_binary_v3 616.1 ns 1,623,000 (488.9 ns … 1.0 µs) 647.2 ns 1.0 µs 1.0 µs
*
* 可以看到 v3 性能基本是比 v2 快一倍
* 项目中还需要考虑到综合启动速度,所以会在 v3 的基础上,做一些惰性缓存,也即是第一次执行才会激活缓存
*/

// Shared sample data: 70 random bytes plus the matching lowercase hex text.
const test_u8a = crypto.getRandomValues(new Uint8Array(70));
const test_hex = Array.from(test_u8a, (byte) => byte.toString(16).padStart(2, "0")).join("");

// Encoder benchmarks (bytes -> hex text). Bodies are closures so the functions
// declared later in the file are only read when a benchmark runs.
Deno.bench({
  name: "binary_to_hex_string_v1",
  fn() {
    binary_to_hex_string_v1(test_u8a);
  },
});
Deno.bench({
  name: "binary_to_hex_string_v2",
  fn() {
    binary_to_hex_string_v2(test_u8a);
  },
});
Deno.bench({
  name: "binary_to_hex_string_v3",
  fn() {
    binary_to_hex_string_v3(test_u8a);
  },
});

// Decoder benchmarks (hex text -> bytes).
Deno.bench({
  name: "str_to_hex_binary_v1",
  fn() {
    str_to_hex_binary_v1(test_hex);
  },
});
Deno.bench({
  name: "str_to_hex_binary_v2",
  fn() {
    str_to_hex_binary_v2(test_hex);
  },
});
Deno.bench({
  name: "str_to_hex_binary_v3",
  fn() {
    str_to_hex_binary_v3(test_hex);
  },
});
/**
 * Frozen table of the two-digit lowercase hex text for every byte value,
 * indexed by the byte (0 -> "00", 255 -> "ff").
 */
const binary_hex8_table = (() => {
  const table: string[] = [];
  for (let byte = 0; byte < 256; byte++) {
    table.push(byte.toString(16).padStart(2, "0"));
  }
  return Object.freeze(table);
})();

/**
 * Converts bytes to a lowercase hex string (variant 1: one table lookup per byte).
 *
 * @param u8a bytes to encode
 * @returns hex text, two characters per input byte
 */
const binary_to_hex_string_v1 = (u8a: Uint8Array): string => {
  let hex = "";
  for (let offset = 0; offset < u8a.length; offset++) {
    hex += binary_hex8_table[u8a[offset]];
  }
  return hex;
};

/**
 * Frozen table of the four-digit lowercase hex text for every 16-bit value,
 * indexed by the value (0 -> "0000", 65535 -> "ffff").
 */
const binary_hex16_table = (() => {
  const table: string[] = [];
  for (let pair = 0; pair < 2 ** 16; pair++) {
    table.push(pair.toString(16).padStart(4, "0"));
  }
  return Object.freeze(table);
})();

/**
 * Converts bytes to a lowercase hex string (variant 3: two bytes per table lookup).
 *
 * Bytes are consumed in pairs through `binary_hex16_table`; when the input has an
 * odd length, the final byte is encoded through `binary_hex8_table`.
 *
 * @param u8a bytes to encode
 * @returns hex text, two characters per input byte
 */
const binary_to_hex_string_v3 = (u8a: Uint8Array): string => {
  const len = u8a.length;
  // Largest even length that fits, i.e. the part that can be handled pairwise.
  const pairedLen = len - (len % 2);
  let hex = "";
  let offset = 0;
  while (offset < pairedLen) {
    const high = u8a[offset++];
    const low = u8a[offset++];
    hex += binary_hex16_table[(high << 8) + low];
  }
  if (pairedLen !== len) {
    hex += binary_hex8_table[u8a[pairedLen]];
  }
  return hex;
};
/**
 * Converts a lowercase hex string to bytes (variant 1: two-character string keys).
 *
 * Each pair of characters is concatenated and looked up in `hex8_binary_table`.
 * The store into the result had been commented out, which made the function
 * return all zeros; it is restored here.
 *
 * Input is not validated: a pair that is not two lowercase hex digits has no
 * table entry and is stored as 0, and a trailing unpaired character is ignored.
 *
 * @param str lowercase hex text
 * @returns the decoded bytes, one per character pair
 */
const str_to_hex_binary_v1 = (str: string): Uint8Array => {
  const uint8Array = new Uint8Array(Math.floor(str.length / 2));
  for (let i = 0; i < uint8Array.length; i++) {
    const str_index = i * 2;
    uint8Array[i] = hex8_binary_table[str[str_index] + str[str_index + 1]];
  }
  return uint8Array;
};

/**
 * Frozen table mapping every two-digit lowercase hex string ("00" … "ff") to its
 * byte value. Keys are computed directly so the table does not depend on the
 * declaration order of other tables in this file.
 */
const hex8_binary_table = (() => {
  const table: Record<string, number> = {};
  for (let byte = 0; byte < 256; byte++) {
    table[byte.toString(16).padStart(2, "0")] = byte;
  }
  return Object.freeze(table);
})();

/**
 * Converts a hex string to bytes (variant 3: nested char-code lookup).
 *
 * Each byte is resolved as `table[charCode(first digit)][charCode(second digit)]`,
 * avoiding the string concatenation of variant 1.
 *
 * Both lowercase and uppercase digits are accepted. Previously only lowercase was
 * present in the table, so "Ab" threw a TypeError while "aB" silently decoded to 0.
 *
 * Input is otherwise not validated: a non-hex first character throws a TypeError
 * (no row exists for it), a non-hex second character is stored as 0, and a
 * trailing unpaired character is ignored.
 *
 * @param str hex text (either case)
 * @returns the decoded bytes, one per character pair
 */
const str_to_hex_binary_v3 = (str: string): Uint8Array => {
  const uint8Array = new Uint8Array(Math.floor(str.length / 2));
  for (let i = 0; i < uint8Array.length; i++) {
    const str_index = i * 2;
    uint8Array[i] = hex8_binary_ver_table[str.charCodeAt(str_index)][str.charCodeAt(str_index + 1)];
  }
  return uint8Array;
};

/**
 * Sparse two-level table: the outer index is the char code of the high hex digit,
 * the inner index is the char code of the low hex digit, and the value is the byte.
 * Lowercase and uppercase letters of the same digit share entries.
 */
const hex8_binary_ver_table: number[][] = [];
for (let high = 0; high < 16; high++) {
  const row: number[] = [];
  for (let low = 0; low < 16; low++) {
    const value = (high << 4) + low;
    const lowDigit = low.toString(16);
    row[lowDigit.charCodeAt(0)] = value;
    // For "0"–"9" this writes the same slot again, which is harmless.
    row[lowDigit.toUpperCase().charCodeAt(0)] = value;
  }
  const highDigit = high.toString(16);
  hex8_binary_ver_table[highDigit.charCodeAt(0)] = row;
  hex8_binary_ver_table[highDigit.toUpperCase().charCodeAt(0)] = row;
}

// Shared codec instances for the v2 (@std/encoding-style) implementations.
const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder();
// Byte values of the sixteen lowercase hex digits, indexed by nibble value.
const hexTable = textEncoder.encode("0123456789abcdef");

/**
 * Converts bytes to a lowercase hex string (variant 2).
 *
 * Writes the two hex-digit bytes of every input byte into a byte buffer and
 * decodes that buffer to a string in one step.
 *
 * @param u8 bytes to encode
 * @returns hex text, two characters per input byte
 */
function binary_to_hex_string_v2(u8: Uint8Array): string {
  const hexBytes = new Uint8Array(u8.length * 2);
  let out = 0;
  for (const byte of u8) {
    hexBytes[out++] = hexTable[byte >> 4]!;
    hexBytes[out++] = hexTable[byte & 0x0f]!;
  }
  return textDecoder.decode(hexBytes);
}
/**
 * Converts a hex string to bytes (variant 2).
 *
 * The text is UTF-8 encoded and every pair of bytes is turned into one output
 * byte. Upper- and lowercase digits are accepted.
 *
 * @param src hex text
 * @returns the decoded bytes
 * @throws {TypeError} when a character is not a hex digit
 * @throws {RangeError} when the encoded input has an odd length
 */
function str_to_hex_binary_v2(src: string): Uint8Array {
  const srcBytes = textEncoder.encode(src);
  const pairCount = Math.floor(srcBytes.length / 2);
  const dst = new Uint8Array(pairCount);
  for (let out = 0, pos = 0; out < pairCount; out++) {
    const high = fromHexChar(srcBytes[pos++]!);
    const low = fromHexChar(srcBytes[pos++]!);
    dst[out] = (high << 4) | low;
  }

  const hasDanglingByte = srcBytes.length % 2 === 1;
  if (hasDanglingByte) {
    // Validate the leftover byte first: an invalid character is the earlier
    // problem and should be reported in preference to the bad length.
    fromHexChar(srcBytes[pairCount * 2]!);
    throw errLength(srcBytes.length);
  }

  return dst;
}

/**
 * Converts the byte of one hex digit into its numeric value (0–15).
 *
 * @throws {TypeError} when the byte is not `0-9`, `a-f` or `A-F`
 */
function fromHexChar(byte: number): number {
  // '0'..'9'
  if (byte >= 48 && byte <= 57) return byte - 48;
  // 'a'..'f'
  if (byte >= 97 && byte <= 102) return byte - 87;
  // 'A'..'F'
  if (byte >= 65 && byte <= 70) return byte - 55;

  throw errInvalidByte(byte);
}

/** Builds the error reported for a byte that is not a hex digit. */
function errInvalidByte(byte: number) {
  const message = `Invalid byte '${String.fromCharCode(byte)}'`;
  return new TypeError(message);
}

/** Builds the error reported for input whose length is odd. */
function errLength(len: number) {
  const message = `Cannot decode the hex string as the input length should be even: length is ${len}`;
  return new RangeError(message);
}

// When this file is the program entry point, print every variant's result for
// the shared sample data so the implementations can be compared by eye.
if (import.meta.main) {
  const encoders = [binary_to_hex_string_v1, binary_to_hex_string_v2, binary_to_hex_string_v3];
  for (const toHex of encoders) {
    console.log(toHex(test_u8a));
  }

  const decoders = [str_to_hex_binary_v1, str_to_hex_binary_v2, str_to_hex_binary_v3];
  for (const fromHex of decoders) {
    console.log(fromHex(test_hex));
  }

  // Disabled debug dump of the hex -> byte table and the char codes of each key:
  // for (const key in hex8_binary_table) {
  //   console.log(
  //     key,
  //     key.charCodeAt(0),
  //     key.charCodeAt(1),
  //     key.charCodeAt(0) + key.charCodeAt(1),
  //     key,
  //     hex8_binary_table[key],
  //   );
  // }
}
// 9 57
// a 96 % => 58
27 changes: 0 additions & 27 deletions src/encoding.bench.ts

This file was deleted.

Loading

0 comments on commit fa9f14a

Please sign in to comment.