Skip to content

Commit

Permalink
[wasm] Shorter jiterp encoding for zero v128 constants; exploit natural stack alignment (#88058)
Browse files Browse the repository at this point in the history

* Add support for v128 trace locals
* Use a default-initialized local for zero v128 constants
* Attempt to exploit natural alignment for local loads and stores more often
  • Loading branch information
kg authored Jun 26, 2023
1 parent 5e71921 commit 369301f
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 39 deletions.
48 changes: 32 additions & 16 deletions src/mono/wasm/runtime/jiterpreter-support.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,22 @@ export class WasmBuilder {
this.i52_const(0);
this.appendSimd(WasmSimdOpcode.i64x2_splat);
*/
this.appendSimd(WasmSimdOpcode.v128_const);
for (let i = 0; i < 16; i++)
this.appendU8(0);
this.local("v128_zero");
} else if (typeof (value) === "object") {
mono_assert(value.byteLength === 16, "Expected v128_const arg to be 16 bytes in size");
this.appendSimd(WasmSimdOpcode.v128_const);
this.appendBytes(value);
let isZero = true;
for (let i = 0; i < 16; i++) {
if (value[i] !== 0)
isZero = false;
}

if (isZero) {
// mono_log_info("Detected that literal v128_const was zero");
this.local("v128_zero");
} else {
this.appendSimd(WasmSimdOpcode.v128_const);
this.appendBytes(value);
}
} else {
throw new Error("Expected v128_const arg to be 0 or a Uint8Array");
}
Expand Down Expand Up @@ -723,6 +732,7 @@ export class WasmBuilder {
counts[WasmValtype.i64] = 0;
counts[WasmValtype.f32] = 0;
counts[WasmValtype.f64] = 0;
counts[WasmValtype.v128] = 0;

for (const k in locals) {
const ty = locals[k];
Expand All @@ -734,34 +744,39 @@ export class WasmBuilder {
const offi32 = 0,
offi64 = counts[WasmValtype.i32],
offf32 = offi64 + counts[WasmValtype.i64],
offf64 = offf32 + counts[WasmValtype.f32];
offf64 = offf32 + counts[WasmValtype.f32],
offv128 = offf64 + counts[WasmValtype.f64];

counts[WasmValtype.i32] = 0;
counts[WasmValtype.i64] = 0;
counts[WasmValtype.f32] = 0;
counts[WasmValtype.f64] = 0;
counts[WasmValtype.v128] = 0;

for (const k in locals) {
const ty = locals[k];
let idx = 0;
let idx = 0, offset;
switch (ty) {
case WasmValtype.i32:
idx = (counts[ty]++) + offi32 + base;
this.locals.set(k, idx);
offset = offi32;
break;
case WasmValtype.i64:
idx = (counts[ty]++) + offi64 + base;
this.locals.set(k, idx);
offset = offi64;
break;
case WasmValtype.f32:
idx = (counts[ty]++) + offf32 + base;
this.locals.set(k, idx);
offset = offf32;
break;
case WasmValtype.f64:
idx = (counts[ty]++) + offf64 + base;
this.locals.set(k, idx);
offset = offf64;
break;
case WasmValtype.v128:
offset = offv128;
break;
default:
throw new Error(`Unimplemented valtype: ${ty}`);
}
idx = (counts[ty]++) + offset + base;
this.locals.set(k, idx);
// mono_log_info(`local ${k} ${locals[k]} -> ${idx}`);
}

Expand All @@ -780,7 +795,7 @@ export class WasmBuilder {
this.locals.clear();
this.branchTargets.clear();
let counts: any = {};
const tk = [WasmValtype.i32, WasmValtype.i64, WasmValtype.f32, WasmValtype.f64];
const tk = [WasmValtype.i32, WasmValtype.i64, WasmValtype.f32, WasmValtype.f64, WasmValtype.v128];

// We first assign the parameters local indices and then
// we assign the named locals indices, because parameters
Expand Down Expand Up @@ -1412,6 +1427,7 @@ export const enum WasmValtype {
i64 = 0x7E,
f32 = 0x7D,
f64 = 0x7C,
v128 = 0x7B,
}

let wasmTable: WebAssembly.Table | undefined;
Expand Down
69 changes: 63 additions & 6 deletions src/mono/wasm/runtime/jiterpreter-trace-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1721,6 +1721,67 @@ function append_branch_target_block(builder: WasmBuilder, ip: MintOpcodePtr, isB
builder.cfg.startBranchBlock(ip, isBackBranchTarget);
}

// Chooses the alignment hint to encode in the memarg of a load/store that
// accesses `offset`. The result is a power-of-two exponent: 4 means 16 bytes,
// 3 means 8 bytes, 2 means 4 bytes, 1 means 2 bytes and 0 means 1 byte.
//
//   offset          - byte offset being accessed
//   opcodeOrPrefix  - the load/store opcode, or PREFIX_simd for vector operations
//   simdOpcode      - the vector opcode; only consulted when opcodeOrPrefix is PREFIX_simd
//
// The hint is the smaller of (a) the largest alignment the offset itself
// satisfies and (b) the natural alignment of the access performed by the opcode.
function computeMemoryAlignment(offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
    // (a) Best alignment the offset permits: try 16, 8, 4 and 2 bytes in turn
    // and keep the first one that divides the offset evenly.
    let offsetAlignment = 0;
    for (let exponent = 4; exponent > 0; exponent--) {
        if (offset % (1 << exponent) === 0) {
            offsetAlignment = exponent;
            break;
        }
    }

    // (b) Natural alignment of the access itself.
    // stackval is 8 bytes. interp aligns the stack to 16 bytes for v128.
    // The wasm spec prohibits an alignment higher than the natural alignment,
    // so the offset-derived value has to be capped per opcode.
    let naturalAlignment: number;
    switch (opcodeOrPrefix) {
        case WasmOpcode.PREFIX_simd: {
            // Only plain v128 loads/stores get a hint; for every other vector
            // memory operation assume nothing about alignment.
            const isPlainVectorAccess =
                (simdOpcode === WasmSimdOpcode.v128_load) ||
                (simdOpcode === WasmSimdOpcode.v128_store);
            naturalAlignment = isPlainVectorAccess ? 4 : 0;
            break;
        }

        // 64-bit accesses
        case WasmOpcode.i64_load:
        case WasmOpcode.f64_load:
        case WasmOpcode.i64_store:
        case WasmOpcode.f64_store:
            naturalAlignment = 3;
            break;

        // 32-bit accesses
        case WasmOpcode.i64_load32_s:
        case WasmOpcode.i64_load32_u:
        case WasmOpcode.i64_store32:
        case WasmOpcode.i32_load:
        case WasmOpcode.f32_load:
        case WasmOpcode.i32_store:
        case WasmOpcode.f32_store:
            naturalAlignment = 2;
            break;

        // 16-bit accesses
        case WasmOpcode.i64_load16_s:
        case WasmOpcode.i64_load16_u:
        case WasmOpcode.i32_load16_s:
        case WasmOpcode.i32_load16_u:
        case WasmOpcode.i64_store16:
        case WasmOpcode.i32_store16:
            naturalAlignment = 1;
            break;

        // 8-bit accesses, and any opcode not listed above, get no hint at all.
        case WasmOpcode.i64_load8_s:
        case WasmOpcode.i64_load8_u:
        case WasmOpcode.i32_load8_s:
        case WasmOpcode.i32_load8_u:
        case WasmOpcode.i64_store8:
        case WasmOpcode.i32_store8:
        default:
            naturalAlignment = 0;
            break;
    }

    return Math.min(offsetAlignment, naturalAlignment);
}

function append_ldloc(builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
builder.local("pLocals");
mono_assert(opcodeOrPrefix >= WasmOpcode.i32_load, () => `Expected load opcode but got ${opcodeOrPrefix}`);
Expand All @@ -1729,9 +1790,7 @@ function append_ldloc(builder: WasmBuilder, offset: number, opcodeOrPrefix: Wasm
// This looks wrong but I assure you it's correct.
builder.appendULeb(simdOpcode);
}
// stackval is 8 bytes, but pLocals might not be 8 byte aligned so we use 4
// wasm spec prohibits alignment higher than natural alignment, just to be annoying
const alignment = (simdOpcode !== undefined) || (opcodeOrPrefix > WasmOpcode.f64_load) ? 0 : 2;
const alignment = computeMemoryAlignment(offset, opcodeOrPrefix, simdOpcode);
builder.appendMemarg(offset, alignment);
}

Expand All @@ -1747,9 +1806,7 @@ function append_stloc_tail(builder: WasmBuilder, offset: number, opcodeOrPrefix:
// This looks wrong but I assure you it's correct.
builder.appendULeb(simdOpcode);
}
// stackval is 8 bytes, but pLocals might not be 8 byte aligned so we use 4
// wasm spec prohibits alignment higher than natural alignment, just to be annoying
const alignment = (simdOpcode !== undefined) || (opcodeOrPrefix > WasmOpcode.f64_store) ? 0 : 2;
const alignment = computeMemoryAlignment(offset, opcodeOrPrefix, simdOpcode);
builder.appendMemarg(offset, alignment);
invalidate_local(offset);
// HACK: Invalidate the second stack slot used by a simd vector
Expand Down
38 changes: 21 additions & 17 deletions src/mono/wasm/runtime/jiterpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -774,30 +774,34 @@ function generate_wasm(

builder.generateTypeSection();

const traceLocals : any = {
"disp": WasmValtype.i32,
"cknull_ptr": WasmValtype.i32,
"dest_ptr": WasmValtype.i32,
"src_ptr": WasmValtype.i32,
"memop_dest": WasmValtype.i32,
"memop_src": WasmValtype.i32,
"index": WasmValtype.i32,
"count": WasmValtype.i32,
"math_lhs32": WasmValtype.i32,
"math_rhs32": WasmValtype.i32,
"math_lhs64": WasmValtype.i64,
"math_rhs64": WasmValtype.i64,
"temp_f32": WasmValtype.f32,
"temp_f64": WasmValtype.f64,
"backbranched": WasmValtype.i32,
};
if (builder.options.enableSimd)
traceLocals["v128_zero"] = WasmValtype.v128;

let keep = true,
traceValue = 0;
builder.defineFunction(
{
type: "trace",
name: traceName,
export: true,
locals: {
"disp": WasmValtype.i32,
"cknull_ptr": WasmValtype.i32,
"dest_ptr": WasmValtype.i32,
"src_ptr": WasmValtype.i32,
"memop_dest": WasmValtype.i32,
"memop_src": WasmValtype.i32,
"index": WasmValtype.i32,
"count": WasmValtype.i32,
"math_lhs32": WasmValtype.i32,
"math_rhs32": WasmValtype.i32,
"math_lhs64": WasmValtype.i64,
"math_rhs64": WasmValtype.i64,
"temp_f32": WasmValtype.f32,
"temp_f64": WasmValtype.f64,
"backbranched": WasmValtype.i32,
}
locals: traceLocals
}, () => {
if (emitPadding) {
builder.appendU8(WasmOpcode.nop);
Expand Down

0 comments on commit 369301f

Please sign in to comment.