Skip to content

Commit

Permalink
[mono][interp] Super instruction tweaks (#99319)
Browse files Browse the repository at this point in the history
* [mono][interp] Remove most of hardcoded LDC instructions

They have little benefit. Also fix an oversight that resulted in MINT_LDC_I4_S being used only for byte-sized integers.

* [mono][interp] Refactor fetching of immediate value for superinstructions

Make `get_sreg_imm` more flexible, by returning the immediate value and the mint type that this value can fit in.
Simplify return immediate handling by also applying conversion to immediate value, if necessary.

* [mono][interp] Avoid running super instruction pass more than once

We avoid running it if we know we will do another iteration later (especially useful when the first iteration runs with SSA disabled). It can still run multiple times (when bblock optimization requests a retry), but this should be very rare. This saves compilation time and should also remove the need to handle super instructions in the constant folding pass.

Reverts #99055 since it is no longer necessary.

* [mono][interp] Add a few super instructions with 32 bit immediates

Also add instructions for and/or + immediates.

* [mono][interp] Fix MINT_SHL_AND_ generation

`and` + `ldc` will already be transformed into a superinstruction, so this can also be simplified.

* Implement new superinsns in jiterpreter

* [mono][interp] Add explicit cast for clarity

---------

Co-authored-by: Katelyn Gadd <[email protected]>
Co-authored-by: Larry Ewing <[email protected]>
  • Loading branch information
3 people authored Mar 7, 2024
1 parent b0f6444 commit d88ed80
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 184 deletions.
18 changes: 10 additions & 8 deletions src/mono/browser/runtime/jiterpreter-tables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,8 @@ import {
} from "./mintops";

// Lookup table for the constant-load interpreter opcodes that carry no operand:
// each MINT_LDC_I4_* key maps to a [wasm opcode, value] pair, i.e. the wasm
// const instruction to emit and the literal it should push.
// NOTE(review): presumably consumed by emit_ldc in the trace generator — confirm.
export const ldcTable: { [opcode: number]: [WasmOpcode, number] } = {
[MintOpcode.MINT_LDC_I4_M1]: [WasmOpcode.i32_const, -1],
[MintOpcode.MINT_LDC_I4_0]: [WasmOpcode.i32_const, 0],
[MintOpcode.MINT_LDC_I4_1]: [WasmOpcode.i32_const, 1],
[MintOpcode.MINT_LDC_I4_2]: [WasmOpcode.i32_const, 2],
[MintOpcode.MINT_LDC_I4_3]: [WasmOpcode.i32_const, 3],
[MintOpcode.MINT_LDC_I4_4]: [WasmOpcode.i32_const, 4],
[MintOpcode.MINT_LDC_I4_5]: [WasmOpcode.i32_const, 5],
[MintOpcode.MINT_LDC_I4_6]: [WasmOpcode.i32_const, 6],
[MintOpcode.MINT_LDC_I4_7]: [WasmOpcode.i32_const, 7],
[MintOpcode.MINT_LDC_I4_8]: [WasmOpcode.i32_const, 8],
};

// operator, loadOperator, storeOperator
Expand Down Expand Up @@ -93,6 +85,16 @@ export const unopTable: { [opcode: number]: OpRec3 | undefined } = {
[MintOpcode.MINT_CLZ_I8]: [WasmOpcode.i64_clz, WasmOpcode.i64_load, WasmOpcode.i64_store],
[MintOpcode.MINT_CTZ_I8]: [WasmOpcode.i64_ctz, WasmOpcode.i64_load, WasmOpcode.i64_store],
[MintOpcode.MINT_POPCNT_I8]: [WasmOpcode.i64_popcnt, WasmOpcode.i64_load, WasmOpcode.i64_store],

[MintOpcode.MINT_ADD_I4_IMM2]: [WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_MUL_I4_IMM2]: [WasmOpcode.i32_mul, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_ADD_I8_IMM2]: [WasmOpcode.i64_add, WasmOpcode.i64_load, WasmOpcode.i64_store],
[MintOpcode.MINT_MUL_I8_IMM2]: [WasmOpcode.i64_mul, WasmOpcode.i64_load, WasmOpcode.i64_store],

[MintOpcode.MINT_AND_I4_IMM]: [WasmOpcode.i32_and, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_AND_I4_IMM2]: [WasmOpcode.i32_and, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_OR_I4_IMM]: [WasmOpcode.i32_or, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_OR_I4_IMM2]: [WasmOpcode.i32_or, WasmOpcode.i32_load, WasmOpcode.i32_store],
};

// HACK: Generating correct wasm for these is non-trivial so we hand them off to C.
Expand Down
18 changes: 17 additions & 1 deletion src/mono/browser/runtime/jiterpreter-trace-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1525,7 +1525,7 @@ export function generateWasmBody(
} else
ip = abort;
} else if (
(opcode >= MintOpcode.MINT_LDC_I4_M1) &&
(opcode >= MintOpcode.MINT_LDC_I4_0) &&
(opcode <= MintOpcode.MINT_LDC_R8)
) {
if (!emit_ldc(builder, ip, opcode))
Expand Down Expand Up @@ -2610,6 +2610,8 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):

case MintOpcode.MINT_ADD_I4_IMM:
case MintOpcode.MINT_MUL_I4_IMM:
case MintOpcode.MINT_AND_I4_IMM:
case MintOpcode.MINT_OR_I4_IMM:
case MintOpcode.MINT_SHL_I4_IMM:
case MintOpcode.MINT_SHR_I4_IMM:
case MintOpcode.MINT_SHR_UN_I4_IMM:
Expand All @@ -2619,6 +2621,14 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
builder.i32_const(getArgI16(ip, 3));
break;

case MintOpcode.MINT_ADD_I4_IMM2:
case MintOpcode.MINT_MUL_I4_IMM2:
case MintOpcode.MINT_AND_I4_IMM2:
case MintOpcode.MINT_OR_I4_IMM2:
append_ldloc(builder, getArgU16(ip, 2), loadOp);
builder.i32_const(getArgI32(ip, 3));
break;

case MintOpcode.MINT_ADD_I8_IMM:
case MintOpcode.MINT_MUL_I8_IMM:
case MintOpcode.MINT_SHL_I8_IMM:
Expand All @@ -2630,6 +2640,12 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
builder.i52_const(getArgI16(ip, 3));
break;

case MintOpcode.MINT_ADD_I8_IMM2:
case MintOpcode.MINT_MUL_I8_IMM2:
append_ldloc(builder, getArgU16(ip, 2), loadOp);
builder.i52_const(getArgI32(ip, 3));
break;

default:
append_ldloc(builder, getArgU16(ip, 2), loadOp);
break;
Expand Down
63 changes: 36 additions & 27 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3997,36 +3997,13 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
MINT_IN_BREAK;
}

#define LDC(n) do { LOCAL_VAR (ip [1], gint32) = (n); ip += 2; } while (0)
MINT_IN_CASE(MINT_LDC_I4_M1)
LDC(-1);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_0)
LDC(0);
LOCAL_VAR (ip [1], gint32) = 0;
ip += 2;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_1)
LDC(1);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_2)
LDC(2);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_3)
LDC(3);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_4)
LDC(4);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_5)
LDC(5);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_6)
LDC(6);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_7)
LDC(7);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_8)
LDC(8);
LOCAL_VAR (ip [1], gint32) = 1;
ip += 2;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_LDC_I4_S)
LOCAL_VAR (ip [1], gint32) = (short)ip [2];
Expand Down Expand Up @@ -5308,6 +5285,10 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_ADD_I4_IMM2)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + (gint32)READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_ADD1_I8)
LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + 1;
ip += 3;
Expand All @@ -5316,6 +5297,10 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_ADD_I8_IMM2)
LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + (gint32)READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_SUB_I4)
BINOP(gint32, -);
MINT_IN_BREAK;
Expand Down Expand Up @@ -5346,10 +5331,18 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) * (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_MUL_I4_IMM2)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) * (gint32)READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_MUL_I8_IMM)
LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_MUL_I8_IMM2)
LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint32)READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_ADD_MUL_I4_IMM)
LOCAL_VAR (ip [1], gint32) = (LOCAL_VAR (ip [2], gint32) + (gint16)ip [3]) * (gint16)ip [4];
ip += 5;
Expand Down Expand Up @@ -5457,12 +5450,28 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
MINT_IN_CASE(MINT_AND_I4)
BINOP(gint32, &);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_AND_I4_IMM)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) & (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_AND_I4_IMM2)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) & READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_AND_I8)
BINOP(gint64, &);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_OR_I4)
BINOP(gint32, |);
MINT_IN_BREAK;
MINT_IN_CASE(MINT_OR_I4_IMM)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) | (gint16)ip [3];
ip += 4;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_OR_I4_IMM2)
LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) | READ32 (ip + 3);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_OR_I8)
BINOP(gint64, |);
MINT_IN_BREAK;
Expand Down
8 changes: 5 additions & 3 deletions src/mono/mono/mini/interp/jiterpreter-opcode-values.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ OPRANGE(MINT_RET_I4_IMM, MINT_RET_I8_IMM, ABORT_OUTSIDE_BRANCH_BLOCK_NONE)

// High value because interp has to do a memory load for the immediate
// but we can inline it into the trace
OPRANGE(MINT_LDC_I4_M1, MINT_LDC_R8, HIGH)
OPRANGE(MINT_LDC_I4_0, MINT_LDC_R8, HIGH)

OPRANGE(MINT_MOV_I4_I1, MINT_MOV_4, NORMAL)
// High value for large/complex moves
OPRANGE(MINT_MOV_8, MINT_MOV_8_4, HIGH)

// Binops. Assume most of them are not any faster in jiterp
OPRANGE(MINT_ADD_I4, MINT_CLT_UN_R8, NORMAL)
// Unops and some superinsns. Most will not be faster in jiterp.
OPRANGE(MINT_ADD1_I4, MINT_SHR_I8_IMM, NORMAL)
// Unops. Most will not be faster in jiterp.
OPRANGE(MINT_ADD1_I4, MINT_CEQ0_I4, NORMAL)
// Some superinsns that will be faster in jiterp due to inline constants
OPRANGE(MINT_ADD_I4_IMM, MINT_ADD_MUL_I8_IMM, HIGH)
// Math intrinsics. We implement most of these by calling libc or using wasm opcodes
OPRANGE(MINT_ASIN, MINT_MAXF, NORMAL)
// Field operations. Null check optimization makes these more efficient than interp
Expand Down
18 changes: 10 additions & 8 deletions src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,8 @@ OPDEF(MINT_RET_U1, "ret.u1", 2, 0, 1, MintOpNoArgs)
OPDEF(MINT_RET_I2, "ret.i2", 2, 0, 1, MintOpNoArgs)
OPDEF(MINT_RET_U2, "ret.u2", 2, 0, 1, MintOpNoArgs)

OPDEF(MINT_LDC_I4_M1, "ldc.i4.m1", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_0, "ldc.i4.0", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_1, "ldc.i4.1", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_2, "ldc.i4.2", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_3, "ldc.i4.3", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_4, "ldc.i4.4", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_5, "ldc.i4.5", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_6, "ldc.i4.6", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_7, "ldc.i4.7", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_8, "ldc.i4.8", 2, 1, 0, MintOpNoArgs)
OPDEF(MINT_LDC_I4_S, "ldc.i4.s", 3, 1, 0, MintOpShortInt)
OPDEF(MINT_LDC_I4, "ldc.i4", 4, 1, 0, MintOpInt)

Expand Down Expand Up @@ -654,10 +646,20 @@ OPDEF(MINT_RET_I4_IMM, "ret.i4.imm", 2, 0, 0, MintOpShortInt)
OPDEF(MINT_RET_I8_IMM, "ret.i8.imm", 2, 0, 0, MintOpShortInt)

OPDEF(MINT_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_ADD_I4_IMM2, "add.i4.imm2", 5, 1, 1, MintOpInt)
OPDEF(MINT_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_ADD_I8_IMM2, "add.i8.imm2", 5, 1, 1, MintOpInt)

OPDEF(MINT_MUL_I4_IMM, "mul.i4.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_MUL_I4_IMM2, "mul.i4.imm2", 5, 1, 1, MintOpInt)
OPDEF(MINT_MUL_I8_IMM, "mul.i8.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_MUL_I8_IMM2, "mul.i8.imm2", 5, 1, 1, MintOpInt)

OPDEF(MINT_AND_I4_IMM, "and.i4.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_AND_I4_IMM2, "and.i4.imm2", 5, 1, 1, MintOpInt)

OPDEF(MINT_OR_I4_IMM, "or.i4.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_OR_I4_IMM2, "or.i4.imm2", 5, 1, 1, MintOpInt)

OPDEF(MINT_SHR_UN_I4_IMM, "shr.un.i4.imm", 4, 1, 1, MintOpShortInt)
OPDEF(MINT_SHR_UN_I8_IMM, "shr.un.i8.imm", 4, 1, 1, MintOpShortInt)
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ typedef enum {
#define MINT_IS_SUPER_BRANCH(op) ((op) >= MINT_BRFALSE_I4_SP && (op) <= MINT_BLT_UN_I8_IMM_SP)
#define MINT_IS_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_JIT_CALL)
#define MINT_IS_PATCHABLE_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_VCALL)
#define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_M1 && (op) <= MINT_LDC_I4)
#define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_0 && (op) <= MINT_LDC_I4)
#define MINT_IS_LDC_I8(op) ((op) >= MINT_LDC_I8_0 && (op) <= MINT_LDC_I8)
#define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CEQ0_I4)
#define MINT_IS_BINOP(op) ((op) >= MINT_ADD_I4 && (op) <= MINT_CLT_UN_R8)
Expand Down
Loading

0 comments on commit d88ed80

Please sign in to comment.