diff --git a/build.rs b/build.rs
index 81ec0be6997f..513ea1278910 100644
--- a/build.rs
+++ b/build.rs
@@ -229,8 +229,12 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
         }
 
+        // This is only implemented on aarch64.
+        ("simd", "simd_boolean") => {
+            return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "aarch64";
+        }
+
         // These tests have simd operators which aren't implemented yet.
-        ("simd", "simd_boolean") => return true,
         ("simd", "simd_f32x4_pmin_pmax") => return true,
         ("simd", "simd_f32x4_rounding") => return true,
         ("simd", "simd_f64x2_pmin_pmax") => return true,
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 2c16734f2766..053848975a02 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -2193,6 +2193,24 @@ pub(crate) fn define(
         .operands_out(vec![s]),
     );
 
+    let a = &Operand::new("a", TxN);
+    let x = &Operand::new("x", Int);
+
+    ig.push(
+        Inst::new(
+            "vhigh_bits",
+            r#"
+        Reduce a vector to a scalar integer.
+
+        Return a scalar integer, consisting of the concatenation of the most significant bit
+        of each lane of ``a``.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![a])
+        .operands_out(vec![x]),
+    );
+
     let a = &Operand::new("a", &Int.as_bool());
     let Cond = &Operand::new("Cond", &imm.intcc);
     let x = &Operand::new("x", Int);
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index d422fdc24f36..05e76ff606e6 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1441,9 +1441,67 @@ impl MachInstEmit for Inst {
                 };
                 let (u, opcode) = match op {
                     VecLanesOp::Uminv => (0b1, 0b11010),
+                    VecLanesOp::Addv => (0b0, 0b11011),
                 };
                 sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
             }
+            &Inst::VecShiftImm {
+                op,
+                rd,
+                rn,
+                size,
+                imm,
+            } => {
+                let (is_shr, template) = match op {
+                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+                };
+                let imm = imm as u32;
+                // Deal with the somewhat strange encoding scheme for, and limits on,
+                // the shift amount.
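+                // As a worked example (illustrative, not from the original patch): a
+                // right shift `ushr #5` on 16-bit lanes is encoded as immh:immb =
+                // 0b_0010_000 | (16 - 5) = 0b_0011_011, whereas a left shift `shl #5`
+                // on the same lane size encodes the amount directly, as
+                // 0b_0010_000 | 5 = 0b_0010_101.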
+                let immh_immb = match (size, is_shr) {
+                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+                        0b_1000_000_u32 | (64 - imm)
+                    }
+                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+                        0b_0100_000_u32 | (32 - imm)
+                    }
+                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+                        0b_0010_000_u32 | (16 - imm)
+                    }
+                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+                        0b_0001_000_u32 | (8 - imm)
+                    }
+                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+                    _ => panic!(
+                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
+                        op, size, imm
+                    ),
+                };
+                let rn_enc = machreg_to_vec(rn);
+                let rd_enc = machreg_to_vec(rd.to_reg());
+                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
+            }
+            &Inst::VecExtract { rd, rn, rm, imm4 } => {
+                if imm4 < 16 {
+                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
+                    let rm_enc = machreg_to_vec(rm);
+                    let rn_enc = machreg_to_vec(rn);
+                    let rd_enc = machreg_to_vec(rd.to_reg());
+                    sink.put4(
+                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
+                    );
+                } else {
+                    panic!(
+                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
+                        imm4
+                    );
+                }
+            }
             &Inst::VecTbl {
                 rd,
                 rn,
@@ -1827,6 +1885,7 @@ impl MachInstEmit for Inst {
                     debug_assert!(!size.is_128bits());
                     (0b001_01110_00_1 | enc_size << 1, 0b100000)
                 }
+                VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
             };
             let top11 = if is_float {
                 top11 | enc_float_size << 1
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 6d981c2eaa93..0f6a0b115b76 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3175,6 +3175,54 @@ fn test_aarch64_binemit() {
         "umlal v9.2d, v20.2s, v17.2s",
     ));
 
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Zip1,
+            rd: writable_vreg(16),
+            rn: vreg(12),
+            rm: vreg(1),
+            size: VectorSize::Size8x16,
+        },
+        "9039014E",
+        "zip1 v16.16b, v12.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Zip1,
+            rd: writable_vreg(2),
+            rn: vreg(13),
+            rm: vreg(6),
+            size: VectorSize::Size16x8,
+        },
+        "A239464E",
+        "zip1 v2.8h, v13.8h, v6.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Zip1,
+            rd: writable_vreg(8),
+            rn: vreg(12),
+            rm: vreg(14),
+            size: VectorSize::Size32x4,
+        },
+        "88398E4E",
+        "zip1 v8.4s, v12.4s, v14.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Zip1,
+            rd: writable_vreg(9),
+            rn: vreg(20),
+            rm: vreg(17),
+            size: VectorSize::Size64x2,
+        },
+        "893AD14E",
+        "zip1 v9.2d, v20.2d, v17.2d",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
@@ -3461,6 +3509,168 @@ fn test_aarch64_binemit() {
         "uminv s18, v4.4s",
     ));
 
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Addv,
+            rd: writable_vreg(2),
+            rn: vreg(29),
+            size: VectorSize::Size8x16,
+        },
+        "A2BB314E",
+        "addv b2, v29.16b",
+    ));
+
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Addv,
+            rd: writable_vreg(3),
+            rn: vreg(21),
+            size: VectorSize::Size16x8,
+        },
+        "A3BA714E",
+        "addv h3, v21.8h",
+    ));
+
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Addv,
+            rd: writable_vreg(18),
+            rn: vreg(5),
+            size: VectorSize::Size32x4,
+        },
+        "B2B8B14E",
+        "addv s18, v5.4s",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Shl,
+            rd: writable_vreg(27),
+            rn: vreg(5),
+            imm: 7,
+            size: VectorSize::Size8x16,
+        },
+        "BB540F4F",
+        "shl v27.16b, v5.16b, #7",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Shl,
+            rd: writable_vreg(1),
+            rn: vreg(30),
+            imm: 0,
+            size: VectorSize::Size8x16,
+        },
+        "C157084F",
+        "shl v1.16b, v30.16b, #0",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Sshr,
+            rd: writable_vreg(26),
+            rn: vreg(6),
+            imm: 16,
+            size: VectorSize::Size16x8,
+        },
+        "DA04104F",
+        "sshr v26.8h, v6.8h, #16",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Sshr,
+            rd: writable_vreg(3),
+            rn: vreg(19),
+            imm: 1,
+            size: VectorSize::Size16x8,
+        },
+        "63061F4F",
+        "sshr v3.8h, v19.8h, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 32,
+            size: VectorSize::Size32x4,
+        },
+        "D904206F",
+        "ushr v25.4s, v6.4s, #32",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size32x4,
+        },
+        "A5063F6F",
+        "ushr v5.4s, v21.4s, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Shl,
+            rd: writable_vreg(22),
+            rn: vreg(13),
+            imm: 63,
+            size: VectorSize::Size64x2,
+        },
+        "B6557F4F",
+        "shl v22.2d, v13.2d, #63",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Shl,
+            rd: writable_vreg(23),
+            rn: vreg(9),
+            imm: 0,
+            size: VectorSize::Size64x2,
+        },
+        "3755404F",
+        "shl v23.2d, v9.2d, #0",
+    ));
+
+    insns.push((
+        Inst::VecExtract {
+            rd: writable_vreg(1),
+            rn: vreg(30),
+            rm: vreg(17),
+            imm4: 0,
+        },
+        "C103116E",
+        "ext v1.16b, v30.16b, v17.16b, #0",
+    ));
+
+    insns.push((
+        Inst::VecExtract {
+            rd: writable_vreg(1),
+            rn: vreg(30),
+            rm: vreg(17),
+            imm4: 8,
+        },
+        "C143116E",
+        "ext v1.16b, v30.16b, v17.16b, #8",
+    ));
+
+    insns.push((
+        Inst::VecExtract {
+            rd: writable_vreg(1),
+            rn: vreg(30),
+            rm: vreg(17),
+            imm4: 15,
+        },
+        "C17B116E",
+        "ext v1.16b, v30.16b, v17.16b, #15",
+    ));
+
     insns.push((
         Inst::VecTbl {
             rd: writable_vreg(0),
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 544d04c23cec..31a15d95802d 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -287,6 +287,8 @@ pub enum VecALUOp {
     Addp,
     /// Unsigned multiply add long
     Umlal,
+    /// Zip vectors (primary) [meaning, lower halves]
+    Zip1,
 }
 
 /// A Vector miscellaneous operation with two registers.
@@ -332,10 +334,23 @@ pub enum VecMiscNarrowOp {
 /// An operation across the lanes of vectors.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecLanesOp {
+    /// Integer addition across a vector
+    Addv,
     /// Unsigned minimum across a vector
     Uminv,
 }
 
+/// A shift-by-immediate operation on each lane of a vector.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecShiftImmOp {
+    // Shift left
+    Shl,
+    // Unsigned shift right
+    Ushr,
+    // Signed shift right
+    Sshr,
+}
+
 /// An operation on the bits of a register. This can be paired with several instruction formats
 /// below (see `Inst`) in any combination.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@@ -949,6 +964,28 @@ pub enum Inst {
         size: VectorSize,
     },
 
+    /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
+    /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
+    /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
+    /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
+    /// values from 0 to lane-size-in-bits - 1 inclusive.
+    VecShiftImm {
+        op: VecShiftImmOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        size: VectorSize,
+        imm: u8,
+    },
+
+    /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
+    /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
+    VecExtract {
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+        imm4: u8,
+    },
+
     /// Table vector lookup - single register table. The table consists of 8-bit elements and is
     /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
     /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
@@ -1577,6 +1614,15 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::VecShiftImm { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
+        &Inst::VecExtract { rd, rn, rm, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+            collector.add_use(rm);
+        }
         &Inst::VecTbl {
             rd,
             rn,
@@ -2157,6 +2203,24 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_def(mapper, rd);
             map_use(mapper, rn);
         }
+        &mut Inst::VecShiftImm {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
+        &mut Inst::VecExtract {
+            ref mut rd,
+            ref mut rn,
+            ref mut rm,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+            map_use(mapper, rm);
+        }
         &mut Inst::VecTbl {
             ref mut rd,
             ref mut rn,
@@ -3330,6 +3394,7 @@ impl Inst {
             VecALUOp::Fmul => ("fmul", size),
             VecALUOp::Addp => ("addp", size),
             VecALUOp::Umlal => ("umlal", size),
+            VecALUOp::Zip1 => ("zip1", size),
         };
         let rd_size = if alu_op == VecALUOp::Umlal {
             size.widen()
@@ -3381,11 +3446,28 @@ impl Inst {
             &Inst::VecLanes { op, rd, rn, size } => {
                 let op = match op {
                     VecLanesOp::Uminv => "uminv",
+                    VecLanesOp::Addv => "addv",
                 };
                 let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
                 let rn = show_vreg_vector(rn, mb_rru, size);
                 format!("{} {}, {}", op, rd, rn)
             }
+            &Inst::VecShiftImm { op, rd, rn, size, imm } => {
+                let op = match op {
+                    VecShiftImmOp::Shl => "shl",
+                    VecShiftImmOp::Ushr => "ushr",
+                    VecShiftImmOp::Sshr => "sshr",
+                };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+                let rn = show_vreg_vector(rn, mb_rru, size);
+                format!("{} {}, {}, #{}", op, rd, rn, imm)
+            }
+            &Inst::VecExtract { rd, rn, rm, imm4 } => {
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+                let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+                format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
+            }
             &Inst::VecTbl {
                 rd,
                 rn,
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index fc28cb35818e..5477964a444b 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -2060,6 +2060,197 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             normalize_bool_result(ctx, insn, rd);
         }
 
+        Opcode::VhighBits => {
+            let dst_r = get_output_reg(ctx, outputs[0]);
+            let src_v = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let ty = ctx.input_ty(insn, 0);
+            // All three sequences use one integer temporary and two vector temporaries. The
+            // shift is done early so as to give the register allocator the possibility of using
+            // the same reg for `tmp_v1` and `src_v` in the case that this is the last use of
+            // `src_v`. See https://github.com/WebAssembly/simd/pull/201 for the background and
+            // derivation of these sequences. Alternative sequences are discussed in
+            // https://github.com/bytecodealliance/wasmtime/issues/2296, although they are not
+            // used here.
+            // Also, FIXME: when https://github.com/bytecodealliance/wasmtime/pull/2310 is
+            // merged, use `lower_splat_constant` instead to generate the constants.
+            let tmp_r0 = ctx.alloc_tmp(RegClass::I64, I64);
+            let tmp_v0 = ctx.alloc_tmp(RegClass::V128, I8X16);
+            let tmp_v1 = ctx.alloc_tmp(RegClass::V128, I8X16);
+            match ty {
+                I8X16 => {
+                    // sshr tmp_v1.16b, src_v.16b, #7
+                    // mov tmp_r0, #0x0201
+                    // movk tmp_r0, #0x0804, lsl 16
+                    // movk tmp_r0, #0x2010, lsl 32
+                    // movk tmp_r0, #0x8040, lsl 48
+                    // dup tmp_v0.2d, tmp_r0
+                    // and tmp_v1.16b, tmp_v1.16b, tmp_v0.16b
+                    // ext tmp_v0.16b, tmp_v1.16b, tmp_v1.16b, #8
+                    // zip1 tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+                    // addv tmp_v0h, tmp_v0.8h
+                    // mov dst_r, tmp_v0.h[0]
+                    ctx.emit(Inst::VecShiftImm {
+                        op: VecShiftImmOp::Sshr,
+                        rd: tmp_v1,
+                        rn: src_v,
+                        size: VectorSize::Size8x16,
+                        imm: 7,
+                    });
+                    lower_constant_u64(ctx, tmp_r0, 0x8040201008040201u64);
+                    ctx.emit(Inst::VecDup {
+                        rd: tmp_v0,
+                        rn: tmp_r0.to_reg(),
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::And,
+                        rd: tmp_v1,
+                        rn: tmp_v1.to_reg(),
+                        rm: tmp_v0.to_reg(),
+                        size: VectorSize::Size8x16,
+                    });
+                    ctx.emit(Inst::VecExtract {
+                        rd: tmp_v0,
+                        rn: tmp_v1.to_reg(),
+                        rm: tmp_v1.to_reg(),
+                        imm4: 8,
+                    });
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::Zip1,
+                        rd: tmp_v0,
+                        rn: tmp_v1.to_reg(),
+                        rm: tmp_v0.to_reg(),
+                        size: VectorSize::Size8x16,
+                    });
+                    ctx.emit(Inst::VecLanes {
+                        op: VecLanesOp::Addv,
+                        rd: tmp_v0,
+                        rn: tmp_v0.to_reg(),
+                        size: VectorSize::Size16x8,
+                    });
+                    ctx.emit(Inst::MovFromVec {
+                        rd: dst_r,
+                        rn: tmp_v0.to_reg(),
+                        idx: 0,
+                        size: VectorSize::Size16x8,
+                    });
+                }
+                I16X8 => {
+                    // sshr tmp_v1.8h, src_v.8h, #15
+                    // mov tmp_r0, #0x1
+                    // movk tmp_r0, #0x2, lsl 16
+                    // movk tmp_r0, #0x4, lsl 32
+                    // movk tmp_r0, #0x8, lsl 48
+                    // dup tmp_v0.2d, tmp_r0
+                    // lsl tmp_r0, tmp_r0, #4
+                    // mov tmp_v0.d[1], tmp_r0
+                    // and tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+                    // addv tmp_v0h, tmp_v0.8h
+                    // mov dst_r, tmp_v0.h[0]
+                    ctx.emit(Inst::VecShiftImm {
+                        op: VecShiftImmOp::Sshr,
+                        rd: tmp_v1,
+                        rn: src_v,
+                        size: VectorSize::Size16x8,
+                        imm: 15,
+                    });
+                    lower_constant_u64(ctx, tmp_r0, 0x0008000400020001u64);
+                    ctx.emit(Inst::VecDup {
+                        rd: tmp_v0,
+                        rn: tmp_r0.to_reg(),
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::AluRRImmShift {
+                        alu_op: ALUOp::Lsl64,
+                        rd: tmp_r0,
+                        rn: tmp_r0.to_reg(),
+                        immshift: ImmShift { imm: 4 },
+                    });
+                    ctx.emit(Inst::MovToVec {
+                        rd: tmp_v0,
+                        rn: tmp_r0.to_reg(),
+                        idx: 1,
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::And,
+                        rd: tmp_v0,
+                        rn: tmp_v1.to_reg(),
+                        rm: tmp_v0.to_reg(),
+                        size: VectorSize::Size8x16,
+                    });
+                    ctx.emit(Inst::VecLanes {
+                        op: VecLanesOp::Addv,
+                        rd: tmp_v0,
+                        rn: tmp_v0.to_reg(),
+                        size: VectorSize::Size16x8,
+                    });
+                    ctx.emit(Inst::MovFromVec {
+                        rd: dst_r,
+                        rn: tmp_v0.to_reg(),
+                        idx: 0,
+                        size: VectorSize::Size16x8,
+                    });
+                }
+                I32X4 => {
+                    // sshr tmp_v1.4s, src_v.4s, #31
+                    // mov tmp_r0, #0x1
+                    // movk tmp_r0, #0x2, lsl 32
+                    // dup tmp_v0.2d, tmp_r0
+                    // lsl tmp_r0, tmp_r0, #2
+                    // mov tmp_v0.d[1], tmp_r0
+                    // and tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+                    // addv tmp_v0s, tmp_v0.4s
+                    // mov dst_r, tmp_v0.s[0]
+                    ctx.emit(Inst::VecShiftImm {
+                        op: VecShiftImmOp::Sshr,
+                        rd: tmp_v1,
+                        rn: src_v,
+                        size: VectorSize::Size32x4,
+                        imm: 31,
+                    });
+                    lower_constant_u64(ctx, tmp_r0, 0x0000000200000001u64);
+                    ctx.emit(Inst::VecDup {
+                        rd: tmp_v0,
+                        rn: tmp_r0.to_reg(),
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::AluRRImmShift {
+                        alu_op: ALUOp::Lsl64,
+                        rd: tmp_r0,
+                        rn: tmp_r0.to_reg(),
+                        immshift: ImmShift { imm: 2 },
+                    });
+                    ctx.emit(Inst::MovToVec {
+                        rd: tmp_v0,
+                        rn: tmp_r0.to_reg(),
+                        idx: 1,
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::And,
+                        rd: tmp_v0,
+                        rn: tmp_v1.to_reg(),
+                        rm: tmp_v0.to_reg(),
+                        size: VectorSize::Size8x16,
+                    });
+                    ctx.emit(Inst::VecLanes {
+                        op: VecLanesOp::Addv,
+                        rd: tmp_v0,
+                        rn: tmp_v0.to_reg(),
+                        size: VectorSize::Size32x4,
+                    });
+                    ctx.emit(Inst::MovFromVec {
+                        rd: dst_r,
+                        rn: tmp_v0.to_reg(),
+                        idx: 0,
+                        size: VectorSize::Size32x4,
+                    });
+                }
+                _ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
+            }
+        }
+
         Opcode::Shuffle => {
             let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
             let rd = get_output_reg(ctx, outputs[0]);
diff --git a/cranelift/codegen/src/preopt.serialized b/cranelift/codegen/src/preopt.serialized
index 5669c9912628..03a7b4419380 100644
Binary files a/cranelift/codegen/src/preopt.serialized and b/cranelift/codegen/src/preopt.serialized differ
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index 9faf8c07677f..f040d2939340 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1600,6 +1600,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let bool_result = builder.ins().vall_true(a);
             state.push1(builder.ins().bint(I32, bool_result))
         }
+        Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => {
+            let a = pop1_with_bitcast(state, type_of(op), builder);
+            state.push1(builder.ins().vhigh_bits(I32, a));
+        }
         Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => {
             translate_vector_icmp(IntCC::Equal, type_of(op), builder, state)
         }
@@ -1763,10 +1767,7 @@
         | Operator::F64x2Trunc
         | Operator::F64x2PMin
         | Operator::F64x2PMax
-        | Operator::F64x2Nearest
-        | Operator::I8x16Bitmask
-        | Operator::I16x8Bitmask
-        | Operator::I32x4Bitmask => {
+        | Operator::F64x2Nearest => {
             return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
         }
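As an aside for reviewers, here is a scalar Rust sketch of the i8x16 sequence above (illustrative only, not part of the patch; the names `bitmask_i8x16_model` and `lanes` are invented). It mirrors the sshr/and/ext/zip1/addv steps and checks them against the direct definition of `vhigh_bits`, i.e. the concatenation of the lane sign bits:

// Scalar model of the i8x16 VhighBits lowering (a sketch, assuming the
// semantics of the aarch64 instructions described in the comments above).
fn bitmask_i8x16_model(lanes: [i8; 16]) -> u16 {
    // sshr v.16b, v.16b, #7: replicate each lane's sign bit across the lane,
    // giving 0xFF for a negative lane and 0x00 otherwise.
    let sshr: Vec<u8> = lanes.iter().map(|&x| (x >> 7) as u8).collect();
    // and with 0x8040201008040201 dup'd into both 64-bit halves: lane i keeps
    // only bit (i % 8).
    let mask = 0x8040201008040201u64.to_le_bytes();
    let and: Vec<u8> = sshr
        .iter()
        .enumerate()
        .map(|(i, &b)| b & mask[i % 8])
        .collect();
    // ext #8 rotates the upper half down; zip1 then interleaves lane i with
    // lane i + 8, forming eight 16-bit elements (lane i in the low byte, lane
    // i + 8 in the high byte).
    let pairs: Vec<u16> = (0..8)
        .map(|i| (and[i] as u16) | ((and[i + 8] as u16) << 8))
        .collect();
    // addv h, v.8h: horizontal add of the eight elements. The surviving bits
    // are disjoint, so the sum is just their bitwise OR: bit i of the result
    // is the sign bit of lane i.
    pairs.iter().fold(0u16, |acc, &p| acc.wrapping_add(p))
}

fn main() {
    let lanes: [i8; 16] = [
        -1, 2, -3, 4, 5, -6, 7, -8, -9, 10, 11, -12, 13, 14, -15, -16,
    ];
    // Direct definition of vhigh_bits: concatenate the lane sign bits.
    let expected = lanes
        .iter()
        .enumerate()
        .fold(0u16, |acc, (i, &x)| acc | (((x < 0) as u16) << i));
    assert_eq!(bitmask_i8x16_model(lanes), expected);
    println!("bitmask = {:#06x}", expected);
}

The i16x8 and i32x4 sequences follow the same pattern, except that the per-lane powers of two no longer fit in one 64-bit constant, so the upper vector half is built by shifting the constant left (by 4 or 2) and inserting it into lane d[1], and no ext/zip1 pairing step is needed.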