CL/aarch64 back end: implement the wasm SIMD bitmask instructions
The `bitmask.{8x16,16x8,32x4}` instructions do not map neatly to any single
AArch64 SIMD instruction, and instead need a sequence of around ten
instructions.  Because of this, the patch is somewhat longer and more complex
than it would be for, e.g., x64.
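
For reference, the semantics being implemented are simple to state in scalar
code; the sketch below is illustration only and is not part of the patch (bit
`i` of the result is the most significant bit of lane `i`):

```rust
/// Scalar reference model for `bitmask.8x16` / `vhigh_bits` over 16 x i8 lanes.
fn bitmask_8x16(lanes: [i8; 16]) -> u16 {
    let mut result: u16 = 0;
    for (i, lane) in lanes.iter().enumerate() {
        // The most significant bit of an i8 is its sign bit.
        if *lane < 0 {
            result |= 1 << i;
        }
    }
    result
}
```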

Main changes are:

* the relevant testsuite test (`simd_boolean.wast`) has been enabled on aarch64.

* at the CLIF level, add a new instruction `vhigh_bits`, into which these wasm
  instructions are to be translated.

* in the wasm->CLIF translation (code_translator.rs), translate into
  `vhigh_bits`.  This is straightforward; a sketch of its shape follows this
  list.

* in the CLIF->AArch64 translation (lower_inst.rs), translate `vhigh_bits`
  into equivalent sequences of AArch64 instructions.  There is a different
  sequence for each of the `{8x16, 16x8, 32x4}` variants; an illustrative
  sketch of the 32x4 case appears at the end of this message.
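
As noted above, the translation step is essentially a one-liner per operator.
Its rough shape is sketched below; the exact generated builder signature for
`vhigh_bits` (in particular, whether the result type is passed explicitly) and
the helper names are assumptions modelled on code_translator.rs conventions,
not a quote from the patch:

```rust
// Hypothetical match arm in code_translator.rs (builder signature assumed).
Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => {
    let vector = pop1_with_bitcast(state, type_of(op), builder);
    // wasm's bitmask instructions produce an i32 scalar.
    state.push1(builder.ins().vhigh_bits(I32, vector));
}
```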

All other changes are AArch64-specific, and add instruction definitions needed
by the previous step:

* Add two new families of AArch64 instructions: `VecShiftImm` (vector shift by
  immediate) and `VecExtract` (effectively a double-length vector shift).

* To the existing AArch64 family `VecRRR`, add a `zip1` variant.  To the
  `VecLanesOp` family add an `addv` variant.

* Add supporting code for the above changes to AArch64 instructions:
  - getting the register uses (`aarch64_get_regs`)
  - mapping the registers (`aarch64_map_regs`)
  - printing instructions
  - emitting instructions (`impl MachInstEmit for Inst`).  The handling of
    `VecShiftImm` is a bit complex.
  - emission tests for new instructions and variants.
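
To give a feel for how these pieces combine, below is a hedged sketch of the
32x4 case referred to earlier.  It is not the code in lower_inst.rs, just one
plausible sequence assembled from the new instruction variants: the register
numbers are arbitrary, the lane-weight constant {1, 2, 4, 8} is assumed to
have been materialised into v21 beforehand, `VecALUOp::And` is assumed to
exist, and the final move of lane 0 into the destination general-purpose
register is omitted.  The 8x16 and 16x8 variants need additional lane
shuffling before the final `addv`, which is where the new `ext`/`zip1` support
comes in.

```rust
// Illustrative only: one plausible shape for vhigh_bits on 32x4 lanes.
let v_src = vreg(10); // input vector (arbitrary register number)
let v_tmp = writable_vreg(20); // scratch vector register
let sequence = vec![
    // Smear each lane's sign (i.e. most significant) bit across the lane:
    // every lane becomes all-ones or all-zeros.
    Inst::VecShiftImm {
        op: VecShiftImmOp::Sshr,
        rd: v_tmp,
        rn: v_src,
        imm: 31,
        size: VectorSize::Size32x4,
    },
    // AND with {1, 2, 4, 8} so that lane i holds either 0 or 1 << i.
    Inst::VecRRR {
        alu_op: VecALUOp::And, // assumed variant name
        rd: v_tmp,
        rn: v_tmp.to_reg(),
        rm: vreg(21), // lane weights, materialised earlier (not shown)
        size: VectorSize::Size32x4,
    },
    // Horizontally add the lanes; the 4-bit mask ends up in lane 0, ready to
    // be moved to the destination general-purpose register (not shown).
    Inst::VecLanes {
        op: VecLanesOp::Addv,
        rd: v_tmp,
        rn: v_tmp.to_reg(),
        size: VectorSize::Size32x4,
    },
];
```
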
julian-seward1 committed Oct 23, 2020
1 parent b10e027 commit 2702942
Showing 8 changed files with 570 additions and 5 deletions.
6 changes: 5 additions & 1 deletion build.rs
@@ -229,8 +229,12 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
         }
 
+        // This is only implemented on aarch64.
+        ("simd", "simd_boolean") => {
+            return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "aarch64";
+        }
+
         // These tests have simd operators which aren't implemented yet.
-        ("simd", "simd_boolean") => return true,
         ("simd", "simd_f32x4_pmin_pmax") => return true,
         ("simd", "simd_f32x4_rounding") => return true,
         ("simd", "simd_f64x2_pmin_pmax") => return true,
18 changes: 18 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
@@ -2193,6 +2193,24 @@ pub(crate) fn define(
.operands_out(vec![s]),
);

let a = &Operand::new("a", TxN);
let x = &Operand::new("x", Int);

ig.push(
Inst::new(
"vhigh_bits",
r#"
Reduce a vector to a scalar integer.
Return a scalar integer, consisting of the concatenation of the most significant bit
of each lane of ``a``.
"#,
&formats.unary,
)
.operands_in(vec![a])
.operands_out(vec![x]),
);

let a = &Operand::new("a", &Int.as_bool());
let Cond = &Operand::new("Cond", &imm.intcc);
let x = &Operand::new("x", Int);
59 changes: 59 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1441,9 +1441,67 @@ impl MachInstEmit for Inst {
};
let (u, opcode) = match op {
VecLanesOp::Uminv => (0b1, 0b11010),
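// addv: across-lanes addition; the sum is written to the lowest lane of the
// destination.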
VecLanesOp::Addv => (0b0, 0b11011),
};
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
}
&Inst::VecShiftImm {
op,
rd,
rn,
size,
imm,
} => {
let (is_shr, template) = match op {
VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
};
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
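// For right shifts, immh:immb encodes (2 * lane-size-in-bits) - shift, with
// the shift amount in 1 ..= lane-size; for left shifts it encodes
// lane-size-in-bits + shift, with the shift in 0 .. lane-size.  The leading
// set bit of immh therefore also selects the lane size.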
let immh_immb = match (size, is_shr) {
(VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
),
};
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
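// ext: the 16-byte result is bytes imm4 .. imm4+15 of the 32-byte
// concatenation Vm:Vn (Vn supplies the low half), so imm4 must be in 0 ..= 15.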
if imm4 < 16 {
let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
let rm_enc = machreg_to_vec(rm);
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(
template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
);
} else {
panic!(
"aarch64: Inst::VecExtract: emit: invalid extract index {}",
imm4
);
}
}
&Inst::VecTbl {
rd,
rn,
@@ -1827,6 +1885,7 @@ impl MachInstEmit for Inst {
debug_assert!(!size.is_128bits());
(0b001_01110_00_1 | enc_size << 1, 0b100000)
}
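// zip1: interleave the corresponding elements from the low halves of the
// two source vectors.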
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
};
let top11 = if is_float {
top11 | enc_float_size << 1
210 changes: 210 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3175,6 +3175,54 @@ fn test_aarch64_binemit() {
"umlal v9.2d, v20.2s, v17.2s",
));

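// New test entries: each tuple is (instruction, expected encoding as hex
// bytes, expected printed form).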
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Zip1,
rd: writable_vreg(16),
rn: vreg(12),
rm: vreg(1),
size: VectorSize::Size8x16,
},
"9039014E",
"zip1 v16.16b, v12.16b, v1.16b",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Zip1,
rd: writable_vreg(2),
rn: vreg(13),
rm: vreg(6),
size: VectorSize::Size16x8,
},
"A239464E",
"zip1 v2.8h, v13.8h, v6.8h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Zip1,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(14),
size: VectorSize::Size32x4,
},
"88398E4E",
"zip1 v8.4s, v12.4s, v14.4s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Zip1,
rd: writable_vreg(9),
rn: vreg(20),
rm: vreg(17),
size: VectorSize::Size64x2,
},
"893AD14E",
"zip1 v9.2d, v20.2d, v17.2d",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
@@ -3461,6 +3509,168 @@ fn test_aarch64_binemit() {
"uminv s18, v4.4s",
));

insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
rd: writable_vreg(2),
rn: vreg(29),
size: VectorSize::Size8x16,
},
"A2BB314E",
"addv b2, v29.16b",
));

insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
rd: writable_vreg(3),
rn: vreg(21),
size: VectorSize::Size16x8,
},
"A3BA714E",
"addv h3, v21.8h",
));

insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
rd: writable_vreg(18),
rn: vreg(5),
size: VectorSize::Size32x4,
},
"B2B8B14E",
"addv s18, v5.4s",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
rd: writable_vreg(27),
rn: vreg(5),
imm: 7,
size: VectorSize::Size8x16,
},
"BB540F4F",
"shl v27.16b, v5.16b, #7",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
rd: writable_vreg(1),
rn: vreg(30),
imm: 0,
size: VectorSize::Size8x16,
},
"C157084F",
"shl v1.16b, v30.16b, #0",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Sshr,
rd: writable_vreg(26),
rn: vreg(6),
imm: 16,
size: VectorSize::Size16x8,
},
"DA04104F",
"sshr v26.8h, v6.8h, #16",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Sshr,
rd: writable_vreg(3),
rn: vreg(19),
imm: 1,
size: VectorSize::Size16x8,
},
"63061F4F",
"sshr v3.8h, v19.8h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 32,
size: VectorSize::Size32x4,
},
"D904206F",
"ushr v25.4s, v6.4s, #32",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size32x4,
},
"A5063F6F",
"ushr v5.4s, v21.4s, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
rd: writable_vreg(22),
rn: vreg(13),
imm: 63,
size: VectorSize::Size64x2,
},
"B6557F4F",
"shl v22.2d, v13.2d, #63",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
rd: writable_vreg(23),
rn: vreg(9),
imm: 0,
size: VectorSize::Size64x2,
},
"3755404F",
"shl v23.2d, v9.2d, #0",
));

insns.push((
Inst::VecExtract {
rd: writable_vreg(1),
rn: vreg(30),
rm: vreg(17),
imm4: 0,
},
"C103116E",
"ext v1.16b, v30.16b, v17.16b, #0",
));

insns.push((
Inst::VecExtract {
rd: writable_vreg(1),
rn: vreg(30),
rm: vreg(17),
imm4: 8,
},
"C143116E",
"ext v1.16b, v30.16b, v17.16b, #8",
));

insns.push((
Inst::VecExtract {
rd: writable_vreg(1),
rn: vreg(30),
rm: vreg(17),
imm4: 15,
},
"C17B116E",
"ext v1.16b, v30.16b, v17.16b, #15",
));

insns.push((
Inst::VecTbl {
rd: writable_vreg(0),