Skip to content

Commit

Permalink
Support big- and little-endian lane order with bitcast (bytecodeallia…
Browse files Browse the repository at this point in the history
…nce#5196)

Add a MemFlags operand to the bitcast instruction, where only the
`big` and `little` flags are accepted.  These define the lane order
to be used when casting between types of different lane counts.

Update all users to pass an appropriate MemFlags argument.

Implement lane swaps where necessary in the s390x back-end.

This is the final part necessary to fix
bytecodealliance#4566.
  • Loading branch information
uweigand authored Nov 7, 2022
1 parent 5cef535 commit 3e5938e
Show file tree
Hide file tree
Showing 16 changed files with 296 additions and 52 deletions.
12 changes: 9 additions & 3 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3104,6 +3104,7 @@ pub(crate) fn define(

let x = &Operand::new("x", Mem);
let a = &Operand::new("a", MemTo).with_doc("Bits of `x` reinterpreted");
let MemFlags = &Operand::new("MemFlags", &imm.memflags);

ig.push(
Inst::new(
Expand All @@ -3113,11 +3114,16 @@ pub(crate) fn define(
The input and output types must be storable to memory and of the same
size. A bitcast is equivalent to storing one type and loading the other
type from the same address.
type from the same address, both using the specified MemFlags.
Note that this operation only supports the `big` or `little` MemFlags.
The specified byte order only affects the result in the case where
input and output types differ in lane count/size. In this case, the
operation is only valid if a byte order specifier is provided.
"#,
&formats.unary,
&formats.load_no_offset,
)
.operands_in(vec![x])
.operands_in(vec![MemFlags, x])
.operands_out(vec![a]),
);

Expand Down
10 changes: 5 additions & 5 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2197,25 +2197,25 @@
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type (ty_float_or_vec _)))))
x)

; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type in_ty))))
(if (ty_int_ref_scalar_64 in_ty))
(mov_to_fpu x (scalar_size in_ty)))

; SIMD&FP => GPR
(rule 3 (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
(rule 3 (lower (has_type out_ty (bitcast _ x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
(if (ty_int_ref_scalar_64 out_ty))
(mov_from_vec x 0 (scalar_size out_ty)))

; GPR <=> GPR
(rule 2 (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
(rule 2 (lower (has_type out_ty (bitcast _ x @ (value_type in_ty))))
(if (ty_int_ref_scalar_64 out_ty))
(if (ty_int_ref_scalar_64 in_ty))
x)
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
(rule 1 (lower (has_type $I128 (bitcast _ x @ (value_type $I128)))) x)

;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@
)
;;;;; Rules for `bitcast`;;;;;;;;;
(rule
(lower (has_type out (bitcast v @ (value_type in_ty))))
(lower (has_type out (bitcast _ v @ (value_type in_ty))))
(gen_moves v in_ty out))

;;;;; Rules for `ceil`;;;;;;;;;
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1486,6 +1486,11 @@
(rule (lane_order_equal (LaneOrder.BigEndian) (LaneOrder.LittleEndian)) $false)
(rule (lane_order_equal (LaneOrder.BigEndian) (LaneOrder.BigEndian)) $true)

;; Return lane order matching memory byte order.
(decl pure lane_order_from_memflags (MemFlags) LaneOrder)
(rule 0 (lane_order_from_memflags (littleendian)) (LaneOrder.LittleEndian))
(rule 1 (lane_order_from_memflags (bigendian)) (LaneOrder.BigEndian))

;; Convert a CLIF immediate lane index value to big-endian lane order.
(decl be_lane_idx (Type u8) u8)
(extern constructor be_lane_idx be_lane_idx)
Expand Down
40 changes: 23 additions & 17 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1738,40 +1738,46 @@
;;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Reinterpret a 64-bit integer value as floating-point.
(rule (lower (has_type $F64 (bitcast x @ (value_type $I64))))
(rule (lower (has_type $F64 (bitcast _ x @ (value_type $I64))))
(vec_insert_lane_undef $F64X2 x 0 (zero_reg)))

;; Reinterpret a 64-bit floating-point value as integer.
(rule (lower (has_type $I64 (bitcast x @ (value_type $F64))))
(rule (lower (has_type $I64 (bitcast _ x @ (value_type $F64))))
(vec_extract_lane $F64X2 x 0 (zero_reg)))

;; Reinterpret a 32-bit integer value as floating-point.
(rule (lower (has_type $F32 (bitcast x @ (value_type $I32))))
(rule (lower (has_type $F32 (bitcast _ x @ (value_type $I32))))
(vec_insert_lane_undef $F32X4 x 0 (zero_reg)))

;; Reinterpret a 32-bit floating-point value as integer.
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
(rule (lower (has_type $I32 (bitcast _ x @ (value_type $F32))))
(vec_extract_lane $F32X4 x 0 (zero_reg)))

;; Bitcast between types residing in GPRs is a no-op.
(rule 1 (lower (has_type (gpr32_ty _)
(bitcast x @ (value_type (gpr32_ty _))))) x)
(bitcast _ x @ (value_type (gpr32_ty _))))) x)
(rule 2 (lower (has_type (gpr64_ty _)
(bitcast x @ (value_type (gpr64_ty _))))) x)
(bitcast _ x @ (value_type (gpr64_ty _))))) x)

;; Bitcast between types residing in FPRs is a no-op.
(rule 3 (lower (has_type (ty_scalar_float _)
(bitcast x @ (value_type (ty_scalar_float _))))) x)

;; Bitcast between types residing in VRs is a no-op.
;; FIXME: There are two flavors of vector bitcast, which are currently not
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
;; lane order, and those generated elsewhere assume big-endian lane order.
;; Bitcast is a no-op if current lane order matches that assumed lane order.
;; However, due to our choice of lane order depending on the current function
;; ABI, every bitcast we currently see here is indeed a no-op.
(rule 4 (lower (has_type (vr128_ty _)
(bitcast x @ (value_type (vr128_ty _))))) x)
(bitcast _ x @ (value_type (ty_scalar_float _))))) x)

;; Bitcast between types residing in VRs is a no-op if lane count is unchanged.
(rule 5 (lower (has_type (multi_lane bits count)
(bitcast _ x @ (value_type (multi_lane bits count))))) x)

;; Bitcast between types residing in VRs with different lane counts is a
;; no-op if the operation's MemFlags indicate a byte order compatible with
;; the current lane order. Otherwise, lane elements need to be swapped,
;; first in the input type, and then again in the output type. This could
;; be optimized further, but we don't bother at the moment since due to our
;; choice of lane order depending on the current function ABI, this case will
;; currently never arise in practice.
(rule 4 (lower (has_type (vr128_ty out_ty)
(bitcast flags x @ (value_type (vr128_ty in_ty)))))
(abi_vec_elt_rev (lane_order_from_memflags flags) out_ty
(abi_vec_elt_rev (lane_order_from_memflags flags) in_ty x)))


;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down
12 changes: 6 additions & 6 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3298,25 +3298,25 @@

;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I32 (bitcast src @ (value_type $F32))))
(rule (lower (has_type $I32 (bitcast _ src @ (value_type $F32))))
(bitcast_xmm_to_gpr $F32 src))

(rule (lower (has_type $F32 (bitcast src @ (value_type $I32))))
(rule (lower (has_type $F32 (bitcast _ src @ (value_type $I32))))
(bitcast_gpr_to_xmm $I32 src))

(rule (lower (has_type $I64 (bitcast src @ (value_type $F64))))
(rule (lower (has_type $I64 (bitcast _ src @ (value_type $F64))))
(bitcast_xmm_to_gpr $F64 src))

(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
(rule (lower (has_type $F64 (bitcast _ src @ (value_type $I64))))
(bitcast_gpr_to_xmm $I64 src))

;; Bitcast between types residing in GPR registers is a no-op.
(rule 1 (lower (has_type (is_gpr_type _)
(bitcast x @ (value_type (is_gpr_type _))))) x)
(bitcast _ x @ (value_type (is_gpr_type _))))) x)

;; Bitcast between types residing in XMM registers is a no-op.
(rule 2 (lower (has_type (is_xmm_type _)
(bitcast x @ (value_type (is_xmm_type _))))) x)
(bitcast _ x @ (value_type (is_xmm_type _))))) x)

;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
21 changes: 18 additions & 3 deletions cranelift/codegen/src/verifier/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ use crate::ir::entities::AnyEntity;
use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint};
use crate::ir::{
types, ArgumentPurpose, Block, Constant, DynamicStackSlot, FuncRef, Function, GlobalValue,
Inst, JumpTable, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList,
Inst, JumpTable, MemFlags, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList,
};
use crate::isa::TargetIsa;
use crate::iterators::IteratorExtras;
Expand Down Expand Up @@ -729,11 +729,12 @@ impl<'a> Verifier<'a> {
));
}
}
Unary {
LoadNoOffset {
opcode: Opcode::Bitcast,
flags,
arg,
} => {
self.verify_bitcast(inst, arg, errors)?;
self.verify_bitcast(inst, flags, arg, errors)?;
}
UnaryConst {
opcode: Opcode::Vconst,
Expand Down Expand Up @@ -1070,6 +1071,7 @@ impl<'a> Verifier<'a> {
fn verify_bitcast(
&self,
inst: Inst,
flags: MemFlags,
arg: Value,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
Expand All @@ -1086,6 +1088,19 @@ impl<'a> Verifier<'a> {
typ.bits()
),
))
} else if flags != MemFlags::new()
&& flags != MemFlags::new().with_endianness(ir::Endianness::Little)
&& flags != MemFlags::new().with_endianness(ir::Endianness::Big)
{
errors.fatal((
inst,
"The bitcast instruction only accepts the `big` or `little` memory flags",
))
} else if flags == MemFlags::new() && typ.lane_count() != value_type.lane_count() {
errors.fatal((
inst,
"Byte order specifier required for bitcast instruction changing lane count",
))
} else {
Ok(())
}
Expand Down
79 changes: 79 additions & 0 deletions cranelift/filetests/filetests/isa/s390x/bitcast.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
test compile precise-output
target s390x

;; Bitcast between integral types is a no-op.

function %bitcast_i8_i8(i8) -> i8 {
block0(v0: i8):
v1 = bitcast.i8 v0
return v1
}

; block0:
; br %r14

function %bitcast_i16_i16(i16) -> i16 {
block0(v0: i16):
v1 = bitcast.i16 v0
return v1
}

; block0:
; br %r14

function %bitcast_i32_i32(i32) -> i32 {
block0(v0: i32):
v1 = bitcast.i32 v0
return v1
}

; block0:
; br %r14

function %bitcast_i64_i64(i64) -> i64 {
block0(v0: i64):
v1 = bitcast.i64 v0
return v1
}

; block0:
; br %r14

function %bitcast_i128_i128(i128) -> i128 {
block0(v0: i128):
v1 = bitcast.i128 v0
return v1
}

; block0:
; vl %v0, 0(%r3)
; vst %v0, 0(%r2)
; br %r14

function %bitcast_r64_i64(r64) -> i64 {
block0(v0: r64):
v1 = bitcast.i64 v0
return v1
}

; block0:
; br %r14

function %bitcast_i64_r64(i64) -> r64 {
block0(v0: i64):
v1 = bitcast.r64 v0
return v1
}

; block0:
; br %r14

function %bitcast_r64_r64(r64) -> r64 {
block0(v0: r64):
v1 = bitcast.r64 v0
return v1
}

; block0:
; br %r14

18 changes: 18 additions & 0 deletions cranelift/filetests/filetests/isa/s390x/floating-point.clif
Original file line number Diff line number Diff line change
Expand Up @@ -1200,3 +1200,21 @@ block0(v0: f32):
; vlgvf %r2, %v0, 0
; br %r14

function %bitcast_f32_f32(f32) -> f32 {
block0(v0: f32):
v1 = bitcast.f32 v0
return v1
}

; block0:
; br %r14

function %bitcast_f64_f64(f64) -> f64 {
block0(v0: f64):
v1 = bitcast.f64 v0
return v1
}

; block0:
; br %r14

Loading

0 comments on commit 3e5938e

Please sign in to comment.