Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][jit] Adding compare all/any intrinsics. #83515

Merged
merged 12 commits into from
Mar 21, 2023
61 changes: 22 additions & 39 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,19 +1139,16 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_dup_g_4s(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))
#define arm_neon_dup_g_2d(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))

// the opcode is smov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
#define arm_neon_smovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))

// the opcode is umov, but we define variants umovs and umovd by whether they fill a 32 or 64-bit reg.
#define arm_neon_umovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
#define arm_neon_smov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
#define arm_neon_smov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
#define arm_neon_smov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))
#define arm_neon_smov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0101, (rd), (rn))

#define arm_neon_umov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
#define arm_neon_umov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
#define arm_neon_umov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
#define arm_neon_umov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0111, (rd), (rn))


/* NEON :: 3-register same FP16 */
// TODO
Expand Down Expand Up @@ -1576,6 +1573,9 @@ arm_encode_arith_imm (int imm, guint32 *shift)
/* NEON :: across lanes */
#define arm_neon_xln_opcode(p, q, u, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), (q), 0b00001110001100000000100000000000 | (u) << 29 | (size) << 22 | (opcode) << 12, (rd), (rn))

#define arm_neon_umaxv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn))
#define arm_neon_uminv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b11010, (rd), (rn))

// contrary to most other opcodes, the suffix is the type of source
#define arm_neon_saddlv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00011, (rd), (rn))
#define arm_neon_saddlv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00011, (rd), (rn))
Expand Down Expand Up @@ -1609,18 +1609,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_uaddlv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b00011, (rd), (rn))
#define arm_neon_uaddlv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b00011, (rd), (rn))

#define arm_neon_umaxv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn))
#define arm_neon_umaxv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn))
#define arm_neon_umaxv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn))
#define arm_neon_umaxv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn))
#define arm_neon_umaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn))

#define arm_neon_uminv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_uminv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11010, (rd), (rn))
#define arm_neon_uminv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b11010, (rd), (rn))
#define arm_neon_uminv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11010, (rd), (rn))
#define arm_neon_uminv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b11010, (rd), (rn))

#define arm_neon_fmaxnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn))
#define arm_neon_fmaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01111, (rd), (rn))
#define arm_neon_fminnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b01100, (rd), (rn))
Expand Down Expand Up @@ -2313,6 +2301,15 @@ arm_encode_arith_imm (int imm, guint32 *shift)
arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
} while (0)

/* Combined 7-bit immh:immb value for a shift-left-by-immediate: (8 << size) + shift, masked to 7 bits. */
#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
/*
 * Shift-left-by-immediate helper: computes the combined immh:immb value once into a
 * block-local temporary, then forwards to arm_neon_shimm_opcode with bits 6..3 as immh
 * and bits 2..0 as immb.
 * The temporary must be declared under the same name it is read by (__temp_emit0), and the
 * inner statement must be terminated before the closing brace of the do/while wrapper.
 */
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
	int32_t __temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)); \
} while (0)

#define arm_neon_sli(p, width, type, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn), (shift))
#define arm_neon_shrn(p, type, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, (type), 0b10000, (rd), (rn), (shift))

#define arm_neon_sshr_8b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_16b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
#define arm_neon_sshr_4h(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b00000, (rd), (rn), (shift))
Expand Down Expand Up @@ -2345,12 +2342,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_srsra_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_4, 0b00110, (rd), (rn), (shift))
#define arm_neon_srsra_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_8, 0b00110, (rd), (rn), (shift))

/* Combined 7-bit immh:immb value for a shift-left-by-immediate: (8 << size) + shift, masked to 7 bits. */
#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
/*
 * Shift-left-by-immediate helper: computes the combined immh:immb value once into a
 * block-local temporary, then forwards to arm_neon_shimm_opcode with bits 6..3 as immh
 * and bits 2..0 as immb.
 * The temporary must be declared under the same name it is read by (__temp_emit0), and the
 * inner statement must be terminated before the closing brace of the do/while wrapper.
 */
#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
	int32_t __temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)); \
} while (0)

#define arm_neon_shl_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_shl_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b01010, (rd), (rn), (shift))
Expand Down Expand Up @@ -2454,14 +2445,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define arm_neon_sri_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01000, (rd), (rn), (shift))
#define arm_neon_sri_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01000, (rd), (rn), (shift))

#define arm_neon_sli_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_8h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_4s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
#define arm_neon_sli_2d(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01010, (rd), (rn), (shift))

#define arm_neon_sqshlu_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
#define arm_neon_sqshlu_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01100, (rd), (rn), (shift))
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ xcompare: dest:x src1:x src2:x len:4
xcompare_fp: dest:x src1:x src2:x len:4
negate: dest:x src1:x len:4
ones_complement: dest:x src1:x len:4
xextract: dest:i src1:x len:12
xbinop_forceint: dest:x src1:x src2:x len:4
xcast: dest:x src1:x len:4 clob:1

Expand Down
24 changes: 24 additions & 0 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3396,6 +3396,27 @@ emit_move_return_value (MonoCompile *cfg, guint8 * code, MonoInst *ins)
return code;
}

/*
 * emit_xextract:
 *
 *   Emit the native sequence for an "any set" / "all set" extraction from the
 * vector register SREG1 into the integer register DREG.
 *
 *   CODE  - current emission pointer; the advanced pointer is returned.
 *   WIDTH - vector width selector forwarded to the across-lanes reduction.
 *   MODE  - SIMD_EXTR_IS_ANY_SET or SIMD_EXTR_ARE_ALL_SET; anything else asserts.
 *
 *   The sequence is: across-lanes byte reduction into FP_TEMP_REG (umaxv for
 * "any", uminv for "all"), move byte lane 0 into DREG, then shift right by 7.
 *   NOTE(review): assumes every byte of SREG1 is 0xff or 0x00 (i.e. a compare
 * mask), so the reduced byte is 0xff for TRUE and 0x00 for FALSE -- confirm
 * against callers.
 */
static guint8*
emit_xextract (guint8* code, int width, int mode, int dreg, int sreg1)
{
	const int want_any = (mode == SIMD_EXTR_IS_ANY_SET);

	/* Reject unknown modes before anything is emitted. */
	if (!want_any && mode != SIMD_EXTR_ARE_ALL_SET) {
		g_assert_not_reached ();
		return code;
	}

	if (want_any) {
		arm_neon_umaxv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
	} else {
		arm_neon_uminv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
	}

	arm_neon_umov_b (code, dreg, FP_TEMP_REG, 0);
	/* dreg now holds 0xff (TRUE) or 0x0 (FALSE); keep only the top bit of the byte to get 0x1/0x0 */
	arm_lsrw (code, dreg, dreg, 7);

	return code;
}

/*
* emit_branch_island:
*
Expand Down Expand Up @@ -3822,6 +3843,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
case OP_XEXTRACT:
code = emit_xextract (code, VREG_FULL, ins->inst_c0, dreg, sreg1);
break;

/* ALU */
case OP_IADD:
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1479,13 +1479,18 @@ MINI_OP(OP_XCOMPARE_SCALAR, "xcompare_scalar", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP, "xcompare_fp", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_FP_SCALAR, "xcompare_fp_scalar", XREG, XREG, XREG)

/* Extract from XREG into IREG.
* inst_c0 - specific instruction, one of SIMD_EXTR_... */
MINI_OP(OP_XEXTRACT, "xextract", IREG, XREG, NONE)

/*
* Generic SIMD operations, the rest of the JIT doesn't care about the exact operation.
*/
MINI_OP(OP_XBINOP, "xbinop", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_FORCEINT, "xbinop_forceint", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_SCALAR, "xbinop_scalar", XREG, XREG, XREG)
MINI_OP(OP_XBINOP_BYSCALAR, "xbinop_byscalar", XREG, XREG, XREG)

/* inst_c0 contains an INTRINS_ enum, inst_c1 might contain additional data */
MINI_OP(OP_XOP, "xop", NONE, NONE, NONE)
MINI_OP(OP_XOP_X_I, "xop_x_i", XREG, IREG, NONE)
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini.h
Original file line number Diff line number Diff line change
Expand Up @@ -2933,6 +2933,11 @@ enum {
SIMD_PREFETCH_MODE_2,
};

/*
 * Selectors for OP_XEXTRACT (stored in inst_c0): which boolean reduction
 * of the source vector is extracted into the integer destination.
 */
enum {
	/* result is true when at least one lane is set */
	SIMD_EXTR_IS_ANY_SET = 0,
	/* result is true only when every lane is set */
	SIMD_EXTR_ARE_ALL_SET = 1
};

int mini_primitive_type_size (MonoTypeEnum type);
MonoTypeEnum mini_get_simd_type_info (MonoClass *klass, guint32 *nelems);

Expand Down
110 changes: 72 additions & 38 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,11 +509,18 @@ static MonoInst*
emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoInst *arg1, MonoInst *arg2)
{
#ifdef TARGET_ARM64
int size = mono_class_value_size (klass, NULL);
if (size == 16)
if (!COMPILE_LLVM (cfg)) {
MonoTypeEnum elemt = get_underlying_type (m_class_get_this_arg (arg1->klass));
MonoInst* cmp = emit_xcompare (cfg, arg1->klass, elemt, arg1, arg2);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
} else if (mono_class_value_size (klass, NULL) == 16) {
return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg);
else
} else {
return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
}
#else
MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
if (!COMPILE_LLVM (cfg))
Expand Down Expand Up @@ -1201,9 +1208,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
return NULL;
}

if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
return NULL; // TODO: Fix Vector256.WithUpper/WithLower

if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256") || !strcmp (m_class_get_name (cfg->method->klass), "Vector512"))
return NULL;
// FIXME: This limitation could be removed once everything here is supported by the mini JIT on arm64
#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg)) {
Expand All @@ -1216,6 +1223,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
case SN_LessThanOrEqual:
case SN_Negate:
case SN_OnesComplement:
case SN_EqualsAny:
case SN_GreaterThanAny:
case SN_GreaterThanOrEqualAny:
case SN_LessThanAny:
case SN_LessThanOrEqualAny:
case SN_EqualsAll:
case SN_GreaterThanAll:
case SN_GreaterThanOrEqualAll:
case SN_LessThanAll:
case SN_LessThanOrEqualAll:
case SN_Subtract:
case SN_BitwiseAnd:
case SN_BitwiseOr:
Expand Down Expand Up @@ -1488,18 +1505,27 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!is_element_type_primitive (fsig->params [0]))
return NULL;
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
switch (id) {
case SN_Equals:
return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);
case SN_EqualsAll:
return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_EqualsAny: {
MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
if (id == SN_Equals)
return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);

if (COMPILE_LLVM (cfg)) {
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
switch (id) {
case SN_EqualsAll:
return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_EqualsAny: {
MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
}
}
default: g_assert_not_reached ();
} else {
MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
}
g_assert_not_reached ();
}
case SN_ExtractMostSignificantBits: {
if (!is_element_type_primitive (fsig->params [0]) || type_enum_is_float (arg0_type))
Expand Down Expand Up @@ -1567,34 +1593,40 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
fsig->ret->type == MONO_TYPE_BOOLEAN &&
mono_metadata_type_equal (fsig->params [0], fsig->params [1]));

MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);

gboolean is_all = FALSE;
switch (id) {
case SN_GreaterThanAll:
case SN_GreaterThanOrEqualAll:
case SN_LessThanAll:
case SN_LessThanOrEqualAll: {
// for floating point numbers all ones is NaN and so
// they must be treated differently than integer types
if (type_enum_is_float (arg0_type)) {
case SN_LessThanOrEqualAll:
is_all = TRUE;
break;
}

MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
if (COMPILE_LLVM (cfg)) {
MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
if (is_all) {
// for floating point numbers all ones is NaN and so
// they must be treated differently than integer types
if (type_enum_is_float (arg0_type)) {
MonoInst *zero = emit_xzero (cfg, arg_class);
MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
return emit_xequal (cfg, arg_class, inverted_cmp, zero);
}

MonoInst *ones = emit_xones (cfg, arg_class);
return emit_xequal (cfg, arg_class, cmp, ones);
} else {
MonoInst *zero = emit_xzero (cfg, arg_class);
MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
return emit_xequal (cfg, arg_class, inverted_cmp, zero);
return emit_not_xequal (cfg, arg_class, cmp, zero);
}

MonoInst *ones = emit_xones (cfg, arg_class);
return emit_xequal (cfg, arg_class, cmp, ones);
}
case SN_GreaterThanAny:
case SN_GreaterThanOrEqualAny:
case SN_LessThanAny:
case SN_LessThanOrEqualAny: {
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp, zero);
}
default:
g_assert_not_reached ();
} else {
MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]);
MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
ret->inst_c1 = mono_class_value_size (klass, NULL);
return ret;
}
}
case SN_Narrow: {
Expand Down Expand Up @@ -1908,6 +1940,8 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
case SN_op_BitwiseAnd:
case SN_op_BitwiseOr:
case SN_op_ExclusiveOr:
case SN_op_Equality:
case SN_op_Inequality:
break;
default:
return NULL;
Expand Down