diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c index 17f2366846381..9ed49621efb31 100644 --- a/src/mono/mono/mini/method-to-ir.c +++ b/src/mono/mono/mini/method-to-ir.c @@ -12643,6 +12643,14 @@ mono_op_no_side_effects (int opcode) case OP_NOT_NULL: case OP_IL_SEQ_POINT: case OP_RTTYPE: +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_ARM64) + case OP_EXTRACT_I1: + case OP_EXTRACT_I2: + case OP_EXTRACT_I4: + case OP_EXTRACT_I8: + case OP_EXTRACT_R4: + case OP_EXTRACT_R8: +#endif return TRUE; default: return FALSE; diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 11789395ab1d6..9c3eead0a4177 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4065,18 +4065,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; } + int idx_to = GTMREG_TO_UINT32 (ins->inst_c0) & 0xff; + int idx_from = GTMREG_TO_UINT32 (ins->inst_c0) >> 8; if (dreg != sreg1) { if (dreg != sreg2) { arm_neon_mov (code, dreg, sreg1); - arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0); + arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from); } else { arm_neon_mov (code, NEON_TMP_REG, sreg1); - arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0); + arm_neon_ins_e(code, t, NEON_TMP_REG, sreg2, idx_to, idx_from); arm_neon_mov (code, dreg, NEON_TMP_REG); } } else { g_assert (dreg != sreg2); - arm_neon_ins_e(code, t, dreg, sreg2, GTMREG_TO_UINT32 (ins->inst_c0), 0); + arm_neon_ins_e(code, t, dreg, sreg2, idx_to, idx_from); } break; } diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index c73fc05650335..ceebcf784eedc 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1068,21 +1068,42 @@ emit_hardware_intrinsics ( return custom_emit (cfg, fsig, args, klass, intrin_group, info, id, arg0_type, is_64bit); } 
+static MonoInst*
+emit_vector_insert_element (
+	MonoCompile* cfg, MonoClass* vklass, MonoInst* ins, MonoTypeEnum type, MonoInst* element,
+	int index, gboolean is_zero_inited)
+{
+	int op = type_to_insert_op (type);
+
+	if (is_zero_inited && is_zero_const (element)) {
+		// Nothing to emit: the caller states the vector starts out zeroed and this element is a zero constant, so `ins` is returned unchanged.
+#ifdef TARGET_ARM64
+	} else if (!COMPILE_LLVM (cfg) && element->opcode == type_to_extract_op (type) &&
+		(type == MONO_TYPE_R4 || type == MONO_TYPE_R8)) {
+		// Element comes from the matching extract op: insert straight from the extract's sreg1, packing the destination index in the low 8 bits of inst_c0 and the extract's inst_c0 above them. R4/R8 only: OP_INSERT_Ix inserts from a GP reg, not SIMD, so int types cannot be optimized this way.
+		ins = emit_simd_ins (cfg, vklass, op, ins->dreg, element->sreg1);
+		ins->inst_c0 = index | ((element->inst_c0) << 8);
+		ins->inst_c1 = type;
+#endif
+	} else {
+		ins = emit_simd_ins (cfg, vklass, op, ins->dreg, element->dreg);
+		ins->inst_c0 = index;
+		ins->inst_c1 = type;
+	}
+
+	return ins;
+}
+
 static MonoInst *
 emit_vector_create_elementwise (
 	MonoCompile *cfg, MonoMethodSignature *fsig, MonoType *vtype,
 	MonoTypeEnum type, MonoInst **args)
 {
-	int op = type_to_insert_op (type);
 	MonoClass *vklass = mono_class_from_mono_type_internal (vtype);
 	MonoInst *ins = emit_xzero (cfg, vklass);
-	for (int i = 0; i < fsig->param_count; ++i) {
-		if (!is_zero_const (args [i])) {
-			ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->dreg);
-			ins->inst_c0 = i;
-			ins->inst_c1 = type;
-		}
-	}
+	for (int i = 0; i < fsig->param_count; ++i)
+		ins = emit_vector_insert_element (cfg, vklass, ins, type, args[i], i, TRUE);
+
 	return ins;
 }
 
@@ -2280,17 +2301,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		if (args [1]->opcode == OP_ICONST) {
 			// If the index is provably a constant, we can generate vastly better code.
int index = GTMREG_TO_INT (args[1]->inst_c0); - if (index < 0 || index >= elems) { MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems); MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException"); } - int insert_op = type_to_insert_op (arg0_type); - MonoInst *ins = emit_simd_ins (cfg, klass, insert_op, args [0]->dreg, args [2]->dreg); - ins->inst_c0 = index; - ins->inst_c1 = arg0_type; - return ins; + return emit_vector_insert_element (cfg, klass, args [0], arg0_type, args [2], index, FALSE); } if (!COMPILE_LLVM (cfg) && fsig->params [0]->type != MONO_TYPE_GENERICINST) @@ -2691,11 +2707,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f ins->dreg = dreg; ins->inst_c1 = MONO_TYPE_R4; - for (int i = 1; i < fsig->param_count; ++i) { - ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, ins->dreg, args [i + 1]->dreg); - ins->inst_c0 = i; - ins->inst_c1 = MONO_TYPE_R4; - } + for (int i = 1; i < fsig->param_count; ++i) + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, args [i + 1], i, FALSE); + ins->dreg = dreg; if (indirect) { @@ -2836,10 +2850,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException"); } - ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, dreg, args [2]->dreg); - ins->inst_c0 = index; - ins->inst_c1 = MONO_TYPE_R4; - ins->dreg = dreg; + if (args [0]->dreg == dreg) { + ins = emit_vector_insert_element (cfg, klass, args [0], MONO_TYPE_R4, args [2], index, FALSE); + } else { + ins = emit_simd_ins (cfg, klass, OP_INSERT_R4, dreg, args [2]->dreg); + ins->inst_c0 = index; + ins->inst_c1 = MONO_TYPE_R4; + ins->dreg = dreg; + } if (indirect) { EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, args [0]->dreg, 0, dreg);