Skip to content

Commit

Permalink
enable EVEX embedded broadcast for vpmultishiftqb (#109267)
Browse files (browse the repository at this point in the history)
  • Loading branch information
saucecontrol authored Oct 28, 2024
1 parent 78d3f21 commit 42c1be2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512VBMI Intrinsics
#define FIRST_NI_AVX512VBMI NI_AVX512VBMI_MultiShift
-HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
#define LAST_NI_AVX512VBMI NI_AVX512VBMI_PermuteVar64x8x2
Expand All @@ -1245,7 +1245,7 @@ HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512VBMI.VL Intrinsics
#define FIRST_NI_AVX512VBMI_VL NI_AVX512VBMI_VL_MultiShift
-HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down Expand Up @@ -1316,7 +1316,7 @@ HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar,
HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down Expand Up @@ -1382,7 +1382,7 @@ HARDWARE_INTRINSIC(AVX10v1_V512, ExtractVector256,
HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, LeadingZeroCount, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
+HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_
INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements
INST3(vpermi2b, "permi2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index
INST3(vpermt2b, "permt2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table
-INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table
+INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Select Packed Unaligned Bytes From Quadword Sources

INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

Expand Down

0 comments on commit 42c1be2

Please sign in to comment.