Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Aug 7, 2023
2 parents 3255d60 + 151c8ab commit 76d7477
Show file tree
Hide file tree
Showing 18 changed files with 185 additions and 22 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 2.6...3.0.2)

project(xbyak LANGUAGES CXX VERSION 6.72)
project(xbyak LANGUAGES CXX VERSION 6.73)

file(GLOB headers xbyak/*.h)

Expand Down
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
* 2023/Aug/02 ver 6.72 add xbegin/xabort/xend
* 2023/Jul/27 ver 6.71 Allocator supports huge page
* 2023/Jul/05 ver 6.70 add alias of vclmulqdq, correct alias of pclmulqdq
Expand Down
17 changes: 17 additions & 0 deletions gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@ void putX_X_XM(bool omitOnly)
{ 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
{ 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
{ 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 },
{ 0xDA, "sm3msg1", T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
{ 0xDA, "sm3msg2", T_66 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
{ 0xDE, "sm3rnds2", T_66 | T_0F3A | T_W0 | T_EVEX | T_EW0, true, false, 2 },
{ 0xDA, "sm4key4", T_F3 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
{ 0xDA, "sm4rnds4", T_F2 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down Expand Up @@ -1116,6 +1121,10 @@ void put()
puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }");
puts("void xabort(uint8_t imm) { db(0xC6); db(0xF8); db(imm); }");
puts("void xbegin(uint32_t rel) { db(0xC7); db(0xF8); dd(rel); }");

puts("void vsha512msg1(const Ymm& y, const Xmm& x) { if (!(y.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y, 0, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCC); }");
puts("void vsha512msg2(const Ymm& y1, const Ymm& y2) { if (!(y1.isYMM() && y2.isYMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, 0, y2, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCD); }");
puts("void vsha512rnds2(const Ymm& y1, const Ymm& y2, const Xmm& x) { if (!(y1.isYMM() && y2.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, &y2, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCB); }");
}
{
const struct Tbl {
Expand Down Expand Up @@ -1808,6 +1817,7 @@ void put()
}
}
// avx-vnni-int8
// avx-vnni-int16
{
const struct Tbl {
uint8_t code;
Expand All @@ -1820,6 +1830,13 @@ void put()
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },

{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '6.72',
version: '6.73',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 6.72 [![Badge Build]][Build Status]
# Xbyak 6.73 [![Badge Build]][Build Status]

*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*

Expand Down
3 changes: 2 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.72
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.73

-----------------------------------------------------------------------------
◎概要
Expand Down Expand Up @@ -402,6 +402,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴

2023/08/07 ver 6.73 sha512/sm3/sm4/avx-vnni-int16追加
2023/08/02 ver 6.72 xabort, xbegin, xend追加
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
2023/07/05 ver 6.70 vpclmulqdqのailas追加
Expand Down
2 changes: 1 addition & 1 deletion sample/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ toyvm : toyvm.cpp $(XBYAK_INC)
static_buf: static_buf.cpp $(XBYAK_INC)
static_buf64: static_buf.cpp $(XBYAK_INC)
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
test_util2 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
jmp_table: jmp_table.cpp $(XBYAK_INC)
jmp_table64: jmp_table.cpp $(XBYAK_INC)
memfd: memfd.cpp $(XBYAK_INC)
2 changes: 2 additions & 0 deletions sample/cpuid/arl.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
19 changes: 16 additions & 3 deletions sample/cpuid/cpuid.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
#!/bin/bash

UPDATE=0
if [ $# -eq 1 ]; then
UPDATE=1
fi

if [ $UPDATE == 1 ]; then
echo "UPDATE"
fi

make -C ../ test_util64

cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr)
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr arl lnl)

for cpu in ${cpus[@]} ; do
echo $cpu
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
diff $cpu.txt tmp.txt
if [ $UPDATE == 1 ]; then
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt
else
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
diff $cpu.txt tmp.txt
fi
done

2 changes: 1 addition & 1 deletion sample/cpuid/grr.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd
2 changes: 2 additions & 0 deletions sample/cpuid/lnl.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
2 changes: 1 addition & 1 deletion sample/cpuid/srf.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd
10 changes: 0 additions & 10 deletions sample/cpuid/update-txt.sh

This file was deleted.

4 changes: 4 additions & 0 deletions sample/test_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ void putCPUinfo(bool onlyCpuidFeature)
{ Cpu::tRAO_INT, "rao-int" },
{ Cpu::tCMPCCXADD, "cmpccxadd" },
{ Cpu::tPREFETCHITI, "prefetchiti" },
{ Cpu::tSHA512, "sha512" },
{ Cpu::tSM3, "sm3" },
{ Cpu::tSM4, "sm4" },
{ Cpu::tAVX_VNNI_INT16, "avx_vnni_int16" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
Expand Down
111 changes: 111 additions & 0 deletions test/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2170,4 +2170,115 @@ CYBOZU_TEST_AUTO(prefetchiti)
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

CYBOZU_TEST_AUTO(crypto)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vsha512msg1(ymm3, xmm5);
vsha512msg2(ymm9, ymm10);
vsha512rnds2(ymm1, ymm3, xmm2);

vsm3msg1(xmm1, xmm2, xmm3);
vsm3msg1(xmm1, xmm2, ptr [rax]);
vsm3msg2(xmm5, xmm7, xmm3);
vsm3msg2(xmm5, xmm6, ptr [rax]);
vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);

vsm4key4(xmm1, xmm2, xmm3);
vsm4key4(xmm1, xmm2, ptr [rdx]);
vsm4rnds4(xmm1, xmm2, xmm3);
vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
}
} c;
const uint8_t tbl[] = {
// sha512
0xc4, 0xe2, 0x7f, 0xcc, 0xdd,
0xc4, 0x42, 0x7f, 0xcd, 0xca,
0xc4, 0xe2, 0x67, 0xcb, 0xca,

// sm3
0xC4, 0xE2, 0x68, 0xDA, 0xCB,
0xC4, 0xE2, 0x68, 0xDA, 0x08,
0xC4, 0xE2, 0x41, 0xDA, 0xEB,
0xC4, 0xE2, 0x49, 0xDA, 0x28,
0xC4, 0xE3, 0x41, 0xDE, 0xEB, 0x12,
0xC4, 0xE3, 0x41, 0xDE, 0x29, 0x34,

// sm4
0xc4, 0xe2, 0x6a, 0xda, 0xcb,
0xc4, 0xe2, 0x6a, 0xda, 0x0a,
0xc4, 0xe2, 0x6b, 0xda, 0xcb,
0xc4, 0xe2, 0x4b, 0xda, 0x2c, 0x81,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

CYBOZU_TEST_AUTO(avx_vnni_int)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpdpbssd(xmm1, xmm2, xmm3);
vpdpbssd(ymm1, ymm2, ptr [rax]);
vpdpbssds(xmm1, xmm2, xmm3);
vpdpbssds(ymm1, ymm2, ptr [rax]);
vpdpbsud(xmm1, xmm2, xmm3);
vpdpbsud(ymm1, ymm2, ptr [rax]);
vpdpbsuds(xmm1, xmm2, xmm3);
vpdpbsuds(ymm1, ymm2, ptr [rax]);
vpdpbuud(xmm1, xmm2, xmm3);
vpdpbuud(ymm1, ymm2, ptr [rax]);
vpdpbuuds(xmm1, xmm2, xmm3);
vpdpbuuds(ymm1, ymm2, ptr [rax]);

vpdpwsud(xmm1, xmm2, xmm3);
vpdpwsud(ymm1, ymm2, ptr [rax]);
vpdpwsuds(xmm1, xmm2, xmm3);
vpdpwsuds(ymm1, ymm2, ptr [rax]);
vpdpwusd(xmm1, xmm2, xmm3);
vpdpwusd(ymm1, ymm2, ptr [rax]);
vpdpwusds(xmm1, xmm2, xmm3);
vpdpwusds(ymm1, ymm2, ptr [rax]);
vpdpwuud(xmm1, xmm2, xmm3);
vpdpwuud(ymm1, ymm2, ptr [rax]);
vpdpwuuds(xmm1, xmm2, xmm3);
vpdpwuuds(ymm1, ymm2, ptr [rax]);
}
} c;
const uint8_t tbl[] = {
0xc4, 0xe2, 0x6b, 0x50, 0xcb,
0xc4, 0xe2, 0x6f, 0x50, 0x08,
0xc4, 0xe2, 0x6b, 0x51, 0xcb,
0xc4, 0xe2, 0x6f, 0x51, 0x08,
0xc4, 0xe2, 0x6a, 0x50, 0xcb,
0xc4, 0xe2, 0x6e, 0x50, 0x08,
0xc4, 0xe2, 0x6a, 0x51, 0xcb,
0xc4, 0xe2, 0x6e, 0x51, 0x08,
0xc4, 0xe2, 0x68, 0x50, 0xcb,
0xc4, 0xe2, 0x6c, 0x50, 0x08,
0xc4, 0xe2, 0x68, 0x51, 0xcb,
0xc4, 0xe2, 0x6c, 0x51, 0x08,
0xc4, 0xe2, 0x6a, 0xd2, 0xcb,
0xc4, 0xe2, 0x6e, 0xd2, 0x08,
0xc4, 0xe2, 0x6a, 0xd3, 0xcb,
0xc4, 0xe2, 0x6e, 0xd3, 0x08,
0xc4, 0xe2, 0x69, 0xd2, 0xcb,
0xc4, 0xe2, 0x6d, 0xd2, 0x08,
0xc4, 0xe2, 0x69, 0xd3, 0xcb,
0xc4, 0xe2, 0x6d, 0xd3, 0x08,
0xc4, 0xe2, 0x68, 0xd2, 0xcb,
0xc4, 0xe2, 0x6c, 0xd2, 0x08,
0xc4, 0xe2, 0x68, 0xd3, 0xcb,
0xc4, 0xe2, 0x6c, 0xd3, 0x08,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}


#endif
2 changes: 1 addition & 1 deletion xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ namespace Xbyak {

enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x6720 /* 0xABCD = A.BC(.D) */
VERSION = 0x6730 /* 0xABCD = A.BC(.D) */
};

#ifndef MIE_INTEGER_TYPE_DEFINED
Expand Down
16 changes: 15 additions & 1 deletion xbyak/xbyak_mnemonic.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const char *getVersionString() const { return "6.72"; }
const char *getVersionString() const { return "6.73"; }
void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
Expand Down Expand Up @@ -1219,6 +1219,12 @@ void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD2); }
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD3); }
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD2); }
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD3); }
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD2); }
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD3); }
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
Expand Down Expand Up @@ -1348,8 +1354,16 @@ void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { op
void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); }
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); }
void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); }
void vsha512msg1(const Ymm& y, const Xmm& x) { if (!(y.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y, 0, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCC); }
void vsha512msg2(const Ymm& y1, const Ymm& y2) { if (!(y1.isYMM() && y2.isYMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, 0, y2, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCD); }
void vsha512rnds2(const Ymm& y1, const Ymm& y2, const Xmm& x) { if (!(y1.isYMM() && y2.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, &y2, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCB); }
void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); }
void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); }
void vsm3msg1(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
void vsm3msg2(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
void vsm3rnds2(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0xDE, imm); }
void vsm4key4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
void vsm4rnds4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); }
void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); }
void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); }
Expand Down
8 changes: 8 additions & 0 deletions xbyak/xbyak_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,10 @@ class Cpu {
XBYAK_DEFINE_TYPE(75, tSERIALIZE);
XBYAK_DEFINE_TYPE(76, tUINTR);
XBYAK_DEFINE_TYPE(77, tXSAVE);
XBYAK_DEFINE_TYPE(78, tSHA512);
XBYAK_DEFINE_TYPE(79, tSM3);
XBYAK_DEFINE_TYPE(80, tSM4);
XBYAK_DEFINE_TYPE(81, tAVX_VNNI_INT16);

#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
Expand Down Expand Up @@ -608,6 +612,9 @@ class Cpu {
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
if (maxNumSubLeaves >= 1) {
getCpuidEx(7, 1, data);
if (EAX & (1U << 0)) type_ |= tSHA512;
if (EAX & (1U << 1)) type_ |= tSM3;
if (EAX & (1U << 2)) type_ |= tSM4;
if (EAX & (1U << 3)) type_ |= tRAO_INT;
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
if (type_ & tAVX512F) {
Expand All @@ -618,6 +625,7 @@ class Cpu {
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
if (EDX & (1U << 10)) type_ |= tAVX_VNNI_INT16;
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
}
}
Expand Down

0 comments on commit 76d7477

Please sign in to comment.