Skip to content

Commit

Permalink
setDefaultEncoding has changed.
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 15, 2024
1 parent 0c2f7fc commit ae76be3
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 35 deletions.
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
Expand Down
10 changes: 5 additions & 5 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc.
Some mnemonics can be encoded in more than one way: VEX, EVEX, or AVX10.2.
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
The default behavior depends on the order in which the instruction was introduced (whether VEX or EVEX came first),
The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first),
and can be specified using setDefaultEncoding.

```
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
vpdpbusd(xm0, xm1, xm2, AVX10v2Encoding); // same as the above
vpdpbusd(xm0, xm1, xm2, PreAVX10v2Encoding); // VEX (AVX-VNNI)
setDefaultEncoding(VexEncoding); // default encoding is VEX
vpdpbusd(xm0, xm1, xm2); // VEX
Expand All @@ -128,7 +128,7 @@ setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
```

- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)`
- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)`
Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.

param|vnniEnc|avx10Enc
Expand All @@ -137,7 +137,7 @@ VexEncoding|AVX-VNNI|-
EvexEncoding|AVX512-VNNI|-
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
AVX10v2Encoding|-|AVX10.2
default|EvexEncoding|VexEncoding
default|EvexEncoding|PreAVX10v2Encoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw

### Remark
Expand Down
5 changes: 3 additions & 2 deletions readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
xbyak.hをインクルードするだけですぐ利用することができます。
C++の枠組み内で閉じているため、外部アセンブラは不要です。
32bit/64bit両対応です。
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/AVX-512/APX/AVX10.2

・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
Expand Down Expand Up @@ -46,7 +46,7 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎新機能

APX/AVX10対応
APX/AVX10.2対応

例外なしモード追加
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
Expand Down Expand Up @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴

2024/10/15 ver 7.11 AVX10.2完全サポート
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正
2024/10/08 ver 7.09 AVX10.2のYMMレジスタの埋め込み丸め対応
Expand Down
4 changes: 2 additions & 2 deletions test/avx10_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,10 @@ CYBOZU_TEST_AUTO(vmpsadbw)
struct Code : Xbyak::CodeGenerator {
Code()
{
setDefaultEncoding();
setDefaultEncodingAVX10();
vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
setDefaultEncoding(VexEncoding, EvexEncoding);
setDefaultEncodingAVX10(AVX10v2Encoding);
vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
}
Expand Down
2 changes: 1 addition & 1 deletion test/test_by_xed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
setDefaultEncoding(EvexEncoding, AVX10v2Encoding);
setDefaultEncodingAVX10(AVX10v2Encoding);
#include "tmp.cpp"
}
};
Expand Down
59 changes: 34 additions & 25 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ enum {
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
ERR_BAD_ENCODING_MODE,
ERR_INTERNAL // Put it at last.
};

Expand Down Expand Up @@ -290,6 +291,7 @@ inline const char *ConvertErrorToString(int err)
"can't use rex2",
"invalid dfv",
"invalid reg index",
"bad encoding mode",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
Expand Down Expand Up @@ -1674,7 +1676,7 @@ typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding,
PreAVX10v2Encoding = EvexEncoding,
PreAVX10v2Encoding,
AVX10v2Encoding
} PreferredEncoding;

Expand Down Expand Up @@ -2663,25 +2665,24 @@ class CodeGenerator : public CodeArray {
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
}
PreferredEncoding getEncoding(PreferredEncoding encoding, int sel) const
PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
{
if (encoding == DefaultEncoding) {
encoding = defaultEncoding_[sel];
if (enc == DefaultEncoding) {
enc = defaultEncoding_[sel];
}
if (encoding == EvexEncoding) {
if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID)
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
}
return encoding;
return enc;
}
uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
bool isVex = getEncoding(encoding, sel) == VexEncoding;
return isVex ? typeVex : T_MUST_EVEX | typeEvex;
uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) {
enc = getEncoding(enc, sel);
return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex);
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
Expand Down Expand Up @@ -3138,8 +3139,8 @@ class CodeGenerator : public CodeArray {
#endif
, isDefaultJmpNEAR_(false)
{
// select avx512-vnni, vmpsadbw(avx)
setDefaultEncoding();
setDefaultEncodingAVX10();
labelMgr_.set(this);
}
void reset()
Expand Down Expand Up @@ -3176,11 +3177,19 @@ class CodeGenerator : public CodeArray {
#undef jnl
#endif

// set default encoding
// vnniEnc : AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding)
// avx10Enc : mpsadbw etc., AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding)
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }
// Select the default encoding used for the VNNI mnemonics.
//   EvexEncoding : AVX512_VNNI (the default argument)
//   VexEncoding  : AVX-VNNI
// Any other PreferredEncoding value is rejected with ERR_BAD_ENCODING_MODE.
// The accepted value is stored in defaultEncoding_[0].
void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
{
	const bool isVnniEncoding = (enc == VexEncoding) || (enc == EvexEncoding);
	if (!isVnniEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
	defaultEncoding_[0] = enc;
}
// Select the default encoding used for the AVX10-related mnemonics.
//   PreAVX10v2Encoding : AVX-VNNI-INT8/AVX512-FP16 (the default argument)
//   AVX10v2Encoding    : AVX10.2
// Any other PreferredEncoding value is rejected with ERR_BAD_ENCODING_MODE.
// The accepted value is stored in defaultEncoding_[1].
void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
{
	const bool isAvx10Encoding = (enc == PreAVX10v2Encoding) || (enc == AVX10v2Encoding);
	if (!isAvx10Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
	defaultEncoding_[1] = enc;
}

void bswap(const Reg32e& r)
{
Expand All @@ -3195,7 +3204,7 @@ class CodeGenerator : public CodeArray {
db(0xC8 + (idx & 7));
}
// AVX10 zero-extending for vmovd, vmovw
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit)
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
Expand All @@ -3210,31 +3219,31 @@ class CodeGenerator : public CodeArray {
rev = !rev;
}
int sel = -1;
if (getEncoding(encoding, 1) == AVX10v2Encoding) {
if (getEncoding(enc, 1) == AVX10v2Encoding) {
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
} else {
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
}
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
}
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32);
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
}
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64);
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
}
/*
use single byte nop if useMultiByteNop = false
Expand Down

0 comments on commit ae76be3

Please sign in to comment.