Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files: browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 31, 2024
2 parents 97b6611 + e8ba033 commit 2c02730
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 62 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.20.1)
project(xbyak LANGUAGES CXX VERSION 7.21)

file(GLOB headers xbyak/*.h)

Expand Down
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2024/Oct/31 ver 7.21 Enhanced XMM register validation in SSE instructions
* 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
Expand Down
41 changes: 19 additions & 22 deletions gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
const MmxTbl6 *p = &mmxTbl6[i];
printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, T_0F, %s); }\n", p->name, p->code, p->pref);
printf("void %s(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
printf("void %s(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
}
}
{
Expand Down Expand Up @@ -484,7 +484,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
printf("void %s(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
{
Expand Down Expand Up @@ -1095,7 +1095,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
// cast the XMM register to a 16-bit register so that the 0x66 prefix is emitted
printf("void %s(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x%02X); }\n", p->name, p->code);
printf("void %s(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x%02X); }\n", p->name, p->code);
}
}
{
Expand Down Expand Up @@ -1165,24 +1165,22 @@ void put()
puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }");
puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }");

puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }");
puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }");
puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }");
puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); }");
puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); }");
puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); }");
puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }");
puts("void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }");
puts("void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }");
puts("void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }");
puts("void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); }");
puts("void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); }");
puts("void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); }");
puts("void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }");
puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }");

puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }");
puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }");
puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }");
puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }");
puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }");
puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); }");

puts("void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x7E); }");
puts("void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x6E); }");
puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); }");
puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); }");
puts("void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F); }");
puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }");
Expand Down Expand Up @@ -1443,7 +1441,6 @@ void put()
printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
}
puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
puts("void sha1msg12(const Xmm& x, const Operand& op) { opROO(Reg(), op, x, T_MUST_EVEX, 0xD9); }");
}
// (m, x), (m, y)
{
Expand Down Expand Up @@ -1952,8 +1949,8 @@ void put64()

putMemOp("cmpxchg16b", "T_0F", 1, 0xC7, 64);
putMemOp("fxrstor64", "T_0F", 1, 0xAE, 64);
puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }");
puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }");
puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }");
puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }");
puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }");
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.20.1',
version: '7.21',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 7.20.1 [![Badge Build]][Build Status]
# Xbyak 7.21 [![Badge Build]][Build Status]

*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*

Expand Down
3 changes: 2 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.20.1
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.21

-----------------------------------------------------------------------------
◎概要
Expand Down Expand Up @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴

2024/10/31 ver 7.21 SSE命令のXMMレジスタのチェックを厳密化
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
2024/10/15 ver 7.11 AVX10.2完全サポート
Expand Down
36 changes: 36 additions & 0 deletions test/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,42 @@ CYBOZU_TEST_AUTO(badSSE)
CYBOZU_TEST_EXCEPTION(movapd(xm16, xm1), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(movhpd(xm16, ptr[eax]), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(pextrb(eax, xm16, 1), Xbyak::Error);

CYBOZU_TEST_EXCEPTION(lddqu(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(maskmovdqu(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(maskmovq(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movapd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movaps(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movd(eax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(xm16, eax), Error);
CYBOZU_TEST_EXCEPTION(movdq2q(mm1, xm16), Error);
CYBOZU_TEST_EXCEPTION(movdqa(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movdqu(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movhlps(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movlhps(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movmskpd(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movmskps(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movntdq(ptr[rax], xmm16), Error);
CYBOZU_TEST_EXCEPTION(movntdqa(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movntpd(ptr[rax], xmm16), Error);
CYBOZU_TEST_EXCEPTION(movntps(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movntq(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movq(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(xm16, rax), Error);
CYBOZU_TEST_EXCEPTION(movq2dq(xm16, mm1), Error);
CYBOZU_TEST_EXCEPTION(movsd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movss(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movupd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movups(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(extractps(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrb(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrd(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrw(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pmovmskb(eax, xm16), Error);
}
} code;
}
Expand Down
8 changes: 4 additions & 4 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ namespace Xbyak {

enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7201 /* 0xABCD = A.BC(.D) */
VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
};

#ifndef MIE_INTEGER_TYPE_DEFINED
Expand Down Expand Up @@ -1734,10 +1734,10 @@ class CodeGenerator : public CodeArray {
{
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
}
static inline bool isValidSSE(const Operand& op1)
static inline bool isValidSSE(const Operand& op)
{
// SSE instructions do not support XMM16 - XMM31
return !(op1.isXMM() && op1.getIdx() >= 16);
return !(op.isXMM() && op.getIdx() >= 16);
}
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
Expand Down Expand Up @@ -2172,7 +2172,7 @@ class CodeGenerator : public CodeArray {
}
}
}
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE)
{
if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
Expand Down
Loading

0 comments on commit 2c02730

Please sign in to comment.