diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d520a34..b70b3db9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,11 +19,14 @@ jobs: steps: - uses: actions/checkout@v4 - run: apt -y update - - run: apt -y install g++-multilib libboost-dev make nasm yasm wget xz-utils python3 + - run: apt -y install g++-multilib libboost-dev make nasm yasm wget #xz-utils python3 - run: make test - run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION" - run: | cd test - wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz - tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz - env XED=sde-external-9.44.0-2024-08-22-lin/xed64 make xed_test + #wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz + #tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz + wget https://github.com/herumi/xed-bin/raw/refs/heads/main/xed + chmod +x ./xed + ./xed -version + env XED=./xed make xed_test diff --git a/test/Makefile b/test/Makefile index a61895fd..cf5c7163 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,8 +60,7 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt -TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt xed_test: @set -e; \ for target in $(addprefix avx10/, $(TEST_FILES)); do \ diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt index a387c610..c544e02c 100644 --- a/test/avx10/bf16.txt +++ b/test/avx10/bf16.txt @@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13); vcomsbf16(xm2, xm3); vcomsbf16(xm2, ptr[rax+128]); -//vgetexppbf16(xm1|k3, xmm2); -//vgetexppbf16(xm1|k3, ptr[rax+128]); -//vgetexppbf16(xm1|k3, ptr_b[rax+128]); +vgetexppbf16(xm1|k3, xmm2); +vgetexppbf16(xm1|k3, ptr[rax+128]); +vgetexppbf16(xm1|k3, ptr_b[rax+128]); -//vgetexppbf16(ym1|k3, ymm2); -//vgetexppbf16(ym1|k3, ptr[rax+128]); -//vgetexppbf16(ym1|k3, ptr_b[rax+128]); +vgetexppbf16(ym1|k3, ymm2); +vgetexppbf16(ym1|k3, ptr[rax+128]); +vgetexppbf16(ym1|k3, ptr_b[rax+128]); -//vgetexppbf16(zm1|k3, zmm2); -//vgetexppbf16(zm1|k3, ptr[rax+128]); -//vgetexppbf16(zm1|k3, ptr_b[rax+128]); +vgetexppbf16(zm1|k3, zmm2); +vgetexppbf16(zm1|k3, ptr[rax+128]); +vgetexppbf16(zm1|k3, ptr_b[rax+128]); vgetmantpbf16(xm1|k3, xmm2, 3); vgetmantpbf16(xm1|k3, ptr[rax+128], 5);