diff --git a/examples/asm_examples/ex_aligned_data_in_code/Makefile b/examples/asm_examples/ex_aligned_data_in_code/Makefile index c3181abd..8abd91ad 100644 --- a/examples/asm_examples/ex_aligned_data_in_code/Makefile +++ b/examples/asm_examples/ex_aligned_data_in_code/Makefile @@ -1,10 +1,17 @@ all: ex_original.s gcc ex_original.s -o ex + gcc ex_original2.s -o ex2 @./ex > out.txt +# ex2 executes AVX-512 code; ignore its failure so hosts without AVX-512 can still build ex. + -@./ex2 > out2.txt clean: rm -f ex out.txt rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb + rm -f ex2 out2.txt + rm -fr ex2.unstripped ex2.s check: ./ex > /tmp/res.txt @ diff out.txt /tmp/res.txt && echo TEST OK + ./ex2 > /tmp/res2.txt + @ diff out2.txt /tmp/res2.txt && echo TEST OK diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s index 368f2e94..65a2ddfa 100644 --- a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s +++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s @@ -14,7 +14,7 @@ main: # Load data into XMM register using movdqa: `data128.1` needs to be aligned. movdqa data128.1(%rip), %xmm0 - # A pair of instructions from an access to `data128.2`, which needs to + # A pair of instructions forms an access to `data128.2`, which needs to # be aligned. lea data128.2(%rip), %rax movdqa 0(%rax), %xmm1 @@ -22,11 +22,8 @@ main: # Load data into YMM register using movdqa: `data256` needs to be aligned. vmovapd data256(%rip), %ymm0 - # Load data into ZMM register using movdqa: `data512` needs to be aligned. - vmovaps data512(%rip), %zmm0 - - # Load data into ZMM register using vmovups: `data512u` does not need to be aligned. - vmovups data512u(%rip), %zmm1 + # Load data into YMM register using vmovups: `data256u` does not need to be aligned. 
+ vmovups data256u(%rip), %ymm1 call print_message2 @@ -58,17 +55,9 @@ data128.2: data256: .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -.align 64 -data512: - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 .zero 3 -data512u: - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +data256u: .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original2.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original2.s new file mode 100644 index 00000000..c103b098 --- /dev/null +++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original2.s @@ -0,0 +1,63 @@ +# This example is to demonstrate that data-in-code is properly aligned +# when it is referenced by instructions that require explicitly aligned memory. +# If not properly aligned, it may cause a segmentation fault due to alignment +# requirement violation. +# See Table 15-6 in https://cdrdv2.intel.com/v1/dl/getContent/671200. +# +# This example tests avx512 instructions. + + .section .text + +.globl main +.type main, @function +main: + call print_message1 + + # Load data into ZMM register using vmovaps: `data512` needs to be aligned. + vmovaps data512(%rip), %zmm0 + + # Load data into ZMM register using vmovups: `data512u` does not need to be aligned. 
+ vmovups data512u(%rip), %zmm1 + + call print_message2 + + xorq %rax, %rax + + ret + +.type print_message1, @function +print_message1: + lea message1(%rip), %rdi + call printf + ret + +.align 16 +.type print_message2, @function +print_message2: + lea message2(%rip), %rdi + call printf + ret + .zero 3 + +.align 64 +data512: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + + .zero 3 +data512u: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + + .section .data + +message1: + .ascii "Performing SIMD operations...\n" + .byte 0 +message2: + .ascii "SIMD operations completed.\n" + .byte 0 diff --git a/tests/misc_test.py b/tests/misc_test.py index a04e7562..ff1f1517 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -152,6 +152,15 @@ def test_lock_cmpxchg(self): self.subtest_lock_cmpxchg(example) +def check_avx512f_support(): + """Report whether this Linux host's `lscpu` output lists avx512f.""" + try:  # evaluated at import time by skipUnless, so it must not raise + linux = platform.system() == "Linux" + return linux and b"avx512f" in subprocess.check_output(["lscpu"]) + except (OSError, subprocess.CalledProcessError): + return False + + class AuxDataTests(unittest.TestCase): @unittest.skipUnless( platform.system() == "Linux", "This test is linux only." 
@@ -450,15 +459,51 @@ def test_aligned_data_in_code(self): ir = disassemble(Path(binary)).ir() m = ir.modules[0] + main_sym = next(m.symbols_named("main")) + main_block = main_sym.referent + alignments = m.aux_data["alignment"].data.items() - alignment_list = [alignment for uuid, alignment in alignments] + alignment_list = [ + alignment + for block, alignment in alignments + if block.address > main_block.address + ] - # alignment=16: `data128.1`, `data128.2`, and `main` - self.assertEqual(alignment_list.count(16), 3) + # alignment=16: `data128.1`, `data128.2` (`main` itself is skipped) + self.assertEqual(alignment_list.count(16), 2) # alignment=32: `data256` self.assertEqual(alignment_list.count(32), 1) - # alignment=64: `data512` and `_start` - self.assertEqual(alignment_list.count(64), 2) + + @unittest.skipUnless( + platform.system() == "Linux", "This test is linux only." + ) + @unittest.skipUnless( + check_avx512f_support(), "This test requires avx512f." + ) + def test_aligned_data_in_code512(self): + """ + Test that alignment directives are correctly generated for + data_in_code referenced by instructions that require 64-byte + alignment (the AVX-512 variant of the example, built as `ex2`) + """ + binary = "ex2" + with cd(ex_asm_dir / "ex_aligned_data_in_code"): + self.assertTrue(compile("gcc", "g++", "-O0", [])) + ir = disassemble(Path(binary)).ir() + m = ir.modules[0] + + main_sym = next(m.symbols_named("main")) + main_block = main_sym.referent + + alignments = m.aux_data["alignment"].data.items() + alignment_list = [ + alignment + for block, alignment in alignments + if block.address > main_block.address + ] + + # alignment=64: `data512` only (`main` and earlier blocks are skipped) + self.assertEqual(alignment_list.count(64), 1) class RawGtirbTests(unittest.TestCase):