aarch64: Use canonical RTL representation for SVE2 XAR and extend it to fixed-width modes

The MD pattern for the XAR instruction in SVE2 is currently expressed with
non-canonical RTL, using a ROTATERT code with a constant rotate amount.
Fix it by using the left ROTATE code.  This necessitates splitting out a
separate expander to translate the immediate coming from the intrinsic
from a right-rotate to a left-rotate amount.

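As a quick illustration of the equivalence this relies on (a minimal
sketch, not part of the patch): rotating right by N is the same as
rotating left by the element width minus N.

#include <stdint.h>

/* Illustrative only: for 0 < n < 16, ror16 (x, n) == rol16 (x, 16 - n),
   which is exactly the immediate translation the expander performs.  */
static uint16_t
ror16 (uint16_t x, unsigned n)
{
  return (uint16_t) ((x >> n) | (x << (16 - n)));
}

static uint16_t
rol16 (uint16_t x, unsigned n)
{
  return (uint16_t) ((x << n) | (x >> (16 - n)));
}
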
Additionally, as the SVE2 XAR instruction is unpredicated and can handle all
element sizes from .b to .d, it is a good fit for implementing the XOR+ROTATE
operation for Advanced SIMD modes where the TARGET_SHA3 version cannot be used
(it can only handle V2DImode operands).  Therefore let's extend the accepted
modes of the SVE2 pattern to include the Advanced SIMD integer modes.

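As an illustration, a hedged sketch (the function name is made up and
assumes a target with +sve2 but without +sha3) of the kind of
fixed-width code this change allows to match the SVE2 pattern:

#include <arm_neon.h>

/* XOR then rotate each 16-bit lane left by 11 (equivalently right by
   5).  With SVE2 available, this combination can now be emitted as a
   single unpredicated XAR rather than an EOR plus shift/orr sequence.  */
uint16x8_t
xor_rotate (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t t = veorq_u16 (a, b);
  return vorrq_u16 (vshlq_n_u16 (t, 11), vshrq_n_u16 (t, 5));
}
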
This causes some tests for the svxar* intrinsics to fail because they now
simplify to a plain EOR when the rotate amount equals the width of the
element.  This simplification is desirable (EOR instructions have throughput
better than or equal to XAR's, and they are non-destructive of their input)
so the tests are adjusted.

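For example, rotating a 16-bit element by 16 is the identity, so a
call like the following sketch (mirroring the adjusted tests) now
assembles to an EOR:

#include <arm_sve.h>

/* The rotate amount equals the element width, so the rotate is a no-op
   and only the XOR survives; the tests scan for EOR instead of XAR.  */
svint16_t
xar_full_width (svint16_t a, svint16_t b)
{
  return svxar_n_s16 (a, b, 16);
}
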
For V2DImode XAR operations we should prefer the Advanced SIMD version when
it is available (TARGET_SHA3) because it is non-destructive, so restrict the
SVE2 pattern accordingly.  Tests are added to confirm this.
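
A hedged sketch of that case (the function name is illustrative): when
built with +sve2+sha3 this should pick the non-destructive Advanced
SIMD XAR, while with +sve2 alone the SVE2 form is still used.

#include <arm_neon.h>

/* XOR then rotate each 64-bit lane right by 25 (left by 39).  */
uint64x2_t
xar_v2di (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t t = veorq_u64 (a, b);
  return vorrq_u64 (vshrq_n_u64 (t, 25), vshlq_n_u64 (t, 39));
}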

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for mainline?

Signed-off-by: Kyrylo Tkachov <[email protected]>

gcc/

	* config/aarch64/iterators.md (SVE_ASIMD_FULL_I): New mode iterator.
	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_xar<mode>):
	Use SVE_ASIMD_FULL_I modes.  Use ROTATE code for the rotate step.
	Adjust output logic.
	* config/aarch64/aarch64-sve-builtins-sve2.cc (svxar_impl): Define.
	(svxar): Use the above.

gcc/testsuite/

	* gcc.target/aarch64/xar_neon_modes.c: New test.
	* gcc.target/aarch64/xar_v2di_nonsve.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s16.c: Scan for EOR rather than
	XAR.
	* gcc.target/aarch64/sve2/acle/asm/xar_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/xar_u8.c: Likewise.
ktkachov committed Nov 4, 2024
1 parent de2bc6a commit 1e5ff11
Showing 13 changed files with 191 additions and 59 deletions.
18 changes: 17 additions & 1 deletion gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -108,6 +108,22 @@ class svaba_impl : public function_base
}
};

class svxar_impl : public function_base
{
public:
rtx
expand (function_expander &e) const override
{
/* aarch64_sve2_xar represents this operation with a left-rotate RTX.
Convert the right-rotate amount from the intrinsic to fit this. */
machine_mode mode = e.vector_mode (0);
HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
- INTVAL (e.args[2]);
e.args[2] = aarch64_simd_gen_const_vector_dup (mode, rot);
return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
}
};

class svcdot_impl : public function_base
{
public:
@@ -795,6 +811,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
FUNCTION (svxar, svxar_impl,)

} /* end namespace aarch64_sve */
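
To make the conversion in svxar_impl concrete, a worked instance (the
intrinsic call is illustrative; the numbers follow from the code above):

/* For svxar_n_u32 (a, b, 8): e.vector_mode (0) is VNx4SImode, whose
   unit size is 32 bits, so rot = 32 - 8 = 24.  args[2] becomes a
   constant vector of 24s and the emitted RTL uses a left-rotate by 24,
   the canonical form of a right-rotate by 8.  */
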
30 changes: 20 additions & 10 deletions gcc/config/aarch64/aarch64-sve2.md
@@ -1266,18 +1266,28 @@
;; - XAR
;; -------------------------------------------------------------------------

;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
;; version should be preferred when available as it is non-destructive on its
;; input.
(define_insn "@aarch64_sve2_xar<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(rotatert:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , %0 , w ; * ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
[(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
(rotate:SVE_ASIMD_FULL_I
(xor:SVE_ASIMD_FULL_I
(match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
(match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
(match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
"TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
{
operands[3]
= GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
- INTVAL (unwrap_const_vec_duplicate (operands[3])));
if (which_alternative == 0)
return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
}
[(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
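
The output block above reverses the expander's translation when the
instruction is printed; a worked instance (hedged, values follow from
the template):

/* For an .h (16-bit element) XAR whose RTL operand 3 is a left-rotate
   by 11, the template computes 16 - 11 = 5 and prints
   "xar z0.h, z0.h, z2.h, #5", i.e. the architectural right-rotate
   form of the same operation.  */
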
3 changes: 3 additions & 0 deletions gcc/config/aarch64/iterators.md
@@ -446,6 +446,9 @@
;; All fully-packed SVE integer vector modes.
(define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])

;; All fully-packed SVE integer and Advanced SIMD integer modes.
(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])

;; All fully-packed SVE floating-point vector modes.
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])

18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s16_untied, svint16_t,

/*
** xar_16_s16_tied1:
** xar z0\.h, z0\.h, z1\.h, #16
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,

/*
** xar_16_s16_tied2:
** xar z0\.h, z0\.h, z1\.h, #16
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
/*
** xar_16_s16_untied:
** (
** movprfx z0, z1
** xar z0\.h, z0\.h, z2\.h, #16
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.h, z0\.h, z1\.h, #16
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s32_untied, svint32_t,

/*
** xar_32_s32_tied1:
** xar z0\.s, z0\.s, z1\.s, #32
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,

/*
** xar_32_s32_tied2:
** xar z0\.s, z0\.s, z1\.s, #32
** (
** eor z0\.d, z0\.d, z1\.d
** |
** eor z0\.d, z1\.d, z0\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
/*
** xar_32_s32_untied:
** (
** movprfx z0, z1
** xar z0\.s, z0\.s, z2\.s, #32
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.s, z0\.s, z1\.s, #32
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s64_untied, svint64_t,

/*
** xar_64_s64_tied1:
** xar z0\.d, z0\.d, z1\.d, #64
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,

/*
** xar_64_s64_tied2:
** xar z0\.d, z0\.d, z1\.d, #64
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
/*
** xar_64_s64_untied:
** (
** movprfx z0, z1
** xar z0\.d, z0\.d, z2\.d, #64
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.d, z0\.d, z1\.d, #64
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s8_untied, svint8_t,

/*
** xar_8_s8_tied1:
** xar z0\.b, z0\.b, z1\.b, #8
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,

/*
** xar_8_s8_tied2:
** xar z0\.b, z0\.b, z1\.b, #8
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
/*
** xar_8_s8_untied:
** (
** movprfx z0, z1
** xar z0\.b, z0\.b, z2\.b, #8
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.b, z0\.b, z1\.b, #8
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u16_untied, svuint16_t,

/*
** xar_16_u16_tied1:
** xar z0\.h, z0\.h, z1\.h, #16
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,

/*
** xar_16_u16_tied2:
** xar z0\.h, z0\.h, z1\.h, #16
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
/*
** xar_16_u16_untied:
** (
** movprfx z0, z1
** xar z0\.h, z0\.h, z2\.h, #16
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.h, z0\.h, z1\.h, #16
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u32_untied, svuint32_t,

/*
** xar_32_u32_tied1:
** xar z0\.s, z0\.s, z1\.s, #32
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,

/*
** xar_32_u32_tied2:
** xar z0\.s, z0\.s, z1\.s, #32
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
/*
** xar_32_u32_untied:
** (
** movprfx z0, z1
** xar z0\.s, z0\.s, z2\.s, #32
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.s, z0\.s, z1\.s, #32
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u64_untied, svuint64_t,

/*
** xar_64_u64_tied1:
** xar z0\.d, z0\.d, z1\.d, #64
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,

/*
** xar_64_u64_tied2:
** xar z0\.d, z0\.d, z1\.d, #64
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
/*
** xar_64_u64_untied:
** (
** movprfx z0, z1
** xar z0\.d, z0\.d, z2\.d, #64
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.d, z0\.d, z1\.d, #64
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
18 changes: 12 additions & 6 deletions gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u8_untied, svuint8_t,

/*
** xar_8_u8_tied1:
** xar z0\.b, z0\.b, z1\.b, #8
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,

/*
** xar_8_u8_tied2:
** xar z0\.b, z0\.b, z1\.b, #8
** (
** eor z0\.d, z1\.d, z0\.d
** |
** eor z0\.d, z0\.d, z1\.d
** )
** ret
*/
TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
/*
** xar_8_u8_untied:
** (
** movprfx z0, z1
** xar z0\.b, z0\.b, z2\.b, #8
** eor z0\.d, z1\.d, z2\.d
** |
** movprfx z0, z2
** xar z0\.b, z0\.b, z1\.b, #8
** eor z0\.d, z2\.d, z1\.d
** )
** ret
*/
