-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
aarch64: Optimize vector rotates as vector permutes where possible
Some vector rotate operations can be implemented in a single instruction rather than using the fallback SHL+USRA sequence. In particular, when the rotate amount is half the bitwidth of the element we can use a REV64, REV32 or REV16 instruction. More generally, rotates by a byte amount can be implemented using vector permutes. This patch adds such a generic routine in expmed.cc called expand_rotate_as_vec_perm that calculates the required permute indices and uses the expand_vec_perm_const interface. On aarch64 this ends up generating the single-instruction sequences above where possible and can use LDR+TBL sequences too, which are a good choice. With help from Richard, the routine should be VLA-safe. However, the only use of expand_rotate_as_vec_perm introduced in this patch is in aarch64-specific code that for now only handles fixed-width modes. A runtime aarch64 test is added to ensure the permute indices are not messed up. Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Kyrylo Tkachov <[email protected]> gcc/ * expmed.h (expand_rotate_as_vec_perm): Declare. * expmed.cc (expand_rotate_as_vec_perm): Define. * config/aarch64/aarch64-protos.h (aarch64_emit_opt_vec_rotate): Declare prototype. * config/aarch64/aarch64.cc (aarch64_emit_opt_vec_rotate): Implement. * config/aarch64/aarch64-simd.md (*aarch64_simd_rotate_imm<mode>): Call the above. gcc/testsuite/ * gcc.target/aarch64/vec-rot-exec.c: New test. * gcc.target/aarch64/simd/pr117048_2.c: New test.
- Loading branch information
Showing
7 changed files
with
232 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2 -mlittle-endian" } */ | ||
/* { dg-final { check-function-bodies "**" "" "" } } */ | ||
|
||
/* Vector types used by the functions below: four 16-byte vectors and
   two 8-byte vectors, named after their element count and width.  */
typedef char v16qi __attribute__ ((vector_size (16)));
typedef unsigned short v8hi __attribute__ ((vector_size (16)));
typedef unsigned int v4si __attribute__ ((vector_size (16)));
typedef unsigned long long v2di __attribute__ ((vector_size (16)));
typedef unsigned short v4hi __attribute__ ((vector_size (8)));
typedef unsigned int v2si __attribute__ ((vector_size (8)));
|
||
/* | ||
** G1: | ||
** rev64 v0\.4s, v0\.4s | ||
** ret | ||
*/ | ||
v2di | ||
G1 (v2di r) | ||
{ | ||
return (r >> 32) | (r << 32); | ||
} | ||
|
||
/* | ||
** G2: | ||
** rev32 v0\.8h, v0\.8h | ||
** ret | ||
*/ | ||
v4si | ||
G2 (v4si r) | ||
{ | ||
return (r >> 16) | (r << 16); | ||
} | ||
|
||
/* | ||
** G3: | ||
** rev16 v0\.16b, v0\.16b | ||
** ret | ||
*/ | ||
v8hi | ||
G3 (v8hi r) | ||
{ | ||
return (r >> 8) | (r << 8); | ||
} | ||
|
||
/* | ||
** G4: | ||
** rev32 v0\.4h, v0\.4h | ||
** ret | ||
*/ | ||
v2si | ||
G4 (v2si r) | ||
{ | ||
return (r >> 16) | (r << 16); | ||
} | ||
|
||
/* | ||
** G5: | ||
** rev16 v0\.8b, v0\.8b | ||
** ret | ||
*/ | ||
v4hi | ||
G5 (v4hi r) | ||
{ | ||
return (r >> 8) | (r << 8); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* { dg-do run } */ | ||
/* { dg-options "-O2" } */ | ||
|
||
/* Vector types used by the runtime checks: four 16-byte vectors and
   three 8-byte vectors, named after their element count and width.  */
typedef char v16qi __attribute__ ((vector_size (16)));
typedef unsigned short v8hi __attribute__ ((vector_size (16)));
typedef unsigned int v4si __attribute__ ((vector_size (16)));
typedef unsigned long long v2di __attribute__ ((vector_size (16)));
typedef char v8qi __attribute__ ((vector_size (8)));
typedef unsigned short v4hi __attribute__ ((vector_size (8)));
typedef unsigned int v2si __attribute__ ((vector_size (8)));

/* Number of elements in the vector object X.  The argument is
   parenthesized so that expressions such as *p expand correctly.  */
#define VEC_ELTS(X) (sizeof (X) / sizeof ((X)[0]))
|
||
/* Source bytes for the test vectors.  Declared as an array so that the
   alignment attribute applies to the character data itself; on a pointer
   declaration it would not align the string literal being pointed to.  */
static const char str[] __attribute__ ((aligned (16)))
  = "abcdefghijklmnopqrstuvwxyz";
|
||
/* Scalar reference: rotate the 64-bit value X left by AMT bits.
   Both shift counts are reduced modulo 64, so AMT == 0 (or any multiple
   of 64) returns X instead of shifting by the full width, which is
   undefined behaviour in C.  */
unsigned long long
__attribute__((noipa,noinline))
rot_64_one (unsigned long long x, unsigned amt)
{
  const unsigned left = amt & 63u;
  const unsigned right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
/* Scalar reference: rotate the 32-bit value X left by AMT bits.
   Both shift counts are reduced modulo 32, so AMT == 0 (or any multiple
   of 32) returns X instead of shifting by the full width, which is
   undefined behaviour in C.  */
unsigned int
__attribute__((noipa,noinline))
rot_32_one (unsigned int x, unsigned amt)
{
  const unsigned left = amt & 31u;
  const unsigned right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
|
||
/* Scalar reference: rotate the 16-bit value X left by AMT bits.
   The work is done in unsigned int so no shift happens on a promoted
   signed int, and both counts are reduced modulo 16 so any AMT is
   well defined.  The result is truncated back to 16 bits.  */
unsigned short
__attribute__((noipa,noinline))
rot_16_one (unsigned short x, unsigned short amt)
{
  const unsigned int wide = x;
  const unsigned int left = amt & 15u;
  const unsigned int right = (16u - left) & 15u;
  return (unsigned short) ((wide << left) | (wide >> right));
}
|
||
|
||
/* ROTFUNC (M, S, A) defines two functions for vector type M whose
   elements are S bits wide:

     rot_M_S_A       rotates every element of its argument left by A bits
                     using the shift/shift/or idiom;
     test_rot_M_S_A  fills a vector from the bytes at STR, rotates it and
                     aborts if any element differs from the scalar
                     reference rot_S_one.

   The input vector is filled with __builtin_memcpy rather than by
   dereferencing a cast pointer, so the load does not depend on the
   alignment of STR, does not cast away const, and does not read char
   storage through a vector lvalue.  */
#define ROTFUNC(M,S,A)                                          \
M                                                               \
__attribute__((noipa,noinline))                                 \
rot_##M##_##S##_##A (M x)                                       \
{                                                               \
  return (x << (A)) | (x >> ((S) - (A)));                       \
}                                                               \
                                                                \
void                                                            \
test_rot_##M##_##S##_##A (void)                                 \
{                                                               \
  M vec;                                                        \
  __builtin_memcpy (&vec, str, sizeof (vec));                   \
  M res = rot_##M##_##S##_##A (vec);                            \
  for (__SIZE_TYPE__ i = 0; i < VEC_ELTS (vec); i++)            \
    if (res[i] != rot_##S##_one (vec[i], (A)))                  \
      __builtin_abort ();                                       \
}
|
||
ROTFUNC (v2di, 64, 56) | ||
ROTFUNC (v2di, 64, 48) | ||
ROTFUNC (v2di, 64, 40) | ||
ROTFUNC (v2di, 64, 32) | ||
ROTFUNC (v2di, 64, 24) | ||
ROTFUNC (v2di, 64, 16) | ||
ROTFUNC (v2di, 64, 8) | ||
|
||
ROTFUNC (v4si, 32, 24) | ||
ROTFUNC (v4si, 32, 16) | ||
ROTFUNC (v4si, 32, 8) | ||
|
||
ROTFUNC (v8hi, 16, 8) | ||
|
||
ROTFUNC (v2si, 32, 24) | ||
ROTFUNC (v2si, 32, 16) | ||
ROTFUNC (v2si, 32, 8) | ||
|
||
ROTFUNC (v4hi, 16, 8) | ||
|
||
#define CALL_TEST(M,S,A) test_rot_##M##_##S##_##A () | ||
|
||
int | ||
main (void) | ||
{ | ||
CALL_TEST (v2di, 64, 56); | ||
CALL_TEST (v2di, 64, 48); | ||
CALL_TEST (v2di, 64, 40); | ||
CALL_TEST (v2di, 64, 32); | ||
CALL_TEST (v2di, 64, 24); | ||
CALL_TEST (v2di, 64, 16); | ||
CALL_TEST (v2di, 64, 8); | ||
|
||
CALL_TEST (v4si, 32, 24); | ||
CALL_TEST (v4si, 32, 16); | ||
CALL_TEST (v4si, 32, 8); | ||
|
||
CALL_TEST (v8hi, 16, 8); | ||
|
||
CALL_TEST (v2si, 32, 24); | ||
CALL_TEST (v2si, 32, 16); | ||
CALL_TEST (v2si, 32, 8); | ||
|
||
CALL_TEST (v4hi, 16, 8); | ||
|
||
return 0; | ||
} | ||
|