Skip to content

Commit

Permalink
define ALIGN_SIZE backup
Browse files Browse the repository at this point in the history
in case not even SSE2 is in use, as can happen on a 32-bit arch
  • Loading branch information
MikaelSlevinsky committed Sep 13, 2019
1 parent ef13b56 commit 8db4aab
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 34 deletions.
12 changes: 8 additions & 4 deletions src/ftinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static inline double floatmin(void) {return M_FLT_MIN;}
static inline long double floatminl(void) {return M_FLT_MINl;}
static inline quadruple floatminq(void) {return M_FLT_MINq;}

#if !(__APPLE__)
#ifndef __APPLE__
// Fallbacks for non-Apple platforms: Apple's libm provides these natively.
// Each computes the trig function of pi*x without requiring the extension.
static inline float __cospif(float x) {
    const float theta = M_PIf*x;
    return cosf(theta);
}
static inline double __cospi(double x) {
    const double theta = M_PI*x;
    return cos(theta);
}
static inline float __sinpif(float x) {
    const float theta = M_PIf*x;
    return sinf(theta);
}
Expand Down Expand Up @@ -94,15 +94,15 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}

/* Scalar max/min. Every occurrence of the arguments is parenthesized so
   expressions with low-precedence operators expand correctly (the original
   left the result operands bare).
   NOTE: arguments may be evaluated twice -- do not pass expressions with
   side effects, e.g. MAX(i++, j). */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#if __AVX512F__
#ifdef __AVX512F__
/* 512-bit tier: 8 doubles per vector register. */
#define VECTOR_SIZE_8 8
/* Widest tier is checked first, so ALIGN_SIZE is set unconditionally here;
   the narrower tiers below only set it under #ifndef ALIGN_SIZE. */
#define ALIGN_SIZE VECTOR_SIZE_8
/* GCC/Clang vector-extension type: 8 doubles = 64 bytes. */
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))   /* broadcast scalar to all 8 lanes */
#define vload8(v) ((double8) _mm512_load_pd(v))  /* aligned load of 8 doubles */
#define vstore8(u, v) (_mm512_store_pd(u, v))    /* aligned store of 8 doubles */
#endif
#if __AVX__
#ifdef __AVX__
#define VECTOR_SIZE_4 4
#ifndef ALIGN_SIZE
#define ALIGN_SIZE VECTOR_SIZE_4
Expand All @@ -112,7 +112,7 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}
#define vload4(v) ((double4) _mm256_load_pd(v))
#define vstore4(u, v) (_mm256_store_pd(u, v))
#endif
#if __SSE2__
#ifdef __SSE2__
#define VECTOR_SIZE_2 2
#ifndef ALIGN_SIZE
#define ALIGN_SIZE VECTOR_SIZE_2
Expand All @@ -123,6 +123,10 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}
#define vstore2(u, v) (_mm_store_pd(u, v))
#endif

/* Scalar fallback: no SIMD tier above defined ALIGN_SIZE (e.g. a build
   without SSE2, as on some 32-bit targets), so align to a single double. */
#ifndef ALIGN_SIZE
#define ALIGN_SIZE 1
#endif

/* Round N up to the next multiple of ALIGN_SIZE.
   N is parenthesized in the expansion (the original left it bare, so e.g.
   VALIGN(n & 7) would mis-parse because & binds looser than +).
   Relies on ALIGN_SIZE being a power of two. */
#define VALIGN(N) (((N) + ALIGN_SIZE - 1) & -ALIGN_SIZE)
/* Allocate s bytes aligned to a full vector of doubles (ALIGN_SIZE*8 bytes).
   NOTE(review): _mm_malloc/_mm_free come from the x86 intrinsics headers;
   confirm they remain available on targets built without SSE2. */
#define VMALLOC(s) _mm_malloc((s), ALIGN_SIZE*8)
#define VFREE(s) _mm_free(s)
Expand Down
6 changes: 3 additions & 3 deletions src/rotations.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
Y[0] = y;
}

#if __SSE2__
#ifdef __SSE2__
static inline void apply_givens_SSE(const double S, const double C, double * X, double * Y) {
double2 x = vload2(X);
double2 y = vload2(Y);
Expand Down Expand Up @@ -54,7 +54,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
#endif


#if __AVX__
#ifdef __AVX__
static inline void apply_givens_AVX(const double S, const double C, double * X, double * Y) {
double4 x = vload4(X);
double4 y = vload4(Y);
Expand Down Expand Up @@ -82,7 +82,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
}
#endif

#if __AVX512F__
#ifdef __AVX512F__
static inline void apply_givens_AVX512(const double S, const double C, double * X, double * Y) {
double8 x = vload8(X);
double8 y = vload8(Y);
Expand Down
45 changes: 18 additions & 27 deletions test/test_assembly.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,6 @@

#include <immintrin.h>

#if __SSE2__
#define VECTOR_SIZE_2 2
typedef double double2 __attribute__ ((vector_size (VECTOR_SIZE_2*8)));
#define vall2(x) ((double2) _mm_set1_pd(x))
#define vload2(v) ((double2) _mm_load_pd(v))
#define vstore2(u, v) (_mm_store_pd(u, v))
#endif
#if __AVX__
#define VECTOR_SIZE_4 4
typedef double double4 __attribute__ ((vector_size (VECTOR_SIZE_4*8)));
#define vall4(x) ((double4) _mm256_set1_pd(x))
#define vload4(v) ((double4) _mm256_load_pd(v))
#define vstore4(u, v) (_mm256_store_pd(u, v))
#endif
#if __AVX512F__
#define VECTOR_SIZE_8 8
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))
#define vload8(v) ((double8) _mm512_load_pd(v))
#define vstore8(u, v) (_mm512_store_pd(u, v))
#endif

void swap(double * A, double * B, const int N) {
double tmp;
for (int i = 0; i < N; i++) {
Expand All @@ -41,7 +19,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
Y[0] = y;
}

#if __SSE2__
#ifdef __SSE2__
/* 128-bit tier: 2 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_2 2
typedef double double2 __attribute__ ((vector_size (VECTOR_SIZE_2*8)));
#define vall2(x) ((double2) _mm_set1_pd(x))   /* broadcast scalar to both lanes */
#define vload2(v) ((double2) _mm_load_pd(v))  /* aligned load of 2 doubles */
#define vstore2(u, v) (_mm_store_pd(u, v))    /* aligned store of 2 doubles */
void apply_givens_SSE(const double S, const double C, double * X, double * Y) {
double2 x = vload2(X);
double2 y = vload2(Y);
Expand All @@ -50,8 +33,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
vstore2(Y, C*y - S*x);
}
#endif

#if __AVX__
#ifdef __AVX__
/* 256-bit tier: 4 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_4 4
typedef double double4 __attribute__ ((vector_size (VECTOR_SIZE_4*8)));
#define vall4(x) ((double4) _mm256_set1_pd(x))   /* broadcast scalar to all 4 lanes */
#define vload4(v) ((double4) _mm256_load_pd(v))  /* aligned load of 4 doubles */
#define vstore4(u, v) (_mm256_store_pd(u, v))    /* aligned store of 4 doubles */
void apply_givens_AVX(const double S, const double C, double * X, double * Y) {
double4 x = vload4(X);
double4 y = vload4(Y);
Expand All @@ -60,8 +47,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
vstore4(Y, C*y - S*x);
}
#endif

#if __AVX512F__
#ifdef __AVX512F__
/* 512-bit tier: 8 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_8 8
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))   /* broadcast scalar to all 8 lanes */
#define vload8(v) ((double8) _mm512_load_pd(v))  /* aligned load of 8 doubles */
#define vstore8(u, v) (_mm512_store_pd(u, v))    /* aligned store of 8 doubles */
void apply_givens_AVX512(const double S, const double C, double * X, double * Y) {
double8 x = vload8(X);
double8 y = vload8(Y);
Expand Down

0 comments on commit 8db4aab

Please sign in to comment.