Skip to content

Commit

Permalink
define ALIGN_SIZE backup
Browse files Browse the repository at this point in the history
in case not even SSE2 is in use, as can happen on a 32-bit arch
  • Loading branch information
MikaelSlevinsky committed Sep 13, 2019
1 parent ef13b56 commit 8db4aab
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 34 deletions.
12 changes: 8 additions & 4 deletions src/ftinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static inline double floatmin(void) {return M_FLT_MIN;}
static inline long double floatminl(void) {return M_FLT_MINl;}
static inline quadruple floatminq(void) {return M_FLT_MINq;}

#if !(__APPLE__)
#ifndef __APPLE__
// Fallbacks for non-Apple platforms: Apple's libm provides these natively.
// Each computes the trig function of pi*x without requiring the extension.
static inline float __cospif(float x) {
    const float theta = M_PIf*x;
    return cosf(theta);
}
static inline double __cospi(double x) {
    const double theta = M_PI*x;
    return cos(theta);
}
static inline float __sinpif(float x) {
    const float theta = M_PIf*x;
    return sinf(theta);
}
Expand Down Expand Up @@ -94,15 +94,15 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}

/* Scalar max/min. Every occurrence of the arguments is parenthesized so
   expressions with low-precedence operators expand correctly (the original
   left the result operands bare).
   NOTE: arguments may be evaluated twice -- do not pass expressions with
   side effects, e.g. MAX(i++, j). */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#if __AVX512F__
#ifdef __AVX512F__
/* 512-bit tier: 8 doubles per vector register. */
#define VECTOR_SIZE_8 8
/* Widest tier is checked first, so ALIGN_SIZE is set unconditionally here;
   the narrower tiers below only set it under #ifndef ALIGN_SIZE. */
#define ALIGN_SIZE VECTOR_SIZE_8
/* GCC/Clang vector-extension type: 8 doubles = 64 bytes. */
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))   /* broadcast scalar to all 8 lanes */
#define vload8(v) ((double8) _mm512_load_pd(v))  /* aligned load of 8 doubles */
#define vstore8(u, v) (_mm512_store_pd(u, v))    /* aligned store of 8 doubles */
#endif
#if __AVX__
#ifdef __AVX__
#define VECTOR_SIZE_4 4
#ifndef ALIGN_SIZE
#define ALIGN_SIZE VECTOR_SIZE_4
Expand All @@ -112,7 +112,7 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}
#define vload4(v) ((double4) _mm256_load_pd(v))
#define vstore4(u, v) (_mm256_store_pd(u, v))
#endif
#if __SSE2__
#ifdef __SSE2__
#define VECTOR_SIZE_2 2
#ifndef ALIGN_SIZE
#define ALIGN_SIZE VECTOR_SIZE_2
Expand All @@ -123,6 +123,10 @@ static inline quadruple __tanpiq(quadruple x) {return tanq(M_PIq*x);}
#define vstore2(u, v) (_mm_store_pd(u, v))
#endif

/* Scalar fallback: no SIMD tier above defined ALIGN_SIZE (e.g. a build
   without SSE2, as on some 32-bit targets), so align to a single double. */
#ifndef ALIGN_SIZE
#define ALIGN_SIZE 1
#endif

/* Round N up to the next multiple of ALIGN_SIZE.
   N is parenthesized in the expansion (the original left it bare, so e.g.
   VALIGN(n & 7) would mis-parse because & binds looser than +).
   Relies on ALIGN_SIZE being a power of two. */
#define VALIGN(N) (((N) + ALIGN_SIZE - 1) & -ALIGN_SIZE)
/* Allocate s bytes aligned to a full vector of doubles (ALIGN_SIZE*8 bytes).
   NOTE(review): _mm_malloc/_mm_free come from the x86 intrinsics headers;
   confirm they remain available on targets built without SSE2. */
#define VMALLOC(s) _mm_malloc((s), ALIGN_SIZE*8)
#define VFREE(s) _mm_free(s)
Expand Down
6 changes: 3 additions & 3 deletions src/rotations.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
Y[0] = y;
}

#if __SSE2__
#ifdef __SSE2__
static inline void apply_givens_SSE(const double S, const double C, double * X, double * Y) {
double2 x = vload2(X);
double2 y = vload2(Y);
Expand Down Expand Up @@ -54,7 +54,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
#endif


#if __AVX__
#ifdef __AVX__
static inline void apply_givens_AVX(const double S, const double C, double * X, double * Y) {
double4 x = vload4(X);
double4 y = vload4(Y);
Expand Down Expand Up @@ -82,7 +82,7 @@ static inline void apply_givens_t(const double S, const double C, double * X, do
}
#endif

#if __AVX512F__
#ifdef __AVX512F__
static inline void apply_givens_AVX512(const double S, const double C, double * X, double * Y) {
double8 x = vload8(X);
double8 y = vload8(Y);
Expand Down
45 changes: 18 additions & 27 deletions test/test_assembly.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,6 @@

#include <immintrin.h>

#if __SSE2__
#define VECTOR_SIZE_2 2
typedef double double2 __attribute__ ((vector_size (VECTOR_SIZE_2*8)));
#define vall2(x) ((double2) _mm_set1_pd(x))
#define vload2(v) ((double2) _mm_load_pd(v))
#define vstore2(u, v) (_mm_store_pd(u, v))
#endif
#if __AVX__
#define VECTOR_SIZE_4 4
typedef double double4 __attribute__ ((vector_size (VECTOR_SIZE_4*8)));
#define vall4(x) ((double4) _mm256_set1_pd(x))
#define vload4(v) ((double4) _mm256_load_pd(v))
#define vstore4(u, v) (_mm256_store_pd(u, v))
#endif
#if __AVX512F__
#define VECTOR_SIZE_8 8
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))
#define vload8(v) ((double8) _mm512_load_pd(v))
#define vstore8(u, v) (_mm512_store_pd(u, v))
#endif

void swap(double * A, double * B, const int N) {
double tmp;
for (int i = 0; i < N; i++) {
Expand All @@ -41,7 +19,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
Y[0] = y;
}

#if __SSE2__
#ifdef __SSE2__
/* 128-bit tier: 2 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_2 2
typedef double double2 __attribute__ ((vector_size (VECTOR_SIZE_2*8)));
#define vall2(x) ((double2) _mm_set1_pd(x))   /* broadcast scalar to both lanes */
#define vload2(v) ((double2) _mm_load_pd(v))  /* aligned load of 2 doubles */
#define vstore2(u, v) (_mm_store_pd(u, v))    /* aligned store of 2 doubles */
void apply_givens_SSE(const double S, const double C, double * X, double * Y) {
double2 x = vload2(X);
double2 y = vload2(Y);
Expand All @@ -50,8 +33,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
vstore2(Y, C*y - S*x);
}
#endif

#if __AVX__
#ifdef __AVX__
/* 256-bit tier: 4 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_4 4
typedef double double4 __attribute__ ((vector_size (VECTOR_SIZE_4*8)));
#define vall4(x) ((double4) _mm256_set1_pd(x))   /* broadcast scalar to all 4 lanes */
#define vload4(v) ((double4) _mm256_load_pd(v))  /* aligned load of 4 doubles */
#define vstore4(u, v) (_mm256_store_pd(u, v))    /* aligned store of 4 doubles */
void apply_givens_AVX(const double S, const double C, double * X, double * Y) {
double4 x = vload4(X);
double4 y = vload4(Y);
Expand All @@ -60,8 +47,12 @@ void apply_givens(const double S, const double C, double * X, double * Y) {
vstore4(Y, C*y - S*x);
}
#endif

#if __AVX512F__
#ifdef __AVX512F__
/* 512-bit tier: 8 doubles per vector. Local copy of the vector helpers
   from src/ftinternal.h so this test file stays self-contained. */
#define VECTOR_SIZE_8 8
typedef double double8 __attribute__ ((vector_size (VECTOR_SIZE_8*8)));
#define vall8(x) ((double8) _mm512_set1_pd(x))   /* broadcast scalar to all 8 lanes */
#define vload8(v) ((double8) _mm512_load_pd(v))  /* aligned load of 8 doubles */
#define vstore8(u, v) (_mm512_store_pd(u, v))    /* aligned store of 8 doubles */
void apply_givens_AVX512(const double S, const double C, double * X, double * Y) {
double8 x = vload8(X);
double8 y = vload8(Y);
Expand Down

0 comments on commit 8db4aab

Please sign in to comment.