AccelerateHS · tmcdonell · Oct 19, 2021 · Oct 20, 2021 · Oct 26, 2021 · Oct 27, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -114,7 +114,7 @@ jobs:
       - name: Haddock
         # Behaviour of cabal haddock has changed for the worse: https://github.com/haskell/cabal/issues/8725
         run: cabal haddock --disable-documentation
-        if: matrix.mode == 'release'
+        if: matrix.os != 'windows-latest' && matrix.mode == 'release'
 
       - name: Test doctest
         run: cabal test doctest

diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,5 @@
 /docs/_build
 *.hi
 *.o
+
+hie.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,12 +9,29 @@ Policy (PVP)](https://pvp.haskell.org)
 ## [next]
 ### Added
   * Added debugging functions in module `Data.Array.Accelerate.Debug.Trace` ([#485](https://github.com/AccelerateHS/accelerate/pull/485))
+  * Support for SIMD data types in expressions. Support for storing a type `a`
+    in a SIMD vector can be added by deriving an instance for the class `SIMD`.
+    Pattern synonyms `V2`, `V3`, `V4`, `V8` and `V16` are provided to work with
+    these at both the Haskell value and embedded expression level.
+  * Instances for SIMD types in basic numeric classes (e.g. `Num` for `<4 x Float>`)
+  * Support for 128-bit integers (signed and unsigned)
+  * Support for 128-bit floating point types (build with cabal flag `float128`)
 
 ### Changed
   * Removed dependency on lens ([#493](https://github.com/AccelerateHS/accelerate/pull/493))
+  * The shape constructors (e.g. `Z` and `(:.)`) are now pattern synonyms that
+    work on both Haskell values and embedded expressions. Similarly for the
+    constructors of `Maybe`, `Either`, `Bool`, and `Ordering`.
 
 ### Fixed
   * Graphviz graph generation of `-ddump-dot` and `-ddump-simpl-dot` ([#384](https://github.com/AccelerateHS/accelerate/issues/384))
+  * Bug in `Semigroup` instance for `Maybe` ([#517](https://github.com/AccelerateHS/accelerate/issues/517))
+  * Bug in `Ord` instances for tuple types
+
+### Removed
+  * Pattern synonyms `Z_`, `(::.)`, `Any_`, `All_`, which are no longer required
+  * Pattern synonyms `Just_`, `Nothing_` etc., which have been renamed to no
+    longer require the trailing underscore.
 
 ### Contributors
 

diff --git a/accelerate.cabal b/accelerate.cabal
@@ -207,6 +207,15 @@ custom-setup
     , directory         >= 1.0
     , filepath          >= 1.0
 
+flag float128
+  manual:               True
+  default:              False
+  description:
+    Enable support for 128-bit floating point numbers
+    .
+    This requires the library 'quadmath' to be installed. Note that not all
+    targets support 128-bit floating-point numbers.
+
 flag debug
   manual:               True
   default:              False
@@ -364,6 +373,11 @@ library
         , unique
         , unordered-containers          >= 0.2
         , vector                        >= 0.10
+        , wide-word                     >= 0.1
+
+  if impl(ghc < 9.0)
+    build-depends:
+          integer-gmp
 
   exposed-modules:
         -- The core language and reference implementation
@@ -392,14 +406,15 @@ library
         Data.Array.Accelerate.Analysis.Hash
         Data.Array.Accelerate.Analysis.Match
         Data.Array.Accelerate.Array.Data
-        Data.Array.Accelerate.Array.Remote
-        Data.Array.Accelerate.Array.Remote.Class
-        Data.Array.Accelerate.Array.Remote.LRU
-        Data.Array.Accelerate.Array.Remote.Table
+        -- Data.Array.Accelerate.Array.Remote
+        -- Data.Array.Accelerate.Array.Remote.Class
+        -- Data.Array.Accelerate.Array.Remote.LRU
+        -- Data.Array.Accelerate.Array.Remote.Table
         Data.Array.Accelerate.Array.Unique
         Data.Array.Accelerate.Async
-        Data.Array.Accelerate.Error
         Data.Array.Accelerate.Debug.Internal
+        Data.Array.Accelerate.Error
+        Data.Array.Accelerate.Interpreter.Arithmetic
         Data.Array.Accelerate.Lifetime
         Data.Array.Accelerate.Pretty
         Data.Array.Accelerate.Representation.Array
@@ -433,9 +448,11 @@ library
         Data.Array.Accelerate.Test.Similar
 
         -- Other
+        Crypto.Hash.XKCP
         Data.BitSet
+        Data.Primitive.Bit
         Data.Primitive.Vec
-        Crypto.Hash.XKCP
+        Data.Numeric.Float128
 
   other-modules:
         Data.Array.Accelerate.Analysis.Hash.TH
@@ -445,6 +462,7 @@ library
         Data.Array.Accelerate.Classes.Eq
         Data.Array.Accelerate.Classes.Floating
         Data.Array.Accelerate.Classes.Fractional
+        Data.Array.Accelerate.Classes.FromBool
         Data.Array.Accelerate.Classes.FromIntegral
         Data.Array.Accelerate.Classes.Integral
         Data.Array.Accelerate.Classes.Num
@@ -454,6 +472,9 @@ library
         Data.Array.Accelerate.Classes.RealFloat
         Data.Array.Accelerate.Classes.RealFrac
         Data.Array.Accelerate.Classes.ToFloating
+        Data.Array.Accelerate.Classes.VEq
+        Data.Array.Accelerate.Classes.VNum
+        Data.Array.Accelerate.Classes.VOrd
         Data.Array.Accelerate.Debug.Internal.Clock
         Data.Array.Accelerate.Debug.Internal.Flags
         Data.Array.Accelerate.Debug.Internal.Graph
@@ -470,7 +491,10 @@ library
         Data.Array.Accelerate.Pattern.Either
         Data.Array.Accelerate.Pattern.Maybe
         Data.Array.Accelerate.Pattern.Ordering
+        Data.Array.Accelerate.Pattern.SIMD
+        Data.Array.Accelerate.Pattern.Shape
         Data.Array.Accelerate.Pattern.TH
+        Data.Array.Accelerate.Pattern.Tuple
         Data.Array.Accelerate.Prelude
         Data.Array.Accelerate.Pretty.Graphviz
         Data.Array.Accelerate.Pretty.Graphviz.Monad
@@ -489,6 +513,7 @@ library
         Data.Array.Accelerate.Test.NoFib.Config
 
         Language.Haskell.TH.Extra
+        GHC.TypeLits.Extra
 
   if flag(nofib)
     build-depends:
@@ -562,6 +587,7 @@ library
   cc-options:
         -O3
         -Wall
+        -std=c11
 
   cxx-options:
         -O3
@@ -590,6 +616,16 @@ library
         -caf-all
         -auto-all
 
+  if flag(float128)
+    cc-options:
+        -DFLOAT128_ENABLE
+
+    cpp-options:
+        -DFLOAT128_ENABLE
+
+    extra-libraries:
+        quadmath
+
   if flag(debug)
     cc-options:
         -DACCELERATE_DEBUG

diff --git a/cbits/float128.c b/cbits/float128.c
@@ -0,0 +1,119 @@
+
+#include <quadmath.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* Shorthand for the 128-bit floating point type used by every function in this
+ * file. */
+typedef _Float128 f128;
+
+/* Overlay that exposes an f128 as bit-fields: a 1-bit sign flag, a 15-bit
+ * exponent and a mantissa split into a 48-bit part (mantissa0) and a 64-bit
+ * part (mantissa1). Used by the bit-inspection predicates _isdenormq and
+ * _isnegzeroq.
+ *
+ * NOTE(review): WORDS_BIGENDIAN is not defined anywhere in this file; unless a
+ * header or a compiler flag supplies it, the `#if` evaluates to 0 and the
+ * second (little-endian) field order is selected -- confirm for big-endian
+ * targets.
+ */
+union ieee754_quad {
+  f128 as_float128;
+  struct {
+#if WORDS_BIGENDIAN
+    uint64_t negative:1;
+    uint64_t exponent:15;
+    uint64_t mantissa0:48;
+    uint64_t mantissa1;
+#else
+    uint64_t mantissa1;
+    uint64_t mantissa0:48;
+    uint64_t exponent:15;
+    uint64_t negative:1;
+#endif
+  } as_uint128;
+};
+
+/* Text conversion (operations backing Read and Show)
+ */
+
+/* Parse `str` with strtoflt128 and store the value in `*r`. No end pointer is
+ * requested, so the position where parsing stopped is not reported. */
+void _readq(f128* r, const char* str)
+{
+  const f128 parsed = strtoflt128(str, NULL);
+  *r = parsed;
+}
+
+/* Format `*a` into `buf` with the "%Qf" conversion, passing `n` as the buffer
+ * size. The return value of quadmath_snprintf is discarded. */
+void _showq(char* buf, size_t n, f128 *a)
+{
+  const f128 value = *a;
+  (void) quadmath_snprintf(buf, n, "%Qf", value);
+}
+
+/* Num: every function reads its operand(s) through the `a` / `b` pointers and
+ * writes the result through `r`.
+ */
+
+/* *r = *a + *b */
+void _addq(f128* r, const f128* a, const f128* b)
+{
+  const f128 lhs = *a;
+  const f128 rhs = *b;
+  *r = lhs + rhs;
+}
+
+/* *r = *a - *b */
+void _subq(f128* r, const f128* a, const f128* b)
+{
+  const f128 lhs = *a;
+  const f128 rhs = *b;
+  *r = lhs - rhs;
+}
+
+/* *r = *a * *b */
+void _mulq(f128* r, const f128* a, const f128* b)
+{
+  const f128 lhs = *a;
+  const f128 rhs = *b;
+  *r = lhs * rhs;
+}
+
+/* *r = -(*a) */
+void _negateq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = -x;
+}
+
+/* *r = fabsq(*a) */
+void _absq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = fabsq(x);
+}
+
+/* Sign of *a: 1 when it is greater than zero, -1 when it is less than zero,
+ * and 0 when neither comparison holds. */
+void _signumq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  const int is_positive = x > 0.0q;
+  const int is_negative = x < 0.0q;
+  *r = is_positive - is_negative;
+}
+
+/* Fractional
+ */
+
+/* *r = *a / *b */
+void _divq(f128* r, const f128* a, const f128* b)
+{
+  const f128 numerator = *a;
+  const f128 denominator = *b;
+  *r = numerator / denominator;
+}
+
+/* *r = 1 / *a */
+void _recipq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = 1.0q / x;
+}
+
+/* Operations from Floating
+ */
+void _piq(f128* r) { *r = M_PIq; }
+void _expq(f128* r, const f128* a) { *r = expq(*a); }
+void _logq(f128* r, const f128* a) { *r = logq(*a); }
+void _sqrtq(f128* r, const f128* a) { *r = sqrtq(*a); }
+void _powq(f128* r, const f128* a, const f128* b) { *r = powq(*a, *b); }
+void _sinq(f128* r, const f128* a) { *r = sinq(*a); }
+void _cosq(f128* r, const f128* a) { *r = cosq(*a); }
+void _tanq(f128* r, const f128* a) { *r = tanq(*a); }
+void _asinq(f128* r, const f128* a) { *r = asinq(*a); }
+void _acosq(f128* r, const f128* a) { *r = acosq(*a); }
+void _atanq(f128* r, const f128* a) { *r = atanq(*a); }
+void _sinhq(f128* r, const f128* a) { *r = sinhq(*a); }
+void _coshq(f128* r, const f128* a) { *r = coshq(*a); }
+void _tanhq(f128* r, const f128* a) { *r = tanhq(*a); }
+void _asinhq(f128* r, const f128* a) { *r = asinhq(*a); }
+void _acoshq(f128* r, const f128* a) { *r = acoshq(*a); }
+void _atanhq(f128* r, const f128* a) { *r = atanhq(*a); }
+void _log1pq(f128* r, const f128* a) { *r = log1pq(*a); }
+void _expm1q(f128* r, const f128* a) { *r = expm1q(*a); }
+
+/* RealFrac: each wrapper applies the matching rounding routine to `*a` and
+ * stores the result in `*r`.
+ */
+
+/* *r = roundq(*a) */
+void _roundq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = roundq(x);
+}
+
+/* *r = truncq(*a) */
+void _truncq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = truncq(x);
+}
+
+/* *r = floorq(*a) */
+void _floorq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = floorq(x);
+}
+
+/* *r = ceilq(*a) */
+void _ceilq(f128* r, const f128* a)
+{
+  const f128 x = *a;
+  *r = ceilq(x);
+}
+
+/* RealFloat
+ */
+
+/* Result of isnanq(*a), converted to uint32_t. */
+uint32_t _isnanq(const f128* a)
+{
+  const f128 x = *a;
+  return isnanq(x);
+}
+
+/* Result of isinfq(*a), converted to uint32_t. */
+uint32_t _isinfq(const f128* a)
+{
+  const f128 x = *a;
+  return isinfq(x);
+}
+
+/* *r = frexpq(*a, b); `b` is handed to frexpq unchanged as its output
+ * pointer. */
+void _frexpq(f128* r, const f128* a, int32_t* b)
+{
+  const f128 x = *a;
+  const f128 fraction = frexpq(x, b);
+  *r = fraction;
+}
+
+/* *r = ldexpq(*a, b) */
+void _ldexpq(f128* r, const f128* a, int32_t b)
+{
+  const f128 x = *a;
+  *r = ldexpq(x, b);
+}
+
+/* *r = atan2q(*a, *b) */
+void _atan2q(f128* r, const f128* a, const f128* b)
+{
+  const f128 y = *a;
+  const f128 x = *b;
+  *r = atan2q(y, x);
+}
+
+/* Denormal test for a quad precision value.
+ *
+ * Returns 1 when the exponent field of `*a` is zero while at least one mantissa
+ * bit is set, and 0 otherwise. The sign bit plays no part in the decision.
+ */
+uint32_t _isdenormq(const f128* a)
+{
+  union ieee754_quad bits;
+  bits.as_float128 = *a;
+
+  /* Any non-zero exponent rules out a denormal. */
+  if (bits.as_uint128.exponent != 0) {
+    return 0;
+  }
+
+  if (bits.as_uint128.mantissa0 != 0) {
+    return 1;
+  }
+
+  return bits.as_uint128.mantissa1 != 0;
+}
+
+/* Negative-zero test for a quad precision value.
+ *
+ * Returns 1 only when the sign bit of `*a` is set and the exponent and both
+ * mantissa fields are all zero; returns 0 otherwise.
+ */
+uint32_t _isnegzeroq(const f128* a)
+{
+  union ieee754_quad bits;
+  bits.as_float128 = *a;
+
+  if (!bits.as_uint128.negative) {
+    return 0;
+  }
+
+  if (bits.as_uint128.exponent != 0) {
+    return 0;
+  }
+
+  if (bits.as_uint128.mantissa0 != 0) {
+    return 0;
+  }
+
+  return bits.as_uint128.mantissa1 == 0;
+}
+
+/* Operations from Ord
+ *
+ * Each predicate compares *a with *b and returns 1 when the relation holds and
+ * 0 otherwise.
+ */
+uint32_t _ltq(const f128* a, const f128* b) { return *a < *b; }
+uint32_t _leq(const f128* a, const f128* b) { return *a <= *b; }
+uint32_t _gtq(const f128* a, const f128* b) { return *a > *b; }
+/* Greater-than-or-equal: must test >=, not the <= used by _leq. */
+uint32_t _geq(const f128* a, const f128* b) { return *a >= *b; }
+/* *r = fminq(*a, *b) and *r = fmaxq(*a, *b) respectively. */
+void _fminq(f128* r, const f128* a, const f128* b) { *r = fminq(*a, *b); }
+void _fmaxq(f128* r, const f128* a, const f128* b) { *r = fmaxq(*a, *b); }
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,3 +16,5 @@ @@
     /docs/_build
     *.hi
     *.o
+    hie.yaml