-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathfpe.h
62 lines (54 loc) · 1.84 KB
/
fpe.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#ifndef FPE_H
#define FPE_H
#include "types.h"
// Approximate floating-point arithmetic for normalized inputs and outputs.
// NOTE: This code has been designed for speed and simplicity, and does not
// fully implement IEEE floating-point arithmetic. In particular, it does
// not properly handle:
//
// (1) Denormalized numbers, infinities, or NaNs.
// (2) Overflow or underflow outside the range of normalized numbers.
// (3) Proper IEEE rounding.
//
// For normalized input and output, the error of a single operation should
// be no more than one ulp.
template <typename T>
struct FPEtraits;
template <>
struct FPEtraits<float> {
typedef uint32 U;
static const uint mbits = 23;
static const uint ebits = 8;
};
template <>
struct FPEtraits<double> {
typedef uint64 U;
static const uint mbits = 52;
static const uint ebits = 11;
};
template <
typename T, // floating type to implement
typename U = typename FPEtraits<T>::U, // corresponding integer type
uint mbits = FPEtraits<T>::mbits, // number of bits in significand
uint ebits = FPEtraits<T>::ebits // number of bits in exponent
>
class FPE {
public:
FPE(U u = 0) : u(u) {}
FPE(T t) : u((U&)t) {}
FPE(int i) : u(i) {}
operator T() const { return (T&)u; }
bool equalsign(FPE x) const { return !((u ^ x.u) & s); }
FPE operator -() const { return FPE(u ^ s); }
FPE operator +(FPE x) const;
FPE operator -(FPE x) const;
private:
FPE add(U g) const; // effective addition
FPE sub(U g) const; // effective subtraction
static const uint m = mbits; // number of bits in significand
static const U e = U(1) << mbits; // exponent least significant bit mask
static const U s = e << ebits; // sign bit mask
U u; // binary representation of float
};
#include "fpe.inl"
#endif