-
Notifications
You must be signed in to change notification settings - Fork 1
/
distributions.py
194 lines (164 loc) · 7.1 KB
/
distributions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import numpy as np
from scipy.stats import skewnorm
from abc import ABC, abstractmethod
"""
Author: Samuel Lehmann
Network with him at: https://www.linkedin.com/in/samuellehmann/
"""
# Sample count used by every distribution when the caller does not pass num_samples
DEFAULT_SAMPLES = 50000
# Cap on the number of re-sampling rounds a distribution with cutoffs may run;
# going past it raises a ValueError instead of looping forever
_MAX_ITERATIONS = 100
class Distribution(ABC):
    """
    Abstract base class shared by every distribution in this module.
    Concrete subclasses must implement calculate().
    """

    def __init__(self, name: str, num_samples: int = DEFAULT_SAMPLES, mid_length=None, lower_lim=None, upper_lim=None):
        """
        Stores the settings common to all distributions.
        :param name: A string representation of the distribution
        :param num_samples: The number of samples. Defaults to DEFAULT_SAMPLES
        :param mid_length: The mid-point (nominal) value of the distribution
        :param lower_lim: A cutoff at a lower limit, None if there is no cutoff
        :param upper_lim: A cutoff at an upper limit, None if there is no cutoff
        """
        self.num_samples = num_samples
        self.lower_lim, self.upper_lim = lower_lim, upper_lim
        self.name = name
        self.nominal_value = mid_length
        # Shape parameters start out undefined; subclasses that use them overwrite these.
        self.mean = self.std = self.skew = None

    def mid_length(self):
        """
        :return: The distribution's mid-point value, as passed to the constructor
        """
        return self.nominal_value

    @abstractmethod
    def calculate(self):
        """
        Draws a random sample from the distribution.
        :return: A randomly ordered numpy array of values
        """

    def abs_max(self):
        """
        The absolute maximum value possible in the distribution.
        :return: The upper limit, or None if not defined (e.g. a normal distribution without a cutoff)
        """
        return self.upper_lim

    def abs_min(self):
        """
        The absolute minimum value possible in the distribution.
        :return: The lower limit, or None if not defined (e.g. a normal distribution without a cutoff)
        """
        return self.lower_lim
class Normal(Distribution):
    """
    A normal distribution with optional lower and/or upper cutoffs.
    """

    def __init__(self, mean: float, std: float, num_samples: int = DEFAULT_SAMPLES, lower_lim=None, upper_lim=None):
        """
        :param mean: The mean value for the distribution; also used as the mid-point value
        :param std: The standard deviation for the distribution
        :param num_samples: Optional - the number of samples returned by calculate()
        :param lower_lim: Inclusive cutoff at a lower limit, no cutoff applied if None. A limit of 0 is applied.
        :param upper_lim: Inclusive cutoff at an upper limit, no cutoff applied if None. A limit of 0 is applied.
        """
        # The base class stores num_samples, the mid-point value and both limits.
        super().__init__("Normal", num_samples, mean, lower_lim, upper_lim)
        self.mean = mean
        self.std = std

    def _apply_cutoffs(self, values):
        """
        Drops the samples that fall outside the configured limits.
        :param values: A numpy array of samples
        :return: The samples that lie within the limits; a limit of None is not applied
        """
        # Compare against None explicitly: 0 is falsy but is still a valid cutoff.
        if self.lower_lim is not None:
            values = values[values >= self.lower_lim]
        if self.upper_lim is not None:
            values = values[values <= self.upper_lim]
        return values

    def calculate(self):
        """
        Returns a random sampling from the distribution in the form of a numpy array.
        When a cutoff is set, out-of-range samples are discarded and new batches are drawn
        until num_samples in-range values are available.
        :return: A randomly ordered numpy array of num_samples values
        :raises ValueError: If more than _MAX_ITERATIONS re-sampling rounds are needed
        """
        values = np.random.normal(self.mean, self.std, self.num_samples)
        if self.lower_lim is None and self.upper_lim is None:
            return values

        values = self._apply_cutoffs(values)
        count = 0
        while len(values) < self.num_samples:
            # Draw a full batch each round; only the in-range part is kept.
            fresh = np.random.normal(self.mean, self.std, self.num_samples)
            values = np.append(values, self._apply_cutoffs(fresh))
            count += 1
            if count > _MAX_ITERATIONS:
                raise ValueError('Number of iterations exceeds the maximum set for cutoff distributions.')
        # The last batch usually overshoots, so trim to the requested size.
        return values[:self.num_samples]
class Uniform(Distribution):
    """
    A uniform distribution centred on a nominal value.
    """

    def __init__(self, nominal: float, tolerance: float, num_samples: int = DEFAULT_SAMPLES):
        """
        :param nominal: The nominal (centre) value
        :param tolerance: The bi-directional tolerance applied around the nominal value
        :param num_samples: The number of samples returned by calculate()
        """
        low = nominal - tolerance
        high = nominal + tolerance
        # The interval ends double as the distribution's lower and upper limits.
        super().__init__("Uniform", num_samples, nominal, low, high)
        self.nominal = nominal
        self.tolerance = tolerance

    def calculate(self):
        """
        Returns a random sampling from the distribution in the form of a numpy array.
        :return: A randomly ordered numpy array of values
        """
        low = self.nominal - self.tolerance
        high = self.nominal + self.tolerance
        return np.random.uniform(low=low, high=high, size=self.num_samples)
class SkewedNormal(Distribution):
    """
    A skewed normal distribution with optional lower and/or upper cutoffs.
    """

    def __init__(self, skew: float, mean: float, std: float, num_samples: int = DEFAULT_SAMPLES, lower_lim=None,
                 upper_lim=None):
        """
        :param skew: 0 gives the normal distribution. A negative value will create a left skew whilst a positive
        value will create a right skew.
        :param mean: The mean value of the unskewed normal distribution (passed to scipy as loc);
        also used as the mid-point value
        :param std: The standard deviation of the unskewed normal distribution (passed to scipy as scale)
        :param num_samples: The number of samples returned by calculate()
        :param lower_lim: Inclusive cutoff at a lower limit, no cutoff applied if None. A limit of 0 is applied.
        :param upper_lim: Inclusive cutoff at an upper limit, no cutoff applied if None. A limit of 0 is applied.
        """
        # The base class stores num_samples, the mid-point value and both limits.
        super().__init__("Skewed Normal", num_samples, mean, lower_lim, upper_lim)
        self.skew = skew
        self.mean = mean
        self.std = std

    def _apply_cutoffs(self, values):
        """
        Drops the samples that fall outside the configured limits.
        :param values: A numpy array of samples
        :return: The samples that lie within the limits; a limit of None is not applied
        """
        # Compare against None explicitly: 0 is falsy but is still a valid cutoff.
        if self.lower_lim is not None:
            values = values[values >= self.lower_lim]
        if self.upper_lim is not None:
            values = values[values <= self.upper_lim]
        return values

    def calculate(self):
        """
        Returns a random sampling from the distribution in the form of a numpy array.
        When a cutoff is set, out-of-range samples are discarded and new batches are drawn
        until num_samples in-range values are available.
        :return: A randomly ordered numpy array of num_samples values
        :raises ValueError: If more than _MAX_ITERATIONS re-sampling rounds are needed
        """
        values = skewnorm.rvs(self.skew, loc=self.mean, scale=self.std, size=self.num_samples).astype(np.float64)
        if self.lower_lim is None and self.upper_lim is None:
            return values

        values = self._apply_cutoffs(values)
        count = 0
        while len(values) < self.num_samples:
            # Draw a full batch each round; only the in-range part is kept.
            fresh = skewnorm.rvs(self.skew, loc=self.mean, scale=self.std, size=self.num_samples).astype(np.float64)
            values = np.append(values, self._apply_cutoffs(fresh))
            count += 1
            if count > _MAX_ITERATIONS:
                raise ValueError('Number of iterations exceeds the maximum set for cutoff distributions.')
        # The last batch usually overshoots, so trim to the requested size.
        return values[:self.num_samples]