Skip to content

Commit

Permalink
Merge pull request #24 from Serapieum-of-alex/factory-design-pattern
Browse files Browse the repository at this point in the history
Factory design pattern
  • Loading branch information
MAfarrag authored Dec 11, 2023
2 parents 1f6d7d2 + 83358a8 commit e1f3c07
Show file tree
Hide file tree
Showing 25 changed files with 2,091 additions and 1,618 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
ignore = E501, W503
ignore = E203, E266, E501, W503, E741
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,4 @@ build_artifacts
mo_*
conda/
*.zip
.run/
10 changes: 9 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,18 @@ History
* modify the pdf, cdf, and probability plot plots
* create separate plot and confidence_interval modules.

0.4.0 (2023-011-23)
0.4.0 (2023-11-23)
------------------

* add Pearson 3 distribution
* Use setup.py instead of pyproject.toml.
* Correct Pearson correlation coefficient and add documentation.
* replace the pdf and cdf by the methods from scipy package.

0.5.0 (2023-12-11)
------------------

* Unify all the methods for the distributions.
* Use factory design pattern to create the distributions.
* add tests for the eva module.
* use snake_case for the methods and variables.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pip install git+https://github.com/MAfarrag/statista
## pip
to install the latest release you can easily use pip
```
pip install statista==0.4.0
pip install statista==0.5.0
```

Quick start
Expand Down
143 changes: 64 additions & 79 deletions examples/Extreme value statistics.py
Original file line number Diff line number Diff line change
@@ -1,126 +1,111 @@
"""Created on Wed Sep 9 23:31:11 2020.
@author: mofarrag
"""
"""Extreme value statistics"""
import matplotlib

matplotlib.use("TkAgg")
import pandas as pd
from statista.distributions import GEV, ConfidenceInterval, Gumbel, PlottingPosition

from statista.distributions import GEV, Gumbel, PlottingPosition, Distributions
from statista.confidence_interval import ConfidenceInterval

time_series1 = pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
time_series2 = pd.read_csv("examples/data/time_series2.txt", header=None)[0].tolist()
#%%
Gdist = Gumbel(time_series1)
# %%
gumbel_dist = Distributions("Gumbel", time_series1)
# default parameter estimation method is maximum likelihood method
Param_mle = Gdist.estimateParameter(method="mle")
Gdist.ks()
Gdist.chisquare()
print(Param_mle)
loc = Param_mle[0]
scale = Param_mle[1]
param_mle = gumbel_dist.fit_model(method="mle")
gumbel_dist.ks()
gumbel_dist.chisquare()
print(param_mle)
# calculate and plot the pdf
pdf = Gdist.pdf(loc, scale, plot_figure=True)
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
#%% lmoments
Param_lmoments = Gdist.estimateParameter(method="lmoments")
Gdist.ks()
Gdist.chisquare()
print(Param_lmoments)
loc = Param_lmoments[0]
scale = Param_lmoments[1]
pdf = gumbel_dist.pdf(param_mle, plot_figure=True)
cdf, _, _ = gumbel_dist.cdf(param_mle, plot_figure=True)
# %% lmoments
param_lmoments = gumbel_dist.fit_model(method="lmoments")
gumbel_dist.ks()
gumbel_dist.chisquare()
print(param_lmoments)
# calculate and plot the pdf
pdf = Gdist.pdf(loc, scale, plot_figure=True)
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
#%%
pdf = gumbel_dist.pdf(param_lmoments, plot_figure=True)
cdf, _, _ = gumbel_dist.cdf(param_lmoments, plot_figure=True)
# %%
# calculate the CDF(Non Exceedance probability) using weibul plotting position
time_series1.sort()
# calculate the F (Non-Exceedance probability based on weibul)
cdf_Weibul = PlottingPosition.weibul(time_series1)
cdf_weibul = PlottingPosition.weibul(time_series1)
# TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution
Qth = Gdist.theporeticalEstimate(loc, scale, cdf_Weibul)
Qth = gumbel_dist.theoretical_estimate(param_lmoments, cdf_weibul)
# test = stats.chisquare(st.Standardize(Qth), st.Standardize(time_series1),ddof=5)
# calculate the confidence interval
upper, lower = Gdist.confidenceInterval(loc, scale, cdf_Weibul, alpha=0.1)
upper, lower = gumbel_dist.confidence_interval(param_lmoments, cdf_weibul, alpha=0.1)
# ProbapilityPlot can estimate the Qth and the lower and upper confidence interval in the process of plotting
fig, ax = Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%%
fig, ax = gumbel_dist.probability_plot(param_lmoments, cdf_weibul, alpha=0.1)
# %%
"""
if you want to focus only on high values, you can use a threshold to make the code focus on what is higher
this threshold.
"""
threshold = 17
Param_dist = Gdist.estimateParameter(
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
param_dist = gumbel_dist.fit_model(
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
)
print(Param_dist)
loc = Param_dist[0]
scale = Param_dist[1]
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%%
print(param_dist)
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
# %%
threshold = 18
Param_dist = Gdist.estimateParameter(
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
param_dist = gumbel_dist.fit_model(
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
)
print(Param_dist)
loc = Param_dist[0]
scale = Param_dist[1]
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%% Generalized Extreme Value (GEV)
Gevdist = GEV(time_series2)
print(param_dist)
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
# %% Generalized Extreme Value (GEV)
gev_dist = Distributions("GEV", time_series2)
# default parameter estimation method is maximum likelihood method
mle_param = Gevdist.estimateParameter(method="mle")
Gevdist.ks()
Gevdist.chisquare()
gev_mle_param = gev_dist.fit_model(method="mle")
gev_dist.ks()
gev_dist.chisquare()

print(mle_param)
shape = mle_param[0]
loc = mle_param[1]
scale = mle_param[2]
print(gev_mle_param)
# calculate and plot the pdf
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
#%% lmoment method
lmom_param = Gevdist.estimateParameter(method="lmoments")
print(lmom_param)
shape = lmom_param[0]
loc = lmom_param[1]
scale = lmom_param[2]
pdf, fig, ax = gev_dist.pdf(gev_mle_param, plot_figure=True)
cdf, _, _ = gev_dist.cdf(gev_mle_param, plot_figure=True)
# %% lmoment method
gev_lmom_param = gev_dist.fit_model(method="lmoments")
print(gev_lmom_param)
# calculate and plot the pdf
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
pdf, fig, ax = gev_dist.pdf(gev_lmom_param, plot_figure=True)
cdf, _, _ = gev_dist.cdf(gev_lmom_param, plot_figure=True)
#%%
time_series1.sort()
# calculate the F (Non-Exceedance probability based on weibul)
cdf_Weibul = PlottingPosition.weibul(time_series1)
T = PlottingPosition.weibul(time_series1, option=2)
cdf_weibul = PlottingPosition.weibul(time_series1)
T = PlottingPosition.weibul(time_series1, return_period=True)
# TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution
Qth = Gevdist.theporeticalEstimate(shape, loc, scale, cdf_Weibul)
Qth = gev_dist.theoretical_estimate(gev_lmom_param, cdf_weibul)

func = GEV.ci_func
upper, lower = Gevdist.confidenceInterval(
shape,
loc,
scale,
F=cdf_Weibul,
upper, lower = gev_dist.confidence_interval(
gev_lmom_param,
prob_non_exceed=cdf_weibul,
alpha=0.1,
statfunction=func,
n_samples=len(time_series1),
method="lmoments",
)
#%%
# %%
"""
calculate the confidence interval using the boot strap method directly
"""
CI = ConfidenceInterval.BootStrap(
CI = ConfidenceInterval.boot_strap(
time_series1,
statfunction=func,
gevfit=Param_dist,
gevfit=gev_lmom_param,
n_samples=len(time_series1),
F=cdf_Weibul,
F=cdf_weibul,
method="lmoments",
)
LB = CI["LB"]
UB = CI["UB"]
#%%
fig, ax = Gevdist.probapilityPlot(
shape, loc, scale, cdf_Weibul, func=func, n_samples=len(time_series1)
LB = CI["lb"]
UB = CI["ub"]
# %%
fig, ax = gev_dist.probability_plot(
gev_lmom_param, cdf_weibul, func=func, n_samples=len(time_series1)
)
23 changes: 9 additions & 14 deletions examples/SensitivityAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
"""Created on Sun Jun 21 01:55:25 2020.
@author: mofarrag
"""
# import os
Path = "F:/01Algorithms/Hydrology/HAPI/examples"
import matplotlib
Expand All @@ -19,15 +15,14 @@

Parameterpath = Path + "/data/Lumped/Coello_Lumped2021-03-08_muskingum.txt"
Path = Path + "/data/Lumped/"
#%%
### meteorological data
# %% meteorological data
start = "2009-01-01"
end = "2011-12-31"
name = "Coello"
Coello = Catchment(name, start, end)
Coello.ReadLumpedInputs(Path + "meteo_data-MSWEP.csv")

### Basic_inputs
# %% Basic_inputs
# catchment area
CatArea = 1530
# temporal resolution
Expand Down Expand Up @@ -67,11 +62,11 @@

Qobs = Coello.QGauges[Coello.QGauges.columns[0]]

Metrics["RMSE"] = PC.RMSE(Qobs, Coello.Qsim["q"])
Metrics["NSE"] = PC.NSE(Qobs, Coello.Qsim["q"])
Metrics["NSEhf"] = PC.NSEHF(Qobs, Coello.Qsim["q"])
Metrics["KGE"] = PC.KGE(Qobs, Coello.Qsim["q"])
Metrics["WB"] = PC.WB(Qobs, Coello.Qsim["q"])
Metrics["RMSE"] = PC.rmse(Qobs, Coello.Qsim["q"])
Metrics["NSE"] = PC.nse(Qobs, Coello.Qsim["q"])
Metrics["NSEhf"] = PC.nse_hf(Qobs, Coello.Qsim["q"])
Metrics["KGE"] = PC.kge(Qobs, Coello.Qsim["q"])
Metrics["WB"] = PC.wb(Qobs, Coello.Qsim["q"])

print("RMSE= " + str(round(Metrics["RMSE"], 2)))
print("NSE= " + str(round(Metrics["NSE"], 2)))
Expand Down Expand Up @@ -120,7 +115,7 @@ def WrapperType1(Randpar, Route, RoutingFn, Qobs):
Coello.Parameters = Randpar

Run.RunLumped(Coello, Route, RoutingFn)
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
return rmse


Expand All @@ -129,7 +124,7 @@ def WrapperType2(Randpar, Route, RoutingFn, Qobs):
Coello.Parameters = Randpar

Run.RunLumped(Coello, Route, RoutingFn)
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
return rmse, Coello.Qsim["q"]


Expand Down
61 changes: 61 additions & 0 deletions examples/data/pdf_obs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"ams" "pdf"
7550.433 0.000154642269711567
8651.166 0.000117765584990689
9380.397 9.33406205964961e-05
9071.717 0.000103377700612861
8354.173 0.000128159692079377
6161.421 0.000170527780685648
3544.866 2.77636664143863e-05
7120.425 0.000165513917257198
9298.242 9.59581061243758e-05
4710.178 0.000103421285516972
5380.688 0.000145317667468702
8499.548 0.000123066996435036
5617.157 0.000156011624908839
3742.097 3.7808086513221e-05
10274.362 6.78703210421545e-05
6186.083 0.000170853335852759
4552.435 9.20798127465615e-05
8006.946 0.000140143254028843
6790.353 0.000170872728246691
5455.554 0.000148978662405294
9361.352 9.39436873016158e-05
5709.204 0.00015946742554235
16825.49888 5.00789723424988e-06
5363.839 0.000144459876612251
12514.28 2.82532814190863e-05
7225.553 0.000163202752532002
12890.64 2.42739862374255e-05
8045.471 0.000138838408679289
18320.555 2.79830832979157e-06
7339.839 0.000160415133366504
7500.92 0.00015606542100079
11532.695 4.18156642982465e-05
6639.799 0.000172186064368232
6341.556 0.000172279119179583
6533.038 0.000172622478650322
5113.036 0.000130326486583241
3854.754 4.4283345872923e-05
5477.022 0.000149982281171861
7310.343 0.000161160143281715
4670.991 0.000100626037052284
10346.79 6.60642242605733e-05
9039.356 0.000104459733167728
6480.619 0.000172676431635955
8425.722 0.000125654190498816
10647.809 5.8974972602665e-05
7565.567 0.000154199994168006
5456.57 0.000149026629329571
8713.613 0.000115593076636878
9305.365 9.57295320374143e-05
5948.87 0.000166548208934179
7638.778 0.000152015488523764
8576.919 0.000120358190580951
4959.301 0.000120553356314615
12426.522 2.9268918540962e-05
6870.319 0.000169868029908526
7838.047 0.000145744442504979
6414.488 0.000172587367846412
13121.738 2.21092580728735e-05
9801.248 8.06319421593433e-05
8745.997 0.00011447011440418
Loading

0 comments on commit e1f3c07

Please sign in to comment.