Skip to content

Commit

Permalink
Merge pull request #24 from Serapieum-of-alex/factory-design-pattern
Browse files Browse the repository at this point in the history
Factory design pattern
  • Loading branch information
MAfarrag authored Dec 11, 2023
2 parents 1f6d7d2 + 83358a8 commit e1f3c07
Show file tree
Hide file tree
Showing 25 changed files with 2,091 additions and 1,618 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
ignore = E501, W503
ignore = E203, E266, E501, W503, E741
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,4 @@ build_artifacts
mo_*
conda/
*.zip
.run/
10 changes: 9 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,18 @@ History
* modify the pdf, cdf, and probability plot plots
* create separate plot and confidence_interval modules.

0.4.0 (2023-011-23)
0.4.0 (2023-11-23)
------------------

* add Pearson 3 distribution
* Use setup.py instead of pyproject.toml.
* Correct Pearson correlation coefficient and add documentation.
* replace the pdf and cdf by the methods from scipy package.

0.5.0 (2023-12-11)
------------------

* Unify all the methods for the distributions.
* Use factory design pattern to create the distributions.
* add tests for the eva module.
* use snake_case for the methods and variables.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pip install git+https://github.com/MAfarrag/statista
## pip
to install the latest release you can easily use pip
```
pip install statista==0.4.0
pip install statista==0.5.0
```

Quick start
Expand Down
143 changes: 64 additions & 79 deletions examples/Extreme value statistics.py
Original file line number Diff line number Diff line change
@@ -1,126 +1,111 @@
"""Created on Wed Sep 9 23:31:11 2020.
@author: mofarrag
"""
"""Extreme value statistics"""
import matplotlib

matplotlib.use("TkAgg")
import pandas as pd
from statista.distributions import GEV, ConfidenceInterval, Gumbel, PlottingPosition

from statista.distributions import GEV, Gumbel, PlottingPosition, Distributions
from statista.confidence_interval import ConfidenceInterval

time_series1 = pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
time_series2 = pd.read_csv("examples/data/time_series2.txt", header=None)[0].tolist()
#%%
Gdist = Gumbel(time_series1)
# %%
gumbel_dist = Distributions("Gumbel", time_series1)
# default parameter estimation method is maximum likelihood method
Param_mle = Gdist.estimateParameter(method="mle")
Gdist.ks()
Gdist.chisquare()
print(Param_mle)
loc = Param_mle[0]
scale = Param_mle[1]
param_mle = gumbel_dist.fit_model(method="mle")
gumbel_dist.ks()
gumbel_dist.chisquare()
print(param_mle)
# calculate and plot the pdf
pdf = Gdist.pdf(loc, scale, plot_figure=True)
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
#%% lmoments
Param_lmoments = Gdist.estimateParameter(method="lmoments")
Gdist.ks()
Gdist.chisquare()
print(Param_lmoments)
loc = Param_lmoments[0]
scale = Param_lmoments[1]
pdf = gumbel_dist.pdf(param_mle, plot_figure=True)
cdf, _, _ = gumbel_dist.cdf(param_mle, plot_figure=True)
# %% lmoments
param_lmoments = gumbel_dist.fit_model(method="lmoments")
gumbel_dist.ks()
gumbel_dist.chisquare()
print(param_lmoments)
# calculate and plot the pdf
pdf = Gdist.pdf(loc, scale, plot_figure=True)
cdf, _, _ = Gdist.cdf(loc, scale, plot_figure=True)
#%%
pdf = gumbel_dist.pdf(param_lmoments, plot_figure=True)
cdf, _, _ = gumbel_dist.cdf(param_lmoments, plot_figure=True)
# %%
# calculate the CDF(Non Exceedance probability) using weibul plotting position
time_series1.sort()
# calculate the F (Non-Exceedance probability based on weibul)
cdf_Weibul = PlottingPosition.weibul(time_series1)
cdf_weibul = PlottingPosition.weibul(time_series1)
# TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution
Qth = Gdist.theporeticalEstimate(loc, scale, cdf_Weibul)
Qth = gumbel_dist.theoretical_estimate(param_lmoments, cdf_weibul)
# test = stats.chisquare(st.Standardize(Qth), st.Standardize(time_series1),ddof=5)
# calculate the confidence interval
upper, lower = Gdist.confidenceInterval(loc, scale, cdf_Weibul, alpha=0.1)
upper, lower = gumbel_dist.confidence_interval(param_lmoments, cdf_weibul, alpha=0.1)
# ProbapilityPlot can estimate the Qth and the lower and upper confidence interval in the process of plotting
fig, ax = Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%%
fig, ax = gumbel_dist.probability_plot(param_lmoments, cdf_weibul, alpha=0.1)
# %%
"""
if you want to focus only on high values, you can use a threshold to make the code focus on what is higher
this threshold.
"""
threshold = 17
Param_dist = Gdist.estimateParameter(
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
param_dist = gumbel_dist.fit_model(
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
)
print(Param_dist)
loc = Param_dist[0]
scale = Param_dist[1]
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%%
print(param_dist)
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
# %%
threshold = 18
Param_dist = Gdist.estimateParameter(
method="optimization", ObjFunc=Gumbel.ObjectiveFn, threshold=threshold
param_dist = gumbel_dist.fit_model(
method="optimization", obj_func=Gumbel.objective_fn, threshold=threshold
)
print(Param_dist)
loc = Param_dist[0]
scale = Param_dist[1]
Gdist.probapilityPlot(loc, scale, cdf_Weibul, alpha=0.1)
#%% Generalized Extreme Value (GEV)
Gevdist = GEV(time_series2)
print(param_dist)
gumbel_dist.probability_plot(param_dist, cdf_weibul, alpha=0.1)
# %% Generalized Extreme Value (GEV)
gev_dist = Distributions("GEV", time_series2)
# default parameter estimation method is maximum likelihood method
mle_param = Gevdist.estimateParameter(method="mle")
Gevdist.ks()
Gevdist.chisquare()
gev_mle_param = gev_dist.fit_model(method="mle")
gev_dist.ks()
gev_dist.chisquare()

print(mle_param)
shape = mle_param[0]
loc = mle_param[1]
scale = mle_param[2]
print(gev_mle_param)
# calculate and plot the pdf
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
#%% lmoment method
lmom_param = Gevdist.estimateParameter(method="lmoments")
print(lmom_param)
shape = lmom_param[0]
loc = lmom_param[1]
scale = lmom_param[2]
pdf, fig, ax = gev_dist.pdf(gev_mle_param, plot_figure=True)
cdf, _, _ = gev_dist.cdf(gev_mle_param, plot_figure=True)
# %% lmoment method
gev_lmom_param = gev_dist.fit_model(method="lmoments")
print(gev_lmom_param)
# calculate and plot the pdf
pdf, fig, ax = Gevdist.pdf(shape, loc, scale, plot_figure=True)
cdf, _, _ = Gevdist.cdf(shape, loc, scale, plot_figure=True)
pdf, fig, ax = gev_dist.pdf(gev_lmom_param, plot_figure=True)
cdf, _, _ = gev_dist.cdf(gev_lmom_param, plot_figure=True)
#%%
time_series1.sort()
# calculate the F (Non-Exceedance probability based on weibul)
cdf_Weibul = PlottingPosition.weibul(time_series1)
T = PlottingPosition.weibul(time_series1, option=2)
cdf_weibul = PlottingPosition.weibul(time_series1)
T = PlottingPosition.weibul(time_series1, return_period=True)
# TheporeticalEstimate method calculates the theoretical values based on the Gumbel distribution
Qth = Gevdist.theporeticalEstimate(shape, loc, scale, cdf_Weibul)
Qth = gev_dist.theoretical_estimate(gev_lmom_param, cdf_weibul)

func = GEV.ci_func
upper, lower = Gevdist.confidenceInterval(
shape,
loc,
scale,
F=cdf_Weibul,
upper, lower = gev_dist.confidence_interval(
gev_lmom_param,
prob_non_exceed=cdf_weibul,
alpha=0.1,
statfunction=func,
n_samples=len(time_series1),
method="lmoments",
)
#%%
# %%
"""
calculate the confidence interval using the boot strap method directly
"""
CI = ConfidenceInterval.BootStrap(
CI = ConfidenceInterval.boot_strap(
time_series1,
statfunction=func,
gevfit=Param_dist,
gevfit=gev_lmom_param,
n_samples=len(time_series1),
F=cdf_Weibul,
F=cdf_weibul,
method="lmoments",
)
LB = CI["LB"]
UB = CI["UB"]
#%%
fig, ax = Gevdist.probapilityPlot(
shape, loc, scale, cdf_Weibul, func=func, n_samples=len(time_series1)
LB = CI["lb"]
UB = CI["ub"]
# %%
fig, ax = gev_dist.probability_plot(
gev_lmom_param, cdf_weibul, func=func, n_samples=len(time_series1)
)
23 changes: 9 additions & 14 deletions examples/SensitivityAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
"""Created on Sun Jun 21 01:55:25 2020.
@author: mofarrag
"""
# import os
Path = "F:/01Algorithms/Hydrology/HAPI/examples"
import matplotlib
Expand All @@ -19,15 +15,14 @@

Parameterpath = Path + "/data/Lumped/Coello_Lumped2021-03-08_muskingum.txt"
Path = Path + "/data/Lumped/"
#%%
### meteorological data
# %% meteorological data
start = "2009-01-01"
end = "2011-12-31"
name = "Coello"
Coello = Catchment(name, start, end)
Coello.ReadLumpedInputs(Path + "meteo_data-MSWEP.csv")

### Basic_inputs
# %% Basic_inputs
# catchment area
CatArea = 1530
# temporal resolution
Expand Down Expand Up @@ -67,11 +62,11 @@

Qobs = Coello.QGauges[Coello.QGauges.columns[0]]

Metrics["RMSE"] = PC.RMSE(Qobs, Coello.Qsim["q"])
Metrics["NSE"] = PC.NSE(Qobs, Coello.Qsim["q"])
Metrics["NSEhf"] = PC.NSEHF(Qobs, Coello.Qsim["q"])
Metrics["KGE"] = PC.KGE(Qobs, Coello.Qsim["q"])
Metrics["WB"] = PC.WB(Qobs, Coello.Qsim["q"])
Metrics["RMSE"] = PC.rmse(Qobs, Coello.Qsim["q"])
Metrics["NSE"] = PC.nse(Qobs, Coello.Qsim["q"])
Metrics["NSEhf"] = PC.nse_hf(Qobs, Coello.Qsim["q"])
Metrics["KGE"] = PC.kge(Qobs, Coello.Qsim["q"])
Metrics["WB"] = PC.wb(Qobs, Coello.Qsim["q"])

print("RMSE= " + str(round(Metrics["RMSE"], 2)))
print("NSE= " + str(round(Metrics["NSE"], 2)))
Expand Down Expand Up @@ -120,7 +115,7 @@ def WrapperType1(Randpar, Route, RoutingFn, Qobs):
Coello.Parameters = Randpar

Run.RunLumped(Coello, Route, RoutingFn)
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
return rmse


Expand All @@ -129,7 +124,7 @@ def WrapperType2(Randpar, Route, RoutingFn, Qobs):
Coello.Parameters = Randpar

Run.RunLumped(Coello, Route, RoutingFn)
rmse = PC.RMSE(Qobs, Coello.Qsim["q"])
rmse = PC.rmse(Qobs, Coello.Qsim["q"])
return rmse, Coello.Qsim["q"]


Expand Down
61 changes: 61 additions & 0 deletions examples/data/pdf_obs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"ams" "pdf"
7550.433 0.000154642269711567
8651.166 0.000117765584990689
9380.397 9.33406205964961e-05
9071.717 0.000103377700612861
8354.173 0.000128159692079377
6161.421 0.000170527780685648
3544.866 2.77636664143863e-05
7120.425 0.000165513917257198
9298.242 9.59581061243758e-05
4710.178 0.000103421285516972
5380.688 0.000145317667468702
8499.548 0.000123066996435036
5617.157 0.000156011624908839
3742.097 3.7808086513221e-05
10274.362 6.78703210421545e-05
6186.083 0.000170853335852759
4552.435 9.20798127465615e-05
8006.946 0.000140143254028843
6790.353 0.000170872728246691
5455.554 0.000148978662405294
9361.352 9.39436873016158e-05
5709.204 0.00015946742554235
16825.49888 5.00789723424988e-06
5363.839 0.000144459876612251
12514.28 2.82532814190863e-05
7225.553 0.000163202752532002
12890.64 2.42739862374255e-05
8045.471 0.000138838408679289
18320.555 2.79830832979157e-06
7339.839 0.000160415133366504
7500.92 0.00015606542100079
11532.695 4.18156642982465e-05
6639.799 0.000172186064368232
6341.556 0.000172279119179583
6533.038 0.000172622478650322
5113.036 0.000130326486583241
3854.754 4.4283345872923e-05
5477.022 0.000149982281171861
7310.343 0.000161160143281715
4670.991 0.000100626037052284
10346.79 6.60642242605733e-05
9039.356 0.000104459733167728
6480.619 0.000172676431635955
8425.722 0.000125654190498816
10647.809 5.8974972602665e-05
7565.567 0.000154199994168006
5456.57 0.000149026629329571
8713.613 0.000115593076636878
9305.365 9.57295320374143e-05
5948.87 0.000166548208934179
7638.778 0.000152015488523764
8576.919 0.000120358190580951
4959.301 0.000120553356314615
12426.522 2.9268918540962e-05
6870.319 0.000169868029908526
7838.047 0.000145744442504979
6414.488 0.000172587367846412
13121.738 2.21092580728735e-05
9801.248 8.06319421593433e-05
8745.997 0.00011447011440418
Loading

0 comments on commit e1f3c07

Please sign in to comment.