Skip to content

Commit

Permalink
remove the confidence_interval plot from the probability_plot method
Browse files Browse the repository at this point in the history
  • Loading branch information
MAfarrag committed Aug 16, 2024
1 parent aead861 commit 91a4191
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 124 deletions.
87 changes: 15 additions & 72 deletions statista/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,14 +499,12 @@ def confidence_interval(

def probability_plot(
self,
parameters: Dict[str, Union[float, Any]],
prob_non_exceed: np.ndarray,
alpha: float = 0.1,
fig1size: tuple = (10, 5),
fig2size: tuple = (6, 6),
xlabel: str = "Actual data",
ylabel: str = "cdf",
fontsize: int = 15,
parameters: Dict[str, Union[float, Any]] = None,
prob_non_exceed: np.ndarray = None,
) -> Tuple[List[Figure], list]:
"""Probability Plot.
Expand All @@ -528,8 +526,6 @@ def probability_plot(
value between 0 and 1.
fig1size: [tuple]
Default is (10, 5)
fig2size: [tuple]
Default is (6, 6)
xlabel: [str]
Default is "Actual data"
ylabel: [str]
Expand All @@ -539,13 +535,7 @@ def probability_plot(
Returns
-------
Qth: [list]
theoretical-generated values based on the theoretical cdf calculated from
weibul or the distribution parameters.
q_upper: [list]
upper-bound corresponding to the confidence interval.
q_lower: [list]
lower-bound corresponding to the confidence interval.
"""
pass

Expand Down Expand Up @@ -1225,15 +1215,13 @@ def confidence_interval(

def probability_plot(
self,
alpha: float = 0.1,
fig1_size: Tuple[float, float] = (10, 5),
fig2_size: Tuple[float, float] = (6, 6),
xlabel: str = "Actual data",
ylabel: str = "cdf",
fontsize: int = 15,
cdf: Union[np.ndarray, list] = None,
parameters: Dict[str, Union[float, Any]] = None,
) -> tuple[list[Figure], list[Axes]]: # pylint: disable=arguments-differ
) -> Tuple[Figure, Tuple[Axes, Axes]]: # pylint: disable=arguments-differ
"""Probability plot.
Probability Plot method calculates the theoretical values based on the Gumbel distribution
Expand All @@ -1243,12 +1231,8 @@ def probability_plot(
----------
cdf: [np.ndarray]
theoretical cdf calculated using weibul or using the distribution cdf function.
alpha: [float]
value between 0 and 1.
fig1_size: [tuple]
Default is (10, 5)
fig2_size: [tuple]
Default is (6, 6)
xlabel: [str]
Default is "Actual data"
ylabel: [str]
Expand Down Expand Up @@ -1292,11 +1276,6 @@ def probability_plot(
"to the get the non-exceedance probability"
)

q_th = self._inv_cdf(cdf, parameters)
q_upper, q_lower = self.confidence_interval(
prob_non_exceed=cdf, alpha=alpha, parameters=parameters
)

q_x = np.linspace(
float(self.data_sorted[0]), 1.5 * float(self.data_sorted[-1]), 10000
)
Expand All @@ -1305,16 +1284,11 @@ def probability_plot(

fig, ax = Plot.details(
q_x,
q_th,
self.data,
pdf_fitted,
cdf_fitted,
cdf,
q_lower,
q_upper,
alpha,
fig1_size=fig1_size,
fig2_size=fig2_size,
xlabel=xlabel,
ylabel=ylabel,
fontsize=fontsize,
Expand Down Expand Up @@ -2034,41 +2008,31 @@ def confidence_interval(

def probability_plot(
self,
alpha: Number = 0.1,
func: Callable = None,
method: str = "lmoments",
n_samples=100,
fig1_size=(10, 5),
fig2_size=(6, 6),
xlabel="Actual data",
ylabel="cdf",
fontsize=15,
cdf: Union[np.ndarray, list] = None,
parameters: Dict[str, Union[float, Any]] = None,
) -> tuple[list[Figure], list[Axes]]:
) -> Tuple[Figure, Tuple[Axes, Axes]]:
"""Probability Plot.
Probability Plot method calculates the theoretical values based on the Gumbel distribution
parameters, theoretical cdf (or weibul), and calculate the confidence interval.
parameters, theoretical cdf (or weibul), and calculates the confidence interval.
Parameters
----------
parameters: Dict[str, str]
{"loc": val, "scale": val, shape: val}
- loc : [numeric]
- loc: [numeric]
Location parameter of the GEV distribution.
- scale : [numeric]
- scale: [numeric]
Scale parameter of the GEV distribution.
- shape: [float, int]
Shape parameter for the GEV distribution.
cdf: [list]
Theoretical cdf calculated using weibul or using the distribution cdf function.
method: [str]
Method used to fit the generated samples from the bootstrap method ["lmoments", "mle", "mm"]. Default is
"lmoments".
alpha: [float]
Value between 0 and 1.
fontsize: [numeric]
Font size of the axis labels and legend
ylabel: [string]
Expand All @@ -2077,14 +2041,13 @@ def probability_plot(
X label string
fig1_size: [tuple]
size of the pdf and cdf figure
fig2_size: [tuple]
size of the confidence interval figure
n_samples: [integer]
number of points in the confidence interval calculation
alpha: [numeric]
alpha or SignificanceLevel is a value of the confidence interval.
func: [function]
function to be used in the confidence interval calculation.
Returns
-------
Figure:
matplotlib figure object
Tuple[Axes, Axes]:
matplotlib plot axes
"""
# if no parameters are provided, take the parameters provided in the class initialization.
if parameters is None:
Expand All @@ -2104,21 +2067,6 @@ def probability_plot(
"to the get the non-exceedance probability"
)

q_th = self.inverse_cdf(cdf, parameters)
if func is None:
func = GEV.ci_func

ci = ConfidenceInterval.boot_strap(
self.data,
statfunction=func,
gevfit=parameters,
n_samples=n_samples,
F=cdf,
method=method,
)
q_lower = ci["lb"]
q_upper = ci["ub"]

q_x = np.linspace(
float(self.data_sorted[0]), 1.5 * float(self.data_sorted[-1]), 10000
)
Expand All @@ -2127,16 +2075,11 @@ def probability_plot(

fig, ax = Plot.details(
q_x,
q_th,
self.data,
pdf_fitted,
cdf_fitted,
cdf,
q_lower,
q_upper,
alpha,
fig1_size=fig1_size,
fig2_size=fig2_size,
xlabel=xlabel,
ylabel=ylabel,
fontsize=fontsize,
Expand Down
21 changes: 7 additions & 14 deletions statista/eva.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from loguru import logger
from pandas import DataFrame

from statista.distributions import PlottingPosition, Distributions
from statista.distributions import Distributions


def ams_analysis(
Expand Down Expand Up @@ -140,7 +140,7 @@ def ams_analysis(
return_period = np.array(return_period)
# these values are the Non Exceedance probability (F) of the chosen
# return periods non_exceed_prop = 1 - (1/return_period)
# Non Exceedance propabilities
# Non Exceedance probabilities
# non_exceed_prop = [1/3, 0.5, 0.8, 0.9, 0.96, 0.98, 0.99, 0.995, 0.998]
non_exceed_prop = 1 - (1 / return_period)
save_to = Path(save_to)
Expand Down Expand Up @@ -194,26 +194,19 @@ def ams_analysis(
# get the Discharge corresponding to the return periods
q_rp = dist.inverse_cdf(non_exceed_prop, param_dist)

# to get the Non-Exceedance probability for a specific Value
# sort the ams_df
ams_df.sort()
# calculate the F (Non-Exceedance probability based on Weibull)
cdf_weibul = PlottingPosition.weibul(ams_df)
# Gumbel.probability_plot method calculates the theoretical values
# based on the Gumbel distribution
# parameters, theoretical cdf (or weibul), and calculate the confidence interval
if save_plots:
fig, _ = dist.probability_plot(
cdf=cdf_weibul,
alpha=significance_level,
method=method,
parameters=param_dist,
fig, _ = dist.probability_plot()
_, _, fig2, _ = dist.confidence_interval(
method=method, plot_figure=True, alpha=significance_level
)

fig[0].savefig(f"{save_to}/figures/{i}.png", format="png")
fig.savefig(f"{save_to}/figures/{i}.png", format="png")
plt.close()

fig[1].savefig(f"{save_to}/figures/f-{i}.png", format="png")
fig2.savefig(f"{save_to}/figures/f-{i}.png", format="png")
plt.close()

statistical_properties.loc[i, "mean"] = q_ts.mean()
Expand Down
35 changes: 7 additions & 28 deletions statista/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,45 +109,30 @@ def cdf(
@staticmethod
def details(
qx: Union[np.ndarray, list],
qth: Union[np.ndarray, list],
q_act: Union[np.ndarray, list],
pdf: Union[np.ndarray, list],
cdf_fitted: Union[np.ndarray, list],
cdf: Union[np.ndarray, list],
q_lower: Union[np.ndarray, list],
q_upper: Union[np.ndarray, list],
alpha: Number,
fig1_size: Tuple[float, float] = (10, 5),
fig2_size: Tuple[float, float] = (6, 6),
xlabel: str = "Actual data",
ylabel: str = "cdf",
fontsize: int = 11,
) -> Tuple[Tuple[Figure, Figure], Tuple[Axes, Axes]]:
) -> Tuple[Figure, Tuple[Axes, Axes]]:
"""details.
Parameters
----------
qx: [np.ndarray, list]
10,000 values generated between the minimum and maximum values of the actual data.
qth: [np.ndarray, list]
Theoretical quantiles (obtained using the inverse_cdf method).
q_act: [np.ndarray, list]
Actual data.
pdf: [np.ndarray, list]
Probability density function.
cdf_fitted: [np.ndarray, list]
Cumulative distribution function of the fitted distribution.
cdf
q_lower: [np.ndarray, list]
Lower limit of the confidence interval.
q_upper: [np.ndarray, list]
Upper limit of the confidence interval.
alpha: [float]
Significance level.
fig1_size: Tuple[float, float], optional, default=(10, 5)
Size of the first figure.
fig2_size: Tuple[float, float], optional, default=(6, 6)
Size of the second figure.
xlabel: str, optional, default="Actual data"
Label for x-axis.
ylabel: str, optional, default="cdf"
Expand All @@ -157,6 +142,10 @@ def details(
Returns
-------
Figure:
matplotlib figure object
Tuple[Axes, Axes]:
matplotlib plot axes
"""
fig1 = plt.figure(figsize=fig1_size)
gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig1)
Expand All @@ -174,18 +163,8 @@ def details(
ax2.scatter(q_act, cdf, color="#DC143C", facecolors="none")
ax2.set_xlabel(xlabel, fontsize=fontsize)
ax2.set_ylabel(ylabel, fontsize=15)

fig2, _ = Plot.confidence_level(
qth,
q_act,
q_lower,
q_upper,
alpha=alpha,
figsize=fig2_size,
fontsize=fontsize,
)
return [fig1, fig2], [ax1, ax2]
plt.show()
return fig1, (ax1, ax2)

@staticmethod
def confidence_level(
Expand All @@ -205,7 +184,7 @@ def confidence_level(
qth: [np.ndarray, list]
Theoretical quantiles (obtained using the inverse_cdf method).
q_act: [np.ndarray, list]
Actual data.
Actual data, unsorted.
q_lower: [np.ndarray, list]
Lower limit of the confidence interval.
q_upper: [np.ndarray, list]
Expand Down
16 changes: 6 additions & 10 deletions tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,13 @@ def test_probability_plot(
dist = Gumbel(time_series2, param)
# test default parameters.
fig, ax = dist.probability_plot()
assert isinstance(fig[0], Figure)
assert isinstance(fig[1], Figure)
assert isinstance(fig, Figure)
assert isinstance(ax[0], Axes)
assert isinstance(ax[1], Axes)
# test with the cdf parameter
cdf_weibul = PlottingPosition.weibul(time_series2)
fig, ax = dist.probability_plot(cdf=cdf_weibul, alpha=confidence_interval_alpha)
assert isinstance(fig[0], Figure)
assert isinstance(fig[1], Figure)
fig, ax = dist.probability_plot(cdf=cdf_weibul)
assert isinstance(fig, Figure)
assert isinstance(ax[0], Axes)
assert isinstance(ax[1], Axes)

Expand Down Expand Up @@ -381,15 +379,13 @@ def test_gev_probability_plot(
dist = GEV(time_series1, param)
# test default parameters.
fig, ax = dist.probability_plot()
assert isinstance(fig[0], Figure)
assert isinstance(fig[1], Figure)
assert isinstance(fig, Figure)
assert isinstance(ax[0], Axes)
assert isinstance(ax[1], Axes)
# test with the cdf parameter
cdf_weibul = PlottingPosition.weibul(time_series1)
fig, ax = dist.probability_plot(cdf=cdf_weibul, alpha=confidence_interval_alpha)
assert isinstance(fig[0], Figure)
assert isinstance(fig[1], Figure)
fig, ax = dist.probability_plot(cdf=cdf_weibul)
assert isinstance(fig, Figure)
assert isinstance(ax[0], Axes)
assert isinstance(ax[1], Axes)

Expand Down

0 comments on commit 91a4191

Please sign in to comment.