remove the confidence_interval plot from the probability_plot method

Serapieum-of-alex · Aug 16, 2024 · 91a4191 · 91a4191
1 parent aead861
commit 91a4191
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 124 deletions.
diff --git a/statista/distributions.py b/statista/distributions.py
@@ -499,14 +499,12 @@ def confidence_interval(
 
     def probability_plot(
         self,
-        parameters: Dict[str, Union[float, Any]],
-        prob_non_exceed: np.ndarray,
-        alpha: float = 0.1,
         fig1size: tuple = (10, 5),
-        fig2size: tuple = (6, 6),
         xlabel: str = "Actual data",
         ylabel: str = "cdf",
         fontsize: int = 15,
+        parameters: Dict[str, Union[float, Any]] = None,
+        prob_non_exceed: np.ndarray = None,
     ) -> Tuple[List[Figure], list]:
         """Probability Plot.
 
@@ -528,8 +526,6 @@ def probability_plot(
             value between 0 and 1.
         fig1size: [tuple]
             Default is (10, 5)
-        fig2size: [tuple]
-            Default is (6, 6)
         xlabel: [str]
             Default is "Actual data"
         ylabel: [str]
@@ -539,13 +535,7 @@ def probability_plot(
 
         Returns
         -------
-        Qth: [list]
-            theoretical-generated values based on the theoretical cdf calculated from
-            weibul or the distribution parameters.
-        q_upper: [list]
-            upper-bound coresponding to the confidence interval.
-        q_lower: [list]
-            lower-bound coresponding to the confidence interval.
+
         """
         pass
 
@@ -1225,15 +1215,13 @@ def confidence_interval(
 
     def probability_plot(
         self,
-        alpha: float = 0.1,
         fig1_size: Tuple[float, float] = (10, 5),
-        fig2_size: Tuple[float, float] = (6, 6),
         xlabel: str = "Actual data",
         ylabel: str = "cdf",
         fontsize: int = 15,
         cdf: Union[np.ndarray, list] = None,
         parameters: Dict[str, Union[float, Any]] = None,
-    ) -> tuple[list[Figure], list[Axes]]:  # pylint: disable=arguments-differ
+    ) -> Tuple[Figure, Tuple[Axes, Axes]]:  # pylint: disable=arguments-differ
         """Probability plot.
 
         Probability Plot method calculates the theoretical values based on the Gumbel distribution
@@ -1243,12 +1231,8 @@ def probability_plot(
         ----------
         cdf: [np.ndarray]
             theoretical cdf calculated using weibul or using the distribution cdf function.
-        alpha: [float]
-            value between 0 and 1.
         fig1_size: [tuple]
             Default is (10, 5)
-        fig2_size: [tuple]
-            Default is (6, 6)
         xlabel: [str]
             Default is "Actual data"
         ylabel: [str]
@@ -1292,11 +1276,6 @@ def probability_plot(
                     "to the get the non-exceedance probability"
                 )
 
-        q_th = self._inv_cdf(cdf, parameters)
-        q_upper, q_lower = self.confidence_interval(
-            prob_non_exceed=cdf, alpha=alpha, parameters=parameters
-        )
-
         q_x = np.linspace(
             float(self.data_sorted[0]), 1.5 * float(self.data_sorted[-1]), 10000
         )
@@ -1305,16 +1284,11 @@ def probability_plot(
 
         fig, ax = Plot.details(
             q_x,
-            q_th,
             self.data,
             pdf_fitted,
             cdf_fitted,
             cdf,
-            q_lower,
-            q_upper,
-            alpha,
             fig1_size=fig1_size,
-            fig2_size=fig2_size,
             xlabel=xlabel,
             ylabel=ylabel,
             fontsize=fontsize,
@@ -2034,41 +2008,31 @@ def confidence_interval(
 
     def probability_plot(
         self,
-        alpha: Number = 0.1,
-        func: Callable = None,
-        method: str = "lmoments",
-        n_samples=100,
         fig1_size=(10, 5),
-        fig2_size=(6, 6),
         xlabel="Actual data",
         ylabel="cdf",
         fontsize=15,
         cdf: Union[np.ndarray, list] = None,
         parameters: Dict[str, Union[float, Any]] = None,
-    ) -> tuple[list[Figure], list[Axes]]:
+    ) -> Tuple[Figure, Tuple[Axes, Axes]]:
         """Probability Plot.
 
         Probability Plot method calculates the theoretical values based on the Gumbel distribution
-        parameters, theoretical cdf (or weibul), and calculate the confidence interval.
+        parameters and the theoretical cdf (or weibul), and plots the fitted pdf and cdf.
 
         Parameters
         ----------
         parameters: Dict[str, str]
             {"loc": val, "scale": val, shape: val}
 
-            - loc : [numeric]
+            - loc: [numeric]
                 Location parameter of the GEV distribution.
-            - scale : [numeric]
+            - scale: [numeric]
                 Scale parameter of the GEV distribution.
             - shape: [float, int]
                 Shape parameter for the GEV distribution.
         cdf: [list]
             Theoretical cdf calculated using weibul or using the distribution cdf function.
-        method: [str]
-            Method used to fit the generated samples from the bootstrap method ["lmoments", "mle", "mm"]. Default is
-            "lmoments".
-        alpha: [float]
-            Value between 0 and 1.
         fontsize: [numeric]
             Font size of the axis labels and legend
         ylabel: [string]
@@ -2077,14 +2041,13 @@ def probability_plot(
             X label string
         fig1_size: [tuple]
             size of the pdf and cdf figure
-        fig2_size: [tuple]
-            size of the confidence interval figure
-        n_samples: [integer]
-            number of points in the confidence interval calculation
-        alpha: [numeric]
-            alpha or SignificanceLevel is a value of the confidence interval.
-        func: [function]
-            function to be used in the confidence interval calculation.
+
+        Returns
+        -------
+        Figure:
+            matplotlib figure object
+        Tuple[Axes, Axes]:
+            matplotlib plot axes
         """
         # if no parameters are provided, take the parameters provided in the class initialization.
         if parameters is None:
@@ -2104,21 +2067,6 @@ def probability_plot(
                     "to the get the non-exceedance probability"
                 )
 
-        q_th = self.inverse_cdf(cdf, parameters)
-        if func is None:
-            func = GEV.ci_func
-
-        ci = ConfidenceInterval.boot_strap(
-            self.data,
-            statfunction=func,
-            gevfit=parameters,
-            n_samples=n_samples,
-            F=cdf,
-            method=method,
-        )
-        q_lower = ci["lb"]
-        q_upper = ci["ub"]
-
         q_x = np.linspace(
             float(self.data_sorted[0]), 1.5 * float(self.data_sorted[-1]), 10000
         )
@@ -2127,16 +2075,11 @@ def probability_plot(
 
         fig, ax = Plot.details(
             q_x,
-            q_th,
             self.data,
             pdf_fitted,
             cdf_fitted,
             cdf,
-            q_lower,
-            q_upper,
-            alpha,
             fig1_size=fig1_size,
-            fig2_size=fig2_size,
             xlabel=xlabel,
             ylabel=ylabel,
             fontsize=fontsize,

diff --git a/statista/eva.py b/statista/eva.py
@@ -8,7 +8,7 @@
 from loguru import logger
 from pandas import DataFrame
 
-from statista.distributions import PlottingPosition, Distributions
+from statista.distributions import Distributions
 
 
 def ams_analysis(
@@ -140,7 +140,7 @@ def ams_analysis(
     return_period = np.array(return_period)
     # these values are the Non Exceedance probability (F) of the chosen
     # return periods non_exceed_prop = 1 - (1/return_period)
-    # Non Exceedance propabilities
+    # Non Exceedance probabilities
     # non_exceed_prop = [1/3, 0.5, 0.8, 0.9, 0.96, 0.98, 0.99, 0.995, 0.998]
     non_exceed_prop = 1 - (1 / return_period)
     save_to = Path(save_to)
@@ -194,26 +194,19 @@ def ams_analysis(
         # get the Discharge corresponding to the return periods
         q_rp = dist.inverse_cdf(non_exceed_prop, param_dist)
 
-        # to get the Non-Exceedance probability for a specific Value
-        # sort the ams_df
-        ams_df.sort()
-        # calculate the F (Exceedance probability based on weibul)
-        cdf_weibul = PlottingPosition.weibul(ams_df)
         # Gumbel.probability_plot method calculates the theoretical values
         # based on the Gumbel distribution
         # parameters and theoretical cdf (or weibul); the confidence interval is plotted separately
         if save_plots:
-            fig, _ = dist.probability_plot(
-                cdf=cdf_weibul,
-                alpha=significance_level,
-                method=method,
-                parameters=param_dist,
+            fig, _ = dist.probability_plot()
+            _, _, fig2, _ = dist.confidence_interval(
+                method=method, plot_figure=True, alpha=significance_level
             )
 
-            fig[0].savefig(f"{save_to}/figures/{i}.png", format="png")
+            fig.savefig(f"{save_to}/figures/{i}.png", format="png")
             plt.close()
 
-            fig[1].savefig(f"{save_to}/figures/f-{i}.png", format="png")
+            fig2.savefig(f"{save_to}/figures/f-{i}.png", format="png")
             plt.close()
 
         statistical_properties.loc[i, "mean"] = q_ts.mean()

diff --git a/statista/plot.py b/statista/plot.py
@@ -109,45 +109,30 @@ def cdf(
     @staticmethod
     def details(
         qx: Union[np.ndarray, list],
-        qth: Union[np.ndarray, list],
         q_act: Union[np.ndarray, list],
         pdf: Union[np.ndarray, list],
         cdf_fitted: Union[np.ndarray, list],
         cdf: Union[np.ndarray, list],
-        q_lower: Union[np.ndarray, list],
-        q_upper: Union[np.ndarray, list],
-        alpha: Number,
         fig1_size: Tuple[float, float] = (10, 5),
-        fig2_size: Tuple[float, float] = (6, 6),
         xlabel: str = "Actual data",
         ylabel: str = "cdf",
         fontsize: int = 11,
-    ) -> Tuple[Tuple[Figure, Figure], Tuple[Axes, Axes]]:
+    ) -> Tuple[Figure, Tuple[Axes, Axes]]:
         """details.
 
         Parameters
         ----------
         qx: [np.ndarray, list]
             10,000 values generated between the minimum and maximum values of the actual data.
-        qth: [np.ndarray, list]
-            Theoretical quantiles (obtained using the inverse_cdf method).
         q_act: [np.ndarray, list]
             Actual data.
         pdf: [np.ndarray, list]
             Probability density function.
         cdf_fitted: [np.ndarray, list]
             Cumulative distribution function of the fitted distribution.
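         cdf: [np.ndarray, list]
             Theoretical cdf calculated using weibul or using the distribution cdf function.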
-        q_lower: [np.ndarray, list]
-            Lower limit of the confidence interval.
-        q_upper: [np.ndarray, list]
-            Upper limit of the confidence interval.
-        alpha: [float]
-            Significance level.
         fig1_size:  Tuple[float, float], optional, default=(10, 5)
             Size of the first figure.
-        fig2_size: Tuple[float, float], optional, default=(6, 6)
-            Size of the second figure.
         xlabel: str, optional, default="Actual data"
             Label for x-axis.
         ylabel: str, optional, default="cdf"
@@ -157,6 +142,10 @@ def details(
 
         Returns
         -------
+        Figure:
+            matplotlib figure object
+        Tuple[Axes, Axes]:
+            matplotlib plot axes
         """
         fig1 = plt.figure(figsize=fig1_size)
         gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig1)
@@ -174,18 +163,8 @@ def details(
         ax2.scatter(q_act, cdf, color="#DC143C", facecolors="none")
         ax2.set_xlabel(xlabel, fontsize=fontsize)
         ax2.set_ylabel(ylabel, fontsize=15)
-
-        fig2, _ = Plot.confidence_level(
-            qth,
-            q_act,
-            q_lower,
-            q_upper,
-            alpha=alpha,
-            figsize=fig2_size,
-            fontsize=fontsize,
-        )
-        return [fig1, fig2], [ax1, ax2]
         plt.show()
+        return fig1, (ax1, ax2)
 
     @staticmethod
     def confidence_level(
@@ -205,7 +184,7 @@ def confidence_level(
         qth: [np.ndarray, list]
             Theoretical quantiles (obtained using the inverse_cdf method).
         q_act: [np.ndarray, list]
-            Actual data.
+            Actual data, unsorted.
         q_lower: [np.ndarray, list]
             Lower limit of the confidence interval.
         q_upper: [np.ndarray, list]

diff --git a/tests/test_distributions.py b/tests/test_distributions.py
@@ -216,15 +216,13 @@ def test_probability_plot(
         dist = Gumbel(time_series2, param)
         # test default parameters.
         fig, ax = dist.probability_plot()
-        assert isinstance(fig[0], Figure)
-        assert isinstance(fig[1], Figure)
+        assert isinstance(fig, Figure)
         assert isinstance(ax[0], Axes)
         assert isinstance(ax[1], Axes)
         # test with the cdf parameter
         cdf_weibul = PlottingPosition.weibul(time_series2)
-        fig, ax = dist.probability_plot(cdf=cdf_weibul, alpha=confidence_interval_alpha)
-        assert isinstance(fig[0], Figure)
-        assert isinstance(fig[1], Figure)
+        fig, ax = dist.probability_plot(cdf=cdf_weibul)
+        assert isinstance(fig, Figure)
         assert isinstance(ax[0], Axes)
         assert isinstance(ax[1], Axes)
 
@@ -381,15 +379,13 @@ def test_gev_probability_plot(
         dist = GEV(time_series1, param)
         # test default parameters.
         fig, ax = dist.probability_plot()
-        assert isinstance(fig[0], Figure)
-        assert isinstance(fig[1], Figure)
+        assert isinstance(fig, Figure)
         assert isinstance(ax[0], Axes)
         assert isinstance(ax[1], Axes)
         # test with the cdf parameter
         cdf_weibul = PlottingPosition.weibul(time_series1)
-        fig, ax = dist.probability_plot(cdf=cdf_weibul, alpha=confidence_interval_alpha)
-        assert isinstance(fig[0], Figure)
-        assert isinstance(fig[1], Figure)
+        fig, ax = dist.probability_plot(cdf=cdf_weibul)
+        assert isinstance(fig, Figure)
         assert isinstance(ax[0], Axes)
         assert isinstance(ax[1], Axes)