Merge pull request #216 from s-scherrer/bootstrap_methods
implemented BCa bootstrap method
s-scherrer authored Mar 8, 2021
2 parents a3123b9 + 7b1a835 commit e8cbda4
Showing 2 changed files with 137 additions and 12 deletions.
106 changes: 98 additions & 8 deletions src/pytesmo/metrics/pairwise_utils.py
@@ -1,4 +1,5 @@
import numpy as np
+from scipy import stats

from pytesmo.metrics import pairwise

@@ -67,7 +68,7 @@ def with_analytical_ci(metric_func, x, y, alpha=0.05):
    return m, ci[0], ci[1]


-def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
+def with_bootstrapped_ci(metric_func, x, y, alpha=0.05, method="BCa",
                         nsamples=1000, minimum_data_length=100):
    """
    Evaluates metric and bootstraps confidence interval.
@@ -90,6 +91,18 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
        metric).
    alpha : float, optional
        Confidence level, default is 0.05.
+    method : str, optional
+        The method to use to calculate confidence intervals. Available methods
+        are:
+
+        - "BCa" (default): Bias-corrected and accelerated bootstrap.
+        - "percentile": Uses the percentiles of the bootstrapped metric
+          distribution.
+        - "basic": Uses the percentiles of the differences from the original
+          metric value.
+
+        For more info and a comparison of the methods, see [1]_, especially
+        Table 6 on page 38.
    nsamples : int, optional
        Number of bootstrap samples, default is 1000.
    minimum_data_length : int, optional
@@ -103,8 +116,17 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
        Lower bound of confidence interval
    upper : float or array of floats
        Upper bound of confidence interval
+
+    References
+    ----------
+    .. [1] Gilleland, E. (2010). *Confidence Intervals for Forecast
+       Verification* (No. NCAR/TN-479+STR). University Corporation for Atmospheric
+       Research. doi:10.5065/D6WD3XJM
    """
-    # prototype, probably inefficient
+    # Prototype, might be better to implement this in Cython if it's too slow.
+    # Then it would probably be best to make this function only a lookup table
+    # which calls a cpdef'd method, which itself calls the cdef'd metric
+    # function with a cdef'd bootstrap implementation.
    n = len(x)
    if n < minimum_data_length:
        raise ValueError(
@@ -113,12 +135,80 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
            f"You can pass 'minimum_data_length={n}' if you want to do "
            "bootstrapping nevertheless."
        )
-    mvals = np.empty(n, dtype=float)
+    orig_metric = metric_func(x, y)
+    bootstrapped_metric = np.empty(nsamples, dtype=float)
+    if method == "BCa":
+        orig_jk = _jackknife(metric_func, x, y)
    for i in range(nsamples):
        idx = np.random.choice(n, size=n)
        _x, _y = x[idx], y[idx]
-        mvals[i] = metric_func(_x, _y)
-    lower = np.quantile(mvals, alpha / 2)
-    upper = np.quantile(mvals, 1 - alpha / 2)
-    m = metric_func(x, y)
-    return m, lower, upper
+        bootstrapped_metric[i] = metric_func(_x, _y)
+    if method == "percentile":
+        lower, upper = _percentile_bs_ci(
+            bootstrapped_metric, orig_metric, alpha
+        )
+    elif method == "basic":
+        lower, upper = _basic_bs_ci(bootstrapped_metric, orig_metric, alpha)
+    elif method == "BCa":
+        lower, upper = _BCa_bs_ci(
+            bootstrapped_metric,
+            orig_metric,
+            alpha,
+            orig_jk
+        )
+    return orig_metric, lower, upper
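
In short, the function draws nsamples resamples of the (x, y) pairs with replacement, evaluates the metric on each, and hands the resulting distribution to one of the three interval constructions. A minimal usage sketch of the new method parameter (illustrative only, not part of this commit; it assumes the module path from this diff and pytesmo.metrics.bias, the metric also used in the tests below):

# Illustrative sketch, not part of this commit: compare the three CI methods.
import numpy as np
import pytesmo.metrics
from pytesmo.metrics.pairwise_utils import with_bootstrapped_ci

np.random.seed(42)
x = np.random.randn(1000) + 5  # biased sample
y = np.random.randn(1000)
for method in ["percentile", "basic", "BCa"]:
    m, lower, upper = with_bootstrapped_ci(
        pytesmo.metrics.bias, x, y, alpha=0.05, method=method
    )
    print(f"{method:>10s}: {m:.3f} in [{lower:.3f}, {upper:.3f}]")
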


+def _jackknife(metric_func, x, y):
+    """Calculates leave-one-out (jackknife) estimates of the metric."""
+    jk = np.empty_like(x)
+    mask = np.ones(len(x), dtype=bool)
+    for i in range(len(x)):
+        mask[i] = False
+        jk[i] = metric_func(x[mask], y[mask])
+        mask[i] = True
+    return jk
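
_jackknife computes leave-one-out estimates: element i is the metric evaluated with pair i removed. The skewness of these values feeds the acceleration constant in _BCa_bs_ci below. A small sanity sketch (illustrative, not part of this commit; it uses the mean of the differences as a stand-in metric):

# Illustrative sketch, not part of this commit.
import numpy as np

def mean_diff(x, y):
    # stand-in metric for illustration
    return np.mean(x - y)

x = np.arange(5, dtype=float)
y = np.zeros(5)
jk = _jackknife(mean_diff, x, y)
# jk[0] is the metric with the first pair left out
assert np.isclose(jk[0], np.mean(x[1:]))
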


+def _percentile_bs_ci(bs_m, m, alpha):
+    """Calculates the CI using percentiles"""
+    lower = np.quantile(bs_m, alpha / 2)
+    upper = np.quantile(bs_m, 1 - alpha / 2)
+    return lower, upper


+def _basic_bs_ci(bs_m, m, alpha):
+    """Basic bootstrap"""
+    lower = 2 * m - np.quantile(bs_m, 1 - alpha / 2)
+    upper = 2 * m - np.quantile(bs_m, alpha / 2)
+    return lower, upper
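
The two helpers are mirror images: the basic interval is the percentile interval reflected around the original estimate m, i.e. lower_basic = 2*m - upper_percentile and upper_basic = 2*m - lower_percentile. A quick check of that identity (illustrative, not part of this commit; it assumes both helpers above are in scope):

# Illustrative sketch, not part of this commit.
import numpy as np

m = 1.0
bs_m = np.random.default_rng(0).normal(loc=m, scale=0.1, size=10000)
p_lo, p_hi = _percentile_bs_ci(bs_m, m, alpha=0.05)
b_lo, b_hi = _basic_bs_ci(bs_m, m, alpha=0.05)
assert np.isclose(b_lo, 2 * m - p_hi)
assert np.isclose(b_hi, 2 * m - p_lo)
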


+def _BCa_bs_ci(bs_m, m, alpha, jk):
+    """BCa bootstrap"""
+    # see also here regarding implementation:
+    # https://www.erikdrysdale.com/bca_python/
+    # For a two-sided (1 - alpha) interval the unadjusted quantile levels
+    # are alpha/2 and 1 - alpha/2, consistent with the other methods.
+    z_alpha = stats.norm.ppf(alpha / 2)
+    z_1_alpha = stats.norm.ppf(1 - alpha / 2)
+
+    # bias correction
+    z0 = stats.norm.ppf(
+        np.mean(bs_m <= m)
+    )
+
+    # acceleration
+    jk_mean = np.mean(jk)
+    a = (
+        np.sum((jk_mean - jk)**3)
+        / (6 * (np.sum((jk_mean - jk)**2))**(1.5))
+    )
+
+    # calculate adjusted percentiles
+    alpha_lower = stats.norm.cdf(
+        z0 + (z0 + z_alpha) / (1 - a * (z0 + z_alpha))
+    )
+    alpha_upper = stats.norm.cdf(
+        z0 + (z0 + z_1_alpha) / (1 - a * (z0 + z_1_alpha))
+    )
+    lower = np.quantile(bs_m, alpha_lower)
+    upper = np.quantile(bs_m, alpha_upper)
+    return lower, upper
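
The adjusted level is alpha_lower = norm.cdf(z0 + (z0 + z_alpha) / (1 - a * (z0 + z_alpha))), with the analogous upper expression. With no median bias (z0 = 0) and no acceleration (a = 0) these collapse to alpha/2 and 1 - alpha/2, so BCa reduces to the percentile interval. A numerical check (illustrative, not part of this commit; it assumes the helpers above are in scope):

# Illustrative sketch, not part of this commit.
import numpy as np

rng = np.random.default_rng(1)
m = 0.0
bs_m = rng.normal(loc=m, scale=1.0, size=100000)  # symmetric around m, so z0 ~ 0
jk = np.array([-1.0, 0.0, 1.0])  # symmetric jackknife values, so a == 0
p = _percentile_bs_ci(bs_m, m, alpha=0.05)
bca = _BCa_bs_ci(bs_m, m, alpha=0.05, jk=jk)
assert np.allclose(p, bca, atol=0.05)
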
43 changes: 39 additions & 4 deletions tests/test_metrics.py
@@ -3,6 +3,7 @@
import numpy.testing as nptest
import pandas as pd
import pytest
+from scipy import stats

import pytesmo.metrics
from pytesmo.metrics import *
@@ -75,27 +76,61 @@ def test_analytical_cis(testdata):
    assert ub > ub10


+@pytest.mark.parametrize("method", ["BCa", "basic", "percentile"])
+def test_bootstrap_cis_simple(method):
+    np.random.seed(313)
+    # A test case with bias, for which we have analytical CIs to compare
+    # against; convergence should work if everything is implemented correctly.
+    x = np.random.randn(20000) + 5
+    y = np.random.randn(20000)
+    b, lb, ub = with_analytical_ci(pytesmo.metrics.bias, x, y, alpha=0.1)
+    # x and y have variance 1, so x - y has variance 2.
+    # Therefore, b ~ N(5, 2/n)
+    rv = stats.norm(loc=5, scale=np.sqrt(2/len(x)))
+    # Approximate 95% intervals are therefore
+    # 5 +- 2 * sqrt(2/n) = 5 +- 2 * sqrt(2/20000) ~ 5 +- 0.02
+    assert abs(b - 5) < 0.02
+    # The comparison tolerance (4 decimals) is an empirical value
+    nptest.assert_almost_equal(b - lb, 5 - rv.ppf(0.05), 4)
+    nptest.assert_almost_equal(ub - b, rv.ppf(0.95) - 5, 4)
+    # bias CIs are symmetric
+    assert b - lb == ub - b
+
+    # test bootstrapped CIs
+    bs_b, bs_lb, bs_ub = with_bootstrapped_ci(
+        pytesmo.metrics.bias, x, y, alpha=0.1,
+        method=method
+    )
+    assert bs_b == b  # this is the same function call
+    # The comparison tolerance is again an empirical value; the agreement
+    # is not quite as good as with the analytical estimates
+    nptest.assert_almost_equal(bs_lb, lb, 2)
+    nptest.assert_almost_equal(bs_ub, rv.ppf(0.95), 2)
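
The expected numbers above follow from the setup: Var(x - y) = 2, so the bias estimate has standard error sqrt(2/n) = 0.01 for n = 20000, and the 5% quantile offset is about 1.645 * 0.01 ~ 0.0165. A worked check (illustrative, not part of this commit):

# Illustrative sketch, not part of this commit.
import numpy as np
from scipy import stats

n = 20000
se = np.sqrt(2 / n)       # standard error of the bias estimate: 0.01
rv = stats.norm(loc=5, scale=se)
print(5 - rv.ppf(0.05))   # expected half-width b - lb: ~0.0165
print(2 * se)             # ~0.02, the bound used for abs(b - 5)
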


@pytest.mark.slow
def test_bootstrapped_cis(testdata):
    x, y = testdata
    for funcname in has_ci:
        func = getattr(pytesmo.metrics, funcname)
        m, lb, ub = with_analytical_ci(func, x, y, alpha=0.1)
        m_bs, lb_bs, ub_bs = with_bootstrapped_ci(
-            func, x, y, alpha=0.1, nsamples=1000
+            func, x, y, alpha=0.1, nsamples=1000, method="BCa"
        )
        assert m == m_bs
        assert lb_bs < ub_bs
        if funcname != "nrmsd":
            # nrmsd is a bit unstable when bootstrapping, due to the
            # data-dependent normalization that is applied
-            assert abs(ub - ub_bs) < 1e-2
-            assert abs(lb - lb_bs) < 1e-2
+            assert abs(ub - ub_bs) < 2e-2
+            assert abs(lb - lb_bs) < 2e-2
+        else:
+            assert abs(ub - ub_bs) < 1e-1
+            assert abs(lb - lb_bs) < 1e-1
    for funcname in no_ci:
-        m, lb, ub = with_bootstrapped_ci(func, x, y, alpha=0.1, nsamples=1000)
+        # look up the metric function for this name
+        func = getattr(pytesmo.metrics, funcname)
+        m, lb, ub = with_bootstrapped_ci(
+            func, x, y, alpha=0.1, nsamples=1000, method="BCa"
+        )
        assert lb < m
        assert m < ub

