Merge pull request #216 from s-scherrer/bootstrap_methods
implemented BCa bootstrap method
s-scherrer authored Mar 8, 2021
2 parents a3123b9 + 7b1a835 commit e8cbda4
Showing 2 changed files with 137 additions and 12 deletions.
106 changes: 98 additions & 8 deletions src/pytesmo/metrics/pairwise_utils.py
@@ -1,4 +1,5 @@
import numpy as np
+from scipy import stats

from pytesmo.metrics import pairwise

@@ -67,7 +68,7 @@ def with_analytical_ci(metric_func, x, y, alpha=0.05):
    return m, ci[0], ci[1]


-def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
+def with_bootstrapped_ci(metric_func, x, y, alpha=0.05, method="BCa",
                         nsamples=1000, minimum_data_length=100):
    """
    Evaluates metric and bootstraps confidence interval.
@@ -90,6 +91,18 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
        metric).
    alpha : float, optional
        Confidence level, default is 0.05.
+    method : str, optional
+        The method to use to calculate confidence intervals. Available methods
+        are:
+
+        - "BCa" (default): Bias-corrected and accelerated bootstrap.
+        - "percentile": Uses the percentiles of the bootstrapped metric
+          distribution.
+        - "basic": Uses the percentiles of the differences from the original
+          metric value.
+
+        For more info and a comparison of the methods, see [1]_, especially
+        Table 6 on page 38.
    nsamples : int, optional
        Number of bootstrap samples, default is 1000.
    minimum_data_length : int, optional
@@ -103,8 +116,17 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
        Lower bound of confidence interval
    upper : float or array of floats
        Upper bound of confidence interval
+
+    References
+    ----------
+    .. [1] Gilleland, E. (2010). *Confidence Intervals for Forecast
+       Verification* (No. NCAR/TN-479+STR). University Corporation for Atmospheric
+       Research. doi:10.5065/D6WD3XJM
    """
-    # prototype, probably inefficient
+    # Prototype, might be better to implement this in Cython if it's too slow.
+    # Then it would probably be best to make this function only a lookup table
+    # which calls a cpdef'd method, which itself calls the cdef'd metric
+    # function with a cdef'd bootstrap implementation.
    n = len(x)
    if n < minimum_data_length:
        raise ValueError(
@@ -113,12 +135,80 @@ def with_bootstrapped_ci(metric_func, x, y, alpha=0.05,
            f"You can pass 'minimum_data_length={n}' if you want to do "
            "bootstrapping nevertheless."
        )
-    mvals = np.empty(n, dtype=float)
+    orig_metric = metric_func(x, y)
+    bootstrapped_metric = np.empty(nsamples, dtype=float)
+    if method == "BCa":
+        orig_jk = _jackknife(metric_func, x, y)
    for i in range(nsamples):
        idx = np.random.choice(n, size=n)
        _x, _y = x[idx], y[idx]
-        mvals[i] = metric_func(_x, _y)
-    lower = np.quantile(mvals, alpha / 2)
-    upper = np.quantile(mvals, 1 - alpha / 2)
-    m = metric_func(x, y)
-    return m, lower, upper
+        bootstrapped_metric[i] = metric_func(_x, _y)
+    if method == "percentile":
+        lower, upper = _percentile_bs_ci(
+            bootstrapped_metric, orig_metric, alpha
+        )
+    elif method == "basic":
+        lower, upper = _basic_bs_ci(bootstrapped_metric, orig_metric, alpha)
+    elif method == "BCa":
+        lower, upper = _BCa_bs_ci(
+            bootstrapped_metric,
+            orig_metric,
+            alpha,
+            orig_jk
+        )
+    return orig_metric, lower, upper
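
In short, the function draws nsamples resamples of the (x, y) pairs with replacement, evaluates the metric on each, and hands the resulting distribution to one of the three interval constructions. A minimal usage sketch of the new method parameter (illustrative only, not part of this commit; it assumes the module path from this diff and pytesmo.metrics.bias, the metric also used in the tests below):

# Illustrative sketch, not part of this commit: compare the three CI methods.
import numpy as np
import pytesmo.metrics
from pytesmo.metrics.pairwise_utils import with_bootstrapped_ci

np.random.seed(42)
x = np.random.randn(1000) + 5  # biased sample
y = np.random.randn(1000)
for method in ["percentile", "basic", "BCa"]:
    m, lower, upper = with_bootstrapped_ci(
        pytesmo.metrics.bias, x, y, alpha=0.05, method=method
    )
    print(f"{method:>10s}: {m:.3f} in [{lower:.3f}, {upper:.3f}]")
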


+def _jackknife(metric_func, x, y):
+    """Calculates leave-one-out (jackknife) estimates of the metric."""
+    jk = np.empty_like(x)
+    mask = np.ones(len(x), dtype=bool)
+    for i in range(len(x)):
+        mask[i] = False
+        jk[i] = metric_func(x[mask], y[mask])
+        mask[i] = True
+    return jk
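
_jackknife computes leave-one-out estimates: element i is the metric evaluated with pair i removed. The skewness of these values feeds the acceleration constant in _BCa_bs_ci below. A small sanity sketch (illustrative, not part of this commit; it uses the mean of the differences as a stand-in metric):

# Illustrative sketch, not part of this commit.
import numpy as np

def mean_diff(x, y):
    # stand-in metric for illustration
    return np.mean(x - y)

x = np.arange(5, dtype=float)
y = np.zeros(5)
jk = _jackknife(mean_diff, x, y)
# jk[0] is the metric with the first pair left out
assert np.isclose(jk[0], np.mean(x[1:]))
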


+def _percentile_bs_ci(bs_m, m, alpha):
+    """Calculates the CI using percentiles"""
+    lower = np.quantile(bs_m, alpha / 2)
+    upper = np.quantile(bs_m, 1 - alpha / 2)
+    return lower, upper


+def _basic_bs_ci(bs_m, m, alpha):
+    """Basic bootstrap"""
+    lower = 2 * m - np.quantile(bs_m, 1 - alpha / 2)
+    upper = 2 * m - np.quantile(bs_m, alpha / 2)
+    return lower, upper
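
The two helpers are mirror images: the basic interval is the percentile interval reflected around the original estimate m, i.e. lower_basic = 2*m - upper_percentile and upper_basic = 2*m - lower_percentile. A quick check of that identity (illustrative, not part of this commit; it assumes both helpers above are in scope):

# Illustrative sketch, not part of this commit.
import numpy as np

m = 1.0
bs_m = np.random.default_rng(0).normal(loc=m, scale=0.1, size=10000)
p_lo, p_hi = _percentile_bs_ci(bs_m, m, alpha=0.05)
b_lo, b_hi = _basic_bs_ci(bs_m, m, alpha=0.05)
assert np.isclose(b_lo, 2 * m - p_hi)
assert np.isclose(b_hi, 2 * m - p_lo)
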


+def _BCa_bs_ci(bs_m, m, alpha, jk):
+    """BCa bootstrap"""
+    # see also here regarding implementation:
+    # https://www.erikdrysdale.com/bca_python/
+    # For a two-sided (1 - alpha) interval the unadjusted quantile levels
+    # are alpha/2 and 1 - alpha/2, consistent with the other methods.
+    z_alpha = stats.norm.ppf(alpha / 2)
+    z_1_alpha = stats.norm.ppf(1 - alpha / 2)
+
+    # bias correction
+    z0 = stats.norm.ppf(
+        np.mean(bs_m <= m)
+    )
+
+    # acceleration
+    jk_mean = np.mean(jk)
+    a = (
+        np.sum((jk_mean - jk)**3)
+        / (6 * (np.sum((jk_mean - jk)**2))**(1.5))
+    )
+
+    # calculate adjusted percentiles
+    alpha_lower = stats.norm.cdf(
+        z0 + (z0 + z_alpha) / (1 - a * (z0 + z_alpha))
+    )
+    alpha_upper = stats.norm.cdf(
+        z0 + (z0 + z_1_alpha) / (1 - a * (z0 + z_1_alpha))
+    )
+    lower = np.quantile(bs_m, alpha_lower)
+    upper = np.quantile(bs_m, alpha_upper)
+    return lower, upper
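
The adjusted level is alpha_lower = norm.cdf(z0 + (z0 + z_alpha) / (1 - a * (z0 + z_alpha))), with the analogous upper expression. With no median bias (z0 = 0) and no acceleration (a = 0) these collapse to alpha/2 and 1 - alpha/2, so BCa reduces to the percentile interval. A numerical check (illustrative, not part of this commit; it assumes the helpers above are in scope):

# Illustrative sketch, not part of this commit.
import numpy as np

rng = np.random.default_rng(1)
m = 0.0
bs_m = rng.normal(loc=m, scale=1.0, size=100000)  # symmetric around m, so z0 ~ 0
jk = np.array([-1.0, 0.0, 1.0])  # symmetric jackknife values, so a == 0
p = _percentile_bs_ci(bs_m, m, alpha=0.05)
bca = _BCa_bs_ci(bs_m, m, alpha=0.05, jk=jk)
assert np.allclose(p, bca, atol=0.05)
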
43 changes: 39 additions & 4 deletions tests/test_metrics.py
@@ -3,6 +3,7 @@
import numpy.testing as nptest
import pandas as pd
import pytest
+from scipy import stats

import pytesmo.metrics
from pytesmo.metrics import *
@@ -75,27 +76,61 @@ def test_analytical_cis(testdata):
    assert ub > ub10


+@pytest.mark.parametrize("method", ["BCa", "basic", "percentile"])
+def test_bootstrap_cis_simple(method):
+    np.random.seed(313)
+    # A test case with bias, for which we have analytical CIs to compare
+    # against; convergence should work if everything is implemented correctly.
+    x = np.random.randn(20000) + 5
+    y = np.random.randn(20000)
+    b, lb, ub = with_analytical_ci(pytesmo.metrics.bias, x, y, alpha=0.1)
+    # x and y have variance 1, so x - y has variance 2.
+    # Therefore, b ~ N(5, 2/n)
+    rv = stats.norm(loc=5, scale=np.sqrt(2/len(x)))
+    # Approximate 95% intervals are therefore
+    # 5 +- 2 * sqrt(2/n) = 5 +- 2 * sqrt(2/20000) ~ 5 +- 0.02
+    assert abs(b - 5) < 0.02
+    # The comparison tolerance (4 decimals) is an empirical value
+    nptest.assert_almost_equal(b - lb, 5 - rv.ppf(0.05), 4)
+    nptest.assert_almost_equal(ub - b, rv.ppf(0.95) - 5, 4)
+    # bias CIs are symmetric
+    assert b - lb == ub - b
+
+    # test bootstrapped CIs
+    bs_b, bs_lb, bs_ub = with_bootstrapped_ci(
+        pytesmo.metrics.bias, x, y, alpha=0.1,
+        method=method
+    )
+    assert bs_b == b  # this is the same function call
+    # The comparison tolerance is again an empirical value; the agreement
+    # is not quite as good as with the analytical estimates
+    nptest.assert_almost_equal(bs_lb, lb, 2)
+    nptest.assert_almost_equal(bs_ub, rv.ppf(0.95), 2)
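
The expected numbers above follow from the setup: Var(x - y) = 2, so the bias estimate has standard error sqrt(2/n) = 0.01 for n = 20000, and the 5% quantile offset is about 1.645 * 0.01 ~ 0.0165. A worked check (illustrative, not part of this commit):

# Illustrative sketch, not part of this commit.
import numpy as np
from scipy import stats

n = 20000
se = np.sqrt(2 / n)       # standard error of the bias estimate: 0.01
rv = stats.norm(loc=5, scale=se)
print(5 - rv.ppf(0.05))   # expected half-width b - lb: ~0.0165
print(2 * se)             # ~0.02, the bound used for abs(b - 5)
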


@pytest.mark.slow
def test_bootstrapped_cis(testdata):
    x, y = testdata
    for funcname in has_ci:
        func = getattr(pytesmo.metrics, funcname)
        m, lb, ub = with_analytical_ci(func, x, y, alpha=0.1)
        m_bs, lb_bs, ub_bs = with_bootstrapped_ci(
-            func, x, y, alpha=0.1, nsamples=1000
+            func, x, y, alpha=0.1, nsamples=1000, method="BCa"
        )
        assert m == m_bs
        assert lb_bs < ub_bs
        if funcname != "nrmsd":
            # nrmsd is a bit unstable when bootstrapping, due to the
            # data-dependent normalization that is applied
-            assert abs(ub - ub_bs) < 1e-2
-            assert abs(lb - lb_bs) < 1e-2
+            assert abs(ub - ub_bs) < 2e-2
+            assert abs(lb - lb_bs) < 2e-2
+        else:
+            assert abs(ub - ub_bs) < 1e-1
+            assert abs(lb - lb_bs) < 1e-1
    for funcname in no_ci:
-        m, lb, ub = with_bootstrapped_ci(func, x, y, alpha=0.1, nsamples=1000)
+        # look up the metric function for this name
+        func = getattr(pytesmo.metrics, funcname)
+        m, lb, ub = with_bootstrapped_ci(
+            func, x, y, alpha=0.1, nsamples=1000, method="BCa"
+        )
        assert lb < m
        assert m < ub

