Skip to content

Commit

Permalink
Add new method to merge p-values using FDR
Browse files Browse the repository at this point in the history
The method merges p-values by taking the minimum of the adjusted p-values. This then represents a p-value for a global hypothesis.

Signed-off-by: Patrick Bloebaum <[email protected]>
  • Loading branch information
bloebp committed Jun 18, 2024
1 parent 512d1b0 commit 90d133f
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 7 deletions.
4 changes: 2 additions & 2 deletions dowhy/gcm/independence_test/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import dowhy.gcm.config as config
from dowhy.gcm.independence_test.kernel_operation import approximate_rbf_kernel_features
from dowhy.gcm.stats import merge_p_values_average
from dowhy.gcm.stats import merge_p_values_average, merge_p_values_fdr
from dowhy.gcm.util.general import auto_apply_encoders, auto_fit_encoders, set_random_seed, shape_into_2d


Expand All @@ -20,7 +20,7 @@ def kernel_based(
bootstrap_num_runs: int = 10,
max_num_samples_run: int = 2000,
bootstrap_n_jobs: Optional[int] = None,
p_value_adjust_func: Callable[[Union[np.ndarray, List[float]]], float] = merge_p_values_average,
p_value_adjust_func: Callable[[Union[np.ndarray, List[float]]], float] = merge_p_values_fdr,
**kwargs,
) -> float:
"""Prepares the data and uses kernel (conditional) independence test. The independence test estimates a p-value
Expand Down
4 changes: 2 additions & 2 deletions dowhy/gcm/independence_test/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sklearn.preprocessing import scale

import dowhy.gcm.config as config
from dowhy.gcm.stats import estimate_ftest_pvalue, merge_p_values_average
from dowhy.gcm.stats import estimate_ftest_pvalue, merge_p_values_fdr
from dowhy.gcm.util.general import auto_apply_encoders, auto_fit_encoders, set_random_seed, shape_into_2d


Expand All @@ -21,7 +21,7 @@ def regression_based(
Z: Optional[np.ndarray] = None,
max_num_components_all_inputs: int = 40,
k_folds: int = 3,
p_value_adjust_func: Callable[[Union[np.ndarray, List[float]]], float] = merge_p_values_average,
p_value_adjust_func: Callable[[Union[np.ndarray, List[float]]], float] = merge_p_values_fdr,
max_samples_per_fold: int = -1,
n_jobs: Optional[int] = None,
) -> float:
Expand Down
4 changes: 2 additions & 2 deletions dowhy/gcm/model_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
create_polynom_logistic_regression_classifier,
)
from dowhy.gcm.ml.regression import create_ada_boost_regressor, create_extra_trees_regressor, create_polynom_regressor
from dowhy.gcm.stats import merge_p_values_average
from dowhy.gcm.stats import merge_p_values_fdr
from dowhy.gcm.util.general import is_categorical, set_random_seed, shape_into_2d
from dowhy.graph import get_ordered_predecessors, is_root_node

Expand Down Expand Up @@ -598,7 +598,7 @@ def _evaluate_invertibility_assumptions(
parent_samples[random_indices],
)
)
all_pnl_p_values[node] = merge_p_values_average(tmp_p_values)
all_pnl_p_values[node] = merge_p_values_fdr(tmp_p_values)

if len(all_pnl_p_values) == 0:
return all_pnl_p_values
Expand Down
27 changes: 27 additions & 0 deletions dowhy/gcm/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests

from dowhy.gcm.constant import EPS
from dowhy.gcm.util.general import shape_into_2d
Expand Down Expand Up @@ -85,6 +86,32 @@ def merge_p_values_quantile(
return float(min(1.0, np.quantile(p_values / quantile, quantile)))


def merge_p_values_fdr(p_values: Union[np.ndarray, List[float]], fdr_method: str = "fdr_bh") -> float:
    """Merges p-values to represent the global null hypothesis that all hypotheses represented by the p-values are true.
    Here, we first adjust the given p-values based on the provided false discovery rate (FDR) control method, and then
    return the minimum.

    NaN entries are ignored. If every entry is NaN, NaN is returned.

    :param p_values: A list or array of p-values.
    :param fdr_method: The false discovery rate control method. For various options, please refer to
                       `this page <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_.
    :return: The minimum p-value after adjusting based on the given FDR method, as a built-in float.
    :raises ValueError: If the given collection of p-values is empty.
    """
    if len(p_values) == 0:
        raise ValueError("Given list of p-values is empty!")

    # Coerce to a float array so that integer inputs such as [0, 1] are handled uniformly.
    p_values = np.asarray(p_values, dtype=float)

    # Compute the NaN mask once; it is used both for the all-NaN check and for filtering.
    nan_mask = np.isnan(p_values)
    if np.all(nan_mask):
        return float(np.nan)

    # Note: The alpha level doesn't matter here, since only the adjusted p-values (second entry of the result)
    # are used and not the reject decisions.
    adjusted_p_values = multipletests(p_values[~nan_mask], 0.05, method=fdr_method)[1]

    # Convert explicitly to a built-in float to honor the annotated return type, consistent with the other
    # merge_p_values_* functions.
    return float(np.min(adjusted_p_values))


def marginal_expectation(
prediction_method: Callable[[np.ndarray], np.ndarray],
feature_samples: np.ndarray,
Expand Down
17 changes: 16 additions & 1 deletion tests/gcm/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
create_linear_regressor,
create_logistic_regression_classifier,
)
from dowhy.gcm.stats import estimate_ftest_pvalue, marginal_expectation, merge_p_values_average, merge_p_values_quantile
from dowhy.gcm.stats import (
estimate_ftest_pvalue,
marginal_expectation,
merge_p_values_average,
merge_p_values_fdr,
merge_p_values_quantile,
)
from dowhy.gcm.util.general import geometric_median


Expand Down Expand Up @@ -55,6 +61,15 @@ def test_given_p_values_with_scaling_when_merge_p_values_quantile_then_returns_s
assert merge_p_values_quantile(p_values, p_values_scaling, quantile=0.75) == approx(0.193, abs=0.001)


def test_given_p_values_when_merge_p_values_fdr_then_returns_expected_p_vlaue():
    """Checks merge_p_values_fdr on single values, on a pair, and on a triple with and without NaN entries."""
    # Inputs whose merged p-value is compared exactly.
    exact_cases = (
        ([0], 0),
        ([1], 1),
        ([0.3], 0.3),
        ([0, 1], 0.0),
    )
    for given_p_values, expected_p_value in exact_cases:
        assert merge_p_values_fdr(given_p_values) == expected_p_value

    # Inputs compared with a tolerance; the second one adds NaN entries and expects the same merged value.
    tolerant_cases = (
        [0.1, 0.2, 0.5],
        [0.1, np.nan, 0.2, 0.5, np.nan],
    )
    for given_p_values in tolerant_cases:
        assert merge_p_values_fdr(given_p_values) == approx(0.3)


def test_given_invalid_inputs_when_merge_p_values_quantile_then_raises_error():
with pytest.raises(ValueError):
assert merge_p_values_quantile(np.array([0.1, 0.5, 1]), quantile=0)
Expand Down

0 comments on commit 90d133f

Please sign in to comment.