Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add new method to merge p-values using FDR #1211

Merged
merged 1 commit into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dowhy/gcm/model_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
create_polynom_logistic_regression_classifier,
)
from dowhy.gcm.ml.regression import create_ada_boost_regressor, create_extra_trees_regressor, create_polynom_regressor
from dowhy.gcm.stats import merge_p_values_average
from dowhy.gcm.stats import merge_p_values_fdr
from dowhy.gcm.util.general import is_categorical, set_random_seed, shape_into_2d
from dowhy.graph import get_ordered_predecessors, is_root_node

Expand Down Expand Up @@ -598,7 +598,7 @@ def _evaluate_invertibility_assumptions(
parent_samples[random_indices],
)
)
all_pnl_p_values[node] = merge_p_values_average(tmp_p_values)
all_pnl_p_values[node] = merge_p_values_fdr(tmp_p_values)

if len(all_pnl_p_values) == 0:
return all_pnl_p_values
Expand Down
27 changes: 27 additions & 0 deletions dowhy/gcm/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests

from dowhy.gcm.constant import EPS
from dowhy.gcm.util.general import shape_into_2d
Expand Down Expand Up @@ -85,6 +86,32 @@ def merge_p_values_quantile(
return float(min(1.0, np.quantile(p_values / quantile, quantile)))


def merge_p_values_fdr(p_values: Union[np.ndarray, List[float]], fdr_method: str = "fdr_bh") -> float:
    """Merges p-values to represent the global null hypothesis that all hypotheses represented by the p-values are true.

    Here, we first adjust the given p-values based on the provided false discovery rate (FDR) control method, and then
    return the minimum of the adjusted values. NaN entries are dropped before the adjustment; if every entry is NaN,
    NaN is returned.

    :param p_values: A list or array of p-values. Entries are converted to float, so missing values given as None are
                     treated like NaN.
    :param fdr_method: The false discovery rate control method. For various options, please refer to
                       `this page <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_.
    :return: The minimum p-value after adjusting based on the given FDR method, as a built-in float.
    :raises ValueError: If no p-values are given.
    """
    # Coerce to a float array up front so that the NaN mask below also works for object-like inputs.
    p_values = np.asarray(p_values, dtype=float)

    if p_values.size == 0:
        raise ValueError("Given list of p-values is empty!")

    valid_p_values = p_values[~np.isnan(p_values)]
    if valid_p_values.size == 0:
        return float(np.nan)

    # Note: The alpha level doesn't matter here, since only the adjusted p-values (second entry of the result) are
    # used and not the reject decisions.
    adjusted_p_values = multipletests(valid_p_values, alpha=0.05, method=fdr_method)[1]

    # Return a built-in float, consistent with the NaN branch above.
    return float(np.min(adjusted_p_values))


def marginal_expectation(
prediction_method: Callable[[np.ndarray], np.ndarray],
feature_samples: np.ndarray,
Expand Down
17 changes: 16 additions & 1 deletion tests/gcm/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
create_linear_regressor,
create_logistic_regression_classifier,
)
from dowhy.gcm.stats import estimate_ftest_pvalue, marginal_expectation, merge_p_values_average, merge_p_values_quantile
from dowhy.gcm.stats import (
estimate_ftest_pvalue,
marginal_expectation,
merge_p_values_average,
merge_p_values_fdr,
merge_p_values_quantile,
)
from dowhy.gcm.util.general import geometric_median


Expand Down Expand Up @@ -55,6 +61,15 @@ def test_given_p_values_with_scaling_when_merge_p_values_quantile_then_returns_s
assert merge_p_values_quantile(p_values, p_values_scaling, quantile=0.75) == approx(0.193, abs=0.001)


def test_given_p_values_when_merge_p_values_fdr_then_returns_expected_p_vlaue():
    """Checks the merged FDR p-value for single values, two extreme values, several values and inputs with NaNs."""
    # Pairs of (given p-values, expected merged p-value). The first four expectations are compared exactly, the
    # last two approximately via approx.
    expectations = [
        ([0], 0),
        ([1], 1),
        ([0.3], 0.3),
        ([0, 1], 0.0),
        ([0.1, 0.2, 0.5], approx(0.3)),
        ([0.1, np.nan, 0.2, 0.5, np.nan], approx(0.3)),
    ]

    for given_p_values, expected_p_value in expectations:
        assert merge_p_values_fdr(given_p_values) == expected_p_value


def test_given_invalid_inputs_when_merge_p_values_quantile_then_raises_error():
with pytest.raises(ValueError):
assert merge_p_values_quantile(np.array([0.1, 0.5, 1]), quantile=0)
Expand Down