Skip to content

Commit

Permalink
feat: credible set quality filtering (#640)
Browse files Browse the repository at this point in the history
* feat: credible set quality filtering

* fix: purity threshold
  • Loading branch information
Daniel-Considine authored Jun 11, 2024
1 parent ca43fff commit 45d991c
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions src/gentropy/susie_finemapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from gentropy.common.session import Session
from gentropy.common.spark_helpers import neglog_pvalue_to_mantissa_and_exponent
from gentropy.dataset.ld_index import LDIndex
from gentropy.dataset.study_index import StudyIndex
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.summary_statistics import SummaryStatistics
Expand Down Expand Up @@ -1168,3 +1169,45 @@ def susie_finemapper_one_studylocus_row_v3_dev_ss_gathered(
)

return out

@staticmethod
def credible_set_qc(
    cred_sets: StudyLocus,
    study_index: StudyIndex,
    ld_index: LDIndex,
    p_value_threshold: float = 1e-5,
    purity_min_r2: float = 0.01,
) -> StudyLocus:
    """Filter credible sets by lead p-value and min-R2 purity, then LD-clump them.

    Rows are kept when the lead p-value (``pValueMantissa * 10 ** pValueExponent``)
    is at most ``p_value_threshold`` and ``purityMinR2`` is at least
    ``purity_min_r2``. The survivors are LD-annotated and clumped, and rows
    whose ``qualityControls`` carry the clumping flag are removed.

    The ``cred_sets`` object passed in is left untouched; the filtering is
    applied to a shallow copy.

    Args:
        cred_sets (StudyLocus): StudyLocus object with credible sets to filter/clump
        study_index (StudyIndex): StudyIndex object
        ld_index (LDIndex): LDIndex object
        p_value_threshold (float): p-value threshold for filtering credible sets, default is 1e-5
        purity_min_r2 (float): min-R2 purity threshold for filtering credible sets, default is 0.01

    Returns:
        StudyLocus: Credible sets which pass filters and LD clumping.
    """
    # Local import: only this method needs it, and the file-level import
    # block is outside this block's span.
    import copy

    # Filter on the expression directly instead of materialising (and later
    # dropping) a temporary "pValue" column, which would clobber any column
    # of that name already present in the input.
    lead_p_value = f.col("pValueMantissa") * f.pow(10, f.col("pValueExponent"))
    passing_df = cred_sets.df.filter(lead_p_value <= p_value_threshold).filter(
        f.col("purityMinR2") >= purity_min_r2
    )

    # Work on a shallow copy so the caller's StudyLocus keeps its original
    # dataframe rather than being silently replaced by the filtered one.
    filtered_sets = copy.copy(cred_sets)
    filtered_sets.df = passing_df

    return (
        filtered_sets.annotate_ld(study_index, ld_index)
        .clump()
        .filter(
            ~f.array_contains(
                f.col("qualityControls"),
                "Explained by a more significant variant in high LD (clumped)",
            )
        )
    )

0 comments on commit 45d991c

Please sign in to comment.