Skip to content

Commit

Permalink
feat: 99% credible set validation during study_locus_validation (#765)
Browse files Browse the repository at this point in the history
* feat: study locus validation filters for 95% credible sets

* revert: filtering by credible set interval is no longer needed

* feat: annotate credible sets before filtering them

* docs: adding more context here
  • Loading branch information
d0choa authored Sep 24, 2024
1 parent ccdb1f2 commit 84d6638
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 11 deletions.
6 changes: 2 additions & 4 deletions src/gentropy/colocalisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pyspark.sql.functions import col

from gentropy.common.session import Session
from gentropy.dataset.study_locus import CredibleInterval, StudyLocus
from gentropy.dataset.study_locus import StudyLocus
from gentropy.method.colocalisation import Coloc


Expand Down Expand Up @@ -46,9 +46,7 @@ def __init__(
)

# Transform
overlaps = credible_set.filter_credible_set(
CredibleInterval.IS95
).find_overlaps()
overlaps = credible_set.find_overlaps()
colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore

# Load
Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def filter_credible_set(
self: StudyLocus,
credible_interval: CredibleInterval,
) -> StudyLocus:
"""Filter study-locus tag variants based on given credible interval.
"""Annotate and filter study-locus tag variants based on given credible interval.
Args:
credible_interval (CredibleInterval): Credible interval to filter for.
Expand All @@ -562,7 +562,7 @@ def filter_credible_set(
StudyLocus: Filtered study-locus dataset.
"""
return StudyLocus(
_df=self._df.withColumn(
_df=self.annotate_credible_sets().df.withColumn(
"locus",
f.filter(
f.col("locus"),
Expand Down
6 changes: 2 additions & 4 deletions src/gentropy/pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ def __init__(
session, study_locus_ld_annotated_in
)
# PICS
picsed_sl = (
PICS.finemap(study_locus_ld_annotated)
.annotate_credible_sets()
.filter_credible_set(credible_interval=CredibleInterval.IS99)
picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set(
credible_interval=CredibleInterval.IS99
)
# Write
picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out)
4 changes: 3 additions & 1 deletion src/gentropy/study_locus_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from gentropy.common.session import Session
from gentropy.dataset.study_index import StudyIndex
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.study_locus import CredibleInterval, StudyLocus


class StudyLocusValidationStep:
Expand Down Expand Up @@ -46,6 +46,8 @@ def __init__(
.validate_study(study_index) # Flagging studies not in study index
.annotate_study_type(study_index) # Add study type to study locus
.qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics
# Annotate credible sets, then filter to keep only the 99% credible sets
.filter_credible_set(credible_interval=CredibleInterval.IS99)
).persist() # we will need this for 2 types of outputs

study_locus_with_qc.valid_rows(
Expand Down

0 comments on commit 84d6638

Please sign in to comment.