diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 4f8431b98..0dcdff206 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -8,7 +8,7 @@ from pyspark.sql.functions import col from gentropy.common.session import Session -from gentropy.dataset.study_locus import CredibleInterval, StudyLocus +from gentropy.dataset.study_locus import StudyLocus from gentropy.method.colocalisation import Coloc @@ -46,9 +46,7 @@ def __init__( ) # Transform - overlaps = credible_set.filter_credible_set( - CredibleInterval.IS95 - ).find_overlaps() + overlaps = credible_set.find_overlaps() colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore # Load diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 333e44fdc..e3706eaf0 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -555,7 +555,7 @@ def filter_credible_set( self: StudyLocus, credible_interval: CredibleInterval, ) -> StudyLocus: - """Filter study-locus tag variants based on given credible interval. + """Annotate and filter study-locus tag variants based on given credible interval. Args: credible_interval (CredibleInterval): Credible interval to filter for. @@ -564,7 +564,7 @@ def filter_credible_set( StudyLocus: Filtered study-locus dataset. """ return StudyLocus( - _df=self._df.withColumn( + _df=self.annotate_credible_sets().df.withColumn( "locus", f.filter( f.col("locus"), diff --git a/src/gentropy/pics.py b/src/gentropy/pics.py index 80421b9ae..e80a37eb6 100644 --- a/src/gentropy/pics.py +++ b/src/gentropy/pics.py @@ -28,10 +28,8 @@ def __init__( session, study_locus_ld_annotated_in ) # PICS - picsed_sl = ( - PICS.finemap(study_locus_ld_annotated) - .annotate_credible_sets() - .filter_credible_set(credible_interval=CredibleInterval.IS99) + picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set( + credible_interval=CredibleInterval.IS99 ) # Write picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index d8eb21fec..287cd5645 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -4,7 +4,7 @@ from gentropy.common.session import Session from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import CredibleInterval, StudyLocus class StudyLocusValidationStep: @@ -47,6 +47,8 @@ def __init__( .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE + # Annotates credible intervals and filter to only keep 99% credible sets + .filter_credible_set(credible_interval=CredibleInterval.IS99) ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows(