diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 1a2aa3697..e6fd06c12 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -148,7 +148,7 @@ class FinemappingMethod(Enum): SUSIE_INF (str): SuSiE-inf method implemented in `gentropy` """ - PICS = "pics" + PICS = "PICS" SUSIE = "SuSie" SUSIE_INF = "SuSiE-inf" diff --git a/src/gentropy/datasource/gwas_catalog/study_index.py b/src/gentropy/datasource/gwas_catalog/study_index.py index c01d6d263..8630d31f6 100644 --- a/src/gentropy/datasource/gwas_catalog/study_index.py +++ b/src/gentropy/datasource/gwas_catalog/study_index.py @@ -655,11 +655,7 @@ def add_no_sumstats_flag(self: StudyIndexGWASCatalog) -> StudyIndexGWASCatalog: """ self.df = self.df.withColumn( "qualityControls", - StudyIndex.update_quality_flag( - f.col("qualityControls"), - ~f.col("hasSumstats"), - StudyQualityCheck.SUMSTATS_NOT_AVAILABLE, - ), + f.array(f.lit(StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value)) ) return self diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 96d0902c3..60e28b9a1 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -280,6 +280,15 @@ def finemap( StudyLocusQualityCheck.NOT_QUALIFYING_LD_BLOCK, ), ) + # Flagging all PICS loci with OUT_OF_SAMPLE_LD flag: + .withColumn( + "qualityControls", + StudyLocus.update_quality_flag( + f.col("qualityControls"), + f.lit(True), + StudyLocusQualityCheck.OUT_OF_SAMPLE_LD, + ), + ) .withColumn( "finemappingMethod", finemapping_method_expression, diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 1d34479e1..19b833124 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -629,7 +629,7 @@ class TestStudyLocusValidation: STUDY_LOCUS_DATA = [ # Won't be flagged: - ("1", "v1", "s1", 1.0, -8, [], "pics"), + ("1", "v1", "s1", 1.0, -8, [], "PICS"), # Already flagged, needs to be tested if the flag reamins unique: ( "2", @@ -638,7 +638,7 @@ class TestStudyLocusValidation: 5.0, -4, [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value], - "pics", + "PICS", ), # To be flagged: ("3", "v3", "s3", 1.0, -4, [], "SuSiE-inf"), @@ -869,18 +869,18 @@ class TestStudyLocusRedundancyFlagging: """Collection of tests related to flagging redundant credible sets.""" STUDY_LOCUS_DATA = [ - ("1", "v1", "s1", "pics", []), - ("2", "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("3", "v3", "s1", "pics", []), - ("3", "v3", "s1", "pics", []), - ("1", "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s1", "PICS", []), + ("2", "v2", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("3", "v3", "s1", "PICS", []), + ("3", "v3", "s1", "PICS", []), + ("1", "v1", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), ("1", "v1", "s3", "SuSie", []), - ("1", "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s4", "pics", []), + ("1", "v1", "s3", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s4", "PICS", []), ("1", "v1", "s4", "SuSie", []), - ("1", "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s4", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), ] STUDY_LOCUS_SCHEMA = t.StructType( @@ -946,7 +946,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v1", "s1", "X", - "pics", + "PICS", 1, 3, [ @@ -962,7 +962,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v2", "s1", "X", - "pics", + "PICS", 4, 5, [ @@ -977,7 +977,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v3", "s1", "X", - "pics", + "PICS", 6, 7, [ @@ -1004,7 +1004,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v5", "s1", "X", - "pics", + "PICS", 5, 5, [ @@ -1018,7 +1018,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v6", "s2", "X", - "pics", + "PICS", 3, 5, [ @@ -1141,11 +1141,11 @@ class TestStudyLocusDuplicationFlagging: STUDY_LOCUS_DATA = [ # Non-duplicated: - ("1", "v1", "s1", "pics"), + ("1", "v1", "s1", "PICS"), # Triplicate: - ("3", "v3", "s1", "pics"), - ("3", "v3", "s1", "pics"), - ("3", "v3", "s1", "pics"), + ("3", "v3", "s1", "PICS"), + ("3", "v3", "s1", "PICS"), + ("3", "v3", "s1", "PICS"), ] STUDY_LOCUS_SCHEMA = t.StructType(