Skip to content

Commit

Permalink
feat: changes to PICS credible sets (OUT_OF_SAMPLE_LD QC flag and cap…
Browse files Browse the repository at this point in the history
…ital PICS) (opentargets#910)

* feat: add OUT_OF_SAMPLE_LD QC flag to PICS credible sets

* feat: change pics finemapping method to PICS

* test: change pics to PICS in test data

* fix: flag studies without sumstats without relying on hasSumstats column

* fix: flag studies without sumstats without using update_quality_flag function
  • Loading branch information
vivienho authored Nov 12, 2024
1 parent 10b4be0 commit e5b3c9e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 27 deletions.
2 changes: 1 addition & 1 deletion src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class FinemappingMethod(Enum):
SUSIE_INF (str): SuSiE-inf method implemented in `gentropy`
"""

PICS = "pics"
PICS = "PICS"
SUSIE = "SuSie"
SUSIE_INF = "SuSiE-inf"

Expand Down
6 changes: 1 addition & 5 deletions src/gentropy/datasource/gwas_catalog/study_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,11 +655,7 @@ def add_no_sumstats_flag(self: StudyIndexGWASCatalog) -> StudyIndexGWASCatalog:
"""
self.df = self.df.withColumn(
"qualityControls",
StudyIndex.update_quality_flag(
f.col("qualityControls"),
~f.col("hasSumstats"),
StudyQualityCheck.SUMSTATS_NOT_AVAILABLE,
),
f.array(f.lit(StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value))
)
return self

Expand Down
9 changes: 9 additions & 0 deletions src/gentropy/method/pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,15 @@ def finemap(
StudyLocusQualityCheck.NOT_QUALIFYING_LD_BLOCK,
),
)
# Flagging all PICS loci with OUT_OF_SAMPLE_LD flag:
.withColumn(
"qualityControls",
StudyLocus.update_quality_flag(
f.col("qualityControls"),
f.lit(True),
StudyLocusQualityCheck.OUT_OF_SAMPLE_LD,
),
)
.withColumn(
"finemappingMethod",
finemapping_method_expression,
Expand Down
42 changes: 21 additions & 21 deletions tests/gentropy/dataset/test_study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ class TestStudyLocusValidation:

STUDY_LOCUS_DATA = [
# Won't be flagged:
("1", "v1", "s1", 1.0, -8, [], "pics"),
("1", "v1", "s1", 1.0, -8, [], "PICS"),
# Already flagged, needs to be tested if the flag remains unique:
(
"2",
Expand All @@ -638,7 +638,7 @@ class TestStudyLocusValidation:
5.0,
-4,
[StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value],
"pics",
"PICS",
),
# To be flagged:
("3", "v3", "s3", 1.0, -4, [], "SuSiE-inf"),
Expand Down Expand Up @@ -869,18 +869,18 @@ class TestStudyLocusRedundancyFlagging:
"""Collection of tests related to flagging redundant credible sets."""

STUDY_LOCUS_DATA = [
("1", "v1", "s1", "pics", []),
("2", "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("3", "v3", "s1", "pics", []),
("3", "v3", "s1", "pics", []),
("1", "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s1", "PICS", []),
("2", "v2", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
("3", "v3", "s1", "PICS", []),
("3", "v3", "s1", "PICS", []),
("1", "v1", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s3", "SuSie", []),
("1", "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s4", "pics", []),
("1", "v1", "s3", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s4", "PICS", []),
("1", "v1", "s4", "SuSie", []),
("1", "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]),
("1", "v1", "s4", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]),
]

STUDY_LOCUS_SCHEMA = t.StructType(
Expand Down Expand Up @@ -946,7 +946,7 @@ class TestStudyLocusSuSiERedundancyFlagging:
"v1",
"s1",
"X",
"pics",
"PICS",
1,
3,
[
Expand All @@ -962,7 +962,7 @@ class TestStudyLocusSuSiERedundancyFlagging:
"v2",
"s1",
"X",
"pics",
"PICS",
4,
5,
[
Expand All @@ -977,7 +977,7 @@ class TestStudyLocusSuSiERedundancyFlagging:
"v3",
"s1",
"X",
"pics",
"PICS",
6,
7,
[
Expand All @@ -1004,7 +1004,7 @@ class TestStudyLocusSuSiERedundancyFlagging:
"v5",
"s1",
"X",
"pics",
"PICS",
5,
5,
[
Expand All @@ -1018,7 +1018,7 @@ class TestStudyLocusSuSiERedundancyFlagging:
"v6",
"s2",
"X",
"pics",
"PICS",
3,
5,
[
Expand Down Expand Up @@ -1141,11 +1141,11 @@ class TestStudyLocusDuplicationFlagging:

STUDY_LOCUS_DATA = [
# Non-duplicated:
("1", "v1", "s1", "pics"),
("1", "v1", "s1", "PICS"),
# Triplicate:
("3", "v3", "s1", "pics"),
("3", "v3", "s1", "pics"),
("3", "v3", "s1", "pics"),
("3", "v3", "s1", "PICS"),
("3", "v3", "s1", "PICS"),
("3", "v3", "s1", "PICS"),
]

STUDY_LOCUS_SCHEMA = t.StructType(
Expand Down

0 comments on commit e5b3c9e

Please sign in to comment.