From 148e26e7013ebd400f4ada63a4d0a8b2480c490b Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 24 Sep 2024 16:36:44 +0100 Subject: [PATCH] fix: small qc flag fixes (#784) --- src/gentropy/dataset/study_index.py | 2 -- src/gentropy/study_locus_validation.py | 5 +---- src/gentropy/study_validation.py | 3 +-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index ac637f137..3c3debba9 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -30,14 +30,12 @@ class StudyQualityCheck(Enum): UNRESOLVED_DISEASE (str): Disease identifier could not match to referece or retired identifier - labelling failing disease UNKNOWN_STUDY_TYPE (str): Indicating the provided type of study is not supported. DUPLICATED_STUDY (str): Flagging if a study identifier is not unique. - NO_GENE_PROVIDED (str): Flagging QTL studies if the measured """ UNRESOLVED_TARGET = "Target/gene identifier could not match to reference." UNRESOLVED_DISEASE = "No valid disease identifier found." UNKNOWN_STUDY_TYPE = "This type of study is not supported." DUPLICATED_STUDY = "The identifier of this study is not unique." - NO_GENE_PROVIDED = "QTL study doesn't have gene assigned." @dataclass diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 4d1c234dc..7c853bbcb 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -41,14 +41,11 @@ def __init__( # Running validation then writing output: study_locus_with_qc = ( StudyLocus.from_parquet(session, list(study_locus_path)) - # Flagging study locus with subsignificant p-values - .validate_lead_pvalue(pvalue_cutoff=gwas_significance) # Add flag for MHC region .qc_MHC_region() .validate_study(study_index) # Flagging studies not in study index - .annotate_study_type(study_index) # Add study type to study locus + .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics - .validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows( diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index 5bfb83fe0..565aa410d 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -58,8 +58,7 @@ def __init__( # Running validation: study_index_with_qc = ( - study_index.validate_disease(disease_index) - .validate_unique_study_id() # Flagging duplicated study ids + study_index.validate_unique_study_id() # Flagging duplicated study ids .validate_study_type() # Flagging non-supported study types. .validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs