diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 3a67e7868..0a1f9438a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -121,10 +121,16 @@ class GWASCatalogSumstatsPreprocessConfig(StepConfig): class EqtlCatalogueConfig(StepConfig): """eQTL Catalogue step configuration.""" + session: Any = field( + default_factory=lambda: { + "start_hail": True, + } + ) eqtl_catalogue_paths_imported: str = MISSING eqtl_catalogue_study_index_out: str = MISSING eqtl_catalogue_credible_sets_out: str = MISSING mqtl_quantification_methods_blacklist: list[str] = field(default_factory=lambda: []) + eqtl_lead_pvalue_threshold: float = 1e-3 _target_: str = "gentropy.eqtl_catalogue.EqtlCatalogueStep" @@ -168,6 +174,7 @@ class FinngenFinemappingConfig(StepConfig): _target_: str = ( "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" ) + finngen_finemapping_lead_pvalue_threshold: float = 1e-5 @dataclass diff --git a/src/gentropy/eqtl_catalogue.py b/src/gentropy/eqtl_catalogue.py index 7adc5d8a2..3ad61ddea 100644 --- a/src/gentropy/eqtl_catalogue.py +++ b/src/gentropy/eqtl_catalogue.py @@ -3,6 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session +from gentropy.config import EqtlCatalogueConfig from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex @@ -20,6 +21,7 @@ def __init__( eqtl_catalogue_paths_imported: str, eqtl_catalogue_study_index_out: str, eqtl_catalogue_credible_sets_out: str, + eqtl_lead_pvalue_threshold: float = EqtlCatalogueConfig().eqtl_lead_pvalue_threshold, ) -> None: """Run eQTL Catalogue ingestion step. @@ -29,6 +31,7 @@ def __init__( eqtl_catalogue_paths_imported (str): Input eQTL Catalogue fine mapping results path. eqtl_catalogue_study_index_out (str): Output eQTL Catalogue study index path. eqtl_catalogue_credible_sets_out (str): Output eQTL Catalogue credible sets path. 
+ eqtl_lead_pvalue_threshold (float, optional): Lead p-value threshold. Defaults to EqtlCatalogueConfig().eqtl_lead_pvalue_threshold. """ # Extract studies_metadata = EqtlCatalogueStudyIndex.read_studies_from_source( @@ -58,13 +61,19 @@ def __init__( processed_susie_df = EqtlCatalogueFinemapping.parse_susie_results( credible_sets_df, lbf_df, studies_metadata ) - credible_sets = EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) - study_index = EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) - # Load - study_index.df.write.mode(session.write_mode).parquet( - eqtl_catalogue_study_index_out + ( + EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(eqtl_catalogue_study_index_out) ) - credible_sets.df.write.mode(session.write_mode).parquet( - eqtl_catalogue_credible_sets_out + + ( + EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) + # Flagging sub-significant loci: + .validate_lead_pvalue(pvalue_cutoff=eqtl_lead_pvalue_threshold) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(eqtl_catalogue_credible_sets_out) ) diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index 80089cf68..ca5ca1656 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -20,6 +20,7 @@ def __init__( finngen_finemapping_out: str, finngen_susie_finemapping_snp_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_cs_summary_files, + finngen_finemapping_lead_pvalue_threshold: float = FinngenFinemappingConfig().finngen_finemapping_lead_pvalue_threshold, ) -> None: """Run FinnGen finemapping ingestion step. 
@@ -28,16 +29,21 @@ def __init__( finngen_finemapping_out (str): Output path for the finemapping results in StudyLocus format. finngen_susie_finemapping_snp_files(str): Path to the FinnGen SuSIE finemapping results. finngen_susie_finemapping_cs_summary_files (str): FinnGen SuSIE summaries for CS filters(LBF>2). + finngen_finemapping_lead_pvalue_threshold (float): Lead p-value threshold. """ # Read finemapping outputs from the input paths. - finngen_finemapping_df = FinnGenFinemapping.from_finngen_susie_finemapping( - spark=session.spark, - finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, - finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, - ) - - # Write the output. - finngen_finemapping_df.df.write.mode(session.write_mode).parquet( - finngen_finemapping_out + ( + FinnGenFinemapping.from_finngen_susie_finemapping( + spark=session.spark, + finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, + finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + ) + # Flagging sub-significant loci: + .validate_lead_pvalue( + pvalue_cutoff=finngen_finemapping_lead_pvalue_threshold + ) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(finngen_finemapping_out) ) diff --git a/src/gentropy/pics.py b/src/gentropy/pics.py index e80a37eb6..f96f54997 100644 --- a/src/gentropy/pics.py +++ b/src/gentropy/pics.py @@ -3,6 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session +from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.study_locus import CredibleInterval, StudyLocus from gentropy.method.pics import PICS @@ -28,8 +29,14 @@ def __init__( session, study_locus_ld_annotated_in ) # PICS - picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set( - credible_interval=CredibleInterval.IS99 + ( + PICS.finemap(study_locus_ld_annotated) + 
+ .filter_credible_set(credible_interval=CredibleInterval.IS99) + # Flagging sub-significant loci: + .validate_lead_pvalue( + pvalue_cutoff=WindowBasedClumpingStepConfig().gwas_significance + ) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(picsed_study_locus_out) ) - # Write - picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out)