diff --git a/config/step/ot_variant_index.yaml b/config/step/ot_variant_index.yaml index 1625c7126..3834196b2 100644 --- a/config/step/ot_variant_index.yaml +++ b/config/step/ot_variant_index.yaml @@ -2,5 +2,5 @@ defaults: - variant_index variant_annotation_path: ${datasets.variant_annotation} -credible_set_path: ${datasets.study_locus} +credible_set_path: ${datasets.credible_set} variant_index_path: ${datasets.variant_index} diff --git a/src/airflow/dags/genetics_etl.py b/src/airflow/dags/genetics_etl.py index 55f343648..ae510131c 100644 --- a/src/airflow/dags/genetics_etl.py +++ b/src/airflow/dags/genetics_etl.py @@ -35,16 +35,16 @@ # PICS credible sets from GWAS Catalog curated associations: "gwas_catalog_curated_credible_set": { "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "credible_set_datasets/gwas_catalog_curated", + "source_object": "credible_set_datasets/gwas_catalog_PICSed_curated_associations", "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_pics_from_curation", + "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_PICSed_curated_associations", }, # PICS credible sets from GWAS Catalog summary statistics: "gwas_catalog_sumstats_credible_set": { "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "credible_set_datasets/gwas_catalog_summary_stats", + "source_object": "credible_set_datasets/gwas_catalog_PICSed_summary_statistics", "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_pics_from_summary_statistics", + "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_PICSed_summary_statistics", }, # GWAS Catalog manifest files: "gwas_catalog_manifests": { diff --git a/src/airflow/dags/gwas_catalog_preprocess.py b/src/airflow/dags/gwas_catalog_preprocess.py index 36130c87e..1814ddf2d 100644 --- a/src/airflow/dags/gwas_catalog_preprocess.py +++ b/src/airflow/dags/gwas_catalog_preprocess.py @@ -45,12 +45,8 @@ WINDOW_BASED_CLUMPED = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_summary_stats_window_clumped" LD_BASED_CLUMPED = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_summary_stats_ld_clumped" # Credible sets: -CURATED_CREDIBLE_SETS = ( - f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_curated" -) -SUMMARY_STATISTICS_CREDIBLE_SETS = ( - f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_summary_stats" -) +CURATED_CREDIBLE_SETS = f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_PICSed_curated_associations" +SUMMARY_STATISTICS_CREDIBLE_SETS = f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_PICSed_summary_statistics" def upload_harmonized_study_list( diff --git a/src/gentropy/pics.py b/src/gentropy/pics.py index c2ed9bf66..80421b9ae 100644 --- a/src/gentropy/pics.py +++ b/src/gentropy/pics.py @@ -3,7 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import CredibleInterval, StudyLocus from gentropy.method.pics import PICS @@ -28,6 +28,10 @@ def __init__( session, study_locus_ld_annotated_in ) # PICS - picsed_sl = PICS.finemap(study_locus_ld_annotated).annotate_credible_sets() + picsed_sl = ( + PICS.finemap(study_locus_ld_annotated) + .annotate_credible_sets() + .filter_credible_set(credible_interval=CredibleInterval.IS99) + ) # Write picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out)