Skip to content

Commit

Permalink
feat(qtl): ingest credible sets from single-cell-derived QTLs (#630)
Browse files Browse the repository at this point in the history
* chore: prototype ingestion of sc-eQTLs (single-cell eQTLs)

* chore: use credible sets from FTP

* feat(study_index): rename `tissueFromSourceId` to `biosampleFromSourceId` to accommodate cell type IDs

* chore: update output paths to final destination
  • Loading branch information
ireneisdoomed committed Jun 6, 2024
1 parent 947d1e6 commit fd3154a
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 8 deletions.
8 changes: 4 additions & 4 deletions src/airflow/dags/eqtl_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
AUTOSCALING = "eqtl-preprocess"
PROJECT_ID = "open-targets-genetics-dev"

EQTL_CATALOG_SUSIE_LOCATION = "gs://eqtl_catalogue_data/ebi_ftp/susie"
TEMP_DECOMPRESS_LOCATION = "gs://eqtl_catalogue_data/susie_decompressed_tmp"
DECOMPRESS_FAILED_LOG = f"{TEMP_DECOMPRESS_LOCATION}.log"
EQTL_CATALOGUE_SUSIE_LOCATION = "gs://eqtl_catalogue_data/ebi_ftp/susie"
TEMP_DECOMPRESS_LOCATION = f"{EQTL_CATALOGUE_SUSIE_LOCATION}_decompressed_tmp"
DECOMPRESS_FAILED_LOG = f"{TEMP_DECOMPRESS_LOCATION}/logs.log"
STUDY_INDEX_PATH = "gs://eqtl_catalogue_data/study_index"
CREDIBLE_SET_PATH = "gs://eqtl_catalogue_data/credible_set_datasets/susie"

Expand All @@ -35,7 +35,7 @@
location="europe-west1",
project_id=PROJECT_ID,
parameters={
"inputFilePattern": f"{EQTL_CATALOG_SUSIE_LOCATION}/**/*.gz",
"inputFilePattern": f"{EQTL_CATALOGUE_SUSIE_LOCATION}/**/*.gz",
"outputDirectory": TEMP_DECOMPRESS_LOCATION,
"outputFailureFile": DECOMPRESS_FAILED_LOG,
},
Expand Down
2 changes: 1 addition & 1 deletion src/gentropy/assets/schemas/study_index.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"metadata": {}
},
{
"name": "tissueFromSourceId",
"name": "biosampleFromSourceId",
"type": "string",
"nullable": true,
"metadata": {}
Expand Down
3 changes: 2 additions & 1 deletion src/gentropy/datasource/eqtl_catalogue/finemapping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Process SuSIE finemapping results from eQTL Catalogue."""

from __future__ import annotations

from dataclasses import dataclass
Expand Down Expand Up @@ -190,7 +191,7 @@ def parse_susie_results(
f.col("sample_group"),
f.col("molecular_trait_id"),
).alias("studyId"),
f.col("tissue_id").alias("tissueFromSourceId"),
f.col("tissue_id").alias("biosampleFromSourceId"),
EqtlCatalogueStudyIndex._identify_study_type(
f.col("quant_method")
).alias("studyType"),
Expand Down
5 changes: 3 additions & 2 deletions src/gentropy/datasource/eqtl_catalogue/study_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class EqtlCatalogueStudyIndex:
StructField("quant_method", StringType(), True),
]
)
raw_studies_metadata_path = "https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/19929ff6a99bf402194292a14f96f9615b35f65f/data_tables/dataset_metadata.tsv"
raw_studies_metadata_path = "https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/4c8ca340e3eb2878073b290785cb8ff1a4c788f8/data_tables/dataset_metadata_upcoming.tsv"

@classmethod
def _identify_study_type(
Expand Down Expand Up @@ -85,7 +85,8 @@ def _identify_study_type(

@classmethod
def get_studies_of_interest(
cls: type[EqtlCatalogueStudyIndex], studies_metadata: DataFrame
cls: type[EqtlCatalogueStudyIndex],
studies_metadata: DataFrame,
) -> list[str]:
"""Filter studies of interest from the raw studies metadata.
Expand Down

0 comments on commit fd3154a

Please sign in to comment.