Skip to content

Commit

Permalink
Merge pull request #87 from elixir-europe/map-data-files-to-repository
Browse files Browse the repository at this point in the history
Map data files to repository
  • Loading branch information
kdp-cloud authored Nov 12, 2024
2 parents 7390763 + 5bd6cb4 commit 98c8b1e
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 32 deletions.
7 changes: 3 additions & 4 deletions mars-cli/mars_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from mars_lib.logging import print_and_log
from mars_lib.validation import validate, CustomValidationException
from logging.handlers import RotatingFileHandler
from pydantic import ValidationError
import requests
import sys
import os
Expand Down Expand Up @@ -264,13 +263,13 @@ def submit(
target_repositories = []

if submit_to_biosamples:
target_repositories.append(TargetRepository.BIOSAMPLES)
target_repositories.append(TargetRepository.BIOSAMPLES.value)

if submit_to_ena:
target_repositories.append(TargetRepository.ENA)
target_repositories.append(TargetRepository.ENA.value)

if submit_to_metabolights:
target_repositories.append(TargetRepository.METABOLIGHTS)
target_repositories.append(TargetRepository.METABOLIGHTS.value)

print_and_log(
f"Starting submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}."
Expand Down
59 changes: 57 additions & 2 deletions mars-cli/mars_lib/isa_json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from typing import Union, List, Any, Tuple, Optional
from typing import Union, List, Any, Tuple, Optional, Dict

from mars_lib.logging import print_and_log
from mars_lib.models.isa_json import (
Investigation,
Assay,
Expand Down Expand Up @@ -38,7 +40,7 @@ def reduce_isa_json_for_target_repo(
new_studies = []
studies = filtered_isa_json.investigation.studies
for study in studies:
if target_repo == TargetRepository.BIOSAMPLES:
if target_repo == TargetRepository.BIOSAMPLES.value:
filtered_assays = []
else:
assays = study.assays
Expand Down Expand Up @@ -420,3 +422,56 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa

isa_json.investigation = investigation
return isa_json


def map_data_files_to_repositories(
    files: List[str], isa_json: IsaJson
) -> Dict[str, List[str]]:
    """Group the data files passed on the command line by target repository.

    Every assay in the ISA-JSON is inspected: its target repository comment
    decides the key, and the names of its data files decide which of the
    given ``files`` belong under that key.

    Matching is done on the base name only (the text after the last ``/``),
    so two files in different directories sharing a base name cannot be
    told apart.

    Args:
        files: Paths of the data files passed in the command.
        isa_json: The ISA-JSON whose assays reference the data files.

    Returns:
        A mapping from target repository name to the full paths of the files
        referenced by the assays of that repository. When several assays
        target the same repository their files are merged (without
        duplicates) instead of the last assay overwriting the earlier ones.

    Raises:
        ValueError: If an assay has no target repository comment value, or if
            a data file named in an assay is not among ``files``.
    """
    df_map: Dict[str, List[str]] = {}
    assays: List[Assay] = [
        assay for study in isa_json.investigation.studies for assay in study.assays
    ]

    files_dicts = [{"full_name": f, "short_name": f.split("/")[-1]} for f in files]
    # Built once so the per-data-file membership checks below are O(1).
    known_short_names = {fd["short_name"] for fd in files_dicts}
    # Short names referenced by at least one assay; the rest get a warning.
    matched_short_names = set()

    for assay in assays:
        target_repo_comment: Comment = detect_target_repo_comment(assay.comments)
        # This is an effect of everything being optional in the Comment model.
        # Should we decide to make the value mandatory, this guard clause
        # would not be necessary anymore.
        if target_repo_comment.value is None:
            raise ValueError(
                f"At least one assay in the ISA-JSON has no '{TARGET_REPO_KEY}' comment. Mapping not possible. Make sure all assays in the ISA-JSON have this comment!"
            )
        assay_data_files = [df.name for df in assay.dataFiles]

        # Every data file named in the ISA-JSON must be present in the command.
        for adf in assay_data_files:
            if adf not in known_short_names:
                raise ValueError(
                    f"Assay for repository '{target_repo_comment.value}' has encountered a mismatch while mapping the data files to the ISA-JSON.\n"
                    f"Data File '{adf}' is missing in the data files passed in the command:\n"
                    f"{files}\n"
                    "Please correct the mismatch!"
                )

        assay_names = set(assay_data_files)
        matched_short_names.update(assay_names)

        # setdefault keeps the files of earlier assays for the same repository
        # and still creates an (empty) entry for assays without data files.
        repo_files = df_map.setdefault(target_repo_comment.value, [])
        for fd in files_dicts:
            if fd["short_name"] in assay_names and fd["full_name"] not in repo_files:
                repo_files.append(fd["full_name"])

    # Files that no assay refers to are reported and left out of the mapping.
    for fd in files_dicts:
        if fd["short_name"] not in matched_short_names:
            print_and_log(
                msg=f"File '{fd['short_name']}' could not be mapped to any data file in the ISA-JSON. For this reason, it will be skipped during submission!",
                level="warning",
            )

    return df_map
44 changes: 27 additions & 17 deletions mars-cli/mars_lib/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
load_isa_json,
reduce_isa_json_for_target_repo,
update_isa_json,
map_data_files_to_repositories,
)
from mars_lib.models.isa_json import Comment, IsaJson
from mars_lib.models.repository_response import RepositoryResponse
Expand Down Expand Up @@ -52,7 +53,7 @@ def submission(
urls: dict[str, Any],
file_transfer: str,
output: str,
data_file_paths=None,
data_file_paths: List[TextIOWrapper] = [],
) -> None:
# If credential manager info found:
# Get password from the credential manager
Expand Down Expand Up @@ -80,6 +81,11 @@ def submission(
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
)

# create data file map
data_file_map = map_data_files_to_repositories(
files=[str(dfp) for dfp in data_file_paths], isa_json=isa_json
)

time_stamp = datetime.timestamp(datetime.now())

if DEBUG:
Expand All @@ -91,7 +97,7 @@ def submission(
):
raise ValueError("No target repository selected.")

if TargetRepository.BIOSAMPLES in target_repositories:
if TargetRepository.BIOSAMPLES.value in target_repositories:
# Submit to Biosamples
biosamples_result = submit_to_biosamples(
isa_json=isa_json,
Expand All @@ -100,7 +106,7 @@ def submission(
webin_token_url=urls["WEBIN"]["TOKEN"],
)
print_and_log(
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
f"Submission to {TargetRepository.BIOSAMPLES.value} was successful. Result:\n{biosamples_result.json()}",
level="info",
)
# Update `isa_json`, based on the receipt returned
Expand All @@ -111,16 +117,20 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "1_after_biosamples", isa_json)

if TargetRepository.ENA in target_repositories:
if TargetRepository.ENA.value in target_repositories:
# Step 1 : upload data if file paths are provided
if data_file_paths and file_transfer:
upload_to_ena(
file_paths=data_file_paths,
file_paths=[
Path(df) for df in data_file_map[TargetRepository.ENA.value]
],
user_credentials=user_credentials,
submission_url=urls["ENA"]["DATA-SUBMISSION"],
file_transfer=file_transfer,
)
print_and_log(f"Start submitting to {TargetRepository.ENA}.", level="debug")
print_and_log(
f"Start submitting to {TargetRepository.ENA.value}.", level="debug"
)

# Step 2 : submit isa-json to ena
ena_result = submit_to_ena(
Expand All @@ -129,11 +139,11 @@ def submission(
submission_url=urls["ENA"]["SUBMISSION"],
)
print_and_log(
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
f"Submission to {TargetRepository.ENA.value} was successful. Result:\n{ena_result.json()}"
)

print_and_log(
f"Update ISA-JSON based on receipt from {TargetRepository.ENA}.",
f"Update ISA-JSON based on receipt from {TargetRepository.ENA.value}.",
level="debug",
)
ena_mars_receipt = RepositoryResponse.model_validate(
Expand All @@ -143,10 +153,10 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "2_after_ena", isa_json)

if TargetRepository.METABOLIGHTS in target_repositories:
if TargetRepository.METABOLIGHTS.value in target_repositories:
# Submit to MetaboLights
metabolights_result = upload_to_metabolights(
file_paths=data_file_paths,
file_paths=data_file_map[TargetRepository.METABOLIGHTS.value],
file_transfer=file_transfer,
isa_json=isa_json,
metabolights_credentials=user_credentials,
Expand All @@ -155,7 +165,7 @@ def submission(
)
metabolights_receipt_obj = metabolights_result.json()
print_and_log(
f"Submission to {TargetRepository.METABOLIGHTS} was successful. Result:\n{metabolights_receipt_obj}",
f"Submission to {TargetRepository.METABOLIGHTS.value} was successful. Result:\n{metabolights_receipt_obj}",
level="info",
)
metabolights_receipt = RepositoryResponse.model_validate(
Expand All @@ -171,11 +181,11 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "3_after_metabolights", isa_json)

if TargetRepository.EVA in target_repositories:
if TargetRepository.EVA.value in target_repositories:
# Submit to EVA
# TODO: Filter out other assays
print_and_log(
f"Submission to {TargetRepository.EVA} was successful.", level="info"
f"Submission to {TargetRepository.EVA.value} was successful.", level="info"
)
# TODO: Update `isa_json`, based on the receipt returned

Expand All @@ -201,7 +211,7 @@ def submit_to_biosamples(
headers=headers,
params=params,
json=reduce_isa_json_for_target_repo(
isa_json, TargetRepository.BIOSAMPLES
isa_json, TargetRepository.BIOSAMPLES.value
).model_dump(by_alias=True, exclude_none=True),
)

Expand Down Expand Up @@ -338,9 +348,9 @@ def submit_to_ena(
submission_url,
headers=headers,
params=params,
json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump(
by_alias=True, exclude_none=True
),
json=reduce_isa_json_for_target_repo(
isa_json, TargetRepository.ENA.value
).model_dump(by_alias=True, exclude_none=True),
)

if result.status_code != 200:
Expand Down
1 change: 1 addition & 0 deletions mars-cli/mars_lib/target_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class TargetRepository(str, Enum):
METABOLIGHTS = "metabolights"
BIOSAMPLES = "biosamples"
EVA = "eva"
ARRAYEXPRESS = "arrayexpress"

@classmethod
def available_repositories(cls):
Expand Down
2 changes: 1 addition & 1 deletion mars-cli/tests/test_biosample_external_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_validate_bs_accession():
validate_bs_accession(invalid_accession)

valid_accession = "SAMEA112654119"
assert validate_bs_accession(valid_accession) != ValueError
validate_bs_accession(valid_accession)


def test_validate_json_against_schema():
Expand Down
Loading

0 comments on commit 98c8b1e

Please sign in to comment.