Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Map data files to repository #87

Merged
merged 13 commits into from
Nov 12, 2024
7 changes: 3 additions & 4 deletions mars-cli/mars_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from mars_lib.logging import print_and_log
from mars_lib.validation import validate, CustomValidationException
from logging.handlers import RotatingFileHandler
from pydantic import ValidationError
import requests
import sys
import os
Expand Down Expand Up @@ -264,13 +263,13 @@ def submit(
target_repositories = []

if submit_to_biosamples:
target_repositories.append(TargetRepository.BIOSAMPLES)
target_repositories.append(TargetRepository.BIOSAMPLES.value)

if submit_to_ena:
target_repositories.append(TargetRepository.ENA)
target_repositories.append(TargetRepository.ENA.value)

if submit_to_metabolights:
target_repositories.append(TargetRepository.METABOLIGHTS)
target_repositories.append(TargetRepository.METABOLIGHTS.value)

print_and_log(
f"Starting submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}."
Expand Down
59 changes: 57 additions & 2 deletions mars-cli/mars_lib/isa_json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from typing import Union, List, Any, Tuple, Optional
from typing import Union, List, Any, Tuple, Optional, Dict

from mars_lib.logging import print_and_log
from mars_lib.models.isa_json import (
Investigation,
Assay,
Expand Down Expand Up @@ -38,7 +40,7 @@ def reduce_isa_json_for_target_repo(
new_studies = []
studies = filtered_isa_json.investigation.studies
for study in studies:
if target_repo == TargetRepository.BIOSAMPLES:
if target_repo == TargetRepository.BIOSAMPLES.value:
filtered_assays = []
else:
assays = study.assays
Expand Down Expand Up @@ -420,3 +422,56 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa

isa_json.investigation = investigation
return isa_json


def map_data_files_to_repositories(
files: List[str], isa_json: IsaJson
) -> Dict[str, List[str]]:
# Note: This works well in
df_map: Dict[str, List[str]] = {}
assays: List[Assay] = [
assay for study in isa_json.investigation.studies for assay in study.assays
]

files_dicts = [{"full_name": f, "short_name": f.split("/")[-1]} for f in files]
remaining_files = files_dicts.copy()
for assay in assays:
target_repo_comment: Comment = detect_target_repo_comment(assay.comments)
# This is an effect of everything being optional in the Comment model.
# Should we decide to make the value mandatory, this guard clause would not be necessary anymore.
if target_repo_comment.value is None:
raise ValueError(
f"At least one assay in the ISA-JSON has no '{TARGET_REPO_KEY}' comment. Mapping not possible. Make sure all assays in the ISA-JSON have this comment!"
)
assay_data_files = [df.name for df in assay.dataFiles]

# Check if the files in the ISA-JSON are present in the command
# If not, raise an error
for adf in assay_data_files:
if adf not in [fd["short_name"] for fd in files_dicts]:
raise ValueError(
f"""Assay for repository '{target_repo_comment.value}' has encountered a mismatch while mapping the data files to the ISA-JSON.
Data File '{adf}' is missing in the data files passed in the command:
{files}
Please correct the mismatch!"""
)
else:
remaining_files = [
fd for fd in remaining_files if fd["short_name"] != adf
]

df_map[target_repo_comment.value] = [
fd["full_name"]
for fd in files_dicts
if fd["short_name"] in assay_data_files
]

[
print_and_log(
msg=f"File '{rf['short_name']}' could not be mapped to any data file in the ISA-JSON. For this reason, it will be skipped during submission!",
level="warning",
)
for rf in remaining_files
]

return df_map
44 changes: 27 additions & 17 deletions mars-cli/mars_lib/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
load_isa_json,
reduce_isa_json_for_target_repo,
update_isa_json,
map_data_files_to_repositories,
)
from mars_lib.models.isa_json import Comment, IsaJson
from mars_lib.models.repository_response import RepositoryResponse
Expand Down Expand Up @@ -52,7 +53,7 @@ def submission(
urls: dict[str, Any],
file_transfer: str,
output: str,
data_file_paths=None,
data_file_paths: List[TextIOWrapper] = [],
) -> None:
# If credential manager info found:
# Get password from the credential manager
Expand Down Expand Up @@ -80,6 +81,11 @@ def submission(
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
)

# create data file map
data_file_map = map_data_files_to_repositories(
files=[str(dfp) for dfp in data_file_paths], isa_json=isa_json
)

time_stamp = datetime.timestamp(datetime.now())

if DEBUG:
Expand All @@ -91,7 +97,7 @@ def submission(
):
raise ValueError("No target repository selected.")

if TargetRepository.BIOSAMPLES in target_repositories:
if TargetRepository.BIOSAMPLES.value in target_repositories:
# Submit to Biosamples
biosamples_result = submit_to_biosamples(
isa_json=isa_json,
Expand All @@ -100,7 +106,7 @@ def submission(
webin_token_url=urls["WEBIN"]["TOKEN"],
)
print_and_log(
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
f"Submission to {TargetRepository.BIOSAMPLES.value} was successful. Result:\n{biosamples_result.json()}",
level="info",
)
# Update `isa_json`, based on the receipt returned
Expand All @@ -111,16 +117,20 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "1_after_biosamples", isa_json)

if TargetRepository.ENA in target_repositories:
if TargetRepository.ENA.value in target_repositories:
# Step 1 : upload data if file paths are provided
if data_file_paths and file_transfer:
upload_to_ena(
file_paths=data_file_paths,
file_paths=[
Path(df) for df in data_file_map[TargetRepository.ENA.value]
],
user_credentials=user_credentials,
submission_url=urls["ENA"]["DATA-SUBMISSION"],
file_transfer=file_transfer,
)
print_and_log(f"Start submitting to {TargetRepository.ENA}.", level="debug")
print_and_log(
f"Start submitting to {TargetRepository.ENA.value}.", level="debug"
)

# Step 2 : submit isa-json to ena
ena_result = submit_to_ena(
Expand All @@ -129,11 +139,11 @@ def submission(
submission_url=urls["ENA"]["SUBMISSION"],
)
print_and_log(
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
f"Submission to {TargetRepository.ENA.value} was successful. Result:\n{ena_result.json()}"
)

print_and_log(
f"Update ISA-JSON based on receipt from {TargetRepository.ENA}.",
f"Update ISA-JSON based on receipt from {TargetRepository.ENA.value}.",
level="debug",
)
ena_mars_receipt = RepositoryResponse.model_validate(
Expand All @@ -143,10 +153,10 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "2_after_ena", isa_json)

if TargetRepository.METABOLIGHTS in target_repositories:
if TargetRepository.METABOLIGHTS.value in target_repositories:
# Submit to MetaboLights
metabolights_result = upload_to_metabolights(
file_paths=data_file_paths,
file_paths=data_file_map[TargetRepository.METABOLIGHTS.value],
file_transfer=file_transfer,
isa_json=isa_json,
metabolights_credentials=user_credentials,
Expand All @@ -155,7 +165,7 @@ def submission(
)
metabolights_receipt_obj = metabolights_result.json()
print_and_log(
f"Submission to {TargetRepository.METABOLIGHTS} was successful. Result:\n{metabolights_receipt_obj}",
f"Submission to {TargetRepository.METABOLIGHTS.value} was successful. Result:\n{metabolights_receipt_obj}",
level="info",
)
metabolights_receipt = RepositoryResponse.model_validate(
Expand All @@ -171,11 +181,11 @@ def submission(
if DEBUG:
save_step_to_file(time_stamp, "3_after_metabolights", isa_json)

if TargetRepository.EVA in target_repositories:
if TargetRepository.EVA.value in target_repositories:
# Submit to EVA
# TODO: Filter out other assays
print_and_log(
f"Submission to {TargetRepository.EVA} was successful.", level="info"
f"Submission to {TargetRepository.EVA.value} was successful.", level="info"
)
# TODO: Update `isa_json`, based on the receipt returned

Expand All @@ -201,7 +211,7 @@ def submit_to_biosamples(
headers=headers,
params=params,
json=reduce_isa_json_for_target_repo(
isa_json, TargetRepository.BIOSAMPLES
isa_json, TargetRepository.BIOSAMPLES.value
).model_dump(by_alias=True, exclude_none=True),
)

Expand Down Expand Up @@ -338,9 +348,9 @@ def submit_to_ena(
submission_url,
headers=headers,
params=params,
json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump(
by_alias=True, exclude_none=True
),
json=reduce_isa_json_for_target_repo(
isa_json, TargetRepository.ENA.value
).model_dump(by_alias=True, exclude_none=True),
)

if result.status_code != 200:
Expand Down
1 change: 1 addition & 0 deletions mars-cli/mars_lib/target_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class TargetRepository(str, Enum):
METABOLIGHTS = "metabolights"
BIOSAMPLES = "biosamples"
EVA = "eva"
ARRAYEXPRESS = "arrayexpress"

@classmethod
def available_repositories(cls):
Expand Down
2 changes: 1 addition & 1 deletion mars-cli/tests/test_biosample_external_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_validate_bs_accession():
validate_bs_accession(invalid_accession)

valid_accession = "SAMEA112654119"
assert validate_bs_accession(valid_accession) != ValueError
validate_bs_accession(valid_accession)


def test_validate_json_against_schema():
Expand Down
Loading