Skip to content

Commit

Permalink
Merge pull request #76 from oyurekten/metabolights
Browse files Browse the repository at this point in the history
Metabolights initial integration updates for MARS cli
  • Loading branch information
apriltuesday authored Nov 8, 2024
2 parents 2bf8d41 + 0c96d8f commit 7390763
Show file tree
Hide file tree
Showing 5 changed files with 2,125 additions and 8 deletions.
8 changes: 8 additions & 0 deletions mars-cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,14 @@ python mars_cli.py --development submit --submit-to-metabolights False --submit-
python mars_cli.py --credential-service-name biosamples --username-credentials <username> --file-transfer ftp --data-files ../data/ENA_data.R1.fastq.gz --submit-to-metabolights False --output final-isa ../data/biosamples-input-isa.json
```

### Submit data files and isa-json to Metabolights

Work in progress: transferring data files to MetaboLights is not supported yet; only the ISA-JSON is submitted.

```bash
python mars_cli.py --credential-service-name metabolights --username-credentials <username> --file-transfer ftp --data-files ../data/ISA-BH2024-ALL/metpro-analysis.txt <additional *.mzml data files> --submit-to-biosamples False --submit-to-ena False --output final-isa ../data/metabolights-input-isa.json
```

## Deploy repository services

[To set up and run the MARS tool locally using Docker, follow these steps](../repository-services/README.md)
Expand Down
38 changes: 36 additions & 2 deletions mars-cli/mars_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,23 @@
fallback="https://wwwdev.ebi.ac.uk/ena/submit/webin/auth/token",
),
},
"METABOLIGHTS": {
"SERVICE": config.get(
"metabolights",
"development-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/submissions/",
),
"SUBMISSION": config.get(
"metabolights",
"development-submission-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/submissions/",
),
"TOKEN": config.get(
"metabolights",
"development-token-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/auth/token",
),
},
"BIOSAMPLES": {
"SERVICE": config.get(
"biosamples",
Expand Down Expand Up @@ -124,6 +141,23 @@
fallback="https://wwwdev.ebi.ac.uk/ena/dev/submit/webin/auth/token",
),
},
"METABOLIGHTS": {
"SERVICE": config.get(
"metabolights",
"production-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/submissions/",
),
"SUBMISSION": config.get(
"metabolights",
"production-submission-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/submissions/",
),
"TOKEN": config.get(
"metabolights",
"production-token-url",
fallback="https://www-test.ebi.ac.uk/metabolights/mars/ws3/auth/token",
),
},
"BIOSAMPLES": {
"SERVICE": config.get(
"biosamples",
Expand Down Expand Up @@ -239,7 +273,7 @@ def submit(
target_repositories.append(TargetRepository.METABOLIGHTS)

print_and_log(
f"Staring submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}."
f"Starting submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}."
)

urls_dict = ctx.obj["FILTERED_URLS"]
Expand Down Expand Up @@ -267,7 +301,7 @@ def health_check(ctx):
print_and_log("Checking the health of the target repositories.")

filtered_urls = ctx.obj["FILTERED_URLS"]
for repo in ["WEBIN", "ENA", "BIOSAMPLES"]:
for repo in ["WEBIN", "ENA", "BIOSAMPLES", "METABOLIGHTS"]:
repo_url = filtered_urls[repo]["SERVICE"]
try:
health_response = requests.get(repo_url)
Expand Down
45 changes: 45 additions & 0 deletions mars-cli/mars_lib/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,48 @@ def get_webin_auth_token(
raise ValueError(error_message)

return token


def get_metabolights_auth_token(
    credentials_dict: dict[str, str],
    headers: Optional[dict[str, str]] = None,
    auth_url: str = "https://www-test.ebi.ac.uk/metabolights/mars/ws3/auth/token",
) -> Optional[str]:
    """
    Obtain a MetaboLights authentication token.

    Args:
        credentials_dict (dict): The credentials for authentication. Must
            contain the "username" and "password" keys.
        headers (dict): Optional extra request headers. They are merged over
            the default form-encoded/JSON-accepting headers.
        auth_url (str): The URL for MetaboLights authentication.

    Returns:
        str: The obtained token.

    Raises:
        KeyError: If "username" or "password" is missing from credentials_dict.
        requests.exceptions.RequestException: If the request fails or the
            server answers with an HTTP error status.
        ValueError: If the response holds no non-empty "access_token".
    """
    request_headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json",
    }
    if headers:
        request_headers.update(headers)

    # Hand requests a mapping instead of a hand-built string so that the
    # values are form-encoded; a raw '&', '=', '+' or '%' in the password
    # would otherwise corrupt the request body.
    form_data = {
        "grant_type": "password",
        "username": credentials_dict["username"],
        "password": credentials_dict["password"],
    }
    response = requests.post(
        auth_url,
        headers=request_headers,
        data=form_data,
        timeout=20,
    )
    response.raise_for_status()

    response_content = response.json()
    token = (
        response_content.get("access_token")
        if isinstance(response_content, dict)
        else None
    )
    if token:
        return token

    error_message = f"ERROR when generating token. See response's content below:\n{response_content}"
    # ValueError keeps this in line with get_webin_auth_token and is still
    # caught by callers that handle the broader Exception.
    raise ValueError(error_message)
141 changes: 135 additions & 6 deletions mars-cli/mars_lib/submit.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import io
import os
from datetime import datetime
from io import TextIOWrapper
import time
import requests
import json
from typing import Any
from mars_lib.authentication import get_webin_auth_token
from mars_lib.authentication import get_metabolights_auth_token, get_webin_auth_token
from mars_lib.biosamples_external_references import (
get_header,
biosamples_endpoints,
Expand All @@ -18,7 +20,7 @@
reduce_isa_json_for_target_repo,
update_isa_json,
)
from mars_lib.models.isa_json import IsaJson
from mars_lib.models.isa_json import Comment, IsaJson
from mars_lib.models.repository_response import RepositoryResponse
from mars_lib.target_repo import TargetRepository
from mars_lib.logging import print_and_log
Expand Down Expand Up @@ -143,18 +145,37 @@ def submission(

if TargetRepository.METABOLIGHTS in target_repositories:
# Submit to MetaboLights
# TODO: Filter out other assays
metabolights_result = upload_to_metabolights(
file_paths=data_file_paths,
file_transfer=file_transfer,
isa_json=isa_json,
metabolights_credentials=user_credentials,
metabolights_url=urls["METABOLIGHTS"]["SUBMISSION"],
metabolights_token_url=urls["METABOLIGHTS"]["TOKEN"],
)
metabolights_receipt_obj = metabolights_result.json()
print_and_log(
f"Submission to {TargetRepository.METABOLIGHTS} was successful",
f"Submission to {TargetRepository.METABOLIGHTS} was successful. Result:\n{metabolights_receipt_obj}",
level="info",
)
# TODO: Update `isa_json`, based on the receipt returned
metabolights_receipt = RepositoryResponse.model_validate(
metabolights_receipt_obj
)
# TODO: MetaboLights creates accession number with errors. Errors are not handled.
isa_json.investigation.studies[0].comments.append(
Comment(
name="metabolights_accession",
value=metabolights_receipt.accessions[0].value,
)
)
if DEBUG:
save_step_to_file(time_stamp, "3_after_metabolights", isa_json)

if TargetRepository.EVA in target_repositories:
# Submit to EVA
# TODO: Filter out other assays
print_and_log(
f"Submission to {TargetRepository.EVA} was successful", level="info"
f"Submission to {TargetRepository.EVA} was successful.", level="info"
)
# TODO: Update `isa_json`, based on the receipt returned

Expand Down Expand Up @@ -197,6 +218,114 @@ def submit_to_biosamples(
return result


def upload_to_metabolights(
    file_paths: list[str],
    isa_json: IsaJson,
    metabolights_credentials: dict[str, str],
    metabolights_url: str,
    metabolights_token_url: str,
    file_transfer: str = "ftp",
) -> requests.Response:
    """
    Submit the ISA-JSON investigation to MetaboLights and wait for validation.

    The function authenticates, posts the investigation as a multipart file,
    fetches the FTP credentials, triggers the validation and then polls the
    validation status URL until its info section holds a "validation-time"
    entry.

    Args:
        file_paths (list): Paths of the data files. Currently unused because
            the data upload itself is not enabled yet (see TODO below).
        isa_json (IsaJson): The ISA-JSON whose investigation is submitted.
        metabolights_credentials (dict): Credentials passed to
            get_metabolights_auth_token.
        metabolights_url (str): The MetaboLights submission URL.
        metabolights_token_url (str): The MetaboLights token URL.
        file_transfer (str): The data upload protocol. Only "ftp"
            (case-insensitive) is supported; an empty value falls back to it.

    Returns:
        requests.Response: The validation status response that reported a
        "validation-time" entry.

    Raises:
        ValueError: If the protocol is unsupported, an expected entry is
            missing from an info section, or no "validation-time" entry is
            reported within the polling budget.
        requests.exceptions.RequestException: If a request fails or returns
            an HTTP error status (status polling timeouts are retried).
    """
    # Normalise once, so the validation here and the FTP step below agree.
    data_upload_protocol = file_transfer.lower() if file_transfer else "ftp"
    if data_upload_protocol != "ftp":
        # Report what the caller actually passed in.
        raise ValueError(f"Data upload protocol {file_transfer} is not supported")

    token = get_metabolights_auth_token(
        metabolights_credentials, auth_url=metabolights_token_url
    )
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }
    isa_json_str = isa_json.investigation.model_dump_json(
        by_alias=True, exclude_none=True
    )
    files = {"isa_json_file": ("isa_json.json", io.StringIO(isa_json_str))}

    submission_response = requests.post(
        metabolights_url,
        headers=headers,
        files=files,
        timeout=120,
    )
    submission_response.raise_for_status()
    result = submission_response.json()

    # The submission receipt lists the follow-up URLs in its info section.
    validation_url = find_value_in_info_section("validation-url", result["info"])
    validation_status_url = find_value_in_info_section(
        "validation-status-url", result["info"]
    )
    ftp_credentials_url = find_value_in_info_section(
        "ftp-credentials-url", result["info"]
    )

    if data_upload_protocol == "ftp":
        ftp_credentials_response = requests.get(
            ftp_credentials_url, headers=headers, timeout=30
        )
        ftp_credentials_response.raise_for_status()
        ftp_credentials = ftp_credentials_response.json()
        ftp_base_path = ftp_credentials["ftpPath"]  # noqa F841
        uploader = FTPUploader(  # noqa F841
            ftp_credentials["ftpHost"],
            ftp_credentials["ftpUser"],
            ftp_credentials["ftpPassword"],
        )
        # TODO: Update after the uploader is implemented/tested
        # uploader.upload(file_paths, target_location=ftp_base_path)

    validation_response = requests.post(validation_url, headers=headers, timeout=120)
    validation_response.raise_for_status()

    poll_interval_in_seconds = 10
    max_poll_count = 100
    for _ in range(max_poll_count):
        try:
            validation_status_response = requests.get(
                validation_status_url, headers=headers, timeout=30
            )
            validation_status_response.raise_for_status()
        except requests.exceptions.Timeout:
            # A slow status endpoint is not fatal; try again on the next poll.
            pass
        else:
            validation_status = validation_status_response.json()
            validation_time = find_value_in_info_section(
                "validation-time", validation_status["info"], fail_gracefully=True
            )
            if validation_time:
                return validation_status_response
        time.sleep(poll_interval_in_seconds)

    raise ValueError(f"Validation failed after {max_poll_count} iterations")


def find_value_in_info_section(
    key: str, info_section: list[Any], fail_gracefully: bool = False
) -> Any:
    """
    Return the "message" of the first info entry whose "name" equals key.

    Args:
        key (str): The entry name to look for.
        info_section (list): The info entries, each expected to be a dict
            with "name" and "message" keys. None is treated as empty.
        fail_gracefully (bool): Return None instead of raising when no entry
            matches.

    Returns:
        Any: The "message" value of the first matching entry, or None when
        nothing matches and fail_gracefully is set.

    Raises:
        ValueError: If no entry matches and fail_gracefully is not set.
        KeyError: If the matching entry has no "message" key.
    """
    for info in info_section or []:
        # Skip entries that are not dicts or carry no "name", so that one
        # malformed entry cannot hide a valid one behind a KeyError.
        if isinstance(info, dict) and info.get("name") == key:
            return info["message"]
    if fail_gracefully:
        return None
    raise ValueError(f"Name {key} not found in info section")


def submit_to_ena(
isa_json: IsaJson, user_credentials: dict[str, str], submission_url: str
) -> requests.Response:
Expand Down
Loading

0 comments on commit 7390763

Please sign in to comment.