diff --git a/mars-cli/mars_lib/models/isa_json.py b/mars-cli/mars_lib/models/isa_json.py index 931f0ae..8d6da45 100644 --- a/mars-cli/mars_lib/models/isa_json.py +++ b/mars-cli/mars_lib/models/isa_json.py @@ -1,4 +1,7 @@ from __future__ import annotations + +import re + from enum import Enum from typing import List, Optional, Union from pydantic import BaseModel, Field, field_validator, ConfigDict @@ -189,6 +192,15 @@ class Assay(CommentedIsaBase): technologyType: Optional[OntologyAnnotation] = None unitCategories: List[OntologyAnnotation] = [] + @field_validator("filename") + def validate_filename(cls, v: str) -> Union[str, None]: + if v is None: + return v + elif re.match(r"^a_", v): + return v + else: + raise ValueError("'filename' should start with 'a_'") + @field_validator("comments") def detect_target_repo_comments(cls, v: List[Comment]) -> Optional[List[Comment]]: target_repo_comments = [ @@ -242,14 +254,14 @@ class MaterialAttribute(IsaBase): class Study(CommentedIsaBase): - id: Optional[str] = Field(alias="@id", default=None) + id: str = Field(alias="@id", default=None) assays: List[Assay] = [] characteristicCategories: List[MaterialAttribute] = [] description: Optional[str] = None factors: List[Factor] = [] filename: Optional[str] = None identifier: Optional[str] = None - materials: Optional[StudyMaterialType] + materials: Optional[StudyMaterialType] = None people: List[Person] = [] processSequence: List[Process] = [] protocols: List[Protocol] = [] @@ -260,9 +272,18 @@ class Study(CommentedIsaBase): title: Optional[str] = None unitCategories: List[OntologyAnnotation] = [] + @field_validator("filename") + def validate_filename(cls, v: str) -> Union[str, None]: + if v is None: + return v + elif re.match(r"^s_", v): + return v + else: + raise ValueError("'filename' should start with 's_'") + class Investigation(CommentedIsaBase): - id: Optional[str] = Field(alias="@id", default=None) + id: str = Field(alias="@id", default=None) description: Optional[str] = None filename: Optional[str] = None identifier: Optional[str] = None @@ -274,6 +295,15 @@ class Investigation(CommentedIsaBase): submissionDate: Optional[str] = None title: Optional[str] = None + @field_validator("filename") + def validate_filename(cls, v: str) -> Union[str, None]: + if v is None: + return v + elif re.match(r"^i_", v): + return v + else: + raise ValueError("'filename' should start with 'i_'") + class IsaJson(IsaBase): investigation: Investigation diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index 1d7083a..f14f9ba 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -1,3 +1,5 @@ +import re + from mars_lib.isa_json import ( reduce_isa_json_for_target_repo, load_isa_json, @@ -6,7 +8,15 @@ from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY import pytest from pydantic import ValidationError -from mars_lib.models.isa_json import Data, Material, Assay, Person, IsaJson +from mars_lib.models.isa_json import ( + Data, + Material, + Assay, + Person, + IsaJson, + Investigation, + Study, +) from mars_lib.models.repository_response import RepositoryResponse import json @@ -225,3 +235,21 @@ def test_update_study_materials_with_accession_categories(): updated_investigation.studies[0].materials.samples[0].characteristics[-1].value == repo_response.accessions[1].value ) + + +def test_filename_validation(): + # ISA should have a filename that starts with 'x_' + with pytest.raises(ValidationError, match=f"'filename' should start with 'i_'"): + Investigation.model_validate({"@id": "1", "filename": "bad filename"}) + + with pytest.raises(ValidationError, match=f"'filename' should start with 's_'"): + Study.model_validate({"@id": "2", "filename": "bad filename"}) + + with pytest.raises(ValidationError, match=f"'filename' should start with 'a_'"): + Assay.model_validate({"@id": "3", "filename": "bad filename"}) + + assert re.match(r"^i_", "i_Good_file_name") + + assert Investigation.model_validate({"@id": "4", "filename": "i_Good_File_Name"}) + assert Study.model_validate({"@id": "5", "filename": "s_Good_File_Name"}) + assert Assay.model_validate({"@id": "6", "filename": "a_Good_File_Name"}) diff --git a/test-data/biosamples-input-isa.json b/test-data/biosamples-input-isa.json index dbf6263..c840269 100644 --- a/test-data/biosamples-input-isa.json +++ b/test-data/biosamples-input-isa.json @@ -6,7 +6,7 @@ "submissionDate": "", "publicReleaseDate": "", "ontologySourceReferences": [], - "filename": "Bob's investigation.txt", + "filename": "i_Bob's investigation.txt", "comments": [ { "name": "ISAjson export time", @@ -60,7 +60,7 @@ "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", "submissionDate": "", "publicReleaseDate": "", - "filename": "Arabidopsis thaliana.txt", + "filename": "s_Arabidopsis thaliana.txt", "comments": [ { "name": "SEEK Study ID", @@ -1008,4 +1008,4 @@ } ] } -} \ No newline at end of file +} diff --git a/test-data/biosamples-modified-isa.json b/test-data/biosamples-modified-isa.json index b71ad8a..4e322c2 100644 --- a/test-data/biosamples-modified-isa.json +++ b/test-data/biosamples-modified-isa.json @@ -6,7 +6,7 @@ "submissionDate": "", "publicReleaseDate": "", "ontologySourceReferences": [], - "filename": "Bob's investigation.txt", + "filename": "i_Bob's investigation.txt", "comments": [ { "name": "ISAjson export time", @@ -60,7 +60,7 @@ "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", "submissionDate": "", "publicReleaseDate": "", - "filename": "Arabidopsis thaliana.txt", + "filename": "s_Arabidopsis thaliana.txt", "comments": [ { "name": "SEEK Study ID", @@ -1019,4 +1019,4 @@ } ] } -} \ No newline at end of file +} diff --git a/test-data/biosamples-original-isa-no-accesion-char.json b/test-data/biosamples-original-isa-no-accesion-char.json index 9aee9d6..77e786f 100644 --- a/test-data/biosamples-original-isa-no-accesion-char.json +++ b/test-data/biosamples-original-isa-no-accesion-char.json @@ -6,7 +6,7 @@ "submissionDate": "", "publicReleaseDate": "", "ontologySourceReferences": [], - "filename": "Bob's investigation.txt", + "filename": "i_Bob's investigation.txt", "comments": [ { "name": "ISAjson export time", @@ -60,7 +60,7 @@ "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", "submissionDate": "", "publicReleaseDate": "", - "filename": "Arabidopsis thaliana.txt", + "filename": "s_Arabidopsis thaliana.txt", "comments": [ { "name": "SEEK Study ID", diff --git a/test-data/biosamples-original-isa.json b/test-data/biosamples-original-isa.json index bbd6121..0554c18 100644 --- a/test-data/biosamples-original-isa.json +++ b/test-data/biosamples-original-isa.json @@ -6,7 +6,7 @@ "submissionDate": "", "publicReleaseDate": "", "ontologySourceReferences": [], - "filename": "Bob's investigation.txt", + "filename": "i_Bob's investigation.txt", "comments": [ { "name": "ISAjson export time", @@ -60,7 +60,7 @@ "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", "submissionDate": "", "publicReleaseDate": "", - "filename": "Arabidopsis thaliana.txt", + "filename": "s_Arabidopsis thaliana.txt", "comments": [ { "name": "SEEK Study ID", @@ -1002,4 +1002,4 @@ } ] } -} \ No newline at end of file +}