Skip to content

Commit

Permalink
Add validator for filename
Browse files Browse the repository at this point in the history
  • Loading branch information
kdp-cloud committed Nov 5, 2024
1 parent eb8881b commit 859814e
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 15 deletions.
36 changes: 33 additions & 3 deletions mars-cli/mars_lib/models/isa_json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from __future__ import annotations

import re

from enum import Enum
from typing import List, Optional, Union
from pydantic import BaseModel, Field, field_validator, ConfigDict
Expand Down Expand Up @@ -189,6 +192,15 @@ class Assay(CommentedIsaBase):
technologyType: Optional[OntologyAnnotation] = None
unitCategories: List[OntologyAnnotation] = []

@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that an assay filename, when given, starts with ``a_``.

    Args:
        v: The ``filename`` value being validated; ``None`` is accepted
            and passed through unchanged.

    Returns:
        The unchanged value.

    Raises:
        ValueError: If ``v`` is not ``None`` and does not start with ``a_``.
    """
    # A missing filename is not an error; only a present one is checked.
    if v is None or v.startswith("a_"):
        return v
    raise ValueError("'filename' should start with 'a_'")

@field_validator("comments")
def detect_target_repo_comments(cls, v: List[Comment]) -> Optional[List[Comment]]:
target_repo_comments = [
Expand Down Expand Up @@ -242,14 +254,14 @@ class MaterialAttribute(IsaBase):


class Study(CommentedIsaBase):
id: Optional[str] = Field(alias="@id", default=None)
id: str = Field(alias="@id", default=None)
assays: List[Assay] = []
characteristicCategories: List[MaterialAttribute] = []
description: Optional[str] = None
factors: List[Factor] = []
filename: Optional[str] = None
identifier: Optional[str] = None
materials: Optional[StudyMaterialType]
materials: Optional[StudyMaterialType] = None
people: List[Person] = []
processSequence: List[Process] = []
protocols: List[Protocol] = []
Expand All @@ -260,9 +272,18 @@ class Study(CommentedIsaBase):
title: Optional[str] = None
unitCategories: List[OntologyAnnotation] = []

@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that a study filename, when given, starts with ``s_``.

    Args:
        v: The ``filename`` value being validated; ``None`` is accepted
            and passed through unchanged.

    Returns:
        The unchanged value.

    Raises:
        ValueError: If ``v`` is not ``None`` and does not start with ``s_``.
    """
    # A missing filename is not an error; only a present one is checked.
    if v is None or v.startswith("s_"):
        return v
    raise ValueError("'filename' should start with 's_'")


class Investigation(CommentedIsaBase):
id: Optional[str] = Field(alias="@id", default=None)
id: str = Field(alias="@id", default=None)
description: Optional[str] = None
filename: Optional[str] = None
identifier: Optional[str] = None
Expand All @@ -274,6 +295,15 @@ class Investigation(CommentedIsaBase):
submissionDate: Optional[str] = None
title: Optional[str] = None

@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that an investigation filename, when given, starts with ``i_``.

    Args:
        v: The ``filename`` value being validated; ``None`` is accepted
            and passed through unchanged.

    Returns:
        The unchanged value.

    Raises:
        ValueError: If ``v`` is not ``None`` and does not start with ``i_``.
    """
    # A missing filename is not an error; only a present one is checked.
    if v is None or v.startswith("i_"):
        return v
    raise ValueError("'filename' should start with 'i_'")


class IsaJson(IsaBase):
investigation: Investigation
30 changes: 29 additions & 1 deletion mars-cli/tests/test_isa_json.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from mars_lib.isa_json import (
reduce_isa_json_for_target_repo,
load_isa_json,
Expand All @@ -6,7 +8,15 @@
from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY
import pytest
from pydantic import ValidationError
from mars_lib.models.isa_json import Data, Material, Assay, Person, IsaJson
from mars_lib.models.isa_json import (
Data,
Material,
Assay,
Person,
IsaJson,
Investigation,
Study,
)
from mars_lib.models.repository_response import RepositoryResponse
import json

Expand Down Expand Up @@ -225,3 +235,21 @@ def test_update_study_materials_with_accession_categories():
updated_investigation.studies[0].materials.samples[0].characteristics[-1].value
== repo_response.accessions[1].value
)


def test_filename_validation():
    """Filenames must carry the prefix of their model: 'i_', 's_' or 'a_'."""
    # A filename without the expected prefix is rejected with a message
    # naming that prefix.
    with pytest.raises(ValidationError, match="'filename' should start with 'i_'"):
        Investigation.model_validate({"@id": "1", "filename": "bad filename"})

    with pytest.raises(ValidationError, match="'filename' should start with 's_'"):
        Study.model_validate({"@id": "2", "filename": "bad filename"})

    with pytest.raises(ValidationError, match="'filename' should start with 'a_'"):
        Assay.model_validate({"@id": "3", "filename": "bad filename"})

    # Sanity check of the prefix pattern itself, independent of the models.
    assert re.match(r"^i_", "i_Good_file_name")

    # Correctly prefixed filenames validate without raising.
    assert Investigation.model_validate({"@id": "4", "filename": "i_Good_File_Name"})
    assert Study.model_validate({"@id": "5", "filename": "s_Good_File_Name"})
    assert Assay.model_validate({"@id": "6", "filename": "a_Good_File_Name"})
6 changes: 3 additions & 3 deletions test-data/biosamples-input-isa.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"submissionDate": "",
"publicReleaseDate": "",
"ontologySourceReferences": [],
"filename": "Bob's investigation.txt",
"filename": "i_Bob's investigation.txt",
"comments": [
{
"name": "ISAjson export time",
Expand Down Expand Up @@ -60,7 +60,7 @@
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
"submissionDate": "",
"publicReleaseDate": "",
"filename": "Arabidopsis thaliana.txt",
"filename": "s_Arabidopsis thaliana.txt",
"comments": [
{
"name": "SEEK Study ID",
Expand Down Expand Up @@ -1008,4 +1008,4 @@
}
]
}
}
}
6 changes: 3 additions & 3 deletions test-data/biosamples-modified-isa.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"submissionDate": "",
"publicReleaseDate": "",
"ontologySourceReferences": [],
"filename": "Bob's investigation.txt",
"filename": "i_Bob's investigation.txt",
"comments": [
{
"name": "ISAjson export time",
Expand Down Expand Up @@ -60,7 +60,7 @@
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
"submissionDate": "",
"publicReleaseDate": "",
"filename": "Arabidopsis thaliana.txt",
"filename": "s_Arabidopsis thaliana.txt",
"comments": [
{
"name": "SEEK Study ID",
Expand Down Expand Up @@ -1019,4 +1019,4 @@
}
]
}
}
}
4 changes: 2 additions & 2 deletions test-data/biosamples-original-isa-no-accesion-char.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"submissionDate": "",
"publicReleaseDate": "",
"ontologySourceReferences": [],
"filename": "Bob's investigation.txt",
"filename": "i_Bob's investigation.txt",
"comments": [
{
"name": "ISAjson export time",
Expand Down Expand Up @@ -60,7 +60,7 @@
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
"submissionDate": "",
"publicReleaseDate": "",
"filename": "Arabidopsis thaliana.txt",
"filename": "s_Arabidopsis thaliana.txt",
"comments": [
{
"name": "SEEK Study ID",
Expand Down
6 changes: 3 additions & 3 deletions test-data/biosamples-original-isa.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"submissionDate": "",
"publicReleaseDate": "",
"ontologySourceReferences": [],
"filename": "Bob's investigation.txt",
"filename": "i_Bob's investigation.txt",
"comments": [
{
"name": "ISAjson export time",
Expand Down Expand Up @@ -60,7 +60,7 @@
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
"submissionDate": "",
"publicReleaseDate": "",
"filename": "Arabidopsis thaliana.txt",
"filename": "s_Arabidopsis thaliana.txt",
"comments": [
{
"name": "SEEK Study ID",
Expand Down Expand Up @@ -1002,4 +1002,4 @@
}
]
}
}
}

0 comments on commit 859814e

Please sign in to comment.