From 5578e5ba24364b9376f02a4f8fe775f12fafb31f Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Mon, 18 Nov 2024 15:24:19 +0100 Subject: [PATCH 1/5] Update tests --- tests/schema.json | 132 ++++++++++++++++++++++++++ tests/test_00_pypi_rocrate.py | 22 +++-- tests/test_01_params_metadata_json.py | 27 +++--- tests/test_02_schema.py | 49 ++++++++++ tests/test_99_logging.py | 38 ++++---- 5 files changed, 228 insertions(+), 40 deletions(-) create mode 100644 tests/schema.json create mode 100644 tests/test_02_schema.py diff --git a/tests/schema.json b/tests/schema.json new file mode 100644 index 0000000..6c5e711 --- /dev/null +++ b/tests/schema.json @@ -0,0 +1,132 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "@context": { + "type": "string", + "format": "uri" + }, + "@graph": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + }, + "@type": { + "type": "string" + }, + "about": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "conformsTo": { + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "sdPublisher": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "version": { + "type": "string" + }, + "author": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "dateModified": { + "type": "string", + "format": "date-time" + }, + "name": { + "type": "string" + }, + "encodingFormat": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "genre": { + "type": "string" + }, + "creativeWorkStatus": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "keywords": { + "type": "string" + }, + "hasPart": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + } + }, + "comment": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + }, + "@type": { + "type": "string" + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "text": { + "type": "string" + }, + "author": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + } + } + } + } + }, + "required": ["@id", "@type"] + } + } + }, + "required": ["@context", "@graph"] +} \ No newline at end of file diff --git a/tests/test_00_pypi_rocrate.py b/tests/test_00_pypi_rocrate.py index c0378d3..ca5420e 100644 --- a/tests/test_00_pypi_rocrate.py +++ b/tests/test_00_pypi_rocrate.py @@ -5,14 +5,15 @@ """ import os import json -import unittest +import unittest, traceback import tempfile from pathlib import Path from zipfile import ZIP_DEFLATED -from zipfile import Path as ZPath from zipfile import ZipFile from rocrate.rocrate import ROCrate +LABEL = 'pypi_rocrate' + class Test_1(unittest.TestCase): """ derived class for this test @@ -35,25 +36,26 @@ def test_main(self): fileName = os.path.join(root, name) print(f'\n\nTry to parse: {fileName}') with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: - p = ZPath(elnFile) - dirName = sorted(p.iterdir())[0] + dirName = os.path.splitext(os.path.basename(fileName))[0] try: dirpath = Path(tempfile.mkdtemp()) elnFile.extractall(dirpath) - temppath= dirpath.joinpath(dirName.name) - crate = ROCrate(temppath) + tempPath= [i for i in dirpath.iterdir() if i.is_dir()][0] + crate = ROCrate(tempPath) for e in crate.get_entities(): print(f' {e.id}: {e.type}') if fileName not in logJson: - logJson[fileName] = {'pypi_rocrate':True} + logJson[fileName] = {LABEL:True} else: - logJson[fileName] = logJson[fileName] | {'pypi_rocrate':True} + logJson[fileName] = logJson[fileName] | {LABEL:True} except Exception: print(" ***** ERROR: Could not parse content of this file!! *****") + print(f" Temporary folder: ",tempPath) + print(traceback.format_exc()) if fileName not in logJson: - logJson[fileName] = {'pypi_rocrate':False} + logJson[fileName] = {LABEL:False} else: - logJson[fileName] = logJson[fileName] | {'pypi_rocrate':False} + logJson[fileName] = logJson[fileName] | {LABEL:False} success = False json.dump(logJson, open('tests/logging.json', 'w')) assert success diff --git a/tests/test_01_params_metadata_json.py b/tests/test_01_params_metadata_json.py index d93ddfa..228150f 100644 --- a/tests/test_01_params_metadata_json.py +++ b/tests/test_01_params_metadata_json.py @@ -8,6 +8,7 @@ from zipfile import Path as ZPath from zipfile import ZipFile + class Test_2(unittest.TestCase): """ derived class for this test @@ -28,6 +29,7 @@ def test_main(self): OUTPUT_INFO = False OUTPUT_COUNTS = True KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type'] + LABEL = 'params_metadata_json' # log-file if Path('tests/logging.json').exists(): @@ -52,24 +54,25 @@ def processNode(graph, nodeID): # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT if '@type' not in node: print('**ERROR: all nodes must have @type. check:', nodeID) + return False if node['@type'] == 'Dataset': for key in DATASET_MANDATORY: - if not key in node: + if key not in node: print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}') globalSuccess = False for key in DATASET_SUGGESTED: - if not key in node and OUTPUT_INFO: + if key not in node and OUTPUT_INFO: print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}') elif node['@type'] == 'File': for key in FILE_MANDATORY: - if not key in node: + if key not in node: print(f'**ERROR in file: "{key}" not in @id={node["@id"]}') globalSuccess = False for key in FILE_SUGGESTED: - if not key in node and OUTPUT_INFO: + if key not in node and OUTPUT_INFO: print(f'**INFO for file: "{key}" not in @id={node["@id"]}') # CHECK PROPERTIES FOR ALL KEYS - if any([str(i).strip()=='' for i in node.values()]): + if any(not str(i).strip() for i in node.values()): print(f'**WARNING: {nodeID} contains empty values in the key-value pairs') # SPECIFIC CHECKS ON CERTAIN KEYS if isinstance(node.get('keywords', ''), list): @@ -89,16 +92,14 @@ def processNode(graph, nodeID): print(f'\n\nParse: {fileName}') with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: success = True - p = ZPath(elnFile) - dirName = sorted(p.iterdir())[0] - metadataJsonFile = dirName.joinpath(METADATA_FILE) - metadataContent = json.loads(metadataJsonFile.read_bytes()) + metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0] + metadataContent = json.loads(elnFile.read(metadataJsonFile)) graph = metadataContent["@graph"] # find information from master node ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE] if len(ro_crate_nodes) == 1: for key in ROCRATE_NOTE_MANDATORY: - if not key in ro_crate_nodes[0]: + if key not in ro_crate_nodes[0]: print(f'**ERROR: "{key}" not in @id={METADATA_FILE}') else: print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ') @@ -109,9 +110,9 @@ def processNode(graph, nodeID): for partI in main_node['hasPart']: success = processNode(graph, partI['@id']) and success if fileName not in logJson: - logJson[fileName] = {'params_metadata_json':success} + logJson[fileName] = {LABEL:success} else: - logJson[fileName] = logJson[fileName] | {'params_metadata_json':success} + logJson[fileName] = logJson[fileName] | {LABEL:success} # count occurances of all keys counts = {} @@ -133,4 +134,4 @@ def processNode(graph, nodeID): print(f'{prefix}{k:15}: {v}') print('\n\nSuccess:', success) json.dump(logJson, open('tests/logging.json', 'w')) - assert success + assert success #if this fails on your local test, great. It is a summary such that github actions report correctly diff --git a/tests/test_02_schema.py b/tests/test_02_schema.py new file mode 100644 index 0000000..1847da3 --- /dev/null +++ b/tests/test_02_schema.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 +""" +Validate if rocrate of pypi can open and parse it. This is a test if we follow general ro-crate guidelines. +https://pypi.org/project/rocrate/ +""" +import os +import json +import unittest +from pathlib import Path +from zipfile import ZIP_DEFLATED +from zipfile import ZipFile +from jsonschema import Draft202012Validator + +LABEL = 'schema' +METADATA_FILE = 'ro-crate-metadata.json' + +class Test_1(unittest.TestCase): + """ + derived class for this test + """ + def test_main(self): + """ + main function + """ + # log-file + if Path('tests/logging.json').exists(): + logJson = json.load(open('tests/logging.json')) + else: + logJson = {} + + schema = json.load(open('tests/schema.json', 'r', encoding='utf-8')) + validator = Draft202012Validator(schema=schema) + validator.check_schema(schema=schema) + success = True + for root, _, files in os.walk(".", topdown=False): + for name in files: + if not name.endswith('.eln'): + continue + fileName = os.path.join(root, name) + print(f'\nInspect: {name}') + with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: + metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0] + metadataContent = json.loads(elnFile.read(metadataJsonFile)) + for error in sorted(validator.iter_errors(metadataContent), key=str): + print(f'- {error.message}') + success = False + logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success} + json.dump(logJson, open('tests/logging.json', 'w')) + assert success diff --git a/tests/test_99_logging.py b/tests/test_99_logging.py index e5c368e..a28723c 100644 --- a/tests/test_99_logging.py +++ b/tests/test_99_logging.py @@ -4,6 +4,10 @@ import json import unittest +COLUMNS = ['params_metadata_json', 'pypi_rocrate','schema'] +HEADER = "## Results of verification\nautomatically created\n\n" + + class Test_2(unittest.TestCase): """ derived class for this test @@ -12,25 +16,25 @@ def test_main(self): """ main function """ - columns = ['params_metadata_json', 'pypi_rocrate'] - header = "## Results of verification\nautomatically created\n\n" if Path('tests/logging.json').exists(): logJson = json.load(open('tests/logging.json')) - output = open('tests/logging.md', 'w') - output.write(header) - output.write(f'| software | file name | {" | ".join(columns)} |\n') - output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in columns])} |\n') - for filename, result in logJson.items(): - software = filename.split('/')[2] - individualFileName = filename.split('/')[3] - if len(individualFileName)>30: - individualFileName=individualFileName[:24]+'...eln' - resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in columns]) - output.write(f'| {software} | {individualFileName} | {resultStr} |\n') - output.write("\n\nDefinition of tests\n") - output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n") - output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n") - output.close() + print(f'Test results\n{json.dumps(logJson, indent=2)}') + with open('tests/logging.md', 'w') as output: + output.write(HEADER) + output.write(f'| software | file name | {" | ".join(COLUMNS)} |\n') + output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in COLUMNS])} |\n') + for filename, result in logJson.items(): + software = filename.split('/')[2] + individualFileName = filename.split('/')[3] + if len(individualFileName)>30: + individualFileName=individualFileName[:24]+'...eln' + resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in COLUMNS]) + output.write(f'| {software} | {individualFileName} | {resultStr} |\n') + output.write("\n\nDefinition of tests\n") + output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n") + output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n") + output.write("- **schema**: tests if the conventions of the consortium are fulfilled using a schema description.\n") + output.close() print('Created logging markdown') else: print('Did not create logging markdown') From 76b879853358b124b3fdd256d8c728a7d53ad1dc Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Mon, 18 Nov 2024 15:29:11 +0100 Subject: [PATCH 2/5] include github action --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index edd3695..9326d24 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest rocrate + pip install pytest rocrate jsonschema - name: Test with pytest run: | pytest --tb=no -s From 8fc7087cfe79bf235e65ff9f432d841e840ffdb2 Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 19 Nov 2024 22:05:53 +0100 Subject: [PATCH 3/5] Improved tests and included rocrate validator --- .github/workflows/pytest.yml | 2 +- tests/test_00_pypi_rocrate.py | 4 +- tests/test_01_params_metadata_json.py | 2 +- tests/test_03_validator.py | 65 +++++++++++++++++++++++++++ tests/test_99_logging.py | 9 ++-- 5 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 tests/test_03_validator.py diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9326d24..97923bf 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest rocrate jsonschema + pip install pytest rocrate jsonschema rocrateValidator - name: Test with pytest run: | pytest --tb=no -s diff --git a/tests/test_00_pypi_rocrate.py b/tests/test_00_pypi_rocrate.py index ca5420e..c975f78 100644 --- a/tests/test_00_pypi_rocrate.py +++ b/tests/test_00_pypi_rocrate.py @@ -13,6 +13,7 @@ from rocrate.rocrate import ROCrate LABEL = 'pypi_rocrate' +verbose = False class Test_1(unittest.TestCase): """ @@ -43,7 +44,8 @@ def test_main(self): tempPath= [i for i in dirpath.iterdir() if i.is_dir()][0] crate = ROCrate(tempPath) for e in crate.get_entities(): - print(f' {e.id}: {e.type}') + if verbose: + print(f' {e.id}: {e.type}') if fileName not in logJson: logJson[fileName] = {LABEL:True} else: diff --git a/tests/test_01_params_metadata_json.py b/tests/test_01_params_metadata_json.py index 228150f..da3f20d 100644 --- a/tests/test_01_params_metadata_json.py +++ b/tests/test_01_params_metadata_json.py @@ -27,7 +27,7 @@ def test_main(self): # runtime global variables METADATA_FILE = 'ro-crate-metadata.json' OUTPUT_INFO = False - OUTPUT_COUNTS = True + OUTPUT_COUNTS = False KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type'] LABEL = 'params_metadata_json' diff --git a/tests/test_03_validator.py b/tests/test_03_validator.py new file mode 100644 index 0000000..249557e --- /dev/null +++ b/tests/test_03_validator.py @@ -0,0 +1,65 @@ +#!/usr/bin/python3 +""" +Validate if rocrate of pypi can open and parse it. This is a test if we follow general ro-crate guidelines. +https://pypi.org/project/rocrate/ +""" +import os +import json +import tempfile +import unittest +from pathlib import Path +from zipfile import ZIP_DEFLATED +from zipfile import ZipFile +import rocrate_validator +from rocrate_validator.errors import ValidationError +import rocrate_validator.services as rvs + +LABEL = 'validator' +METADATA_FILE = 'ro-crate-metadata.json' + +class Test_1(unittest.TestCase): + """ + derived class for this test + """ + def test_main(self): + """ + main function + """ + # log-file + if Path('tests/logging.json').exists(): + logJson = json.load(open('tests/logging.json')) + else: + logJson = {} + + for root, _, files in os.walk(".", topdown=False): + for name in files: + if not name.endswith('.eln'): + continue + fileName = os.path.join(root, name) + print(f'\n\nTry to parse: {fileName}') + with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: + dirpath = Path(tempfile.mkdtemp())/Path(fileName).parent.name + dirpath.mkdir(parents=True, exist_ok=True) + elnFile.extractall(dirpath) + rocrate_dir= [i for i in dirpath.iterdir() if i.is_dir()][0] + result = rvs.validate({ + "profiles_path": rocrate_validator.utils.get_profiles_path(), + "profile_identifier": "ro-crate", + "requirement_severity": rocrate_validator.models.Severity.REQUIRED.name, + "requirement_severity_only": False, + "inherit_profiles": True, + "verbose": True, + "data_path": rocrate_dir, + "ontology_path": None, + "abort_on_first": False + }) + result_dict = result.to_dict() + if result_dict['issues'] == [] and result_dict['passed']: + success = True + else: + print(f'{fileName} is not valid') + print(result_dict) + success = False + logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success} + json.dump(logJson, open('tests/logging.json', 'w')) + assert success diff --git a/tests/test_99_logging.py b/tests/test_99_logging.py index a28723c..510bdab 100644 --- a/tests/test_99_logging.py +++ b/tests/test_99_logging.py @@ -4,7 +4,7 @@ import json import unittest -COLUMNS = ['params_metadata_json', 'pypi_rocrate','schema'] +COLUMNS = ['pypi_rocrate','validator','schema','params_metadata_json'] HEADER = "## Results of verification\nautomatically created\n\n" @@ -31,9 +31,10 @@ def test_main(self): resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in COLUMNS]) output.write(f'| {software} | {individualFileName} | {resultStr} |\n') output.write("\n\nDefinition of tests\n") - output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n") - output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n") - output.write("- **schema**: tests if the conventions of the consortium are fulfilled using a schema description.\n") + output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; if eln file can be easily opened by that library.\n") + output.write("- **validator**: tests if the ro-crate conventions fulfilled using pypi's rocrateValidator.\n") + output.write("- **schema**: tests if the conventions of the ELN-consortium are fulfilled using a schema description.\n") + output.write("- **params_metadata_json**: tests if the conventions of the ELN-consortium are fulfilled, aka parameters exist and are consistent with convention.\n") output.close() print('Created logging markdown') else: From 68377f023e2d0997c254910db4a109f52da55689 Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 19 Nov 2024 22:08:47 +0100 Subject: [PATCH 4/5] Add traceback to pytest --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 97923bf..10e2b51 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -25,7 +25,7 @@ jobs: pip install pytest rocrate jsonschema rocrateValidator - name: Test with pytest run: | - pytest --tb=no -s + pytest --tb=short -s continue-on-error: true - name: Create action summary run: | From 370ea9c99afc56a108ca11cf9f94c97c65cb894d Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 19 Nov 2024 22:13:42 +0100 Subject: [PATCH 5/5] Correct the installed lib --- .github/workflows/pytest.yml | 2 +- tests/test_99_logging.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 10e2b51..1207ef8 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest rocrate jsonschema rocrateValidator + pip install pytest rocrate jsonschema roc-validator - name: Test with pytest run: | pytest --tb=short -s diff --git a/tests/test_99_logging.py b/tests/test_99_logging.py index 510bdab..31c1478 100644 --- a/tests/test_99_logging.py +++ b/tests/test_99_logging.py @@ -32,7 +32,7 @@ def test_main(self): output.write(f'| {software} | {individualFileName} | {resultStr} |\n') output.write("\n\nDefinition of tests\n") output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; if eln file can be easily opened by that library.\n") - output.write("- **validator**: tests if the ro-crate conventions fulfilled using pypi's rocrateValidator.\n") + output.write("- **validator**: tests if the ro-crate conventions fulfilled using pypi's roc-validator.\n") output.write("- **schema**: tests if the conventions of the ELN-consortium are fulfilled using a schema description.\n") output.write("- **params_metadata_json**: tests if the conventions of the ELN-consortium are fulfilled, aka parameters exist and are consistent with convention.\n") output.close()