diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index edd3695..1207ef8 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -22,10 +22,10 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pytest rocrate
+        pip install pytest rocrate jsonschema roc-validator
     - name: Test with pytest
       run: |
-        pytest --tb=no -s
+        pytest --tb=short -s
       continue-on-error: true
     - name: Create action summary
       run: |
diff --git a/tests/schema.json b/tests/schema.json
new file mode 100644
index 0000000..6c5e711
--- /dev/null
+++ b/tests/schema.json
@@ -0,0 +1,132 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "@context": {
+      "type": "string",
+      "format": "uri"
+    },
+    "@graph": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "@id": {
+            "type": "string"
+          },
+          "@type": {
+            "type": "string"
+          },
+          "about": {
+            "type": "object",
+            "properties": {
+              "@id": {
+                "type": "string"
+              }
+            }
+          },
+          "conformsTo": {
+            "type": "object",
+            "properties": {
+              "@id": {
+                "type": "string",
+                "format": "uri"
+              }
+            }
+          },
+          "dateCreated": {
+            "type": "string",
+            "format": "date-time"
+          },
+          "sdPublisher": {
+            "type": "object",
+            "properties": {
+              "@id": {
+                "type": "string"
+              }
+            }
+          },
+          "version": {
+            "type": "string"
+          },
+          "author": {
+            "type": "object",
+            "properties": {
+              "@id": {
+                "type": "string"
+              }
+            }
+          },
+          "dateModified": {
+            "type": "string",
+            "format": "date-time"
+          },
+          "name": {
+            "type": "string"
+          },
+          "encodingFormat": {
+            "type": "string"
+          },
+          "url": {
+            "type": "string",
+            "format": "uri"
+          },
+          "genre": {
+            "type": "string"
+          },
+          "creativeWorkStatus": {
+            "type": "string"
+          },
+          "identifier": {
+            "type": "string"
+          },
+          "keywords": {
+            "type": "string"
+          },
+          "hasPart": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "@id": {
+                  "type": "string"
+                }
+              }
+            }
+          },
+          "comment": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "@id": {
+                  "type": "string"
+                },
+                "@type": {
+                  "type": "string"
+                },
+                "dateCreated": {
+                  "type": "string",
+                  "format": "date-time"
+                },
+                "text": {
+                  "type": "string"
+                },
+                "author": {
+                  "type": "object",
+                  "properties": {
+                    "@id": {
+                      "type": "string"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "required": ["@id", "@type"]
+      }
+    }
+  },
+  "required": ["@context", "@graph"]
+}
\ No newline at end of file
diff --git a/tests/test_00_pypi_rocrate.py b/tests/test_00_pypi_rocrate.py
index c0378d3..c975f78 100644
--- a/tests/test_00_pypi_rocrate.py
+++ b/tests/test_00_pypi_rocrate.py
@@ -5,14 +5,16 @@
 """
 import os
 import json
-import unittest
+import unittest, traceback
 import tempfile
 from pathlib import Path
 from zipfile import ZIP_DEFLATED
-from zipfile import Path as ZPath
 from zipfile import ZipFile
 from rocrate.rocrate import ROCrate
 
+LABEL = 'pypi_rocrate'
+verbose = False
+
 class Test_1(unittest.TestCase):
     """
     derived class for this test
@@ -35,25 +37,27 @@ def test_main(self):
                 fileName = os.path.join(root, name)
                 print(f'\n\nTry to parse: {fileName}')
                 with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
-                    p = ZPath(elnFile)
-                    dirName = sorted(p.iterdir())[0]
+                    dirName = os.path.splitext(os.path.basename(fileName))[0]
                     try:
                         dirpath = Path(tempfile.mkdtemp())
                         elnFile.extractall(dirpath)
-                        temppath= dirpath.joinpath(dirName.name)
-                        crate = ROCrate(temppath)
+                        tempPath= [i for i in dirpath.iterdir() if i.is_dir()][0]
+                        crate = ROCrate(tempPath)
                         for e in crate.get_entities():
-                            print(f' {e.id}: {e.type}')
+                            if verbose:
+                                print(f' {e.id}: {e.type}')
                         if fileName not in logJson:
-                            logJson[fileName] = {'pypi_rocrate':True}
+                            logJson[fileName] = {LABEL:True}
                         else:
-                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate':True}
+                            logJson[fileName] = logJson[fileName] | {LABEL:True}
                     except Exception:
                         print(" ***** ERROR: Could not parse content of this file!! *****")
+                        print(f" Temporary folder: {tempPath}")
+                        print(traceback.format_exc())
                         if fileName not in logJson:
-                            logJson[fileName] = {'pypi_rocrate':False}
+                            logJson[fileName] = {LABEL:False}
                         else:
-                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate':False}
+                            logJson[fileName] = logJson[fileName] | {LABEL:False}
                         success = False
         json.dump(logJson, open('tests/logging.json', 'w'))
         assert success
diff --git a/tests/test_01_params_metadata_json.py b/tests/test_01_params_metadata_json.py
index d93ddfa..da3f20d 100644
--- a/tests/test_01_params_metadata_json.py
+++ b/tests/test_01_params_metadata_json.py
@@ -8,6 +8,7 @@
 from zipfile import Path as ZPath
 from zipfile import ZipFile
 
+
 class Test_2(unittest.TestCase):
     """
     derived class for this test
@@ -26,8 +27,9 @@ def test_main(self):
         # runtime global variables
         METADATA_FILE = 'ro-crate-metadata.json'
         OUTPUT_INFO = False
-        OUTPUT_COUNTS = True
+        OUTPUT_COUNTS = False
         KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type']
+        LABEL = 'params_metadata_json'
 
         # log-file
         if Path('tests/logging.json').exists():
@@ -52,24 +54,25 @@ def processNode(graph, nodeID):
             # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
             if '@type' not in node:
                 print('**ERROR: all nodes must have @type. check:', nodeID)
+                return False
             if node['@type'] == 'Dataset':
                 for key in DATASET_MANDATORY:
-                    if not key in node:
+                    if key not in node:
                         print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
                         globalSuccess = False
                 for key in DATASET_SUGGESTED:
-                    if not key in node and OUTPUT_INFO:
+                    if key not in node and OUTPUT_INFO:
                         print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
             elif node['@type'] == 'File':
                 for key in FILE_MANDATORY:
-                    if not key in node:
+                    if key not in node:
                         print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
                         globalSuccess = False
                 for key in FILE_SUGGESTED:
-                    if not key in node and OUTPUT_INFO:
+                    if key not in node and OUTPUT_INFO:
                         print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
             # CHECK PROPERTIES FOR ALL KEYS
-            if any([str(i).strip()=='' for i in node.values()]):
+            if any(not str(i).strip() for i in node.values()):
                 print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
             # SPECIFIC CHECKS ON CERTAIN KEYS
             if isinstance(node.get('keywords', ''), list):
@@ -89,16 +92,14 @@ def processNode(graph, nodeID):
                 print(f'\n\nParse: {fileName}')
                 with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
                     success = True
-                    p = ZPath(elnFile)
-                    dirName = sorted(p.iterdir())[0]
-                    metadataJsonFile = dirName.joinpath(METADATA_FILE)
-                    metadataContent = json.loads(metadataJsonFile.read_bytes())
+                    metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0]
+                    metadataContent = json.loads(elnFile.read(metadataJsonFile))
                     graph = metadataContent["@graph"]
                     # find information from master node
                     ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
                     if len(ro_crate_nodes) == 1:
                         for key in ROCRATE_NOTE_MANDATORY:
-                            if not key in ro_crate_nodes[0]:
+                            if key not in ro_crate_nodes[0]:
                                 print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
                     else:
                         print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ')
@@ -109,9 +110,9 @@ def processNode(graph, nodeID):
                         for partI in main_node['hasPart']:
                             success = processNode(graph, partI['@id']) and success
                     if fileName not in logJson:
-                        logJson[fileName] = {'params_metadata_json':success}
+                        logJson[fileName] = {LABEL:success}
                     else:
-                        logJson[fileName] = logJson[fileName] | {'params_metadata_json':success}
+                        logJson[fileName] = logJson[fileName] | {LABEL:success}
 
                     # count occurances of all keys
                     counts = {}
@@ -133,4 +134,4 @@ def processNode(graph, nodeID):
                     print(f'{prefix}{k:15}: {v}')
         print('\n\nSuccess:', success)
         json.dump(logJson, open('tests/logging.json', 'w'))
-        assert success
+        assert success  # summary assert: a failure here on a local run is fine; it exists so that the GitHub Actions run reports the overall result correctly
diff --git a/tests/test_02_schema.py b/tests/test_02_schema.py
new file mode 100644
index 0000000..1847da3
--- /dev/null
+++ b/tests/test_02_schema.py
@@ -0,0 +1,49 @@
+#!/usr/bin/python3
+"""
+Validate the ro-crate-metadata.json of each .eln file against the JSON Schema in tests/schema.json.
+This checks whether the metadata structure follows the conventions of the ELN consortium.
+"""
+import os
+import json
+import unittest
+from pathlib import Path
+from zipfile import ZIP_DEFLATED
+from zipfile import ZipFile
+from jsonschema import Draft202012Validator
+
+LABEL = 'schema'
+METADATA_FILE = 'ro-crate-metadata.json'
+
+class Test_1(unittest.TestCase):
+    """
+    derived class for this test
+    """
+    def test_main(self):
+        """
+        main function
+        """
+        # log-file
+        if Path('tests/logging.json').exists():
+            logJson = json.load(open('tests/logging.json'))
+        else:
+            logJson = {}
+
+        schema = json.load(open('tests/schema.json', 'r', encoding='utf-8'))
+        validator = Draft202012Validator(schema=schema)
+        validator.check_schema(schema=schema)
+        success = True
+        for root, _, files in os.walk(".", topdown=False):
+            for name in files:
+                if not name.endswith('.eln'):
+                    continue
+                fileName = os.path.join(root, name)
+                print(f'\nInspect: {name}')
+                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
+                    metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0]
+                    metadataContent = json.loads(elnFile.read(metadataJsonFile))
+                    for error in sorted(validator.iter_errors(metadataContent), key=str):
+                        print(f'- {error.message}')
+                        success = False
+                    logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success}
+        json.dump(logJson, open('tests/logging.json', 'w'))
+        assert success
diff --git a/tests/test_03_validator.py b/tests/test_03_validator.py
new file mode 100644
index 0000000..249557e
--- /dev/null
+++ b/tests/test_03_validator.py
@@ -0,0 +1,65 @@
+#!/usr/bin/python3
+"""
+Validate each .eln file with the rocrate-validator package (installed from PyPI as roc-validator).
+https://pypi.org/project/roc-validator/
+"""
+import os
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from zipfile import ZIP_DEFLATED
+from zipfile import ZipFile
+import rocrate_validator
+from rocrate_validator.errors import ValidationError
+import rocrate_validator.services as rvs
+
+LABEL = 'validator'
+METADATA_FILE = 'ro-crate-metadata.json'
+
+class Test_1(unittest.TestCase):
+    """
+    derived class for this test
+    """
+    def test_main(self):
+        """
+        main function
+        """
+        # log-file
+        if Path('tests/logging.json').exists():
+            logJson = json.load(open('tests/logging.json'))
+        else:
+            logJson = {}
+
+        for root, _, files in os.walk(".", topdown=False):
+            for name in files:
+                if not name.endswith('.eln'):
+                    continue
+                fileName = os.path.join(root, name)
+                print(f'\n\nTry to parse: {fileName}')
+                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
+                    dirpath = Path(tempfile.mkdtemp())/Path(fileName).parent.name
+                    dirpath.mkdir(parents=True, exist_ok=True)
+                    elnFile.extractall(dirpath)
+                    rocrate_dir= [i for i in dirpath.iterdir() if i.is_dir()][0]
+                    result = rvs.validate({
+                        "profiles_path": rocrate_validator.utils.get_profiles_path(),
+                        "profile_identifier": "ro-crate",
+                        "requirement_severity": rocrate_validator.models.Severity.REQUIRED.name,
+                        "requirement_severity_only": False,
+                        "inherit_profiles": True,
+                        "verbose": True,
+                        "data_path": rocrate_dir,
+                        "ontology_path": None,
+                        "abort_on_first": False
+                    })
+                    result_dict = result.to_dict()
+                    if result_dict['issues'] == [] and result_dict['passed']:
+                        success = True
+                    else:
+                        print(f'{fileName} is not valid')
+                        print(result_dict)
+                        success = False
+                    logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success}
+        json.dump(logJson, open('tests/logging.json', 'w'))
+        assert success
diff --git a/tests/test_99_logging.py b/tests/test_99_logging.py
index e5c368e..31c1478 100644
--- a/tests/test_99_logging.py
+++ b/tests/test_99_logging.py
@@ -4,6 +4,10 @@
 import json
 import unittest
 
+COLUMNS = ['pypi_rocrate','validator','schema','params_metadata_json']
+HEADER = "## Results of verification\nautomatically created\n\n"
+
+
 class Test_2(unittest.TestCase):
     """
     derived class for this test
@@ -12,25 +16,26 @@ def test_main(self):
         """
         main function
         """
-        columns = ['params_metadata_json', 'pypi_rocrate']
-        header = "## Results of verification\nautomatically created\n\n"
         if Path('tests/logging.json').exists():
             logJson = json.load(open('tests/logging.json'))
-            output = open('tests/logging.md', 'w')
-            output.write(header)
-            output.write(f'| software | file name | {" | ".join(columns)} |\n')
-            output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in columns])} |\n')
-            for filename, result in logJson.items():
-                software = filename.split('/')[2]
-                individualFileName = filename.split('/')[3]
-                if len(individualFileName)>30:
-                    individualFileName=individualFileName[:24]+'...eln'
-                resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in columns])
-                output.write(f'| {software} | {individualFileName} | {resultStr} |\n')
-            output.write("\n\nDefinition of tests\n")
-            output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n")
-            output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n")
-            output.close()
+            print(f'Test results\n{json.dumps(logJson, indent=2)}')
+            with open('tests/logging.md', 'w') as output:
+                output.write(HEADER)
+                output.write(f'| software | file name | {" | ".join(COLUMNS)} |\n')
+                output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in COLUMNS])} |\n')
+                for filename, result in logJson.items():
+                    software = filename.split('/')[2]
+                    individualFileName = filename.split('/')[3]
+                    if len(individualFileName)>30:
+                        individualFileName=individualFileName[:24]+'...eln'
+                    resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in COLUMNS])
+                    output.write(f'| {software} | {individualFileName} | {resultStr} |\n')
+                output.write("\n\nDefinition of tests\n")
+                output.write("- **pypi_rocrate**: tests if the .eln file can be opened by PyPI's rocrate library, i.e. if it can be parsed as an RO-Crate.\n")
+                output.write("- **validator**: tests if the general ro-crate conventions are fulfilled, using PyPI's roc-validator.\n")
+                output.write("- **schema**: tests if the conventions of the ELN consortium are fulfilled, using a JSON Schema description.\n")
+                output.write("- **params_metadata_json**: tests if the conventions of the ELN consortium are fulfilled, i.e. the required parameters exist and are consistent with the convention.\n")
+                output.close()
             print('Created logging markdown')
         else:
             print('Did not create logging markdown')