diff --git a/tests/schema.json b/tests/schema.json new file mode 100644 index 0000000..6c5e711 --- /dev/null +++ b/tests/schema.json @@ -0,0 +1,132 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "@context": { + "type": "string", + "format": "uri" + }, + "@graph": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + }, + "@type": { + "type": "string" + }, + "about": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "conformsTo": { + "type": "object", + "properties": { + "@id": { + "type": "string", + "format": "uri" + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "sdPublisher": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "version": { + "type": "string" + }, + "author": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + }, + "dateModified": { + "type": "string", + "format": "date-time" + }, + "name": { + "type": "string" + }, + "encodingFormat": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "genre": { + "type": "string" + }, + "creativeWorkStatus": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "keywords": { + "type": "string" + }, + "hasPart": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + } + }, + "comment": { + "type": "array", + "items": { + "type": "object", + "properties": { + "@id": { + "type": "string" + }, + "@type": { + "type": "string" + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "text": { + "type": "string" + }, + "author": { + "type": "object", + "properties": { + "@id": { + "type": "string" + } + } + } + } + } + } + }, + "required": ["@id", "@type"] + } + } + }, + "required": ["@context", "@graph"] +} \ No newline at end of file diff --git a/tests/test_00_pypi_rocrate.py b/tests/test_00_pypi_rocrate.py index c0378d3..ca5420e 100644 --- a/tests/test_00_pypi_rocrate.py +++ b/tests/test_00_pypi_rocrate.py @@ -5,14 +5,15 @@ """ import os import json -import unittest +import unittest, traceback import tempfile from pathlib import Path from zipfile import ZIP_DEFLATED -from zipfile import Path as ZPath from zipfile import ZipFile from rocrate.rocrate import ROCrate +LABEL = 'pypi_rocrate' + class Test_1(unittest.TestCase): """ derived class for this test @@ -35,25 +36,26 @@ def test_main(self): fileName = os.path.join(root, name) print(f'\n\nTry to parse: {fileName}') with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: - p = ZPath(elnFile) - dirName = sorted(p.iterdir())[0] + dirName = os.path.splitext(os.path.basename(fileName))[0] try: dirpath = Path(tempfile.mkdtemp()) elnFile.extractall(dirpath) - temppath= dirpath.joinpath(dirName.name) - crate = ROCrate(temppath) + tempPath= [i for i in dirpath.iterdir() if i.is_dir()][0] + crate = ROCrate(tempPath) for e in crate.get_entities(): print(f' {e.id}: {e.type}') if fileName not in logJson: - logJson[fileName] = {'pypi_rocrate':True} + logJson[fileName] = {LABEL:True} else: - logJson[fileName] = logJson[fileName] | {'pypi_rocrate':True} + logJson[fileName] = logJson[fileName] | {LABEL:True} except Exception: print(" ***** ERROR: Could not parse content of this file!! *****") + print(f" Temporary folder: ",tempPath) + print(traceback.format_exc()) if fileName not in logJson: - logJson[fileName] = {'pypi_rocrate':False} + logJson[fileName] = {LABEL:False} else: - logJson[fileName] = logJson[fileName] | {'pypi_rocrate':False} + logJson[fileName] = logJson[fileName] | {LABEL:False} success = False json.dump(logJson, open('tests/logging.json', 'w')) assert success diff --git a/tests/test_01_params_metadata_json.py b/tests/test_01_params_metadata_json.py index d93ddfa..228150f 100644 --- a/tests/test_01_params_metadata_json.py +++ b/tests/test_01_params_metadata_json.py @@ -8,6 +8,7 @@ from zipfile import Path as ZPath from zipfile import ZipFile + class Test_2(unittest.TestCase): """ derived class for this test @@ -28,6 +29,7 @@ def test_main(self): OUTPUT_INFO = False OUTPUT_COUNTS = True KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type'] + LABEL = 'params_metadata_json' # log-file if Path('tests/logging.json').exists(): @@ -52,24 +54,25 @@ def processNode(graph, nodeID): # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT if '@type' not in node: print('**ERROR: all nodes must have @type. check:', nodeID) + return False if node['@type'] == 'Dataset': for key in DATASET_MANDATORY: - if not key in node: + if key not in node: print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}') globalSuccess = False for key in DATASET_SUGGESTED: - if not key in node and OUTPUT_INFO: + if key not in node and OUTPUT_INFO: print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}') elif node['@type'] == 'File': for key in FILE_MANDATORY: - if not key in node: + if key not in node: print(f'**ERROR in file: "{key}" not in @id={node["@id"]}') globalSuccess = False for key in FILE_SUGGESTED: - if not key in node and OUTPUT_INFO: + if key not in node and OUTPUT_INFO: print(f'**INFO for file: "{key}" not in @id={node["@id"]}') # CHECK PROPERTIES FOR ALL KEYS - if any([str(i).strip()=='' for i in node.values()]): + if any(not str(i).strip() for i in node.values()): print(f'**WARNING: {nodeID} contains empty values in the key-value pairs') # SPECIFIC CHECKS ON CERTAIN KEYS if isinstance(node.get('keywords', ''), list): @@ -89,16 +92,14 @@ def processNode(graph, nodeID): print(f'\n\nParse: {fileName}') with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: success = True - p = ZPath(elnFile) - dirName = sorted(p.iterdir())[0] - metadataJsonFile = dirName.joinpath(METADATA_FILE) - metadataContent = json.loads(metadataJsonFile.read_bytes()) + metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0] + metadataContent = json.loads(elnFile.read(metadataJsonFile)) graph = metadataContent["@graph"] # find information from master node ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE] if len(ro_crate_nodes) == 1: for key in ROCRATE_NOTE_MANDATORY: - if not key in ro_crate_nodes[0]: + if key not in ro_crate_nodes[0]: print(f'**ERROR: "{key}" not in @id={METADATA_FILE}') else: print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ') @@ -109,9 +110,9 @@ def processNode(graph, nodeID): for partI in main_node['hasPart']: success = processNode(graph, partI['@id']) and success if fileName not in logJson: - logJson[fileName] = {'params_metadata_json':success} + logJson[fileName] = {LABEL:success} else: - logJson[fileName] = logJson[fileName] | {'params_metadata_json':success} + logJson[fileName] = logJson[fileName] | {LABEL:success} # count occurances of all keys counts = {} @@ -133,4 +134,4 @@ def processNode(graph, nodeID): print(f'{prefix}{k:15}: {v}') print('\n\nSuccess:', success) json.dump(logJson, open('tests/logging.json', 'w')) - assert success + assert success #if this fails on your local test, great. It is a summary such that github actions report correctly diff --git a/tests/test_02_schema.py b/tests/test_02_schema.py new file mode 100644 index 0000000..1847da3 --- /dev/null +++ b/tests/test_02_schema.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 +""" +Validate if rocrate of pypi can open and parse it. This is a test if we follow general ro-crate guidelines. +https://pypi.org/project/rocrate/ +""" +import os +import json +import unittest +from pathlib import Path +from zipfile import ZIP_DEFLATED +from zipfile import ZipFile +from jsonschema import Draft202012Validator + +LABEL = 'schema' +METADATA_FILE = 'ro-crate-metadata.json' + +class Test_1(unittest.TestCase): + """ + derived class for this test + """ + def test_main(self): + """ + main function + """ + # log-file + if Path('tests/logging.json').exists(): + logJson = json.load(open('tests/logging.json')) + else: + logJson = {} + + schema = json.load(open('tests/schema.json', 'r', encoding='utf-8')) + validator = Draft202012Validator(schema=schema) + validator.check_schema(schema=schema) + success = True + for root, _, files in os.walk(".", topdown=False): + for name in files: + if not name.endswith('.eln'): + continue + fileName = os.path.join(root, name) + print(f'\nInspect: {name}') + with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile: + metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0] + metadataContent = json.loads(elnFile.read(metadataJsonFile)) + for error in sorted(validator.iter_errors(metadataContent), key=str): + print(f'- {error.message}') + success = False + logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success} + json.dump(logJson, open('tests/logging.json', 'w')) + assert success diff --git a/tests/test_99_logging.py b/tests/test_99_logging.py index e5c368e..a28723c 100644 --- a/tests/test_99_logging.py +++ b/tests/test_99_logging.py @@ -4,6 +4,10 @@ import json import unittest +COLUMNS = ['params_metadata_json', 'pypi_rocrate','schema'] +HEADER = "## Results of verification\nautomatically created\n\n" + + class Test_2(unittest.TestCase): """ derived class for this test @@ -12,25 +16,25 @@ def test_main(self): """ main function """ - columns = ['params_metadata_json', 'pypi_rocrate'] - header = "## Results of verification\nautomatically created\n\n" if Path('tests/logging.json').exists(): logJson = json.load(open('tests/logging.json')) - output = open('tests/logging.md', 'w') - output.write(header) - output.write(f'| software | file name | {" | ".join(columns)} |\n') - output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in columns])} |\n') - for filename, result in logJson.items(): - software = filename.split('/')[2] - individualFileName = filename.split('/')[3] - if len(individualFileName)>30: - individualFileName=individualFileName[:24]+'...eln' - resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in columns]) - output.write(f'| {software} | {individualFileName} | {resultStr} |\n') - output.write("\n\nDefinition of tests\n") - output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n") - output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n") - output.close() + print(f'Test results\n{json.dumps(logJson, indent=2)}') + with open('tests/logging.md', 'w') as output: + output.write(HEADER) + output.write(f'| software | file name | {" | ".join(COLUMNS)} |\n') + output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in COLUMNS])} |\n') + for filename, result in logJson.items(): + software = filename.split('/')[2] + individualFileName = filename.split('/')[3] + if len(individualFileName)>30: + individualFileName=individualFileName[:24]+'...eln' + resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in COLUMNS]) + output.write(f'| {software} | {individualFileName} | {resultStr} |\n') + output.write("\n\nDefinition of tests\n") + output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n") + output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n") + output.write("- **schema**: tests if the conventions of the consortium are fulfilled using a schema description.\n") + output.close() print('Created logging markdown') else: print('Did not create logging markdown')