Merge pull request #59 from SteffenBrinckmann/sb_validator_test_metadata
Validator test metadata: github action
NicolasCARPi authored Jan 18, 2024
2 parents 31f18f9 + 041e8aa commit e5372d0
Showing 7 changed files with 266 additions and 184 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/pytest.yml
@@ -0,0 +1,32 @@
name: pytest
# Run all pytests in the test folder

on: [ push ]

permissions:
  contents: write
  pull-requests: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest rocrate
      - name: Test with pytest
        run: |
          pytest --tb=no -s
        continue-on-error: true
      - name: Create action summary
        run: |
          cat tests/logging.md >> $GITHUB_STEP_SUMMARY
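
Note: the same checks can be reproduced locally with the commands the workflow itself runs: install the dependencies (pip install pytest rocrate) and execute pytest --tb=no -s from the repository root. The per-file results are written to tests/logging.json and the summary table to tests/logging.md, which the workflow appends to the GitHub Actions step summary.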
54 changes: 0 additions & 54 deletions .github/workflows/validator_pypi_rocrate.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
__pycache__

*.pyc
59 changes: 59 additions & 0 deletions tests/test_00_pypi_rocrate.py
@@ -0,0 +1,59 @@
#!/usr/bin/python3
"""
Validate that the rocrate package from PyPI can open and parse each .eln file, i.e. that we follow the general RO-Crate guidelines.
https://pypi.org/project/rocrate/
"""
import os
import json
import unittest
import tempfile
from pathlib import Path
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile
from rocrate.rocrate import ROCrate

class Test_1(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        # log-file
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
        else:
            logJson = {}

        success = True
        for root, _, files in os.walk(".", topdown=False):
            for name in files:
                if not name.endswith('.eln'):
                    continue
                fileName = os.path.join(root, name)
                print(f'\n\nTry to parse: {fileName}')
                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
                    p = ZPath(elnFile)
                    dirName = sorted(p.iterdir())[0]
                    try:
                        # extract the archive and parse its top-level folder as an RO-Crate
                        dirpath = Path(tempfile.mkdtemp())
                        elnFile.extractall(dirpath)
                        temppath = dirpath.joinpath(dirName.name)
                        crate = ROCrate(temppath)
                        for e in crate.get_entities():
                            print(f'  {e.id}: {e.type}')
                        if fileName not in logJson:
                            logJson[fileName] = {'pypi_rocrate': True}
                        else:
                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate': True}
                    except Exception:
                        print("  ***** ERROR: Could not parse content of this file!! *****")
                        if fileName not in logJson:
                            logJson[fileName] = {'pypi_rocrate': False}
                        else:
                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate': False}
                        success = False
        json.dump(logJson, open('tests/logging.json', 'w'))
        assert success
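
For illustration, an entry written to tests/logging.json by this test looks like the following (the path is hypothetical; the key mirrors the test name):

    {
      "./SampleSoftware/sample.eln": {
        "pypi_rocrate": true
      }
    }

Subsequent tests merge their own flag (e.g. params_metadata_json) into the same per-file object via the dict-union operator.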
136 changes: 136 additions & 0 deletions tests/test_01_params_metadata_json.py
@@ -0,0 +1,136 @@
#!/usr/bin/python3
""" This tests against rules that we as the ELN consortium set for ourselves """
import os
import json
from pathlib import Path
import unittest
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile

class Test_2(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        # global variables worth discussion
        ROCRATE_NOTE_MANDATORY = ['version', 'sdPublisher']
        DATASET_MANDATORY = ['name']
        DATASET_SUGGESTED = ['author', 'mentions', 'dateCreated', 'dateModified', 'identifier', 'text', 'keywords']
        FILE_MANDATORY = ['name']
        FILE_SUGGESTED = ['sha256', 'encodingFormat', 'contentSize', 'description']

        # runtime global variables
        METADATA_FILE = 'ro-crate-metadata.json'
        OUTPUT_INFO = False
        OUTPUT_COUNTS = True
        KNOWN_KEYS = DATASET_MANDATORY + DATASET_SUGGESTED + FILE_MANDATORY + FILE_SUGGESTED + ['@id', '@type']

        # log-file
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
        else:
            logJson = {}

        def processNode(graph, nodeID):
            """
            recursive function call to process each node
            Args:
              graph: full graph
              nodeID: id of node in graph
            """
            globalSuccess = True
            nodes = [i for i in graph if '@id' in i and i['@id'] == nodeID]
            if len(nodes) != 1:
                print('**ERROR: all entries must only occur once in crate. check:', nodeID)
                return
            node = nodes[0]
            # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
            if '@type' not in node:
                print('**ERROR: all nodes must have @type. check:', nodeID)
            if node['@type'] == 'Dataset':
                for key in DATASET_MANDATORY:
                    if not key in node:
                        print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
                        globalSuccess = False
                for key in DATASET_SUGGESTED:
                    if not key in node and OUTPUT_INFO:
                        print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
            elif node['@type'] == 'File':
                for key in FILE_MANDATORY:
                    if not key in node:
                        print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
                        globalSuccess = False
                for key in FILE_SUGGESTED:
                    if not key in node and OUTPUT_INFO:
                        print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
            # CHECK PROPERTIES FOR ALL KEYS
            if any([str(i).strip() == '' for i in node.values()]):
                print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
            # SPECIFIC CHECKS ON CERTAIN KEYS
            if isinstance(node.get('keywords', ''), list):
                print(f'**ERROR: {nodeID} contains an array of keywords. Use comma or space separated string')
                globalSuccess = False
            # recurse to children
            children = node.pop('hasPart') if 'hasPart' in node else []
            for child in children:
                globalSuccess = processNode(graph, child['@id']) and globalSuccess
            return globalSuccess

        for root, _, files in os.walk(".", topdown=False):
            for name in files:
                if not name.endswith('.eln'):
                    continue
                fileName = os.path.join(root, name)
                print(f'\n\nParse: {fileName}')
                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
                    success = True
                    p = ZPath(elnFile)
                    dirName = sorted(p.iterdir())[0]
                    metadataJsonFile = dirName.joinpath(METADATA_FILE)
                    metadataContent = json.loads(metadataJsonFile.read_bytes())
                    graph = metadataContent["@graph"]
                    # find information from master node
                    ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
                    if len(ro_crate_nodes) == 1:
                        for key in ROCRATE_NOTE_MANDATORY:
                            if not key in ro_crate_nodes[0]:
                                print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
                    else:
                        print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist')
                        success = False
                    main_node = [i for i in graph if i["@id"] == "./"][0]

                    # iteratively go through graph
                    for partI in main_node['hasPart']:
                        success = processNode(graph, partI['@id']) and success
                    if fileName not in logJson:
                        logJson[fileName] = {'params_metadata_json': success}
                    else:
                        logJson[fileName] = logJson[fileName] | {'params_metadata_json': success}

                    # count occurrences of all keys
                    counts = {}
                    for node in graph:
                        if node['@id'] in ['./', METADATA_FILE]:
                            continue
                        for key in node.keys():
                            if key in counts:
                                counts[key] += 1
                            else:
                                counts[key] = 1

                    view = [(v, k) for k, v in counts.items()]
                    view.sort(reverse=True)
                    if OUTPUT_COUNTS:
                        print('===== Counts (* unspecified)')
                        for v, k in view:
                            prefix = '    ' if k in KNOWN_KEYS else '  * '
                            print(f'{prefix}{k:15}: {v}')
                    print('\n\nSuccess:', success)
        json.dump(logJson, open('tests/logging.json', 'w'))
        assert success
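
As a reading aid, a hypothetical Dataset node that would satisfy the mandatory check and trigger none of the INFO messages could look like this (all values are invented; the key lists come from DATASET_MANDATORY and DATASET_SUGGESTED above, and keywords must be a comma- or space-separated string rather than an array):

    {
      "@id": "./experiment-1/",
      "@type": "Dataset",
      "name": "Example experiment",
      "author": {"@id": "./author/jane-doe"},
      "mentions": {"@id": "./experiment-1/export.json"},
      "dateCreated": "2024-01-18T10:00:00",
      "dateModified": "2024-01-18T11:00:00",
      "identifier": "20240118-example",
      "text": "Free-text description",
      "keywords": "eln, ro-crate, example",
      "hasPart": [{"@id": "./experiment-1/data.csv"}]
    }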
36 changes: 36 additions & 0 deletions tests/test_99_logging.py
@@ -0,0 +1,36 @@
#!/usr/bin/python3
""" Convert a logging.json to a readme file """
from pathlib import Path
import json
import unittest

class Test_2(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        columns = ['params_metadata_json', 'pypi_rocrate']
        header = "## Results of verification\nautomatically created\n\n"
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
            output = open('tests/logging.md', 'w')
            output.write(header)
            output.write(f'| software | file name | {" | ".join(columns)} |\n')
            output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in columns])} |\n')
            for filename, result in logJson.items():
                software = filename.split('/')[2]
                individualFileName = filename.split('/')[3]
                if len(individualFileName) > 30:
                    individualFileName = individualFileName[:24] + '...eln'
                resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in columns])
                output.write(f'| {software} | {individualFileName} | {resultStr} |\n')
            output.write("\n\nDefinition of tests\n")
            output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n")
            output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n")
            output.close()
            print('Created logging markdown')
        else:
            print('Did not create logging markdown')
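
The generated tests/logging.md therefore contains one row per .eln file in the format written above, for example (values hypothetical):

    | software | file name | params_metadata_json | pypi_rocrate |
    | -------- | --------- | ----------- | ----------- |
    | SampleSoftware | sample.eln | :white_check_mark: | :x: |

followed by the test definitions; this is the file the workflow appends to the GitHub Actions step summary.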
