Merge pull request #59 from SteffenBrinckmann/sb_validator_test_metadata
Validator test metadata: github action
NicolasCARPi authored Jan 18, 2024
2 parents 31f18f9 + 041e8aa commit e5372d0
Showing 7 changed files with 266 additions and 184 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/pytest.yml
@@ -0,0 +1,32 @@
name: pytest
# Run all pytests in the test folder

on: [ push ]

permissions:
  contents: write
  pull-requests: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest rocrate
      - name: Test with pytest
        run: |
          pytest --tb=no -s
        continue-on-error: true
      - name: Create action summary
        run: |
          cat tests/logging.md >> $GITHUB_STEP_SUMMARY
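
Note: the same checks can be reproduced locally with the commands the workflow itself runs: install the dependencies (pip install pytest rocrate) and execute pytest --tb=no -s from the repository root. The per-file results are written to tests/logging.json and the summary table to tests/logging.md, which the workflow appends to the GitHub Actions step summary.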
54 changes: 0 additions & 54 deletions .github/workflows/validator_pypi_rocrate.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
__pycache__

*.pyc
59 changes: 59 additions & 0 deletions tests/test_00_pypi_rocrate.py
@@ -0,0 +1,59 @@
#!/usr/bin/python3
"""
Validate that the rocrate package from PyPI can open and parse each .eln file, i.e. that we follow the general RO-Crate guidelines.
https://pypi.org/project/rocrate/
"""
import os
import json
import unittest
import tempfile
from pathlib import Path
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile
from rocrate.rocrate import ROCrate

class Test_1(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        # log-file
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
        else:
            logJson = {}

        success = True
        for root, _, files in os.walk(".", topdown=False):
            for name in files:
                if not name.endswith('.eln'):
                    continue
                fileName = os.path.join(root, name)
                print(f'\n\nTry to parse: {fileName}')
                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
                    p = ZPath(elnFile)
                    dirName = sorted(p.iterdir())[0]
                    try:
                        # extract the archive and parse its top-level folder as an RO-Crate
                        dirpath = Path(tempfile.mkdtemp())
                        elnFile.extractall(dirpath)
                        temppath = dirpath.joinpath(dirName.name)
                        crate = ROCrate(temppath)
                        for e in crate.get_entities():
                            print(f'  {e.id}: {e.type}')
                        if fileName not in logJson:
                            logJson[fileName] = {'pypi_rocrate': True}
                        else:
                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate': True}
                    except Exception:
                        print("  ***** ERROR: Could not parse content of this file!! *****")
                        if fileName not in logJson:
                            logJson[fileName] = {'pypi_rocrate': False}
                        else:
                            logJson[fileName] = logJson[fileName] | {'pypi_rocrate': False}
                        success = False
        json.dump(logJson, open('tests/logging.json', 'w'))
        assert success
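
For illustration, an entry written to tests/logging.json by this test looks like the following (the path is hypothetical; the key mirrors the test name):

    {
      "./SampleSoftware/sample.eln": {
        "pypi_rocrate": true
      }
    }

Subsequent tests merge their own flag (e.g. params_metadata_json) into the same per-file object via the dict-union operator.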
136 changes: 136 additions & 0 deletions tests/test_01_params_metadata_json.py
@@ -0,0 +1,136 @@
#!/usr/bin/python3
""" This tests against rules that we as the ELN consortium set for ourselves """
import os
import json
from pathlib import Path
import unittest
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile

class Test_2(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        # global variables worth discussion
        ROCRATE_NOTE_MANDATORY = ['version', 'sdPublisher']
        DATASET_MANDATORY = ['name']
        DATASET_SUGGESTED = ['author', 'mentions', 'dateCreated', 'dateModified', 'identifier', 'text', 'keywords']
        FILE_MANDATORY = ['name']
        FILE_SUGGESTED = ['sha256', 'encodingFormat', 'contentSize', 'description']

        # runtime global variables
        METADATA_FILE = 'ro-crate-metadata.json'
        OUTPUT_INFO = False
        OUTPUT_COUNTS = True
        KNOWN_KEYS = DATASET_MANDATORY + DATASET_SUGGESTED + FILE_MANDATORY + FILE_SUGGESTED + ['@id', '@type']

        # log-file
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
        else:
            logJson = {}

        def processNode(graph, nodeID):
            """
            recursive function call to process each node
            Args:
              graph: full graph
              nodeID: id of node in graph
            """
            globalSuccess = True
            nodes = [i for i in graph if '@id' in i and i['@id'] == nodeID]
            if len(nodes) != 1:
                print('**ERROR: all entries must only occur once in crate. check:', nodeID)
                return
            node = nodes[0]
            # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
            if '@type' not in node:
                print('**ERROR: all nodes must have @type. check:', nodeID)
            if node['@type'] == 'Dataset':
                for key in DATASET_MANDATORY:
                    if not key in node:
                        print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
                        globalSuccess = False
                for key in DATASET_SUGGESTED:
                    if not key in node and OUTPUT_INFO:
                        print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
            elif node['@type'] == 'File':
                for key in FILE_MANDATORY:
                    if not key in node:
                        print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
                        globalSuccess = False
                for key in FILE_SUGGESTED:
                    if not key in node and OUTPUT_INFO:
                        print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
            # CHECK PROPERTIES FOR ALL KEYS
            if any([str(i).strip() == '' for i in node.values()]):
                print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
            # SPECIFIC CHECKS ON CERTAIN KEYS
            if isinstance(node.get('keywords', ''), list):
                print(f'**ERROR: {nodeID} contains an array of keywords. Use comma or space separated string')
                globalSuccess = False
            # recurse to children
            children = node.pop('hasPart') if 'hasPart' in node else []
            for child in children:
                globalSuccess = processNode(graph, child['@id']) and globalSuccess
            return globalSuccess

        for root, _, files in os.walk(".", topdown=False):
            for name in files:
                if not name.endswith('.eln'):
                    continue
                fileName = os.path.join(root, name)
                print(f'\n\nParse: {fileName}')
                with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
                    success = True
                    p = ZPath(elnFile)
                    dirName = sorted(p.iterdir())[0]
                    metadataJsonFile = dirName.joinpath(METADATA_FILE)
                    metadataContent = json.loads(metadataJsonFile.read_bytes())
                    graph = metadataContent["@graph"]
                    # find information from master node
                    ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
                    if len(ro_crate_nodes) == 1:
                        for key in ROCRATE_NOTE_MANDATORY:
                            if not key in ro_crate_nodes[0]:
                                print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
                    else:
                        print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist')
                        success = False
                    main_node = [i for i in graph if i["@id"] == "./"][0]

                    # iteratively go through graph
                    for partI in main_node['hasPart']:
                        success = processNode(graph, partI['@id']) and success
                    if fileName not in logJson:
                        logJson[fileName] = {'params_metadata_json': success}
                    else:
                        logJson[fileName] = logJson[fileName] | {'params_metadata_json': success}

                    # count occurrences of all keys
                    counts = {}
                    for node in graph:
                        if node['@id'] in ['./', METADATA_FILE]:
                            continue
                        for key in node.keys():
                            if key in counts:
                                counts[key] += 1
                            else:
                                counts[key] = 1

                    view = [(v, k) for k, v in counts.items()]
                    view.sort(reverse=True)
                    if OUTPUT_COUNTS:
                        print('===== Counts (* unspecified)')
                        for v, k in view:
                            prefix = '    ' if k in KNOWN_KEYS else '  * '
                            print(f'{prefix}{k:15}: {v}')
                    print('\n\nSuccess:', success)
        json.dump(logJson, open('tests/logging.json', 'w'))
        assert success
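
As a reading aid, a hypothetical Dataset node that would satisfy the mandatory check and trigger none of the INFO messages could look like this (all values are invented; the key lists come from DATASET_MANDATORY and DATASET_SUGGESTED above, and keywords must be a comma- or space-separated string rather than an array):

    {
      "@id": "./experiment-1/",
      "@type": "Dataset",
      "name": "Example experiment",
      "author": {"@id": "./author/jane-doe"},
      "mentions": {"@id": "./experiment-1/export.json"},
      "dateCreated": "2024-01-18T10:00:00",
      "dateModified": "2024-01-18T11:00:00",
      "identifier": "20240118-example",
      "text": "Free-text description",
      "keywords": "eln, ro-crate, example",
      "hasPart": [{"@id": "./experiment-1/data.csv"}]
    }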
36 changes: 36 additions & 0 deletions tests/test_99_logging.py
@@ -0,0 +1,36 @@
#!/usr/bin/python3
""" Convert a logging.json to a readme file """
from pathlib import Path
import json
import unittest

class Test_2(unittest.TestCase):
    """
    derived class for this test
    """
    def test_main(self):
        """
        main function
        """
        columns = ['params_metadata_json', 'pypi_rocrate']
        header = "## Results of verification\nautomatically created\n\n"
        if Path('tests/logging.json').exists():
            logJson = json.load(open('tests/logging.json'))
            output = open('tests/logging.md', 'w')
            output.write(header)
            output.write(f'| software | file name | {" | ".join(columns)} |\n')
            output.write(f'| -------- | --------- | {" | ".join(["-----------" for _ in columns])} |\n')
            for filename, result in logJson.items():
                software = filename.split('/')[2]
                individualFileName = filename.split('/')[3]
                if len(individualFileName) > 30:
                    individualFileName = individualFileName[:24] + '...eln'
                resultStr = ' | '.join([':white_check_mark:' if result[col] else ':x:' for col in columns])
                output.write(f'| {software} | {individualFileName} | {resultStr} |\n')
            output.write("\n\nDefinition of tests\n")
            output.write("- **pypi_rocrate**: tests if eln-file can be opened by pypi's rocrate; aka if eln file conforms to rocrate convention.\n")
            output.write("- **params_metadata_json**: tests if the conventions of the consortium are fulfilled, aka parameters exist and are consistent with convention.\n")
            output.close()
            print('Created logging markdown')
        else:
            print('Did not create logging markdown')
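
The generated tests/logging.md therefore contains one row per .eln file in the format written above, for example (values hypothetical):

    | software | file name | params_metadata_json | pypi_rocrate |
    | -------- | --------- | ----------- | ----------- |
    | SampleSoftware | sample.eln | :white_check_mark: | :x: |

followed by the test definitions; this is the file the workflow appends to the GitHub Actions step summary.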
