Update tests #97

Merged
4 changes: 2 additions & 2 deletions .github/workflows/pytest.yml
@@ -22,10 +22,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest rocrate
pip install pytest rocrate jsonschema roc-validator
- name: Test with pytest
run: |
pytest --tb=no -s
pytest --tb=short -s
continue-on-error: true
- name: Create action summary
run: |
132 changes: 132 additions & 0 deletions tests/schema.json
@@ -0,0 +1,132 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"@context": {
"type": "string",
"format": "uri"
},
"@graph": {
"type": "array",
"items": {
"type": "object",
"properties": {
"@id": {
"type": "string"
},
"@type": {
"type": "string"
},
"about": {
"type": "object",
"properties": {
"@id": {
"type": "string"
}
}
},
"conformsTo": {
"type": "object",
"properties": {
"@id": {
"type": "string",
"format": "uri"
}
}
},
"dateCreated": {
"type": "string",
"format": "date-time"
},
"sdPublisher": {
"type": "object",
"properties": {
"@id": {
"type": "string"
}
}
},
"version": {
"type": "string"
},
"author": {
"type": "object",
"properties": {
"@id": {
"type": "string"
}
}
},
"dateModified": {
"type": "string",
"format": "date-time"
},
"name": {
"type": "string"
},
"encodingFormat": {
"type": "string"
},
"url": {
"type": "string",
"format": "uri"
},
"genre": {
"type": "string"
},
"creativeWorkStatus": {
"type": "string"
},
"identifier": {
"type": "string"
},
"keywords": {
"type": "string"
},
"hasPart": {
"type": "array",
"items": {
"type": "object",
"properties": {
"@id": {
"type": "string"
}
}
}
},
"comment": {
"type": "array",
"items": {
"type": "object",
"properties": {
"@id": {
"type": "string"
},
"@type": {
"type": "string"
},
"dateCreated": {
"type": "string",
"format": "date-time"
},
"text": {
"type": "string"
},
"author": {
"type": "object",
"properties": {
"@id": {
"type": "string"
}
}
}
}
}
}
},
"required": ["@id", "@type"]
}
}
},
"required": ["@context", "@graph"]
}
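
A minimal sketch of how this schema can be exercised on its own with jsonschema; the example document below is made up for illustration, and the Draft202012Validator choice simply mirrors the new tests/test_02_schema.py:

# Sketch only: validate a hypothetical metadata document against tests/schema.json
import json
from jsonschema import Draft202012Validator

schema = json.load(open('tests/schema.json', encoding='utf-8'))
validator = Draft202012Validator(schema=schema)
example = {  # illustrative document, not taken from a real .eln file
    "@context": "https://w3id.org/ro/crate/1.1/context",
    "@graph": [
        {"@id": "ro-crate-metadata.json", "@type": "CreativeWork", "about": {"@id": "./"}},
        {"@id": "./", "@type": "Dataset", "name": "example crate"},
    ],
}
errors = sorted(validator.iter_errors(example), key=str)
print('valid' if not errors else [e.message for e in errors])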
26 changes: 15 additions & 11 deletions tests/test_00_pypi_rocrate.py
@@ -5,14 +5,16 @@
"""
import os
import json
import unittest
import unittest, traceback
import tempfile
from pathlib import Path
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile
from rocrate.rocrate import ROCrate

LABEL = 'pypi_rocrate'
verbose = False

class Test_1(unittest.TestCase):
"""
derived class for this test
@@ -35,25 +37,27 @@ def test_main(self):
fileName = os.path.join(root, name)
print(f'\n\nTry to parse: {fileName}')
with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
p = ZPath(elnFile)
dirName = sorted(p.iterdir())[0]
dirName = os.path.splitext(os.path.basename(fileName))[0]
try:
dirpath = Path(tempfile.mkdtemp())
elnFile.extractall(dirpath)
temppath= dirpath.joinpath(dirName.name)
crate = ROCrate(temppath)
tempPath= [i for i in dirpath.iterdir() if i.is_dir()][0]
crate = ROCrate(tempPath)
for e in crate.get_entities():
print(f' {e.id}: {e.type}')
if verbose:
print(f' {e.id}: {e.type}')
if fileName not in logJson:
logJson[fileName] = {'pypi_rocrate':True}
logJson[fileName] = {LABEL:True}
else:
logJson[fileName] = logJson[fileName] | {'pypi_rocrate':True}
logJson[fileName] = logJson[fileName] | {LABEL:True}
except Exception:
print(" ***** ERROR: Could not parse content of this file!! *****")
print(f" Temporary folder: ",tempPath)
print(traceback.format_exc())
if fileName not in logJson:
logJson[fileName] = {'pypi_rocrate':False}
logJson[fileName] = {LABEL:False}
else:
logJson[fileName] = logJson[fileName] | {'pypi_rocrate':False}
logJson[fileName] = logJson[fileName] | {LABEL:False}
success = False
json.dump(logJson, open('tests/logging.json', 'w'))
assert success
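
Taken together, the revised parsing path in this test boils down to the following sketch; 'example.eln' is a placeholder file name, not something shipped with the repository:

# Sketch of the extract-then-parse pattern used above
import tempfile
from pathlib import Path
from zipfile import ZipFile
from rocrate.rocrate import ROCrate

with ZipFile('example.eln', 'r') as elnFile:  # placeholder path
    dirpath = Path(tempfile.mkdtemp())
    elnFile.extractall(dirpath)
    # .eln archives keep their content in a single top-level folder; hand that folder to ROCrate
    cratePath = [i for i in dirpath.iterdir() if i.is_dir()][0]
    crate = ROCrate(cratePath)
    for entity in crate.get_entities():
        print(entity.id, entity.type)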
29 changes: 15 additions & 14 deletions tests/test_01_params_metadata_json.py
@@ -8,6 +8,7 @@
from zipfile import Path as ZPath
from zipfile import ZipFile


class Test_2(unittest.TestCase):
"""
derived class for this test
@@ -26,8 +27,9 @@ def test_main(self):
# runtime global variables
METADATA_FILE = 'ro-crate-metadata.json'
OUTPUT_INFO = False
OUTPUT_COUNTS = True
OUTPUT_COUNTS = False
KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type']
LABEL = 'params_metadata_json'

# log-file
if Path('tests/logging.json').exists():
@@ -52,24 +54,25 @@ def processNode(graph, nodeID):
# CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
if '@type' not in node:
print('**ERROR: all nodes must have @type. check:', nodeID)
return False
if node['@type'] == 'Dataset':
for key in DATASET_MANDATORY:
if not key in node:
if key not in node:
print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
globalSuccess = False
for key in DATASET_SUGGESTED:
if not key in node and OUTPUT_INFO:
if key not in node and OUTPUT_INFO:
print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
elif node['@type'] == 'File':
for key in FILE_MANDATORY:
if not key in node:
if key not in node:
print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
globalSuccess = False
for key in FILE_SUGGESTED:
if not key in node and OUTPUT_INFO:
if key not in node and OUTPUT_INFO:
print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
# CHECK PROPERTIES FOR ALL KEYS
if any([str(i).strip()=='' for i in node.values()]):
if any(not str(i).strip() for i in node.values()):
print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
# SPECIFIC CHECKS ON CERTAIN KEYS
if isinstance(node.get('keywords', ''), list):
@@ -89,16 +92,14 @@ def processNode(graph, nodeID):
print(f'\n\nParse: {fileName}')
with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
success = True
p = ZPath(elnFile)
dirName = sorted(p.iterdir())[0]
metadataJsonFile = dirName.joinpath(METADATA_FILE)
metadataContent = json.loads(metadataJsonFile.read_bytes())
metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0]
metadataContent = json.loads(elnFile.read(metadataJsonFile))
graph = metadataContent["@graph"]
# find information from master node
ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
if len(ro_crate_nodes) == 1:
for key in ROCRATE_NOTE_MANDATORY:
if not key in ro_crate_nodes[0]:
if key not in ro_crate_nodes[0]:
print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
else:
print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ')
@@ -109,9 +110,9 @@ def processNode(graph, nodeID):
for partI in main_node['hasPart']:
success = processNode(graph, partI['@id']) and success
if fileName not in logJson:
logJson[fileName] = {'params_metadata_json':success}
logJson[fileName] = {LABEL:success}
else:
logJson[fileName] = logJson[fileName] | {'params_metadata_json':success}
logJson[fileName] = logJson[fileName] | {LABEL:success}

# count occurrences of all keys
counts = {}
Expand All @@ -133,4 +134,4 @@ def processNode(graph, nodeID):
print(f'{prefix}{k:15}: {v}')
print('\n\nSuccess:', success)
json.dump(logJson, open('tests/logging.json', 'w'))
assert success
assert success  # a failure here in a local run is expected; the assert exists so that GitHub Actions reports the summary correctly
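
The metadata lookup now goes through ZipFile.namelist() instead of zipfile.Path; in isolation the pattern is roughly this, with 'example.eln' again a placeholder:

# Sketch of the namelist()-based metadata lookup used above
import json
from zipfile import ZipFile

METADATA_FILE = 'ro-crate-metadata.json'
with ZipFile('example.eln', 'r') as elnFile:  # placeholder path
    # match by suffix because the archive wraps everything in a top-level folder
    metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0]
    metadataContent = json.loads(elnFile.read(metadataJsonFile))
    print(len(metadataContent['@graph']), 'nodes in @graph')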
49 changes: 49 additions & 0 deletions tests/test_02_schema.py
@@ -0,0 +1,49 @@
#!/usr/bin/python3
"""
Validate the ro-crate-metadata.json inside each .eln file against the JSON schema in tests/schema.json.
https://pypi.org/project/jsonschema/
"""
import os
import json
import unittest
from pathlib import Path
from zipfile import ZIP_DEFLATED
from zipfile import ZipFile
from jsonschema import Draft202012Validator

LABEL = 'schema'
METADATA_FILE = 'ro-crate-metadata.json'

class Test_1(unittest.TestCase):
"""
derived class for this test
"""
def test_main(self):
"""
main function
"""
# log-file
if Path('tests/logging.json').exists():
logJson = json.load(open('tests/logging.json'))
else:
logJson = {}

schema = json.load(open('tests/schema.json', 'r', encoding='utf-8'))
validator = Draft202012Validator(schema=schema)
validator.check_schema(schema=schema)
success = True
for root, _, files in os.walk(".", topdown=False):
for name in files:
if not name.endswith('.eln'):
continue
fileName = os.path.join(root, name)
print(f'\nInspect: {name}')
with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
metadataJsonFile = [i for i in elnFile.namelist() if i.endswith(METADATA_FILE)][0]
metadataContent = json.loads(elnFile.read(metadataJsonFile))
for error in sorted(validator.iter_errors(metadataContent), key=str):
print(f'- {error.message}')
success = False
logJson[fileName] = logJson.get(fileName,{}) | {LABEL: success}
json.dump(logJson, open('tests/logging.json', 'w'))
assert success