Skip to content

Commit

Permalink
Revert code changes in this branch since they were not intended
Browse files Browse the repository at this point in the history
  • Loading branch information
SteffenBrinckmann committed Oct 6, 2024
1 parent 1193297 commit b7e43a2
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 146 deletions.
53 changes: 22 additions & 31 deletions tests/test_00_pypi_rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,13 @@
import os
import json
import unittest
import sys
import tempfile
from pathlib import Path
from zipfile import ZIP_DEFLATED
from zipfile import Path as ZPath
from zipfile import ZipFile
from rocrate.rocrate import ROCrate

def testFile(fileName):
    """Try to parse one .eln file with the pypi "rocrate" library.

    The archive is extracted into a fresh temporary directory and its first
    top-level folder is handed to ROCrate for parsing; every entity found is
    printed.

    Args:
        fileName (str): path of the .eln file to test

    Returns:
        dict: {fileName: {'pypi_rocrate': bool}} parse result

    Raises:
        Exception: re-raised after logging when ROCrate cannot parse the file
    """
    logJson = {}
    print(f'\n\nTry to parse: {fileName}')
    with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
        p = ZPath(elnFile)
        # .eln convention: a single top-level folder inside the zip holds the crate
        dirName = sorted(p.iterdir())[0]
        try:
            # NOTE(review): the temporary directory is never removed; consider
            # tempfile.TemporaryDirectory for automatic cleanup
            dirpath = Path(tempfile.mkdtemp())
            elnFile.extractall(dirpath)
            tempPath = dirpath.joinpath(dirName.name)
            crate = ROCrate(tempPath)
            for e in crate.get_entities():
                print(f' {e.id}: {e.type}')
            # logJson is freshly created above, so the original membership test
            # (`if fileName not in logJson`) was always true: assign directly
            logJson[fileName] = {'pypi_rocrate': True}
        except Exception:
            print(" ***** ERROR: Could not parse content of this file!! *****")
            logJson[fileName] = {'pypi_rocrate': False}
            # the original also assigned a dead local `success = False` here;
            # it was never read because the exception is re-raised immediately
            raise
    return logJson


class Test_1(unittest.TestCase):
"""
derived class for this test
Expand All @@ -62,7 +33,27 @@ def test_main(self):
if not name.endswith('.eln'):
continue
fileName = os.path.join(root, name)
jsonUpdate = testFile(fileName)
logJson.update(jsonUpdate)
print(f'\n\nTry to parse: {fileName}')
with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
p = ZPath(elnFile)
dirName = sorted(p.iterdir())[0]
try:
dirpath = Path(tempfile.mkdtemp())
elnFile.extractall(dirpath)
temppath= dirpath.joinpath(dirName.name)
crate = ROCrate(temppath)
for e in crate.get_entities():
print(f' {e.id}: {e.type}')
if fileName not in logJson:
logJson[fileName] = {'pypi_rocrate':True}
else:
logJson[fileName] = logJson[fileName] | {'pypi_rocrate':True}
except Exception:
print(" ***** ERROR: Could not parse content of this file!! *****")
if fileName not in logJson:
logJson[fileName] = {'pypi_rocrate':False}
else:
logJson[fileName] = logJson[fileName] | {'pypi_rocrate':False}
success = False
json.dump(logJson, open('tests/logging.json', 'w'))
assert success
216 changes: 104 additions & 112 deletions tests/test_01_params_metadata_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,116 +8,6 @@
from zipfile import Path as ZPath
from zipfile import ZipFile

# global variables worth discussion
# Mandatory keys on the ro-crate-metadata.json descriptor node itself.
ROCRATE_NOTE_MANDATORY = ['version','sdPublisher']
# Keys every Dataset node must have / should have.
DATASET_MANDATORY = ['name']
DATASET_SUGGESTED = ['author','mentions', 'dateCreated', 'dateModified', 'identifier', 'text', 'keywords', 'genre', 'variableMeasured']
# Keys every File node must have / should have.
FILE_MANDATORY = ['name']
FILE_SUGGESTED = ['sha256', 'encodingFormat', 'contentSize', 'description', 'genre', 'variableMeasured']

# runtime global variables
METADATA_FILE = 'ro-crate-metadata.json'  # RO-Crate descriptor file name looked up inside each .eln
OUTPUT_INFO = False    # if True, also print INFO lines for missing *suggested* keys
OUTPUT_COUNTS = True   # if True, print per-file key-occurrence statistics
# Union of all keys the checks know about; any other key is flagged as unspecified.
KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type']

def processNode(graph, nodeID):
    """
    recursive function call to process each node

    Checks mandatory/suggested keys on Dataset and File nodes, warns on empty
    values, rejects list-valued "keywords", and recurses into "hasPart"
    children.

    Args:
        graph: full graph (list of node dicts from the "@graph" array)
        nodeID: id of node in graph

    Returns:
        bool: True if this node and all its children passed the checks
    """
    globalSuccess = True
    nodes = [i for i in graph if '@id' in i and i['@id'] == nodeID]
    if len(nodes) != 1:
        print('**ERROR: all entries must only occur once in crate. check:', nodeID)
        return False  # fix: was a bare `return` (None); callers and-combine the result
    node = nodes[0]
    # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
    if '@type' not in node:
        print('**ERROR: all nodes must have @type. check:', nodeID)
        return False  # fix: original fell through and crashed on node['@type'] below
    if node['@type'] == 'Dataset':
        for key in DATASET_MANDATORY:
            if key not in node:
                print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
                globalSuccess = False
        for key in DATASET_SUGGESTED:
            if key not in node and OUTPUT_INFO:
                print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
    elif node['@type'] == 'File':
        for key in FILE_MANDATORY:
            if key not in node:
                print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
                globalSuccess = False
        for key in FILE_SUGGESTED:
            if key not in node and OUTPUT_INFO:
                print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
    # CHECK PROPERTIES FOR ALL KEYS
    if any(str(i).strip() == '' for i in node.values()):
        print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
    # SPECIFIC CHECKS ON CERTAIN KEYS
    if isinstance(node.get('keywords', ''), list):
        print(f'**ERROR: {nodeID} contains an array of keywords. Use comma or space separated string')
        globalSuccess = False
    # recurse to children; pop() deliberately mutates the node so a child list
    # is only descended once per node — presumably a guard against re-visits
    children = node.pop('hasPart') if 'hasPart' in node else []
    for child in children:
        globalSuccess = processNode(graph, child['@id']) and globalSuccess
    return globalSuccess


def testFile(fileName):
    """Validate the ro-crate-metadata.json inside one .eln file.

    Reads the metadata file from the first top-level folder of the zip,
    checks the root crate node for mandatory keys, recursively validates
    every part via processNode, and optionally prints key-occurrence
    statistics.

    Args:
        fileName (str): path of the .eln file to test

    Returns:
        tuple[bool, dict]: overall success flag and the
            {fileName: {'params_metadata_json': bool}} log entry
    """
    print(f'\n\nParse: {fileName}')
    logJson = {}
    with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
        success = True
        p = ZPath(elnFile)
        # .eln convention: a single top-level folder inside the zip holds the crate
        dirName = sorted(p.iterdir())[0]
        metadataJsonFile = dirName.joinpath(METADATA_FILE)
        metadataContent = json.loads(metadataJsonFile.read_bytes())
        graph = metadataContent["@graph"]
        # find information from master node
        ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
        if len(ro_crate_nodes) == 1:
            for key in ROCRATE_NOTE_MANDATORY:
                if key not in ro_crate_nodes[0]:
                    # NOTE(review): a missing mandatory key is reported but does
                    # not set success=False — confirm this leniency is intended
                    print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
        else:
            print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ')
            success = False
        main_node = [i for i in graph if i["@id"] == "./"][0]

        # iteratively go through graph
        for partI in main_node['hasPart']:
            success = processNode(graph, partI['@id']) and success
        # logJson is freshly created above, so the original membership test
        # (`if fileName not in logJson`) was always true: assign directly
        logJson[fileName] = {'params_metadata_json': success}

        # count occurrence of all keys (root and descriptor nodes excluded)
        counts = {}
        for node in graph:
            if node['@id'] in ['./', METADATA_FILE]:
                continue
            for key in node.keys():
                counts[key] = counts.get(key, 0) + 1

        view = sorted(((v, k) for k, v in counts.items()), reverse=True)
        if OUTPUT_COUNTS:
            print('===== Counts (* unspecified)')
            for v, k in view:
                prefix = ' ' if k in KNOWN_KEYS else ' * '
                print(f'{prefix}{k:16}: {v}')
    return success, logJson


class Test_2(unittest.TestCase):
"""
derived class for this test
Expand All @@ -126,19 +16,121 @@ def test_main(self):
"""
main function
"""
# global variables worth discussion
ROCRATE_NOTE_MANDATORY = ['version','sdPublisher']
DATASET_MANDATORY = ['name']
DATASET_SUGGESTED = ['author','mentions', 'dateCreated', 'dateModified', 'identifier', 'text', 'keywords']
FILE_MANDATORY = ['name']
FILE_SUGGESTED = ['sha256', 'encodingFormat', 'contentSize', 'description']

# runtime global variables
METADATA_FILE = 'ro-crate-metadata.json'
OUTPUT_INFO = False
OUTPUT_COUNTS = True
KNOWN_KEYS = DATASET_MANDATORY+DATASET_SUGGESTED+FILE_MANDATORY+FILE_SUGGESTED+['@id', '@type']

# log-file
if Path('tests/logging.json').exists():
logJson = json.load(open('tests/logging.json'))
else:
logJson = {}

def processNode(graph, nodeID):
    """
    recursive function call to process each node

    Closure variant: reads DATASET_MANDATORY/SUGGESTED, FILE_MANDATORY/
    SUGGESTED and OUTPUT_INFO from the enclosing scope. Checks mandatory and
    suggested keys, warns on empty values, rejects list-valued "keywords",
    and recurses into "hasPart" children.

    Args:
        graph: full graph (list of node dicts from the "@graph" array)
        nodeID: id of node in graph

    Returns:
        bool: True if this node and all its children passed the checks
    """
    globalSuccess = True
    nodes = [i for i in graph if '@id' in i and i['@id'] == nodeID]
    if len(nodes) != 1:
        print('**ERROR: all entries must only occur once in crate. check:', nodeID)
        return False  # fix: was a bare `return` (None); callers and-combine the result
    node = nodes[0]
    # CHECK IF MANDATORY AND SUGGESTED KEYWORDS ARE PRESENT
    if '@type' not in node:
        print('**ERROR: all nodes must have @type. check:', nodeID)
        return False  # fix: original fell through and crashed on node['@type'] below
    if node['@type'] == 'Dataset':
        for key in DATASET_MANDATORY:
            if key not in node:
                print(f'**ERROR in dataset: "{key}" not in @id={node["@id"]}')
                globalSuccess = False
        for key in DATASET_SUGGESTED:
            if key not in node and OUTPUT_INFO:
                print(f'**INFO for dataset: "{key}" not in @id={node["@id"]}')
    elif node['@type'] == 'File':
        for key in FILE_MANDATORY:
            if key not in node:
                print(f'**ERROR in file: "{key}" not in @id={node["@id"]}')
                globalSuccess = False
        for key in FILE_SUGGESTED:
            if key not in node and OUTPUT_INFO:
                print(f'**INFO for file: "{key}" not in @id={node["@id"]}')
    # CHECK PROPERTIES FOR ALL KEYS
    if any(str(i).strip() == '' for i in node.values()):
        print(f'**WARNING: {nodeID} contains empty values in the key-value pairs')
    # SPECIFIC CHECKS ON CERTAIN KEYS
    if isinstance(node.get('keywords', ''), list):
        print(f'**ERROR: {nodeID} contains an array of keywords. Use comma or space separated string')
        globalSuccess = False
    # recurse to children; pop() deliberately mutates the node so a child list
    # is only descended once per node — presumably a guard against re-visits
    children = node.pop('hasPart') if 'hasPart' in node else []
    for child in children:
        globalSuccess = processNode(graph, child['@id']) and globalSuccess
    return globalSuccess

for root, _, files in os.walk(".", topdown=False):
for name in files:
if not name.endswith('.eln'):
continue
fileName = os.path.join(root, name)
success, jsonUpdate = testFile(fileName)
logJson.update(jsonUpdate)
print(f'\n\nParse: {fileName}')
with ZipFile(fileName, 'r', compression=ZIP_DEFLATED) as elnFile:
success = True
p = ZPath(elnFile)
dirName = sorted(p.iterdir())[0]
metadataJsonFile = dirName.joinpath(METADATA_FILE)
metadataContent = json.loads(metadataJsonFile.read_bytes())
graph = metadataContent["@graph"]
# find information from master node
ro_crate_nodes = [i for i in graph if i["@id"] == METADATA_FILE]
if len(ro_crate_nodes) == 1:
for key in ROCRATE_NOTE_MANDATORY:
if not key in ro_crate_nodes[0]:
print(f'**ERROR: "{key}" not in @id={METADATA_FILE}')
else:
print(f'**ERROR: @id={METADATA_FILE} does not uniquely exist ')
success = False
main_node = [i for i in graph if i["@id"] == "./"][0]

# iteratively go through graph
for partI in main_node['hasPart']:
success = processNode(graph, partI['@id']) and success
if fileName not in logJson:
logJson[fileName] = {'params_metadata_json':success}
else:
logJson[fileName] = logJson[fileName] | {'params_metadata_json':success}

# count occurances of all keys
counts = {}
for node in graph:
if node['@id'] in ['./',METADATA_FILE]:
continue
for key in node.keys():
if key in counts:
counts[key] += 1
else:
counts[key] = 1

view = [ (v,k) for k,v in counts.items() ]
view.sort(reverse=True)
if OUTPUT_COUNTS:
print('===== Counts (* unspecified)')
for v,k in view:
prefix = ' ' if k in KNOWN_KEYS else ' * '
print(f'{prefix}{k:15}: {v}')
print('\n\nSuccess:', success)
json.dump(logJson, open('tests/logging.json', 'w'))
assert success
4 changes: 1 addition & 3 deletions tools/eln2md.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ def process_part(part, level):
ro_crate_node = [i for i in graph if i["@id"] == METADATA_FILE][0]
output = '- '+METADATA_FILE+'\n'
if 'sdPublisher' in ro_crate_node:
name = ro_crate_node['sdPublisher'].get('name','---')
output += ' - publisher: ' + name + '\n'
output += ' - publisher: ' + ro_crate_node['sdPublisher']['name'] + '\n'
if 'version' in ro_crate_node:
output += ' - version: ' + ro_crate_node['version'] + '\n'
main_node = [i for i in graph if i["@id"] == "./"][0]
Expand Down Expand Up @@ -113,7 +112,6 @@ def process_part(part, level):
outfile.write(f'```json\n{outputString}\n```\n')
elif args.format == 'tree':
outputString = tree(metadataContent)
print(outputString)
outfile.write(f'```yml\n{outputString}\n```\n')
else:
print("**ERROR: unknown format")
Expand Down

0 comments on commit b7e43a2

Please sign in to comment.