Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding jsonata_validate prototype #71

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions mars-cli/jsonata_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import argparse
import csv
import io
import json

import jsonata
import requests

def main(filepath='../test-data/biosamples-original-isa.json',
         table_url='https://docs.google.com/spreadsheets/d/e/2PACX-1vQvgQoUByiJgGcJ4jtD8bG9AyQrh4TYVQE8aq7AqJRxLdfyLFATKspu_vkyqbVsTyEnNIBHqWtpgV6X/pub?gid=0&single=true&output=csv',
         timeout=30):
    """
    Validate a JSON file against JSONata expressions listed in an online CSV table.

    The table is downloaded, parsed as CSV, and every row after the first
    three that has a non-empty seventh column is evaluated as a JSONata
    expression against the loaded JSON document. Rows whose expression
    evaluates to ``False`` are reported on stdout, as are rows whose
    expression fails to compile or evaluate.

    Parameters:
        filepath (str): The path to the JSON file to be validated.
        table_url (str): The URL of the CSV file hosted online that contains
            JSONata expressions.
        timeout (float): Seconds to wait for the table download before
            giving up.

    Returns:
        None: Results and errors are printed; on a download, CSV, or JSON
        loading error the function prints a message and returns early.
    """
    try:
        # Without a timeout a stalled server would block this call forever.
        res = requests.get(table_url, timeout=timeout)
        res.raise_for_status()  # Turn HTTP error statuses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the table from the URL: {e}")
        return

    try:
        # Feed the reader a stream opened with newline='' so that line breaks
        # inside quoted cells (multi-line expressions) are preserved.
        jsonata_list = list(csv.reader(io.StringIO(res.text, newline='')))
    except csv.Error as e:
        print(f"Error reading CSV data: {e}")
        return

    try:
        # JSON interchange files are UTF-8; do not rely on the locale default.
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON data: {e}")
        return

    for index, ele in enumerate(jsonata_list):
        # Rows 0-2 are skipped, as are rows without an expression in column 7.
        if index <= 2 or len(ele) <= 6 or ele[6] == "":
            continue

        try:
            # NOTE(review): a single quote is prepended to the cell text before
            # compiling; the reason is not visible here - confirm against the
            # format of the sheet's cells.
            expr = jsonata.Jsonata("'" + ele[6])
            result = expr.evaluate(data)
        except Exception as e:
            # Best effort: one bad expression must not stop the remaining rows.
            print(f"Error evaluating JSONata at row {index}: {e}")
            continue

        # Identity check so that a numeric 0 result is not reported as a failure.
        if result is False:
            print(f"Validation failed at row {index}:")
            print(f"Sample ID: {ele[0]}")
            print(f"Expression: {ele[6]}")
            print(f"Result: {result}")

if __name__ == '__main__':
    # Command-line entry point: gather the two inputs and pass them to main().
    cli = argparse.ArgumentParser(
        description='Validate JSON data using JSONata expressions from a CSV file.'
    )

    # The JSON document to check is mandatory.
    cli.add_argument(
        '--filepath',
        type=str,
        required=True,
        help='Path to the JSON file to be validated.',
    )

    # The expression table is optional and falls back to the published sheet.
    cli.add_argument(
        '--table_url',
        type=str,
        required=False,
        default='https://docs.google.com/spreadsheets/d/e/2PACX-1vQvgQoUByiJgGcJ4jtD8bG9AyQrh4TYVQE8aq7AqJRxLdfyLFATKspu_vkyqbVsTyEnNIBHqWtpgV6X/pub?gid=0&single=true&output=csv',
        help='URL of the CSV file containing JSONata expressions.',
    )

    options = cli.parse_args()
    main(filepath=options.filepath, table_url=options.table_url)
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class BioSampleSubmissionController {
@ApiResponse(responseCode = "408", description = "Request Timeout"),
@ApiResponse(responseCode = "415", description = "Unsupported media type")
})
@CrossOrigin(origins = "http://localhost:8000")
// @CrossOrigin(origins = "http://localhost:8000")
@PostMapping(
value = "/submit",
consumes = { APPLICATION_JSON_VALUE, APPLICATION_XML_VALUE })
Expand Down