Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented decode->flatten->filter->unflatten->encode enhancement #8

Merged
merged 4 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 109 additions & 12 deletions ckanext/vitality_prototype/meta_authorize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from enum import Enum
import logging
import json
import copy
from flatten_dict import flatten
from flatten_dict import unflatten


'''
Expand Down Expand Up @@ -148,28 +152,28 @@ def filter_dict(self, input, fields, whitelist):
the original dictionary input with fields corresponding to whitelist.
"""

#log.info(fields)

# Trivially check input type
if type(input) != dict:
raise TypeError("Only dicts can be filtered recursively! Attempted to filter " + str(type(input)))

# DECODE Stringified JSON elements
decoded = self._decode(copy.deepcopy(input))

# FLATTEN decoded input
flattened = flatten(decoded, reducer='path')

# Iterate through the dictionary entries

# TODO: I would use a comprehension + helper function.
# i.e. {k: v for k, v in input.items if filterLogicFn(k, v)}
# The original dictionary will stay in tact and in memory though.
for key,value in input.items():
for key,value in flattened.items():

log.info("Checking authorization for %s", str(key))

# TODO - this needs to be handled way better
if key == "en" or key == "fr":
continue

# Pop unknown fields
if key.encode('utf-8') not in fields:
input.pop(key, None)
flattened.pop(key, None)
log.warn("Popped unknown field: " + str(key))
continue

Expand All @@ -178,20 +182,113 @@ def filter_dict(self, input, fields, whitelist):

# If the current field's id does not appear in the whitelist, pop it from the input
if curr_field_id not in whitelist:
input.pop(key, None)
flattened.pop(key, None)
log.info("Key rejected!")
continue

# If the value is a dict, recurse
if type(value) is dict:

# Overwrite value with filtered dict
input[key] = self.filter_dict(value, fields, whitelist)
flattened[key] = self.filter_dict(value, fields, whitelist)

log.info("Key authorized!")

# UNFLATTEN filtered dictionary
unflattened = unflatten(flattened, splitter='path')

# STRINGIFY required JSON fields
Copy link
Contributor

@greebie greebie Jul 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

STRINGIFY

encoded = self._encode(unflattened)

return encoded


def _decode(self, input):
    """
    Decode dictionary containing string encoded JSON objects.

    Parameters
    ----------
    input: dict or stringified JSON
        The dictionary to decode. A dict is decoded in place (its values
        are overwritten); a string is first parsed as JSON.

    Returns
    -------
    A dictionary where all fields that contained stringified JSON are now
    expanded into dictionaries (or lists). If a string input cannot be
    parsed as JSON, None is returned; if it parses to something other than
    a dict (e.g. a list), the parsed value is returned unchanged because
    there are no keys to walk.

    Raises
    ------
    TypeError
        If input is neither a string nor a dict.
    """
    # `str` plus the type of a unicode literal: (str, unicode) on Python 2
    # and (str, str) on Python 3. This avoids naming the py2-only `unicode`.
    string_types = (str, type(u""))

    if isinstance(input, string_types):
        root = MetaAuthorize._parse_json(input)
    elif isinstance(input, dict):
        root = input
    else:
        raise TypeError("_decode can only decode str or dict inputs! Got {}".format(str(type(input))))

    # A string that failed to parse (None) or parsed to a list/scalar has
    # no items to iterate; calling .items() on it would raise.
    if not isinstance(root, dict):
        return root

    # Iterate over a snapshot so values can be replaced while looping.
    for key, value in list(root.items()):
        if isinstance(value, string_types):
            # If the value is a string, attempt to parse it as JSON.
            parsed_json = MetaAuthorize._parse_json(value, key)

            if isinstance(parsed_json, dict):
                # Parsed into a dictionary: decode it recursively, then
                # replace the value at the current key.
                parsed_json = self._decode(parsed_json)
                log.info('%s - parsed type %s', key, type(parsed_json))
                root[key] = parsed_json
            elif isinstance(parsed_json, list):
                # Parsed into a list: replace the value as-is (list
                # elements are not decoded any further).
                root[key] = parsed_json
            # Anything else (parse failure, number, bool, ...) keeps the
            # original string value.

        elif isinstance(value, dict):
            # Nested dictionary: recurse.
            root[key] = self._decode(value)

    return root

def _encode(self, input):
    """
    Re-stringify the top-level fields that must hold stringified JSON.

    Parameters
    ----------
    input: dict
        The dictionary to encode. It is modified in place.

    Returns
    -------
    The same dictionary, where the value of every top-level key listed by
    _stringified_keys() has been replaced by its JSON text. Nested keys
    are not inspected.
    """
    # Build the lookup once instead of rebuilding the list per iteration.
    stringified = set(_stringified_keys())

    # Iterate over a snapshot of the keys so values can be replaced safely.
    for key in list(input):
        if key in stringified:
            log.info("Stringifying %s", key)
            encoded = json.dumps(input[key])
            # On Python 2 json.dumps returns a byte string; convert it to
            # unicode text. On Python 3 it is already text.
            if isinstance(encoded, bytes):
                encoded = encoded.decode('utf-8')
            input[key] = encoded

    return input


@staticmethod
def _parse_json(value, key=None):
    """
    Try to parse a value as JSON.

    Parameters
    ----------
    value: str or unicode
        The text to parse.
    key: optional
        Name of the field the value came from. Not used by the parsing
        itself; kept for diagnostics.

    Returns
    -------
    The parsed JSON value, or None if value is not a string or does not
    contain valid JSON.
    """
    try:
        # TODO: Unicode stuff may need rework for python 3
        return json.loads(value.encode('utf-8'))
    except (ValueError, TypeError, AttributeError):
        # ValueError: not valid JSON.
        # TypeError / AttributeError: value is not a string at all (e.g.
        # None or a number has no .encode), so it cannot be JSON text.
        return None


# Keys whose values should be stringified JSON objects. Kept as a
# module-level constant so the list is defined in exactly one place.
_STRINGIFIED_KEYS = (
    "metadata-point-of-contact",
    "spatial",
    "temporal-extent",
    "unique-resource-identifier-full",
    "notes",
    "cited-responsible-party",
    "dataset-reference-date",
)


def _stringified_keys():
    """
    Returns a list of keys whose values should be stringified json objects

    A new list is returned on every call, so callers may modify it freely.
    """
    return list(_STRINGIFIED_KEYS)
Loading