-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implemented decode->flatten->filter->unflatten->encode enhancement #8
Merged
Merged
Changes from 1 commit
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
from enum import Enum | ||
import logging | ||
import json | ||
import copy | ||
from flatten_dict import flatten | ||
from flatten_dict import unflatten | ||
|
||
|
||
''' | ||
|
@@ -148,28 +152,28 @@ def filter_dict(self, input, fields, whitelist): | |
the original dictionary input with fields corresponding to whitelist. | ||
""" | ||
|
||
#log.info(fields) | ||
|
||
# Trivially check input type | ||
if type(input) != dict: | ||
raise TypeError("Only dicts can be filtered recursively! Attempted to filter " + str(type(input))) | ||
|
||
# DECODE Stringified JSON elements | ||
decoded = self._decode(copy.deepcopy(input)) | ||
|
||
# FLATTEN decoded input | ||
flattened = flatten(decoded, reducer='path') | ||
|
||
# Iterate through the dictionary entries | ||
|
||
# TODO: I would use a comprehension + helper function. | ||
# i.e. {k: v for k, v in input.items if filterLogicFn(k, v)} | ||
# The original dictionary will stay intact and in memory though. | |
for key,value in input.items(): | ||
for key,value in flattened.items(): | ||
|
||
log.info("Checking authorization for %s", str(key)) | ||
|
||
# TODO - this needs to be handled way better | ||
if key == "en" or key == "fr": | ||
continue | ||
|
||
# Pop unknown fields | ||
if key.encode('utf-8') not in fields: | ||
input.pop(key, None) | ||
flattened.pop(key, None) | ||
log.warn("Popped unknown field: " + str(key)) | ||
continue | ||
|
||
|
@@ -178,20 +182,113 @@ def filter_dict(self, input, fields, whitelist): | |
|
||
# If the current field's id does not appear in the whitelist, pop it from the input | ||
if curr_field_id not in whitelist: | ||
input.pop(key, None) | ||
flattened.pop(key, None) | ||
log.info("Key rejected!") | ||
continue | ||
|
||
# If the value is a dict, recurse | ||
if type(value) is dict: | ||
|
||
# Overwrite value with filtered dict | ||
input[key] = self.filter_dict(value, fields, whitelist) | ||
flattened[key] = self.filter_dict(value, fields, whitelist) | ||
|
||
log.info("Key authorized!") | ||
|
||
# UNFLATTEN filtered dictionary | ||
unflattened = unflatten(flattened, splitter='path') | ||
|
||
# STRINGIFY required json fields | |
encoded = self._encode(unflattened) | ||
|
||
return encoded | ||
|
||
|
||
def _decode(self, input): | ||
""" | ||
Decode dictionary containing string encoded JSON objects. | ||
|
||
Parameters | ||
---------- | ||
input: dict or stringified JSON | ||
The dictionary to decode | ||
|
||
Returns | ||
------- | ||
A dictionary where all fields that contained stringified JSON are now | ||
expanded into dictionaries. | ||
""" | ||
if type(input) == str or type(input) == unicode: | ||
root = MetaAuthorize._parse_json(input) | ||
elif type(input) == dict: | ||
root = input | ||
else: | ||
raise TypeError("_decode can only decode str or dict inputs! Got {}".format(str(type(input)))) | ||
|
||
if root != None: | ||
for key,value in root.items(): | ||
# If the value is a string attempt to parse it as json | ||
#log.info("Attempting to decode: %s - %s ", key, str(type(value))) | ||
#TODO - this may need to change for python3 | ||
if type(value) == str or type(value) == unicode: | ||
#log.info("%s is a str/unicode!", key) | ||
parsed_json = MetaAuthorize._parse_json(value, key) | ||
|
||
# If the string parsed | ||
if parsed_json != None: | ||
# into a dictonary | ||
if type(parsed_json) == dict: | ||
# decode the parsed dict | ||
parsed_json = self._decode(parsed_json) | ||
log.info('%s - parsed type %s', key, type(parsed_json)) | ||
# replace the value at the current key | ||
root[key] = parsed_json | ||
# into a list | ||
elif type(parsed_json) == list: | ||
# replace the value at the current key | ||
root[key] = parsed_json | ||
|
||
|
||
# Else if the value is a dictonary, recurse! | ||
elif type(value) == dict: | ||
root[key] = self._decode(value) | ||
|
||
# log.info("Filtered input") | ||
# log.info(input) | ||
|
||
return root | ||
|
||
def _encode(self, input):
    """
    Re-stringify the top-level fields that must be stored as JSON strings.

    Parameters
    ----------
    input: dict
        The dictionary to encode. It is modified in place.

    Returns
    -------
    The same dictionary, where the value of every top-level key listed by
    _strigified_keys() has been replaced by its JSON text.
    """
    # Look the key list up once instead of once per entry.
    keys_to_stringify = _strigified_keys()

    for key, value in list(input.items()):

        if key in keys_to_stringify:
            log.info("Stringifying %s", key)
            dumped = json.dumps(value)
            # On Python 2 json.dumps returns a byte string; convert it to
            # text. On Python 3 it is already text.
            if isinstance(dumped, bytes):
                dumped = dumped.decode('utf-8')
            input[key] = dumped

    return input
|
||
|
||
@staticmethod | ||
def _parse_json(value, key=None): | ||
try: | ||
# TODO: Unicode stuff may need rework for python 3 | ||
return json.loads(value.encode('utf-8')) | ||
except ValueError: | ||
#log.info("Value could not be parsed as JSON. %s", key) | ||
return None | ||
except TypeError: | ||
#log.warn("Value could not be parsed as JSON, %s", key) | ||
return None | ||
|
||
|
||
def _strigified_keys(): | ||
aianta marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Returns a list of keys whose values should be strigified json objects | ||
aianta marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
return [ | ||
"metadata-point-of-contact", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Eventually it would be good to move these to constants. |
||
"spatial", | ||
"temporal-extent", | ||
"unique-resource-identifier-full", | ||
"notes", | ||
"cited-responsible-party", | ||
"dataset-reference-date" | ||
] |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
STRINGIFY