Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented decode->flatten->filter->unflatten->encode enhancement #8

Merged
merged 4 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions ckanext/vitality_prototype/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Flattened, '/'-separated paths of every dataset field known to the
# extension. Entry order is preserved exactly as originally declared.
DATASET_FIELDS = [
    "author",
    "author_email",
    "bbox-east-long",
    "bbox-north-lat",
    "bbox-south-lat",
    "bbox-west-long",
    "cited-responsible-party",
    "creator_user_id",
    "dataset-reference-date",
    "eov",
    "extras",
    "frequency-of-update",
    "groups",
    "id",
    "isopen",
    "keywords/en",
    "keywords/fr",
    "license_id",
    "license_title",
    "license_url",
    "maintainer",
    "maintainer_email",
    "metadata_created",
    "metadata_modified",
    "metadata-language",
    "metadata-point-of-contact/contact-info_email",
    "metadata-point-of-contact/contact-info_online-resource_application-profile",
    "metadata-point-of-contact/contact-info_online-resource_description",
    "metadata-point-of-contact/contact-info_online-resource_function",
    "metadata-point-of-contact/contact-info_online-resource_name",
    "metadata-point-of-contact/contact-info_online-resource_protocol",
    "metadata-point-of-contact/contact-info_online-resource_protocol-request",
    "metadata-point-of-contact/contact-info_online-resource_url",
    "metadata-point-of-contact/individual-name",
    "metadata-point-of-contact/organisation-name",
    "metadata-point-of-contact/position-name",
    "metadata-point-of-contact/role",
    "metadata-reference-date",
    "name",
    "notes/en",
    "notes/fr",
    "notes_translated/en",
    "notes_translated/fr",
    "num_resources",
    "num_tags",
    "organization/approval_status",
    "organization/created",
    "organization/description",
    "organization/description_translated/en",
    "organization/description_translated/fr",
    "organization/id",
    "organization/image_url",
    "organization/image_url_translated/en",
    "organization/image_url_translated/fr",
    "organization/is_organization",
    "organization/revision_id",
    "organization/state",
    "organization/title",
    "organization/title_translated/en",
    "organization/title_translated/fr",
    "organization/type",
    "owner_org",
    "private",
    "progress",
    "relationships_as_object",
    "relationships_as_subject",
    "resources",
    "resource-type",
    "revision_id",
    "spatial/coordinates",
    "spatial/type",
    "state",
    "tags",
    "temporal-extent/begin",
    "temporal-extent/end",
    "title",
    "title_translated/en",
    "title_translated/fr",
    "tracking_summary/recent",
    "tracking_summary/total",
    "type",
    "unique-resource-identifier-full/authority",
    "unique-resource-identifier-full/code",
    "unique-resource-identifier-full/code-space",
    "unique-resource-identifier-full/version",
    "url",
    "vertical-extent",
    "xml_location_url",
    "organization/name",
]

# Flattened, '/'-separated field paths listed as public. Every entry is
# spelled exactly like a dataset field path so that string comparisons
# against flattened keys can match.
PUBLIC_FIELDS = [
    "id",
    "resources",
    "type",
    "name",
    "state",
    "organization/approval_status",
    # Was misspelled "orgnaization/created", which could never match the
    # flattened key "organization/created".
    "organization/created",
    "organization/description",
    "organization/description_translated/en",
    "organization/description_translated/fr",
    "organization/id",
    "organization/image_url",
    "organization/image_url_translated/en",
    "organization/image_url_translated/fr",
    "organization/is_organization",
    "organization/revision_id",
    "organization/state",
    "organization/title",
    "organization/title_translated/en",
    "organization/title_translated/fr",
    "organization/type",
    "organization/name",
    "title_translated/en",
    "title_translated/fr",
]

# Top-level dataset keys whose values are re-serialised to JSON text when a
# filtered dataset is encoded.
STRINGIFIED_FIELDS = [
    "metadata-point-of-contact",
    "spatial",
    "temporal-extent",
    "unique-resource-identifier-full",
    "notes",
    "cited-responsible-party",
    "dataset-reference-date",
]
107 changes: 95 additions & 12 deletions ckanext/vitality_prototype/meta_authorize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from enum import Enum
import logging
import json
import copy
import constants
from flatten_dict import flatten
from flatten_dict import unflatten


'''
Expand Down Expand Up @@ -148,28 +153,28 @@ def filter_dict(self, input, fields, whitelist):
the original dictionary input with fields corresponding to whitelist.
"""

#log.info(fields)

# Trivially check input type
if type(input) != dict:
raise TypeError("Only dicts can be filtered recursively! Attempted to filter " + str(type(input)))

# DECODE Stringified JSON elements
decoded = self._decode(copy.deepcopy(input))

# FLATTEN decoded input
flattened = flatten(decoded, reducer='path')

# Iterate through the dictionary entries

# TODO: I would use a comprehension + helper function.
# i.e. {k: v for k, v in input.items if filterLogicFn(k, v)}
# The original dictionary will stay intact and in memory though.
for key,value in input.items():
for key,value in flattened.items():

log.info("Checking authorization for %s", str(key))

# TODO - this needs to be handled way better
if key == "en" or key == "fr":
continue

# Pop unknown fields
if key.encode('utf-8') not in fields:
input.pop(key, None)
flattened.pop(key, None)
log.warn("Popped unknown field: " + str(key))
continue

Expand All @@ -178,20 +183,98 @@ def filter_dict(self, input, fields, whitelist):

# If the current field's id does not appear in the whitelist, pop it from the input
if curr_field_id not in whitelist:
input.pop(key, None)
flattened.pop(key, None)
log.info("Key rejected!")
continue

# If the value is a dict, recurse
if type(value) is dict:

# Overwrite value with filtered dict
input[key] = self.filter_dict(value, fields, whitelist)
flattened[key] = self.filter_dict(value, fields, whitelist)

log.info("Key authorized!")

# UNFLATTEN filtered dictionary
unflattened = unflatten(flattened, splitter='path')

# STRINGIFY required json fields
encoded = self._encode(unflattened)

return encoded


def _decode(self, input):
    """
    Decode dictionary containing string encoded JSON objects.

    Parameters
    ----------
    input: dict or stringified JSON
        The dictionary to decode. A dict is modified in place; a string is
        parsed as JSON first.

    Returns
    -------
    A dictionary where all fields that contained stringified JSON objects
    are now expanded into dictionaries (and stringified JSON arrays into
    lists). If a string input does not parse, None is returned; if it
    parses to something other than a dictionary (e.g. a list), the parsed
    value is returned as-is.

    Raises
    ------
    TypeError
        If input is neither a string nor a dict.
    """
    # Python 2 has a separate `unicode` text type; on Python 3 the name does
    # not exist and `str` is the only text type.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    if isinstance(input, string_types):
        root = MetaAuthorize._parse_json(input)
    elif isinstance(input, dict):
        root = input
    else:
        raise TypeError("_decode can only decode str or dict inputs! Got {}".format(str(type(input))))

    # A string input may have failed to parse (None) or parsed into a list
    # or scalar. Only dictionaries have entries to walk, so anything else is
    # returned untouched instead of failing on `.items()`.
    if not isinstance(root, dict):
        return root

    # Only values of existing keys are reassigned below, so the dictionary
    # never changes size while it is being iterated.
    for key, value in root.items():
        if isinstance(value, string_types):
            # If the value is a string attempt to parse it as json
            parsed_json = MetaAuthorize._parse_json(value, key)

            if isinstance(parsed_json, dict):
                # Parsed into a dictionary: decode it recursively, then
                # replace the value at the current key
                parsed_json = self._decode(parsed_json)
                log.info('%s - parsed type %s', key, type(parsed_json))
                root[key] = parsed_json
            elif isinstance(parsed_json, list):
                # Parsed into a list: replace the value at the current key
                root[key] = parsed_json
            # Strings that do not parse, or that parse to a scalar, are
            # deliberately left as the original string.

        elif isinstance(value, dict):
            # Else if the value is a dictionary, recurse!
            root[key] = self._decode(value)

    return root

def _encode(self, input):
    """
    Re-stringify the top-level fields listed in constants.STRINGIFIED_FIELDS.

    Parameters
    ----------
    input: dict
        The dictionary to encode. It is modified in place.

    Returns
    -------
    The same dictionary, where the value of every top-level key found in
    constants.STRINGIFIED_FIELDS has been replaced by its JSON text.
    """
    # Only values of existing keys are reassigned, so the dictionary never
    # changes size while it is being iterated.
    for key, value in input.items():

        if key in constants.STRINGIFIED_FIELDS:
            log.info("Stringifying %s", key)
            dumped = json.dumps(value)
            # On Python 2 json.dumps returns a byte string, which is decoded
            # to text exactly as unicode(dumped, 'utf-8') did. On Python 3
            # the result is already text and is stored unchanged.
            if isinstance(dumped, bytes):
                dumped = dumped.decode('utf-8')
            input[key] = dumped

    return input


@staticmethod
def _parse_json(value, key=None):
try:
# TODO: Unicode stuff may need rework for python 3
return json.loads(value.encode('utf-8'))
except ValueError:
#log.info("Value could not be parsed as JSON. %s", key)
return None
except TypeError:
#log.warn("Value could not be parsed as JSON, %s", key)
return None
Loading