Skip to content

Commit

Permalink
Merge pull request #515 from hubmapconsortium/yuanzhou/cache-bug
Browse files Browse the repository at this point in the history
Yuanzhou/cache bug
  • Loading branch information
yuanzhou authored Jul 18, 2023
2 parents b4b0c0f + c10be52 commit 7cf841f
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 161 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.30
2.1.31
123 changes: 45 additions & 78 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type):


"""
Generate the complete entity record as well as result filtering for response
Generate the complete entity record by running the read triggers
Parameters
----------
Expand Down Expand Up @@ -513,19 +513,18 @@ def get_complete_entity_result(token, entity_dict, properties_to_skip = []):
cache_result = None

# Need both client and prefix when fetching the cache
if _memcached_client and _memcached_prefix:
# Do NOT fetch cache is properties_to_skip is specified
if not properties_to_skip:
cache_key = f'{_memcached_prefix}_complete_{entity_uuid}'
cache_result = _memcached_client.get(cache_key)
# Do NOT fetch cache if properties_to_skip is specified
if _memcached_client and _memcached_prefix and (not properties_to_skip):
cache_key = f'{_memcached_prefix}_complete_{entity_uuid}'
cache_result = _memcached_client.get(cache_key)

# Use the cached data if found and still valid
# Otherwise, calculate and add to cache
if cache_result is None:
if _memcached_client and _memcached_prefix:
logger.info(f'Cache of complete entity of {entity_type} {entity_uuid} not found or expired at time {datetime.now()}')

# No error handling here since if a 'on_read_trigger' method failed,
# No error handling here since if a 'on_read_trigger' method fails,
# the property value will be the error message
# Pass {} since no new_data_dict for 'on_read_trigger'
generated_on_read_trigger_data_dict = generate_triggered_data('on_read_trigger', entity_type, token, entity_dict, {}, properties_to_skip)
Expand All @@ -537,15 +536,18 @@ def get_complete_entity_result(token, entity_dict, properties_to_skip = []):
complete_entity = remove_none_values(complete_entity_dict)

# Need both client and prefix when creating the cache
if _memcached_client and _memcached_prefix:
# Do NOT cache when properties_to_skip is specified
if _memcached_client and _memcached_prefix and (not properties_to_skip):
logger.info(f'Creating complete entity cache of {entity_type} {entity_uuid} at time {datetime.now()}')

# Do NOT cache when properties_to_skip is specified
if not properties_to_skip:
cache_key = f'{_memcached_prefix}_complete_{entity_uuid}'
_memcached_client.set(cache_key, complete_entity, expire = SchemaConstants.MEMCACHED_TTL)
cache_key = f'{_memcached_prefix}_complete_{entity_uuid}'
_memcached_client.set(cache_key, complete_entity, expire = SchemaConstants.MEMCACHED_TTL)

logger.debug(f"Following is the complete {entity_type} cache created at time {datetime.now()} using key {cache_key}:")
logger.debug(complete_entity)
else:
logger.info(f'Using complete entity cache of {entity_type} {entity_uuid} at time {datetime.now()}')
logger.debug(cache_result)

complete_entity = cache_result
else:
Expand Down Expand Up @@ -590,15 +592,13 @@ def get_complete_entities_list(token, entities_list, properties_to_skip = []):
----------
activity_dict : dict
A dictionary that contains all activity details
properties_to_exclude : list
Any additional properties to exclude from the response
Returns
-------
dict
A dictionary of activity information with keys that are all normalized
"""
def normalize_activity_result_for_response(activity_dict, properties_to_exclude = []):
def normalize_activity_result_for_response(activity_dict):
global _schema

properties = _schema['ACTIVITIES']['Activity']['properties']
Expand All @@ -607,7 +607,7 @@ def normalize_activity_result_for_response(activity_dict, properties_to_exclude
for key in activity_dict:
# Only return the properties defined in the schema yaml
# Exclude additional properties if specified
if (key in properties) and (key not in properties_to_exclude):
if key in properties:
# By default, all properties are exposed
# It's possible to see `exposed: true`
if ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
Expand All @@ -624,7 +624,7 @@ def normalize_activity_result_for_response(activity_dict, properties_to_exclude
Parameters
----------
entity_dict : dict
A merged dictionary that contains all possible data to be used by the trigger methods
Either a neo4j node converted dict or complete dict generated from get_complete_entity_result()
properties_to_exclude : list
Any additional properties to exclude from the response
Expand All @@ -635,73 +635,41 @@ def normalize_activity_result_for_response(activity_dict, properties_to_exclude
"""
def normalize_entity_result_for_response(entity_dict, properties_to_exclude = []):
global _schema
global _memcached_client
global _memcached_prefix

normalized_entity = {}

# In case entity_dict is None or
# an incorrectly created entity that doesn't have the `entity_type` property
if entity_dict and ('entity_type' in entity_dict) and ('uuid' in entity_dict):
entity_uuid = entity_dict['uuid']
entity_type = entity_dict['entity_type']
cache_result = None

# Need both client and prefix when fetching the cache
if _memcached_client and _memcached_prefix:
# Do NOT fetch cache is properties_to_exclude is specified
if not properties_to_exclude:
cache_key = f'{_memcached_prefix}_normalized_{entity_uuid}'
cache_result = _memcached_client.get(cache_key)

# Use the cached data if found and still valid
# Otherwise, calculate and add to cache
if cache_result is None:
if _memcached_client and _memcached_prefix:
logger.info(f'Cache of normalized entity of {entity_type} {entity_uuid} not found or expired at time {datetime.now()}')

properties = _schema['ENTITIES'][entity_type]['properties']

for key in entity_dict:
# Only return the properties defined in the schema yaml
# Exclude additional properties if specified
if (key in properties) and (key not in properties_to_exclude):
# Skip properties with None value and the ones that are marked as not to be exposed.
# By default, all properties are exposed if not marked as `exposed: false`
# It's still possible to see `exposed: true` marked explictly
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
# Only run convert_str_literal() on string representation of Python dict and list with removing control characters
# No convertion for string representation of Python string, meaning that can still contain control characters
if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
logger.info(f"Executing convert_str_literal() on {entity_type}.{key} of uuid: {entity_uuid}")

# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
# convert_str_literal() also removes those control chars to avoid SyntaxError
entity_dict[key] = convert_str_literal(entity_dict[key])

# Add the target key with correct value of data type to the normalized_entity dict
normalized_entity[key] = entity_dict[key]

# Final step: remove properties with empty string value, empty dict {}, and empty list []
if (isinstance(normalized_entity[key], (str, dict, list)) and (not normalized_entity[key])):
normalized_entity.pop(key)

# Need both client and prefix when creating the cache
if _memcached_client and _memcached_prefix:
logger.info(f'Creating normalized entity cache of {entity_type} {entity_uuid} at time {datetime.now()}')

# Do NOT cache when properties_to_exclude is specified
if not properties_to_exclude:
cache_key = f'{_memcached_prefix}_normalized_{entity_uuid}'
_memcached_client.set(cache_key, normalized_entity, expire = SchemaConstants.MEMCACHED_TTL)
else:
logger.info(f'Using normalized entity cache of {entity_type} {entity_uuid} at time {datetime.now()}')
if entity_dict and ('entity_type' in entity_dict):
normalized_entity_type = entity_dict['entity_type']
properties = _schema['ENTITIES'][normalized_entity_type]['properties']

for key in entity_dict:
# Only return the properties defined in the schema yaml
# Exclude additional properties if specified
if (key in properties) and (key not in properties_to_exclude):
# Skip properties with None value and the ones that are marked as not to be exposed.
# By default, all properties are exposed if not marked as `exposed: false`
# It's still possible to see `exposed: true` marked explictly
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
# Only run convert_str_literal() on string representation of Python dict and list with removing control characters
# No convertion for string representation of Python string, meaning that can still contain control characters
if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
logger.info(f"Executing convert_str_literal() on {normalized_entity_type}.{key} of uuid: {entity_dict['uuid']}")

# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
# convert_str_literal() also removes those control chars to avoid SyntaxError
entity_dict[key] = convert_str_literal(entity_dict[key])

# Add the target key with correct value of data type to the normalized_entity dict
normalized_entity[key] = entity_dict[key]

normalized_entity = cache_result
# Final step: remove properties with empty string value, empty dict {}, and empty list []
if (isinstance(normalized_entity[key], (str, dict, list)) and (not normalized_entity[key])):
normalized_entity.pop(key)

# One final return
return normalized_entity


Expand Down Expand Up @@ -1790,7 +1758,6 @@ def delete_memcached_cache(uuids_list):
for uuid in uuids_list:
cache_keys.append(f'{_memcached_prefix}_neo4j_{uuid}')
cache_keys.append(f'{_memcached_prefix}_complete_{uuid}')
cache_keys.append(f'{_memcached_prefix}_normalized_{uuid}')

_memcached_client.delete_many(cache_keys)

Expand Down
Loading

0 comments on commit 7cf841f

Please sign in to comment.