Skip to content

Commit

Permalink
Merge pull request #619 from hubmapconsortium/dataset_sample_and_dono…
Browse files Browse the repository at this point in the history
…r_endpoints

Dataset sample and donor endpoints
  • Loading branch information
yuanzhou authored Feb 12, 2024
2 parents 59dc58a + b7af360 commit 3257c88
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 3 deletions.
87 changes: 87 additions & 0 deletions entity-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1962,6 +1962,93 @@ paths:
description: The target dataset could not be found
'500':
description: Internal error
'/datasets/{id}/organs':
get:
summary: Retrieve a list of all of the samples that are organs and that are associated with the dataset id
parameters:
- name: id
in: path
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
required: true
schema:
type: string
responses:
'200':
description: A list of entity_type == Sample with sample_category == organ associated with the dataset id
content:
application/json:
schema:
type: array
items:
$ref: '#/components/schemas/Sample'
'400':
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
'401':
description: The user's token has expired or the user did not supply a valid token
'403':
description: The user is not authorized to access the given dataset.
'404':
description: The target dataset could not be found
'500':
description: Internal error
'/datasets/{id}/samples':
get:
summary: Retrieve a list of all of the samples that are not organs that are associated with the dataset id
parameters:
- name: id
in: path
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
required: true
schema:
type: string
responses:
'200':
description: A list of entity_type == Sample with sample_category != organ associated with the dataset id
content:
application/json:
schema:
type: array
items:
$ref: '#/components/schemas/Sample'
'400':
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
'401':
description: The user's token has expired or the user did not supply a valid token
'403':
description: The user is not authorized to access the given dataset.
'404':
description: The target dataset could not be found
'500':
description: Internal error
'/datasets/{id}/donors':
get:
summary: Retrieve a list of all of the donors that are associated with the dataset id
parameters:
- name: id
in: path
description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
required: true
schema:
type: string
responses:
'200':
description: A list of entity_type == Donor that are associated with the dataset id
content:
application/json:
schema:
type: array
items:
$ref: '#/components/schemas/Donor'
'400':
description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
'401':
description: The user's token has expired or the user did not supply a valid token
'403':
description: The user is not authorized to access the given dataset.
'404':
description: The target dataset could not be found
'500':
description: Internal error
'/datasets/{id}/retract':
put:
summary: 'Retracts a dataset after it has been published. Requires a json body with a single field {retraction_reason: string}. The dataset for the given id is modified to include this new retraction_reason field and sets the dataset property sub_status to Retracted. The complete modified dataset is returned. Requires that the dataset being retracted has already been published (dataset.status == Published). Requires a user token with membership in the HuBMAP-Data-Admin group; otherwise a 403 will be returned.'
Expand Down
110 changes: 107 additions & 3 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2704,8 +2704,7 @@ def get_associated_organs_from_dataset(id):
# we need to tell the client with a 401 error
validate_token_if_auth_header_exists(request)

# Use the internal token to query the target entity
# since public entities don't require user token
# Use the internal token to query the target entity since public entities don't require user token
token = get_internal_token()

# Query target entity against uuid-api and neo4j and return as a dict if exists
Expand All @@ -2725,7 +2724,7 @@ def get_associated_organs_from_dataset(id):
# the user token has the correct access level
associated_organs = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance, entity_dict['uuid'])

# If there are zero items in the list associated organs, then there are no associated
# If there are zero items in the list associated_organs, then there are no associated
# Organs and a 404 will be returned.
if len(associated_organs) < 1:
not_found_error("the dataset does not have any associated organs")
Expand All @@ -2737,6 +2736,111 @@ def get_associated_organs_from_dataset(id):

return jsonify(final_result)

"""
Get all samples associated with a given dataset
The gateway treats this endpoint as public accessible
Parameters
----------
id : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
Returns
-------
json
a list of all the samples associated with the target dataset
"""
@app.route('/datasets/<id>/samples', methods=['GET'])
def get_associated_samples_from_dataset(id):
    # A token is optional here, but a supplied-and-invalid one must
    # still be reported to the client as a 401
    validate_token_if_auth_header_exists(request)

    # Resolve the target with the internal token first, because public
    # entities have to be reachable without any user credentials
    token = get_internal_token()

    # Look the id up via uuid-api and neo4j; the entity comes back as a dict
    dataset = query_target_entity(id, token)
    entity_type = dataset['entity_type']

    # This endpoint only serves Dataset-type entities
    is_dataset = schema_manager.entity_type_instanceof(entity_type, 'Dataset')
    if not is_dataset:
        bad_request_error("The entity of given id is not a Dataset or Publication")

    # Anything other than a published dataset needs a real user token
    # (requested with non_public_access_required=True)
    is_published = dataset['status'].lower() == DATASET_STATUS_PUBLISHED
    if not is_published:
        token = get_user_token(request, non_public_access_required=True)

    # Access has been settled at this point: the dataset is public or the user token was accepted
    sample_nodes = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, dataset['uuid'])

    # No associated samples at all is reported as a 404
    if not sample_nodes:
        not_found_error("the dataset does not have any associated samples")

    # Complete each sample via schema_manager, then normalize the list for the response
    completed_samples = schema_manager.get_complete_entities_list(token, sample_nodes)
    normalized_samples = schema_manager.normalize_entities_list_for_response(completed_samples)

    return jsonify(normalized_samples)

"""
Get all donors associated with a given dataset
The gateway treats this endpoint as public accessible
Parameters
----------
id : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
Returns
-------
json
a list of all the donors associated with the target dataset
"""
@app.route('/datasets/<id>/donors', methods=['GET'])
def get_associated_donors_from_dataset(id):
    # A token is optional here, but a supplied-and-invalid one must
    # still be reported to the client as a 401
    validate_token_if_auth_header_exists(request)

    # Resolve the target with the internal token first, because public
    # entities have to be reachable without any user credentials
    token = get_internal_token()

    # Look the id up via uuid-api and neo4j; the entity comes back as a dict
    dataset = query_target_entity(id, token)
    entity_type = dataset['entity_type']

    # This endpoint only serves Dataset-type entities
    is_dataset = schema_manager.entity_type_instanceof(entity_type, 'Dataset')
    if not is_dataset:
        bad_request_error("The entity of given id is not a Dataset or Publication")

    # Anything other than a published dataset needs a real user token
    # (requested with non_public_access_required=True)
    is_published = dataset['status'].lower() == DATASET_STATUS_PUBLISHED
    if not is_published:
        token = get_user_token(request, non_public_access_required=True)

    # Access has been settled at this point: the dataset is public or the user token was accepted
    donor_nodes = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance, dataset['uuid'])

    # No associated donors at all is reported as a 404
    if not donor_nodes:
        not_found_error("the dataset does not have any associated donors")

    # Complete each donor via schema_manager, then normalize the list for the response
    completed_donors = schema_manager.get_complete_entities_list(token, donor_nodes)
    normalized_donors = schema_manager.normalize_entities_list_for_response(completed_donors)

    return jsonify(normalized_donors)

"""
Get the complete provenance info for all datasets
Expand Down
38 changes: 38 additions & 0 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,44 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):

return results

def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid):
    # Match every Sample node with a path of any length leading into the
    # dataset, leaving out those whose sample_category is 'organ'.
    # specimen_type -> sample_category 12/15/2022
    cypher = (
        f"MATCH (ds:Dataset)<-[*]-(sample:Sample) "
        f"WHERE ds.uuid='{dataset_uuid}' AND NOT sample.sample_category = 'organ' "
        f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}"
    )

    logger.info("======get_associated_samples_from_dataset() query======")
    logger.info(cypher)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, cypher)

        # A missing record or an empty collection yields an empty list
        if not (record and record[record_field_name]):
            return []

        # Convert the collected nodes with the shared schema helper
        return schema_neo4j_queries.nodes_to_dicts(record[record_field_name])

def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid):
    """
    Get all the Donor nodes that have a path leading into the given dataset

    Parameters
    ----------
    neo4j_driver : neo4j.Driver object
        The neo4j database connection pool
    dataset_uuid : str
        The uuid of the target dataset

    Returns
    -------
    list
        The matched donors as converted by schema_neo4j_queries.nodes_to_dicts(),
        or an empty list when the query returns no record or an empty collection
    """
    results = []

    # NOTE: dataset_uuid is interpolated directly into the Cypher text, so
    # callers must only pass a trusted uuid (e.g. one already resolved by uuid-api).
    # Every fragment except the last ends with a space so the clauses
    # do not run together in the concatenated query.
    query = (f"MATCH (ds:Dataset)<-[*]-(donor:Donor) "
             f"WHERE ds.uuid='{dataset_uuid}' "
             f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}")

    logger.info("======get_associated_donors_from_dataset() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

        # Only convert when the query returned a non-empty collection
        if record and record[record_field_name]:
            results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])

    return results

"""
Retrieve all the provenance information about each dataset. Each dataset's prov-info is given by a dictionary.
Certain fields such as first sample where there can be multiple nearest datasets in the provenance above a given
Expand Down

0 comments on commit 3257c88

Please sign in to comment.