From e16856c72341a536bdb4cc30f2d192fd88ed41b1 Mon Sep 17 00:00:00 2001 From: ChuckKollar Date: Mon, 5 Feb 2024 15:40:03 -0500 Subject: [PATCH 1/5] Endpoint to return 'samples', 'organs', and 'donors' uuids of dataset id given --- entity-api-spec.yaml | 43 ++++++++++++++++++++++++++++++++++++++++ src/app.py | 23 +++++++++++++++++++++ src/app_neo4j_queries.py | 31 +++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index 5df9bf08..e34b50f7 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -1874,6 +1874,49 @@ paths: description: The target dataset could not be found '500': description: Internal error + '/datasets/{id}/organs-donors-samples-uuids': + get: + summary: Return a dict of 'samples', 'organs', and 'donors' as arrays for uuids associated with the Dataset id'. + parameters: + - name: id + in: path + description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID. + required: true + schema: + type: string + responses: + '200': + description: Object of 'samples', 'organs', and 'donors' uuids as arrays. + content: + application/json: + schema: + type: object + properties: + samples: + type: array + items: + type: string + description: sample uuid + organs: + type: array + items: + type: string + description: organ uuid + donorss: + type: array + items: + type: string + description: donor uuid + '400': + description: Invalid or misformatted entity identifier, or the given entity is not a Dataset + '401': + description: The user's token has expired or the user did not supply a valid token + '403': + description: The user is not authorized to query the revision number of the given dataset. 
+ '404': + description: The target dataset could not be found + '500': + description: Internal error '/datasets/{id}/revisions': get: summary: 'From a given ID of a versioned dataset, retrieve a list of every dataset in the chain ordered from most recent to oldest. The revision number, as well as the dataset uuid will be included. An optional parameter ?include_dataset=true will include the full dataset for each revision as well. Public/Consortium access rules apply, if is for a non-public dataset and no token or a token without membership in HuBMAP-Read group is sent with the request then a 403 response should be returned. If the given id is published, but later revisions are not and the user is not in HuBMAP-Read group, only published revisions will be returned. The field next_revision_uuid will not be returned if the next revision is unpublished' diff --git a/src/app.py b/src/app.py index f21be0a6..ef6f7bd6 100644 --- a/src/app.py +++ b/src/app.py @@ -2626,6 +2626,29 @@ def get_associated_organs_from_dataset(id): return jsonify(final_result) +@app.route('/datasets//organs-donors-samples-uuids', methods=['GET']) +def get_associated_organs_donors_samples_uuids_from_dataset(id): + # Token is not required, but if an invalid token provided, + # we need to tell the client with a 401 error + validate_token_if_auth_header_exists(request) + + # Use the internal token to query the target entity + # since public entities don't require user token + token = get_internal_token() + + # Query target entity against uuid-api and neo4j and return as a dict if exists + entity_dict = query_target_entity(id, token) + + # Only for Dataset + if not schema_manager.entity_type_instanceof(entity_dict['entity_type'], 'Dataset'): + bad_request_error("The entity of given id is not a Dataset or Publication") + + result = \ + app_neo4j_queries.get_associated_organs_donors_samples_uuids_from_dataset(neo4j_driver_instance, + entity_dict['uuid']) + + return jsonify(result) + """ Get the complete 
provenance info for all datasets diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index adf26267..8f778fa2 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -670,6 +670,37 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): return results +def get_associated_organs_donors_samples_uuids_from_dataset(neo4j_driver, dataset_uuid): + """ + Return a dict of 'samples', 'organs', and 'donors' as arrays for uuids associated with the 'dataset_uuid'. + + :param neo4j_driver: + :param dataset_uuid: + :return: {samples: [...], organs: [...], donors: [...]} + """ + logger.info("======get_associated_organ_donor_sample_uuids_from_dataset()======") + + sample_query: str = \ + "MATCH (ds:Dataset)<-[*]-(s:Sample) " \ + f"WHERE ds.uuid='{dataset_uuid}' AND NOT s.sample_category = 'organ' " \ + "RETURN DISTINCT s.uuid" + organ_query: str = \ + f"MATCH (ds:Dataset)<-[*]-(o:Sample) " \ + f"WHERE ds.uuid='{dataset_uuid}' AND o.sample_category = 'organ' " \ + "RETURN DISTINCT o.uuid" + donor_query: str = \ + "MATCH (ds:Dataset)<-[*]-(d:Donor) " \ + f"WHERE ds.uuid='{dataset_uuid}' " \ + "RETURN DISTINCT d.uuid" + + results: dict = {} + with neo4j_driver.session() as session: + results['samples'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, sample_query) + results['organs'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, organ_query) + results['donors'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, donor_query) + return results + + """ Retrieve all the provenance information about each dataset. Each dataset's prov-info is given by a dictionary. 
Certain fields such as first sample where there can be multiple nearest datasets in the provenance above a given From cc9971340ddb1a83d8095234b43053a474c0b952 Mon Sep 17 00:00:00 2001 From: ChuckKollar Date: Mon, 5 Feb 2024 15:52:04 -0500 Subject: [PATCH 2/5] Fixed typo --- entity-api-spec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index e34b50f7..df549541 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -1902,7 +1902,7 @@ paths: items: type: string description: organ uuid - donorss: + donors: type: array items: type: string From f43b1ee5042e96ecc91c7bee19b6939cc3dd7a08 Mon Sep 17 00:00:00 2001 From: ChuckKollar Date: Wed, 7 Feb 2024 15:40:45 -0500 Subject: [PATCH 3/5] Implement suggestions from Joe --- entity-api-spec.yaml | 118 +++++++++++++++++++++++++++------------ src/app.py | 68 +++++++++++++++++++--- src/app_neo4j_queries.py | 61 +++++++++++--------- 3 files changed, 176 insertions(+), 71 deletions(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index df549541..e337a82f 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -1874,39 +1874,39 @@ paths: description: The target dataset could not be found '500': description: Internal error - '/datasets/{id}/organs-donors-samples-uuids': + '/datasets/{id}/revisions': get: - summary: Return a dict of 'samples', 'organs', and 'donors' as arrays for uuids associated with the Dataset id'. + summary: 'From a given ID of a versioned dataset, retrieve a list of every dataset in the chain ordered from most recent to oldest. The revision number, as well as the dataset uuid will be included. An optional parameter ?include_dataset=true will include the full dataset for each revision as well. Public/Consortium access rules apply, if is for a non-public dataset and no token or a token without membership in HuBMAP-Read group is sent with the request then a 403 response should be returned. 
If the given id is published, but later revisions are not and the user is not in HuBMAP-Read group, only published revisions will be returned. The field next_revision_uuid will not be returned if the next revision is unpublished' parameters: - name: id in: path - description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID. + description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID required: true schema: type: string + - name: include_dataset + in: query + description: A case insensitive string. Any value besides true will have no effect. If the string is 'true', the full dataset for each revision will be included in the response + required: false + schema: + type: string + enum: [ 'true', 'false' ] responses: '200': - description: Object of 'samples', 'organs', and 'donors' uuids as arrays. + description: The list of revised datasets that the referenced dataset is a member of including the index number of the revision, where 1 is the oldest version of any revision chain content: application/json: schema: type: object properties: - samples: - type: array - items: - type: string - description: sample uuid - organs: - type: array - items: - type: string - description: organ uuid - donors: - type: array - items: - type: string - description: donor uuid + uuid: + type: string + description: The uuid of a dataset + revision_number: + type: integer + description: The number in the revision chain of this dataset where 1 is the oldest revision + dataset: + $ref: '#/components/schemas/Dataset' '400': description: Invalid or misformatted entity identifier, or the given entity is not a Dataset '401': @@ -1917,9 +1917,9 @@ paths: description: The target dataset could not be found '500': description: Internal error - '/datasets/{id}/revisions': + '/datasets/{id}/organs': get: - summary: 'From a given ID of a versioned dataset, retrieve a list of every 
dataset in the chain ordered from most recent to oldest. The revision number, as well as the dataset uuid will be included. An optional parameter ?include_dataset=true will include the full dataset for each revision as well. Public/Consortium access rules apply, if is for a non-public dataset and no token or a token without membership in HuBMAP-Read group is sent with the request then a 403 response should be returned. If the given id is published, but later revisions are not and the user is not in HuBMAP-Read group, only published revisions will be returned. The field next_revision_uuid will not be returned if the next revision is unpublished' + summary: Retrieve a list of all of the samples that are organs that are associated with the dataset id parameters: - name: id in: path @@ -1927,29 +1927,73 @@ paths: required: true schema: type: string - - name: include_dataset - in: query - description: A case insensitive string. Any value besides true will have no effect. If the string is 'true', the full dataset for each revision will be included in the response - required: false + responses: + '200': + description: A list of entity_type == Sample with sample_category == organ associated with the dataset id + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Sample' + '400': + description: Invalid or misformatted entity identifier, or the given entity is not a Dataset + '401': + description: The user's token has expired or the user did not supply a valid token + '403': + description: The user is not authorized to access the given dataset. + '404': + description: The target dataset could not be found + '500': + description: Internal error + '/datasets/{id}/samples': + get: + summary: Retrieve a list of all of the samples that are not organs that are associated with the dataset id + parameters: + - name: id + in: path + description: The unique identifier of entity.
This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID + required: true schema: type: string - enum: [ 'true', 'false' ] responses: '200': - description: The list of revised datasets that the referenced dataset is a member of including the index number of the revision, where 1 is the oldest version of any revision chain + description: A list of entity_type == Sample with sample_category != organ associated with the dataset id content: application/json: schema: - type: object - properties: - uuid: - type: string - description: The uuid of a dataset - revision_number: - type: integer - description: The number in the revision chain of this dataset where 1 is the oldest revision - dataset: - $ref: '#/components/schemas/Dataset' + type: array + items: + $ref: '#/components/schemas/Sample' + '400': + description: Invalid or misformatted entity identifier, or the given entity is not a Dataset + '401': + description: The user's token has expired or the user did not supply a valid token + '403': + description: The user is not authorized to access the given dataset. + '404': + description: The target dataset could not be found + '500': + description: Internal error + '/datasets/{id}/donors': + get: + summary: Retrieve a list of all of the donors that are associated with the dataset id + parameters: + - name: id + in: path + description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g.
HBM123.ABCD.456) or UUID + required: true + schema: + type: string + responses: + '200': + description: A list of entity_type == Donor that are associated with the dataset id + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Donor' '400': description: Invalid or misformatted entity identifier, or the given entity is not a Dataset '401': diff --git a/src/app.py b/src/app.py index ef6f7bd6..796cbc89 100644 --- a/src/app.py +++ b/src/app.py @@ -2626,8 +2626,8 @@ def get_associated_organs_from_dataset(id): return jsonify(final_result) -@app.route('/datasets//organs-donors-samples-uuids', methods=['GET']) -def get_associated_organs_donors_samples_uuids_from_dataset(id): +@app.route('/datasets//samples', methods=['GET']) +def get_associated_samples_from_dataset(id): # Token is not required, but if an invalid token provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) @@ -2638,17 +2638,71 @@ def get_associated_organs_donors_samples_uuids_from_dataset(id): # Query target entity against uuid-api and neo4j and return as a dict if exists entity_dict = query_target_entity(id, token) + normalized_entity_type = entity_dict['entity_type'] # Only for Dataset - if not schema_manager.entity_type_instanceof(entity_dict['entity_type'], 'Dataset'): + if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): bad_request_error("The entity of given id is not a Dataset or Publication") - result = \ - app_neo4j_queries.get_associated_organs_donors_samples_uuids_from_dataset(neo4j_driver_instance, - entity_dict['uuid']) + # published/public datasets don't require token + if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + # Token is required and the user must belong to HuBMAP-READ group + token = get_user_token(request, non_public_access_required=True) + + # By now, either the entity is public accessible or + # the user token has the correct access level + 
associated_samples = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, entity_dict['uuid']) + + # If there are zero items in the list associated organs, then there are no associated + # Organs and a 404 will be returned. + if len(associated_samples) < 1: + not_found_error("the dataset does not have any associated organs") + + complete_entities_list = schema_manager.get_complete_entities_list(token, associated_samples) + + # Final result after normalization + final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) - return jsonify(result) + return jsonify(final_result) + +@app.route('/datasets//donors', methods=['GET']) +def get_associated_donors_from_dataset(id): + # Token is not required, but if an invalid token provided, + # we need to tell the client with a 401 error + validate_token_if_auth_header_exists(request) + + # Use the internal token to query the target entity + # since public entities don't require user token + token = get_internal_token() + # Query target entity against uuid-api and neo4j and return as a dict if exists + entity_dict = query_target_entity(id, token) + normalized_entity_type = entity_dict['entity_type'] + + # Only for Dataset + if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): + bad_request_error("The entity of given id is not a Dataset or Publication") + + # published/public datasets don't require token + if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + # Token is required and the user must belong to HuBMAP-READ group + token = get_user_token(request, non_public_access_required=True) + + # By now, either the entity is public accessible or + # the user token has the correct access level + associated_donors = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance, entity_dict['uuid']) + + # If there are zero items in the list associated organs, then there are no associated + # Organs and a 404 will be returned. 
+ if len(associated_donors) < 1: + not_found_error("the dataset does not have any associated organs") + + complete_entities_list = schema_manager.get_complete_entities_list(token, associated_donors) + + # Final result after normalization + final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + + return jsonify(final_result) """ Get the complete provenance info for all datasets diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 8f778fa2..0ed57f94 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -670,36 +670,43 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): return results -def get_associated_organs_donors_samples_uuids_from_dataset(neo4j_driver, dataset_uuid): - """ - Return a dict of 'samples', 'organs', and 'donors' as arrays for uuids associated with the 'dataset_uuid'. - - :param neo4j_driver: - :param dataset_uuid: - :return: {samples: [...], organs: [...], donors: [...]} - """ - logger.info("======get_associated_organ_donor_sample_uuids_from_dataset()======") - - sample_query: str = \ - "MATCH (ds:Dataset)<-[*]-(s:Sample) " \ - f"WHERE ds.uuid='{dataset_uuid}' AND NOT s.sample_category = 'organ' " \ - "RETURN DISTINCT s.uuid" - organ_query: str = \ - f"MATCH (ds:Dataset)<-[*]-(o:Sample) " \ - f"WHERE ds.uuid='{dataset_uuid}' AND o.sample_category = 'organ' " \ - "RETURN DISTINCT o.uuid" - donor_query: str = \ - "MATCH (ds:Dataset)<-[*]-(d:Donor) " \ - f"WHERE ds.uuid='{dataset_uuid}' " \ - "RETURN DISTINCT d.uuid" - - results: dict = {} +def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid): + results = [] + + # specimen_type -> sample_category 12/15/2022 + query = (f"MATCH (ds:Dataset)<-[*]-(sample:Sample) " + f"WHERE ds.uuid='{dataset_uuid}' AND NOT sample.sample_category = 'organ' " + f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}") + + logger.info("======get_associated_samples_from_dataset() query======") + 
logger.info(query) + with neo4j_driver.session() as session: - results['samples'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, sample_query) - results['organs'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, organ_query) - results['donors'] = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, donor_query) + record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) + + if record and record[record_field_name]: + results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name]) + return results +def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid): + results = [] + + # specimen_type -> sample_category 12/15/2022 + query = (f"MATCH (ds:Dataset)<-[*]-(donor:Donor) " + f"WHERE ds.uuid='{dataset_uuid}'" + f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}") + + logger.info("======get_associated_donors_from_dataset() query======") + logger.info(query) + + with neo4j_driver.session() as session: + record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) + + if record and record[record_field_name]: + results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name]) + + return results """ Retrieve all the provenance information about each dataset. Each dataset's prov-info is given by a dictionary. 
From b605982720f852ac70f5ddb10e28c9dfd7125ef5 Mon Sep 17 00:00:00 2001 From: ChuckKollar Date: Thu, 8 Feb 2024 10:58:49 -0500 Subject: [PATCH 4/5] Review comments from Joe addressed --- src/app.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/app.py b/src/app.py index 796cbc89..d942bdd6 100644 --- a/src/app.py +++ b/src/app.py @@ -2593,8 +2593,7 @@ def get_associated_organs_from_dataset(id): # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) - # Use the internal token to query the target entity - # since public entities don't require user token + # Use the internal token to query the target entity since public entities don't require user token token = get_internal_token() # Query target entity against uuid-api and neo4j and return as a dict if exists @@ -2614,7 +2613,7 @@ def get_associated_organs_from_dataset(id): # the user token has the correct access level associated_organs = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance, entity_dict['uuid']) - # If there are zero items in the list associated organs, then there are no associated + # If there are zero items in the list associated_organs, then there are no associated # Organs and a 404 will be returned. 
if len(associated_organs) < 1: not_found_error("the dataset does not have any associated organs") @@ -2649,14 +2648,13 @@ def get_associated_samples_from_dataset(id): # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) - # By now, either the entity is public accessible or - # the user token has the correct access level + # By now, either the entity is public accessible or the user token has the correct access level associated_samples = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, entity_dict['uuid']) - # If there are zero items in the list associated organs, then there are no associated - # Organs and a 404 will be returned. + # If there are zero items in the list associated_samples, then there are no associated + # samples and a 404 will be returned. if len(associated_samples) < 1: - not_found_error("the dataset does not have any associated organs") + not_found_error("the dataset does not have any associated samples") complete_entities_list = schema_manager.get_complete_entities_list(token, associated_samples) @@ -2688,14 +2686,13 @@ def get_associated_donors_from_dataset(id): # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) - # By now, either the entity is public accessible or - # the user token has the correct access level + # By now, either the entity is public accessible or the user token has the correct access level associated_donors = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance, entity_dict['uuid']) - # If there are zero items in the list associated organs, then there are no associated - # Organs and a 404 will be returned. + # If there are zero items in the list associated_donors, then there are no associated + # donors and a 404 will be returned. 
if len(associated_donors) < 1: - not_found_error("the dataset does not have any associated organs") + not_found_error("the dataset does not have any associated donors") complete_entities_list = schema_manager.get_complete_entities_list(token, associated_donors) From b7af3603ae061a21f1adbacbaffe031c0788284f Mon Sep 17 00:00:00 2001 From: ChuckKollar Date: Thu, 8 Feb 2024 11:02:22 -0500 Subject: [PATCH 5/5] Review comments from Joe addressed --- src/app.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/app.py b/src/app.py index d942bdd6..8a71082e 100644 --- a/src/app.py +++ b/src/app.py @@ -2625,6 +2625,21 @@ def get_associated_organs_from_dataset(id): return jsonify(final_result) +""" +Get all samples associated with a given dataset + +The gateway treats this endpoint as public accessible + +Parameters +---------- +id : str + The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity + +Returns +------- +json + a list of all the samples associated with the target dataset +""" @app.route('/datasets//samples', methods=['GET']) def get_associated_samples_from_dataset(id): # Token is not required, but if an invalid token provided, @@ -2663,6 +2678,21 @@ def get_associated_samples_from_dataset(id): return jsonify(final_result) +""" +Get all donors associated with a given dataset + +The gateway treats this endpoint as public accessible + +Parameters +---------- +id : str + The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity + +Returns +------- +json + a list of all the donors associated with the target dataset +""" @app.route('/datasets//donors', methods=['GET']) def get_associated_donors_from_dataset(id): # Token is not required, but if an invalid token provided,