Merge pull request #619 from hubmapconsortium/dataset_sample_and_dono…

…r_endpoints Dataset sample and donor endpoints
hubmapconsortium · Feb 12, 2024 · 3257c88 · 3257c88
2 parents 59dc58a + b7af360
commit 3257c88
Show file tree

Hide file tree

Showing 3 changed files with 232 additions and 3 deletions.
diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml
@@ -1962,6 +1962,93 @@ paths:
           description: The target dataset could not be found
         '500':
           description: Internal error
+  '/datasets/{id}/organs':
+    get:
+      summary: Retrieve a list of all of the samples that are organs that are associated with the dataset id
+      parameters:
+        - name: id
+          in: path
+          description: The unique identifier of entity.  This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: A list of entity_type == Sample with sample_category == organ associated with the dataset id
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/Sample'
+        '400':
+          description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
+        '401':
+          description: The user's token has expired or the user did not supply a valid token
+        '403':
+          description: The user is not authorized to access the given dataset.
+        '404':
+          description: The target dataset could not be found, or the dataset does not have any associated organs
+        '500':
+          description: Internal error
+  '/datasets/{id}/samples':
+    get:
+      summary: Retrieve a list of all of the samples that are not organs that are associated with the dataset id
+      parameters:
+        - name: id
+          in: path
+          description: The unique identifier of entity.  This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: A list of entity_type == Sample with sample_category != organ associated with the dataset id
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/Sample'
+        '400':
+          description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
+        '401':
+          description: The user's token has expired or the user did not supply a valid token
+        '403':
+          description: The user is not authorized to access the given dataset.
+        '404':
+          description: The target dataset could not be found, or the dataset does not have any associated samples
+        '500':
+          description: Internal error
+  '/datasets/{id}/donors':
+    get:
+      summary: Retrieve a list of all of the donors that are associated with the dataset id
+      parameters:
+        - name: id
+          in: path
+          description: The unique identifier of entity.  This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: A list of entity_type == Donor that are associated with the dataset id
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/Donor'
+        '400':
+          description: Invalid or misformatted entity identifier, or the given entity is not a Dataset
+        '401':
+          description: The user's token has expired or the user did not supply a valid token
+        '403':
+          description: The user is not authorized to access the given dataset.
+        '404':
+          description: The target dataset could not be found, or the dataset does not have any associated donors
+        '500':
+          description: Internal error
   '/datasets/{id}/retract':
     put:
       summary: 'Retracts a dataset after it has been published. Requires a json body with a single field {retraction_reason: string}. The dataset for the given id is modified to include this new retraction_reason field and sets the dataset property sub_status to Retracted. The complete modified dataset is returned. Requires that the dataset being retracted has already been published (dataset.status == Published. Requires a user token with membership in the HuBMAP-Data-Admin group otherwise then a 403 will be returned.'

diff --git a/src/app.py b/src/app.py
@@ -2704,8 +2704,7 @@ def get_associated_organs_from_dataset(id):
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
 
-    # Use the internal token to query the target entity
-    # since public entities don't require user token
+    # Use the internal token to query the target entity since public entities don't require user token
     token = get_internal_token()
 
     # Query target entity against uuid-api and neo4j and return as a dict if exists
@@ -2725,7 +2724,7 @@ def get_associated_organs_from_dataset(id):
     # the user token has the correct access level
     associated_organs = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance, entity_dict['uuid'])
 
-    # If there are zero items in the list associated organs, then there are no associated
+    # If there are zero items in the list associated_organs, then there are no associated
     # Organs and a 404 will be returned.
     if len(associated_organs) < 1:
         not_found_error("the dataset does not have any associated organs")
@@ -2737,6 +2736,111 @@ def get_associated_organs_from_dataset(id):
 
     return jsonify(final_result)
 
+"""
+Get all non-organ samples associated with a given dataset
+
+The gateway treats this endpoint as public accessible
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
+
+Returns
+-------
+json
+    a list of all the non-organ samples (sample_category != organ) associated with the target dataset
+"""
+@app.route('/datasets/<id>/samples', methods=['GET'])
+def get_associated_samples_from_dataset(id):
+    # A token is optional for this endpoint; however, when the request carries
+    # an invalid one the client must be told so with a 401 error
+    validate_token_if_auth_header_exists(request)
+
+    # Start out with the internal token, because looking up a public entity
+    # does not depend on a user token
+    token = get_internal_token()
+
+    # Look the id up against uuid-api and neo4j; an existing entity comes back as a dict
+    dataset_dict = query_target_entity(id, token)
+
+    # This endpoint only serves Dataset entities
+    is_dataset = schema_manager.entity_type_instanceof(dataset_dict['entity_type'], 'Dataset')
+    if not is_dataset:
+        bad_request_error("The entity of given id is not a Dataset or Publication")
+
+    # Only a published/public dataset can be read without a user token
+    dataset_status = dataset_dict['status'].lower()
+    if dataset_status != DATASET_STATUS_PUBLISHED:
+        # Swap in the user token: it is required and the user must belong to HuBMAP-READ group
+        token = get_user_token(request, non_public_access_required=True)
+
+    # From here on, either the dataset is public or the user token has the correct access level
+    sample_dicts = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, dataset_dict['uuid'])
+
+    # An empty list means the dataset has no associated samples, which is reported as a 404
+    if len(sample_dicts) == 0:
+        not_found_error("the dataset does not have any associated samples")
+
+    # Build the complete entities first, then normalize them for the response
+    complete_samples = schema_manager.get_complete_entities_list(token, sample_dicts)
+    normalized_samples = schema_manager.normalize_entities_list_for_response(complete_samples)
+
+    return jsonify(normalized_samples)
+
+"""
+Get all donors associated with a given dataset
+
+The gateway treats this endpoint as public accessible
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
+
+Returns
+-------
+json
+    a list of all the donors associated with the target dataset
+"""
+@app.route('/datasets/<id>/donors', methods=['GET'])
+def get_associated_donors_from_dataset(id):
+    # No token is needed to call this endpoint, yet an invalid token that was
+    # sent anyway has to be answered with a 401 error
+    validate_token_if_auth_header_exists(request)
+
+    # The target entity is queried with the internal token first, given that
+    # public entities don't require a user token
+    token = get_internal_token()
+
+    # Resolve the id against uuid-api and neo4j; the entity is returned as a dict if it exists
+    target_dict = query_target_entity(id, token)
+    target_type = target_dict['entity_type']
+
+    # Anything that is not a Dataset is rejected
+    if not schema_manager.entity_type_instanceof(target_type, 'Dataset'):
+        bad_request_error("The entity of given id is not a Dataset or Publication")
+
+    # A dataset that is not published/public requires a user token
+    published = target_dict['status'].lower() == DATASET_STATUS_PUBLISHED
+    if not published:
+        # Token is required and the user must belong to HuBMAP-READ group
+        token = get_user_token(request, non_public_access_required=True)
+
+    # By this point the dataset is public or the user token has the correct access level
+    donor_dicts = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance, target_dict['uuid'])
+
+    # No items in the list means no associated donors, which is reported as a 404
+    if len(donor_dicts) == 0:
+        not_found_error("the dataset does not have any associated donors")
+
+    # Complete the donor entities, then normalize them into the final result
+    complete_donors = schema_manager.get_complete_entities_list(token, donor_dicts)
+    normalized_donors = schema_manager.normalize_entities_list_for_response(complete_donors)
+
+    return jsonify(normalized_donors)
 
 """
 Get the complete provenance info for all datasets

diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
@@ -670,6 +670,44 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):
 
     return results
 
+"""
+Get all the non-organ samples associated with a given dataset
+
+Parameters
+----------
+neo4j_driver : object
+    The neo4j driver; only its session() method is used here
+dataset_uuid : str
+    The uuid of the target dataset
+
+Returns
+-------
+list
+    The distinct Sample nodes that have a directed path of any length leading to
+    the dataset and whose sample_category is not 'organ', as converted by
+    schema_neo4j_queries.nodes_to_dicts(). An empty list when nothing is found.
+"""
+def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid):
+    # specimen_type -> sample_category 12/15/2022
+    match_clause = "MATCH (ds:Dataset)<-[*]-(sample:Sample) "
+    where_clause = f"WHERE ds.uuid='{dataset_uuid}' AND NOT sample.sample_category = 'organ' "
+    return_clause = f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}"
+    cypher = match_clause + where_clause + return_clause
+
+    logger.info("======get_associated_samples_from_dataset() query======")
+    logger.info(cypher)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, cypher)
+
+        # A missing record and an empty collection both mean there is nothing to convert
+        if not record or not record[record_field_name]:
+            return []
+
+        return schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
+
+"""
+Get all the donors associated with a given dataset
+
+Parameters
+----------
+neo4j_driver : object
+    The neo4j driver; only its session() method is used here
+dataset_uuid : str
+    The uuid of the target dataset
+
+Returns
+-------
+list
+    The distinct Donor nodes that have a directed path of any length leading to
+    the dataset, as converted by schema_neo4j_queries.nodes_to_dicts().
+    An empty list when nothing is found.
+"""
+def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid):
+    results = []
+
+    # Every clause fragment ends with a space so that the concatenated pieces
+    # stay separate tokens in the final query text (and in the logged query)
+    query = (f"MATCH (ds:Dataset)<-[*]-(donor:Donor) "
+             f"WHERE ds.uuid='{dataset_uuid}' "
+             f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}")
+
+    logger.info("======get_associated_donors_from_dataset() query======")
+    logger.info(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
+
+        # Convert only when a record came back and its collection is non-empty
+        if record and record[record_field_name]:
+            results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
+
+    return results
+
 """
 Retrieve all the provenance information about each dataset. Each dataset's prov-info is given by a dictionary. 
 Certain fields such as first sample where there can be multiple nearest datasets in the provenance above a given