Merge pull request #52 from DARPA-ASKEM/4790-bug-app-fails-to-extract-configurations-from-timeseries-datasets-1

4790 bug app fails to extract configurations from timeseries datasets 1
dgauldie committed Sep 17, 2024
2 parents b5e9a5e + a82b6c7 commit eb7617d
Showing 3 changed files with 21 additions and 12 deletions.
gollm/entities.py (2 changes: 1 addition & 1 deletion)
@@ -11,8 +11,8 @@ class ConfigureModelDocument(BaseModel):

 class ConfigureModelDataset(BaseModel):
     dataset: List[str]
-    matrix: str
     amr: str # expects AMR in a stringified JSON object
+    matrix: str = None
 
 
 class ModelCardModel(BaseModel):
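With matrix now defaulting to None, a configuration request built from a timeseries dataset no longer has to carry a contact matrix. A minimal sketch of how the updated entity behaves (assumes Pydantic is installed; the dataset rows and AMR string below are hypothetical examples, not repo data):

from typing import List
from pydantic import BaseModel

class ConfigureModelDataset(BaseModel):
    dataset: List[str]
    amr: str  # expects AMR in a stringified JSON object
    matrix: str = None  # now optional: timeseries datasets may not include a matrix

# Timeseries request without a matrix; before this change, matrix was a required field.
req = ConfigureModelDataset(
    dataset=["date,cases", "2024-01-01,10", "2024-01-02,14"],
    amr='{"header": {"name": "SIR"}}',
)
print(req.matrix)  # None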
gollm/openai/prompts/model_meta_compare.py (13 changes: 9 additions & 4 deletions)
@@ -1,13 +1,18 @@
MODEL_METADATA_COMPARE_PROMPT = """
You are a helpful agent designed to compare multiple AMR models.
Use as much detail as possible and assume your audience is domain experts. When you mention bias and limitations, provide detailed examples. Do not repeat the model card schema headers. Do not refer to 'gollmCard' in your response, refer to 'gollmCard metadata' as 'metadata'. Format the response in Markdown and include section headers.
Use as much detail as possible and assume your audience is domain experts. Use the following to decide how to compare the AMR models:
- If all the AMR models contain metadata, focus solely on comparing metadata information.
- If some but not all of the AMR models contain metadata, compare both metadata and semantic information together.
- If none of the AMR models contain metadata, only focus on comparing semantic information.
If all the AMR models contain gollmCard metadata, focus solely on comparing gollmCard information.
Avoid making assumptions about the AMR models to maintain an objective perspective.
If some but not all of the AMR models contain gollmCard metadata, compare headers, gollmCard, and semantic information together.
If you mention bias and limitations, provide detailed examples.
Do not repeat the metadata schema headers.
Do not use 'gollmCard' in your response, refer to it as 'metadata'.
If none of the AMR models contain gollmCard metadata, only focus on comparing headers and semantic information. Avoid making assumptions about the AMR models to maintain an objective perspective.
Format the response in Markdown and include section headers.
AMRs:
gollm/openai/tool_utils.py (18 changes: 11 additions & 7 deletions)
@@ -106,7 +106,7 @@ def amr_enrichment_chain(amr: str, research_paper: str) -> dict:
     return postprocess_oai_json(output.choices[0].message.content)
 
 
-def model_card_chain(amr: str = None, research_paper: str = None) -> dict:
+def model_card_chain(amr: str, research_paper: str = None) -> dict:
     print("Creating model card...")
     assert amr, "An AMR model must be provided."
     if not research_paper:
@@ -199,7 +199,7 @@ def embedding_chain(text: str) -> List:
     return output.data[0].embedding
 
 
-def model_config_from_dataset(amr: str, dataset: List[str]) -> str:
+def model_config_from_dataset(amr: str, dataset: List[str], matrix: str) -> str:
     print("Extracting datasets...")
     dataset_text = os.linesep.join(dataset)
 
@@ -211,11 +211,15 @@ def model_config_from_dataset(amr: str, dataset: List[str]) -> str:

print("Building prompt to extract model configurations from a dataset...")
prompt = (CONFIGURE_FROM_DATASET_PROMPT
+ CONFIGURE_FROM_DATASET_MAPPING_PROMPT
+ CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT
+ CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
+ CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text)
+ "Answer:")
+ CONFIGURE_FROM_DATASET_MAPPING_PROMPT
+ CONFIGURE_FROM_DATASET_TIMESERIES_PROMPT
+ CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
+ CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text))

if matrix:
prompt += CONFIGURE_FROM_DATASET_MATRIX_PROMPT.format(matrix=matrix)

prompt += "Answer:"

print("Sending request to OpenAI API...")
client = OpenAI()
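The fix threads the contact matrix through model_config_from_dataset as an explicit argument and appends its prompt section only when a matrix is actually present, matching the now-optional matrix field on ConfigureModelDataset. A standalone sketch of that conditional assembly; the helper name build_config_prompt is ours, and the short strings stand in for the real CONFIGURE_FROM_DATASET_* constants in gollm/openai/prompts:

import os
from typing import List

# Stand-ins for the real prompt constants; the wording here is illustrative only.
CONFIGURE_FROM_DATASET_PROMPT = "Extract a model configuration from the dataset below.\n"
CONFIGURE_FROM_DATASET_AMR_PROMPT = "AMR model:\n{amr}\n"
CONFIGURE_FROM_DATASET_DATASET_PROMPT = "Dataset:\n{data}\n"
CONFIGURE_FROM_DATASET_MATRIX_PROMPT = "Contact matrix:\n{matrix}\n"

def build_config_prompt(amr: str, dataset: List[str], matrix: str = None) -> str:
    dataset_text = os.linesep.join(dataset)
    prompt = (CONFIGURE_FROM_DATASET_PROMPT
              + CONFIGURE_FROM_DATASET_AMR_PROMPT.format(amr=amr)
              + CONFIGURE_FROM_DATASET_DATASET_PROMPT.format(data=dataset_text))
    # The matrix section is optional: timeseries datasets typically have none.
    if matrix:
        prompt += CONFIGURE_FROM_DATASET_MATRIX_PROMPT.format(matrix=matrix)
    return prompt + "Answer:"

# Timeseries dataset with no matrix: the matrix section is simply skipped.
print(build_config_prompt('{"header": {"name": "SIR"}}', ["t,S,I,R", "0,999,1,0"]))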
