Merge pull request #50 from DARPA-ASKEM/4513-feat-model-enrichment-flow

4513 feat model enrichment flow
DARPA-ASKEM · Sep 12, 2024 · 1a461d3 · 1a461d3
2 parents 530980f + b4427bd
commit 1a461d3
Show file tree

Hide file tree

Showing 11 changed files with 335 additions and 3,446 deletions.
diff --git a/api/run.py b/api/run.py
@@ -1,7 +1,7 @@
 from contextlib import contextmanager
 from fastapi import FastAPI, HTTPException
 import json
-from gollm.openai.tool_utils import model_config_from_document, amodel_card_chain
+from gollm.openai.tool_utils import model_config_from_document, model_card_chain
 from gollm.entities import ConfigureModelDocument, ModelCardModel
 
 app = FastAPI()
@@ -35,8 +35,10 @@ async def configure_model_from_document(input_model: ConfigureModelDocument):
 @app.post("/model_card")
 async def model_card(input_model: ModelCardModel):
     try:
+        amr = input_model.amr
         research_paper = input_model.research_paper
-        response = await amodel_card_chain(
+        response = await model_card_chain(
+            amr=amr,
             research_paper=research_paper
         )  # Use await here
         response = {"response": response}

diff --git a/gollm/entities.py b/gollm/entities.py
@@ -11,10 +11,12 @@ class ConfigureModelDocument(BaseModel):
 
 class ConfigureModelDataset(BaseModel):
     dataset: List[str]
+    matrix: str
     amr: str  # expects AMR in a stringified JSON object
 
 
 class ModelCardModel(BaseModel):
+    amr: str  # expects AMR in a stringified JSON object
     research_paper: str
 
 

diff --git a/gollm/openai/prompts/config_from_dataset.py b/gollm/openai/prompts/config_from_dataset.py
@@ -1,9 +1,9 @@
 CONFIGURE_FROM_DATASET_PROMPT = """
-You are a helpful agent designed to create a model configuration for a given AMR model from a set of user-supplied CSV datasets.
+You are a helpful agent designed to create a model configuration for a given AMR model from a user-supplied CSV dataset.
 
 Create a condition for each dataset.
 
-The user-supplied datasets may include both time-series datasets and model-mapping datasets.
+The user-supplied dataset may be either a time-series dataset or a model-mapping dataset.
 One of your key tasks is to determine the type of dataset supplied. This can be done by examining the column headers in the first row and the values in the first column of the user-supplied CSV dataset.
     - Model-mapping datasets have the first row and column containing labels and the rest containing numerical values. Often, the first cell in the CSV is empty.
     - Time-series datasets usually have the first row as labels and a column representing sequential time steps. You can use the column headers to determine which column represents time steps. If the dataset does not have header information, look for columns with date strings or incrementally increasing timestamps or numbers. The other columns will represent the values of the AMR model's states.
@@ -22,19 +22,16 @@
 
 ---EXAMPLE START---
 
----MODEL MAPPING START---
+---MATRIX START---
 
-...
 subject-controllers of f
 
-, S_1, S_2, S_3
-I_1, f_0, f_1, f_2
-I_2, f_4, f_3, f_5
-I_3, f_7, f_8, f_6
-
-...
+, I_1, I_2, I_3
+S_1, f_0, f_1, f_2
+S_2, f_4, f_3, f_5
+S_3, f_7, f_8, f_6
 
----MODEL MAPPING END---
+---MATRIX END---
 
 ---SAMPLE DATASET START---
 
@@ -45,7 +42,7 @@
 
 ---SAMPLE DATASET END---
 
-Since the subject controller of f_0 is I_1, S_1, we want to map the value from the dataset cell S_1, I_1 to f_0 which will be 38.6.
+Since the subject of f_0 is S_1 and the controller of f_0 is I_1, we want to map the value from the dataset cell S_1, I_1 to f_0, which will be 38.6.
 
 Based on this information, we do not know the initial values for I_1 and S_1. Do not misinterpret these interaction values as initials.
 
@@ -90,12 +87,13 @@
 """
 
 CONFIGURE_FROM_DATASET_DATASET_PROMPT = """
-Use the following user-supplied datasets to answer the query:
+Use the following user-supplied dataset to answer the query:
 
----START USER-SUPPLIED CSV DATASETS---
+---START USER-SUPPLIED CSV DATASET---
 
 {data}
----END USER-SUPPLIED CSV DATASETS---
+
+---END USER-SUPPLIED CSV DATASET---
 
 """
 
@@ -109,3 +107,14 @@
 ---END AMR MODEL JSON---
 
 """
+
+CONFIGURE_FROM_DATASET_MATRIX_PROMPT = """
+Use the following contact matrix as a reference for model-mapping datasets:
+
+---START MATRIX---
+
+{matrix}
+
+---END MATRIX---
+
+"""
diff --git a/gollm/openai/prompts/config_from_document.py b/gollm/openai/prompts/config_from_document.py
@@ -1,5 +1,6 @@
 CONFIGURE_FROM_DOCUMENT_PROMPT = """
-You are a helpful agent designed to find multiple model configurations for a given AMR model of various conditions described in a research paper.
+You are a helpful agent designed to find multiple model configurations for a given AMR model of various conditions described in a research paper and the initials and parameters that make up those conditions.
+For context, initials represent the initial state of the system through the initial distribution of tokens across the places, known as the initial marking. Each place corresponds to a variable or state in the system, such as a species concentration in a reaction, and the number of tokens reflects the initial conditions of the ODEs. Parameters define the system's rules and rates of evolution, including transition rates (analogous to reaction rates in ODEs) that determine how quickly tokens move between places. These parameters also include stoichiometric relationships, represented by the weights of arcs connecting places and transitions, dictating how many tokens are consumed or produced when a transition occurs.
 
 Use the following AMR model JSON file as a reference:
 

diff --git a/gollm/openai/prompts/model_card.py b/gollm/openai/prompts/model_card.py
@@ -1,55 +1,24 @@
-MODEL_CARD_TEMPLATE = """
-{
-  "ModelName": {
-    "model_summary": "A brief description of the system or process."
-  },
-  "ModelDetails": {
-    "model_description": "Describe the structure of the model in the paper, including its places, transitions, and arcs. Mention if it can likely be represented in petrinet format.",
-    "FundedBy": "If applicable, list the funding sources.",
-    "ModelType": "Mathematical / Graphical Model / Other"
-  },
-  "Uses": {
-    "DirectUse": "Explain how the model can be used to analyze or simulate specific systems or processes.",
-    "OutOfScopeUse": "Describe scenarios where using the model would be inappropriate or misleading."
-  },
-  "BiasRisksLimitations": {
-    "bias_risks_limitations": "Describe sources of bias and risk based on the research paper"
-  },
-  "Evaluation": {
-    "TestingDataFactorsMetrics": "Describe how the model was validated, e.g., through simulation, comparison with real-world data, etc."
-  },
-  "TechnicalSpecifications": {
-    "model_specs": "Details about the model's complexity, such as the number of places, transitions, parameter count, and arcs."
-  },
-  "Glossary": {
-    "terms": ["Str"]
-  },
-  "ModelCardAuthors": ["Str"],
-  "HowToGetStartedWithTheModel": {
-    "examples": ["Str"]
-  },
-  "Citation": {
-    "references": ["Str"]
-  },
-  "MoreInformation": {
-    "links": ["Str"]
-  },
-  "StructuralInformation": {
-    "schema_name": ["Str"],
-    "parameter_names": ["Str"],
-    "domain": ["Str"],
-    "model_type": ["Str"],
-    "model_structure": ["Str"],
-    "model_parameters": ["Str"]
-  }
-}
-"""
-
 INSTRUCTIONS = """
-You are a helpful agent designed to populate a model card containing metadata about a given research paper and its associated model. You may have access to a research paper and a model configuration file. Structural information should come from the model configuration file. \n
-Model configurations may be in the form of a petri net, stock and flow, regnet, or other model formats.
-You may only have access to either a research paper or model configuration. Do your best to populate the model card with as much information as possible. If you cannot answer the entire query, provide as much information as possible. If there is no answer, use the string "null" as a placeholder. \n
-Use the following research paper as a reference: ---PAPER START---{research_paper}---PAPER END--. ---MODEL START-- {amr} ---MODEL END--- Ensure that the output follows the below model card format.\n
-TEMPLATE: {model_card_template}\n
-Make sure that the following text can be serialized as a JSON object. DO NOT USE ```json``` in the model card header within the json object:\n
-{{\n """
+You are a helpful agent designed to populate metadata of a given AMR model.
+
+You may have access to a document that describes the given AMR model and a JSON representation of the AMR model we want populated. Structural information should come from the AMR model.
+
+You may only have access to the model. Do your best to populate the JSON object specified in the response format with as much information as possible.
+If you cannot answer the entire query, provide as much information as possible. If there is no answer, populate fields with null values. Do not leave any fields empty and do not make up information.
+
+Use the following document as a reference:
+
+---DOCUMENT START---
+{research_paper}
+---DOCUMENT END---
+
+Use the following JSON representation of an AMR model as a reference:
+
+---MODEL START---
+{amr}
+---MODEL END---
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
+"""