Skip to content

Commit

Permalink
Merge pull request #45 from DARPA-ASKEM/4400-bug-compare-model-generates-nonsensical-summary-of-two-models
Browse files Browse the repository at this point in the history

4400 bug compare model generates nonsensical summary of two models
  • Loading branch information
dgauldie committed Aug 13, 2024
2 parents e8ea1cb + 0d271e2 commit d9b8b5a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 6 deletions.
2 changes: 1 addition & 1 deletion gollm/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ModelCardModel(BaseModel):


class ModelCompareModel(BaseModel):
cards: List # model cards
amrs: List[str] # expects AMRs to be a stringified JSON object


class EmbeddingModel(BaseModel):
Expand Down
17 changes: 16 additions & 1 deletion gollm/openai/prompts/model_meta_compare.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
MODEL_METADATA_COMPARE_PROMPT = """
You are a helpful agent designed to compare the metadata of multiple models. Use as much detail as possible and assume that your audience is domain experts. When you mention bias and limitations, provide detailed examples. Do not repeat the model card schema headers. You have access to the model cards.\n{model_cards}\nComparison:
You are a helpful agent designed to compare multiple AMR models.
Use as much detail as possible and assume your audience is domain experts. When you mention bias and limitations, provide detailed examples. Do not repeat the model card schema headers. Do not refer to 'gollmCard' in your response, refer to 'gollmCard metadata' as 'metadata'. Format the response in Markdown and include section headers.
If all the AMR models contain gollmCard metadata, focus solely on comparing gollmCard information.
If some but not all of the AMR models contain gollmCard metadata, compare headers, gollmCard, and semantic information together.
If none of the AMR models contain gollmCard metadata, only focus on comparing headers and semantic information. Avoid making assumptions about the AMR models to maintain an objective perspective.
AMRs:
{amrs}
Comparison:
"""
12 changes: 8 additions & 4 deletions gollm/openai/tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,19 +197,23 @@ def config_from_dataset(amr: str, model_mapping: str, datasets: List[str]) -> st
return postprocess_oai_json(output.choices[0].message.content)


def compare_models(model_cards: List[str]) -> str:
def compare_models(amrs: List[str]) -> str:
print("Comparing models...")

joined_escaped_amrs = "\n------\n".join([escape_curly_braces(amr) for amr in amrs])
prompt = MODEL_METADATA_COMPARE_PROMPT.format(
model_cards="--------".join(model_cards)
amrs=joined_escaped_amrs
)

client = OpenAI()
output = client.chat.completions.create(
model="gpt-4o-2024-05-13",
model="gpt-4o-mini",
top_p=1,
frequency_penalty=0,
presence_penalty=0,
seed=123,
temperature=0,
max_tokens=1024,
max_tokens=2048,
messages=[
{"role": "user", "content": prompt},
],
Expand Down

0 comments on commit d9b8b5a

Please sign in to comment.