-
Notifications
You must be signed in to change notification settings - Fork 0
/
llmdiscoeval.py
422 lines (349 loc) · 19.1 KB
/
llmdiscoeval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
import asyncio
import random
import json
import re
from typing import List, Tuple, Dict
from dataclasses import dataclass, field
from openai import AsyncOpenAI
import os
import logging
# Set up logging
logging.basicConfig(filename='disco_simulation.log', level=logging.INFO,
format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# Initialize the Ollama client
client = AsyncOpenAI(
base_url='http://localhost:11434/v1',
api_key='ollama', # required, but unused
)
@dataclass
class Agent:
id: int
preferred_style: str
model: str
partner_preferences: Dict[int, float] = field(default_factory=dict)
@dataclass
class DancePair:
agent1: Agent
agent2: Agent
@dataclass
class DanceResult:
pair: DancePair
routine: str
score: int
feedback: str
iteration: int
song: Dict[str, str]
class MultiAgentDisco:
def __init__(self, num_agents: int, dance_styles: List[str], models: List[str], iterations: int, stabilization_threshold: int):
self.num_agents = num_agents
self.dance_styles = dance_styles
self.models = models
self.iterations = iterations
self.stabilization_threshold = stabilization_threshold
self.agents = self.initialize_agents()
self.dance_history = []
def initialize_agents(self) -> List[Agent]:
return [Agent(id=i, preferred_style=random.choice(self.dance_styles), model=random.choice(self.models)) for i in range(self.num_agents)]
async def retry_llm_request(self, func, *args, max_retries=10):
for attempt in range(max_retries):
try:
return await func(*args)
except json.JSONDecodeError as e:
logging.error(f"JSON decode error in LLM request (attempt {attempt + 1}/{max_retries}): {e}")
except Exception as e:
logging.error(f"Error in LLM request (attempt {attempt + 1}/{max_retries}): {e}")
if attempt == max_retries - 1:
raise
async def generate_song(self, agent: Agent) -> Dict[str, str]:
prompt = "Generate a brief description for a dance song, including its title, genre, tempo, mood, and a short description."
async def _generate():
response = await client.chat.completions.create(
model=agent.model,
messages=[
{"role": "system", "content": """You are a music composer AI assistant. Your task is to generate a JSON object representing musical composition parameters including title, genre, tempo, mood, and description.
Follow these instructions to generate the JSON object:
1. Create a JSON object with five key-value pairs: "title", "genre", "tempo", "mood", and "description".
2. Use the following input variables to set the values:
<title>{{TITLE}}</title>
<genre>{{GENRE}}</genre>
<tempo>{{TEMPO}}</tempo>
<mood>{{MOOD}}</mood>
<description>{{DESCRIPTION}}</description>
3. If any of the input variables are empty or not provided, use the following default values:
- For title: "Untitled"
- For genre: "Default"
- For tempo: "Moderate"
- For mood: "Neutral"
- For description: "A generic dance song"
4. Ensure that all values are strings, even if they contain numbers.
5. Format your response as a valid JSON object, enclosed in <response> tags.
Here are examples of valid responses:
<response>
{"title": "Electric Dreams", "genre": "Electronic", "tempo": "Fast", "mood": "Energetic", "description": "A pulsating electronic dance track with synthesizer melodies and a driving beat"}
</response>
<response>
{"title": "Moonlight Serenade", "genre": "Classical", "tempo": "Slow", "mood": "Melancholic", "description": "A gentle classical piece featuring soft piano and string arrangements"}
</response>
Remember to use the provided input variables if they are available, and only use the default values when necessary."""},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
return json.loads(response.choices[0].message.content)
try:
song = await self.retry_llm_request(_generate)
# Ensure all required keys are present
return {
"title": song.get("title", "Untitled"),
"genre": song.get("genre", "Default"),
"tempo": song.get("tempo", "Moderate"),
"mood": song.get("mood", "Neutral"),
"description": song.get("description", "A generic dance song")
}
except Exception as e:
logging.error(f"Failed to generate song after all retries: {e}")
return {
"title": "Untitled",
"genre": "Default",
"tempo": "Moderate",
"mood": "Neutral",
"description": "A generic dance song"
}
def select_partners(self) -> List[DancePair]:
available_agents = self.agents.copy()
pairs = []
while len(available_agents) >= 2:
agent1 = random.choice(available_agents)
available_agents.remove(agent1)
agent2 = max(available_agents, key=lambda a: agent1.partner_preferences.get(a.id, 0))
available_agents.remove(agent2)
pairs.append(DancePair(agent1, agent2))
return pairs
async def collaborate_on_dance(self, pair: DancePair, song: Dict[str, str]) -> str:
routine = []
dancers = [pair.agent1, pair.agent2]
async def _generate_move(dancer, turn):
prompt = f"""
Song: {json.dumps(song)}
Dancer {dancer.id} ({dancer.preferred_style}) is performing.
Previous moves: {' '.join(routine)}
Describe the next dance move:
"""
response = await client.chat.completions.create(
model=dancer.model,
messages=[
{"role": "system", "content": """You are an AI assistant tasked with generating a dance move for a dancer in response to a song and previous moves. Your goal is to create a cohesive and engaging dance routine by suggesting the next appropriate move.
Here's the information about the song:
<song>
{{SONG}}
</song>
Now, let's focus on the dancer and their previous moves:
Dancer ID: {{DANCER_ID}}
Preferred dance style: {{PREFERRED_STYLE}}
Previous moves: {{PREVIOUS_MOVES}}
To generate the next dance move, follow these steps:
1. Consider the song's rhythm, tempo, and mood.
2. Take into account the dancer's preferred style.
3. Analyze the previous moves to ensure continuity and flow in the routine.
4. Create a dance move that complements the existing routine and fits the song.
Your response should be a JSON object containing a single dance move. The move should be descriptive but concise, focusing on the key elements of the movement.
Format your response as follows:
<response>
{
"move": "Your dance move description here"
}
</response>
Remember to make the move appropriate for the song, the dancer's style, and the flow of the routine. Be creative while maintaining coherence with the previous moves."""},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
return result.get("move", "Unspecified move")
for turn in range(4): # 4 turns for a back-and-forth routine
dancer = dancers[turn % 2]
try:
move = await self.retry_llm_request(_generate_move, dancer, turn)
routine.append(f"Dancer {dancer.id}: {move}")
except Exception as e:
logging.error(f"Failed to generate move after all retries for Dancer {dancer.id} on turn {turn}: {e}")
routine.append(f"Dancer {dancer.id}: [Move generation failed]")
return "\n".join(routine)
async def judge_dance(self, routine: str, agent: Agent) -> Tuple[int, str]:
prompt = f"Judge this dance routine on a scale of 1 to 10 (use only integers) and provide brief feedback:\n{routine}"
async def _judge():
response = await client.chat.completions.create(
model=agent.model,
messages=[
{"role": "system", "content": """You are a professional dance judge tasked with evaluating a dance routine. Your goal is to provide a fair and constructive assessment of the performance.
Here is the dance routine you need to judge:
<routine>
{{ROUTINE}}
</routine>
Carefully evaluate the routine, considering factors such as technique, creativity, musicality, and overall performance quality. Pay attention to both strengths and areas for improvement.
Before scoring, think about your assessment and prepare your feedback. Consider specific elements of the performance that stood out, both positively and negatively. Your feedback should be constructive, highlighting what was done well and suggesting areas for improvement.
After formulating your thoughts, assign a score to the routine on a scale of 1 to 10, using only integer values. A score of 1 represents a poor performance, while 10 represents an exceptional, flawless performance.
Provide your evaluation in the form of a JSON object with two keys: "score" and "feedback". The "score" should be an integer between 1 and 10, and the "feedback" should be a string containing your brief, constructive feedback about the routine.
Your response should be structured as follows:
<answer>
{
"score": [Your integer score between 1 and 10],
"feedback": "[Your brief, constructive feedback here]"
}
</answer>
Ensure that your JSON is properly formatted and that the "score" is an integer, not a string or float."""},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
score = int(result.get("score", 5)) # Ensure score is an integer
feedback = result.get("feedback", "No feedback provided")
return score, feedback
try:
score, feedback = await self.retry_llm_request(_judge)
return score, feedback.strip()
except Exception as e:
logging.error(f"Error in judge_dance: {e}")
return 5, "Default feedback due to judging error."
def update_preferences(self, results: List[DanceResult]):
for result in results:
a1, a2 = result.pair.agent1, result.pair.agent2
a1.partner_preferences[a2.id] = a1.partner_preferences.get(a2.id, 0) + result.score
a2.partner_preferences[a1.id] = a2.partner_preferences.get(a1.id, 0) + result.score
def calculate_highest_ranked_pairings(self) -> List[Tuple[Agent, Agent, float]]:
all_pairings = []
for agent in self.agents:
for partner_id, score in agent.partner_preferences.items():
partner = next(a for a in self.agents if a.id == partner_id)
mutual_score = score + partner.partner_preferences.get(agent.id, 0)
all_pairings.append((agent, partner, mutual_score))
sorted_pairings = sorted(all_pairings, key=lambda x: x[2], reverse=True)
unique_pairings = []
seen = set()
for agent1, agent2, score in sorted_pairings:
pair = tuple(sorted([agent1.id, agent2.id]))
if pair not in seen:
unique_pairings.append((agent1, agent2, score))
seen.add(pair)
return unique_pairings[:min(len(unique_pairings), 3)] # Return top 3 unique pairings
async def run(self):
logging.info("Starting Multi-Agent Disco simulation")
stable_rounds = 0
previous_pairs = []
for iteration in range(self.iterations):
logging.info(f"Starting iteration {iteration + 1}")
print(f"\n--- Iteration {iteration + 1} ---")
song = await self.generate_song(random.choice(self.agents))
print(f"Song: {json.dumps(song, indent=2)}")
logging.info(f"Generated song: {json.dumps(song)}")
pairs = self.select_partners()
print("Dance Pairs:")
for pair in pairs:
print(f" Agent {pair.agent1.id} ({pair.agent1.preferred_style}, {pair.agent1.model}) with Agent {pair.agent2.id} ({pair.agent2.preferred_style}, {pair.agent2.model})")
logging.info(f"Paired Agent {pair.agent1.id} with Agent {pair.agent2.id}")
if pairs == previous_pairs:
stable_rounds += 1
else:
stable_rounds = 0
previous_pairs = pairs
results = []
for pair in pairs:
print(f"\nDance Routine for Agent {pair.agent1.id} and Agent {pair.agent2.id}:")
routine = await self.collaborate_on_dance(pair, song)
print(routine)
score, feedback = await self.judge_dance(routine, random.choice(self.agents))
result = DanceResult(pair, routine, score, feedback, iteration + 1, song)
results.append(result)
self.dance_history.append(result)
print(f"Score: {score}")
print(f"Feedback: {feedback}")
logging.info(f"Dance result - Score: {score}, Feedback: {feedback}")
self.update_preferences(results)
if stable_rounds >= self.stabilization_threshold:
print(f"\nPartner selection stabilized after {iteration + 1} iterations.")
logging.info(f"Partner selection stabilized after {iteration + 1} iterations.")
break
self.generate_final_report()
self.save_dance_history()
print("\nHighest-Ranked Pairings:")
top_pairings = self.calculate_highest_ranked_pairings()
for i, (agent1, agent2, score) in enumerate(top_pairings, 1):
print(f"{i}. Agent {agent1.id} ({agent1.preferred_style}, {agent1.model}) and Agent {agent2.id} ({agent2.preferred_style}, {agent2.model})")
print(f" Mutual Score: {score}")
def generate_final_report(self):
report = "Final Agent Preferences and Best Performances:\n\n"
sorted_agents = sorted(self.agents, key=lambda a: max(a.partner_preferences.values(), default=0), reverse=True)
for agent in sorted_agents:
report += f"Agent {agent.id} ({agent.preferred_style}, {agent.model}):\n"
sorted_preferences = sorted(agent.partner_preferences.items(), key=lambda x: x[1], reverse=True)
for partner_id, preference in sorted_preferences:
partner = next(a for a in self.agents if a.id == partner_id)
report += f" Partner: Agent {partner_id} ({partner.preferred_style}, {partner.model}), Score: {preference}\n"
best_performance = max(
(result for result in self.dance_history if result.pair.agent1.id == agent.id or result.pair.agent2.id == agent.id),
key=lambda r: r.score,
default=None
)
if best_performance:
report += f" Best Performance:\n"
report += f" Partner: Agent {best_performance.pair.agent2.id if best_performance.pair.agent1.id == agent.id else best_performance.pair.agent1.id}\n"
report += f" Score: {best_performance.score}\n"
report += f" Routine:\n{best_performance.routine}\n"
report += "\n"
report += "Highest-Ranked Pairings:\n\n"
top_pairings = self.calculate_highest_ranked_pairings()
for i, (agent1, agent2, score) in enumerate(top_pairings, 1):
report += f"{i}. Agent {agent1.id} ({agent1.preferred_style}, {agent1.model}) and Agent {agent2.id} ({agent2.preferred_style}, {agent2.model})\n"
report += f" Mutual Score: {score}\n\n"
with open("final_report.txt", "w") as f:
f.write(report)
logging.info("Final report generated and saved as final_report.txt")
def save_dance_history(self):
markdown = "# Dance History\n\n"
for i, result in enumerate(self.dance_history, 1):
markdown += f"## Dance {i}\n\n"
markdown += f"### Iteration\n{result.iteration}\n\n"
markdown += f"### Song\n"
markdown += f"Title: {result.song.get('title', 'Unknown')}\n"
markdown += f"Genre: {result.song.get('genre', 'Unknown')}\n"
markdown += f"Tempo: {result.song.get('tempo', 'Unknown')}\n"
markdown += f"Mood: {result.song.get('mood', 'Unknown')}\n"
markdown += f"Description: {result.song.get('description', 'No description available')}\n\n"
markdown += f"### Pair\n"
markdown += f"- Agent {result.pair.agent1.id} ({result.pair.agent1.preferred_style}, {result.pair.agent1.model})\n"
markdown += f"- Agent {result.pair.agent2.id} ({result.pair.agent2.preferred_style}, {result.pair.agent2.model})\n\n"
markdown += f"### Routine\n{result.routine}\n\n"
markdown += f"### Score\n{result.score}\n\n"
markdown += f"### Feedback\n{result.feedback}\n\n"
markdown += "---\n\n"
# Adding Highest-Ranked Pairings
markdown += "# Highest-Ranked Pairings\n\n"
top_pairings = self.calculate_highest_ranked_pairings()
for i, (agent1, agent2, score) in enumerate(top_pairings, 1):
markdown += f"{i}. Agent {agent1.id} ({agent1.preferred_style}, {agent1.model}) and Agent {agent2.id} ({agent2.preferred_style}, {agent2.model})\n"
markdown += f" Mutual Score: {score}\n\n"
# Finding and adding the highest-scored routine
if self.dance_history:
highest_scored_routine = max(self.dance_history, key=lambda x: x.score)
markdown += "# Highest-Ranked Overall Routine\n\n"
markdown += f"## Score: {highest_scored_routine.score}\n\n"
markdown += f"### Pair\n"
markdown += f"- Agent {highest_scored_routine.pair.agent1.id} ({highest_scored_routine.pair.agent1.preferred_style}, {highest_scored_routine.pair.agent1.model})\n"
markdown += f"- Agent {highest_scored_routine.pair.agent2.id} ({highest_scored_routine.pair.agent2.preferred_style}, {highest_scored_routine.pair.agent2.model})\n\n"
markdown += f"### Routine\n{highest_scored_routine.routine}\n\n"
markdown += f"### Feedback\n{highest_scored_routine.feedback}\n\n"
with open("dance_history.md", "w") as f:
f.write(markdown)
logging.info("Dance history saved as dance_history.md")
async def main():
framework = MultiAgentDisco(
num_agents=6,
dance_styles=["Salsa", "Tango", "Waltz", "Hip-hop", "Contemporary"],
models=["vanilj/hermes-3-llama-3.1-8b:latest", "mistral:v0.3", "llama3.1:latest"], # List of available models
iterations=10,
stabilization_threshold=3
)
await framework.run()
if __name__ == "__main__":
asyncio.run(main())