-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
74 lines (54 loc) · 2.19 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import argparse,time,os,pickle
import numpy as np
import matplotlib.pyplot as plt
plt.switch_backend('agg')
def find_latest(prefix, suffix):
i = 0
while os.path.exists(f'{prefix}{i}{suffix}'):
i += 1
return i
class logger:
def __init__(self, log_dir, env, num_iterations, samples=None):
self.log_dir = log_dir
self.env = env
self.hist_action_freq = np.zeros( (env.num_players, env.num_actions) )
self.num_iterations = num_iterations
self.samples = self.num_iterations if not samples else samples
self.step_size = self.num_iterations // self.samples
### store the selected action by each player sampled at every step_size round
self.sampled_action = np.zeros( (env.num_players, self.samples) )
### store action probability of each player sampled at every step_size round
self.belief_history = []
### store the frequency of the selected actions by each player from from round i*step_size to (i+1)*step_size
self.action_history = []
def write(self, text):
with open(self.log_dir+ '.log', 'a') as f:
f.write(text)
def record_round(self, t, actions, rewards, players):
for i, a in enumerate(actions):
self.hist_action_freq[i][a] += 1
if t % self.step_size == 0:
its = t // self.step_size - 1
for i, a in enumerate(actions):
self.sampled_action[i][its] = self.env.transform_action(a)
self.action_history.append( self.hist_action_freq / self.step_size )
self.hist_action_freq.fill(0)
beliefs = []
for p in players:
beliefs.append(p.action_prob)
self.belief_history.append(beliefs)
def plot_action_history(self): ### a draft plot for the action profile trend
time_axis = np.arange(0, self.num_iterations, step=self.step_size)
fig, ax = plt.subplots()
for i in reversed(range(self.env.num_players)):
ax.scatter(time_axis, self.sampled_action[i], s=1, label=f"player {i}")
ax.set_ylabel('Bids')
ax.set_xlabel('#round')
ax.set_title(str(self.env))
ax.legend(loc="upper left")
plt.savefig(self.log_dir+ '_action_history' '.png')
plt.clf()
def plot(self):
self.plot_action_history()
with open(self.log_dir+'_history.pickle', 'wb') as f:
pickle.dump((self.action_history, self.sampled_action, self.belief_history), f)