-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
117 lines (89 loc) · 3.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import argparse,time,os,pickle
import numpy as np
from player import *
from env import *
from utils import *
np.set_printoptions(precision=2)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
describe = lambda names : ''.join( ['{}: {},'.format(i, n) for i,n in enumerate(names)] )
parser.add_argument('--std', type=float, default=0, help='noise std. in feedback')
parser.add_argument('--iterations', type=int, default=100, help='number of rounds to play')
parser.add_argument('--strategy', type=int, help='strategies: ' + describe(strategy_choice_names))
parser.add_argument('--environment', type=int, default=0, help='environments: ' + describe(env_choice_names) )
parser.add_argument('--num_players', type=int, default=2, help='number of players ' )
parser.add_argument('--num_actions', type=int, help='number of actions ')
parser.add_argument('--unit', type=float, default=1, help='discretized unit')
parser.add_argument('--minx', type=float, default=0, help='min action')
parser.add_argument('--samples', type=int, default=100, help='number of samples to save' )
parser.add_argument('--new', default=False, action='store_true', help='whether to generate a new env instance')
parser.add_argument('--num_repeat', type=int, default=1, help='number of repeated simulation')
parser.add_argument('--use', type=int, default=-1, help='use a specified env instance, use -1 for the latest instance')
args = parser.parse_args()
std = args.std
num_iterations = args.iterations
strategy = args.strategy
num_players = args.num_players
num_actions = args.num_actions
unit = args.unit
minx = args.minx
samples = args.samples
env_choice = args.environment
env_name = env_choice_names[env_choice]
strategy_name = strategy_choice_names[strategy]
j = 0
while j < args.num_repeat:
log_dir = f'results/{env_name}/{strategy_name}'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
print("created directory")
else:
print("existing directory")
prefix = f'results/{env_name}/{num_actions}_{num_players}|{std}|{unit}|{minx}#'
i = find_latest(prefix, '.pickle')
if args.new or i==0: ### create new environment instance
env_module = __import__('env')
env = getattr(env_module, env_name)(std, num_actions,num_players, unit, minx)
env_dir = prefix + str(i) + '.pickle'
f = open(env_dir, 'wb')
pickle.dump(env, f )
print("save env at "+ env_dir)
f.close()
else: ### load existing environment instance
if args.use == -1: ### use the last env instance by default
i -= 1
else:
i = args.use
env_dir = prefix + str(i) + '.pickle'
if not os.path.exists(log_dir):
print("env path not found ", log_dir)
exit()
f = open(env_dir, 'rb')
env = pickle.load(f)
print("load env at " + env_dir)
f.close()
player_module = __import__('player')
players = [getattr(player_module, strategy_name)(num_actions, num_iterations) for i in range(num_players) ]
i = find_latest(f'{log_dir}/', '.log')
log_dir = f'{log_dir}/{i}'
L = logger(log_dir, env, num_iterations, samples=samples)
start = time.time()
L.write(f"iterations: {num_iterations}\n")
L.write(f"Environment:\n\t{env}\n")
actions = np.zeros(num_players, dtype=int)
### start the simulation
for t in range(1, num_iterations+1):
for i, p in enumerate(players):
actions[i] = p.act()
### send action profile and get payoff from the environment
rewards = env.feedback( actions )
### send payoff to each players
for a, p, r in zip(actions, players, rewards ):
p.feedback(a, r)
L.record_round(t, actions, rewards, players)
for i, p in enumerate(players):
L.write(f'Player{i}:\n\t{p}\n')
L.plot()
end = time.time()
print(log_dir, end-start)
j += 1