forked from udacity/AIND-Isolation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tournament.py
182 lines (143 loc) · 6.92 KB
/
tournament.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"""
Estimate the strength rating of student-agent with iterative deepening and
a custom heuristic evaluation function against fixed-depth minimax and
alpha-beta search agents by running a round-robin tournament for the student
agent. Note that all agents are constructed from the student CustomPlayer
implementation, so any errors present in that class will affect the outcome
here.
The student agent plays a fixed number of "fair" matches against each test
agent. The matches are fair because the board is initialized randomly for both
players, and the players play each match twice -- switching the player order
between games. This helps to correct for imbalances in the game due to both
starting position and initiative.
For example, if the random moves chosen for initialization are (5, 2) and
(1, 3), then the first match will place agentA at (5, 2) as player 1 and
agentB at (1, 3) as player 2 then play to conclusion; the agents swap
initiative in the second match with agentB at (5, 2) as player 1 and agentA at
(1, 3) as player 2.
"""
import itertools
import random
import warnings
from collections import namedtuple
from isolation import Board
from sample_players import RandomPlayer
from sample_players import null_score
from sample_players import open_move_score
from sample_players import improved_score
from game_agent import CustomPlayer
from game_agent import custom_score
NUM_MATCHES = 5 # number of matches against each opponent
TIME_LIMIT = 150 # number of milliseconds before timeout
TIMEOUT_WARNING = "One or more agents lost a match this round due to " + \
"timeout. The get_move() function must return before " + \
"time_left() reaches 0 ms. You will need to leave some " + \
"time for the function to return, and may need to " + \
"increase this margin to avoid timeouts during " + \
"tournament play."
DESCRIPTION = """
This script evaluates the performance of the custom heuristic function by
comparing the strength of an agent using iterative deepening (ID) search with
alpha-beta pruning against the strength rating of agents using other heuristic
functions. The `ID_Improved` agent provides a baseline by measuring the
performance of a basic agent using Iterative Deepening and the "improved"
heuristic (from lecture) on your hardware. The `Student` agent then measures
the performance of Iterative Deepening and the custom heuristic against the
same opponents.
"""
Agent = namedtuple("Agent", ["player", "name"])
def play_match(player1, player2):
"""
Play a "fair" set of matches between two agents by playing two games
between the players, forcing each agent to play from randomly selected
positions. This should control for differences in outcome resulting from
advantage due to starting position on the board.
"""
num_wins = {player1: 0, player2: 0}
num_timeouts = {player1: 0, player2: 0}
num_invalid_moves = {player1: 0, player2: 0}
games = [Board(player1, player2), Board(player2, player1)]
# initialize both games with a random move and response
for _ in range(2):
move = random.choice(games[0].get_legal_moves())
games[0].apply_move(move)
games[1].apply_move(move)
# play both games and tally the results
for game in games:
winner, _, termination = game.play(time_limit=TIME_LIMIT)
if player1 == winner:
num_wins[player1] += 1
if termination == "timeout":
num_timeouts[player2] += 1
else:
num_invalid_moves[player2] += 1
elif player2 == winner:
num_wins[player2] += 1
if termination == "timeout":
num_timeouts[player1] += 1
else:
num_invalid_moves[player1] += 1
if sum(num_timeouts.values()) != 0:
warnings.warn(TIMEOUT_WARNING)
return num_wins[player1], num_wins[player2]
def play_round(agents, num_matches):
"""
Play one round (i.e., a single match between each pair of opponents)
"""
agent_1 = agents[-1]
wins = 0.
total = 0.
print("\nPlaying Matches:")
print("----------")
for idx, agent_2 in enumerate(agents[:-1]):
counts = {agent_1.player: 0., agent_2.player: 0.}
names = [agent_1.name, agent_2.name]
print(" Match {}: {!s:^11} vs {!s:^11}".format(idx + 1, *names), end=' ')
# Each player takes a turn going first
for p1, p2 in itertools.permutations((agent_1.player, agent_2.player)):
for _ in range(num_matches):
score_1, score_2 = play_match(p1, p2)
counts[p1] += score_1
counts[p2] += score_2
total += score_1 + score_2
wins += counts[agent_1.player]
print("\tResult: {} to {}".format(int(counts[agent_1.player]),
int(counts[agent_2.player])))
return 100. * wins / total
def main():
HEURISTICS = [("Null", null_score),
("Open", open_move_score),
("Improved", improved_score)]
AB_ARGS = {"search_depth": 5, "method": 'alphabeta', "iterative": False}
MM_ARGS = {"search_depth": 3, "method": 'minimax', "iterative": False}
CUSTOM_ARGS = {"method": 'alphabeta', 'iterative': True}
# Create a collection of CPU agents using fixed-depth minimax or alpha beta
# search, or random selection. The agent names encode the search method
# (MM=minimax, AB=alpha-beta) and the heuristic function (Null=null_score,
# Open=open_move_score, Improved=improved_score). For example, MM_Open is
# an agent using minimax search with the open moves heuristic.
mm_agents = [Agent(CustomPlayer(score_fn=h, **MM_ARGS),
"MM_" + name) for name, h in HEURISTICS]
ab_agents = [Agent(CustomPlayer(score_fn=h, **AB_ARGS),
"AB_" + name) for name, h in HEURISTICS]
random_agents = [Agent(RandomPlayer(), "Random")]
# ID_Improved agent is used for comparison to the performance of the
# submitted agent for calibration on the performance across different
# systems; i.e., the performance of the student agent is considered
# relative to the performance of the ID_Improved agent to account for
# faster or slower computers.
test_agents = [Agent(CustomPlayer(score_fn=improved_score, **CUSTOM_ARGS), "ID_Improved"),
Agent(CustomPlayer(score_fn=custom_score, **CUSTOM_ARGS), "Student")]
print(DESCRIPTION)
for agentUT in test_agents:
print("")
print("*************************")
print("{:^25}".format("Evaluating: " + agentUT.name))
print("*************************")
agents = random_agents + mm_agents + ab_agents + [agentUT]
win_ratio = play_round(agents, NUM_MATCHES)
print("\n\nResults:")
print("----------")
print("{!s:<15}{:>10.2f}%".format(agentUT.name, win_ratio))
if __name__ == "__main__":
main()