# random_agent.py (forked from google-deepmind/lab)
# Copyright 2016 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Basic random agent for DeepMind Lab."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import random
import numpy as np
import six
import deepmind_lab
def _action(*entries):
return np.array(entries, dtype=np.intc)
class DiscretizedRandomAgent(object):
  """Simple agent for DeepMind Lab."""

  ACTIONS = {
      'look_left': _action(-20, 0, 0, 0, 0, 0, 0),
      'look_right': _action(20, 0, 0, 0, 0, 0, 0),
      'look_up': _action(0, 10, 0, 0, 0, 0, 0),
      'look_down': _action(0, -10, 0, 0, 0, 0, 0),
      'strafe_left': _action(0, 0, -1, 0, 0, 0, 0),
      'strafe_right': _action(0, 0, 1, 0, 0, 0, 0),
      'forward': _action(0, 0, 0, 1, 0, 0, 0),
      'backward': _action(0, 0, 0, -1, 0, 0, 0),
      'fire': _action(0, 0, 0, 0, 1, 0, 0),
      'jump': _action(0, 0, 0, 0, 0, 1, 0),
      'crouch': _action(0, 0, 0, 0, 0, 0, 1)
  }

  ACTION_LIST = list(six.viewvalues(ACTIONS))

  def step(self, unused_reward, unused_image):
    """Gets an image state and a reward, returns an action."""
    return random.choice(DiscretizedRandomAgent.ACTION_LIST)
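
# The seven components of each action vector above follow the order reported
# by env.action_spec(): look left-right, look up-down, strafe left-right,
# move forward-backward, fire, jump, crouch (see the omega comments in
# SpringAgent below). If in doubt on a particular build, the ordering can be
# checked with a small sketch like the following (the level name here is just
# the default used by this script):
#
#   env = deepmind_lab.Lab('tests/empty_room_test', ['RGB_INTERLEAVED'],
#                          config={'width': '80', 'height': '80'})
#   for i, spec in enumerate(env.action_spec()):
#     print(i, spec['name'], spec['min'], spec['max'])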


class SpringAgent(object):
  """A random agent using spring-like forces for its action evolution."""

  def __init__(self, action_spec):
    self.action_spec = action_spec
    print('Starting random spring agent. Action spec:', action_spec)

    self.omega = np.array([
        0.1,  # look left-right
        0.1,  # look up-down
        0.1,  # strafe left-right
        0.1,  # forward-backward
        0.0,  # fire
        0.0,  # jumping
        0.0   # crouching
    ])
    self.velocity_scaling = np.array([2.5, 2.5, 0.01, 0.01, 1, 1, 1])

    self.indices = {a['name']: i for i, a in enumerate(self.action_spec)}
    self.mins = np.array([a['min'] for a in self.action_spec])
    self.maxs = np.array([a['max'] for a in self.action_spec])

    self.reset()

    self.rewards = 0

  def critically_damped_derivative(self, t, omega, displacement, velocity):
    r"""Critical damping for movement.

    I.e., x(t) = (A + Bt) \exp(-\omega t) with A = x(0), B = x'(0) + \omega x(0)

    See
    https://en.wikipedia.org/wiki/Damping#Critical_damping_.28.CE.B6_.3D_1.29
    for details.

    Args:
      t: A float representing time.
      omega: The undamped natural frequency.
      displacement: The initial displacement, x(0) in the above equation.
      velocity: The initial velocity, x'(0) in the above equation.

    Returns:
      The velocity x'(t).
    """
    a = displacement
    b = velocity + omega * displacement
    return (b - omega * t * (a + t * b)) * np.exp(-omega * t)
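
  # Note: step() below always calls this with t = 1, where the expression
  # above reduces to (B - omega * (A + B)) * exp(-omega), i.e. the velocity
  # one time unit after being critically damped towards the sampled target.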

  def step(self, reward, unused_frame):
    """Gets an image state and a reward, returns an action."""
    self.rewards += reward

    action = (self.maxs - self.mins) * np.random.random_sample(
        size=[len(self.action_spec)]) + self.mins

    # Compute the 'velocity' 1 time unit after a critically damped force
    # dragged us towards the random `action`, given our current velocity.
    self.velocity = self.critically_damped_derivative(1, self.omega, action,
                                                      self.velocity)

    # Since walk and strafe are binary, we need some additional memory to
    # smooth out the movement. Adding half of the action from the last step
    # works.
    self.action = self.velocity / self.velocity_scaling + 0.5 * self.action

    # Fire with p = 0.01 at each step.
    self.action[self.indices['FIRE']] = int(np.random.random() > 0.99)

    # Jump/crouch with p = 0.005 at each step.
    self.action[self.indices['JUMP']] = int(np.random.random() > 0.995)
    self.action[self.indices['CROUCH']] = int(np.random.random() > 0.995)

    # Clip to the valid range and convert to the right dtype.
    return self.clip_action(self.action)

  def clip_action(self, action):
    return np.clip(action, self.mins, self.maxs).astype(np.intc)

  def reset(self):
    self.velocity = np.zeros([len(self.action_spec)])
    self.action = np.zeros([len(self.action_spec)])


def run(length, width, height, fps, level, record, demo, demofiles, video):
  """Spins up an environment and runs the random agent."""
  config = {
      'fps': str(fps),
      'width': str(width),
      'height': str(height)
  }
  if record:
    config['record'] = record
  if demo:
    config['demo'] = demo
  if demofiles:
    config['demofiles'] = demofiles
  if video:
    config['video'] = video

  env = deepmind_lab.Lab(level, ['RGB_INTERLEAVED'], config=config)

  env.reset()

  # Starts the random spring agent. As a simpler alternative, we could also
  # use DiscretizedRandomAgent().
  agent = SpringAgent(env.action_spec())

  reward = 0

  for _ in six.moves.range(length):
    if not env.is_running():
      print('Environment stopped early')
      env.reset()
      agent.reset()
    obs = env.observations()
    action = agent.step(reward, obs['RGB_INTERLEAVED'])
    reward = env.step(action, num_steps=1)

  print('Finished after %i steps. Total reward received is %f'
        % (length, agent.rewards))
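
# A minimal variant of the main loop above using the simpler agent mentioned
# in run(). DiscretizedRandomAgent takes no constructor arguments since it
# ignores the action spec (the restart handling from run() is omitted here
# for brevity):
#
#   agent = DiscretizedRandomAgent()
#   reward = 0
#   for _ in six.moves.range(length):
#     obs = env.observations()
#     action = agent.step(reward, obs['RGB_INTERLEAVED'])
#     reward = env.step(action, num_steps=1)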


if __name__ == '__main__':
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--length', type=int, default=1000,
                      help='Number of steps to run the agent')
  parser.add_argument('--width', type=int, default=80,
                      help='Horizontal size of the observations')
  parser.add_argument('--height', type=int, default=80,
                      help='Vertical size of the observations')
  parser.add_argument('--fps', type=int, default=60,
                      help='Number of frames per second')
  parser.add_argument('--runfiles_path', type=str, default=None,
                      help='Set the runfiles path to find DeepMind Lab data')
  parser.add_argument('--level_script', type=str,
                      default='tests/empty_room_test',
                      help='The environment level script to load')
  parser.add_argument('--record', type=str, default=None,
                      help='Record the run to a demo file')
  parser.add_argument('--demo', type=str, default=None,
                      help='Play back a recorded demo file')
  parser.add_argument('--demofiles', type=str, default=None,
                      help='Directory for demo files')
  parser.add_argument('--video', type=str, default=None,
                      help='Record the demo run as a video')

  args = parser.parse_args()
  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)
  run(args.length, args.width, args.height, args.fps, args.level_script,
      args.record, args.demo, args.demofiles, args.video)
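
# Example invocation, assuming the DeepMind Lab Bazel workspace and a
# python_random_agent target (the target name and graphics flag are
# assumptions; adjust to your own build setup):
#
#   bazel run :python_random_agent --define graphics=sdl -- \
#     --length=10000 --width=640 --height=480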