Skip to content

Commit

Permalink
Merge pull request #49 from RLBots/copy_trainer
Browse files Browse the repository at this point in the history
Added regression code and added some simple visualizations.
  • Loading branch information
dtracers authored Jan 20, 2018
2 parents 11a8bb0 + 891c0d0 commit 7b0c01b
Show file tree
Hide file tree
Showing 24 changed files with 585 additions and 121 deletions.
8 changes: 7 additions & 1 deletion conversions/input/input_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,13 @@ def split_teams(self, game_tick_packet):
return player_car, team_members, enemies, own_team_score, enemy_team_score

def create_result_array(self, array):
    """
    Converts the given values into a float32 numpy array and replaces NaNs with 0.

    :param array: the (possibly nested) sequence of numeric values to convert
    :return: a float32 numpy array in which every NaN entry has been set to 0
    """
    np_version = np.array(array, dtype=np.float32)
    nan_mask = np.isnan(np_version)
    if nan_mask.any():
        # Report where the bad values were before they are overwritten
        print('nan indexes', np.argwhere(nan_mask))
        # Zero only the NaN entries themselves; indexing by the first coordinate
        # alone would wipe a whole row when the array has more than one dimension
        np_version[nan_mask] = 0
    return np_version

def get_player_goals(self, game_tick_packet, index):
    """
    Looks up the goal count of one car in the packet.

    :param game_tick_packet: packet exposing a ``gamecars`` sequence
    :param index: position of the car inside ``gamecars``
    :return: the ``Score.Goals`` value of that car
    """
    car = game_tick_packet.gamecars[index]
    return car.Score.Goals
Expand Down
25 changes: 24 additions & 1 deletion modelHelpers/actions/action_factory.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
from modelHelpers.actions.action_handler import ActionHandler
from modelHelpers.actions.dynamic_action_handler import DynamicActionHandler
from modelHelpers.actions.dynamic_action_handler import DynamicActionHandler, LOSS_SQUARE_MEAN, LOSS_SPARSE_CROSS, \
LOSS_ABSOLUTE_DIFFERENCE
from modelHelpers.actions.split_action_handler import SplitActionHandler

# Shared (start, stop, step) triples; DynamicActionHandler.create_range_action
# unpacks the second element of each control tuple into np.arange.
_HALF_STEP_AXIS = (-1, 1.5, .5)
_UNIT_STEP_AXIS = (-1, 2, 1)
_ON_OFF = (0, 2, 1)

# Every scheme is a list of three groups of control tuples. A control tuple is
# (name, range) with an optional third element naming the loss type.
default_scheme = [
    [('steer', _HALF_STEP_AXIS), ('pitch', _HALF_STEP_AXIS), ('roll', _HALF_STEP_AXIS)],
    [('throttle', _UNIT_STEP_AXIS), ('jump', _ON_OFF), ('boost', _ON_OFF), ('handbrake', _ON_OFF)],
    [('yaw', 'steer')],
]

super_split_scheme = [
    [
        ('throttle', _HALF_STEP_AXIS),
        ('steer', _HALF_STEP_AXIS),
        ('yaw', _HALF_STEP_AXIS),
        ('pitch', _HALF_STEP_AXIS),
        ('roll', _HALF_STEP_AXIS),
    ],
    [('jump', _ON_OFF), ('boost', _ON_OFF), ('handbrake', _ON_OFF)],
    [],
]

only_steer_split_scheme = [
    [('steer', _HALF_STEP_AXIS)],
    [
        ('throttle', _UNIT_STEP_AXIS),
        ('jump', _ON_OFF),
        ('boost', _ON_OFF),
        ('handbrake', _ON_OFF),
        ('yaw', _UNIT_STEP_AXIS),
        ('pitch', _UNIT_STEP_AXIS),
        ('roll', _UNIT_STEP_AXIS),
    ],
    [],
]

# All five analog controls use the squared-error loss type.
regression_controls = [
    [
        ('throttle', _HALF_STEP_AXIS, LOSS_SQUARE_MEAN),
        ('steer', _HALF_STEP_AXIS, LOSS_SQUARE_MEAN),
        ('yaw', _HALF_STEP_AXIS, LOSS_SQUARE_MEAN),
        ('pitch', _HALF_STEP_AXIS, LOSS_SQUARE_MEAN),
        ('roll', _HALF_STEP_AXIS, LOSS_SQUARE_MEAN),
    ],
    [('jump', _ON_OFF), ('boost', _ON_OFF), ('handbrake', _ON_OFF)],
    [],
]

# Throttle uses the sparse cross entropy loss type, the other analog controls
# use the absolute-difference loss type.
mixed_controls = [
    [
        ('throttle', _HALF_STEP_AXIS, LOSS_SPARSE_CROSS),
        ('steer', _HALF_STEP_AXIS, LOSS_ABSOLUTE_DIFFERENCE),
        ('yaw', _HALF_STEP_AXIS, LOSS_ABSOLUTE_DIFFERENCE),
        ('pitch', _HALF_STEP_AXIS, LOSS_ABSOLUTE_DIFFERENCE),
        ('roll', _HALF_STEP_AXIS, LOSS_ABSOLUTE_DIFFERENCE),
    ],
    [('jump', _ON_OFF), ('boost', _ON_OFF), ('handbrake', _ON_OFF)],
    [],
]

def get_handler(split_mode=True, control_scheme=default_scheme):
"""
Expand Down
39 changes: 26 additions & 13 deletions modelHelpers/actions/action_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,12 @@ def create_controller_from_selection(self, action_selection):
def create_tensorflow_controller_from_selection(self, action_selection, batch_size=1, should_stack=True):
combo_actions = self.actions
indexer = tf.constant(1, dtype=tf.int32)
action_selection = tf.cast(action_selection, tf.int32)
if batch_size > 1:
multiplier = tf.constant([int(batch_size), 1, 1])
combo_actions = tf.tile(tf.expand_dims(combo_actions, 0), multiplier)
indexer = tf.constant(np.arange(0, batch_size, 1), dtype=tf.int32)

button_combo = tf.gather_nd(combo_actions, tf.stack([indexer, action_selection[3]], axis=1))
button_combo = tf.gather_nd(combo_actions, tf.stack([indexer, tf.cast(action_selection[3], tf.int32)], axis=1))
new_shape = [self.get_logit_size(), batch_size]
button_combo = tf.reshape(button_combo, new_shape)
controller_option = button_combo
Expand Down Expand Up @@ -195,17 +194,6 @@ def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_spli
"""
return split_func(numpy_array)

def get_cross_entropy_with_logits(self, labels, logits, name):
    """
    Builds the softmax cross entropy op for the given labels and logits.

    :param labels: the labels, forwarded unchanged to tensorflow
    :param logits: the logits, forwarded unchanged to tensorflow
    :param name: base name for the op; the suffix 'ns' is appended to it
    :return: the result of tf.nn.softmax_cross_entropy_with_logits
    """
    op_name = name + 'ns'
    return tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits, name=op_name)

def _find_closet_real_number_graph(self, number):
pure_number = tf.round(number * 2.0) / 2.0
comparison = tf.Variable(np.array([-1.0, -0.5, 0.0, 0.5, 1.0]), dtype=tf.float32)
Expand Down Expand Up @@ -243,3 +231,28 @@ def create_action_indexes_graph(self, real_action, batch_size=None):
combo_list.append(bucketed_control)

return self._create_combo_index_graph(combo_list)

def get_action_loss_from_logits(self, logits, labels, index):
    """
    Computes the loss of a single action with softmax cross entropy.

    :param logits: A tensorflow logit
    :param labels: A label of what occurred
    :param index: The index of the control in the actions list this maps to;
        it is only used to name the op (``str(index) + 'ns'``)
    :return: The loss for this particular action
    """
    op_name = str(index) + 'ns'
    return tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits, name=op_name)

def get_last_layer_activation_function(self, func, index):
    """
    Picks the activation function for the last layer of one action.

    This handler keeps the proposed function for every action.

    :param func: the activation function proposed by the caller
    :param index: The index of the action (unused here)
    :return: ``func`` unchanged
    """
    activation = func
    return activation

def scale_layer(self, layer, index):
    """
    Hook for rescaling an output layer; this handler applies no scaling.

    :param layer: the output layer of the model
    :param index: The index regarding this specific action (unused here)
    :return: the very same layer object that was passed in
    """
    unscaled = layer
    return unscaled

def get_loss_type(self, index):
    """
    Names the loss type used for an action.

    :param index: The index of the action (unused here)
    :return: always the string 'softmax'
    """
    loss_type = 'softmax'
    return loss_type
94 changes: 72 additions & 22 deletions modelHelpers/actions/dynamic_action_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,16 @@

import numpy as np
import tensorflow as tf
from tensorflow.python.ops.losses.losses_impl import Reduction

from modelHelpers.actions.action_handler import ActionHandler, ActionMap
from modelHelpers.actions.split_action_handler import SplitActionHandler


COMBO = 'combo'

super_split_scheme = [[('throttle', (-1, 1.5, .5)), ('steer', (-1, 1.5, .5)),
('yaw', (-1, 1.5, .5)), ('pitch', (-1, 1.5, .5)), ('roll', (-1, 1.5, .5))],
[('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))],
[]]




LOSS_SPARSE_CROSS = 'sparse_loss'
LOSS_SQUARE_MEAN = 'square_mean'
LOSS_ABSOLUTE_DIFFERENCE = 'abs_diff'


class DynamicActionHandler(SplitActionHandler):
Expand All @@ -37,6 +32,7 @@ class DynamicActionHandler(SplitActionHandler):
combo_name_list = []
dodge_suppressor_list = [['jump'], ['steer', 'pitch', 'roll', 'yaw']]
should_suppress_dodge = False
action_loss_type_map = {}

def __init__(self, control_scheme):
self.control_scheme = control_scheme
Expand All @@ -55,8 +51,14 @@ def reset(self):
self.combo_list = []
self.button_combo = []
self.combo_name_list = []
self.action_loss_type_map = {}

def is_classification(self, index):
    """
    Tells whether the action at the given index uses the sparse cross entropy loss.

    :param index: The index of the action in ``action_loss_type_map``
    :return: True when the registered loss type equals LOSS_SPARSE_CROSS
    """
    loss_type = self.action_loss_type_map[index]
    return loss_type == LOSS_SPARSE_CROSS

def create_range_action(self, item):
    """
    Builds the array of values for one ranged control.

    :param item: a control tuple ``(name, range_args)`` with an optional third
        element naming the loss type
    :return: ``np.array([0])`` when the loss type is LOSS_SQUARE_MEAN or
        LOSS_ABSOLUTE_DIFFERENCE, otherwise ``np.arange(*range_args)``
    """
    has_loss_type = len(item) > 2
    if has_loss_type and item[2] in (LOSS_SQUARE_MEAN, LOSS_ABSOLUTE_DIFFERENCE):
        # These loss types get a single placeholder entry instead of a range
        return np.array([0])
    return np.arange(*item[1])

Expand All @@ -74,6 +76,10 @@ def create_actions(self):
action = self.create_range_action(item)
self.action_sizes.append(len(action))
self.action_name_index_map[item[0]] = len(self.action_list_names)
if len(item) > 2:
self.action_loss_type_map[len(self.action_list_names)] = item[2]
else:
self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
self.action_list_names.append(item[0])
self.actions.append(action)

Expand All @@ -91,6 +97,7 @@ def create_actions(self):
self.button_combo = list(itertools.product(*self.combo_list))
self.action_sizes.append(len(self.button_combo))
self.action_name_index_map[COMBO] = len(self.action_list_names)
self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
self.action_list_names.append(COMBO)
self.actions.append(self.button_combo)

Expand All @@ -111,9 +118,12 @@ def create_controller_from_selection(self, action_selection):
index = self.action_name_index_map[control]
if index == COMBO:
true_index = self.combo_name_index_map[control]
controller_output.append(self.actions[combo_index][action_selection[combo_index]][true_index])
controller_output.append(self.actions[combo_index][int(action_selection[combo_index])][true_index])
continue
controller_output.append(self.actions[index][action_selection[index]])
if self.is_classification(index):
controller_output.append(self.actions[index][int(action_selection[index])])
else:
controller_output.append(action_selection[index])

# print(controller_output)
return controller_output
Expand All @@ -123,7 +133,6 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si

ranged_actions = []
combo_actions = tf.constant(np.transpose(np.array(self.button_combo)))
action_selection = tf.cast(action_selection, tf.int32)

# handle ranged actions
multiplier = tf.constant([int(batch_size), 1])
Expand Down Expand Up @@ -153,12 +162,16 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si
true_index = self.combo_name_index_map[control]
single_element = combo_actions[true_index]
controller_output.append(
tf.gather_nd(single_element, tf.stack([indexer, action_selection[combo_index]], axis=1)))
tf.gather_nd(single_element,
tf.stack([indexer, tf.cast(action_selection[combo_index], tf.int32)], axis=1)))
continue
ranged_action = ranged_actions[index]
selection = action_selection[index]
output = tf.gather_nd(ranged_action, tf.stack([indexer, selection], axis=1))
controller_output.append(output)
if self.is_classification(index):
ranged_action = ranged_actions[index]
output = tf.gather_nd(ranged_action, tf.stack([indexer, tf.cast(selection, tf.int32)], axis=1))
controller_output.append(output)
else:
controller_output.append(selection)

# make sure everything is the same type
controller_output = [tf.cast(option, tf.float32) for option in controller_output]
Expand Down Expand Up @@ -193,8 +206,10 @@ def create_action_index(self, real_action):
bucketed_control = self.round_action(real_control, action_size)
combo_list[real_index] = bucketed_control
else:
if indexes[action_index] is None:
if indexes[action_index] is None and self.is_classification(action_index):
indexes[action_index] = (self._find_closet_real_number(real_control))
elif indexes[action_index] is None:
indexes[action_index] = real_control

indexes[self.action_name_index_map[COMBO]] = self._create_combo_index(real_action, combo_list)

Expand Down Expand Up @@ -239,13 +254,48 @@ def create_action_indexes_graph(self, real_action, batch_size=None):
bucketed_control = self.round_action_graph(real_control, action_size)
combo_list[real_index] = bucketed_control
else:
if indexes[action_index] is None:
if indexes[action_index] is None and self.is_classification(action_index):
indexes[action_index] = self._find_closet_real_number_graph(real_control)
elif indexes[action_index] is None:
indexes[action_index] = tf.squeeze(real_control, axis=1)

combo_action = self._create_combo_index_graph(combo_list, real_action)
if batch_size is not None and batch_size == 1:
indexes[self.action_name_index_map[COMBO]] = tf.reshape(combo_action, [1])
else:
indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action)
indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)

result = tf.stack(indexes, axis=1)
return result

def get_action_loss_from_logits(self, logits, labels, index):
    """
    Computes the loss of a single action using the loss type registered for it.

    :param logits: A tensorflow logit
    :param labels: A label of what occurred
    :param index: The index of the control in the actions list this maps to
    :return: The loss for this particular action
    :raises ValueError: if the loss type registered for ``index`` is not one of
        LOSS_SPARSE_CROSS, LOSS_SQUARE_MEAN or LOSS_ABSOLUTE_DIFFERENCE
    """
    loss_type = self.action_loss_type_map[index]
    if loss_type == LOSS_SPARSE_CROSS:
        return tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.cast(labels, tf.int32), logits=logits, name=LOSS_SPARSE_CROSS)

    if loss_type == LOSS_SQUARE_MEAN:
        regression_loss = tf.losses.mean_squared_error
    elif loss_type == LOSS_ABSOLUTE_DIFFERENCE:
        regression_loss = tf.losses.absolute_difference
    else:
        # Fail loudly instead of silently handing None back to the caller
        raise ValueError('unsupported loss type %r for action index %r' % (loss_type, index))

    # Squeeze only axis 1 so that a batch of size one keeps its batch dimension.
    # NOTE(review): assumes regression logits are shaped (batch, 1), matching the
    # single-entry action created for these loss types - confirm against callers.
    prediction = tf.squeeze(logits, axis=1)
    return regression_loss(labels, prediction, reduction=Reduction.NONE)

def get_last_layer_activation_function(self, func, index):
    """
    Picks the activation function for the last layer of one action.

    :param func: the activation function proposed by the caller
    :param index: The index of the action
    :return: ``func`` for classification actions, None for all other actions
    """
    return func if self.is_classification(index) else None

def scale_layer(self, layer, index):
    """
    Hook for rescaling the output layer of one action.

    Both kinds of action currently get their layer back unchanged.

    :param layer: the output layer of the model
    :param index: The index regarding this specific action
    :return: the layer that was passed in
    """
    if not self.is_classification(index):
        # Scaling of non-classification outputs is disabled; use the layer as is
        return layer
    return layer

def get_loss_type(self, index):
    """
    Looks up the loss type registered for an action.

    :param index: The index of the action in ``action_loss_type_map``
    :return: the loss type stored for that index
    """
    loss_types = self.action_loss_type_map
    return loss_types[index]
13 changes: 0 additions & 13 deletions modelHelpers/actions/split_action_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,19 +248,6 @@ def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_spli

return result

def get_cross_entropy_with_logits(self, labels, logits, name):
    """
    Builds the sigmoid cross entropy op for the given labels and logits.

    In split mode there can be more than one class at a time.

    :param labels: the labels; they are cast to float32 before use
    :param logits: the logits, forwarded unchanged to tensorflow
    :param name: base name for the op; the suffix 's' is appended to it
    :return: the result of tf.nn.sigmoid_cross_entropy_with_logits
    """
    float_labels = tf.cast(labels, tf.float32)
    op_name = name + 's'
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=float_labels, logits=logits, name=op_name)

def create_action_indexes_graph(self, real_action, batch_size=None):
#slice each index
throttle = tf.slice(real_action, [0, 0], [-1, 1])
Expand Down
38 changes: 28 additions & 10 deletions models/actor_critic/base_actor_critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,33 @@ def load_config_file(self):
super().load_config_file()
try:
self.num_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER,
'num_layers')
'num_layers')
except:
print('unable to load num_layers')

try:
self.network_size = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER,
'num_width')
'num_width')
except:
print('unable to load the width of each layer')


try:
self.forced_frame_action = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER,
'exploration_factor')
'exploration_factor')
except:
print('unable to load exploration_factor')

def smart_argmax(self, input_tensor):
try:
self.keep_prob = self.config_file.getfloat(base_model.MODEL_CONFIGURATION_HEADER,
'keep_probability')
except:
print('unable to load keep_probability')

def smart_argmax(self, input_tensor, index):
if not self.action_handler.is_classification(index):
# input_tensor = tf.Print(input_tensor, [input_tensor], str(index))
return tf.squeeze(input_tensor, axis=1)
argmax_index = tf.cast(tf.argmax(input_tensor, axis=1), tf.int32)
indexer = tf.range(0, self.mini_batch_size)
slicer_data = tf.stack([indexer, argmax_index], axis=1)
Expand Down Expand Up @@ -124,7 +133,8 @@ def _create_model(self, model_input):
lambda input_tensor: tf.argmax(
tf.nn.softmax(input_tensor), axis=1),
return_as_list=True)
self.smart_max = self.action_handler.run_func_on_split_tensors(self.policy_outputs,
indexes = np.arange(0, self.action_handler.get_number_actions(), 1).tolist()
self.smart_max = self.action_handler.run_func_on_split_tensors([self.policy_outputs, indexes],
self.smart_argmax,
return_as_list=True)
return self.predicted_actions, self.action_scores
Expand All @@ -142,7 +152,7 @@ def create_copy_training_model(self, model_input=None, taken_actions=None):
batched_input, batched_taken_actions = self.iterator.get_next()
else:
batched_input = converted_input
batched_taken_actions = self.taken_actions
batched_taken_actions = actions_input
with tf.name_scope("training_network"):
self.discounted_rewards = tf.constant(0.0)
with tf.variable_scope("actor_network", reuse=True):
Expand Down Expand Up @@ -214,7 +224,10 @@ def create_layer(self, activation_function, input, layer_number, input_size, out
initializer=tf.random_normal_initializer())
b = tf.get_variable(bias_name, [output_size],
initializer=tf.random_normal_initializer())
layer_output = activation_function(tf.matmul(input, W) + b)
if activation_function is not None:
layer_output = activation_function(tf.matmul(input, W) + b)
else:
layer_output = tf.matmul(input, W) + b
if variable_list is not None:
variable_list.append(W)
variable_list.append(b)
Expand Down Expand Up @@ -301,10 +314,15 @@ def create_last_layer(self, activation_function, inner_layer, network_size, num_

self.actor_last_row_layer = []
for i, item in enumerate(self.action_handler.get_action_sizes()):
with tf.variable_scope(str(self.action_handler.action_list_names[i])):
self.actor_last_row_layer.append(self.create_layer(activation_function, inner_layer[i], last_layer_name,
variable_name = str(self.action_handler.action_list_names[i])
with tf.variable_scope(variable_name):
fixed_activation = self.action_handler.get_last_layer_activation_function(activation_function, i)
layer = self.create_layer(fixed_activation, inner_layer[i], last_layer_name,
network_size, item, network_prefix,
variable_list=last_layer_list[i], dropout=False)[0])
variable_list=last_layer_list[i], dropout=False)[0]
scaled_layer = self.action_handler.scale_layer(layer, i)
self.actor_last_row_layer.append(scaled_layer)
# tf.summary.histogram(variable_name + '_output', scaled_layer)

return tf.concat(self.actor_last_row_layer, 1)

Expand Down
Loading

0 comments on commit 7b0c01b

Please sign in to comment.