From 00773334ac07d5f45e1ffebbb6b68d414fab2c68 Mon Sep 17 00:00:00 2001 From: dtracers Date: Sun, 21 Jan 2018 08:15:54 -0800 Subject: [PATCH 01/23] fixed various bugs --- modelHelpers/actions/action_factory.py | 2 +- models/actor_critic/base_actor_critic.py | 6 +- models/actor_critic/policy_gradient.py | 8 +- models/actor_critic/split_layers.py | 138 ++++++++++++++++++ models/actor_critic/tutorial_model.py | 125 +--------------- saltie.py | 1 + trainer/base_classes/default_model_trainer.py | 2 +- 7 files changed, 151 insertions(+), 131 deletions(-) create mode 100644 models/actor_critic/split_layers.py diff --git a/modelHelpers/actions/action_factory.py b/modelHelpers/actions/action_factory.py index a25837c..d9ce813 100644 --- a/modelHelpers/actions/action_factory.py +++ b/modelHelpers/actions/action_factory.py @@ -18,7 +18,7 @@ ('pitch', (-1, 2, 1)), ('roll', (-1, 2, 1))], []] -regression_controls = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN), +regression_controls = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE), ('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN)], [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index 4860e01..a58fb61 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -288,9 +288,9 @@ def parse_actions(self, taken_actions): def log_output_data(self): """Logs the output of the last layer of the model""" - for i in range(self.action_handler.get_number_actions()): - variable_name = str(self.action_handler.action_list_names[i]) - with tf.variable_scope(variable_name): + with tf.name_scope('model_output'): + for i in range(self.action_handler.get_number_actions()): + variable_name = str(self.action_handler.action_list_names[i]) tf.summary.histogram(variable_name + '_output', self.actor_last_row_layer[i]) def get_regularization_loss(self, variables, prefix=None): diff --git a/models/actor_critic/policy_gradient.py b/models/actor_critic/policy_gradient.py index 5bb535d..ba77d3c 100644 --- a/models/actor_critic/policy_gradient.py +++ b/models/actor_critic/policy_gradient.py @@ -5,9 +5,10 @@ from models import base_model from models.actor_critic.base_actor_critic import BaseActorCritic from modelHelpers import tensorflow_reward_manager +from models.actor_critic.split_layers import SplitLayers -class PolicyGradient(BaseActorCritic): +class PolicyGradient(SplitLayers): max_gradient = 1.0 total_loss_divider = 1.0 @@ -114,6 +115,8 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac # calculates the entropy loss from getting the label wrong cross_entropy_loss, wrongness, reduced = self.calculate_loss_of_actor(logprobs, taken_actions, index) + if reduced: + cross_entropy_loss = tf.reduce_mean(cross_entropy_loss) if not reduced: if self.action_handler.is_classification(index): tf.summary.histogram('actor_wrongness', wrongness) @@ -206,6 +209,5 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): :param cross_entropy_loss: :return: The calculated_tensor, If the result is a scalar. 
""" - return tf.reduce_mean( - self.action_handler.get_action_loss_from_logits(logprobs, taken_actions, index)), 1.0, True + return self.action_handler.get_action_loss_from_logits(logprobs, taken_actions, index), 1.0, True diff --git a/models/actor_critic/split_layers.py b/models/actor_critic/split_layers.py new file mode 100644 index 0000000..3c782c1 --- /dev/null +++ b/models/actor_critic/split_layers.py @@ -0,0 +1,138 @@ +import tensorflow as tf +import numpy as np + +from models import base_model +from models.actor_critic.base_actor_critic import BaseActorCritic + + +class SplitLayers(BaseActorCritic): + num_split_layers = 7 + gated_layer_index = -1 + split_hidden_layer_variables = None + split_hidden_layer_name = "split_hidden_layer" + gated_layer_name = "gated_layer" + + def printParameters(self): + super().printParameters() + print('Split Layer Parameters:') + print('number of split layers:', self.num_split_layers) + print('gate layer (not used if < 0):', self.gated_layer_index) + + def load_config_file(self): + super().load_config_file() + try: + self.num_split_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, + 'num_split_layers') + except: + print('unable to load num_split_layers') + try: + self.gated_layer_index = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, + 'gated_layer_index') + except: + print('unable to load gated_layer_index') + + def create_gated_layer(self, inner_layer, input_state, layer_number, network_size, network_prefix, + variable_list=None): + with tf.variable_scope(self.gated_layer_name): + weight_input = network_prefix + "Winput" + str(layer_number) + weight_network = network_prefix + "Wnetwork" + str(layer_number) + weight_decider = network_prefix + "Wdecider" + str(layer_number) + + cut_size = network_size // 2.0 + + w_input = tf.get_variable(weight_input, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + w_network = tf.get_variable(weight_network, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + w_decider = tf.get_variable(weight_decider, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + + if variable_list is not None: + variable_list.append(w_network) + variable_list.append(w_decider) + + decider = tf.nn.sigmoid(tf.matmul(inner_layer, w_decider), name="decider" + str(layer_number)) + + left = tf.matmul(input_state, w_input) * decider + right = tf.matmul(inner_layer, w_network) * (tf.constant(1.0) - decider) + + return left + right, cut_size + + def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, + layers_list=[]): + inner_layer = input_layer + layer_size = self.network_size + max_layer = self.num_layers - 2 - self.num_split_layers + for i in range(0, max_layer): + if i == self.gated_layer_index: + inner_layer, layer_size = self.create_gated_layer(inner_layer, input_layer, i + 2, layer_size, + network_prefix, + variable_list=variable_list) + layers_list.append(inner_layer) + else: + with tf.variable_scope(self.hidden_layer_name): + inner_layer, layer_size = self.create_layer(tf.nn.relu6, inner_layer, i + 2, layer_size, + self.network_size, + network_prefix, variable_list=variable_list) + layers_list.append(inner_layer) + return inner_layer, layer_size + + def create_last_layer(self, activation_function, inner_layer, network_size, num_actions, network_prefix, + last_layer_list=None, layers_list=[]): + with tf.variable_scope(self.split_hidden_layer_name): + output_layers, layer_size = 
self.create_split_layers(tf.nn.relu6, inner_layer, network_size, + self.num_split_layers, + network_prefix, + variable_list=last_layer_list, layers_list=layers_list) + + return super().create_last_layer(activation_function, output_layers, layer_size, num_actions, network_prefix, + last_layer_list, layers_list=layers_list) + + def create_split_layers(self, activation_function, inner_layer, network_size, + num_split_layers, network_prefix, variable_list=None, layers_list=[]): + + cut_size = self.network_size // 3 + previous_layer = [] + last_sizes = [] + step_size = (network_size - cut_size) // num_split_layers + for i in reversed(np.arange(cut_size, network_size, step_size)): + layer_size = [] + for j in range(self.action_handler.get_number_actions()): + layer_size.append(i) + last_sizes.append(layer_size) + layer_size = [] + last_layer_size = last_sizes[len(last_sizes) - 1] + for j in range(self.action_handler.get_number_actions()): + previous_layer.append(inner_layer) + layer_size.append(network_size) + # needs to be one more longer then the number of layers + last_sizes.insert(0, layer_size) + for i in range(0, num_split_layers): + split_layers = [] + for j, item in enumerate(self.action_handler.get_action_sizes()): + name = str(i) + with tf.variable_scope(str(self.action_handler.action_list_names[j])): + inner_layer, last_layer_size = self.create_layer(activation_function, previous_layer[j], 'split' + name, + last_sizes[i][j], last_sizes[i + 1][j], network_prefix, + variable_list=variable_list[j]) + split_layers.append(inner_layer) + previous_layer = split_layers + layers_list.append(split_layers) + return layers_list[len(layers_list) - 1], last_layer_size + + def create_savers(self): + super().create_savers() + # self._create_layer_saver('actor_network', self.split_hidden_layer_name) + self._create_layer_saver('actor_network', self.gated_layer_name) + + def _create_last_row_saver(self, network_name): + super()._create_last_row_saver(network_name) + # create the hidden row savers + split_las_layer = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, + scope=network_name + '/' + self.split_hidden_layer_name + '.*') + reshaped_list = np.reshape(np.array(split_las_layer), [-1, self.action_handler.get_number_actions(), 2]) + for i in range(len(reshaped_list)): + for j in range(len(reshaped_list[i])): + self._create_layer_saver(network_name, self.split_hidden_layer_name + '_' + str(i), + extra_info=self.action_handler.action_list_names[j], + variable_list=reshaped_list[i][j].tolist()) diff --git a/models/actor_critic/tutorial_model.py b/models/actor_critic/tutorial_model.py index 584bdb3..38434d2 100644 --- a/models/actor_critic/tutorial_model.py +++ b/models/actor_critic/tutorial_model.py @@ -5,11 +5,6 @@ class TutorialModel(PolicyGradient): - num_split_layers = 7 - gated_layer_index = -1 - split_hidden_layer_variables = None - split_hidden_layer_name = "split_hidden_layer" - gated_layer_name = "gated_layer" max_gradient = 10.0 total_loss_divider = 2.0 # hidden_layer_activation = tf.nn.relu6 @@ -44,21 +39,10 @@ def __init__(self, session, def printParameters(self): super().printParameters() print('TutorialModel Parameters:') - print('number of split layers:', self.num_split_layers) - print('gate layer (not used if < 0):', self.gated_layer_index) + print('Teacher:', self.teacher) def load_config_file(self): super().load_config_file() - try: - self.num_split_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'num_split_layers') - except: - print('unable to load 
num_split_layers') - try: - self.gated_layer_index = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'gated_layer_index') - except: - print('unable to load gated_layer_index') try: self.teacher = '_' + self.config_file.get(base_model.MODEL_CONFIGURATION_HEADER, @@ -110,7 +94,7 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): wrongness += tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32) else: # doing anything else is very very slow - wrongness += 0.0 + wrongness += 1.0 # + tf.abs((1.0 - tf.abs(logprobs))) else: # use temporarily wrongness += tf.log(1.0 + tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32)) @@ -122,114 +106,9 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): return cross_entropy_loss, wrongness, False - def create_gated_layer(self, inner_layer, input_state, layer_number, network_size, network_prefix, variable_list=None, scope=None): - with tf.variable_scope(self.gated_layer_name): - weight_input = network_prefix + "Winput" + str(layer_number) - weight_network = network_prefix + "Wnetwork" + str(layer_number) - weight_decider = network_prefix + "Wdecider" + str(layer_number) - - cut_size = network_size // 2.0 - - w_input = tf.get_variable(weight_input, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - w_network = tf.get_variable(weight_network, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - w_decider = tf.get_variable(weight_decider, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - - if variable_list is not None: - variable_list.append(w_network) - variable_list.append(w_decider) - - decider = tf.nn.sigmoid(tf.matmul(inner_layer, w_decider), name="decider" + str(layer_number)) - - left = tf.matmul(input_state, w_input) * decider - right = tf.matmul(inner_layer, w_network) * (tf.constant(1.0) - decider) - - return left + right, cut_size - - def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, - layers_list=[]): - inner_layer = input_layer - layer_size = self.network_size - max_layer = self.num_layers - 2 - self.num_split_layers - for i in range(0, max_layer): - if i == self.gated_layer_index: - inner_layer, layer_size = self.create_gated_layer(inner_layer, input_layer, i + 2, layer_size, - network_prefix, - variable_list=variable_list) - layers_list.append(inner_layer) - else: - with tf.variable_scope(self.hidden_layer_name): - inner_layer, layer_size = self.create_layer(tf.nn.relu6, inner_layer, i + 2, layer_size, - self.network_size, - network_prefix, variable_list=variable_list) - layers_list.append(inner_layer) - return inner_layer, layer_size - - def create_last_layer(self, activation_function, inner_layer, network_size, num_actions, network_prefix, - last_layer_list=None, layers_list=[]): - with tf.variable_scope(self.split_hidden_layer_name): - output_layers, layer_size = self.create_split_layers(tf.nn.relu6, inner_layer, network_size, - self.num_split_layers, - network_prefix, - variable_list=last_layer_list, layers_list=layers_list) - - return super().create_last_layer(activation_function, output_layers, layer_size, num_actions, network_prefix, - last_layer_list, layers_list=layers_list) - - def create_split_layers(self, activation_function, inner_layer, network_size, - num_split_layers, network_prefix, variable_list=None, layers_list=[]): - - cut_size = self.network_size // 3 - previous_layer = [] - last_sizes = [] - step_size = 
(network_size - cut_size) // num_split_layers - for i in reversed(np.arange(cut_size, network_size, step_size)): - layer_size = [] - for j in range(self.action_handler.get_number_actions()): - layer_size.append(i) - last_sizes.append(layer_size) - layer_size = [] - last_layer_size = last_sizes[len(last_sizes) - 1] - for j in range(self.action_handler.get_number_actions()): - previous_layer.append(inner_layer) - layer_size.append(network_size) - # needs to be one more longer then the number of layers - last_sizes.insert(0, layer_size) - for i in range(0, num_split_layers): - split_layers = [] - for j, item in enumerate(self.action_handler.get_action_sizes()): - name = str(i) - with tf.variable_scope(str(self.action_handler.action_list_names[j])): - inner_layer, last_layer_size = self.create_layer(activation_function, previous_layer[j], 'split' + name, - last_sizes[i][j], last_sizes[i + 1][j], network_prefix, - variable_list=variable_list[j]) - split_layers.append(inner_layer) - previous_layer = split_layers - layers_list.append(split_layers) - return layers_list[len(layers_list) - 1], last_layer_size - def get_model_name(self): return 'tutorial_bot' + ('_split' if self.action_handler.is_split_mode else '') + self.teacher - def create_savers(self): - super().create_savers() - # self._create_layer_saver('actor_network', self.split_hidden_layer_name) - self._create_layer_saver('actor_network', self.gated_layer_name) - - def _create_last_row_saver(self, network_name): - super()._create_last_row_saver(network_name) - # create the hidden row savers - split_las_layer = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, - scope=network_name + '/' + self.split_hidden_layer_name + '.*') - reshaped_list = np.reshape(np.array(split_las_layer), [-1, self.action_handler.get_number_actions(), 2]) - for i in range(len(reshaped_list)): - for j in range(len(reshaped_list[i])): - self._create_layer_saver(network_name, self.split_hidden_layer_name + '_' + str(i), - extra_info=self.action_handler.action_list_names[j], - variable_list=reshaped_list[i][j].tolist()) - def add_histograms(self, gradients): # summarize gradients for grad, var in gradients: diff --git a/saltie.py b/saltie.py index 99b36cc..f9f48b4 100644 --- a/saltie.py +++ b/saltie.py @@ -156,6 +156,7 @@ def get_output_vector(self, game_tick_packet): action = self.actions_handler.get_random_option() self.previous_action = action controller_selection = self.actions_handler.create_controller_from_selection(action) + controller_selection = [max(-1, min(1, control)) for control in controller_selection] return controller_selection def create_model_hash(self): diff --git a/trainer/base_classes/default_model_trainer.py b/trainer/base_classes/default_model_trainer.py index 45c14db..a90c928 100644 --- a/trainer/base_classes/default_model_trainer.py +++ b/trainer/base_classes/default_model_trainer.py @@ -23,7 +23,7 @@ def load_config(self): super().load_config() config = super().create_config() try: - self.max_files = config.getfloat(self.OPTIMIZER_CONFIG_HEADER, 'learning_rate') + self.learning_rate = config.getfloat(self.OPTIMIZER_CONFIG_HEADER, 'learning_rate') except Exception as e: self.learning_rate = 0.001 try: From a9c7ebc5a6e35d70eeea79b7f64259162248797a Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 10:59:00 -0800 Subject: [PATCH 02/23] some changes to how we bucket inputs --- trainer/utils/controller_statistics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trainer/utils/controller_statistics.py 
b/trainer/utils/controller_statistics.py index 85e537d..acfc965 100644 --- a/trainer/utils/controller_statistics.py +++ b/trainer/utils/controller_statistics.py @@ -48,15 +48,15 @@ def get_amounts(self, input_array=None, bot_output=None): else: output = self.sess.run(self.controls, feed_dict={self.model_input: input_array}) - accuracy = np.sum(np.isclose(output, bot_output, 0.01), 1) / np.size(output[1]) + accuracy = np.sum(np.isclose(output, bot_output, 0.1), 1) / np.size(output[1]) self.accuracy_over_time.append(accuracy) self.bot_data_over_time.append((output, bot_output)) - analog_buckets = [-1.0001, -0.50001, -0.0001, 0.0001, 0.50001, 1.0001] + analog_buckets = [-1.0001, -0.50001, -0.1000, 0.1000, 0.50001, 1.0001] boolean_buckets = [-0.001, 0.50001, 1.0001] np.set_printoptions(formatter={'int': '{0:5}'.format}) names = ["Throttle", "Steer", "Pitch", "Yaw", "Roll", "Jump", "Boost", "Handbrake"] - print("Splitting up everything in ranges: [-1, -0.5>, [-0.5, -0>, [0], <0+, 0.5], <0.5, 1]") + print("Splitting up everything in ranges: [-1, -0.5>, [-0.5, -0.1>, [0], <0.1+, 0.5], <0.5, 1]") print("Real is model output, Expt is tutorialbot output and Acc. is accuracy") for i in range(8): print("From here the ranges are [0.0, 0.5>, [0.5, 1.0]") if i is 5 else None From aa02d947033b9db750596797a6c5da5e9913c70a Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 10:59:43 -0800 Subject: [PATCH 03/23] changes storage path --- models/base_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/base_model.py b/models/base_model.py index 3d53397..e453b94 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -347,9 +347,9 @@ def get_event_path(self, filename, is_replay=False): :return: The path of the file """ dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - base_path = "/training/data/events/" + base_path = "/training/training_events/" if is_replay: - base_path = "/training/replay_events/" + base_path = "/training/in_game_events/" complete_path = dir_path + base_path + self.get_model_name() + "/" + filename modified_path = complete_path counter = 0 From 73e046024be446c9f66360aa402aaeb0f3d52f47 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:00:11 -0800 Subject: [PATCH 04/23] changed how percentages work --- trainer/random_packet_trainer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/trainer/random_packet_trainer.py b/trainer/random_packet_trainer.py index c8e4a4b..bec22db 100644 --- a/trainer/random_packet_trainer.py +++ b/trainer/random_packet_trainer.py @@ -20,6 +20,10 @@ class RandomPacketTrainer(DefaultModelTrainer): controller_stats = None start_time = None model_save_time = None + frame_per_file = 20000 + + def __init__(self): + super().__init__() def get_random_data(self, packet_generator, input_formatter): game_tick_packet = packet_generator.get_random_array() @@ -85,8 +89,8 @@ def _run_trainer(self): model = self.model # Percentage to print statistics (and also save the model) - print_every_x_batches = (total_batches * batch_size) / save_step - print('Prints at this percentage:', 100.0 / print_every_x_batches) + save_step = (total_batches * batch_size) / save_step + print('Prints at this percentage:', 100.0 / self.save_step) model_counter = 0 self.model_save_time = 0 @@ -106,6 +110,7 @@ def _run_trainer(self): model_counter += 1 def finish_trainer(self): + print('trained on the equivalent of', self.total_batches * self.batch_size / self.frame_per_file, 'games') 
start_saving = time.time() self.model.save_model() print('saved model in', time.time() - start_saving, 'seconds') From 708e0e36172fef4b74df5421a4dac35ba47d180a Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:00:32 -0800 Subject: [PATCH 05/23] changed wrongess to be slightly more bucketing --- models/actor_critic/tutorial_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/actor_critic/tutorial_model.py b/models/actor_critic/tutorial_model.py index 38434d2..e7197a5 100644 --- a/models/actor_critic/tutorial_model.py +++ b/models/actor_critic/tutorial_model.py @@ -93,8 +93,7 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): if self.action_handler.is_classification(index): wrongness += tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32) else: - # doing anything else is very very slow - wrongness += 1.0 # + tf.abs((1.0 - tf.abs(logprobs))) + wrongness += tf.abs(taken_actions - tf.round(logprobs * 2.0) / 2.0) else: # use temporarily wrongness += tf.log(1.0 + tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32)) From 9ea6b9afe2a07a4aa4f4b365d8b66cd506de4c2f Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:00:43 -0800 Subject: [PATCH 06/23] changed activation to elu --- models/actor_critic/base_actor_critic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index a58fb61..2acc9b8 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -59,6 +59,9 @@ def printParameters(self): print('keep probability', self.keep_prob) print('regulation parameter', self.reg_param) + def get_activation(self): + return tf.nn.elu # tf.nn.relu6 + def load_config_file(self): super().load_config_file() try: @@ -243,15 +246,16 @@ def actor_network(self, input_states, variable_list=None, last_layer_list=None, last_layer_list = [[] for _ in range(len(self.action_handler.get_action_sizes()))] # define policy neural network actor_prefix = 'actor' + activation = self.get_activation() # input_states = tf.Print(input_states, [input_states], summarize=self.network_size, message='') with tf.variable_scope(self.first_layer_name): - layer1, _ = self.create_layer(tf.nn.relu6, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix, + layer1, _ = self.create_layer(activation, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix, variable_list=variable_list, dropout=False) layers_list.append([layer1]) # layer1 = tf.Print(layer1, [layer1], summarize=self.network_size, message='') - inner_layer, output_size = self.create_hidden_layers(tf.nn.relu6, layer1, self.network_size, actor_prefix, + inner_layer, output_size = self.create_hidden_layers(activation, layer1, self.network_size, actor_prefix, variable_list=variable_list, layers_list=layers_list) output_layer = self.create_last_layer(tf.nn.sigmoid, inner_layer, output_size, @@ -299,7 +303,7 @@ def get_regularization_loss(self, variables, prefix=None): reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) tf.summary.scalar(prefix + '_reg_loss', reg_loss) - return reg_loss + return tf.constant(0.0) # reg_loss def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, layers_list=[]): From 3f11284e16978b96619647c3334050c9796ae39b Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 
11:00:57 -0800 Subject: [PATCH 07/23] normalization between -1, 1 --- modelHelpers/data_normalizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modelHelpers/data_normalizer.py b/modelHelpers/data_normalizer.py index 14a5aa8..bb7844f 100644 --- a/modelHelpers/data_normalizer.py +++ b/modelHelpers/data_normalizer.py @@ -170,7 +170,9 @@ def apply_normalization(self, input_array): # error_prevention = tf.cast(tf.equal(diff, 0.0), tf.float32) # diff = diff + error_prevention - result = (input_array - min) / diff + + #result = (input_array - min) / diff + result = input_array / diff #result = tf.Print(result, [min], 'min', summarize=16) #result = tf.Print(result, [max], 'max', summarize=16) #result = tf.Print(result, [input_array[0]], 'inp', summarize=30) From 542e710ff6624a465973a319aa9809702757a102 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:01:07 -0800 Subject: [PATCH 08/23] uses tanh --- modelHelpers/actions/dynamic_action_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelHelpers/actions/dynamic_action_handler.py b/modelHelpers/actions/dynamic_action_handler.py index 283c712..244108a 100644 --- a/modelHelpers/actions/dynamic_action_handler.py +++ b/modelHelpers/actions/dynamic_action_handler.py @@ -294,7 +294,7 @@ def get_action_loss_from_logits(self, logits, labels, index): def get_last_layer_activation_function(self, func, index): if self.is_classification(index): return func - return None + return tf.nn.tanh def scale_layer(self, layer, index): """ From 655bb4f532cf304e8bdc72267f1a9d509e4de797 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:01:15 -0800 Subject: [PATCH 09/23] loss is square mean --- modelHelpers/actions/action_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelHelpers/actions/action_factory.py b/modelHelpers/actions/action_factory.py index d9ce813..a25837c 100644 --- a/modelHelpers/actions/action_factory.py +++ b/modelHelpers/actions/action_factory.py @@ -18,7 +18,7 @@ ('pitch', (-1, 2, 1)), ('roll', (-1, 2, 1))], []] -regression_controls = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE), +regression_controls = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN)], [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], From a9b94df5cab02e9c57e121e2188800cac509bebb Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:01:22 -0800 Subject: [PATCH 10/23] bug fix --- bot_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bot_manager.py b/bot_manager.py index 842f7e4..e8ff523 100644 --- a/bot_manager.py +++ b/bot_manager.py @@ -132,6 +132,7 @@ def run(self): print('\n\n\n\n Match has ended so ending bot loop\n\n\n\n\n') break + controller_input = None # Run the Agent only if the gameInfo has updated. 
tick_game_time = game_tick_packet.gameInfo.TimeSeconds should_call_while_paused = datetime.now() - last_call_real_time >= MAX_AGENT_CALL_PERIOD From 448784d0c3c5da8da3fad0db1bdda9f057616d33 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 11:01:29 -0800 Subject: [PATCH 11/23] config changes --- saltie2.cfg | 8 ++++---- trainer/configs/copy_trainer.cfg | 11 ++++++----- trainer/configs/randomised_trainer.cfg | 16 +++++++++------- trainer/configs/reward_trainer.cfg | 13 ++++++++++++- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/saltie2.cfg b/saltie2.cfg index 8b770f3..c6df422 100644 --- a/saltie2.cfg +++ b/saltie2.cfg @@ -23,12 +23,12 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.actor_critic.tutorial_model model_name = TutorialModel -teacher = replay_files control_scheme = regression_controls #model_directory = training/data/tutorial_bot_split10-layers/2/trained_variables -num_layers = 5 -num_split_layers = 2 -num_width = 256 +teacher = replay_files +num_layers = 3 +num_split_layers = 1 +num_width = 128 is_graphing = True is_evaluating = True exploration_factor = 500 diff --git a/trainer/configs/copy_trainer.cfg b/trainer/configs/copy_trainer.cfg index 88e69d3..c93fdbe 100644 --- a/trainer/configs/copy_trainer.cfg +++ b/trainer/configs/copy_trainer.cfg @@ -4,6 +4,7 @@ download_files = False [Optimizer Config] should_apply_features = True +learning_rate = 0.0005 [Copy Configuration] should_shuffle = True @@ -13,10 +14,10 @@ control_scheme = regression_controls [Model Configuration] batch_size = 20000 -mini_batch_size = 500 -num_width = 256 +mini_batch_size = 5000 +num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel -num_layers = 5 -num_split_layers = 2 -keep_probability = 0.7 +num_layers = 3 +num_split_layers = 1 +keep_probability = 0.8 diff --git a/trainer/configs/randomised_trainer.cfg b/trainer/configs/randomised_trainer.cfg index 0a7f53a..74a1178 100644 --- a/trainer/configs/randomised_trainer.cfg +++ b/trainer/configs/randomised_trainer.cfg @@ -1,11 +1,12 @@ [Randomised Trainer Configuration] -total_batches = 20000 -save_step = 20000000 -#teacher_package = TutorialBot.tutorial_bot_output -teacher_package = TutorialBot.atba2_demo_output +total_batches = 2000 +save_step = 10 +teacher_package = TutorialBot.tutorial_bot_output +#teacher_package = TutorialBot.atba2_demo_output [Optimizer Config] should_apply_features = True +learning_rate = 0.0005 [Misc Config] control_scheme = regression_controls @@ -18,6 +19,7 @@ mini_batch_size = 5000 num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel -num_layers = 5 -num_split_layers = 2 -keep_probability = 0.6 +teacher = replay_files +num_layers = 3 +num_split_layers = 1 +keep_probability = 0.8 diff --git a/trainer/configs/reward_trainer.cfg b/trainer/configs/reward_trainer.cfg index e27b8d8..f10a7b9 100644 --- a/trainer/configs/reward_trainer.cfg +++ b/trainer/configs/reward_trainer.cfg @@ -1,8 +1,19 @@ [Download Configuration] download_files = False +[Optimizer Config] +should_apply_features = False + +[Misc Config] +control_scheme = regression_controls + [Model Configuration] model_package = models.actor_critic.policy_gradient model_name = PolicyGradient -num_layers = 10 is_evaluating = True +batch_size = 20000 +mini_batch_size = 500 +num_width = 128 +num_layers = 5 +num_split_layers = 2 +keep_probability = 0.7 From cf0149ec4c4a5b2875f46d236f89022436136d20 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 
Jan 2018 13:55:43 -0800 Subject: [PATCH 12/23] fixed some bugs --- conversions/input/input_formatter.py | 16 +++++++++++----- conversions/output_formatter.py | 16 ++++++++-------- modelHelpers/actions/dynamic_action_handler.py | 5 +++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/conversions/input/input_formatter.py b/conversions/input/input_formatter.py index e41c91f..0421151 100644 --- a/conversions/input/input_formatter.py +++ b/conversions/input/input_formatter.py @@ -118,11 +118,17 @@ def get_car_info(self, game_tick_packet, index): player_team = game_tick_packet.gamecars[index].Team player_boost = game_tick_packet.gamecars[index].Boost last_touched_ball = self.get_last_touched_ball(game_tick_packet.gamecars[index], game_tick_packet.gameball.LatestTouch) - car_array = [player_x, player_y, player_z, player_pitch, player_yaw, player_roll, - player_speed_x, player_speed_y, player_speed_z, player_angular_speed_x, - player_angular_speed_y, player_angular_speed_z, - player_on_ground, player_supersonic, player_demolished, player_jumped, - player_double_jumped, player_team, player_boost, last_touched_ball] + car_array = [player_x, player_y, player_z, + player_pitch, player_yaw, player_roll, + player_speed_x, player_speed_y, player_speed_z, + player_angular_speed_x, player_angular_speed_y, player_angular_speed_z, + player_on_ground, + player_supersonic, + player_demolished, + player_jumped, player_double_jumped, + player_team, + player_boost, + last_touched_ball] return car_array def get_last_touched_ball(self, car, latest_touch): diff --git a/conversions/output_formatter.py b/conversions/output_formatter.py index 20c11cd..48beea7 100644 --- a/conversions/output_formatter.py +++ b/conversions/output_formatter.py @@ -85,14 +85,14 @@ def get_car_info(array, index): car_info.Rotation = create_3D_rotation(array, index + 3) car_info.Velocity = create_3D_point(array, index + 6) car_info.AngularVelocity = create_3D_point(array, index + 9) - car_info.bOnGround = array[12] - car_info.bSuperSonic = array[13] - car_info.bDemolished = array[14] - car_info.bJumped = array[15] - car_info.bDoubleJumped = array[16] - car_info.Team = array[17] - car_info.Boost = array[18] - car_info.bLastTouchedBall = array[19] + car_info.bOnGround = array[index + 12] + car_info.bSuperSonic = array[index + 13] + car_info.bDemolished = array[index + 14] + car_info.bJumped = array[index + 15] + car_info.bDoubleJumped = array[index + 16] + car_info.Team = array[index + 17] + car_info.Boost = array[index + 18] + car_info.bLastTouchedBall = array[index + 19] return car_info diff --git a/modelHelpers/actions/dynamic_action_handler.py b/modelHelpers/actions/dynamic_action_handler.py index 244108a..a201021 100644 --- a/modelHelpers/actions/dynamic_action_handler.py +++ b/modelHelpers/actions/dynamic_action_handler.py @@ -270,8 +270,9 @@ def create_action_indexes_graph(self, real_action, batch_size=None): elif indexes[action_index] is None: indexes[action_index] = tf.squeeze(real_control, axis=1) - combo_action = self._create_combo_index_graph(combo_list, real_action) - indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1) + if len(self.combo_list) > 0: + combo_action = self._create_combo_index_graph(combo_list, real_action) + indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1) result = tf.stack(indexes, axis=1) return result From ec609f1d1c58f222dbf2c8dbca6b73db7442f85d Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:55:51 -0800 Subject: [PATCH 
13/23] fixed the fake model --- models/fake_model.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/models/fake_model.py b/models/fake_model.py index 76734cb..cb18bb5 100644 --- a/models/fake_model.py +++ b/models/fake_model.py @@ -8,11 +8,20 @@ class FakeModel(BaseModel): teacher_package = None - def __init__(self, session, state_dim, num_actions, player_index=-1, action_handler=None, is_training=False, + def __init__(self, session, num_actions, + input_formatter_info=[0, 0], + player_index=-1, action_handler=None, is_training=False, optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, config_file=None): - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) def get_class(self, class_package, class_name): class_package = importlib.import_module(class_package) From 878ed88edd60e84e1bf5ade0ce059d416115bcdc Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:56:14 -0800 Subject: [PATCH 14/23] fixed a bug in handbrake logic --- TutorialBot/tutorial_bot_output.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/TutorialBot/tutorial_bot_output.py b/TutorialBot/tutorial_bot_output.py index 3723829..95b1018 100644 --- a/TutorialBot/tutorial_bot_output.py +++ b/TutorialBot/tutorial_bot_output.py @@ -5,7 +5,7 @@ class TutorialBotOutput: # Constants distance_from_ball_to_go_fast = tf.constant(600.0) - distance_from_ball_to_boost = tf.constant(1500.0) # Minimum distance to ball for using boost + distance_from_ball_to_boost = tf.constant(2000.0) # Minimum distance to ball for using boost unreal_to_degrees = tf.constant( 1.0 / 65536.0 * 360.0) # The numbers used to convert unreal rotation units to degrees true = tf.constant(1.0) @@ -23,7 +23,7 @@ def distance(self, x1, y1, x2, y2): def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance_to_ball, is_on_ground): full_turn_angle = 70.0 half_turn_angle = 30.0 - powerslide_angle_constant = 710.0 # The angle (from the front of the bot to the ball) to start to powerslide. + powerslide_angle_constant = 80.0 # The angle (from the front of the bot to the ball) to start to powerslide. 
angle_front_to_target = self.feature_creator.generate_angle_to_target(bot_position.X, bot_position.Y, bot_rotation, @@ -46,14 +46,15 @@ def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance jump = tf.cast(should_jump, tf.float32) - powerslide_angle = full_turn_angle * tf.cast(tf.less(1000.0, distance_to_ball), tf.float32) - powerslide_angle = powerslide_angle_constant + powerslide_angle - - ps = tf.greater(tf.abs(angle_front_to_target), powerslide_angle) + ps = tf.logical_and(tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle), + tf.less_equal(distance_to_ball, 2000.0)) + # ps = tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle) power_slide = tf.cast(ps, tf.float32) + should_not_dodge = tf.cast(tf.greater_equal(distance_to_ball, 500), tf.float32) + # if jump is 1 then we should not execute a turn - safe_steer = steer * (1.0 - jump) + safe_steer = steer * (1.0 - jump * should_not_dodge) return (safe_steer, power_slide, jump) def get_output_vector(self, values): @@ -79,8 +80,8 @@ def get_output_vector(self, values): xy_distance = self.distance(bot_pos.X, bot_pos.Y, ball_pos.X, ball_pos.Y) # Boost when ball is far enough away - boost = tf.logical_and(tf.greater(xy_distance, self.distance_from_ball_to_boost), - tf.greater(car_boost, 34)) + boost = tf.logical_and(tf.greater_equal(xy_distance, self.distance_from_ball_to_boost / car_boost), + tf.greater_equal(car_boost, 10)) full_throttle = 0.5 * tf.cast(tf.greater(xy_distance, self.distance_from_ball_to_go_fast), tf.float32) throttle = full_throttle + tf.constant(0.5) From d7835fe2dfe62f52d38d34cd936c99cdd5769091 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:56:24 -0800 Subject: [PATCH 15/23] print how many games are being trained --- trainer/random_packet_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/trainer/random_packet_trainer.py b/trainer/random_packet_trainer.py index bec22db..8737333 100644 --- a/trainer/random_packet_trainer.py +++ b/trainer/random_packet_trainer.py @@ -90,6 +90,7 @@ def _run_trainer(self): # Percentage to print statistics (and also save the model) save_step = (total_batches * batch_size) / save_step + print('training on the equivalent of', self.total_batches * self.batch_size / self.frame_per_file, 'games') print('Prints at this percentage:', 100.0 / self.save_step) model_counter = 0 self.model_save_time = 0 From db02c4e71f0a4b625758b2e0e36c75ea9c7e030c Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:56:35 -0800 Subject: [PATCH 16/23] added more to the test --- tests/actions_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/actions_test.py b/tests/actions_test.py index 6e2f2d6..0c15057 100644 --- a/tests/actions_test.py +++ b/tests/actions_test.py @@ -1,4 +1,4 @@ -from modelHelpers.actions import action_handler, dynamic_action_handler, action_factory +from modelHelpers.actions import action_factory import tensorflow as tf import numpy as np @@ -95,7 +95,7 @@ def test2(): def test3(): handler = action_factory.get_handler(False) - dynamic_handler = action_factory.get_handler(True, dynamic_action_handler.super_split_scheme) + dynamic_handler = action_factory.get_handler(True, action_factory.regression_controls) session = tf.Session(config=tf.ConfigProto( device_count={'GPU': 0} @@ -109,20 +109,23 @@ def test3(): [-0.2, -0.3, 0.2, 0.3, 0.0, 1.0, 0.0, 0.0], [ 1.0, -0.3, 0.2, 0.3, 0.0, 0.0, 1.0, 0.0], [-1.0, -0.3, 0.2, 0.3, 0.0, 0.0, 0.0, 1.0], - [-0.25, -0.75, 0.25, 0.75, 
0.0, 0.0, 0.0, 0.0]]) + [-0.25, -0.75, 0.25, 0.75, 0.0, 0.0, 0.0, 0.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 0.0, 1.0, 1.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 1.0, 0.0, 1.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 1.0, 1.0, 0.0]]) #t, y, p, r, real_action = tf.Variable(input, dtype=tf.float32) action_index = dynamic_handler.create_action_indexes_graph(real_action) - back_again = dynamic_handler.create_tensorflow_controller_from_selection(tf.transpose(action_index), batch_size=9) + back_again = dynamic_handler.create_tensorflow_controller_from_selection(tf.transpose(action_index), batch_size=len(input)) init = tf.global_variables_initializer() session.run(init) indexes, dynamic_results = session.run([action_index, back_again]) - for index in range(9): + for index in range(len(input)): row = input[index] print('blank row') # print('input row ', np.array(row, dtype=np.float32)) @@ -132,7 +135,7 @@ def test3(): print('and back again') print('correct answer', row) - print('numpy result', handler.create_controller_from_selection(action_index)) + print('numpy result', dynamic_handler.create_controller_from_selection(indexes[index])) # purposely using the working result print('dynamic result', dynamic_results[index]) From 25fa6419e04e0ee87801665da749d315d1c4279e Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:56:48 -0800 Subject: [PATCH 17/23] added more configuration --- models/base_model.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/models/base_model.py b/models/base_model.py index e453b94..4740a3a 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -32,6 +32,8 @@ class BaseModel: input_formatter = None summarize = no_op iterator = None + reg_param = 0.001 + should_regulate = None """" This is a base class for all models It has a couple helper methods but is mainly used to provide a standard @@ -80,6 +82,8 @@ def printParameters(self): print('batch size:', self.batch_size) print('mini batch size:', self.mini_batch_size) print('using features', (self.feature_creator is not None)) + print('regulation parameter', self.reg_param) + print('is regulating parameter', self.should_regulate) def _create_variables(self): """Creates any variables needed by this model. @@ -396,6 +400,18 @@ def load_config_file(self): 'is_normalizing') except Exception as e: print('unable to load if it should be normalizing defaulting to true') + try: + self.should_regulate = self.config_file.getboolean(MODEL_CONFIGURATION_HEADER, + 'should_regulate') + except Exception as e: + self.should_regulate = True + print('unable to load if it should be regulating defaulting to true') + try: + self.reg_param = self.config_file.getfloat(MODEL_CONFIGURATION_HEADER, + 'regulate_param') + except Exception as e: + self.reg_param = 0.001 + print('unable to load if it should be regulating defaulting to true') def add_saver(self, name, variable_list): """ @@ -550,6 +566,14 @@ def get_variables_activations(self): r.append([weights, biases, 'sigmoid']) return r - def get_activations(self, input_array=None): return [[np.random.randint(0, 30) for i in range(7)], [np.random.rand() for i in range(5)]] + + def get_regularization_loss(self, variables, prefix=None): + """Gets the regularization loss from the varaibles. 
Used if the weights are getting to big""" + normalized_variables = [tf.reduce_sum(tf.nn.l2_loss(x) * self.reg_param) + for x in variables] + + reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) + tf.summary.scalar(prefix + '_reg_loss', reg_loss) + return tf.constant(0.0) # reg_loss From e64bb125f9eadbbac850803a12889f936d86fb16 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:56:57 -0800 Subject: [PATCH 18/23] fixed regulatiozation --- models/actor_critic/policy_gradient.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/models/actor_critic/policy_gradient.py b/models/actor_critic/policy_gradient.py index ba77d3c..0fcd7d3 100644 --- a/models/actor_critic/policy_gradient.py +++ b/models/actor_critic/policy_gradient.py @@ -135,7 +135,7 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac actor_reg_loss = self.get_regularization_loss(actor_network_variables, prefix="actor") - actor_loss = actor_loss + actor_reg_loss * self.reg_param + actor_loss = actor_loss + actor_reg_loss # compute actor gradients actor_gradients = self.optimizer.compute_gradients(actor_loss, @@ -153,15 +153,11 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac return [actor_gradients, actor_loss] def create_critic_gadients(self): - critic_reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in self.critic_network_variables], - name='critic_reg_loss') - - tf.summary.scalar("critic_reg_loss", critic_reg_loss) - + critic_reg_loss = self.get_regularization_loss(self.critic_network_variables, prefix='critic') # compute critic gradients mean_square_loss = tf.reduce_mean(tf.square(self.discounted_rewards - self.estimated_values), name='mean_square_loss') - critic_loss = mean_square_loss + self.reg_param * critic_reg_loss + critic_loss = mean_square_loss + critic_reg_loss tf.summary.scalar("critic_loss", critic_loss) critic_gradients = self.optimizer.compute_gradients(critic_loss, self.critic_network_variables) return (critic_gradients, critic_loss, critic_reg_loss) From b2886278df417e4a5026c1142c9f0f6dabb63e5b Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 13:58:13 -0800 Subject: [PATCH 19/23] fixed regualization bug --- models/actor_critic/base_actor_critic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index 2acc9b8..9d65c61 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -16,7 +16,6 @@ class BaseActorCritic(base_reinforcement.BaseReinforcement): forced_frame_action = 500 is_graphing = False keep_prob = 0.5 - reg_param = 0.001 first_layer_name = 'first_layer' hidden_layer_name = 'hidden_layer' @@ -57,7 +56,6 @@ def printParameters(self): print('network size', self.network_size) print('number of layers', self.num_layers) print('keep probability', self.keep_prob) - print('regulation parameter', self.reg_param) def get_activation(self): return tf.nn.elu # tf.nn.relu6 @@ -303,7 +301,10 @@ def get_regularization_loss(self, variables, prefix=None): reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) tf.summary.scalar(prefix + '_reg_loss', reg_loss) - return tf.constant(0.0) # reg_loss + if self.should_regulate: + return reg_loss + else: + return tf.constant(0.0) def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, 
layers_list=[]): From d35a605c01fb211d673c697c19139a958da7dc8e Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 18:10:53 -0800 Subject: [PATCH 20/23] fixed a bug with the model and scores --- models/actor_critic/base_actor_critic.py | 13 +------------ models/actor_critic/policy_gradient.py | 2 +- models/base_model.py | 5 ++++- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index 9d65c61..168c4b8 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -213,7 +213,7 @@ def sample_action(self, input_state): else: action_scores = self.sess.run([self.smart_max], {self.input_placeholder: input_state}) - print(action_scores) + # print(action_scores) action_scores = np.array(action_scores).flatten() return action_scores @@ -295,17 +295,6 @@ def log_output_data(self): variable_name = str(self.action_handler.action_list_names[i]) tf.summary.histogram(variable_name + '_output', self.actor_last_row_layer[i]) - def get_regularization_loss(self, variables, prefix=None): - normalized_variables = [tf.reduce_sum(tf.nn.l2_loss(x) * self.reg_param) - for x in variables] - - reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) - tf.summary.scalar(prefix + '_reg_loss', reg_loss) - if self.should_regulate: - return reg_loss - else: - return tf.constant(0.0) - def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, layers_list=[]): with tf.variable_scope(self.hidden_layer_name): diff --git a/models/actor_critic/policy_gradient.py b/models/actor_critic/policy_gradient.py index 0fcd7d3..3bed9a9 100644 --- a/models/actor_critic/policy_gradient.py +++ b/models/actor_critic/policy_gradient.py @@ -91,7 +91,7 @@ def create_actor_gradients(self, logprobs, taken_actions): total_loss = total_loss / self.total_loss_divider - # total_loss += actor_reg_loss + total_loss += actor_reg_loss # total_loss = tf.Print(total_loss, [total_loss], 'total_loss') diff --git a/models/base_model.py b/models/base_model.py index 4740a3a..5b25893 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -576,4 +576,7 @@ def get_regularization_loss(self, variables, prefix=None): reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) tf.summary.scalar(prefix + '_reg_loss', reg_loss) - return tf.constant(0.0) # reg_loss + if self.should_regulate: + return reg_loss * (self.reg_param * 10.0) + else: + return tf.constant(0.0) From 97bb8edcfbbfbc34a555da7fcd54ec593475da49 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 18:11:14 -0800 Subject: [PATCH 21/23] tutor does just the normal control scheme --- tutor_bot.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutor_bot.cfg b/tutor_bot.cfg index 1118150..136bf01 100644 --- a/tutor_bot.cfg +++ b/tutor_bot.cfg @@ -23,8 +23,8 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.fake_model model_name = FakeModel -control_scheme = regression_controls -#teacher_package = TutorialBot.tutorial_bot_output -teacher_package = TutorialBot.atba2_demo_output +control_scheme = regression_everything +teacher_package = TutorialBot.tutorial_bot_output +#teacher_package = TutorialBot.atba2_demo_output batch_size = 1 mini_batch_size = 1 From eba36bd57eb9c5be2f0ad55d0dbc2c313228ff6d Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 18:11:34 -0800 Subject: [PATCH 22/23] changed the 
config to do 4 layers instead of 2 --- saltie2.cfg | 4 ++-- trainer/configs/copy_trainer.cfg | 5 +++-- trainer/configs/randomised_trainer.cfg | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/saltie2.cfg b/saltie2.cfg index c6df422..03aae05 100644 --- a/saltie2.cfg +++ b/saltie2.cfg @@ -26,8 +26,8 @@ model_name = TutorialModel control_scheme = regression_controls #model_directory = training/data/tutorial_bot_split10-layers/2/trained_variables teacher = replay_files -num_layers = 3 -num_split_layers = 1 +num_layers = 4 +num_split_layers = 2 num_width = 128 is_graphing = True is_evaluating = True diff --git a/trainer/configs/copy_trainer.cfg b/trainer/configs/copy_trainer.cfg index c93fdbe..1de4140 100644 --- a/trainer/configs/copy_trainer.cfg +++ b/trainer/configs/copy_trainer.cfg @@ -18,6 +18,7 @@ mini_batch_size = 5000 num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel -num_layers = 3 -num_split_layers = 1 +num_layers = 4 +num_split_layers = 2 keep_probability = 0.8 +should_regulate = False diff --git a/trainer/configs/randomised_trainer.cfg b/trainer/configs/randomised_trainer.cfg index 74a1178..c9ad424 100644 --- a/trainer/configs/randomised_trainer.cfg +++ b/trainer/configs/randomised_trainer.cfg @@ -1,5 +1,5 @@ [Randomised Trainer Configuration] -total_batches = 2000 +total_batches = 4000 save_step = 10 teacher_package = TutorialBot.tutorial_bot_output #teacher_package = TutorialBot.atba2_demo_output @@ -20,6 +20,7 @@ num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel teacher = replay_files -num_layers = 3 -num_split_layers = 1 +num_layers = 4 +num_split_layers = 2 keep_probability = 0.8 +should_regulate = False From f0e36b7a838af32d3ec5cc36e826481ee8671257 Mon Sep 17 00:00:00 2001 From: dtracers Date: Mon, 22 Jan 2018 18:11:51 -0800 Subject: [PATCH 23/23] tried to make stats make sense for regression models --- trainer/utils/controller_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trainer/utils/controller_statistics.py b/trainer/utils/controller_statistics.py index acfc965..ee6fd2b 100644 --- a/trainer/utils/controller_statistics.py +++ b/trainer/utils/controller_statistics.py @@ -48,7 +48,7 @@ def get_amounts(self, input_array=None, bot_output=None): else: output = self.sess.run(self.controls, feed_dict={self.model_input: input_array}) - accuracy = np.sum(np.isclose(output, bot_output, 0.1), 1) / np.size(output[1]) + accuracy = np.sum(np.isclose(output, bot_output, 0.2), 1) / np.size(output[1]) self.accuracy_over_time.append(accuracy) self.bot_data_over_time.append((output, bot_output))
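
A standalone sketch, not part of the patch series above, of how the accuracy measurement tuned in PATCH 23 behaves: np.isclose(output, bot_output, 0.2) treats 0.2 as a relative tolerance against the teacher output, and per-control accuracy is the fraction of samples inside that tolerance. The array shapes and the use of np.digitize are assumptions for illustration only; the diffs themselves show just the isclose call and the analog bucket boundaries from PATCH 02.

import numpy as np

# Assumed shape: (num_controls, num_samples), matching the axis-1 sum
# in controller_statistics.get_amounts. Values are made up for illustration.
model_output = np.array([[0.95, 0.40, -0.02, 0.70],    # e.g. throttle
                         [0.10, -0.85, 0.55, -0.30]])  # e.g. steer
teacher_output = np.array([[1.00, 0.45, 0.00, 0.50],
                           [0.00, -0.80, 0.60, -0.30]])

# Third positional argument of np.isclose is rtol, so "close" means
# |model - teacher| <= 0.2 * |teacher| (plus a tiny absolute tolerance).
close = np.isclose(model_output, teacher_output, 0.2)
accuracy = np.sum(close, 1) / np.size(model_output[1])
print('per-control accuracy:', accuracy)

# Bucket boundaries from the PATCH 02 change; np.digitize is only used here
# to show which of the five ranges each sample falls into.
analog_buckets = [-1.0001, -0.50001, -0.1000, 0.1000, 0.50001, 1.0001]
print('throttle buckets:', np.digitize(model_output[0], analog_buckets))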