diff --git a/conversions/input/input_formatter.py b/conversions/input/input_formatter.py index 04ba7ed..0614c0f 100644 --- a/conversions/input/input_formatter.py +++ b/conversions/input/input_formatter.py @@ -77,7 +77,13 @@ def split_teams(self, game_tick_packet): return player_car, team_members, enemies, own_team_score, enemy_team_score def create_result_array(self, array): - return np.array(array, dtype=np.float32) + np_version = np.array(array, dtype=np.float32) + output = np.argwhere(np.isnan(np_version)) + if len(output) > 0: + print('nan indexes', output) + for index in output: + np_version[index[0]] = 0 + return np_version def get_player_goals(self, game_tick_packet, index): return game_tick_packet.gamecars[index].Score.Goals diff --git a/modelHelpers/actions/action_factory.py b/modelHelpers/actions/action_factory.py index 0e3fc37..d55ea59 100644 --- a/modelHelpers/actions/action_factory.py +++ b/modelHelpers/actions/action_factory.py @@ -1,11 +1,34 @@ from modelHelpers.actions.action_handler import ActionHandler -from modelHelpers.actions.dynamic_action_handler import DynamicActionHandler +from modelHelpers.actions.dynamic_action_handler import DynamicActionHandler, LOSS_SQUARE_MEAN, LOSS_SPARSE_CROSS, \ + LOSS_ABSOLUTE_DIFFERENCE from modelHelpers.actions.split_action_handler import SplitActionHandler default_scheme = [[('steer', (-1, 1.5, .5)), ('pitch', (-1, 1.5, .5)), ('roll', (-1, 1.5, .5))], [('throttle', (-1, 2, 1)), ('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], [('yaw', 'steer')]] +super_split_scheme = [[('throttle', (-1, 1.5, .5)), ('steer', (-1, 1.5, .5)), + ('yaw', (-1, 1.5, .5)), ('pitch', (-1, 1.5, .5)), ('roll', (-1, 1.5, .5))], + [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], + []] + +only_steer_split_scheme = [[('steer', (-1, 1.5, .5))], + [('throttle', (-1, 2, 1)), ('jump', (0, 2, 1)), ('boost', (0, 2, 1)), + ('handbrake', (0, 2, 1)), ('yaw', (-1, 2, 1)), + ('pitch', (-1, 2, 1)), ('roll', (-1, 2, 1))], + []] + +regression_controls = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN), + ('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN), + ('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN)], + [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], + []] + +mixed_controls = [[('throttle', (-1, 1.5, .5), LOSS_SPARSE_CROSS), ('steer', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE), + ('yaw', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE), ('pitch', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE), + ('roll', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE)], + [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], + []] def get_handler(split_mode=True, control_scheme=default_scheme): """ diff --git a/modelHelpers/actions/action_handler.py b/modelHelpers/actions/action_handler.py index 54b05ee..d43ff0c 100644 --- a/modelHelpers/actions/action_handler.py +++ b/modelHelpers/actions/action_handler.py @@ -92,13 +92,12 @@ def create_controller_from_selection(self, action_selection): def create_tensorflow_controller_from_selection(self, action_selection, batch_size=1, should_stack=True): combo_actions = self.actions indexer = tf.constant(1, dtype=tf.int32) - action_selection = tf.cast(action_selection, tf.int32) if batch_size > 1: multiplier = tf.constant([int(batch_size), 1, 1]) combo_actions = tf.tile(tf.expand_dims(combo_actions, 0), multiplier) indexer = tf.constant(np.arange(0, batch_size, 1), dtype=tf.int32) - button_combo = tf.gather_nd(combo_actions, 
tf.stack([indexer, action_selection[3]], axis=1)) + button_combo = tf.gather_nd(combo_actions, tf.stack([indexer, tf.cast(action_selection[3], tf.int32)], axis=1)) new_shape = [self.get_logit_size(), batch_size] button_combo = tf.reshape(button_combo, new_shape) controller_option = button_combo @@ -195,17 +194,6 @@ def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_spli """ return split_func(numpy_array) - def get_cross_entropy_with_logits(self, labels, logits, name): - """ - :param tf: - :param labels: - :param logits: - :param name: - :return: - """ - return tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits, name=name + 'ns') - def _find_closet_real_number_graph(self, number): pure_number = tf.round(number * 2.0) / 2.0 comparison = tf.Variable(np.array([-1.0, -0.5, 0.0, 0.5, 1.0]), dtype=tf.float32) @@ -243,3 +231,28 @@ def create_action_indexes_graph(self, real_action, batch_size=None): combo_list.append(bucketed_control) return self._create_combo_index_graph(combo_list) + + def get_action_loss_from_logits(self, logits, labels, index): + """ + :param logits: A tensorflow logit + :param labels: A label of what occurred + :param index: The index of the control in the actions list this maps to + :return: The loss for this particular action + """ + return tf.nn.softmax_cross_entropy_with_logits( + labels=labels, logits=logits, name=str(index) + 'ns') + + def get_last_layer_activation_function(self, func, index): + return func + + def scale_layer(self, layer, index): + """ + Scales the layer if required + :param layer: the output layer of the model + :param index: The index regarding this specific action + :return: A scaled layer + """ + return layer + + def get_loss_type(self, index): + return 'softmax' diff --git a/modelHelpers/actions/dynamic_action_handler.py b/modelHelpers/actions/dynamic_action_handler.py index a1f2dc7..bc004bc 100644 --- a/modelHelpers/actions/dynamic_action_handler.py +++ b/modelHelpers/actions/dynamic_action_handler.py @@ -2,21 +2,16 @@ import numpy as np import tensorflow as tf +from tensorflow.python.ops.losses.losses_impl import Reduction from modelHelpers.actions.action_handler import ActionHandler, ActionMap from modelHelpers.actions.split_action_handler import SplitActionHandler COMBO = 'combo' - -super_split_scheme = [[('throttle', (-1, 1.5, .5)), ('steer', (-1, 1.5, .5)), - ('yaw', (-1, 1.5, .5)), ('pitch', (-1, 1.5, .5)), ('roll', (-1, 1.5, .5))], - [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], - []] - - - - +LOSS_SPARSE_CROSS = 'sparse_loss' +LOSS_SQUARE_MEAN = 'square_mean' +LOSS_ABSOLUTE_DIFFERENCE = 'abs_diff' class DynamicActionHandler(SplitActionHandler): @@ -37,6 +32,7 @@ class DynamicActionHandler(SplitActionHandler): combo_name_list = [] dodge_suppressor_list = [['jump'], ['steer', 'pitch', 'roll', 'yaw']] should_suppress_dodge = False + action_loss_type_map = {} def __init__(self, control_scheme): self.control_scheme = control_scheme @@ -55,8 +51,14 @@ def reset(self): self.combo_list = [] self.button_combo = [] self.combo_name_list = [] + self.action_loss_type_map = {} + + def is_classification(self, index): + return self.action_loss_type_map[index] == LOSS_SPARSE_CROSS def create_range_action(self, item): + if len(item) > 2 and (item[2] == LOSS_SQUARE_MEAN or item[2] == LOSS_ABSOLUTE_DIFFERENCE): + return np.array([0]) action_data = np.arange(*item[1]) return action_data @@ -74,6 +76,10 @@ def create_actions(self): action = self.create_range_action(item)
self.action_sizes.append(len(action)) self.action_name_index_map[item[0]] = len(self.action_list_names) + if len(item) > 2: + self.action_loss_type_map[len(self.action_list_names)] = item[2] + else: + self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS self.action_list_names.append(item[0]) self.actions.append(action) @@ -91,6 +97,7 @@ def create_actions(self): self.button_combo = list(itertools.product(*self.combo_list)) self.action_sizes.append(len(self.button_combo)) self.action_name_index_map[COMBO] = len(self.action_list_names) + self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS self.action_list_names.append(COMBO) self.actions.append(self.button_combo) @@ -111,9 +118,12 @@ def create_controller_from_selection(self, action_selection): index = self.action_name_index_map[control] if index == COMBO: true_index = self.combo_name_index_map[control] - controller_output.append(self.actions[combo_index][action_selection[combo_index]][true_index]) + controller_output.append(self.actions[combo_index][int(action_selection[combo_index])][true_index]) continue - controller_output.append(self.actions[index][action_selection[index]]) + if self.is_classification(index): + controller_output.append(self.actions[index][int(action_selection[index])]) + else: + controller_output.append(action_selection[index]) # print(controller_output) return controller_output @@ -123,7 +133,6 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si ranged_actions = [] combo_actions = tf.constant(np.transpose(np.array(self.button_combo))) - action_selection = tf.cast(action_selection, tf.int32) # handle ranged actions multiplier = tf.constant([int(batch_size), 1]) @@ -153,12 +162,16 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si true_index = self.combo_name_index_map[control] single_element = combo_actions[true_index] controller_output.append( - tf.gather_nd(single_element, tf.stack([indexer, action_selection[combo_index]], axis=1))) + tf.gather_nd(single_element, + tf.stack([indexer, tf.cast(action_selection[combo_index], tf.int32)], axis=1))) continue - ranged_action = ranged_actions[index] selection = action_selection[index] - output = tf.gather_nd(ranged_action, tf.stack([indexer, selection], axis=1)) - controller_output.append(output) + if self.is_classification(index): + ranged_action = ranged_actions[index] + output = tf.gather_nd(ranged_action, tf.stack([indexer, tf.cast(selection, tf.int32)], axis=1)) + controller_output.append(output) + else: + controller_output.append(selection) # make sure everything is the same type controller_output = [tf.cast(option, tf.float32) for option in controller_output] @@ -193,8 +206,10 @@ def create_action_index(self, real_action): bucketed_control = self.round_action(real_control, action_size) combo_list[real_index] = bucketed_control else: - if indexes[action_index] is None: + if indexes[action_index] is None and self.is_classification(action_index): indexes[action_index] = (self._find_closet_real_number(real_control)) + elif indexes[action_index] is None: + indexes[action_index] = real_control indexes[self.action_name_index_map[COMBO]] = self._create_combo_index(real_action, combo_list) @@ -239,13 +254,48 @@ def create_action_indexes_graph(self, real_action, batch_size=None): bucketed_control = self.round_action_graph(real_control, action_size) combo_list[real_index] = bucketed_control else: - if indexes[action_index] is None: + if indexes[action_index] is None and 
self.is_classification(action_index): indexes[action_index] = self._find_closet_real_number_graph(real_control) + elif indexes[action_index] is None: + indexes[action_index] = tf.squeeze(real_control, axis=1) + combo_action = self._create_combo_index_graph(combo_list, real_action) - if batch_size is not None and batch_size == 1: - indexes[self.action_name_index_map[COMBO]] = tf.reshape(combo_action, [1]) - else: - indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action) + indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1) result = tf.stack(indexes, axis=1) return result + + def get_action_loss_from_logits(self, logits, labels, index): + """ + :param logits: A tensorflow logit + :param labels: A label of what occurred + :param index: The index of the control in the actions list this maps to + :return: The loss for this particular action + """ + if self.action_loss_type_map[index] == LOSS_SPARSE_CROSS: + return tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=tf.cast(labels, tf.int32), logits=logits, name=LOSS_SPARSE_CROSS) + if self.action_loss_type_map[index] == LOSS_SQUARE_MEAN: + return tf.losses.mean_squared_error(labels, tf.squeeze(logits), reduction=Reduction.NONE) + if self.action_loss_type_map[index] == LOSS_ABSOLUTE_DIFFERENCE: + return tf.losses.absolute_difference(labels, tf.squeeze(logits), reduction=Reduction.NONE) + + def get_last_layer_activation_function(self, func, index): + if self.is_classification(index): + return func + return None + + def scale_layer(self, layer, index): + """ + Scales the layer if required + :param layer: the output layer of the model + :param index: The index regarding this specific action + :return: A scaled layer + """ + if self.is_classification(index): + return layer + else: + return layer # * 2.0 - 1.0 + + def get_loss_type(self, index): + return self.action_loss_type_map[index] diff --git a/modelHelpers/actions/split_action_handler.py b/modelHelpers/actions/split_action_handler.py index 7a8173b..ea3bf42 100644 --- a/modelHelpers/actions/split_action_handler.py +++ b/modelHelpers/actions/split_action_handler.py @@ -248,19 +248,6 @@ def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_spli return result - def get_cross_entropy_with_logits(self, labels, logits, name): - """ - In split mode there can be more than one class at a time. 
- This is so that - :param tf: - :param labels: - :param logits: - :param name: - :return: - """ - return tf.nn.sigmoid_cross_entropy_with_logits( - labels=tf.cast(labels, tf.float32), logits=logits, name=name+'s') - def create_action_indexes_graph(self, real_action, batch_size=None): #slice each index throttle = tf.slice(real_action, [0, 0], [-1, 1]) diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index fff6e35..e13e27e 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -58,24 +58,33 @@ def load_config_file(self): super().load_config_file() try: self.num_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'num_layers') + 'num_layers') except: print('unable to load num_layers') try: self.network_size = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'num_width') + 'num_width') except: print('unable to load the width of each layer') try: self.forced_frame_action = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'exploration_factor') + 'exploration_factor') except: print('unable to load exploration_factor') - def smart_argmax(self, input_tensor): + try: + self.keep_prob = self.config_file.getfloat(base_model.MODEL_CONFIGURATION_HEADER, + 'keep_probability') + except: + print('unable to load keep_probability') + + def smart_argmax(self, input_tensor, index): + if not self.action_handler.is_classification(index): + # input_tensor = tf.Print(input_tensor, [input_tensor], str(index)) + return tf.squeeze(input_tensor, axis=1) argmax_index = tf.cast(tf.argmax(input_tensor, axis=1), tf.int32) indexer = tf.range(0, self.mini_batch_size) slicer_data = tf.stack([indexer, argmax_index], axis=1) @@ -124,7 +133,8 @@ def _create_model(self, model_input): lambda input_tensor: tf.argmax( tf.nn.softmax(input_tensor), axis=1), return_as_list=True) - self.smart_max = self.action_handler.run_func_on_split_tensors(self.policy_outputs, + indexes = np.arange(0, self.action_handler.get_number_actions(), 1).tolist() + self.smart_max = self.action_handler.run_func_on_split_tensors([self.policy_outputs, indexes], self.smart_argmax, return_as_list=True) return self.predicted_actions, self.action_scores @@ -142,7 +152,7 @@ def create_copy_training_model(self, model_input=None, taken_actions=None): batched_input, batched_taken_actions = self.iterator.get_next() else: batched_input = converted_input - batched_taken_actions = self.taken_actions + batched_taken_actions = actions_input with tf.name_scope("training_network"): self.discounted_rewards = tf.constant(0.0) with tf.variable_scope("actor_network", reuse=True): @@ -214,7 +224,10 @@ def create_layer(self, activation_function, input, layer_number, input_size, out initializer=tf.random_normal_initializer()) b = tf.get_variable(bias_name, [output_size], initializer=tf.random_normal_initializer()) - layer_output = activation_function(tf.matmul(input, W) + b) + if activation_function is not None: + layer_output = activation_function(tf.matmul(input, W) + b) + else: + layer_output = tf.matmul(input, W) + b if variable_list is not None: variable_list.append(W) variable_list.append(b) @@ -301,10 +314,15 @@ def create_last_layer(self, activation_function, inner_layer, network_size, num_ self.actor_last_row_layer = [] for i, item in enumerate(self.action_handler.get_action_sizes()): - with tf.variable_scope(str(self.action_handler.action_list_names[i])): - 
self.actor_last_row_layer.append(self.create_layer(activation_function, inner_layer[i], last_layer_name, + variable_name = str(self.action_handler.action_list_names[i]) + with tf.variable_scope(variable_name): + fixed_activation = self.action_handler.get_last_layer_activation_function(activation_function, i) + layer = self.create_layer(fixed_activation, inner_layer[i], last_layer_name, network_size, item, network_prefix, - variable_list=last_layer_list[i], dropout=False)[0]) + variable_list=last_layer_list[i], dropout=False)[0] + scaled_layer = self.action_handler.scale_layer(layer, i) + self.actor_last_row_layer.append(scaled_layer) + # tf.summary.histogram(variable_name + '_output', scaled_layer) return tf.concat(self.actor_last_row_layer, 1) diff --git a/models/actor_critic/policy_gradient.py b/models/actor_critic/policy_gradient.py index a672049..a30d859 100644 --- a/models/actor_critic/policy_gradient.py +++ b/models/actor_critic/policy_gradient.py @@ -1,9 +1,10 @@ +import numpy as np import tensorflow as tf +import math from models import base_model from models.actor_critic.base_actor_critic import BaseActorCritic from modelHelpers import tensorflow_reward_manager -import numpy as np class PolicyGradient(BaseActorCritic): @@ -84,11 +85,13 @@ def create_actor_gradients(self, logprobs, taken_actions): merged_gradient_list += item[0] total_loss += item[1] + total_loss = tf.check_numerics(total_loss, 'actor loss') + tf.summary.scalar("total_actor_loss", tf.reduce_mean(total_loss)) total_loss = total_loss / self.total_loss_divider - total_loss += actor_reg_loss + # total_loss += actor_reg_loss # total_loss = tf.Print(total_loss, [total_loss], 'total_loss') @@ -111,23 +114,26 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac taken_actions = tf.squeeze(taken_actions, axis=[1]) # calculates the entropy loss from getting the label wrong - cross_entropy_loss, wrongNess, reduced = self.calculate_loss_of_actor(logprobs, taken_actions, index) + cross_entropy_loss, wrongness, reduced = self.calculate_loss_of_actor(logprobs, taken_actions, index) if not reduced: - tf.summary.histogram('actor_wrongness', wrongNess) + if self.action_handler.is_classification(index): + tf.summary.histogram('actor_wrongness', wrongness) + else: + tf.summary.histogram('actor_wrongness', cross_entropy_loss) with tf.name_scope("compute_pg_gradients"): - pg_loss = cross_entropy_loss * (wrongNess * wrongNess) + actor_loss = cross_entropy_loss * (wrongness * wrongness) - pg_loss = tf.check_numerics(pg_loss, 'nan pg_loss') + actor_loss = tf.check_numerics(actor_loss, 'nan pg_loss') if reduced: - pg_loss = tf.reduce_mean(pg_loss, name='pg_loss') - tf.summary.scalar("actor_x_entropy_loss", cross_entropy_loss) + actor_loss = tf.reduce_mean(actor_loss, name='pg_loss') + tf.summary.scalar(self.action_handler.get_loss_type(index), cross_entropy_loss) else: - tf.summary.scalar("actor_x_entropy_loss", tf.reduce_mean(cross_entropy_loss)) + tf.summary.scalar(self.action_handler.get_loss_type(index), tf.reduce_mean(cross_entropy_loss)) actor_reg_loss = self.get_regularization_loss(actor_network_variables, prefix="actor") - actor_loss = pg_loss + actor_reg_loss * self.reg_param + actor_loss = actor_loss + actor_reg_loss * self.reg_param # compute actor gradients actor_gradients = self.optimizer.compute_gradients(actor_loss, @@ -176,7 +182,9 @@ def _compute_training_op(self, actor_gradients, critic_gradients): for i, (grad, var) in enumerate(gradients): # clip gradients by norm if grad is not None: - 
gradients[i] = (tf.clip_by_norm(grad, self.max_gradient), var) + post_clipping = tf.clip_by_norm(grad, self.max_gradient) + post_nanning = tf.where(tf.is_nan(post_clipping), tf.zeros_like(post_clipping), post_clipping) + gradients[i] = (post_nanning, var) self.add_histograms(gradients) # training update @@ -198,9 +206,10 @@ def run_train_step(self, calculate_summaries, input_states, actions, rewards=Non if self.batch_size > self.mini_batch_size: self.sess.run([self.input, self.taken_actions, self.iterator.initializer], feed_dict={self.input_placeholder: input_states, self.taken_actions_placeholder: actions}) - + num_batches = math.ceil(float(self.batch_size) / float(self.mini_batch_size)) + # print('num batches', num_batches) counter = 0 - while True: + while counter < num_batches: try: result, summary_str = self.sess.run([ self.train_op, @@ -242,6 +251,5 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): :param cross_entropy_loss: :return: The calculated_tensor, If the result is a scalar. """ - return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logprobs, - labels=taken_actions), 1.0, True + return self.action_handler.get_action_loss_from_logits(logprobs, taken_actions, index), 1.0, True diff --git a/models/actor_critic/tutorial_model.py b/models/actor_critic/tutorial_model.py index ea2d1ed..54468de 100644 --- a/models/actor_critic/tutorial_model.py +++ b/models/actor_critic/tutorial_model.py @@ -12,6 +12,8 @@ class TutorialModel(PolicyGradient): gated_layer_name = "gated_layer" max_gradient = 10.0 total_loss_divider = 2.0 + # hidden_layer_activation = tf.nn.relu6 + # hidden_layer_activation = tf.tanh def __init__(self, session, state_dim, num_actions, player_index=-1, action_handler=None, is_training=False, optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, @@ -83,18 +85,24 @@ def body(n, counter): def calculate_loss_of_actor(self, logprobs, taken_actions, index): cross_entropy_loss, initial_wrongness, __ = super().calculate_loss_of_actor(logprobs, taken_actions, index) - wrongNess = tf.constant(initial_wrongness) + wrongness = tf.constant(initial_wrongness) argmax = tf.argmax(logprobs, axis=1) if self.action_handler.action_list_names[index] != 'combo': - wrongNess += tf.cast(tf.abs(tf.cast(argmax, tf.int32) - taken_actions), tf.float32) + if self.action_handler.is_classification(index): + wrongness += tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32) + else: + # doing anything else is very very slow + wrongness += 0.0 else: # use temporarily - wrongNess += tf.cast(tf.abs(tf.cast(argmax, tf.int32) - taken_actions), tf.float32) / 2.0 + wrongness += tf.log(1.0 + tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32)) #argmax = self.argmax[index] - #number = tf.bitwise.bitwise_xor(tf.cast(self.argmax[index], tf.int32), taken_actions) + + #wrongness += tf.log(1.0 + tf.cast(tf.bitwise.bitwise_xor( + # tf.cast(self.argmax[index], tf.int32), taken_actions), tf.float32)) # result = self.fancy_calculate_number_of_ones(number) # can't use until version 1.5 - return cross_entropy_loss, wrongNess, False + return cross_entropy_loss, wrongness, False def create_gated_layer(self, inner_layer, input_state, layer_number, network_size, network_prefix, variable_list=None, scope=None): with tf.variable_scope(self.gated_layer_name): diff --git a/models/atbas/nnatba.py b/models/atbas/nnatba.py index b24ebca..f1015ed 100644 --- a/models/atbas/nnatba.py +++ b/models/atbas/nnatba.py @@ -52,11 
+52,11 @@ def encoder(self, input): layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, self.weights['out']), self.biases['out'])) return layer_3 - def create_copy_training_model(self): + def create_copy_training_model(self, model_input=None, taken_actions=None): self.labels = tf.placeholder(tf.int64, shape=(None, self.num_actions)) - cross_entropy = self.action_handler.get_cross_entropy_with_logits( - labels=self.labels, logits=self.logits, name='xentropy') + cross_entropy = self.action_handler.get_action_loss_from_logits( + labels=self.labels, logits=self.logits, index=0) loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') self.train_op = self.optimizer.minimize(loss) diff --git a/models/base_model.py b/models/base_model.py index c1d4166..c20c601 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -109,7 +109,7 @@ def sample_action(self, input_state): #always return an integer return 10 - def create_copy_training_model(self): + def create_copy_training_model(self, model_input=None, taken_actions=None): """ Creates a model used for training a bot that will copy the labeled data diff --git a/models/base_reinforcement.py b/models/base_reinforcement.py index c1f1d96..32eb909 100644 --- a/models/base_reinforcement.py +++ b/models/base_reinforcement.py @@ -87,11 +87,11 @@ def _create_variables(self): # reinforcement variables with tf.name_scope("compute_pg_gradients"): if self.action_handler.is_split_mode(): - self.taken_actions_placeholder = tf.placeholder(tf.int32, + self.taken_actions_placeholder = tf.placeholder(tf.float32, (None, self.action_handler.get_number_actions()), name="taken_actions_phd") else: - self.taken_actions_placeholder = tf.placeholder(tf.int32, (None,), name="taken_actions_phd") + self.taken_actions_placeholder = tf.placeholder(tf.float32, (None,), name="taken_actions_phd") self.taken_actions = self.taken_actions_placeholder self.input_rewards = self.create_reward() return {} diff --git a/models/fake_model.py b/models/fake_model.py index 6c775c3..76734cb 100644 --- a/models/fake_model.py +++ b/models/fake_model.py @@ -28,8 +28,6 @@ def load_config_file(self): def sample_action(self, input_state): result = self.sess.run(self.actions, feed_dict={self.input_placeholder: input_state})[0] - # print(result) - result = [int(x) for x in result] return result def get_input(self, model_input=None): diff --git a/rlbot.cfg b/rlbot.cfg index 854ef82..8b47735 100644 --- a/rlbot.cfg +++ b/rlbot.cfg @@ -3,13 +3,13 @@ username = unknown [RLBot Configuration] # Number of bots/players which will be spawned. We support up to max 10. -num_participants = 6 +num_participants = 2 [Participant Configuration] # Put the name of your bot config file here. Only total_num_participants config files will be read! # Everything needs a config, even players and default bots. We still set loadouts and names from config! 
-participant_config_0 = tutor_bot.cfg -participant_config_1 = tutor_bot.cfg +participant_config_0 = saltie2.cfg +participant_config_1 = saltie2.cfg participant_config_2 = tutor_bot.cfg participant_config_3 = tutor_bot.cfg participant_config_4 = tutor_bot.cfg diff --git a/saltie.py b/saltie.py index d7c76cd..a84e123 100644 --- a/saltie.py +++ b/saltie.py @@ -26,6 +26,7 @@ class Agent: previous_owngoals = 0 is_online_training = False is_graphing = True + control_scheme = None def __init__(self, name, team, index, config_file=None): self.last_frame_time = None @@ -39,7 +40,7 @@ def __init__(self, name, team, index, config_file=None): ) self.sess = tf.Session(config=config) # self.sess = tf.Session() - self.actions_handler = action_factory.get_handler(control_scheme=dynamic_action_handler.super_split_scheme) + self.actions_handler = action_factory.get_handler(control_scheme=self.control_scheme) self.state_dim = input_formatter.get_state_dim() self.num_actions = self.actions_handler.get_logit_size() print('num_actions', self.num_actions) @@ -62,7 +63,7 @@ def __init__(self, name, team, index, config_file=None): self.model.is_online_training = self.is_online_training - # self.model.apply_feature_creation(TensorflowFeatureCreator()) + self.model.apply_feature_creation(TensorflowFeatureCreator()) try: self.model.create_model(self.model.input_placeholder) @@ -95,10 +96,15 @@ def load_config_file(self): self.is_online_training = self.config_file.getboolean(MODEL_CONFIGURATION_HEADER, 'train_online') except: print('not training online') + try: + control_scheme = self.config_file.get(MODEL_CONFIGURATION_HEADER, 'control_scheme') + except Exception as e: + control_scheme = 'default_scheme' print('getting model from', model_package) print('name of model', model_name) self.model_class = self.get_class(model_package, model_name) + self.control_scheme = self.get_field('modelHelpers.actions.action_factory', control_scheme) def get_class(self, class_package, class_name): class_package = importlib.import_module(class_package) @@ -108,6 +114,13 @@ def get_class(self, class_package, class_name): return class_group[1] return None + def get_field(self, class_package, class_name): + class_package = importlib.import_module(class_package) + module_classes = inspect.getmembers(class_package) + for class_group in module_classes: + if class_group[0] == class_name: + return class_group[1] + return None def get_model_class(self): if self.model_class is None: diff --git a/saltie2.cfg b/saltie2.cfg index 46a0923..38c5d6e 100644 --- a/saltie2.cfg +++ b/saltie2.cfg @@ -23,7 +23,8 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.actor_critic.tutorial_model model_name = TutorialModel -teacher = replay_files +teacher = tutorial_bot_output +control_scheme = regression_controls #model_directory = training/data/tutorial_bot_split10-layers/2/trained_variables num_layers = 5 num_split_layers = 2 diff --git a/trainer/base_classes/base_trainer.py b/trainer/base_classes/base_trainer.py index 769a59e..a1e7c14 100644 --- a/trainer/base_classes/base_trainer.py +++ b/trainer/base_classes/base_trainer.py @@ -27,6 +27,14 @@ def get_class(self, class_package, class_name): return class_group[1] return None + def get_field(self, class_package, class_name): + class_package = importlib.import_module(class_package) + module_classes = inspect.getmembers(class_package) + for class_group in module_classes: + if class_group[0] == class_name: + return class_group[1] + return None + def get_config_name(self): return None diff 
--git a/trainer/base_classes/default_model_trainer.py b/trainer/base_classes/default_model_trainer.py index 79dfbc1..8b0b217 100644 --- a/trainer/base_classes/default_model_trainer.py +++ b/trainer/base_classes/default_model_trainer.py @@ -9,6 +9,7 @@ class DefaultModelTrainer(BaseTrainer): OPTIMIZER_CONFIG_HEADER = 'Optimizer Config' + MISC_CONFIG_HEADER = 'Misc Config' action_handler = None sess = None input_formatter = None @@ -16,6 +17,7 @@ class DefaultModelTrainer(BaseTrainer): learning_rate = None should_apply_features = None feature_creator = None + control_scheme = 'default_scheme' def load_config(self): super().load_config() @@ -28,9 +30,14 @@ def load_config(self): self.should_apply_features = config.getboolean(self.OPTIMIZER_CONFIG_HEADER, 'should_apply_features') except Exception as e: self.should_apply_features = False + try: + self.control_scheme = config.get(self.MISC_CONFIG_HEADER, 'control_scheme') + except Exception as e: + self.control_scheme = 'default_scheme' def setup_trainer(self): - self.action_handler = action_factory.get_handler(control_scheme=dynamic_action_handler.super_split_scheme) + controls = self.get_field('modelHelpers.actions.action_factory', self.control_scheme) + self.action_handler = action_factory.get_handler(control_scheme=controls) self.sess = tf.Session() if self.should_apply_features: self.feature_creator = TensorflowFeatureCreator() diff --git a/trainer/configs/copy_trainer.cfg b/trainer/configs/copy_trainer.cfg index fb9a9c1..88e69d3 100644 --- a/trainer/configs/copy_trainer.cfg +++ b/trainer/configs/copy_trainer.cfg @@ -5,11 +5,18 @@ download_files = False [Optimizer Config] should_apply_features = True +[Copy Configuration] +should_shuffle = True + +[Misc Config] +control_scheme = regression_controls + [Model Configuration] batch_size = 20000 -mini_batch_size = 1000 -num_width = 128 +mini_batch_size = 500 +num_width = 256 model_package = models.actor_critic.tutorial_model model_name = TutorialModel num_layers = 5 num_split_layers = 2 +keep_probability = 0.7 diff --git a/trainer/configs/randomised_trainer.cfg b/trainer/configs/randomised_trainer.cfg index 128c504..6b97622 100644 --- a/trainer/configs/randomised_trainer.cfg +++ b/trainer/configs/randomised_trainer.cfg @@ -1,11 +1,16 @@ [Randomised Trainer Configuration] total_batches = 10000 -save_step = 20000000 +save_step = 2000000 teacher_package = TutorialBot.tutorial_bot_output [Optimizer Config] should_apply_features = True +[Misc Config] +control_scheme = regression_controls +#control_scheme = mixed_controls +# control_scheme = super_split_scheme + [Model Configuration] batch_size = 5000 mini_batch_size = 5000 @@ -14,3 +19,4 @@ model_package = models.actor_critic.tutorial_model model_name = TutorialModel num_layers = 5 num_split_layers = 2 +keep_probability = 0.6 diff --git a/trainer/copy_trainer.py b/trainer/copy_trainer.py index 3346de9..72bf13d 100644 --- a/trainer/copy_trainer.py +++ b/trainer/copy_trainer.py @@ -3,11 +3,13 @@ from conversions.input.input_formatter import get_state_dim from trainer.base_classes.default_model_trainer import DefaultModelTrainer from trainer.base_classes.download_trainer import DownloadTrainer +from trainer.utils import controller_statistics from trainer.utils.trainer_runner import run_trainer class CopyTrainer(DownloadTrainer, DefaultModelTrainer): + should_shuffle = False file_number = 0 epoch = 0 @@ -17,9 +19,19 @@ class CopyTrainer(DownloadTrainer, DefaultModelTrainer): input_game_tick = [] input_batch = [] label_batch = [] + eval_file = False + 
eval_number = 30 + controller_stats = None + action_length = None def load_config(self): super().load_config() + config = super().create_config() + try: + self.should_shuffle = config.getboolean(self.DOWNLOAD_TRAINER_CONFIGURATION_HEADER, + 'download_files') + except Exception as e: + self.should_shuffle = True def get_config_name(self): return 'copy_trainer.cfg' @@ -39,21 +51,40 @@ def setup_model(self): self.model.create_copy_training_model() self.model.create_savers() self.model.initialize_model() + self.controller_stats = controller_statistics.OutputChecks(self.sess, self.action_handler, + self.batch_size, self.model.smart_max, + model_placeholder=self.model.input_placeholder) + self.controller_stats.create_model() def start_new_file(self): - self.file_number += 1 + self.input_batch = [] self.label_batch = [] self.input_game_tick = [] + if self.file_number % self.eval_number == 0: + self.eval_file = True + self.action_length = self.action_handler.control_size + else: + self.eval_file = False + self.action_length = self.action_handler.get_number_actions() + self.file_number += 1 def add_pair(self, input_array, output_array): self.input_batch.append(input_array) - label = self.action_handler.create_action_index(output_array) + if self.eval_file: + label = output_array + else: + label = self.action_handler.create_action_index(output_array) # print(output_array) # print(label) self.label_batch.append(label) + def unison_shuffled_copies(self, a, b): + assert len(a) == len(b) + p = np.random.permutation(len(a)) + return a[p], b[p] + def process_pair(self, input_array, output_array, pair_number, file_version): self.add_pair(input_array, output_array) if len(self.input_batch) == self.batch_size: @@ -71,16 +102,25 @@ def batch_process(self): self.input_batch = np.array(self.input_batch) self.input_batch = self.input_batch.reshape(input_length, get_state_dim()) + output = np.argwhere(np.isnan(self.input_batch)) + if len(output) > 0: + print('nan indexes', output) + for index in output: + self.input_batch[index[0]][index[1]] = 0 + self.label_batch = np.array(self.label_batch) - self.label_batch = self.label_batch.reshape(input_length, self.action_handler.get_number_actions()) + self.label_batch = self.label_batch.reshape(input_length, self.action_length) print(input_length) + if self.should_shuffle: + self.input_batch, self.label_batch = self.unison_shuffled_copies(self.input_batch, self.label_batch) + + if self.eval_file: + self.controller_stats.get_amounts(input_array=self.input_batch, bot_output=np.transpose(self.label_batch)) + else: + self.model.run_train_step(True, self.input_batch, self.label_batch) - self.model.run_train_step(True, self.input_batch, self.label_batch) - # Display logs per step - if self.epoch % self.display_step == 0: - print('has run on x values', self.batch_size) self.epoch += 1 def end_file(self): diff --git a/trainer/random_packet_trainer.py b/trainer/random_packet_trainer.py index 071962f..6b1f4ff 100644 --- a/trainer/random_packet_trainer.py +++ b/trainer/random_packet_trainer.py @@ -59,10 +59,8 @@ def setup_model(self): real_output = output_creator.get_output_vector(game_tick_packet) real_indexes = self.action_handler.create_action_indexes_graph(tf.stack(real_output, axis=1)) - reshaped = tf.cast(real_indexes, tf.int32) - self.model.taken_actions = reshaped self.model.create_model(input_state) - self.model.create_copy_training_model(input_state) + self.model.create_copy_training_model(model_input=input_state, taken_actions=real_indexes) self.model.create_savers() 
self.model.initialize_model() @@ -70,8 +68,10 @@ def setup_model(self): self.model.printParameters() # Initialising statistics and printing them before training - self.controller_stats = controller_statistics.OutputChecks(self.batch_size, self.model.argmax, game_tick_packet, - input_state, self.sess, self.action_handler, output_creator) + self.controller_stats = controller_statistics.OutputChecks(self.sess, self.action_handler, + self.batch_size, self.model.smart_max, + game_tick_packet=game_tick_packet, + bot=output_creator) self.controller_stats.create_model() def _run_trainer(self): diff --git a/trainer/utils/controller_statistics.py b/trainer/utils/controller_statistics.py index d76a784..85e537d 100644 --- a/trainer/utils/controller_statistics.py +++ b/trainer/utils/controller_statistics.py @@ -8,34 +8,45 @@ class OutputChecks: game_tick_packet = None accuracy_over_time = None bot_data_over_time = None + requires_input = False requires_output = False + controls = None - def __init__(self, packets, model_output, game_tick_packet, input_array, tf_session, action_handler, - bot=None): + def __init__(self, tf_session, action_handler, batch_size, model_output, + game_tick_packet=None, + bot=None, + model_placeholder=None): self.sess = tf_session - self.packets = packets + self.batch_size = batch_size self.game_tick_packet = game_tick_packet - self.input_array = input_array - self.packet_generator = random_packet_creator.TensorflowPacketGenerator(packets) self.tutorial_bot = bot self.model_output = model_output + self.model_input = model_placeholder self.actionHandler = action_handler if self.tutorial_bot is None: self.requires_output = True + if self.model_input is not None: + self.requires_input = True + def create_model(self): # clear history self.accuracy_over_time = [] self.bot_data_over_time = [] + self.controls = tf.transpose( + self.actionHandler.create_tensorflow_controller_from_selection(self.model_output, + self.batch_size)) + + def get_amounts(self, input_array=None, bot_output=None): - def get_amounts(self, bot_output=None): - controls = tf.transpose( - self.actionHandler.create_tensorflow_controller_from_selection(self.model_output, self.packets)) if not self.requires_output: bot_output = self.sess.run(self.tutorial_bot.get_output_vector(self.game_tick_packet)) - output = self.sess.run(controls) + if not self.requires_input: + output = self.sess.run(self.controls) + else: + output = self.sess.run(self.controls, feed_dict={self.model_input: input_array}) accuracy = np.sum(np.isclose(output, bot_output, 0.01), 1) / np.size(output[1]) self.accuracy_over_time.append(accuracy) diff --git a/trainer/utils/visualise_net.py b/trainer/utils/visualise_net.py new file mode 100644 index 0000000..20bb4ae --- /dev/null +++ b/trainer/utils/visualise_net.py @@ -0,0 +1,259 @@ +from tkinter import * +import numpy as np +import ast + +# Some values useful for editing how the net gets shown +x_spacing = 100 +y_spacing = 50 +circle_dia = 30 + + +class Visualiser: + gui = None # The window + relu = None # Whether activations are through relu + highrelu = 20 # The + bigweight = 30 + layer_activations = None # The values for the activations + scale = 1.0 # The current scale of the canvas + delta = 0.75 # The impact of scrolling + biggestarraylen = 0 # For aligning all the layers + eFrame = None # The frame with the customisation + iFrame = None # The frame with the info + cFrame = None # The frame with the canvas + canvas = None # The canvas showing the net + rotate_canvas = False # Should the canvas 
be rotated + + info_text_neuron = None # The info about the last neuron hovered over + info_text_line = None # The info about the last line (connection) hovered over + + input_array = None # The StringVar storing the array used when hitting generate + input_relu = None # The StringVar storing the array used for the relu adaption + relu_number = None # The IntVar storing the spinbox value + + + def __init__(self, inp=None): + # Initialising the window + self.gui = Tk() + self.gui.geometry('600x600') + self.gui.title("Net visualisation") + + # Initialising all variables + self.highrelu = 20 + self.relu = [True, True, True, True, False] # Is the layer using relu + self.bigweight = 30 + self.layer_activations = inp + # del inp (Is it necessary? Might kill the original array as well, creating problems over there) + self.rotate_canvas = False + self.last_layer = list() + self.scale = 1.0 + self.delta = 0.75 + self.biggestarraylen = 0 + for item in self.layer_activations: + if len(item) > self.biggestarraylen: + self.biggestarraylen = len(item) + + # Initialising the frames + self.eFrame = Frame(self.gui) + self.eFrame.grid(row=0, column=0) + self.iFrame = Frame(self.gui) + self.iFrame.grid(row=1, column=0, sticky='nw') + self.cFrame = Frame(self.gui, bd=1, relief=SUNKEN) + self.cFrame.grid(row=0, column=1, sticky='nsew', rowspan=2) + + self.config_options() + + self.canvas_stuff() + self.edit_stuff() + self.info_stuff() + mainloop() + + def edit_stuff(self): + self.input_array = StringVar() + input_array_field = Entry(self.eFrame, textvariable=self.input_array) + input_array_field.bind('<Return>', lambda event: self.change_input()) + input_array_field.grid(row=0, column=0) + input_array_button = Button(self.eFrame, command=self.change_input, text="Generate") + input_array_button.grid(row=0, column=1) + + self.input_relu = StringVar() + input_relu_field = Entry(self.eFrame, textvariable=self.input_relu) + input_relu_field.bind('<Return>', lambda event: self.change_relu()) + input_relu_field.grid(row=1, column=0) + input_relu_button = Button(self.eFrame, command=self.change_relu, text="Edit relu") + input_relu_button.grid(row=1, column=1) + + self.relu_number = IntVar() + self.relu_number.set(20) + relu_spin_box = Spinbox(self.eFrame, from_=1, to=1000, width=5, textvariable=self.relu_number) + relu_spin_box.bind('<Return>', lambda event: self.change_relu_factor()) + relu_spin_box.grid(row=2, column=0) + relu_button = Button(self.eFrame, command=self.change_relu_factor, text="Change high relu") + relu_button.grid(row=2, column=1) + + rotate = Button(self.eFrame, command=self.rotate_and_refresh, text="Rotate") + rotate.grid(row=3, column=0) + + def info_stuff(self): + self.info_text_neuron = StringVar() + self.info_text_neuron.set("Layer: ?\nNeuron: ?\nActivation type: ?\nActivation: ?") + activation_label = Label(self.iFrame, textvariable=self.info_text_neuron, justify=LEFT) + activation_label.grid(row=0, column=0, sticky='w') + + self.info_text_line = StringVar() + self.info_text_line.set("From:\nLayer: ?\nNeuron: ?\nTo:\nLayer: ?\nNeuron: ?") + activation_label = Label(self.iFrame, textvariable=self.info_text_line, justify=LEFT) + activation_label.grid(row=1, column=0, sticky='w') + + def canvas_stuff(self): + # Create canvas including the scrollbars + class AutoScrollbar(Scrollbar): + def set(self, lo, hi): + if float(lo) <= 0.0 and float(hi) >= 1.0: + self.grid_remove() + else: + self.grid() + Scrollbar.set(self, lo, hi) + + def wheel(event): + scale = 1.0 + # Respond to Linux (event.num) or Windows (event.delta) wheel event + if event.num == 5 or event.delta == -120: + scale *= self.delta + self.scale *= self.delta + if event.num == 4 or event.delta == 120: + scale /= self.delta + self.scale /= self.delta + # Rescale all canvas objects + x = self.canvas.canvasx(event.x) + y = self.canvas.canvasy(event.y) + self.canvas.scale('all', x, y, scale, scale) + self.canvas.configure(scrollregion=self.canvas.bbox('all')) + + vbar = AutoScrollbar(self.cFrame, orient='vertical') + hbar = AutoScrollbar(self.cFrame, orient='horizontal') + vbar.grid(row=0, column=1, sticky='ns') + hbar.grid(row=1, column=0, sticky='we') + self.canvas = Canvas(self.cFrame, xscrollcommand=hbar.set, yscrollcommand=vbar.set) + self.canvas.grid(row=0, column=0, sticky='nsew') + vbar.configure(command=self.canvas.yview) # bind scrollbars to the canvas + hbar.configure(command=self.canvas.xview) + + # Bind events to the Canvas + self.canvas.bind('<ButtonPress-1>', lambda event: self.canvas.scan_mark(event.x, event.y)) + self.canvas.bind('<B1-Motion>', lambda event: self.canvas.scan_dragto(event.x, event.y, gain=1)) + self.canvas.bind('<MouseWheel>', wheel) + self.canvas.configure(scrollregion=self.canvas.bbox('all')) + + # Generate the canvas itself + if self.layer_activations is not None: + for i in range(len(self.layer_activations)): + self.create_layer(i) + + def create_circle(self, x0, y0, activation, relu, layer, neuron): + if self.rotate_canvas: + x0, y0 = y0, x0 + if relu: + activation = activation if activation <= self.highrelu else self.highrelu + rgb = int(-1 * (activation - self.highrelu) * 255 / self.highrelu) + else: + activation = activation if activation <= 1 else 1 + rgb = int(-1 * (activation - 1) * 255) + hex_color = "#{:02x}{:02x}{:02x}".format(rgb, rgb, rgb) + tag = str(layer) + ";" + str(neuron) + self.canvas.create_oval(x0, y0, x0 + circle_dia, y0 + circle_dia, fill=hex_color, tags=tag) + + def handler(event, la=layer, ne=neuron): + self.info_text_neuron.set("Layer: " + str(la) + "\nNeuron: " + str(ne) + "\nActivation type: " + ( + "Relu" if self.relu[layer] else "Sigmoid") + "\nActivation: " + str( + self.layer_activations[layer][neuron])) + + self.canvas.tag_bind(tag, "<Enter>", handler) + + def create_line(self, x0, y0, x1, y1, layer0, neuron0, layer1, neuron1): + if self.rotate_canvas: + x0, y0, x1, y1 = y0, x0, y1, x1 + half = .5 * circle_dia + + weight = self.obtain_weight() + r, g, b = 0, 0, 0 + if weight >= 0: + weight = weight if weight <= self.bigweight else self.bigweight + r = int(-1 * (weight - self.bigweight) * 255 / self.bigweight) + else: + weight = weight if weight >= (-self.bigweight) else (-self.bigweight) + b = int((weight + self.bigweight) * 255 / self.bigweight) + hex_color = "#{:02x}{:02x}{:02x}".format(r, g, b) + + tag = str(layer0) + ";" + str(neuron0) + ";" + str(layer1) + ";" + str(neuron1) + self.canvas.create_line(x0 + half, y0 + half, x1 + half, y1 + half, fill=hex_color, tags=tag) + + def handler(event, l0=layer0, n0=neuron0, l1=layer1, n1=neuron1): + self.info_text_line.set( + "From:\nLayer: " + str(l0) + "\nNeuron: " + str(n0) + "\nTo:\nLayer: " + str(l1) + "\nNeuron: " + str( + n1)) + + self.canvas.tag_bind(tag, "<Enter>", handler) + self.canvas.tag_lower(tag) + + + def obtain_weight(self): + return np.random.randint(-30, 30) + + def create_layer(self, layer): + activations = self.layer_activations[layer] + x = layer * x_spacing + y = (self.biggestarraylen - len(activations)) * y_spacing * .5 + this_layer = list() + neuron = 0 + for i in activations: + this_layer.append([x, y]) + if layer != 0: + nn = 0 + for n in self.last_layer:
+ self.create_line(n[0], n[1], x, y, layer - 1, nn, layer, neuron) + nn += 1 + self.create_circle(x, y, i, self.relu[layer], layer, neuron) + y += y_spacing + neuron += 1 + self.last_layer = this_layer + + def refresh_canvas(self): + self.canvas.scale('all', 0, 0, 1, 1) + self.scale = 1 + self.canvas.delete('all') + for i in range(len(self.layer_activations)): + self.create_layer(i) + + def rotate_and_refresh(self): + self.rotate_canvas = not self.rotate_canvas + self.refresh_canvas() + + def change_relu_factor(self): + self.highrelu = self.relu_number.get() + self.refresh_canvas() + + def change_relu(self): + if self.input_relu.get(): + try: + self.relu = ast.literal_eval(self.input_relu.get()) + self.refresh_canvas() + except Exception: + pass + + def change_input(self): + if self.input_array.get(): + try: + self.layer_activations = ast.literal_eval(self.input_array.get()) + self.refresh_canvas() + except Exception: + pass + + def config_options(self): + # Make the canvas expandable + self.gui.grid_rowconfigure(0, weight=1) + self.gui.grid_rowconfigure(1, weight=1) + self.gui.grid_columnconfigure(1, weight=1) + self.cFrame.grid_rowconfigure(0, weight=1) + self.cFrame.grid_columnconfigure(0, weight=1) + + self.gui.grid_columnconfigure(0, minsize=100) diff --git a/tutor_bot.cfg b/tutor_bot.cfg index 11167d9..1118150 100644 --- a/tutor_bot.cfg +++ b/tutor_bot.cfg @@ -23,7 +23,8 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.fake_model model_name = FakeModel -teacher_package = TutorialBot.tutorial_bot_output -# teacher_package = TutorialBot.atba2_demo_output +control_scheme = regression_controls +#teacher_package = TutorialBot.tutorial_bot_output +teacher_package = TutorialBot.atba2_demo_output batch_size = 1 mini_batch_size = 1
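Usage sketch (illustrative only, not part of the patch): the per-control loss types introduced above can be mixed into a custom control scheme and handed to the action factory. The scheme name steer_regression_controls and the print loop below are hypothetical; get_handler, the LOSS_* constants, get_number_actions, get_loss_type and is_classification all come from the changes in this diff, and a non-default scheme is assumed to produce a DynamicActionHandler.

from modelHelpers.actions import action_factory
from modelHelpers.actions.dynamic_action_handler import LOSS_SPARSE_CROSS, LOSS_ABSOLUTE_DIFFERENCE

# Hypothetical scheme: regress steer with an absolute-difference loss,
# classify the remaining ranged controls with sparse cross entropy.
steer_regression_controls = [
    [('steer', (-1, 1.5, .5), LOSS_ABSOLUTE_DIFFERENCE),
     ('throttle', (-1, 1.5, .5), LOSS_SPARSE_CROSS),
     ('yaw', (-1, 1.5, .5), LOSS_SPARSE_CROSS),
     ('pitch', (-1, 1.5, .5), LOSS_SPARSE_CROSS),
     ('roll', (-1, 1.5, .5), LOSS_SPARSE_CROSS)],
    [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))],
    []]

handler = action_factory.get_handler(control_scheme=steer_regression_controls)

# Controls declared without an explicit loss default to LOSS_SPARSE_CROSS, so the
# jump/boost/handbrake combo output is still trained as a classifier.
for index in range(handler.get_number_actions()):
    print(index, handler.get_loss_type(index), handler.is_classification(index))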