Merge pull request #52 from RLBots/copy_trainer

Added Keras support to the model. Also added some ways to visualize the models. Fixed a ton of bugs.
SaltieRL · Jan 23, 2018 · 07a523c · 07a523c
2 parents 62b6a0d + afb2697
commit 07a523c
Show file tree

Hide file tree

Showing 36 changed files with 1,209 additions and 533 deletions.
diff --git a/TutorialBot/tutorial_bot_output.py b/TutorialBot/tutorial_bot_output.py
@@ -5,7 +5,7 @@
 class TutorialBotOutput:
     # Constants
     distance_from_ball_to_go_fast = tf.constant(600.0)
-    distance_from_ball_to_boost = tf.constant(1500.0)  # Minimum distance to ball for using boost
+    distance_from_ball_to_boost = tf.constant(2000.0)  # Minimum distance to ball for using boost
     unreal_to_degrees = tf.constant(
         1.0 / 65536.0 * 360.0)  # The numbers used to convert unreal rotation units to degrees
     true = tf.constant(1.0)
@@ -23,7 +23,7 @@ def distance(self, x1, y1, x2, y2):
     def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance_to_ball, is_on_ground):
         full_turn_angle = 70.0
         half_turn_angle = 30.0
-        powerslide_angle_constant = 710.0 # The angle (from the front of the bot to the ball) to start to powerslide.
+        powerslide_angle_constant = 80.0 # The angle (from the front of the bot to the ball) to start to powerslide.
 
         angle_front_to_target = self.feature_creator.generate_angle_to_target(bot_position.X, bot_position.Y,
                                                                               bot_rotation,
@@ -46,14 +46,15 @@ def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance
 
         jump = tf.cast(should_jump, tf.float32)
 
-        powerslide_angle = full_turn_angle * tf.cast(tf.less(1000.0, distance_to_ball), tf.float32)
-        powerslide_angle = powerslide_angle_constant + powerslide_angle
-
-        ps = tf.greater(tf.abs(angle_front_to_target), powerslide_angle)
+        ps = tf.logical_and(tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle),
+                            tf.less_equal(distance_to_ball, 2000.0))
+        # ps = tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle)
         power_slide = tf.cast(ps, tf.float32)
 
+        should_not_dodge = tf.cast(tf.greater_equal(distance_to_ball, 500), tf.float32)
+
         # if jump is 1 then we should not execute a turn
-        safe_steer = steer * (1.0 - jump)
+        safe_steer = steer * (1.0 - jump * should_not_dodge)
         return (safe_steer, power_slide, jump)
 
     def get_output_vector(self, values):
@@ -79,8 +80,8 @@ def get_output_vector(self, values):
         xy_distance = self.distance(bot_pos.X, bot_pos.Y, ball_pos.X, ball_pos.Y)
 
         # Boost when ball is far enough away
-        boost = tf.logical_and(tf.greater(xy_distance, self.distance_from_ball_to_boost),
-                               tf.greater(car_boost, 34))
+        boost = tf.logical_and(tf.greater_equal(xy_distance, self.distance_from_ball_to_boost / car_boost),
+                               tf.greater_equal(car_boost, 10))
         full_throttle = 0.5 * tf.cast(tf.greater(xy_distance, self.distance_from_ball_to_go_fast), tf.float32)
         throttle = full_throttle + tf.constant(0.5)
 

diff --git a/bot_manager.py b/bot_manager.py
@@ -132,6 +132,7 @@ def run(self):
                 print('\n\n\n\n Match has ended so ending bot loop\n\n\n\n\n')
                 break
 
+            controller_input = None
             # Run the Agent only if the gameInfo has updated.
             tick_game_time = game_tick_packet.gameInfo.TimeSeconds
             should_call_while_paused = datetime.now() - last_call_real_time >= MAX_AGENT_CALL_PERIOD

diff --git a/conversions/input/input_formatter.py b/conversions/input/input_formatter.py
@@ -118,11 +118,17 @@ def get_car_info(self, game_tick_packet, index):
         player_team = game_tick_packet.gamecars[index].Team
         player_boost = game_tick_packet.gamecars[index].Boost
         last_touched_ball = self.get_last_touched_ball(game_tick_packet.gamecars[index], game_tick_packet.gameball.LatestTouch)
-        car_array = [player_x, player_y, player_z, player_pitch, player_yaw, player_roll,
-                player_speed_x, player_speed_y, player_speed_z, player_angular_speed_x,
-                player_angular_speed_y, player_angular_speed_z,
-                player_on_ground, player_supersonic, player_demolished, player_jumped,
-                player_double_jumped, player_team, player_boost, last_touched_ball]
+        car_array = [player_x, player_y, player_z,
+                     player_pitch, player_yaw, player_roll,
+                     player_speed_x, player_speed_y, player_speed_z,
+                     player_angular_speed_x, player_angular_speed_y, player_angular_speed_z,
+                     player_on_ground,
+                     player_supersonic,
+                     player_demolished,
+                     player_jumped, player_double_jumped,
+                     player_team,
+                     player_boost,
+                     last_touched_ball]
         return car_array
 
     def get_last_touched_ball(self, car, latest_touch):
@@ -190,6 +196,15 @@ def get_score_info(self, Score, diff_in_score):
 
         return [score, goals, own_goals, assists, saves, shots, demolitions, diff_in_score]
 
+    def format_array(self, array):
+        """
+        Formats the array to properly fit the model
+        :param array: The input data to convert; anything accepted by np.array
+            (no reshaping is done here, so no batch size is needed)
+        :return: A new numpy array with dtype float32
+        """
+        return np.array(array, dtype=np.float32)
+
     def flattenArrays(self, array_of_array):
         """
         Takes an array of arrays and flattens it into a single array

diff --git a/conversions/input/simple_input_formatter.py b/conversions/input/simple_input_formatter.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from conversions.input.input_formatter import InputFormatter
+
+
+class SimpleInputFormatter(InputFormatter):
+
+    def create_input_array(self, game_tick_packet, passed_time=0.0):
+        # posx, posy, posz, rotx, roty, rotz, vx, vy, vz, angvx, angvy, angvz, boost_amt, ballx, bally, ballz, ballvx, ballvy, ballvz
+        inputs = [game_tick_packet.gamecars[self.index].Location.X,
+                  game_tick_packet.gamecars[self.index].Location.Y,
+                  game_tick_packet.gamecars[self.index].Location.Z,
+                  game_tick_packet.gamecars[self.index].Rotation.Pitch,
+                  game_tick_packet.gamecars[self.index].Rotation.Yaw,
+                  game_tick_packet.gamecars[self.index].Rotation.Roll,
+                  game_tick_packet.gamecars[self.index].Velocity.X,
+                  game_tick_packet.gamecars[self.index].Velocity.Y,
+                  game_tick_packet.gamecars[self.index].Velocity.Z,
+                  game_tick_packet.gamecars[self.index].AngularVelocity.X,
+                  game_tick_packet.gamecars[self.index].AngularVelocity.Y,
+                  game_tick_packet.gamecars[self.index].AngularVelocity.Z,
+                  game_tick_packet.gamecars[self.index].Boost,
+                  game_tick_packet.gameball.Location.X,
+                  game_tick_packet.gameball.Location.Y,
+                  game_tick_packet.gameball.Location.Z,
+                  game_tick_packet.gameball.Velocity.X,
+                  game_tick_packet.gameball.Velocity.Y,
+                  game_tick_packet.gameball.Velocity.Z
+                  ]
+        return inputs
+
+    def get_state_dim(self):
+        return 19
+
+    def format_array(self, input_length, array):
+        """
+        Reshapes the array into one row of state values per batch entry
+        :param input_length: The batch size of the array (number of rows)
+        :param array: A numpy array holding input_length * get_state_dim() values
+        :return: The array reshaped to (input_length, get_state_dim())
+        """
+        return array.reshape(input_length, self.get_state_dim())
diff --git a/conversions/output_formatter.py b/conversions/output_formatter.py
@@ -85,14 +85,14 @@ def get_car_info(array, index):
     car_info.Rotation = create_3D_rotation(array, index + 3)
     car_info.Velocity = create_3D_point(array, index + 6)
     car_info.AngularVelocity = create_3D_point(array, index + 9)
-    car_info.bOnGround = array[12]
-    car_info.bSuperSonic = array[13]
-    car_info.bDemolished = array[14]
-    car_info.bJumped = array[15]
-    car_info.bDoubleJumped = array[16]
-    car_info.Team = array[17]
-    car_info.Boost = array[18]
-    car_info.bLastTouchedBall = array[19]
+    car_info.bOnGround = array[index + 12]
+    car_info.bSuperSonic = array[index + 13]
+    car_info.bDemolished = array[index + 14]
+    car_info.bJumped = array[index + 15]
+    car_info.bDoubleJumped = array[index + 16]
+    car_info.Team = array[index + 17]
+    car_info.Boost = array[index + 18]
+    car_info.bLastTouchedBall = array[index + 19]
     return car_info
 
 

diff --git a/modelHelpers/actions/action_factory.py b/modelHelpers/actions/action_factory.py
@@ -30,6 +30,14 @@
                        [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))],
                        []]
 
+regression_everything = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN),
+                        ('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN),
+                        ('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('jump', (0, 2, 1), LOSS_SQUARE_MEAN),
+                          ('boost', (0, 2, 1), LOSS_SQUARE_MEAN),
+                          ('handbrake', (0, 2, 1), LOSS_SQUARE_MEAN)],
+                       [],
+                       []]
+
 def get_handler(split_mode=True, control_scheme=default_scheme):
     """
     Creates a handler based on the options given.

diff --git a/modelHelpers/actions/action_handler.py b/modelHelpers/actions/action_handler.py
@@ -165,22 +165,26 @@ def get_random_action(self):
         pass
 
     def get_random_option(self):
-        return [random.randrange(self.get_logit_size())]
+        return [random.randrange(self.get_action_sizes())]
 
-    def run_func_on_split_tensors(self, input_tensors, split_func):
+    def run_func_on_split_tensors(self, input_tensors, split_func, return_as_list=False):
         """
         Optionally splits the tensor and runs a function on the split tensor
         If the tensor should not be split it runs the function on the entire tensor
         :param tf: tensorflow
         :param input_tensors: needs to have shape of (?, num_actions)
         :param split_func: a function that is called with a tensor or array the same rank as input_tensor.
             It should return a tensor with the same rank as input_tensor
-        :return: a stacked tensor (see tf.stack) or the same tensor depending on if it is in split mode or not.
+        :param return_as_list: If true then the result will be a list of tensors instead of a single stacked tensor
+        :return: a single tensor or a tensor wrapped in a list
         """
 
         if not isinstance(input_tensors, collections.Sequence):
             input_tensors = [input_tensors]
-        return split_func(*input_tensors)
+        if return_as_list:
+            return [split_func(*input_tensors)]
+        return [split_func(*input_tensors)]
+
 
     def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_split=False):
         """
@@ -256,3 +260,6 @@ def scale_layer(self, layer, index):
 
     def get_loss_type(self, index):
         return 'softmax'
+
+    def is_classification(self, index):
+        return True
diff --git a/modelHelpers/actions/dynamic_action_handler.py b/modelHelpers/actions/dynamic_action_handler.py
@@ -62,29 +62,7 @@ def create_range_action(self, item):
         action_data = np.arange(*item[1])
         return action_data
 
-    def create_actions(self):
-        self.reset()
-
-        for i, item in enumerate(self.control_names):
-            self.control_names_index_map[item] = i
-
-        ranges = self.control_scheme[0]
-        combo_scheme = self.control_scheme[1]
-        copies = self.control_scheme[2]
-
-        for item in ranges:
-            action = self.create_range_action(item)
-            self.action_sizes.append(len(action))
-            self.action_name_index_map[item[0]] = len(self.action_list_names)
-            if len(item) > 2:
-                self.action_loss_type_map[len(self.action_list_names)] = item[2]
-            else:
-                self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
-            self.action_list_names.append(item[0])
-            self.actions.append(action)
-
-        self.ranged_actions = list(self.actions)
-
+    def create_combo_actions(self, combo_scheme):
         for item in combo_scheme:
             action = self.create_range_action(item)
             self.combo_name_list.append(item[0])
@@ -101,6 +79,38 @@ def create_actions(self):
         self.action_list_names.append(COMBO)
         self.actions.append(self.button_combo)
 
+    def create_ranged_actions(self, ranges):
+        for item in ranges:
+            action = self.create_range_action(item)
+            self.action_sizes.append(len(action))
+            self.action_name_index_map[item[0]] = len(self.action_list_names)
+            if len(item) > 2:
+                self.action_loss_type_map[len(self.action_list_names)] = item[2]
+            else:
+                self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
+            self.action_list_names.append(item[0])
+            self.actions.append(action)
+
+        self.ranged_actions = list(self.actions)
+
+    def create_actions(self):
+        self.reset()
+
+        for i, item in enumerate(self.control_names):
+            self.control_names_index_map[item] = i
+
+        ranges = self.control_scheme[0]
+        combo_scheme = self.control_scheme[1]
+        copies = self.control_scheme[2]
+
+        if len(ranges) > 0:
+            self.create_ranged_actions(ranges)
+
+        if len(combo_scheme) > 0:
+            self.create_combo_actions(combo_scheme)
+        else:
+            self.action_name_index_map[COMBO] = -1
+
         for item in copies:
             self.action_name_index_map[item[0]] = self.action_name_index_map[item[1]]
         return self.actions
@@ -110,7 +120,7 @@ def create_action_map(self):
 
     def create_controller_from_selection(self, action_selection):
         if len(action_selection) != len(self.actions):
-            raise Exception('Invalid action selection size')
+            raise Exception('Invalid action selection size' + str(len(action_selection)) + ':' + str(len(self.actions)))
 
         combo_index = self.action_name_index_map[COMBO]
         controller_output = []
@@ -171,6 +181,7 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si
                 output = tf.gather_nd(ranged_action, tf.stack([indexer, tf.cast(selection, tf.int32)], axis=1))
                 controller_output.append(output)
             else:
+                # selection = tf.Print(selection, [selection], control)
                 controller_output.append(selection)
 
         # make sure everything is the same type
@@ -259,8 +270,9 @@ def create_action_indexes_graph(self, real_action, batch_size=None):
                 elif indexes[action_index] is None:
                     indexes[action_index] = tf.squeeze(real_control, axis=1)
 
-        combo_action = self._create_combo_index_graph(combo_list, real_action)
-        indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)
+        if len(self.combo_list) > 0:
+            combo_action = self._create_combo_index_graph(combo_list, real_action)
+            indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)
 
         result = tf.stack(indexes, axis=1)
         return result
@@ -283,7 +295,7 @@ def get_action_loss_from_logits(self, logits, labels, index):
     def get_last_layer_activation_function(self, func, index):
         if self.is_classification(index):
             return func
-        return None
+        return tf.nn.tanh
 
     def scale_layer(self, layer, index):
         """

diff --git a/modelHelpers/data_normalizer.py b/modelHelpers/data_normalizer.py
@@ -170,7 +170,9 @@ def apply_normalization(self, input_array):
         # error_prevention = tf.cast(tf.equal(diff, 0.0), tf.float32)
         # diff = diff + error_prevention
 
-        result = (input_array - min) / diff
+
+        #result = (input_array - min) / diff
+        result = input_array / diff
         #result = tf.Print(result, [min], 'min', summarize=16)
         #result = tf.Print(result, [max], 'max', summarize=16)
         #result = tf.Print(result, [input_array[0]], 'inp', summarize=30)