Merge pull request #54 from RLBots/keras
Fixed some more bugs
dtracers authored Jan 23, 2018
2 parents 7c53f21 + f0e36b7 commit afb2697
Showing 22 changed files with 293 additions and 212 deletions.
19 changes: 10 additions & 9 deletions TutorialBot/tutorial_bot_output.py
@@ -5,7 +5,7 @@
class TutorialBotOutput:
# Constants
distance_from_ball_to_go_fast = tf.constant(600.0)
- distance_from_ball_to_boost = tf.constant(1500.0) # Minimum distance to ball for using boost
+ distance_from_ball_to_boost = tf.constant(2000.0) # Minimum distance to ball for using boost
unreal_to_degrees = tf.constant(
1.0 / 65536.0 * 360.0) # The numbers used to convert unreal rotation units to degrees
true = tf.constant(1.0)
@@ -23,7 +23,7 @@ def distance(self, x1, y1, x2, y2):
def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance_to_ball, is_on_ground):
full_turn_angle = 70.0
half_turn_angle = 30.0
- powerslide_angle_constant = 710.0 # The angle (from the front of the bot to the ball) to start to powerslide.
+ powerslide_angle_constant = 80.0 # The angle (from the front of the bot to the ball) to start to powerslide.

angle_front_to_target = self.feature_creator.generate_angle_to_target(bot_position.X, bot_position.Y,
bot_rotation,
@@ -46,14 +46,15 @@ def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance

jump = tf.cast(should_jump, tf.float32)

- powerslide_angle = full_turn_angle * tf.cast(tf.less(1000.0, distance_to_ball), tf.float32)
- powerslide_angle = powerslide_angle_constant + powerslide_angle

- ps = tf.greater(tf.abs(angle_front_to_target), powerslide_angle)
+ ps = tf.logical_and(tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle),
+ tf.less_equal(distance_to_ball, 2000.0))
+ # ps = tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle)
power_slide = tf.cast(ps, tf.float32)

+ should_not_dodge = tf.cast(tf.greater_equal(distance_to_ball, 500), tf.float32)

# if jump is 1 then we should not execute a turn
- safe_steer = steer * (1.0 - jump)
+ safe_steer = steer * (1.0 - jump * should_not_dodge)
return (safe_steer, power_slide, jump)

def get_output_vector(self, values):
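
Note: the hunk above drops the distance-scaled powerslide threshold in favor of a simpler rule (powerslide only when the angle to the target is at least full_turn_angle and the ball is within 2000 units) and only suppresses steering during a jump when the ball is at least 500 units away. A plain-Python sketch of the new conditions (no TensorFlow; the sample values are made up):

def aim_flags(angle_front_to_target, distance_to_ball, should_jump,
              steer, full_turn_angle=70.0):
    # New rule: powerslide only on sharp turns while reasonably close to the ball.
    power_slide = (abs(angle_front_to_target) >= full_turn_angle
                   and distance_to_ball <= 2000.0)
    # Steering is zeroed during a jump only when the ball is at least 500 units away.
    should_not_dodge = 1.0 if distance_to_ball >= 500 else 0.0
    jump = 1.0 if should_jump else 0.0
    safe_steer = steer * (1.0 - jump * should_not_dodge)
    return safe_steer, power_slide, jump

# A 90-degree angle at 1500 units now powerslides; at 2500 units it no longer does.
print(aim_flags(90.0, 1500.0, False, steer=1.0))   # (1.0, True, 0.0)
print(aim_flags(90.0, 2500.0, False, steer=1.0))   # (1.0, False, 0.0)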
@@ -79,8 +80,8 @@ def get_output_vector(self, values):
xy_distance = self.distance(bot_pos.X, bot_pos.Y, ball_pos.X, ball_pos.Y)

# Boost when ball is far enough away
- boost = tf.logical_and(tf.greater(xy_distance, self.distance_from_ball_to_boost),
- tf.greater(car_boost, 34))
+ boost = tf.logical_and(tf.greater_equal(xy_distance, self.distance_from_ball_to_boost / car_boost),
+ tf.greater_equal(car_boost, 10))
full_throttle = 0.5 * tf.cast(tf.greater(xy_distance, self.distance_from_ball_to_go_fast), tf.float32)
throttle = full_throttle + tf.constant(0.5)
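
Note: the old boost rule used a fixed 1500-unit distance and a 34-boost floor; the new one divides the raised 2000-unit constant by the current boost amount, so a car with more stored boost will boost from closer range, down to a 10-boost floor. A plain-Python sketch with made-up values (car_boost assumed nonzero):

def should_boost(xy_distance, car_boost,
                 distance_from_ball_to_boost=2000.0):
    # New rule from the diff: the distance threshold shrinks as stored boost grows.
    return (xy_distance >= distance_from_ball_to_boost / car_boost
            and car_boost >= 10)

print(should_boost(300.0, 100))  # True: full boost, threshold is only 20 units
print(should_boost(300.0, 12))   # True: 2000 / 12 is roughly 167 units
print(should_boost(300.0, 5))    # False: below the 10-boost floor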

1 change: 1 addition & 0 deletions bot_manager.py
@@ -132,6 +132,7 @@ def run(self):
print('\n\n\n\n Match has ended so ending bot loop\n\n\n\n\n')
break

+ controller_input = None
# Run the Agent only if the gameInfo has updated.
tick_game_time = game_tick_packet.gameInfo.TimeSeconds
should_call_while_paused = datetime.now() - last_call_real_time >= MAX_AGENT_CALL_PERIOD
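
Note: binding controller_input before the update check is a guard so code later in the loop that reads it cannot hit an unbound name on ticks where the agent is not called. A generic sketch of the pattern (not the actual bot_manager loop; the neutral fallback value is made up):

def run_tick(game_updated, get_output):
    controller_input = None                # always bound, even on ticks where the agent is skipped
    if game_updated:
        controller_input = get_output()
    # downstream code can test for None instead of hitting an unbound name
    return controller_input if controller_input is not None else [0.0] * 8

print(run_tick(False, lambda: [1.0] * 8))  # neutral fallback controls
print(run_tick(True, lambda: [1.0] * 8))   # agent output passed through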
16 changes: 11 additions & 5 deletions conversions/input/input_formatter.py
@@ -118,11 +118,17 @@ def get_car_info(self, game_tick_packet, index):
player_team = game_tick_packet.gamecars[index].Team
player_boost = game_tick_packet.gamecars[index].Boost
last_touched_ball = self.get_last_touched_ball(game_tick_packet.gamecars[index], game_tick_packet.gameball.LatestTouch)
- car_array = [player_x, player_y, player_z, player_pitch, player_yaw, player_roll,
- player_speed_x, player_speed_y, player_speed_z, player_angular_speed_x,
- player_angular_speed_y, player_angular_speed_z,
- player_on_ground, player_supersonic, player_demolished, player_jumped,
- player_double_jumped, player_team, player_boost, last_touched_ball]
+ car_array = [player_x, player_y, player_z,
+ player_pitch, player_yaw, player_roll,
+ player_speed_x, player_speed_y, player_speed_z,
+ player_angular_speed_x, player_angular_speed_y, player_angular_speed_z,
+ player_on_ground,
+ player_supersonic,
+ player_demolished,
+ player_jumped, player_double_jumped,
+ player_team,
+ player_boost,
+ last_touched_ball]
return car_array

def get_last_touched_ball(self, car, latest_touch):
16 changes: 8 additions & 8 deletions conversions/output_formatter.py
@@ -85,14 +85,14 @@ def get_car_info(array, index):
car_info.Rotation = create_3D_rotation(array, index + 3)
car_info.Velocity = create_3D_point(array, index + 6)
car_info.AngularVelocity = create_3D_point(array, index + 9)
- car_info.bOnGround = array[12]
- car_info.bSuperSonic = array[13]
- car_info.bDemolished = array[14]
- car_info.bJumped = array[15]
- car_info.bDoubleJumped = array[16]
- car_info.Team = array[17]
- car_info.Boost = array[18]
- car_info.bLastTouchedBall = array[19]
+ car_info.bOnGround = array[index + 12]
+ car_info.bSuperSonic = array[index + 13]
+ car_info.bDemolished = array[index + 14]
+ car_info.bJumped = array[index + 15]
+ car_info.bDoubleJumped = array[index + 16]
+ car_info.Team = array[index + 17]
+ car_info.Boost = array[index + 18]
+ car_info.bLastTouchedBall = array[index + 19]
return car_info
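
Note: the previous accessors used absolute positions (array[12] through array[19]), which only line up when the car block starts at index 0; for any other index they read the first car's fields. A plain-Python sketch of the difference, using a fake flat array of two 20-field car blocks (field 18 is Boost, matching input_formatter's layout):

# Two fake car blocks of 20 values each, flattened into one list.
car0 = list(range(0, 20))        # values 0..19
car1 = list(range(100, 120))     # values 100..119
flat = car0 + car1

def get_boost_old(array, index):
    return array[18]             # old code: ignores where this car's block starts

def get_boost_new(array, index):
    return array[index + 18]     # fixed code: offset into the right block

print(get_boost_old(flat, 20))   # 18  - wrongly reads car0's boost
print(get_boost_new(flat, 20))   # 118 - car1's boost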


7 changes: 4 additions & 3 deletions modelHelpers/actions/dynamic_action_handler.py
@@ -270,8 +270,9 @@ def create_action_indexes_graph(self, real_action, batch_size=None):
elif indexes[action_index] is None:
indexes[action_index] = tf.squeeze(real_control, axis=1)

- combo_action = self._create_combo_index_graph(combo_list, real_action)
- indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)
+ if len(self.combo_list) > 0:
+ combo_action = self._create_combo_index_graph(combo_list, real_action)
+ indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)

result = tf.stack(indexes, axis=1)
return result
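
Note: the combo index graph is now built only when combo actions exist; with an empty combo_list the unconditional call presumably failed or produced an unused column. A toy sketch of the guard (stand-in helpers, not the real handler):

def build_indexes(controls, combo_list, make_combo_index):
    indexes = list(controls)
    if len(combo_list) > 0:                       # guard added in the diff
        indexes.append(make_combo_index(combo_list))
    return indexes

print(build_indexes([0, 1], [], lambda combos: -1))   # [0, 1] - no combo column
print(build_indexes([0, 1], ['jump+boost'], len))     # [0, 1, 1]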
@@ -294,7 +295,7 @@ def get_action_loss_from_logits(self, logits, labels, index):
def get_last_layer_activation_function(self, func, index):
if self.is_classification(index):
return func
- return None
+ return tf.nn.tanh

def scale_layer(self, layer, index):
"""
4 changes: 3 additions & 1 deletion modelHelpers/data_normalizer.py
@@ -170,7 +170,9 @@ def apply_normalization(self, input_array):
# error_prevention = tf.cast(tf.equal(diff, 0.0), tf.float32)
# diff = diff + error_prevention

- result = (input_array - min) / diff

+ #result = (input_array - min) / diff
+ result = input_array / diff
#result = tf.Print(result, [min], 'min', summarize=16)
#result = tf.Print(result, [max], 'max', summarize=16)
#result = tf.Print(result, [input_array[0]], 'inp', summarize=30)
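
Note: normalization now divides by the feature range only, without first subtracting the minimum, so values keep their sign and offset instead of being shifted toward [0, 1]. A plain-Python sketch of the two variants with a made-up feature range:

def normalize_old(x, lo, hi):
    return (x - lo) / (hi - lo)      # shifts and scales into [0, 1]

def normalize_new(x, lo, hi):
    return x / (hi - lo)             # scales only, keeps the original sign and offset

# feature range [-1000, 1000], sample value 500
print(normalize_old(500.0, -1000.0, 1000.0))  # 0.75
print(normalize_new(500.0, -1000.0, 1000.0))  # 0.25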
26 changes: 10 additions & 16 deletions models/actor_critic/base_actor_critic.py
@@ -16,7 +16,6 @@ class BaseActorCritic(base_reinforcement.BaseReinforcement):
forced_frame_action = 500
is_graphing = False
keep_prob = 0.5
- reg_param = 0.001

first_layer_name = 'first_layer'
hidden_layer_name = 'hidden_layer'
@@ -57,7 +56,9 @@ def printParameters(self):
print('network size', self.network_size)
print('number of layers', self.num_layers)
print('keep probability', self.keep_prob)
- print('regulation parameter', self.reg_param)

+ def get_activation(self):
+ return tf.nn.elu # tf.nn.relu6

def load_config_file(self):
super().load_config_file()
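
Note: the new get_activation() hook centralizes the choice of activation function; actor_network now asks for it once instead of hard-coding tf.nn.relu6, so a subclass can switch activations by overriding one method. A minimal plain-Python sketch of the pattern (class names and the numeric stand-ins are illustrative, not from the repo):

class BaseNetwork:
    # Hook introduced by the diff: subclasses override this instead of editing actor_network.
    def get_activation(self):
        return lambda x: max(x, 0.0)            # stand-in for tf.nn.elu

    def build_layer(self, x):
        activation = self.get_activation()      # used for the first and hidden layers
        return activation(x)

class ClippedNetwork(BaseNetwork):
    def get_activation(self):
        return lambda x: min(max(x, 0.0), 6.0)  # stand-in for tf.nn.relu6

print(BaseNetwork().build_layer(8.0))     # 8.0
print(ClippedNetwork().build_layer(8.0))  # 6.0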
@@ -212,7 +213,7 @@ def sample_action(self, input_state):
else:
action_scores = self.sess.run([self.smart_max],
{self.input_placeholder: input_state})
- print(action_scores)
+ # print(action_scores)

action_scores = np.array(action_scores).flatten()
return action_scores
@@ -243,15 +244,16 @@ def actor_network(self, input_states, variable_list=None, last_layer_list=None,
last_layer_list = [[] for _ in range(len(self.action_handler.get_action_sizes()))]
# define policy neural network
actor_prefix = 'actor'
+ activation = self.get_activation()
# input_states = tf.Print(input_states, [input_states], summarize=self.network_size, message='')
with tf.variable_scope(self.first_layer_name):
- layer1, _ = self.create_layer(tf.nn.relu6, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix,
+ layer1, _ = self.create_layer(activation, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix,
variable_list=variable_list, dropout=False)
layers_list.append([layer1])

# layer1 = tf.Print(layer1, [layer1], summarize=self.network_size, message='')

- inner_layer, output_size = self.create_hidden_layers(tf.nn.relu6, layer1, self.network_size, actor_prefix,
+ inner_layer, output_size = self.create_hidden_layers(activation, layer1, self.network_size, actor_prefix,
variable_list=variable_list, layers_list=layers_list)

output_layer = self.create_last_layer(tf.nn.sigmoid, inner_layer, output_size,
@@ -288,19 +290,11 @@ def parse_actions(self, taken_actions):

def log_output_data(self):
"""Logs the output of the last layer of the model"""
- for i in range(self.action_handler.get_number_actions()):
- variable_name = str(self.action_handler.action_list_names[i])
- with tf.variable_scope(variable_name):
+ with tf.name_scope('model_output'):
+ for i in range(self.action_handler.get_number_actions()):
+ variable_name = str(self.action_handler.action_list_names[i])
tf.summary.histogram(variable_name + '_output', self.actor_last_row_layer[i])

- def get_regularization_loss(self, variables, prefix=None):
- normalized_variables = [tf.reduce_sum(tf.nn.l2_loss(x) * self.reg_param)
- for x in variables]

- reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss'))
- tf.summary.scalar(prefix + '_reg_loss', reg_loss)
- return reg_loss

def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None,
layers_list=[]):
with tf.variable_scope(self.hidden_layer_name):
20 changes: 9 additions & 11 deletions models/actor_critic/policy_gradient.py
@@ -5,9 +5,10 @@
from models import base_model
from models.actor_critic.base_actor_critic import BaseActorCritic
from modelHelpers import tensorflow_reward_manager
+ from models.actor_critic.split_layers import SplitLayers


- class PolicyGradient(BaseActorCritic):
+ class PolicyGradient(SplitLayers):
max_gradient = 1.0
total_loss_divider = 1.0

@@ -90,7 +91,7 @@ def create_actor_gradients(self, logprobs, taken_actions):

total_loss = total_loss / self.total_loss_divider

- # total_loss += actor_reg_loss
+ total_loss += actor_reg_loss

# total_loss = tf.Print(total_loss, [total_loss], 'total_loss')

@@ -114,6 +115,8 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac

# calculates the entropy loss from getting the label wrong
cross_entropy_loss, wrongness, reduced = self.calculate_loss_of_actor(logprobs, taken_actions, index)
+ if reduced:
+ cross_entropy_loss = tf.reduce_mean(cross_entropy_loss)
if not reduced:
if self.action_handler.is_classification(index):
tf.summary.histogram('actor_wrongness', wrongness)
@@ -132,7 +135,7 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac

actor_reg_loss = self.get_regularization_loss(actor_network_variables, prefix="actor")

- actor_loss = actor_loss + actor_reg_loss * self.reg_param
+ actor_loss = actor_loss + actor_reg_loss

# compute actor gradients
actor_gradients = self.optimizer.compute_gradients(actor_loss,
@@ -150,15 +153,11 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac
return [actor_gradients, actor_loss]

def create_critic_gadients(self):
- critic_reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in self.critic_network_variables],
- name='critic_reg_loss')

- tf.summary.scalar("critic_reg_loss", critic_reg_loss)

+ critic_reg_loss = self.get_regularization_loss(self.critic_network_variables, prefix='critic')
# compute critic gradients
mean_square_loss = tf.reduce_mean(tf.square(self.discounted_rewards - self.estimated_values), name='mean_square_loss')

- critic_loss = mean_square_loss + self.reg_param * critic_reg_loss
+ critic_loss = mean_square_loss + critic_reg_loss
tf.summary.scalar("critic_loss", critic_loss)
critic_gradients = self.optimizer.compute_gradients(critic_loss, self.critic_network_variables)
return (critic_gradients, critic_loss, critic_reg_loss)
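
Note: the hand-rolled L2 sum and the extra self.reg_param factor are replaced by a single call to the shared get_regularization_loss helper (presumably now provided by the SplitLayers base class imported above; that file is not among the loaded hunks). A plain-Python sketch of the composition, with a made-up reg_param of 0.001 (the value this commit removes from BaseActorCritic):

def regularization_loss(weights, reg_param=0.001):
    # Stand-in for the shared helper: L2 penalty that is already scaled by reg_param.
    return reg_param * sum(w * w / 2.0 for w in weights)

def critic_loss(predicted, target, weights):
    mean_square_loss = sum((t - p) ** 2 for p, t in zip(predicted, target)) / len(target)
    return mean_square_loss + regularization_loss(weights)   # no second reg_param factor here

print(critic_loss([0.5, 1.0], [1.0, 1.0], weights=[2.0, -3.0]))  # 0.125 + 0.0065 = 0.1315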
@@ -206,6 +205,5 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index):
:param cross_entropy_loss:
:return: The calculated_tensor, If the result is a scalar.
"""
- return tf.reduce_mean(
- self.action_handler.get_action_loss_from_logits(logprobs, taken_actions, index)), 1.0, True
+ return self.action_handler.get_action_loss_from_logits(logprobs, taken_actions, index), 1.0, True
