Skip to content

Commit

Permalink
Add a constant -1 penalty to the reward
Browse the repository at this point in the history
  • Loading branch information
Flavian Hautbois committed Feb 27, 2018
1 parent ae732ca commit 6147371
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='super-mario',
version='1.0.0',
version='2.0.0',
description='Gym User Env - 32 levels of Super Mario Bros',
url='https://github.com/sicara/gym_super_mario',
author='Philip Paquette',
Expand Down
4 changes: 2 additions & 2 deletions super_mario/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
tile_suffix = '-Tiles' if draw_tiles == 1 else ''

register(
id='meta-SuperMarioBros{}-v1'.format(tile_suffix),
id='meta-SuperMarioBros{}-v2'.format(tile_suffix),
entry_point='super_mario:MetaSuperMarioBrosEnv',
max_episode_steps=9999999,
reward_threshold=32000,
Expand All @@ -30,7 +30,7 @@
for (world_number, level_number, area_number, max_distance) in SMB_LEVELS:
level = (world_number - 1) * 4 + (level_number - 1)
register(
id='SuperMarioBros-{}-{}{}-v1'.format(world_number, level_number, tile_suffix),
id='SuperMarioBros-{}-{}{}-v2'.format(world_number, level_number, tile_suffix),
entry_point='super_mario:SuperMarioBrosEnv',
max_episode_steps=10000,
reward_threshold=(max_distance - 40),
Expand Down
3 changes: 2 additions & 1 deletion super_mario/nes_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gym import utils, spaces
from gym.utils import seeding

PENALTY_NOT_MOVING = 1 # Constant penalty subtracted from the reward each step (note: applied unconditionally, not only when Mario is stationary — see _get_reward)
DEFAULT_REWARD_DEATH = -2 # Negative reward when Mario dies
DISTANCE_START = 40 # Distance at which Mario starts in the level
STUCK_DURATION = 100 # Time limit in seconds before Mario is considered stuck
Expand Down Expand Up @@ -301,7 +302,7 @@ def _get_reward(self):
self.info['score'] -
self.old_info.get('score', 0)
)
self.reward = distance_since_last_frame + score_since_last_frame
self.reward = distance_since_last_frame + score_since_last_frame - PENALTY_NOT_MOVING

if self._get_is_finished and (self._is_dead() or self._is_stuck()):
self.reward = self.reward_death
Expand Down

0 comments on commit 6147371

Please sign in to comment.