Skip to content

Commit

Permalink
Add a constant -1 penalty to the reward
Browse the repository at this point in the history
  • Loading branch information
Flavian Hautbois committed Feb 27, 2018
1 parent ae732ca commit 6147371
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='super-mario',
version='1.0.0',
version='2.0.0',
description='Gym User Env - 32 levels of Super Mario Bros',
url='https://github.com/sicara/gym_super_mario',
author='Philip Paquette',
Expand Down
4 changes: 2 additions & 2 deletions super_mario/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
tile_suffix = '-Tiles' if draw_tiles == 1 else ''

register(
id='meta-SuperMarioBros{}-v1'.format(tile_suffix),
id='meta-SuperMarioBros{}-v2'.format(tile_suffix),
entry_point='super_mario:MetaSuperMarioBrosEnv',
max_episode_steps=9999999,
reward_threshold=32000,
Expand All @@ -30,7 +30,7 @@
for (world_number, level_number, area_number, max_distance) in SMB_LEVELS:
level = (world_number - 1) * 4 + (level_number - 1)
register(
id='SuperMarioBros-{}-{}{}-v1'.format(world_number, level_number, tile_suffix),
id='SuperMarioBros-{}-{}{}-v2'.format(world_number, level_number, tile_suffix),
entry_point='super_mario:SuperMarioBrosEnv',
max_episode_steps=10000,
reward_threshold=(max_distance - 40),
Expand Down
3 changes: 2 additions & 1 deletion super_mario/nes_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gym import utils, spaces
from gym.utils import seeding

PENALTY_NOT_MOVING = 1 # Constant penalty subtracted from the reward each step (note: applied unconditionally, not only when Mario is stationary — see _get_reward)
DEFAULT_REWARD_DEATH = -2 # Negative reward when Mario dies
DISTANCE_START = 40 # Distance at which Mario starts in the level
STUCK_DURATION = 100 # Time limit in seconds before Mario is considered stuck
Expand Down Expand Up @@ -301,7 +302,7 @@ def _get_reward(self):
self.info['score'] -
self.old_info.get('score', 0)
)
self.reward = distance_since_last_frame + score_since_last_frame
self.reward = distance_since_last_frame + score_since_last_frame - PENALTY_NOT_MOVING

if self._get_is_finished and (self._is_dead() or self._is_stuck()):
self.reward = self.reward_death
Expand Down

0 comments on commit 6147371

Please sign in to comment.