
Add benchmark metrics to simulator #823

Open · wants to merge 38 commits into base: og-develop (diff shown: changes from 5 commits)
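As context for reviewers, a minimal sketch of how the new metrics could be consumed once merged. This is hypothetical usage: the config contents, episode length, and reading the internal task._metrics attribute are illustrative assumptions, not part of this diff; step(..., time_step=True) and task._metrics themselves are introduced below.

import omnigibson as og

# Hypothetical minimal config; any valid OmniGibson environment config works here
cfg = {"scene": {"type": "Scene"}}
env = og.Environment(configs=cfg)
env.reset()

for _ in range(100):
    action = env.action_space.sample()
    # time_step=True enables the wall-time bookkeeping added in this PR
    obs, reward, terminated, truncated, info = env.step(action, time_step=True)

# One score per metric, accumulated by the task over the episode:
# {"steps": ..., "task_success": ..., "wall_time": ..., "energy": ..., "work": ...}
print(env.task._metrics)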
Commits (38):
- 457a8be: add metrics to simulator (steven200796, Jul 5, 2023)
- 1910c1b: build-push action for docker (steven200796, Jul 19, 2023)
- 374f842: merged with og-develop (Jul 29, 2024)
- b536a4c: action primitives initial commit (Jul 30, 2024)
- 067cded: Delete .github/workflows/ci.yml (cgokmen, Jul 30, 2024)
- a14f99d: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 30, 2024)
- b58f38c: Delete scripts/download_datasets.py (cgokmen, Jul 30, 2024)
- 30de844: metric refactor (Jul 30, 2024)
- e32ebb4: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 30, 2024)
- a15102a: updated energy metric comment (Jul 30, 2024)
- fb3402f: updated energy metric comment (Jul 30, 2024)
- 420edd2: setting metric to 0 (Jul 30, 2024)
- ba7e992: metrics change (Jul 31, 2024)
- ae74fc5: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2024)
- 328e772: fixed base robot fix (Aug 2, 2024)
- aa2f4c1: Merge branch 'metrics' of https://github.com/StanfordVL/OmniGibson in… (Aug 2, 2024)
- 0b3b1d1: object initialization bugfix (Aug 5, 2024)
- 628490b: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 5, 2024)
- dbec742: metric tests for work added (Aug 6, 2024)
- f2a9997: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 6, 2024)
- d267a51: pending for _dummy (Aug 12, 2024)
- 75aa0ed: pending for _dummy (Aug 12, 2024)
- b6fc0ea: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 12, 2024)
- b20bc4a: metric refactoring (Aug 13, 2024)
- f018043: refactoring (Aug 13, 2024)
- a60f36c: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 13, 2024)
- c7eed0e: refactoring (Aug 13, 2024)
- 4773e07: work energy metric (Aug 13, 2024)
- 88c2105: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 13, 2024)
- a9ef20e: corrected rotational inertia calculation (Aug 15, 2024)
- efd3353: rotational work fix (Aug 15, 2024)
- 9f51314: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 15, 2024)
- 85dabe9: metric work energy finished (Aug 16, 2024)
- 5ecf8cf: work energy metric complete (Aug 16, 2024)
- 0227d71: metric functions (Sep 10, 2024)
- 539da44: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 10, 2024)
- 1446b9b: styling fix (Sep 10, 2024)
- 7806a71: Merge branch 'metrics' of https://github.com/StanfordVL/OmniGibson in… (Sep 10, 2024)
51 changes: 46 additions & 5 deletions omnigibson/envs/env_base.py
@@ -25,6 +25,12 @@
merge_nested_dicts,
)
from omnigibson.utils.ui_utils import create_module_logger
from omnigibson.utils.python_utils import (
Recreatable,
assert_valid_key,
create_class_from_registry_and_config,
merge_nested_dicts,
)


import time


# Create module logger
log = create_module_logger(module_name=__name__)
@@ -50,6 +56,17 @@ def __init__(self, configs, in_vec_env=False):
self.render_mode = "rgb_array"
self.metadata = {"render.modes": ["rgb_array"]}

# Initialize other placeholders that will be filled in later
self._initial_pos_z_offset = None # how high to offset object placement to account for one action step of dropping
self._task = None
self._loaded = None
self._current_episode = 0

# Wall-clock timestamps (seconds) bracketing the simulation portion of a step
self._prev_sim_end_ts = 0
self._cur_sim_start_ts = 0

# Variables reset at the beginning of each episode
self._current_step = 0
# Store if we are part of a vec env
self.in_vec_env = in_vec_env

@@ -531,8 +548,13 @@ def _populate_info(self, info):
if self._scene_graph_builder is not None:
info["scene_graph"] = self.get_scene_graph()

def _pre_step(self, action):
def _pre_step(self, action, time_step=False):
"""Apply the pre-sim-step part of an environment step, i.e. apply the robot actions."""

# Record the wall-clock time at which this step's simulation work begins
# (time.clock() was removed in Python 3.8; use time.perf_counter() instead)
if time_step:
self._cur_sim_start_ts = time.perf_counter()

# If the action is not a dictionary, convert into a dictionary
if not isinstance(action, dict) and not isinstance(action, gym.spaces.Dict):
action_dict = dict()
@@ -549,7 +571,7 @@ def _pre_step(self, action):
for robot in self.robots:
robot.apply_action(action_dict[robot.name])

def _post_step(self, action):
def _post_step(self, action, time_step=False):
"""Apply the post-sim-step part of an environment step, i.e. grab observations and return the step results."""
# Grab observations
obs, obs_info = self.get_obs()
@@ -560,6 +582,7 @@

# Grab reward, done, and info, and populate with internal info
reward, done, info = self.task.step(self, action)

self._populate_info(info)
info["obs_info"] = obs_info

@@ -581,9 +604,14 @@

# Increment step
self._current_step += 1

# Record the wall-clock time at which this step's simulation work ends
if time_step:
self._prev_sim_end_ts = time.perf_counter()

return obs, reward, terminated, truncated, info

def step(self, action):
def step(self, action, time_step=False):
"""
Apply robot's action and return the next state, reward, done and info,
following OpenAI Gym's convention
@@ -601,9 +629,9 @@ def step(self, action):
- bool: truncated, i.e. whether this episode ended due to a time limit etc.
- dict: info, i.e. dictionary with any useful information
"""
self._pre_step(action)
self._pre_step(action, time_step=time_step)
og.sim.step()
return self._post_step(action)
return self._post_step(action, time_step=time_step)

def render(self):
"""Render the environment for debug viewing."""
@@ -633,6 +661,9 @@ def _reset_variables(self):
"""
self._current_episode += 1
self._current_step = 0
self._prev_sim_end_ts = 0
self._cur_sim_start_ts = 0


def reset(self, get_obs=True, **kwargs):
"""
@@ -690,6 +721,16 @@ def reset(self, get_obs=True, **kwargs):

return obs, {}

@property
def last_step_wall_time(self):
"""
Returns:
float: wall-clock time elapsed between the end of the previous simulation step and the start of the current one, i.e. time spent outside the simulator (such as in the policy); 0 until both timestamps have been recorded
"""
if self._prev_sim_end_ts == 0 or self._cur_sim_start_ts == 0:
return 0
return self._cur_sim_start_ts - self._prev_sim_end_ts

@property
def episode_steps(self):
"""
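To make the timing semantics concrete, here is a standalone toy sketch (hypothetical, not part of this diff) mirroring the order of operations in _pre_step/_post_step. It shows that last_step_wall_time captures the gap between simulation steps (policy time), not the duration of the simulation step itself:

import time

prev_sim_end_ts = 0.0
cur_sim_start_ts = 0.0

def fake_step(policy_seconds, sim_seconds):
    """One env.step(): the policy runs first, then the simulator advances."""
    global prev_sim_end_ts, cur_sim_start_ts
    time.sleep(policy_seconds)              # everything outside the simulator
    cur_sim_start_ts = time.perf_counter()  # recorded in _pre_step
    time.sleep(sim_seconds)                 # og.sim.step()
    # task.step() runs here in _post_step, so WallTimeMetric observes
    # cur_sim_start_ts - prev_sim_end_ts: the inter-step (policy) gap
    if prev_sim_end_ts:
        print(f"policy gap: {cur_sim_start_ts - prev_sim_end_ts:.3f} s")
    prev_sim_end_ts = time.perf_counter()   # recorded at the end of _post_step

fake_step(policy_seconds=0.02, sim_seconds=0.01)
fake_step(policy_seconds=0.02, sim_seconds=0.01)  # prints ~0.020, not 0.010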
4 changes: 4 additions & 0 deletions omnigibson/reward_functions/__init__.py
@@ -3,4 +3,8 @@
from omnigibson.reward_functions.point_goal_reward import PointGoalReward
from omnigibson.reward_functions.potential_reward import PotentialReward
from omnigibson.reward_functions.reaching_goal_reward import ReachingGoalReward
from omnigibson.reward_functions.step_metric import StepMetric
from omnigibson.reward_functions.wall_time_metric import WallTimeMetric
from omnigibson.reward_functions.energy_metric import EnergyMetric
from omnigibson.reward_functions.task_success_metric import TaskSuccessMetric
from omnigibson.reward_functions.reward_function_base import REGISTERED_REWARD_FUNCTIONS, BaseRewardFunction
58 changes: 58 additions & 0 deletions omnigibson/reward_functions/energy_metric.py
@@ -0,0 +1,58 @@
import numpy as np

from omnigibson.reward_functions.reward_function_base import BaseRewardFunction


class EnergyMetric(BaseRewardFunction):
"""
Energy Metric

Measures mass-weighted translation summed over every link in the scene

Args:
measure_work: If True, measure the net beginning-to-end displacement (work) rather than accumulating step-by-step deltas (energy)
"""

def __init__(self, measure_work=False):
# Run super
super().__init__()
self._reward = 0
# Whether the reference poses and link masses have been cached yet
self.initialized = False
# Maps link name -> (position, orientation) of the reference step
self.state_cache = {}
# Maps link name -> link mass
self.link_masses = {}
self.measure_work = measure_work

def calculate_displacement(self, posrot, posrot2):
# Translation-only delta; orientation (index 1 of each pose tuple) is ignored
return np.linalg.norm(posrot[0] - posrot2[0])

def _step(self, task, env, action):
# Cache the current pose of every link of every object in the scene
new_state_cache = {}
for obj in env.scene.objects:
for link_name, link in obj._links.items():
pos, rot = link.get_position_orientation()
new_state_cache[link_name] = (pos, rot)

# On the first step, store the reference poses and link masses and report 0
if not self.initialized:
self.initialized = True
self.state_cache = new_state_cache

for obj in env.scene.objects:
for link_name, link in obj._links.items():
self.link_masses[link_name] = link.mass
return 0.0, {}

# Mass-weighted translation of each link relative to the cached reference
work_metric = 0.0
for linkname, posrot in new_state_cache.items():
work_metric += self.calculate_displacement(posrot, self.state_cache[linkname]) * self.link_masses[linkname]

if self.measure_work:
# Work mode: the reference stays at the initial pose, so recompute the
# net displacement from scratch each step instead of accumulating
self._reward = 0
else:
# Energy mode: advance the reference so each step adds its own path length
self.state_cache = new_state_cache

self._reward += work_metric
return self._reward, {}

def reset(self, task, env):
super().reset(task, env)
self.state_cache = {}
self.initialized = False
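For intuition on the two modes, a standalone toy sketch (masses and poses are invented for illustration): moving a 10 kg base 1 m forward and then 1 m back yields zero "work" (net displacement) but 20 kg·m of "energy" (path length).

import numpy as np

masses = {"base_link": 10.0}
initial = {"base_link": np.array([0.0, 0.0, 0.0])}
poses = [  # the base moves 1 m forward, then returns
    {"base_link": np.array([1.0, 0.0, 0.0])},
    {"base_link": np.array([0.0, 0.0, 0.0])},
]

energy, prev = 0.0, initial
for cur in poses:
    # energy mode: accumulate per-step path length, mass-weighted
    energy += sum(np.linalg.norm(cur[k] - prev[k]) * masses[k] for k in masses)
    prev = cur

# work mode: net displacement from the initial pose, mass-weighted
work = sum(np.linalg.norm(poses[-1][k] - initial[k]) * masses[k] for k in masses)

print(energy, work)  # 20.0 0.0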
17 changes: 17 additions & 0 deletions omnigibson/reward_functions/step_metric.py
@@ -0,0 +1,17 @@
from omnigibson.reward_functions.reward_function_base import BaseRewardFunction


class StepMetric(BaseRewardFunction):
"""
Step Metric
Counts environment steps: increments once per step taken in the current episode
"""

def __init__(self):
# Run super
super().__init__()
self._reward = 0

def _step(self, task, env, action):
self._reward += 1
return self._reward, {}
30 changes: 30 additions & 0 deletions omnigibson/reward_functions/task_success_metric.py
@@ -0,0 +1,30 @@
from omnigibson.reward_functions.reward_function_base import BaseRewardFunction

class TaskSuccessMetric(BaseRewardFunction):
"""
TaskSuccessMetric
Metric for partial or full task success
"""

def __init__(self):
# Run super
super().__init__()
self._reward = 0

def _step(self, task, env, action):
successes = []
partial_successes = []
# Termination conditions have already been stepped by the task this env
# step (see task.step), so read their cached results rather than stepping
# them a second time, which would corrupt stateful conditions like timeouts
for termination_condition in task._termination_conditions.values():
partial_success = termination_condition.partial_success
if partial_success >= 0.0:
partial_successes.append(partial_success)
# success <=> done and non-failure
successes.append(termination_condition.success)

# Any full success dominates; otherwise average the partial successes
if sum(successes) > 0:
self._reward = 1.0
elif partial_successes:
self._reward = sum(partial_successes) / len(partial_successes)
else:
self._reward = 0.0
return self._reward, {}
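A toy illustration of the aggregation rule above (all values hypothetical): any fully successful condition forces the score to 1.0; otherwise the partial successes are averaged.

successes = [False, False]        # no condition fully succeeded
partial_successes = [0.25, 0.75]  # e.g. fractions of satisfied goal predicates

if sum(successes) > 0:
    score = 1.0
elif partial_successes:
    score = sum(partial_successes) / len(partial_successes)
else:
    score = 0.0

print(score)  # 0.5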
17 changes: 17 additions & 0 deletions omnigibson/reward_functions/wall_time_metric.py
@@ -0,0 +1,17 @@
from omnigibson.reward_functions.reward_function_base import BaseRewardFunction


class WallTimeMetric(BaseRewardFunction):
"""
WallTimeMetric
Accumulates wall-clock time spent outside the simulator (e.g. in the policy) across an episode, as reported by env.last_step_wall_time
"""

def __init__(self):
# Run super
super().__init__()
self._reward = 0

def _step(self, task, env, action):
self._reward += env.last_step_wall_time
return self._reward, {}
50 changes: 49 additions & 1 deletion omnigibson/tasks/task_base.py
@@ -6,6 +6,8 @@
from omnigibson.utils.gym_utils import GymObservable
from omnigibson.utils.python_utils import Registerable, classproperty

from omnigibson.reward_functions import StepMetric, TaskSuccessMetric, WallTimeMetric, EnergyMetric

REGISTERED_TASKS = dict()


@@ -48,6 +50,7 @@ def __init__(self, termination_config=None, reward_config=None):
# Generate reward and termination functions
self._termination_conditions = self._create_termination_conditions()
self._reward_functions = self._create_reward_functions()
self._metric_functions = self._create_metric_functions()

# Store other internal vars that will be populated at runtime
self._loaded = False
@@ -144,6 +147,23 @@ def _create_reward_functions(self):
"""
raise NotImplementedError()

def _create_metric_functions(self):
"""
Creates the metric functions in the environment

Returns:
dict of BaseRewardFunction: Metric functions created for this task
"""
metrics = dict()

metrics["steps"] = StepMetric()
metrics["task_success"] = TaskSuccessMetric()
metrics["wall_time"] = WallTimeMetric()
metrics["energy"] = EnergyMetric()
metrics["work"] = EnergyMetric(measure_work=True)

return metrics

def _reset_scene(self, env):
"""
Task-specific scene reset. Default is the normal scene reset
@@ -193,6 +213,10 @@ def reset(self, env):
termination_condition.reset(self, env)
for reward_function in self._reward_functions.values():
reward_function.reset(self, env)
for metric_function in self._metric_functions.values():
metric_function.reset(self, env)
# Metrics accumulate into _reward over an episode; zero it here explicitly
# until each metric's own reset() handles this
metric_function._reward = 0.0


def _step_termination(self, env, action, info=None):
"""
@@ -261,6 +285,28 @@ def _step_reward(self, env, action, info=None):

return total_reward, total_info

def _step_metrics(self, env, action):
"""
Step and aggregate metric functions

Args:
env (Environment): Environment instance
action (n-array): 1D flattened array of actions executed by all agents in the environment

Returns:
2-tuple:
- dict: breakdown of the metric scores, keyed by metric name
- dict: any metric-specific info, keyed by metric name
"""
# Store the individual metric scores so callers can report them separately
breakdown_dict = dict()
info_dict = dict()

for metric_name, metric_function in self._metric_functions.items():
metric, metric_info = metric_function.step(self, env, action)
breakdown_dict[metric_name] = metric
info_dict[metric_name] = metric_info

return breakdown_dict, info_dict


@abstractmethod
def _get_obs(self, env):
"""
@@ -320,16 +366,18 @@ def step(self, env, action):
# Make sure we're initialized
assert self._loaded, "Task must be loaded using load() before calling step()!"

# We calculate termination conditions first, then rewards, then metrics
# (rewards and metrics can rely on the freshly stepped termination conditions)
done, done_info = self._step_termination(env=env, action=action)
reward, reward_info = self._step_reward(env=env, action=action)
metric_score, metrics_info = self._step_metrics(env=env, action=action)

# Update the internal state of this task
self._reward = reward
self._metrics = metric_score
self._done = done
self._success = done_info["success"]
self._info = {
"metrics": metrics_info,
"reward": reward_info,
"done": done_info,
}
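For reviewers tracing the data flow: after each step the per-metric scores land in task._metrics (via the assignment above) and the per-metric info dicts land in info["metrics"]. A hypothetical snapshot, with values invented purely for illustration:

metrics = env.task._metrics  # assumes a live `env` mid-episode
# e.g. {"steps": 42, "task_success": 0.5, "wall_time": 1.37,
#       "energy": 12.8, "work": 4.2}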
10 changes: 10 additions & 0 deletions omnigibson/termination_conditions/predicate_goal.py
@@ -44,3 +44,13 @@ def goal_status(self):
of the predicates matching either of those conditions
"""
return self._goal_status


@property
def partial_success(self):
"""
Returns:
float: partial success if supported, -1.0 otherwise
"""
assert self._done is not None, "At least one step() must occur before partial_success can be calculated!"
# Fraction of goal predicates currently satisfied, derived from the cached
# goal status (assumes the "satisfied"/"unsatisfied" keys of self._goal_status)
satisfied = self._goal_status["satisfied"]
unsatisfied = self._goal_status["unsatisfied"]
return len(satisfied) / (len(satisfied) + len(unsatisfied))
@@ -93,6 +93,15 @@ def success(self):
assert self._done is not None, "At least one step() must occur before success can be calculated!"
return self._done and self._terminate_is_success

@property
def partial_success(self):
"""
Returns:
float: partial success if supported, -1.0 otherwise
"""
assert self._done is not None, "At least one step() must occur before partial_success can be calculated!"
# Base termination conditions do not support partial success; subclasses
# that do (e.g. PredicateGoal) override this with a value in [0, 1].
# Returning -1.0 (not None) keeps the >= 0.0 check in TaskSuccessMetric valid.
return -1.0

@classproperty
def _terminate_is_success(cls):
"""