Skip to content

Commit

Permalink
Bugfix: `ref` should come before `obs` in the OrderedDict
Browse files Browse the repository at this point in the history
  • Loading branch information
simon-bachhuber committed Feb 18, 2023
1 parent 25e13dd commit edaddd0
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions cc/env/wrappers/add_reference_and_reward.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import OrderedDict
from types import FunctionType

import dm_env
Expand All @@ -8,6 +7,7 @@
from tree_utils import batch_concat, tree_slice

from ...core import AbstractObservationReferenceSource
from ...train.step_fn import merge_x_y
from ...utils.sample_from_spec import _spec_from_observation
from ...utils.utils import timestep_array_from_env

Expand Down Expand Up @@ -42,11 +42,9 @@ def __init__(
super().__init__(environment)

def _modify_timestep(self, timestep: dm_env.TimeStep):
padded_obs = OrderedDict()
padded_obs["obs"] = timestep.observation
padded_obs["ref"] = tree_slice(
self._source.get_reference_actor(), self._i_timestep
)
ref = tree_slice(self._source.get_reference_actor(), self._i_timestep)
obs = timestep.observation
padded_obs = merge_x_y(ref, obs)

# calculate reward
# dm_env follows the convention that the first timestep has no reward
Expand Down

0 comments on commit edaddd0

Please sign in to comment.