Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Linearise reward transform #2681

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions docs/source/reference/envs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,7 @@ to be able to create this other composition:
GrayScale
InitTracker
KLRewardTransform
LineariseRewards
NoopResetEnv
ObservationNorm
ObservationTransform
Expand Down
333 changes: 332 additions & 1 deletion test/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@
TensorSpec,
TensorStorage,
Unbounded,
UnboundedContinuous,
)
from torchrl.data.tensor_specs import BoundedContinuous
louisfaury marked this conversation as resolved.
Show resolved Hide resolved
from torchrl.envs import (
ActionMask,
BinarizeReward,
Expand All @@ -117,6 +119,7 @@
GrayScale,
gSDENoise,
InitTracker,
LineariseRewards,
MultiStepTransform,
NoopResetEnv,
ObservationNorm,
Expand Down Expand Up @@ -412,7 +415,7 @@ def test_transform_rb(self, rbclass):
assert ((sample["reward"] == 0) | (sample["reward"] == 1)).all()

def test_transform_inverse(self):
    """Skip: per the skip message, the binarized-reward transform has no inverse."""
    # A single skip statement; the misspelled duplicate that preceded it made
    # this one unreachable.
    raise pytest.skip("No inverse for BinarizedReward")


class TestClipTransform(TransformBase):
Expand Down Expand Up @@ -12403,6 +12406,334 @@ def test_transform_inverse(self):
pytest.skip("Tested elsewhere")


class TestLineariseRewards(TransformBase):
    """Tests for the ``LineariseRewards`` transform.

    The transform is exercised standalone, inside ``Compose``, wrapped around
    single / serial / parallel environments, inside an ``nn.Sequential`` model
    and attached to replay buffers. Its reward-spec transformation is checked
    for unbounded, bounded and composite specs.
    """

    def test_weight_shape_error(self):
        """A two-dimensional ``weights`` tensor is rejected with ``ValueError``."""
        with pytest.raises(
            ValueError, match="Expected weights to be a unidimensional tensor"
        ):
            LineariseRewards(in_keys=("reward",), weights=torch.ones(size=(2, 4)))

    def test_weight_sign_error(self):
        """Negative weights are rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Expected all weights to be >0"):
            LineariseRewards(in_keys=("reward",), weights=-torch.ones(size=(2,)))

    def test_discrete_spec_error(self):
        """Transforming a categorical reward spec raises ``NotImplementedError``."""
        # Build the fixtures outside of ``pytest.raises`` so that only the call
        # under test can satisfy the expected exception.
        transform = LineariseRewards(in_keys=("reward",))
        reward_spec = Categorical(n=2)
        with pytest.raises(
            NotImplementedError,
            match="Aggregation of rewards that take discrete values is not supported.",
        ):
            transform.transform_reward_spec(reward_spec)

    @pytest.mark.parametrize(
        "reward_spec",
        [
            UnboundedContinuous(shape=3),
            BoundedContinuous(0, 1, shape=2),
        ],
    )
    def test_single_trans_env_check(self, reward_spec: TensorSpec):
        """Specs are consistent for a single transformed environment."""
        env = TransformedEnv(
            ContinuousActionVecMockEnv(reward_spec=reward_spec),
            LineariseRewards(in_keys=["reward"]),  # will use default weights
        )
        check_env_specs(env)

    @pytest.mark.parametrize(
        "reward_spec",
        [
            UnboundedContinuous(shape=3),
            BoundedContinuous(0, 1, shape=2),
        ],
    )
    def test_serial_trans_env_check(self, reward_spec: TensorSpec):
        """Specs are consistent for a ``SerialEnv`` of transformed environments."""

        def make_env():
            return TransformedEnv(
                ContinuousActionVecMockEnv(reward_spec=reward_spec),
                LineariseRewards(in_keys=["reward"]),  # will use default weights
            )

        env = SerialEnv(2, make_env)
        check_env_specs(env)

    @pytest.mark.parametrize(
        "reward_spec",
        [
            UnboundedContinuous(shape=3),
            BoundedContinuous(0, 1, shape=2),
        ],
    )
    def test_parallel_trans_env_check(
        self, maybe_fork_ParallelEnv, reward_spec: TensorSpec
    ):
        """Specs are consistent for a parallel env of transformed environments."""

        def make_env():
            return TransformedEnv(
                ContinuousActionVecMockEnv(reward_spec=reward_spec),
                LineariseRewards(in_keys=["reward"]),  # will use default weights
            )

        env = maybe_fork_ParallelEnv(2, make_env)
        try:
            check_env_specs(env)
        finally:
            # Best-effort cleanup: a RuntimeError on close must not mask the
            # outcome of the spec check.
            try:
                env.close()
            except RuntimeError:
                pass

    @pytest.mark.parametrize(
        "reward_spec",
        [
            UnboundedContinuous(shape=3),
            BoundedContinuous(0, 1, shape=2),
        ],
    )
    def test_trans_serial_env_check(self, reward_spec: TensorSpec):
        """Specs are consistent when the transform wraps a ``SerialEnv``."""

        def make_env():
            return ContinuousActionVecMockEnv(reward_spec=reward_spec)

        env = TransformedEnv(
            SerialEnv(2, make_env), LineariseRewards(in_keys=["reward"])
        )
        check_env_specs(env)

    @pytest.mark.parametrize(
        "reward_spec",
        [
            UnboundedContinuous(shape=3),
            BoundedContinuous(0, 1, shape=2),
        ],
    )
    def test_trans_parallel_env_check(
        self, maybe_fork_ParallelEnv, reward_spec: TensorSpec
    ):
        """Specs are consistent when the transform wraps a parallel env."""

        def make_env():
            return ContinuousActionVecMockEnv(reward_spec=reward_spec)

        env = TransformedEnv(
            maybe_fork_ParallelEnv(2, make_env),
            LineariseRewards(in_keys=["reward"]),
        )
        try:
            check_env_specs(env)
        finally:
            # Best-effort cleanup: a RuntimeError on close must not mask the
            # outcome of the spec check.
            try:
                env.close()
            except RuntimeError:
                pass

    @pytest.mark.parametrize("reward_key", [("reward",), ("agents", "reward")])
    @pytest.mark.parametrize(
        "num_rewards, weights",
        [
            (1, None),
            (3, None),
            (2, [1.0, 2.0]),
        ],
    )
    def test_transform_no_env(self, reward_key, num_rewards, weights):
        """Calling the transform directly writes the weighted sum to the out key."""
        out_keys = reward_key[:-1] + ("scalar_reward",)
        t = LineariseRewards(in_keys=[reward_key], out_keys=[out_keys], weights=weights)
        td = TensorDict({reward_key: torch.randn(num_rewards)}, [])
        t._call(td)

        # ``weights=None`` is compared against an all-ones weighting.
        weights = torch.ones(num_rewards) if weights is None else torch.tensor(weights)
        expected = sum(
            w * r
            for w, r in zip(
                weights,
                td[reward_key],
            )
        )
        torch.testing.assert_close(td[out_keys], expected)

    @pytest.mark.parametrize("reward_key", [("reward",), ("agents", "reward")])
    @pytest.mark.parametrize(
        "num_rewards, weights",
        [
            (1, None),
            (3, None),
            (2, [1.0, 2.0]),
        ],
    )
    def test_transform_compose(self, reward_key, num_rewards, weights):
        """Same check as ``test_transform_no_env`` with the transform in ``Compose``."""
        out_keys = reward_key[:-1] + ("scalar_reward",)
        t = Compose(
            LineariseRewards(in_keys=[reward_key], out_keys=[out_keys], weights=weights)
        )
        td = TensorDict({reward_key: torch.randn(num_rewards)}, [])
        t._call(td)

        # ``weights=None`` is compared against an all-ones weighting.
        weights = torch.ones(num_rewards) if weights is None else torch.tensor(weights)
        expected = sum(
            w * r
            for w, r in zip(
                weights,
                td[reward_key],
            )
        )
        torch.testing.assert_close(td[out_keys], expected)

    class _DummyMultiObjectiveEnv(EnvBase):
        """A dummy multi-objective environment."""

        def __init__(self, num_rewards: int) -> None:
            """Declare the specs; the reward has ``num_rewards`` components."""
            super().__init__()
            self._num_rewards = num_rewards

            self.observation_spec = Composite(
                observation=UnboundedContinuous((*self.batch_size, 3))
            )
            self.action_spec = Categorical(2, (*self.batch_size, 1), dtype=torch.bool)
            self.done_spec = Categorical(2, (*self.batch_size, 1), dtype=torch.bool)
            self.full_done_spec["truncated"] = self.full_done_spec["terminated"].clone()
            # Pass the shape as one tuple, like the observation spec above.
            # Splatting the batch dims into separate positional arguments only
            # yields the intended shape when the batch size is empty.
            self.reward_spec = UnboundedContinuous((*self.batch_size, num_rewards))

        def _reset(self, tensordict: TensorDict) -> TensorDict:
            """Return a sample drawn from the observation spec."""
            return self.observation_spec.sample()

        def _step(self, tensordict: TensorDict) -> TensorDict:
            """Return a random observation and reward vector; never done."""
            done, terminated = False, False
            reward = torch.randn((self._num_rewards,))

            return TensorDict(
                {
                    "observation": self.observation_spec["observation"].sample(),
                    "done": done,
                    "terminated": terminated,
                    "reward": reward,
                }
            )

        def _set_seed(self, seed: int | None = None) -> None:
            """Accept and ignore a seed.

            NOTE(review): the ``seed`` parameter is accepted so the base class
            can forward the seed to this hook -- confirm against ``EnvBase``.
            """

    @pytest.mark.parametrize(
        "num_rewards, weights",
        [
            (1, None),
            (3, None),
            (2, [1.0, 2.0]),
        ],
    )
    def test_transform_env(self, num_rewards, weights):
        """A rollout yields a trailing-dim-1 reward equal to the weighted sum."""
        weights = weights if weights is not None else [1.0 for _ in range(num_rewards)]

        transform = LineariseRewards(
            in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
        )
        env = TransformedEnv(self._DummyMultiObjectiveEnv(num_rewards), transform)
        rollout = env.rollout(10)
        scalar_reward = rollout.get(("next", "scalar_reward"))
        assert scalar_reward.shape[-1] == 1

        # Split the raw reward into per-objective columns so each one lines up
        # with its weight.
        expected = sum(
            w * r
            for w, r in zip(
                weights, rollout.get(("next", "reward")).split(1, dim=-1), strict=True
            )
        )
        torch.testing.assert_close(scalar_reward, expected)

    @pytest.mark.parametrize(
        "num_rewards, weights",
        [
            (1, None),
            (3, None),
            (2, [1.0, 2.0]),
        ],
    )
    def test_transform_model(self, num_rewards, weights):
        """The transform works as a module inside ``nn.Sequential``."""
        weights = weights if weights is not None else [1.0 for _ in range(num_rewards)]
        transform = LineariseRewards(
            in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
        )

        model = nn.Sequential(transform, nn.Identity())
        td = TensorDict({"reward": torch.randn(num_rewards)}, [])
        model(td)

        expected = sum(w * r for w, r in zip(weights, td["reward"], strict=True))
        torch.testing.assert_close(td["scalar_reward"], expected)

    @pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
    def test_transform_rb(self, rbclass):
        """Sampling from a buffer with the transform adds the summed reward."""
        num_rewards = 3
        weights = None
        transform = LineariseRewards(
            in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
        )

        rb = rbclass(storage=LazyTensorStorage(10))
        td = TensorDict({"reward": torch.randn(num_rewards)}, []).expand(10)
        rb.append_transform(transform)
        rb.extend(td)

        td = rb.sample(2)
        torch.testing.assert_close(td["scalar_reward"], td["reward"].sum(-1))

    def test_transform_inverse(self):
        """Skip: the transform has no inverse."""
        raise pytest.skip("No inverse for LineariseRewards")

    @pytest.mark.parametrize(
        "weights, reward_spec, expected_spec",
        [
            (None, UnboundedContinuous(shape=3), UnboundedContinuous(shape=1)),
            (
                None,
                BoundedContinuous(0, 1, shape=3),
                BoundedContinuous(0, 3, shape=1),
            ),
            (
                None,
                BoundedContinuous(low=[-1.0, -2.0], high=[1.0, 2.0]),
                BoundedContinuous(low=-3.0, high=3.0, shape=1),
            ),
            (
                [1.0, 0.0],
                BoundedContinuous(
                    low=[-1.0, -2.0],
                    high=[1.0, 2.0],
                    shape=2,
                ),
                BoundedContinuous(low=-1.0, high=1.0, shape=1),
            ),
        ],
    )
    def test_reward_spec(
        self,
        weights: list[float] | None,
        reward_spec: TensorSpec,
        expected_spec: TensorSpec,
    ) -> None:
        """``transform_reward_spec`` returns the expected one-element spec."""
        transform = LineariseRewards(in_keys=("reward",), weights=weights)
        assert transform.transform_reward_spec(reward_spec) == expected_spec

    def test_composite_reward_spec(self) -> None:
        """Each nested reward entry of a composite spec is transformed separately."""
        weights = None
        reward_spec = Composite(
            agent_0=Composite(
                reward=BoundedContinuous(low=[0, 0, 0], high=[1, 1, 1], shape=3)
            ),
            agent_1=Composite(
                reward=BoundedContinuous(
                    low=[-1, -1, -1],
                    high=[1, 1, 1],
                    shape=3,
                )
            ),
        )
        expected_reward_spec = Composite(
            agent_0=Composite(reward=BoundedContinuous(low=0, high=3, shape=1)),
            agent_1=Composite(reward=BoundedContinuous(low=-3, high=3, shape=1)),
        )
        transform = LineariseRewards(
            in_keys=[("agent_0", "reward"), ("agent_1", "reward")], weights=weights
        )
        assert transform.transform_reward_spec(reward_spec) == expected_reward_spec


if __name__ == "__main__":
    # Options argparse does not recognise are collected and handed to pytest
    # unchanged, after the fixed capture / fail-fast flags.
    args, unknown = argparse.ArgumentParser().parse_known_args()
    pytest_argv = [__file__, "--capture", "no", "--exitfirst", *unknown]
    pytest.main(pytest_argv)
1 change: 1 addition & 0 deletions torchrl/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
gSDENoise,
InitTracker,
KLRewardTransform,
LineariseRewards,
MultiStepTransform,
NoopResetEnv,
ObservationNorm,
Expand Down
1 change: 1 addition & 0 deletions torchrl/envs/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
GrayScale,
gSDENoise,
InitTracker,
LineariseRewards,
NoopResetEnv,
ObservationNorm,
ObservationTransform,
Expand Down
Loading
Loading