Improve reality_show action/reward observations, parameterize more of its settings, and add scenarios.

PiperOrigin-RevId: 671686501
Change-Id: Ifc9b4b38d6ae906d1d4ab6065273a21eb62bbbe0
google-deepmind · Sep 6, 2024 · 771d7b8 · 771d7b8
1 parent c0d6d31
commit 771d7b8
Show file tree

Hide file tree

Showing 13 changed files with 445 additions and 159 deletions.
diff --git a/concordia/components/game_master/schelling_diagram_payoffs.py b/concordia/components/game_master/schelling_diagram_payoffs.py
@@ -66,9 +66,14 @@ def __init__(
       players: Sequence[deprecated_agent.BasicAgent | entity_agent.EntityAgent],
       acting_player_names: Sequence[str],
       outcome_summarization_fn: Callable[
-          [Mapping[str, int], Mapping[str, float]], Mapping[str, str]
+          [Mapping[str, int],
+           Mapping[str, str],
+           Mapping[str, float],
+           Mapping[str, float]],
+          Mapping[str, str],
       ],
       clock_now: Callable[[], datetime.datetime],
+      active_players_observe_joint_action_and_outcome: bool = False,
       name: str = 'scoring function',
       verbose: bool = False,
   ):
@@ -89,6 +94,10 @@ def __init__(
       outcome_summarization_fn: function of binary and raw joint actions,
         rewards, and cumulative scores; returns an outcome message per player
       clock_now: Function to call to get current time.
+      active_players_observe_joint_action_and_outcome: False by default. If set
+        to True, active players observe the full joint action and outcome;
+        otherwise they observe only their own action and reward description.
+        Inactive players always observe the full joint action and outcome.
       name: name of this component e.g. Possessions, Account, Property, etc
       verbose: whether to print the full update chain of thought or not
     """
@@ -102,6 +111,9 @@ def __init__(
     self._outcome_summarization_fn = outcome_summarization_fn
     self._clock_now = clock_now
     self._name = name
+    self._active_players_observe_joint_action_and_outcome = (
+        active_players_observe_joint_action_and_outcome
+    )
     self._verbose = verbose
 
     self._history = []
@@ -182,34 +194,41 @@ def _get_rewards_from_joint_action(
 
   def _set_outcome_messages(
       self,
-      binary_joint_action: Mapping[str, int],
       rewards: Mapping[str, float],
+      binary_joint_action: Mapping[str, bool],
+      joint_action: Mapping[str, str],
   ) -> None:
     # Only the game master sees the actual reward values.
     game_master_private_state = '\n'.join(
         [f'{player.name}: {self._player_scores[player.name]}'
          for player in self._players])
     # Players see a text-based summarization of the events, which may or may not
     # include the actual reward values.
-    partial_states = self._outcome_summarization_fn(binary_joint_action,
-                                                    rewards)
+    partial_states = self._outcome_summarization_fn(
+        binary_joint_action, joint_action, rewards, self._player_scores
+    )
     common_view_of_player_obs = '\n'.join(
-        [f'{name} observed: {observation}' for name, observation
-         in partial_states.items()])
+        [f'{observation}' for observation in partial_states.values()]
+    )
 
     # State is only observed by the game master since players get
     # their observations from `partial_states`.
     self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'
 
     # The game master gets a memory of the state.
     self._memory.add(self._state)
-    # Active players observe their own partial state description and inactive
-    # players get the common description.
+    # By default, active players observe only their own partial state
+    # description, but if `active_players_observe_joint_action_and_outcome` is
+    # True then they observe the full joint action and outcome. Inactive players
+    # always observe the full joint action/outcome.
     for player in self._players:
       if player.name in self._acting_player_names:
-        player.observe(partial_states[player.name])
+        if self._active_players_observe_joint_action_and_outcome:
+          self._partial_states[player.name] = common_view_of_player_obs
+        else:
+          self._partial_states[player.name] = partial_states[player.name]
       else:
-        player.observe(common_view_of_player_obs)
+        self._partial_states[player.name] = common_view_of_player_obs
 
   def update_before_event(self, player_action_attempt: str) -> None:
     # `player_action_attempt` is formatted as "name: attempt".
@@ -224,6 +243,7 @@ def update_after_event(
     current_scene_type = self._current_scene.state()
     payoffs_for_log = ''
     joint_action_for_log = ''
+    self._partial_states = {player.name: '' for player in self._players}
     finished = False
     if current_scene_type == self._resolution_scene:
       # Check if all players have acted so far in the current stage game.
@@ -238,7 +258,7 @@ def update_after_event(
           self._player_scores[name] += rewards[name]
 
         # Use the outcome summarization function to get the state.
-        self._set_outcome_messages(binary_joint_action, rewards)
+        self._set_outcome_messages(rewards, binary_joint_action, joint_action)
         self._memory.extend([self.state(),])
 
         joint_action_for_log = str(self._partial_joint_action)

diff --git a/examples/modular/environment/forbidden_fruit.py b/examples/modular/environment/forbidden_fruit.py
@@ -787,9 +787,10 @@ def configure_scenes(
 
 
 def outcome_summary_fn(
-    # `binary_joint_action` should be type Mapping[str, bool] (ie bool not int).
     unused_binary_joint_action: Mapping[str, int],
+    unused_joint_action: Mapping[str, str],
     rewards: Mapping[str, float],
+    unused_cumulative_rewards: Mapping[str, float],
 ) -> Mapping[str, str]:
   """Summarize the outcome of a decision scene."""
   marking = ''

diff --git a/examples/modular/environment/labor_collective_action.py b/examples/modular/environment/labor_collective_action.py
@@ -397,7 +397,9 @@ def configure_scenes(
 
   Returns:
     scenes: a sequence of scene specifications
-    schelling_payoffs: a component to compute rewards of collective action
+    decision_env: the game master object for the decision scenes
+    industrial_action: the labor strike game master component used in the
+      decision scenes
   """
   main_player_configs_list = list(main_player_configs)
   player_configs = main_player_configs_list + list(supporting_player_configs)

diff --git a/examples/modular/environment/modules/early_2000s_american_reality_show.py b/examples/modular/environment/modules/early_2000s_american_reality_show.py
@@ -14,23 +14,113 @@
 
 """A setting where the players are contestants on a reality TV show."""
 
-from collections.abc import Mapping
-import dataclasses
 import random
-from typing import Any
+
+from concordia.components import game_master as gm_components
+from examples.modular.environment import reality_show
+from concordia.typing import agent as agent_lib
+
+
+SchellingDiagram = gm_components.schelling_diagram_payoffs.SchellingDiagram
 
 # According to Google Search, the early 2000s were the peak of reality TV.
 YEAR = 2003
 MONTH = 7
 DAY = 9
 
-POSSIBLE_NUM_PLAYERS = (3, 4)
+DEFAULT_POSSIBLE_NUM_PLAYERS = (3, 4)
 
-DEFAULT_MINIGAME = 'Carpooling'
-NUM_MINIGAME_REPS_PER_SCENE = 3
+DEFAULT_MINIGAME = 'prisoners_dilemma'
+NUM_MINIGAME_REPS_PER_SCENE = (2, 3)
 
 NUM_INTERVIEW_QUESTIONS = 3
 
+MINIGAME_INTRO_PREMISE = (
+    "The show's host arrived to explain the next minigame. They "
+    'said the following:\n'
+)
+
+MINIGAMES = {
+    'prisoners_dilemma': reality_show.MiniGameSpec(
+        name='Carpooling',
+        public_premise=MINIGAME_INTRO_PREMISE
+        + (
+            'The next minigame is called Carpooling. Three coworkers can '
+            'carpool, cutting commute costs for all, or drive individually. '
+            'The commute happens daily, creating repeated decisions.'
+        ),
+        schelling_diagram=SchellingDiagram(
+            # A fear+greed-type (Prisoners' Dilemma-like) dilemma
+            cooperation=lambda num_cooperators: num_cooperators - 1.0,
+            defection=lambda num_cooperators: num_cooperators + 2.0,
+        ),
+        map_external_actions_to_schelling_diagram=dict(
+            cooperation='try to carpool with others',
+            defection='drive individually',
+        ),
+        action_spec=agent_lib.choice_action_spec(
+            call_to_action='Which action would {name} choose in the minigame?',
+            options=('try to carpool with others', 'drive individually'),
+            tag='minigame_action',
+        ),
+    ),
+    'chicken': reality_show.MiniGameSpec(
+        name='Home Appliance Sharing',
+        public_premise=MINIGAME_INTRO_PREMISE
+        + (
+            'Three neighbors share a tool/appliance infrequently. Each can '
+            'maintain it for shared use, or let others handle '
+            'upkeep and risk it being unavailable. Repeated use '
+            'creates dilemmas each time the tool/appliance is needed.'
+        ),
+        schelling_diagram=SchellingDiagram(
+            # A greed-type (Chicken-like) dilemma
+            cooperation=lambda num_cooperators: 4.0 * num_cooperators,
+            defection=lambda num_cooperators: 5.5 * num_cooperators - 2.0,
+        ),
+        map_external_actions_to_schelling_diagram=dict(
+            cooperation='maintain the appliance',
+            defection='let others handle upkeep of the appliance',
+        ),
+        action_spec=agent_lib.choice_action_spec(
+            call_to_action='Which action would {name} choose in the minigame?',
+            options=(
+                'maintain the appliance',
+                'let others handle upkeep of the appliance',
+            ),
+            tag='minigame_action',
+        ),
+    ),
+    'stag_hunt': reality_show.MiniGameSpec(
+        name='Boat Race',
+        public_premise=MINIGAME_INTRO_PREMISE
+        + (
+            'Three teammates are on a row boat racing team together. Each has '
+            'the option to give the race their all and really row '
+            'vigorously, but this option is very fatiguing and only '
+            'effective when all choose it simultaneously. Alternatively, each '
+            'teammate has the option of rowing less vigorously, this gets '
+            'them to their goal more slowly, but is less fatiguing and does '
+            'not require coordination with the others. The race is repeated '
+            'many times, going back and forth across the lake.'
+        ),
+        schelling_diagram=SchellingDiagram(
+            # A fear-type (Stag Hunt-like) dilemma
+            cooperation=lambda num_cooperators: (4.0 * num_cooperators) - 1.0,
+            defection=lambda num_cooperators: num_cooperators + 4.0,
+        ),
+        map_external_actions_to_schelling_diagram=dict(
+            cooperation='row vigorously',
+            defection='row less vigorously',
+        ),
+        action_spec=agent_lib.choice_action_spec(
+            call_to_action='Which action would {name} choose in the minigame?',
+            options=('row vigorously', 'row less vigorously'),
+            tag='minigame_action',
+        ),
+    ),
+}
+
 # These are all stereotypical reality show contestants. They are not meant to
 # be inclusive or diverse. They are meant to represent the time period and
 # genre, in this case reality tv in the early 2000s.
@@ -634,24 +724,14 @@
 }
 
 
-@dataclasses.dataclass
-class WorldConfig:
-  """The configuration of the simulated world."""
-
-  minigame: str
-  year: int
-  month: int
-  day: int
-  num_players: int
-  contestants: Mapping[str, Mapping[str, Any]]
-  num_minigame_reps_per_scene: int
-
-
-def sample_parameters(minigame: str = DEFAULT_MINIGAME):
+def sample_parameters(
+    minigame_name: str = DEFAULT_MINIGAME, num_players: int | None = None
+) -> reality_show.WorldConfig:
   """Sample parameters of the setting and the backstory for each player."""
   shuffled_male_names = list(random.sample(MALE_NAMES, len(MALE_NAMES)))
   shuffled_female_names = list(random.sample(FEMALE_NAMES, len(FEMALE_NAMES)))
-  num_players = random.choice(POSSIBLE_NUM_PLAYERS)
+  if num_players is None:
+    num_players = random.choice(DEFAULT_POSSIBLE_NUM_PLAYERS)
   contestants = {}
   for _ in range(num_players):
     gender = random.choice(GENDERS)
@@ -675,8 +755,9 @@ def sample_parameters(minigame: str = DEFAULT_MINIGAME):
         'subject_pronoun': HE_OR_SHE[gender],
         'object_pronoun': HIM_OR_HER[gender],
     }
-  return WorldConfig(
-      minigame=minigame,
+  return reality_show.WorldConfig(
+      minigame_name=minigame_name,
+      minigame=MINIGAMES[minigame_name],
       year=YEAR,
       month=MONTH,
       day=DAY,

diff --git a/...y_2000s_american_reality_show__chicken.py → ...erican_reality_show__chicken_3_players.py b/...y_2000s_american_reality_show__chicken.py → ...erican_reality_show__chicken_3_players.py
@@ -21,5 +21,6 @@
 def sample_parameters():
   """Sample parameters of the setting and the backstory for each player."""
   return parent_module.sample_parameters(
-      minigame='Home Appliance Sharing',
+      minigame_name='chicken',
+      num_players=3,
   )
diff --git a/...erican_reality_show__prisoners_dilemma.py → ...erican_reality_show__chicken_4_players.py b/...erican_reality_show__prisoners_dilemma.py → ...erican_reality_show__chicken_4_players.py
@@ -21,5 +21,6 @@
 def sample_parameters():
   """Sample parameters of the setting and the backstory for each player."""
   return parent_module.sample_parameters(
-      minigame='Carpooling',
+      minigame_name='chicken',
+      num_players=4,
   )
diff --git a/...lar/environment/modules/early_2000s_american_reality_show__prisoners_dilemma_3_players.py b/...lar/environment/modules/early_2000s_american_reality_show__prisoners_dilemma_3_players.py
@@ -0,0 +1,26 @@
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Settings for an early 2000s american reality show for the prisoners_dilemma.
+"""
+
+from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module
+
+
+def sample_parameters():
+  """Sample parameters of the setting and the backstory for each player."""
+  return parent_module.sample_parameters(
+      minigame_name='prisoners_dilemma',
+      num_players=3,
+  )
diff --git a/...lar/environment/modules/early_2000s_american_reality_show__prisoners_dilemma_4_players.py b/...lar/environment/modules/early_2000s_american_reality_show__prisoners_dilemma_4_players.py
@@ -0,0 +1,26 @@
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Settings for an early 2000s american reality show for the prisoners_dilemma.
+"""
+
+from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module
+
+
+def sample_parameters():
+  """Sample parameters of the setting and the backstory for each player."""
+  return parent_module.sample_parameters(
+      minigame_name='prisoners_dilemma',
+      num_players=4,
+  )
diff --git a/...2000s_american_reality_show__stag_hunt.py → ...ican_reality_show__stag_hunt_3_players.py b/...2000s_american_reality_show__stag_hunt.py → ...ican_reality_show__stag_hunt_3_players.py
@@ -21,5 +21,6 @@
 def sample_parameters():
   """Sample parameters of the setting and the backstory for each player."""
   return parent_module.sample_parameters(
-      minigame='Boat Race',
+      minigame_name='stag_hunt',
+      num_players=3,
   )
diff --git a/...les/modular/environment/modules/early_2000s_american_reality_show__stag_hunt_4_players.py b/...les/modular/environment/modules/early_2000s_american_reality_show__stag_hunt_4_players.py
@@ -0,0 +1,26 @@
+# Copyright 2024 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Settings for an early 2000s american reality show for the stag_hunt.
+"""
+
+from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module
+
+
+def sample_parameters():
+  """Sample parameters of the setting and the backstory for each player."""
+  return parent_module.sample_parameters(
+      minigame_name='stag_hunt',
+      num_players=4,
+  )