Skip to content

Commit

Permalink
Improve reality_show action/reward observations, parameterize more of…
Browse files Browse the repository at this point in the history
… its settings, and add scenarios.

PiperOrigin-RevId: 671686501
Change-Id: Ifc9b4b38d6ae906d1d4ab6065273a21eb62bbbe0
  • Loading branch information
jzleibo authored and copybara-github committed Sep 6, 2024
1 parent c0d6d31 commit 771d7b8
Show file tree
Hide file tree
Showing 13 changed files with 445 additions and 159 deletions.
42 changes: 31 additions & 11 deletions concordia/components/game_master/schelling_diagram_payoffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,14 @@ def __init__(
players: Sequence[deprecated_agent.BasicAgent | entity_agent.EntityAgent],
acting_player_names: Sequence[str],
outcome_summarization_fn: Callable[
[Mapping[str, int], Mapping[str, float]], Mapping[str, str]
[Mapping[str, int],
Mapping[str, str],
Mapping[str, float],
Mapping[str, float]],
Mapping[str, str],
],
clock_now: Callable[[], datetime.datetime],
active_players_observe_joint_action_and_outcome: bool = False,
name: str = 'scoring function',
verbose: bool = False,
):
Expand All @@ -89,6 +94,10 @@ def __init__(
outcome_summarization_fn: function of binarized joint actions and
rewards which returns an outcome description message for each player
clock_now: Function to call to get current time.
active_players_observe_joint_action_and_outcome: if False (the default),
active players observe only the description of their own action and
reward; if True, they observe the full joint action and outcome.
Inactive players always observe the full joint action and outcome.
name: name of this component e.g. Possessions, Account, Property, etc
verbose: whether to print the full update chain of thought or not
"""
Expand All @@ -102,6 +111,9 @@ def __init__(
self._outcome_summarization_fn = outcome_summarization_fn
self._clock_now = clock_now
self._name = name
self._active_players_observe_joint_action_and_outcome = (
active_players_observe_joint_action_and_outcome
)
self._verbose = verbose

self._history = []
Expand Down Expand Up @@ -182,34 +194,41 @@ def _get_rewards_from_joint_action(

def _set_outcome_messages(
self,
binary_joint_action: Mapping[str, int],
rewards: Mapping[str, float],
binary_joint_action: Mapping[str, bool],
joint_action: Mapping[str, str],
) -> None:
# Only the game master sees the actual reward values.
game_master_private_state = '\n'.join(
[f'{player.name}: {self._player_scores[player.name]}'
for player in self._players])
# Players see a text-based summarization of the events, which may or may not
# include the actual reward values.
partial_states = self._outcome_summarization_fn(binary_joint_action,
rewards)
partial_states = self._outcome_summarization_fn(
binary_joint_action, joint_action, rewards, self._player_scores
)
common_view_of_player_obs = '\n'.join(
[f'{name} observed: {observation}' for name, observation
in partial_states.items()])
[f'{observation}' for observation in partial_states.values()]
)

# State is only observed by the game master since players get
# their observations from `partial_states`.
self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'

# The game master gets a memory of the state.
self._memory.add(self._state)
# Active players observe their own partial state description and inactive
# players get the common description.
# By default, active players observe only their own partial state
# description, but if `active_players_observe_joint_action_and_outcome` is
# True then they observe the full joint action and outcome. Inactive players
# always observe the full joint action/outcome.
for player in self._players:
if player.name in self._acting_player_names:
player.observe(partial_states[player.name])
if self._active_players_observe_joint_action_and_outcome:
self._partial_states[player.name] = common_view_of_player_obs
else:
self._partial_states[player.name] = partial_states[player.name]
else:
player.observe(common_view_of_player_obs)
self._partial_states[player.name] = common_view_of_player_obs

def update_before_event(self, player_action_attempt: str) -> None:
# `player_action_attempt` is formatted as "name: attempt".
Expand All @@ -224,6 +243,7 @@ def update_after_event(
current_scene_type = self._current_scene.state()
payoffs_for_log = ''
joint_action_for_log = ''
self._partial_states = {player.name: '' for player in self._players}
finished = False
if current_scene_type == self._resolution_scene:
# Check if all players have acted so far in the current stage game.
Expand All @@ -238,7 +258,7 @@ def update_after_event(
self._player_scores[name] += rewards[name]

# Use the outcome summarization function to get the state.
self._set_outcome_messages(binary_joint_action, rewards)
self._set_outcome_messages(rewards, binary_joint_action, joint_action)
self._memory.extend([self.state(),])

joint_action_for_log = str(self._partial_joint_action)
Expand Down
3 changes: 2 additions & 1 deletion examples/modular/environment/forbidden_fruit.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,9 +787,10 @@ def configure_scenes(


def outcome_summary_fn(
# `binary_joint_action` should be type Mapping[str, bool] (ie bool not int).
unused_binary_joint_action: Mapping[str, int],
unused_joint_action: Mapping[str, str],
rewards: Mapping[str, float],
unused_cumulative_rewards: Mapping[str, float],
) -> Mapping[str, str]:
"""Summarize the outcome of a decision scene."""
marking = ''
Expand Down
4 changes: 3 additions & 1 deletion examples/modular/environment/labor_collective_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,9 @@ def configure_scenes(
Returns:
scenes: a sequence of scene specifications
schelling_payoffs: a component to compute rewards of collective action
decision_env: the game master object for the decision scenes
industrial_action: the labor strike game master component used in the
decision scenes
"""
main_player_configs_list = list(main_player_configs)
player_configs = main_player_configs_list + list(supporting_player_configs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,113 @@

"""A setting where the players are contestants on a reality TV show."""

from collections.abc import Mapping
import dataclasses
import random
from typing import Any

from concordia.components import game_master as gm_components
from examples.modular.environment import reality_show
from concordia.typing import agent as agent_lib


SchellingDiagram = gm_components.schelling_diagram_payoffs.SchellingDiagram

# According to Google Search, the early 2000s were the peak of reality TV.
YEAR = 2003
MONTH = 7
DAY = 9

POSSIBLE_NUM_PLAYERS = (3, 4)
DEFAULT_POSSIBLE_NUM_PLAYERS = (3, 4)

DEFAULT_MINIGAME = 'Carpooling'
NUM_MINIGAME_REPS_PER_SCENE = 3
DEFAULT_MINIGAME = 'prisoners_dilemma'
NUM_MINIGAME_REPS_PER_SCENE = (2, 3)

NUM_INTERVIEW_QUESTIONS = 3

MINIGAME_INTRO_PREMISE = (
"The show's host arrived to explain the next minigame. They "
'said the following:\n'
)

# Minigame specifications, keyed by the name of the social dilemma each one
# instantiates. `sample_parameters` looks up an entry by `minigame_name`.
#
# In every entry the `schelling_diagram` gives two payoff functions of
# `num_cooperators`, one for choosing cooperation and one for choosing
# defection, and `map_external_actions_to_schelling_diagram` names the
# natural-language action standing for each choice. The same two action
# strings are repeated as the `options` of the `action_spec`.
# NOTE(review): each premise hard-codes 'Three ...' participants, while
# `sample_parameters` accepts other values of `num_players` -- confirm the
# wording is intended for those cases.
MINIGAMES = {
    'prisoners_dilemma': reality_show.MiniGameSpec(
        name='Carpooling',
        public_premise=MINIGAME_INTRO_PREMISE
        + (
            'The next minigame is called Carpooling. Three coworkers can '
            'carpool, cutting commute costs for all, or drive individually. '
            'The commute happens daily, creating repeated decisions.'
        ),
        schelling_diagram=SchellingDiagram(
            # A fear+greed-type (Prisoners' Dilemma-like) dilemma: for any
            # value of `num_cooperators` the defection payoff exceeds the
            # cooperation payoff by 3.0.
            cooperation=lambda num_cooperators: num_cooperators - 1.0,
            defection=lambda num_cooperators: num_cooperators + 2.0,
        ),
        map_external_actions_to_schelling_diagram=dict(
            cooperation='try to carpool with others',
            defection='drive individually',
        ),
        action_spec=agent_lib.choice_action_spec(
            call_to_action='Which action would {name} choose in the minigame?',
            options=('try to carpool with others', 'drive individually'),
            tag='minigame_action',
        ),
    ),
    'chicken': reality_show.MiniGameSpec(
        name='Home Appliance Sharing',
        public_premise=MINIGAME_INTRO_PREMISE
        + (
            'Three neighbors share a tool/appliance infrequently. Each can '
            'maintain it for shared use, or let others handle '
            'upkeep and risk it being unavailable. Repeated use '
            'creates dilemmas each time the tool/appliance is needed.'
        ),
        schelling_diagram=SchellingDiagram(
            # A greed-type (Chicken-like) dilemma: cooperation pays more when
            # `num_cooperators` is 0 or 1, defection pays more from 2 upwards.
            cooperation=lambda num_cooperators: 4.0 * num_cooperators,
            defection=lambda num_cooperators: 5.5 * num_cooperators - 2.0,
        ),
        map_external_actions_to_schelling_diagram=dict(
            cooperation='maintain the appliance',
            defection='let others handle upkeep of the appliance',
        ),
        action_spec=agent_lib.choice_action_spec(
            call_to_action='Which action would {name} choose in the minigame?',
            options=(
                'maintain the appliance',
                'let others handle upkeep of the appliance',
            ),
            tag='minigame_action',
        ),
    ),
    'stag_hunt': reality_show.MiniGameSpec(
        name='Boat Race',
        public_premise=MINIGAME_INTRO_PREMISE
        + (
            'Three teammates are on a row boat racing team together. Each has '
            'the option to give the race their all and really row '
            'vigorously, but this option is very fatiguing and only '
            'effective when all choose it simultaneously. Alternatively, each '
            'teammate has the option of rowing less vigorously, this gets '
            'them to their goal more slowly, but is less fatiguing and does '
            'not require coordination with the others. The race is repeated '
            'many times, going back and forth across the lake.'
        ),
        schelling_diagram=SchellingDiagram(
            # A fear-type (Stag Hunt-like) dilemma: defection pays more when
            # `num_cooperators` is 0 or 1, cooperation pays more from 2
            # upwards.
            cooperation=lambda num_cooperators: (4.0 * num_cooperators) - 1.0,
            defection=lambda num_cooperators: num_cooperators + 4.0,
        ),
        map_external_actions_to_schelling_diagram=dict(
            cooperation='row vigorously',
            defection='row less vigorously',
        ),
        action_spec=agent_lib.choice_action_spec(
            call_to_action='Which action would {name} choose in the minigame?',
            options=('row vigorously', 'row less vigorously'),
            tag='minigame_action',
        ),
    ),
}

# These are all stereotypical reality show contestants. They are not meant to
# be inclusive or diverse. They are meant to represent the time period and
# genre, in this case reality tv in the early 2000s.
Expand Down Expand Up @@ -634,24 +724,14 @@
}


@dataclasses.dataclass
class WorldConfig:
"""The configuration of the simulated world."""

minigame: str
year: int
month: int
day: int
num_players: int
contestants: Mapping[str, Mapping[str, Any]]
num_minigame_reps_per_scene: int


def sample_parameters(minigame: str = DEFAULT_MINIGAME):
def sample_parameters(
minigame_name: str = DEFAULT_MINIGAME, num_players: int | None = None
) -> reality_show.WorldConfig:
"""Sample parameters of the setting and the backstory for each player."""
shuffled_male_names = list(random.sample(MALE_NAMES, len(MALE_NAMES)))
shuffled_female_names = list(random.sample(FEMALE_NAMES, len(FEMALE_NAMES)))
num_players = random.choice(POSSIBLE_NUM_PLAYERS)
if num_players is None:
num_players = random.choice(DEFAULT_POSSIBLE_NUM_PLAYERS)
contestants = {}
for _ in range(num_players):
gender = random.choice(GENDERS)
Expand All @@ -675,8 +755,9 @@ def sample_parameters(minigame: str = DEFAULT_MINIGAME):
'subject_pronoun': HE_OR_SHE[gender],
'object_pronoun': HIM_OR_HER[gender],
}
return WorldConfig(
minigame=minigame,
return reality_show.WorldConfig(
minigame_name=minigame_name,
minigame=MINIGAMES[minigame_name],
year=YEAR,
month=MONTH,
day=DAY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@
def sample_parameters():
"""Sample parameters of the setting and the backstory for each player."""
return parent_module.sample_parameters(
minigame='Home Appliance Sharing',
minigame_name='chicken',
num_players=3,
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@
def sample_parameters():
"""Sample parameters of the setting and the backstory for each player."""
return parent_module.sample_parameters(
minigame='Carpooling',
minigame_name='chicken',
num_players=4,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2024 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Settings for an early 2000s american reality show for the prisoners_dilemma.
"""

from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module


def sample_parameters():
  """Sample the setting and player backstories for this scenario.

  Forwards to the parent module's `sample_parameters`, fixing the minigame to
  'prisoners_dilemma' and the number of players to 3.

  Returns:
    Whatever the parent module's `sample_parameters` returns for these
    settings.
  """
  scenario_settings = {'minigame_name': 'prisoners_dilemma', 'num_players': 3}
  return parent_module.sample_parameters(**scenario_settings)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2024 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Settings for an early 2000s american reality show for the prisoners_dilemma.
"""

from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module


def sample_parameters():
  """Sample the setting and player backstories for this scenario.

  Forwards to the parent module's `sample_parameters`, fixing the minigame to
  'prisoners_dilemma' and the number of players to 4.

  Returns:
    Whatever the parent module's `sample_parameters` returns for these
    settings.
  """
  scenario_settings = {'minigame_name': 'prisoners_dilemma', 'num_players': 4}
  return parent_module.sample_parameters(**scenario_settings)
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@
def sample_parameters():
"""Sample parameters of the setting and the backstory for each player."""
return parent_module.sample_parameters(
minigame='Boat Race',
minigame_name='stag_hunt',
num_players=3,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2024 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Settings for an early 2000s american reality show for the stag_hunt.
"""

from examples.modular.environment.modules import early_2000s_american_reality_show as parent_module


def sample_parameters():
  """Sample the setting and player backstories for this scenario.

  Forwards to the parent module's `sample_parameters`, fixing the minigame to
  'stag_hunt' and the number of players to 4.

  Returns:
    Whatever the parent module's `sample_parameters` returns for these
    settings.
  """
  scenario_settings = {'minigame_name': 'stag_hunt', 'num_players': 4}
  return parent_module.sample_parameters(**scenario_settings)
Loading

0 comments on commit 771d7b8

Please sign in to comment.