
Commit

Make evaluation script write usable intermediate files which can be used in the event that the full script crashes. This makes it possible to re-run only the scenarios that crashed.

PiperOrigin-RevId: 673749453
Change-Id: I52aa1bd816b18f25bcf6a8a3485777c23fb48b13
jzleibo authored and copybara-github committed Sep 12, 2024
1 parent d4e32c0 commit e5bb519
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions examples/modular/launch_concordia_challenge_evaluation.py
@@ -107,7 +107,7 @@
     '--num_repetitions_per_scenario',
     action='store',
     type=int,
-    default=2,
+    default=1,
     dest='num_repetitions_per_scenario',
 )
 parser.add_argument('--api_key',
@@ -224,9 +224,10 @@ def _evaluate_all_repetitions_on_one_scenario(
   )
   with open(html_filename, 'a', encoding='utf-8') as f:
     f.write(text_results_log)
+
   # Average scores over repetitions and save results for all repetitions in a
   # json-serializable format.
-  return logging_lib.ScenarioResult(
+  scenario_result_ = logging_lib.ScenarioResult(
       scenario=scenario_name,
       focal_agent=args.agent_name,
       background_agent=scenario_config.background_agent_module,
@@ -245,6 +246,14 @@ def _evaluate_all_repetitions_on_one_scenario(
       disable_language_model=args.disable_language_model,
       exclude_from_elo_calculation=args.exclude_from_elo_calculation,
   )
+  scenario_json_filename = (
+      f'{args.agent_name}__{args.model_name}__'
+      f'{args.embedder_name}__only_{scenario_name}.json'
+  ).replace('/', '_')
+  json_str_ = scenario_result_.to_json()
+  with open(scenario_json_filename, 'a', encoding='utf-8') as f:
+    f.write(json_str_)
+  return scenario_result_
 
 tasks = {
     name: functools.partial(
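For context, a minimal sketch (not part of this commit) of how the per-scenario JSON files written above could be used after a crash: scenarios whose intermediate file already exists on disk are skipped, and only the remaining ones are re-run. The filename pattern is copied from the diff; the helper name and its arguments are hypothetical.

import os
from typing import Sequence


def find_remaining_scenarios(
    agent_name: str,
    model_name: str,
    embedder_name: str,
    scenario_names: Sequence[str],
) -> list[str]:
  """Returns scenarios that do not yet have a per-scenario JSON file on disk.

  Hypothetical helper; only the filename pattern comes from the commit above.
  """
  remaining = []
  for scenario_name in scenario_names:
    # Same naming scheme as in launch_concordia_challenge_evaluation.py.
    scenario_json_filename = (
        f'{agent_name}__{model_name}__'
        f'{embedder_name}__only_{scenario_name}.json'
    ).replace('/', '_')
    if not os.path.exists(scenario_json_filename):
      remaining.append(scenario_name)
  return remaining

A re-run could then pass only the returned scenario names back into the evaluation loop instead of repeating every scenario.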
