From 1e0ac83b2d419f8cc975106b8a6e30ec389a0e0a Mon Sep 17 00:00:00 2001
From: Alexander Kozlov
Date: Wed, 11 Dec 2024 12:38:27 +0300
Subject: [PATCH] Enhanced verbose output of text generation models (#1351)

---
 tools/who_what_benchmark/tests/test_cli_text.py |  2 +-
 tools/who_what_benchmark/whowhatbench/wwb.py    | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py
index 0baf60a5a4..79335d46eb 100644
--- a/tools/who_what_benchmark/tests/test_cli_text.py
+++ b/tools/who_what_benchmark/tests/test_cli_text.py
@@ -156,7 +156,7 @@ def test_text_verbose():
         ]
     )
     assert result.returncode == 0
-    assert "## Diff " in result.stderr
+    assert "## Diff:" in result.stderr
 
 
 def test_text_language_autodetect():
diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py
index e7feebc1b2..026a6cc69b 100644
--- a/tools/who_what_benchmark/whowhatbench/wwb.py
+++ b/tools/who_what_benchmark/whowhatbench/wwb.py
@@ -552,7 +552,6 @@ def print_text_results(evaluator):
         ref_text = ""
         actual_text = ""
         diff = ""
-        print("optimized_model: ", e["optimized_model"])
         for l1, l2 in zip(
             e["source_model"].splitlines(), e["optimized_model"].splitlines()
         ):
@@ -563,12 +562,13 @@ def print_text_results(evaluator):
             diff += diff_strings(l1, l2) + "\n"
 
         logger.info(
-            "--------------------------------------------------------------------------------------"
+            "======================================================================================================="
         )
-        logger.info("## Reference text %d:\n%s", i + 1, ref_text)
-        logger.info("## Actual text %d:\n%s", i + 1, actual_text)
-        logger.info("## Diff %d: ", i + 1)
-        logger.info(diff)
+        logger.info("## Prompt %d:\n%s\n", i + 1, e["prompt"])
+        logger.info("## Metric value:%.4f\n", e[metric_of_interest])
+        logger.info("## Reference text:\n%s\n", ref_text)
+        logger.info("## Actual text:\n%s\n", actual_text)
+        logger.info("## Diff:\n%s\n", diff)
 
 
 def print_image_results(evaluator):
@@ -578,7 +578,7 @@ def print_image_results(evaluator):
         top_k=5, metric=metric_of_interest)
     for i, e in enumerate(worst_examples):
         logger.info(
-            "--------------------------------------------------------------------------------------"
+            "======================================================================================================="
         )
         logger.info(f"Top-{i+1} example:")
         logger.info(e)
@@ -638,7 +638,7 @@ def main():
         df.to_csv(os.path.join(args.output, "metrics.csv"))
         evaluator.dump_predictions(os.path.join(args.output, "target.csv"))
 
-    if args.verbose and args.target_model is not None:
+    if args.verbose and (args.target_model or args.target_data):
         if args.model_type == "text" or args.model_type == "visual-text":
             print_text_results(evaluator)
         elif "text-to-image" in args.model_type: