From 68db64b9ea6a67e3990d030947dd471ef79d02c6 Mon Sep 17 00:00:00 2001 From: mpc Date: Fri, 20 Dec 2024 14:12:31 +0000 Subject: [PATCH] outputs evaluation results for all synthetic questions --- data/.gitignore | 1 + dvc.yaml | 3 ++- params.yaml | 1 + scripts/evaluate.py | 12 +++++++++--- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/data/.gitignore b/data/.gitignore index b8dc417..439d928 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -17,3 +17,4 @@ /eidc_rag_test_set.csv /rag-pipeline.yml /pipeline.yml +/results.csv diff --git a/dvc.yaml b/dvc.yaml index 179c315..31f1704 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -79,10 +79,11 @@ stages: - ${files.eval-set} - ${files.pipeline} evaluate: - cmd: uv run scripts/evaluate.py ${files.eval-set} -m ${files.metrics} -img ${files.eval-plot} + cmd: uv run scripts/evaluate.py ${files.eval-set} -m ${files.metrics} -img ${files.eval-plot} -r ${files.results} deps: - ${files.eval-set} - scripts/evaluate.py outs: - ${files.metrics} - ${files.eval-plot} + - ${files.results} diff --git a/params.yaml b/params.yaml index b3dc785..a387f96 100644 --- a/params.yaml +++ b/params.yaml @@ -15,6 +15,7 @@ files: test-set: data/eidc_rag_testset.csv eval-set: data/evaluation_data.csv metrics: data/metrics.json + results: data/results.csv eval-plot: data/eval.png pipeline: data/pipeline.yml sub-sample: 0 # sample n datasets for testing (0 will use all datasets) diff --git a/scripts/evaluate.py b/scripts/evaluate.py index fbe348f..b4ea981 100644 --- a/scripts/evaluate.py +++ b/scripts/evaluate.py @@ -22,7 +22,7 @@ from ragas.run_config import RunConfig -def main(eval_dataset: str, metric_output: str, image_output: str) -> None: +def main(eval_dataset: str, metric_output: str, image_output: str, results_output: str) -> None: nest_asyncio.apply() # apply the event loop async fix df = pd.read_csv(eval_dataset, converters={"contexts": pd.eval}) eval_dataset = Dataset.from_pandas(df) @@ -45,7 +45,7 @@ def main(eval_dataset: str, metric_output: str, image_output: str) -> None: run_config=RunConfig(max_workers=1), ) result_df = result.to_pandas() - + result_df.to_csv(results_output, index=False) Path(metric_output).parent.mkdir(parents=True, exist_ok=True) with open(metric_output, "w+") as f: json.dump(result, f) @@ -88,5 +88,11 @@ def main(eval_dataset: str, metric_output: str, image_output: str) -> None: help="File to save image plot to.", default="data/evaluation.png", ) + parser.add_argument( + "-r", + "--results", + help="File to save evaluation results", + default="data/results.csv", + ) args = parser.parse_args() - main(args.eval_dataset, args.metrics_output, args.image_output) + main(args.eval_dataset, args.metrics_output, args.image_output, args.results)