diff --git a/eval/eval.py b/eval/eval.py index 53571dd0..f2c5d53e 100644 --- a/eval/eval.py +++ b/eval/eval.py @@ -54,6 +54,7 @@ class AsyncOpenRouterEvaluator: "question": entry["question"], "expected_answer": entry["answer"], "model_answer": answer, + "full_model_response": response, "score": score, "metadata": entry["metadata"], } diff --git a/eval/r1/eval.py b/eval/r1/eval.py index 3dbc39b1..759b4c75 100644 --- a/eval/r1/eval.py +++ b/eval/r1/eval.py @@ -104,6 +104,7 @@ class OpenRouterEvaluator: "question": entry["question"], "expected_answer": str(entry["answer"]), "model_answer": model_answer, + "full_model_response": response, "score": score, "metadata": str(entry["metadata"]), }