mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-26 17:13:17 +00:00
fix: Improve error logging and preserve full model response in eval process (#337)
This commit is contained in:
parent
be75c3aa5f
commit
c8e77d21a7
1 changed file with 16 additions and 3 deletions
19
eval/eval.py
19
eval/eval.py
|
|
@@ -175,10 +175,23 @@ class AsyncModelEvaluator:
|
||||||
Returns:
|
Returns:
|
||||||
Dict with processing results
|
Dict with processing results
|
||||||
"""
|
"""
|
||||||
|
response = None
|
||||||
try:
|
try:
|
||||||
|
# Get model response first
|
||||||
response = await self.get_model_response(entry["question"])
|
response = await self.get_model_response(entry["question"])
|
||||||
model_answer = extract_answer(response)
|
|
||||||
score = dataset.score_answer(answer=model_answer, entry=entry)
|
# Try to extract answer and score it
|
||||||
|
try:
|
||||||
|
model_answer = extract_answer(response)
|
||||||
|
except Exception as extract_error:
|
||||||
|
self.logger.error(f"Error extracting answer: {str(extract_error)}")
|
||||||
|
raise Exception(f"Answer extraction error: {str(extract_error)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
score = dataset.score_answer(answer=model_answer, entry=entry)
|
||||||
|
except Exception as score_error:
|
||||||
|
self.logger.error(f"Error scoring answer: {str(score_error)}")
|
||||||
|
raise Exception(f"Answer scoring error: {str(score_error)}")
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print(f"Question: {entry['question']}")
|
print(f"Question: {entry['question']}")
|
||||||
|
|
@@ -207,7 +220,7 @@ class AsyncModelEvaluator:
|
||||||
"question": entry["question"],
|
"question": entry["question"],
|
||||||
"expected_answer": str(entry["answer"]),
|
"expected_answer": str(entry["answer"]),
|
||||||
"model_answer": "ERROR",
|
"model_answer": "ERROR",
|
||||||
"full_model_response": f"Error: {str(e)}",
|
"full_model_response": response if response is not None else f"Error: {str(e)}",
|
||||||
"score": 0.0,
|
"score": 0.0,
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue