Mirror of https://github.com/open-thought/reasoning-gym.git (synced 2026-04-22 16:49:06 +00:00)
Add eval configs, small fixes to eval script & rush-hour score_answer
This commit is contained in:
Parent: fa950d0189
Commit: 677a2af03e
6 changed files with 283 additions and 22 deletions
13
eval/eval.py
13
eval/eval.py
|
|
@@ -399,17 +399,16 @@ class AsyncModelEvaluator:
|
|||
Dict with processing results
|
||||
"""
|
||||
responses = None
|
||||
completion_results = []
|
||||
best_score = 0.0
|
||||
total_score = 0.0
|
||||
best_answer = None
|
||||
best_response = None
|
||||
|
||||
try:
|
||||
# Get multiple model responses
|
||||
responses = await self.get_model_response(entry["question"])
|
||||
|
||||
# Process each response
|
||||
completion_results = []
|
||||
best_score = 0.0
|
||||
total_score = 0.0
|
||||
best_answer = None
|
||||
best_response = None
|
||||
|
||||
# Count total completions for mean score calculation
|
||||
total_completions = len(responses)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue