mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
adapted score answer
This commit is contained in:
parent
ab33c2bcbb
commit
a1494c4e5b
1 changed file with 9 additions and 4 deletions
|
|
@@ -3,7 +3,13 @@ from random import Random
|
|||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
from .rearc_board_format import BoardFormattingOptions, default_board_format_opts, format_board, format_board_pair
|
||||
from .rearc_board_format import (
|
||||
BoardFormattingOptions,
|
||||
default_board_format_opts,
|
||||
format_board,
|
||||
format_board_pair,
|
||||
parse_board,
|
||||
)
|
||||
from .rearc_utils import generators, verifiers
|
||||
from .rearc_utils.dsl import *
|
||||
from .rearc_utils.utils import *
|
||||
|
|
@@ -166,9 +172,8 @@ class ReArcDataset(ProceduralDataset):
|
|||
reward = 0.0
|
||||
if answer is not None:
|
||||
try:
|
||||
task_id = metadata["task_id"]
|
||||
verifier = self._verifiers[task_id]
|
||||
if verifier(metadata["input"]) == metadata["output"]:
|
||||
formatted_answer = parse_board(answer, self.board_format_opts)
|
||||
if formatted_answer == metadata["output"]:
|
||||
reward = 1.0
|
||||
else:
|
||||
reward = 0.05
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue