mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-23 16:54:56 +00:00
hopefully final linter fixes lol
This commit is contained in:
parent
67869c3a79
commit
85296c519e
29 changed files with 76 additions and 155 deletions
|
|
@ -18,18 +18,14 @@ Supports thinking mode with <think></think> tags for extended reasoning.
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import wandb
|
||||
from datasets import load_dataset
|
||||
from eval_helpers import (
|
||||
THINK_CONTENT_AFTER_PATTERN,
|
||||
compare_math_strings,
|
||||
create_system_content,
|
||||
extract_boxed_answers,
|
||||
extract_thinking_content,
|
||||
|
|
@ -47,8 +43,7 @@ from atroposlib.envs.base import (
|
|||
APIServerConfig,
|
||||
BaseEnv,
|
||||
BaseEnvConfig,
|
||||
EvalHandlingEnum,
|
||||
)
|
||||
)
|
||||
|
||||
# Prompt template following lighteval's structure
|
||||
# Added boxed instruction for consistency with our math verification
|
||||
|
|
@ -329,7 +324,7 @@ class GSM8KEvalEnv(BaseEnv):
|
|||
print(f"Extracted: {extracted_answer}")
|
||||
print(f"Correct: {is_correct} (method: {method})")
|
||||
if has_multiple_boxed:
|
||||
print(f"WARNING: Multiple \\boxed{{}} found - marked incorrect")
|
||||
print("WARNING: Multiple \\boxed{} found - marked incorrect")
|
||||
|
||||
return {
|
||||
"item_id": item["id"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue