mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
* first trl grpo implementation * added config yaml file * added read me and dependencies * updated reward format func
18 lines
438 B
Python
18 lines
438 B
Python
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class ScriptArguments:
    """
    Arguments for the training script.

    Only ``dataset_name`` is required; every other field carries a default.
    The declaration order below is part of the interface: the ``__init__``
    generated by ``@dataclass`` accepts the fields positionally in exactly
    this order, so fields must not be reordered.
    """

    # --- dataset selection ---
    # NOTE(review): the consumers of these values are not visible in this
    # file; presumably they are forwarded to the dataset loader -- confirm
    # against the training script.
    dataset_name: str  # required, no default
    dataset_config: Optional[str] = None  # None means "not specified"
    dataset_train_split: str = "train"
    dataset_test_split: str = "test"

    # --- training behaviour flags ---
    # NOTE(review): presumably passed through to the trainer/model setup
    # (gradient checkpointing mode, handling of bias buffers) -- confirm.
    gradient_checkpointing_use_reentrant: bool = False
    ignore_bias_buffers: bool = False

    # --- dataset sizes ---
    # NOTE(review): presumably the number of train / eval examples to use;
    # the unit is not established by this file -- confirm.
    train_size: int = 1000
    eval_size: int = 100
|