mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
Test training with trl (#70)
* first trl grpo implementation
* added config yaml file
* added README and dependencies
* updated reward format func
This commit is contained in:
parent
a607db79f7
commit
d61db3772a
5 changed files with 287 additions and 0 deletions
18
examples/trl/grpo_config.py
Normal file
18
examples/trl/grpo_config.py
Normal file
|
|
@@ -0,0 +1,18 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class ScriptArguments:
    """
    Arguments for the training script.

    Only ``dataset_name`` is required; every other field carries a default,
    so ``ScriptArguments(dataset_name=...)`` is a complete configuration.

    NOTE(review): how each field is consumed is not visible in this file.
    The per-field comments below are inferred from the field names and
    should be confirmed against the training script that reads them.
    """

    # Required. Presumably the identifier of the dataset to load -- confirm.
    dataset_name: str
    # Optional dataset configuration/subset name; None when not given.
    dataset_config: Optional[str] = None
    # Presumably the names of the splits used for training and evaluation.
    dataset_train_split: str = "train"
    dataset_test_split: str = "test"
    # Presumably forwarded to the gradient-checkpointing setup -- confirm.
    gradient_checkpointing_use_reentrant: bool = False
    # Presumably a distributed-training toggle for bias buffers -- confirm.
    ignore_bias_buffers: bool = False
    # Presumably the number of training / evaluation examples -- confirm.
    train_size: int = 1000
    eval_size: int = 100
|
||||
Loading…
Add table
Add a link
Reference in a new issue