Test training with trl (#70)

* first trl grpo implementation
* added config yaml file
* added read me and dependencies
* updated reward format func
This commit is contained in:
joesharratt1229 2025-02-07 06:42:32 +00:00 committed by GitHub
parent a607db79f7
commit d61db3772a
5 changed files with 287 additions and 0 deletions

View file

@ -0,0 +1,18 @@
from dataclasses import dataclass
from typing import Optional
@dataclass
class ScriptArguments:
    """Arguments for the training script.

    ``dataset_name`` is the only field declared without a default, so it
    must always be supplied. Every other field falls back to the default
    declared below when omitted.

    NOTE(review): the per-field comments describe intent inferred from the
    field names only; this class does not show how the values are consumed.
    Confirm against the training script before relying on them.
    """

    # Required: no default is declared for this field.
    dataset_name: str
    # Presumably selects a named configuration/subset of the dataset;
    # None when not given -- TODO confirm how None is handled downstream.
    dataset_config: Optional[str] = None
    # Presumably the names of the splits used for training and evaluation.
    dataset_train_split: str = "train"
    dataset_test_split: str = "test"
    # Presumably forwarded as the `use_reentrant` option of gradient
    # checkpointing -- verify against the caller.
    gradient_checkpointing_use_reentrant: bool = False
    # NOTE(review): purpose is not visible here; off by default.
    ignore_bias_buffers: bool = False
    # Presumably the number of examples taken for training and evaluation
    # respectively -- TODO confirm whether these are caps or exact counts.
    train_size: int = 1000
    eval_size: int = 100