Test training with trl (#70)

* first TRL GRPO implementation * added config YAML file * added README and dependencies * updated reward format function
2026-04-22 16:49:06 +00:00 · 2025-02-07 06:42:32 +00:00 · 2025-02-07 06:42:32 +00:00 · d61db3772a
commit d61db3772a
parent a607db79f7
5 changed files with 287 additions and 0 deletions
--- a/examples/trl/grpo_config.py
+++ b/examples/trl/grpo_config.py
@ -0,0 +1,18 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class ScriptArguments:
+    """
+    Arguments for the training script; only `dataset_name` must be supplied, every other field has a default.
+    """
+
+    dataset_name: str  # required: the only field declared without a default
+    dataset_config: Optional[str] = None  # presumably a dataset configuration/subset name -- confirm against the script
+    dataset_train_split: str = "train"  # presumably the split name used for training -- confirm
+    dataset_test_split: str = "test"  # presumably the split name used for evaluation -- confirm
+    gradient_checkpointing_use_reentrant: bool = False  # NOTE(review): consumer not visible here -- confirm how it is used
+    ignore_bias_buffers: bool = False  # NOTE(review): consumer not visible here -- confirm how it is used
+    train_size: int = 1000  # presumably the number of training examples to use -- confirm
+    eval_size: int = 100  # presumably the number of evaluation examples to use -- confirm