more readme changes

2026-04-23 16:54:56 +00:00 · 2026-03-02 11:15:34 -05:00 · 2026-03-02 11:15:34 -05:00 · 585244559e
commit 585244559e
parent 4a7da8049f
3 changed files with 9 additions and 82 deletions
--- a/example_trainer/README.md
+++ b/example_trainer/README.md
@@ -169,7 +169,7 @@ python -m example_trainer.grpo \
 ### Startup Order

 ```bash
-# CRITICAL: Follow this exact order!
+# Follow this startup order
 # 1. Start API first
 run-api --port 8002

@@ -306,7 +306,7 @@ environment uses the `/generate` path and includes token-level

 ### 2. Clipping Is Essential

-**CRITICAL:** Keep clipping enabled to avoid unstable policy updates:
+Keep clipping enabled to avoid unstable policy updates:

 ```bash
 --clip-eps 0.2     # Limits importance sampling ratio to [0.8, 1.2]
@@ -869,7 +869,7 @@ If your model has `N` layers:
         "tokens": [[tok1, tok2, ...], ...],  # group_size sequences
         "masks": [[mask1, mask2, ...], ...],  # -100 for prompt, token_id for generated
         "scores": [score1, score2, ...],      # rewards
-         "inference_logprobs": [[lp1, lp2, ...], ...],  # CRITICAL for GRPO!
+         "inference_logprobs": [[lp1, lp2, ...], ...],  # required for this GRPO trainer
         "generation_params": {"temperature": 1.0},
         ...
       }