Fix chain sum veRL example for latest veRL (#371)

* fixes for latest veRL * add balance_batch config * 1 -> 2 GPU * tweaks * also add raw ids to server script
2026-04-28 17:29:39 +00:00 · 2025-03-14 19:15:54 +00:00 · 2025-03-14 19:15:54 +00:00 · bd13b1b92a
commit bd13b1b92a
parent 8a0cacc054
5 changed files with 11 additions and 1 deletions
--- a/examples/veRL/chain_sum/main_ppo_custom_reward.py
+++ b/examples/veRL/chain_sum/main_ppo_custom_reward.py
@@ -70,6 +70,7 @@ class ReasoningGymDataset(Dataset):
        row_dict["input_ids"] = input_ids[0]
        row_dict["attention_mask"] = attention_mask[0]
        row_dict["position_ids"] = position_ids[0]
+        row_dict["raw_prompt_ids"] = self.tokenizer.encode(prompt, add_special_tokens=False)

        # encode prompts without chat template
        if self.return_raw_chat: