Fix chain sum veRL example for latest veRL (#371)

* fixes for latest verl

* add balance_batch config

* 1 -> 2 gpu

* tweaks

* also add raw ids to server script
This commit is contained in:
Oliver Stanley 2025-03-14 19:15:54 +00:00 committed by GitHub
parent 8a0cacc054
commit bd13b1b92a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 11 additions and 1 deletions

View file

@@ -70,6 +70,7 @@ class ReasoningGymDataset(Dataset):
row_dict["input_ids"] = input_ids[0]
row_dict["attention_mask"] = attention_mask[0]
row_dict["position_ids"] = position_ids[0]
row_dict["raw_prompt_ids"] = self.tokenizer.encode(prompt, add_special_tokens=False)
# encode prompts without chat template
if self.return_raw_chat: