simplified config and reward

2026-04-24 17:04:55 +00:00 · 2025-05-10 08:04:39 +10:00 · 2025-05-10 08:04:39 +10:00 · c506bb147e
commit c506bb147e
parent 7e95c0b67d
4 changed files with 210 additions and 455 deletions
--- a/atroposlib/utils/tokenize_for_trainer.py
+++ b/atroposlib/utils/tokenize_for_trainer.py
@@ -4,7 +4,7 @@ from transformers import PreTrainedTokenizer
 from atroposlib.type_definitions import Message

 # Roles that should be masked in the loss calculation (not used for training)
-UNMASKED_ROLES = ["assistant"]
+UNMASKED_ROLES = ["assistant", "agent"]


 def tokenize_for_trainer(