simplified config and reward

This commit is contained in:
Shannon Sands 2025-05-10 08:04:39 +10:00
parent 7e95c0b67d
commit c506bb147e
4 changed files with 210 additions and 455 deletions

View file

@@ -4,7 +4,7 @@ from transformers import PreTrainedTokenizer
from atroposlib.type_definitions import Message
# Roles that are left unmasked in the loss calculation (i.e. used for training)
UNMASKED_ROLES = ["assistant"]
UNMASKED_ROLES = ["assistant", "agent"]
def tokenize_for_trainer(