Implement conditional reward

This commit is contained in:
Oliver 2025-04-24 19:36:30 +01:00
parent 450d3dcfa4
commit 830ac3e10a
13 changed files with 52 additions and 24 deletions

View file

@ -52,11 +52,8 @@ curriculum:
'*': 0
reward:
use_accuracy: True
conditional_reward: True # Only provide a reward at all if the response format is correct
secondary_rewards:
- name: format
scaling_factor: 0.2
kwargs:
preappend_thinking_token: False
- name: length
scaling_factor: 0.2