impl conditional reward

This commit is contained in:
Oliver 2025-04-24 19:36:30 +01:00
parent 450d3dcfa4
commit 830ac3e10a
13 changed files with 52 additions and 24 deletions

View file

@@ -53,6 +53,7 @@ curriculum:
word_len: 0
reward:
use_accuracy: True
conditional_reward: False
secondary_rewards:
- name: format
scaling_factor: 0.2