added r1 evaluation logic

This commit is contained in:
joesharratt1229 2025-02-11 03:46:56 +00:00
parent 0657222a8f
commit 42e02640a3
6 changed files with 208 additions and 0 deletions

View file

@ -0,0 +1,25 @@
# Evaluation configuration: runs the deepseek/deepseek-r1 model against the
# datasets of the "algorithmic" category listed below.
model: deepseek/deepseek-r1
category: algorithmic
# Dataset identifiers to evaluate. Every name is a single snake_case token;
# a space inside a name becomes part of the string and will not match.
datasets:
- base_conversion
- binary_matrix
- caesar_cipher
- group_anagrams
- isomorphic_strings
- letter_counting
- letter_jumble
- number_filtering
- number_sorting
- palindrome
- ransom_note
- rotate_matrix
- sentence_reordering
- spell_backward
- spiral_matrix
- word_ladder
- word_sequence_reversal
- word_sorting
# NOTE(review): presumably the directory where evaluation output is written;
# confirm against the evaluation script.
eval_dir: eval/r1
# NOTE(review): presumably the number of samples per dataset and the seed used
# to generate them; confirm against the evaluation script.
dataset_size: 50
dataset_seed: 42
# NOTE(review): presumably the chat role under which the developer prompt is
# sent to the model; confirm against the evaluation script.
developer_role: system