added local evals

2026-04-29 17:35:16 +00:00 · 2025-03-28 05:00:30 +00:00 · 2025-03-28 05:00:30 +00:00 · 774d23664d
commit 774d23664d
parent 7368d6d313
2 changed files with 511 additions and 0 deletions
--- a/eval/configs/eval_config.yaml
+++ b/eval/configs/eval_config.yaml
@@ -0,0 +1,25 @@
+# Model configuration
+model: Qwen/Qwen2.5-0.5B-Instruct  # Model identifier: the smaller 0.5B instruct variant
+hf_path: /workspace/joe/verl_grpo_qwen_3b_curr  # NOTE(review): absolute, machine-specific path; name says "3b" while model above is 0.5B - confirm they match
+max_tokens: 1024  # Taken from max_response_length in the training config
+temperature: 0.7  # Sampling temperature; kept lower for more focused responses
+top_p: 0.9  # Matches the rollout top_p
+developer_prompt: DeepSeekZero  # presumably the name of a predefined prompt template - verify against the loader
+developer_role: system  # Standard role for system prompts
+
+# Output configuration
+output_dir: eval_results  # relative path; presumably resolved against the working directory - confirm
+save_metadata: true
+save_full_results: true
+
+# Categories and datasets to evaluate
+categories:
+  - category: reasoning
+    datasets:
+      - dataset: spell_backward
+        size: 1000  # Taken from the training dataset_size
+        seed: 42  # Fixed seed, presumably for reproducible dataset generation - confirm
+        params:
+          min_word_len: 3  # Taken from the training config
+          max_word_len: 10
+          data_file: holdout_words.txt  # presumably a held-out word list; how this path is resolved is not shown here - confirm