diff --git a/eval/yaml/default/claude-3.5-sonnet.yaml b/eval/yaml/easy/claude-3.5-sonnet.yaml similarity index 100% rename from eval/yaml/default/claude-3.5-sonnet.yaml rename to eval/yaml/easy/claude-3.5-sonnet.yaml diff --git a/eval/yaml/default/claude-3.7-sonnet.yaml b/eval/yaml/easy/claude-3.7-sonnet.yaml similarity index 100% rename from eval/yaml/default/claude-3.7-sonnet.yaml rename to eval/yaml/easy/claude-3.7-sonnet.yaml diff --git a/eval/yaml/default/claude-3.7-sonnet_thinking.yaml b/eval/yaml/easy/claude-3.7-sonnet_thinking.yaml similarity index 100% rename from eval/yaml/default/claude-3.7-sonnet_thinking.yaml rename to eval/yaml/easy/claude-3.7-sonnet_thinking.yaml diff --git a/eval/yaml/default/deepseek-r1.yaml b/eval/yaml/easy/deepseek-r1.yaml similarity index 100% rename from eval/yaml/default/deepseek-r1.yaml rename to eval/yaml/easy/deepseek-r1.yaml diff --git a/eval/yaml/default/google-gemma-3-27b-it.yaml b/eval/yaml/easy/google-gemma-3-27b-it.yaml similarity index 100% rename from eval/yaml/default/google-gemma-3-27b-it.yaml rename to eval/yaml/easy/google-gemma-3-27b-it.yaml diff --git a/eval/yaml/default/llama-3.1-8b-instruct.yaml b/eval/yaml/easy/llama-3.1-8b-instruct.yaml similarity index 100% rename from eval/yaml/default/llama-3.1-8b-instruct.yaml rename to eval/yaml/easy/llama-3.1-8b-instruct.yaml diff --git a/eval/yaml/default/llama-3.3-70b-instruct.yaml b/eval/yaml/easy/llama-3.3-70b-instruct.yaml similarity index 100% rename from eval/yaml/default/llama-3.3-70b-instruct.yaml rename to eval/yaml/easy/llama-3.3-70b-instruct.yaml diff --git a/eval/yaml/default/llama-4-maverick.yaml b/eval/yaml/easy/llama-4-maverick.yaml similarity index 100% rename from eval/yaml/default/llama-4-maverick.yaml rename to eval/yaml/easy/llama-4-maverick.yaml diff --git a/eval/yaml/default/openai-o1.yaml b/eval/yaml/easy/openai-o1.yaml similarity index 100% rename from eval/yaml/default/openai-o1.yaml rename to eval/yaml/easy/openai-o1.yaml diff --git a/eval/yaml/default/openai-o3-mini.yaml b/eval/yaml/easy/openai-o3-mini.yaml similarity index 100% rename from eval/yaml/default/openai-o3-mini.yaml rename to eval/yaml/easy/openai-o3-mini.yaml diff --git a/eval/yaml/default/qwen-qwq-32b.yaml b/eval/yaml/easy/qwen-qwq-32b.yaml similarity index 100% rename from eval/yaml/default/qwen-qwq-32b.yaml rename to eval/yaml/easy/qwen-qwq-32b.yaml diff --git a/eval/yaml/medium/claude-3.5-sonnet.yaml b/eval/yaml/hard/claude-3.5-sonnet.yaml similarity index 100% rename from eval/yaml/medium/claude-3.5-sonnet.yaml rename to eval/yaml/hard/claude-3.5-sonnet.yaml diff --git a/eval/yaml/medium/claude-3.7-sonnet_thinking.yaml b/eval/yaml/hard/claude-3.7-sonnet_thinking.yaml similarity index 100% rename from eval/yaml/medium/claude-3.7-sonnet_thinking.yaml rename to eval/yaml/hard/claude-3.7-sonnet_thinking.yaml diff --git a/eval/yaml/medium/deepseek-r1.yaml b/eval/yaml/hard/deepseek-r1.yaml similarity index 100% rename from eval/yaml/medium/deepseek-r1.yaml rename to eval/yaml/hard/deepseek-r1.yaml diff --git a/eval/yaml/medium/gemini-2.0-flash.yaml b/eval/yaml/hard/gemini-2.0-flash.yaml similarity index 100% rename from eval/yaml/medium/gemini-2.0-flash.yaml rename to eval/yaml/hard/gemini-2.0-flash.yaml diff --git a/eval/yaml/medium/gemma-3-12b.yaml b/eval/yaml/hard/gemma-3-12b.yaml similarity index 100% rename from eval/yaml/medium/gemma-3-12b.yaml rename to eval/yaml/hard/gemma-3-12b.yaml diff --git a/eval/yaml/medium/gemma-3-27b.yaml b/eval/yaml/hard/gemma-3-27b.yaml similarity index 100% rename from eval/yaml/medium/gemma-3-27b.yaml rename to eval/yaml/hard/gemma-3-27b.yaml diff --git a/eval/yaml/medium/gemma-3-4b.yaml b/eval/yaml/hard/gemma-3-4b.yaml similarity index 100% rename from eval/yaml/medium/gemma-3-4b.yaml rename to eval/yaml/hard/gemma-3-4b.yaml diff --git a/eval/yaml/medium/grok-3-mini.yaml b/eval/yaml/hard/grok-3-mini.yaml similarity index 100% rename from eval/yaml/medium/grok-3-mini.yaml rename to eval/yaml/hard/grok-3-mini.yaml diff --git a/eval/yaml/medium/llama-3.1-8b.yaml b/eval/yaml/hard/llama-3.1-8b.yaml similarity index 100% rename from eval/yaml/medium/llama-3.1-8b.yaml rename to eval/yaml/hard/llama-3.1-8b.yaml diff --git a/eval/yaml/medium/llama-3.2-3b.yaml b/eval/yaml/hard/llama-3.2-3b.yaml similarity index 100% rename from eval/yaml/medium/llama-3.2-3b.yaml rename to eval/yaml/hard/llama-3.2-3b.yaml diff --git a/eval/yaml/medium/llama-3.3-70b.yaml b/eval/yaml/hard/llama-3.3-70b.yaml similarity index 100% rename from eval/yaml/medium/llama-3.3-70b.yaml rename to eval/yaml/hard/llama-3.3-70b.yaml diff --git a/eval/yaml/medium/llama-4-maverick.yaml b/eval/yaml/hard/llama-4-maverick.yaml similarity index 100% rename from eval/yaml/medium/llama-4-maverick.yaml rename to eval/yaml/hard/llama-4-maverick.yaml diff --git a/eval/yaml/medium/llama-4-scout.yaml b/eval/yaml/hard/llama-4-scout.yaml similarity index 100% rename from eval/yaml/medium/llama-4-scout.yaml rename to eval/yaml/hard/llama-4-scout.yaml diff --git a/eval/yaml/medium/mistral-small-3.1-24b.yaml b/eval/yaml/hard/mistral-small-3.1-24b.yaml similarity index 100% rename from eval/yaml/medium/mistral-small-3.1-24b.yaml rename to eval/yaml/hard/mistral-small-3.1-24b.yaml diff --git a/eval/yaml/medium/o3-mini.yaml b/eval/yaml/hard/o3-mini.yaml similarity index 100% rename from eval/yaml/medium/o3-mini.yaml rename to eval/yaml/hard/o3-mini.yaml diff --git a/eval/yaml/medium/optimus-alpha.yaml b/eval/yaml/hard/optimus-alpha.yaml similarity index 100% rename from eval/yaml/medium/optimus-alpha.yaml rename to eval/yaml/hard/optimus-alpha.yaml diff --git a/eval/yaml/medium/qwen-qwq-32b.yaml b/eval/yaml/hard/qwen-qwq-32b.yaml similarity index 100% rename from eval/yaml/medium/qwen-qwq-32b.yaml rename to eval/yaml/hard/qwen-qwq-32b.yaml