add graphs eval configs

This commit is contained in:
Oliver 2025-07-28 12:21:31 +01:00
parent 1ab6cf3a12
commit 523d56f019
2 changed files with 43 additions and 1 deletions

View file

@ -0,0 +1,40 @@
# Config used for evaluating curriculum experiment models on graphs composite data
# Models evaluated on this config:
# Qwen/Qwen2.5-3B-Instruct (original model)
# qwen3b_graphs_noncurriculum_300 (original + 300 GRPO steps on non-curriculum graphs data)
# qwen3b_graphs_curriculum_300 (original + 300 GRPO steps on curriculum graphs data)
model_path: Qwen/Qwen2.5-3B-Instruct # Default model path
max_tokens: 2048 # From max_response_length in training config
top_p: 1.0
temperature: 1.0 # Default sampling temperature (matches training config)
dtype: bfloat16
developer_prompt: DeepSeekZero
developer_role: system
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 1
categories:
- category: graphs
datasets:
- dataset: course_schedule
size: 50
seed: 42
- dataset: family_relationships
size: 50
seed: 42
- dataset: largest_island
size: 50
seed: 42
- dataset: quantum_lock
size: 50
seed: 42
- dataset: shortest_path
size: 50
seed: 42

View file

@ -45,6 +45,7 @@ class EvalConfig:
model_path: str
max_tokens: int
temperature: float
dtype: str
top_p: float
output_dir: str
save_metadata: bool
@ -82,7 +83,7 @@ class LocalModelEvaluator:
self.verbose = verbose
# Load model and tokenizer
self.llm = LLM(model=model_path)
self.llm = LLM(model=model_path, dtype=config.dtype)
self.tokenizer = self.llm.get_tokenizer()
self.sampling_params = SamplingParams(
temperature=config.temperature,
@ -214,6 +215,7 @@ class LocalModelEvaluator:
"duration_seconds": (datetime.now() - self.start_time).total_seconds(),
"max_tokens": self.config.max_tokens,
"temperature": self.config.temperature,
"dtype": self.config.dtype,
"top_p": self.config.top_p,
"eval_repeats": self.config.eval_repeats,
},