diff --git a/training/evaluations/curriculum/graphs.yml b/training/evaluations/curriculum/graphs.yml new file mode 100644 index 00000000..8fa521f6 --- /dev/null +++ b/training/evaluations/curriculum/graphs.yml @@ -0,0 +1,40 @@ +# Config used for evaluating curriculum experiment models on graphs composite data + +# Models evaluated on this config: +# Qwen/Qwen2.5-3B-Instruct (original model) +# qwen3b_graphs_noncurriculum_300 (original + 300 GRPO steps on non-curriculum graphs data) +# qwen3b_graphs_curriculum_300 (original + 300 GRPO steps on curriculum graphs data) + +model_path: Qwen/Qwen2.5-3B-Instruct # Default model path + +max_tokens: 2048 # From max_response_length in training config +top_p: 1.0 +temperature: 1.0 # Unscaled sampling temperature (1.0 = sample from the model's raw distribution) +dtype: bfloat16 + +developer_prompt: DeepSeekZero +developer_role: system + +output_dir: results +save_metadata: true +save_full_results: true +eval_repeats: 1 + +categories: + - category: graphs + datasets: + - dataset: course_schedule + size: 50 + seed: 42 + - dataset: family_relationships + size: 50 + seed: 42 + - dataset: largest_island + size: 50 + seed: 42 + - dataset: quantum_lock + size: 50 + seed: 42 + - dataset: shortest_path + size: 50 + seed: 42 diff --git a/training/evaluations/evaluate_model.py b/training/evaluations/evaluate_model.py index 5b51ee48..2714eeaa 100644 --- a/training/evaluations/evaluate_model.py +++ b/training/evaluations/evaluate_model.py @@ -45,6 +45,7 @@ class EvalConfig: model_path: str max_tokens: int temperature: float + dtype: str top_p: float output_dir: str save_metadata: bool @@ -82,7 +83,7 @@ class LocalModelEvaluator: self.verbose = verbose # Load model and tokenizer - self.llm = LLM(model=model_path) + self.llm = LLM(model=model_path, dtype=config.dtype) self.tokenizer = self.llm.get_tokenizer() self.sampling_params = SamplingParams( temperature=config.temperature, @@ -214,6 +215,7 @@ class LocalModelEvaluator: "duration_seconds": (datetime.now() - 
self.start_time).total_seconds(), "max_tokens": self.config.max_tokens, "temperature": self.config.temperature, + "dtype": self.config.dtype, "top_p": self.config.top_p, "eval_repeats": self.config.eval_repeats, },