mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
add graphs eval configs
This commit is contained in:
parent
1ab6cf3a12
commit
523d56f019
2 changed files with 43 additions and 1 deletions
40
training/evaluations/curriculum/graphs.yml
Normal file
40
training/evaluations/curriculum/graphs.yml
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Config used for evaluating curriculum experiment models on graphs composite data
|
||||
|
||||
# Models evaluated on this config:
|
||||
# Qwen/Qwen2.5-3B-Instruct (original model)
|
||||
# qwen3b_graphs_noncurriculum_300 (original + 300 GRPO steps on non-curriculum graphs data)
|
||||
# qwen3b_graphs_curriculum_300 (original + 300 GRPO steps on curriculum graphs data)
|
||||
|
||||
model_path: Qwen/Qwen2.5-3B-Instruct # Default model path
|
||||
|
||||
max_tokens: 2048 # From max_response_length in training config
|
||||
top_p: 1.0
|
||||
temperature: 1.0 # Sampling temperature (matches training config; 1.0 = no sharpening)
|
||||
dtype: bfloat16
|
||||
|
||||
developer_prompt: DeepSeekZero
|
||||
developer_role: system
|
||||
|
||||
output_dir: results
|
||||
save_metadata: true
|
||||
save_full_results: true
|
||||
eval_repeats: 1
|
||||
|
||||
categories:
|
||||
- category: graphs
|
||||
datasets:
|
||||
- dataset: course_schedule
|
||||
size: 50
|
||||
seed: 42
|
||||
- dataset: family_relationships
|
||||
size: 50
|
||||
seed: 42
|
||||
- dataset: largest_island
|
||||
size: 50
|
||||
seed: 42
|
||||
- dataset: quantum_lock
|
||||
size: 50
|
||||
seed: 42
|
||||
- dataset: shortest_path
|
||||
size: 50
|
||||
seed: 42
|
||||
|
|
@ -45,6 +45,7 @@ class EvalConfig:
|
|||
model_path: str
|
||||
max_tokens: int
|
||||
temperature: float
|
||||
dtype: str
|
||||
top_p: float
|
||||
output_dir: str
|
||||
save_metadata: bool
|
||||
|
|
@ -82,7 +83,7 @@ class LocalModelEvaluator:
|
|||
self.verbose = verbose
|
||||
|
||||
# Load model and tokenizer
|
||||
self.llm = LLM(model=model_path)
|
||||
self.llm = LLM(model=model_path, dtype=config.dtype)
|
||||
self.tokenizer = self.llm.get_tokenizer()
|
||||
self.sampling_params = SamplingParams(
|
||||
temperature=config.temperature,
|
||||
|
|
@ -214,6 +215,7 @@ class LocalModelEvaluator:
|
|||
"duration_seconds": (datetime.now() - self.start_time).total_seconds(),
|
||||
"max_tokens": self.config.max_tokens,
|
||||
"temperature": self.config.temperature,
|
||||
"dtype": self.config.dtype,
|
||||
"top_p": self.config.top_p,
|
||||
"eval_repeats": self.config.eval_repeats,
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue