mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-22 16:48:57 +00:00
first commit
This commit is contained in:
commit
621d00dd80
89 changed files with 15315 additions and 0 deletions
52
environments/dataset_environment/configs/dataset_local.yaml
Normal file
52
environments/dataset_environment/configs/dataset_local.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
---
# Dataset Environment Local Testing Configuration

# Base environment parameters
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
rollout_server_url: "http://localhost:8000"
total_steps: 1
batch_size: 1
steps_per_eval: 5
max_token_length: 4096
wandb_name: "dataset_test_local"
ensure_scores_are_not_same: false

# Dataset specific configuration
dataset:
  # Dataset parameters
  dataset_name: "gsm8k"  # Example dataset
  dataset_config: "main"
  split: "train"
  prompt_field: "question"
  answer_field: "answer"

  # Generation parameters
  system_prompt: "You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer.\n\nFollow these steps:\n1. Understand the problem carefully\n2. Plan your approach\n3. Execute the calculations step-by-step\n4. Verify your solution\n\nYou may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution."
  prefill: "<think>\n"
  shuffle_dataset: true
  max_generations_per_prompt: 1

  # Generation length parameters
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 0

  # Completion parameters
  temperature: 0.7
  top_p: 0.9

  # Reward functions
  reward_functions:
    - "accuracy"
    - "format"
  accuracy_reward_weight: 1.0
  format_reward_weight: 0.2

# Server configuration
server_configs:
  - model_name: "gpt-4.1-nano"
    # Quoted so the ${...} placeholder is always read as a plain string
    # (the consumer expands the environment variable, not the YAML parser).
    api_key: "${OPENAI_API_KEY}"
    timeout: 600
|
||||
73
environments/dataset_environment/configs/gsm8k.yaml
Normal file
73
environments/dataset_environment/configs/gsm8k.yaml
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
---
# GSM8K training configuration for the dataset environment.
# NOTE(review): indentation was lost in the source; nesting below is
# reconstructed by analogy with dataset_local.yaml — confirm against the
# environment's config schema.
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-1B-Preview"
group_size: 8
use_wandb: true
max_num_workers: 256
max_eval_workers: 16
steps_per_eval: 100
batch_size: 1024
max_batches_offpolicy: 3
total_steps: 1000
rollout_server_url: "http://localhost:8000"

use_local_agents: true

dataset:
  dataset_name: "gsm8k"
  dataset_config: "main"
  split: "train"

  prompt_field: "question"
  answer_field: "answer"

  system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem."
  shuffle_dataset: true
  max_generations_per_prompt: 1
  include_messages_in_scoring: false

  # New configurable reward functions
  reward_functions:
    - type: "r1"
      weight: 1.5
      params:
        format_weight: 0.5
        accuracy_weight: 1.0
    - type: "cosine_scaled"
      weight: 0.8
      params:
        scale_factor: 1.2
        min_reward: -1.0
        max_reward: 1.0
    - type: "accuracy"
      weight: 2.0
      params:
        split_on_think_tag: true
    - type: "format"
      weight: 0.7
      params:
        preferred_tags: ["think", "reasoning"]
        require_all_tags: false
    - type: "reasoning_steps"
      weight: 1.0
      params:
        min_steps: 3
    - type: "repetition_penalty"
      weight: 0.5
      params:
        threshold: 0.1

  # Legacy format still supported for backward compatibility
  # reward_funcs:
  #   - "r1_reward"
  #   - "cosine_scaled_reward"
  #   - "accuracy_reward"
  #   - "format_reward"
  #   - "reasoning_steps_reward"
  #   - "repetition_penalty_reward"

  # Generation length parameters (nested under dataset, matching
  # dataset_local.yaml — TODO confirm)
  max_tokens: 16000
  length_warmup_steps: 100
  min_tokens: 2048

  # Evaluation dataset
  eval_dataset_name: "gsm8k"
  eval_dataset_config: "main"
  eval_split: "test"
|
||||
30
environments/dataset_environment/configs/gsm8k_debug.yaml
Normal file
30
environments/dataset_environment/configs/gsm8k_debug.yaml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
---
# GSM8K single-worker debug configuration for the dataset environment.
# NOTE(review): indentation was lost in the source; nesting below is
# reconstructed by analogy with dataset_local.yaml — confirm against the
# environment's config schema.
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
max_eval_workers: 0
batch_size: 1
total_steps: 100
rollout_server_url: "http://localhost:8000"

dataset:
  dataset_name: "gsm8k"
  dataset_config: "main"
  split: "train"

  prompt_field: "question"
  answer_field: "answer"

  system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem."
  shuffle_dataset: true
  max_generations_per_prompt: 1
  include_messages_in_scoring: true

  # Using multiple reward functions for testing (legacy list format)
  reward_funcs:
    - "accuracy_reward"
    - "format_reward"

  # Generation length parameters (nested under dataset, matching
  # dataset_local.yaml — TODO confirm)
  max_tokens: 4096
  length_warmup_steps: 0
  min_tokens: 200
|
||||
Loading…
Add table
Add a link
Reference in a new issue