first commit

This commit is contained in:
Dakota Nous 2025-04-29 12:10:10 -07:00
commit 621d00dd80
89 changed files with 15315 additions and 0 deletions

View file

@ -0,0 +1,52 @@
# Dataset Environment Local Testing Configuration
# Base environment parameters
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
rollout_server_url: "http://localhost:8000"
total_steps: 1
batch_size: 1
steps_per_eval: 5
max_token_length: 4096
wandb_name: "dataset_test_local"
ensure_scores_are_not_same: false
# Dataset specific configuration
dataset:
# Dataset parameters
dataset_name: "gsm8k" # Example dataset
dataset_config: "main"
split: "train"
prompt_field: "question"
answer_field: "answer"
# Generation parameters
system_prompt: "You are an expert mathematician. You need to solve the given math problem step-by-step, showing your reasoning clearly. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your final answer.\n\nFollow these steps:\n1. Understand the problem carefully\n2. Plan your approach\n3. Execute the calculations step-by-step\n4. Verify your solution\n\nYou may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution."
prefill: "<think>\n"
shuffle_dataset: true
max_generations_per_prompt: 1
# Generation length parameters
max_tokens: 4096
length_warmup_steps: 0
min_tokens: 0
# Completion parameters
temperature: 0.7
top_p: 0.9
# Reward functions
reward_functions:
- "accuracy"
- "format"
accuracy_reward_weight: 1.0
format_reward_weight: 0.2
# Server configuration
server_configs:
- model_name: "gpt-4.1-nano"
api_key: ${OPENAI_API_KEY}
timeout: 600

View file

@ -0,0 +1,73 @@
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-1B-Preview"
group_size: 8
use_wandb: true
max_num_workers: 256
max_eval_workers: 16
steps_per_eval: 100
batch_size: 1024
max_batches_offpolicy: 3
total_steps: 1000
rollout_server_url: "http://localhost:8000"
use_local_agents: true
dataset:
dataset_name: "gsm8k"
dataset_config: "main"
split: "train"
prompt_field: "question"
answer_field: "answer"
system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem."
shuffle_dataset: true
max_generations_per_prompt: 1
include_messages_in_scoring: false
# New configurable reward functions
reward_functions:
- type: "r1"
weight: 1.5
params:
format_weight: 0.5
accuracy_weight: 1.0
- type: "cosine_scaled"
weight: 0.8
params:
scale_factor: 1.2
min_reward: -1.0
max_reward: 1.0
- type: "accuracy"
weight: 2.0
params:
split_on_think_tag: true
- type: "format"
weight: 0.7
params:
preferred_tags: ["think", "reasoning"]
require_all_tags: false
- type: "reasoning_steps"
weight: 1.0
params:
min_steps: 3
- type: "repetition_penalty"
weight: 0.5
params:
threshold: 0.1
# Legacy format still supported for backward compatibility
# reward_funcs:
# - "r1_reward"
# - "cosine_scaled_reward"
# - "accuracy_reward"
# - "format_reward"
# - "reasoning_steps_reward"
# - "repetition_penalty_reward"
max_tokens: 16000
length_warmup_steps: 100
min_tokens: 2048
eval_dataset_name: "gsm8k"
eval_dataset_config: "main"
eval_split: "test"

View file

@ -0,0 +1,30 @@
tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
group_size: 1
use_wandb: false
max_num_workers: 1
max_eval_workers: 0
batch_size: 1
total_steps: 100
rollout_server_url: "http://localhost:8000"
dataset:
dataset_name: "gsm8k"
dataset_config: "main"
split: "train"
prompt_field: "question"
answer_field: "answer"
system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem."
shuffle_dataset: true
max_generations_per_prompt: 1
include_messages_in_scoring: true
# Using multiple reward functions for testing
reward_funcs:
- "accuracy_reward"
- "format_reward"
max_tokens: 4096
length_warmup_steps: 0
min_tokens: 200