mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
Move BLEUBERI environment to community folder
- Moved environments/bleuberi to environments/community/bleuberi
- Updated .gitmodules to reflect new submodule path
- Fixed pre-commit formatting issues
- Cleaned up test output files
This commit is contained in:
parent
532024d01e
commit
0f6c06bb56
8 changed files with 16 additions and 9 deletions
|
|
@@ -1,41 +0,0 @@
|
|||
env:
|
||||
wandb_name: bleuberi
|
||||
group_size: 4
|
||||
max_token_length: 2048
|
||||
max_num_workers_per_node: 8
|
||||
steps_per_eval: 100
|
||||
total_steps: 1000
|
||||
include_messages: true
|
||||
|
||||
# Dataset configuration
|
||||
dataset_name: "allenai/tulu-3-sft-mixture"
|
||||
dataset_split: "train"
|
||||
selection_mode: "hard"
|
||||
num_examples: 5000
|
||||
cache_dir: null
|
||||
streaming: false
|
||||
shuffle: true
|
||||
|
||||
# Reward configuration
|
||||
reward_funcs:
|
||||
- "bleu"
|
||||
ref_models:
|
||||
- "gold" # Use ground truth as reference
|
||||
|
||||
# System prompt configuration
|
||||
system_prompt: "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem. After your thinking, make sure to clearly provide your final answer inside <answer></answer> tags."
|
||||
|
||||
# Seeds and evaluation
|
||||
seed: 42
|
||||
eval_seed: 123
|
||||
num_eval_samples_per_task: 5
|
||||
eval_limit_ratio: 0.1
|
||||
reasoning: true
|
||||
|
||||
openai:
|
||||
base_url: "http://localhost:8000/v1"
|
||||
model: "Llama-3.1-8B-Instruct"
|
||||
api_key: "PLACEHOLDER"
|
||||
temperature: 0.7
|
||||
max_tokens: 1024
|
||||
top_p: 0.95
|
||||
Loading…
Add table
Add a link
Reference in a new issue