# atropos/environments/hack0/wandb/run-20250518_174510-m5eheo08/files/config.yaml
# 2025-05-18 17:55:59 -07:00
#
# 78 lines
# 1.5 KiB
# YAML

# Run configuration snapshot.
# Layout: every top-level key is a mapping with a single nested `value` key
# that carries the actual setting.

# Tool/run metadata. The numeric keys under `t` are opaque from this file
# alone -- presumably W&B telemetry codes; confirm against the W&B docs.
# Entries "4" and "5" repeat python_version and cli_version respectively.
# Version-like scalars are quoted so they can never be re-typed as numbers.
_wandb:
  value:
    cli_version: "0.19.11"
    m: []
    python_version: "3.13.2"
    t:
      "1":
        - 1
        - 11
        - 49
        - 51
        - 55
        - 95
      "2":
        - 1
        - 11
        - 49
        - 51
        - 55
        - 95
      "3":
        - 13
        - 16
        - 23
        - 55
      "4": "3.13.2"
      "5": "0.19.11"
      "6": "4.51.3"
      "8":
        - 5
      "12": "0.19.11"
      "13": linux-x86_64

# Run settings, one `value` per key, in the original key order.
batch_size:
  value: -1
data_path:
  value: humor_dataset.jsonl
data_path_to_save_groups:
  value: data/humor_1.jsonl
ensure_scores_are_not_same:
  value: false
eval_handling:
  value: STOP_TRAIN
eval_limit_ratio:
  value: 0.5
group_size:
  value: 2
include_messages:
  value: true
inference_weight:
  value: 1
max_batches_offpolicy:
  value: 3
max_eval_workers:
  value: 16
max_num_workers:
  value: -1
max_num_workers_per_node:
  value: 8
max_token_length:
  value: 2048
min_items_sent_before_logging:
  value: 2
num_rollouts_per_group_for_logging:
  value: 1
num_rollouts_to_keep:
  value: 32
rollout_server_url:
  value: http://localhost:8000
steps_per_eval:
  value: 100
tokenizer_name:
  value: NousResearch/DeepHermes-3-Llama-3-3B-Preview
total_steps:
  value: 2
use_wandb:
  value: true
# Explicit null: no run name is set in this snapshot.
wandb_name:
  value: null