qwen math training code (#435)

* qwen math training code

* pre-commit
This commit is contained in:
Zafir Stojanovski 2025-05-16 13:19:19 +02:00 committed by GitHub
parent 47303211b3
commit 0cda6b1205
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
51 changed files with 155089 additions and 0 deletions

View file

@ -0,0 +1,22 @@
# Launch configuration: one machine, two processes, DEEPSPEED distributed
# type, bf16 mixed precision.
compute_environment: LOCAL_MACHINE
debug: false
# DeepSpeed-specific options must be nested under `deepspeed_config`. If they
# sit flush with the other keys, `deepspeed_config` parses as null and the
# options become unrelated top-level settings.
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 2
  # NOTE(review): assumed to belong to the DeepSpeed section because it
  # directly follows the other DeepSpeed options - confirm.
  gradient_clipping: auto
distributed_type: DEEPSPEED
# Quoted so the value stays the string 'no' rather than a YAML 1.1 boolean.
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false