diff --git a/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_13b/pretrain-baichun2_13b-config.json b/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_13b/pretrain-baichun2_13b-config.json index e0e4238a71ce..307b9e9a6775 100644 --- a/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_13b/pretrain-baichun2_13b-config.json +++ b/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_13b/pretrain-baichun2_13b-config.json @@ -26,12 +26,12 @@ "scale_loss": 1024, "learning_rate": 1e-05, "min_learning_rate": 5e-06, - "max_steps": 100, + "max_steps": 200, "save_steps": 5000, "weight_decay": 0.01, "warmup_ratio": 0.01, "max_grad_norm": 1.0, - "logging_steps": 1, + "logging_steps": 5, "dataloader_num_workers": 1, "eval_steps": 1000, "disable_tqdm": true, diff --git a/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_7b/pretrain-baichun2_7b-config.json b/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_7b/pretrain-baichun2_7b-config.json index 8c1b4a0c61ef..25e4ff3044c6 100644 --- a/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_7b/pretrain-baichun2_7b-config.json +++ b/tests/test_tipc/dygraph/hybrid_parallelism/baichun2/auto_config_baichun2_7b/pretrain-baichun2_7b-config.json @@ -31,7 +31,7 @@ "weight_decay": 0.01, "warmup_ratio": 0.01, "max_grad_norm": 1.0, - "logging_steps": 1, + "logging_steps": 5, "dataloader_num_workers": 1, "eval_steps": 1000, "disable_tqdm": true, diff --git a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-14b_seqlen4096_pretrain_bs32_bf16_MP2-PP1-sharding16-mbs1-acc2.sh b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-14b_seqlen4096_pretrain_bs32_bf16_MP2-PP1-sharding16-mbs1-acc2.sh index bf6cd5a603b1..3de10a6e1431 100644 --- a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-14b_seqlen4096_pretrain_bs32_bf16_MP2-PP1-sharding16-mbs1-acc2.sh +++ b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-14b_seqlen4096_pretrain_bs32_bf16_MP2-PP1-sharding16-mbs1-acc2.sh @@ -28,7 +28,8 @@ param+="run_mode=MP2-PP1-sharding16-mbs1-acc2 " param+="device_num=N4C32 " param+="global_batch_size=32 " param+="model_item=qwen-qwen-14b_seqlen4096_pretrain " -param+="max_steps=100 " +param+="max_steps=200 " +param+="logging_steps=5 " param+="gradient_accumulation_steps=2 " param+="pp_recompute_interval=1 " param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add, " diff --git a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-7b_seqlen4096_pretrain_bs32_bf16_MP1-PP1-sharding32-mbs1-acc1.sh b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-7b_seqlen4096_pretrain_bs32_bf16_MP1-PP1-sharding32-mbs1-acc1.sh index 4a9ed7f11e82..6a31ce26d1f8 100644 --- a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-7b_seqlen4096_pretrain_bs32_bf16_MP1-PP1-sharding32-mbs1-acc1.sh +++ b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/N4C32/qwen-qwen-7b_seqlen4096_pretrain_bs32_bf16_MP1-PP1-sharding32-mbs1-acc1.sh @@ -28,7 +28,8 @@ param+="run_mode=MP1-PP1-sharding32-mbs1-acc1 " param+="device_num=N4C32 " param+="global_batch_size=32 " param+="model_item=qwen-qwen-7b_seqlen4096_pretrain " -param+="max_steps=100 " +param+="max_steps=200 " +param+="logging_steps=5 " param+="gradient_accumulation_steps=1 " param+="pp_recompute_interval=1 " param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add " diff --git a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/benchmark_common/run_benchmark.sh index 1440bbf85867..391b325e57d2 100644 --- a/tests/test_tipc/dygraph/hybrid_parallelism/qwen/benchmark_common/run_benchmark.sh +++ b/tests/test_tipc/dygraph/hybrid_parallelism/qwen/benchmark_common/run_benchmark.sh @@ -32,6 +32,7 @@ function _set_params(){ global_batch_size=${global_batch_size:-16} model_item=${model_item:-"qwen-qwen-7b_seqlen2048_pretrain"} max_steps=${max_steps:-150} + logging_steps=${logging_steps:-1} gradient_accumulation_steps=${gradient_accumulation_steps:-8} pp_recompute_interval=${pp_recompute_interval:-1} tensor_parallel_config=${tensor_parallel_config:-"enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add"} @@ -162,7 +163,7 @@ function _train(){ --weight_decay 0.01 \ --warmup_ratio 0.01 \ --max_grad_norm 1.0 \ - --logging_steps 1 \ + --logging_steps ${logging_steps} \ --dataloader_num_workers 1 \ --eval_steps ${eval_steps} \ --sharding ${sharding} \