Skip to content

Commit

Permalink
Update on "[PP] add flexible interleaved 1f1b schedule"
Browse files: browse the repository at this point in the history
fixes #483

`python test_runner.py ./out --test pp_looped_flexible_1f1b`


[ghstack-poisoned]
  • Loading branch information
H-Huang committed Jul 30, 2024
2 parents 3b2c865 + 40c6398 commit 63dc3df
Showing 1 changed file with 15 additions and 15 deletions.
30 changes: 15 additions & 15 deletions test_runner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,6 +46,21 @@ def build_test_list():
"""
integration_tests_flavors = defaultdict(list)
integration_tests_flavors["debug_model.toml"] = [
OverrideDefinitions(
[
[
"--checkpoint.enable_checkpoint",
"--experimental.pipeline_parallel_degree 4",
"--experimental.pipeline_parallel_split_points layers.1,layers.2,layers.3,layers.4,layers.5,layers.6,layers.7",
"--experimental.pipeline_parallel_schedule flexible_interleaved_1f1b",
"--model.norm_type rmsnorm", # fused_rmsnorm throws cuda context error with pp
],
],
"PP looped flexible 1f1b test",
"pp_looped_flexible_1f1b",
requires_seed_checkpoint=True,
ngpu=4,
),
OverrideDefinitions(
[
[
Expand Down Expand Up @@ -254,21 +269,6 @@ def build_test_list():
requires_seed_checkpoint=True,
ngpu=4,
),
OverrideDefinitions(
[
[
"--checkpoint.enable_checkpoint",
"--experimental.pipeline_parallel_degree 4",
"--experimental.pipeline_parallel_split_points layers.1,layers.2,layers.3,layers.4,layers.5,layers.6,layers.7",
"--experimental.pipeline_parallel_schedule flexible_interleaved_1f1b",
"--model.norm_type rmsnorm", # fused_rmsnorm throws cuda context error with pp
],
],
"PP looped flexible 1f1b test",
"pp_looped_flexible_1f1b",
requires_seed_checkpoint=True,
ngpu=4,
),
OverrideDefinitions(
[
[
Expand Down

0 comments on commit 63dc3df

Please sign in to comment.