Add configuration mixed_precision for finetuning (intel#23)
* Add mixed_precision

* update

* update
carsonwang authored Jan 5, 2024
1 parent 91f3756 commit cd05f0e
Showing 13 changed files with 27 additions and 12 deletions.
1 change: 1 addition & 0 deletions docs/finetune_parameters.md
@@ -32,6 +32,7 @@ The following are the parameters supported in the finetuning workflow.
|learning_rate|1e-5|Initial learning rate to use.|
|lr_scheduler|linear|The scheduler type to use, supported value: "linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"|
|weight_decay|0.0|Weight decay is a regularization technique that adds an L2 penalty on the model weights to the loss function, which typically improves model generalization.|
|mixed_precision|no|Whether to use mixed precision training. Choose from "no", "fp16", "bf16", or "fp8". Defaults to "no" if not set.|
|device|CPU|The device type used, can be "CPU", "GPU".|
|num_training_workers|2|The number of the training process.|
|resources_per_worker|{"CPU": 32}|A dict to specify the resources for each worker. If `device` is "GPU", please set it like {"CPU": 32, "GPU": 1}.|
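For context on the new parameter documented above, a minimal sanity check of the `mixed_precision` value could look like the sketch below. It is only an illustration, assuming a config laid out like `finetune/finetune.yaml`; the `ALLOWED_MIXED_PRECISION` constant and the hard-coded path are hypothetical, not part of this commit.

```python
# Hypothetical sanity check for the new Training.mixed_precision field.
# Assumes a config laid out like finetune/finetune.yaml; not part of this commit.
import yaml

ALLOWED_MIXED_PRECISION = {"no", "fp16", "bf16", "fp8"}  # values documented in the table above

with open("finetune/finetune.yaml") as f:
    config = yaml.safe_load(f)

mixed_precision = config["Training"].get("mixed_precision", "no")  # "no" is the documented default
if mixed_precision not in ALLOWED_MIXED_PRECISION:
    raise ValueError(f"Unsupported mixed_precision value: {mixed_precision!r}")
print(f"mixed_precision = {mixed_precision}")
```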
27 changes: 15 additions & 12 deletions finetune/finetune.py
@@ -26,31 +26,34 @@
 from finetune_config import FinetuneConfig


-def get_accelerate_environment_variable(mode: str) -> dict:
+def get_accelerate_environment_variable(mode: str, config: Dict[str, Any]) -> dict:
+    mixed_precision = config["Training"]["mixed_precision"]
     mode_env_vars = {
         "CPU_DDP": {
-            "ACCELERATE_USE_CPU": "True",
-            "ACCELERATE_USE_IPEX": "False",
-            "ACCELERATE_MIXED_PRECISION": "no",
+            "ACCELERATE_USE_CPU": "true",
+            "ACCELERATE_USE_IPEX": "false",
+            "ACCELERATE_MIXED_PRECISION": mixed_precision,
         },
         "GPU_DDP": {
-            "ACCELERATE_USE_CPU": "False",
-            "ACCELERATE_USE_XPU": "True",
-            "ACCELERATE_USE_IPEX": "True",
+            "ACCELERATE_USE_CPU": "false",
+            "ACCELERATE_USE_XPU": "true",
+            "ACCELERATE_USE_IPEX": "true",
+            "ACCELERATE_MIXED_PRECISION": mixed_precision,
         },
         "GPU_FSDP": {
-            "ACCELERATE_USE_CPU": "False",
-            "ACCELERATE_USE_XPU": "True",
-            "ACCELERATE_USE_IPEX": "True",
+            "ACCELERATE_USE_CPU": "false",
+            "ACCELERATE_USE_XPU": "true",
+            "ACCELERATE_USE_IPEX": "true",
             "ACCELERATE_USE_FSDP": "true",
             "FSDP_SHARDING_STRATEGY": "1",
             "FSDP_OFFLOAD_PARAMS": "false",
-            "FSDP_AUTO_WRAP_POLICY": "NO_WRAP ",
+            "FSDP_AUTO_WRAP_POLICY": "NO_WRAP",
             "FSDP_BACKWARD_PREFETCH": "BACKWARD_PRE",
             "FSDP_STATE_DICT_TYPE": "SHARDED_STATE_DICT",
             "FSDP_FORWARD_PREFETCH": "false",
             "FSDP_USE_ORIG_PARAMS": "false",
             "FSDP_SYNC_MODULE_STATES": "true",
+            "ACCELERATE_MIXED_PRECISION": mixed_precision,
         }
     }
     if mode not in mode_env_vars:
@@ -193,7 +196,7 @@ def main(external_config = None):
         }
     }

-    accelerate_env_vars = get_accelerate_environment_variable(accelerate_mode)
+    accelerate_env_vars = get_accelerate_environment_variable(accelerate_mode, config)
     runtime_env["env_vars"].update(accelerate_env_vars)

     if config["General"]["gpt_base_model"] == True:
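To make the data flow concrete: the commit copies `Training.mixed_precision` into the `ACCELERATE_MIXED_PRECISION` environment variable, which the training workers inherit via Ray's runtime environment. The standalone sketch below mirrors only the `CPU_DDP` branch of `get_accelerate_environment_variable`; it is an illustration under that assumption, not the full implementation.

```python
# Minimal sketch of the pattern above (CPU_DDP branch only); not the full implementation.
import os
from typing import Any, Dict


def cpu_ddp_env_vars(config: Dict[str, Any]) -> Dict[str, str]:
    """Map the Training.mixed_precision setting onto Accelerate environment variables."""
    mixed_precision = config["Training"].get("mixed_precision", "no")
    return {
        "ACCELERATE_USE_CPU": "true",
        "ACCELERATE_USE_IPEX": "false",
        "ACCELERATE_MIXED_PRECISION": mixed_precision,
    }


config = {"Training": {"mixed_precision": "bf16"}}
os.environ.update(cpu_ddp_env_vars(config))  # workers launched afterwards inherit these variables
print(os.environ["ACCELERATE_MIXED_PRECISION"])  # -> bf16
```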
1 change: 1 addition & 0 deletions finetune/finetune.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
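With `mixed_precision: bf16` in the YAML and the variables exported as above, one way to check that Accelerate resolved the setting inside a worker is sketched below. This assumes the `accelerate` package is installed and that `Accelerator.mixed_precision` reflects the environment variable; treat it as a quick diagnostic, not part of this commit.

```python
# Sketch: verify inside a training process that Accelerate resolved the precision mode.
# Assumes the accelerate package is installed; the launcher normally sets this variable.
import os

from accelerate import Accelerator

os.environ.setdefault("ACCELERATE_MIXED_PRECISION", "bf16")
accelerator = Accelerator()
print(accelerator.mixed_precision)  # expected to print "bf16"
```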
1 change: 1 addition & 0 deletions finetune/finetune_config.py
@@ -53,6 +53,7 @@ class Training(BaseModel):
     num_training_workers: int
     resources_per_worker: RayResourceConfig
     accelerate_mode: str
+    mixed_precision: str = "no"

     @validator("device")
     def check_device(cls, v: str):
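As written, the new field accepts any string; a validator in the style of the existing `check_device` could restrict it to the documented choices. The sketch below is a possible follow-up, not part of this commit, and assumes the pydantic v1 `@validator` API already used in this file.

```python
# Hypothetical follow-up, not part of this commit: validate mixed_precision
# in the same style as the existing check_device validator (pydantic v1 API).
from pydantic import BaseModel, validator


class Training(BaseModel):
    mixed_precision: str = "no"

    @validator("mixed_precision")
    def check_mixed_precision(cls, v: str):
        if v not in ("no", "fp16", "bf16", "fp8"):
            raise ValueError(f"mixed_precision must be one of no/fp16/bf16/fp8, got {v!r}")
        return v


print(Training(mixed_precision="bf16"))  # Training(mixed_precision='bf16')
```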
1 change: 1 addition & 0 deletions finetune/models/bloom-560m.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/finetune_config_template.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/gpt-j-6b.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/gpt2.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/llama-2-7b-chat-hf.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/llama-7b.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/mistral-7b-v0.1.yaml
@@ -31,6 +31,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/mpt-7b-chat.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
1 change: 1 addition & 0 deletions finetune/models/opt-125m.yaml
@@ -22,6 +22,7 @@ Training:
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
