Commit

Merge branch 'main' into main
flybird11111 authored Apr 30, 2024
2 parents 6eb2655 + d3f34ee commit e0b21ea
Showing 266 changed files with 1,525 additions and 463 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/doc_test_on_pr.yml
@@ -56,7 +56,7 @@ jobs:
needs: detect-changed-doc
runs-on: [self-hosted, gpu]
container:
- image: hpcaitech/pytorch-cuda:2.0.0-11.7.0
+ image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
options: --gpus all --rm
timeout-minutes: 20
defaults:
2 changes: 1 addition & 1 deletion .github/workflows/release_docker_after_publish.yml
@@ -24,7 +24,7 @@ jobs:
version=$(cat version.txt)
tag=hpcaitech/colossalai:$version
latest=hpcaitech/colossalai:latest
- docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 --build-arg VERSION=v${version} -t $tag ./docker
+ docker build --build-arg VERSION=v${version} -t $tag ./docker
docker tag $tag $latest
echo "tag=${tag}" >> $GITHUB_OUTPUT
echo "latest=${latest}" >> $GITHUB_OUTPUT
15 changes: 15 additions & 0 deletions LICENSE
@@ -552,3 +552,18 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
---------------- LICENSE FOR Hugging Face accelerate ----------------

Copyright 2021 The HuggingFace Team

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
4 changes: 3 additions & 1 deletion README.md
@@ -25,6 +25,8 @@
</div>

## Latest News
* [2024/04] [Open-Sora Unveils Major Upgrade: Embracing Open Source with Single-Shot 16-Second Video Generation and 720p Resolution](https://hpc-ai.com/blog/open-soras-comprehensive-upgrade-unveiled-embracing-16-second-video-generation-and-720p-resolution-in-open-source)
* [2024/04] [Most cost-effective solutions for inference, fine-tuning and pretraining, tailored to LLaMA3 series](https://hpc-ai.com/blog/most-cost-effective-solutions-for-inference-fine-tuning-and-pretraining-tailored-to-llama3-series)
* [2024/03] [314 Billion Parameter Grok-1 Inference Accelerated by 3.8x, Efficient and Easy-to-Use PyTorch+HuggingFace version is Here](https://hpc-ai.com/blog/314-billion-parameter-grok-1-inference-accelerated-by-3.8x-efficient-and-easy-to-use-pytorchhuggingface-version-is-here)
* [2024/03] [Open-Sora: Revealing Complete Model Parameters, Training Details, and Everything for Sora-like Video Generation Models](https://hpc-ai.com/blog/open-sora-v1.0)
* [2024/03] [Open-Sora:Sora Replication Solution with 46% Cost Reduction, Sequence Expansion to Nearly a Million](https://hpc-ai.com/blog/open-sora)
@@ -131,7 +133,7 @@ distributed training and inference in a few lines.

[Open-Sora](https://github.com/hpcaitech/Open-Sora): Revealing Complete Model Parameters, Training Details, and Everything for Sora-like Video Generation Models
[[code]](https://github.com/hpcaitech/Open-Sora)
- [[blog]](https://hpc-ai.com/blog/open-sora-v1.0)
+ [[blog]](https://hpc-ai.com/blog/open-soras-comprehensive-upgrade-unveiled-embracing-16-second-video-generation-and-720p-resolution-in-open-source)
[[HuggingFace model weights]](https://huggingface.co/hpcai-tech/Open-Sora)
[[Demo]](https://github.com/hpcaitech/Open-Sora?tab=readme-ov-file#-latest-demo)

16 changes: 10 additions & 6 deletions applications/Colossal-LLaMA/colossal_llama/dataset/loader.py
@@ -80,15 +80,19 @@ def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[str, torch

# `List[torch.Tensor]`
batch_input_ids = [
torch.LongTensor(instance["input_ids"][: self.max_length])
if len(instance["input_ids"]) > self.max_length
else torch.LongTensor(instance["input_ids"])
(
torch.LongTensor(instance["input_ids"][: self.max_length])
if len(instance["input_ids"]) > self.max_length
else torch.LongTensor(instance["input_ids"])
)
for instance in instances
]
batch_labels = [
torch.LongTensor(instance["labels"][: self.max_length])
if len(instance["labels"]) > self.max_length
else torch.LongTensor(instance["labels"])
(
torch.LongTensor(instance["labels"][: self.max_length])
if len(instance["labels"]) > self.max_length
else torch.LongTensor(instance["labels"])
)
for instance in instances
]

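The change above is purely formatting: each conditional expression is wrapped in parentheses, while the truncation logic is unchanged. A minimal, self-contained sketch of that logic (the sample data and max_length are made up for illustration):

```python
import torch

max_length = 4
instances = [
    {"input_ids": [1, 2, 3, 4, 5, 6], "labels": [1, 2, 3, 4, 5, 6]},
    {"input_ids": [7, 8], "labels": [7, 8]},
]

# Same behaviour as the collator: truncate a sequence only when it exceeds max_length.
batch_input_ids = [
    (
        torch.LongTensor(instance["input_ids"][:max_length])
        if len(instance["input_ids"]) > max_length
        else torch.LongTensor(instance["input_ids"])
    )
    for instance in instances
]
print([t.tolist() for t in batch_input_ids])  # [[1, 2, 3, 4], [7, 8]]
```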
10 changes: 6 additions & 4 deletions applications/Colossal-LLaMA/train.py
@@ -136,7 +136,7 @@ def main() -> None:
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
accelerator = get_accelerator()
coordinator = DistCoordinator()

@@ -253,9 +253,11 @@ def main() -> None:
coordinator.print_on_master(f"Model params: {format_numel_str(model_numel)}")

optimizer = HybridAdam(
- model_params=filter(lambda p: p.requires_grad, model.parameters())
- if args.freeze_non_embeds_params
- else model.parameters(),
+ model_params=(
+     filter(lambda p: p.requires_grad, model.parameters())
+     if args.freeze_non_embeds_params
+     else model.parameters()
+ ),
lr=args.lr,
betas=(0.9, 0.95),
weight_decay=args.weight_decay,
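The change from `colossalai.launch_from_torch({})` to `colossalai.launch_from_torch()` recurs across the scripts below: the launcher no longer takes a now-unused config dict as its first argument. A minimal sketch of the updated initialization, assuming the script is started with `torchrun` and a recent ColossalAI release:

```python
import colossalai
from colossalai.cluster import DistCoordinator


def main() -> None:
    # New-style launch: no empty config dict; keyword arguments (e.g. seed=42)
    # can still be passed, as in the evaluation scripts below.
    colossalai.launch_from_torch()
    coordinator = DistCoordinator()
    coordinator.print_on_master("distributed environment ready")


if __name__ == "__main__":
    main()  # e.g. torchrun --standalone --nproc_per_node=2 this_script.py
```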
2 changes: 1 addition & 1 deletion applications/ColossalChat/benchmarks/benchmark_ppo.py
@@ -66,7 +66,7 @@ def benchmark_train(args):
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
coordinator = DistCoordinator()

# ======================================================
@@ -37,7 +37,7 @@ def train(args):
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
coordinator = DistCoordinator()

# ==============================
@@ -39,7 +39,7 @@ def train(args):
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
coordinator = DistCoordinator()

# ======================================================
@@ -34,7 +34,7 @@ def train(args):
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
coordinator = DistCoordinator()

# ======================================================
@@ -29,7 +29,7 @@ def train(args):
# ==============================
# Initialize Distributed Training
# ==============================
- colossalai.launch_from_torch({})
+ colossalai.launch_from_torch()
coordinator = DistCoordinator()

# ==============================
@@ -81,7 +81,7 @@ def rm_and_merge(


def main(args):
- colossalai.launch_from_torch(config={}, seed=42)
+ colossalai.launch_from_torch(seed=42)
accelerator = get_accelerator()
world_size = dist.get_world_size()

@@ -81,7 +81,7 @@ def rm_and_merge(


def main(args):
- colossalai.launch_from_torch(config={}, seed=42)
+ colossalai.launch_from_torch(seed=42)
world_size = dist.get_world_size()

rank = dist.get_rank()
8 changes: 6 additions & 2 deletions applications/ColossalMoE/infer.py
@@ -57,7 +57,7 @@ def main():
args = parse_args()

# Launch ColossalAI
- colossalai.launch_from_torch(config={}, seed=args.seed)
+ colossalai.launch_from_torch(seed=args.seed)
coordinator = DistCoordinator()

config = MixtralConfig.from_pretrained(args.model_name)
@@ -96,7 +96,11 @@ def main():
if coordinator.rank == 0:
text = ["Hello my name is"]
else:
text = ["What's the largest country in the world?", "How many people live in China?", "帮我续写这首诗:离离原上草"]
text = [
"What's the largest country in the world?",
"How many people live in China?",
"帮我续写这首诗:离离原上草",
]
tokenizer.pad_token = tokenizer.unk_token
inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch.cuda.current_device())

2 changes: 1 addition & 1 deletion applications/ColossalMoE/tests/test_mixtral_layer.py
@@ -50,7 +50,7 @@ def check_mixtral_moe_layer():


def run_dist(rank: int, world_size: int, port: int):
- colossalai.launch({}, rank, world_size, "localhost", port)
+ colossalai.launch(rank, world_size, "localhost", port)
check_mixtral_moe_layer()


2 changes: 1 addition & 1 deletion applications/ColossalMoE/tests/test_moe_checkpoint.py
@@ -133,7 +133,7 @@ def check_mixtral_moe_layer():


def run_dist(rank: int, world_size: int, port: int):
- colossalai.launch({}, rank, world_size, "localhost", port)
+ colossalai.launch(rank, world_size, "localhost", port)
check_mixtral_moe_layer()


8 changes: 4 additions & 4 deletions applications/ColossalMoE/train.py
@@ -145,7 +145,7 @@ def main():
args = parse_args()

# Launch ColossalAI
- colossalai.launch_from_torch(config={}, seed=args.seed)
+ colossalai.launch_from_torch(seed=args.seed)
coordinator = DistCoordinator()

# Set plugin
@@ -195,9 +195,9 @@ def main():
lr_scheduler = CosineAnnealingWarmupLR(
optimizer=optimizer,
total_steps=args.num_epochs * len(dataloader),
- warmup_steps=args.warmup_steps
- if args.warmup_steps is not None
- else int(args.num_epochs * len(dataloader) * 0.025),
+ warmup_steps=(
+     args.warmup_steps if args.warmup_steps is not None else int(args.num_epochs * len(dataloader) * 0.025)
+ ),
eta_min=0.1 * args.lr,
)

2 changes: 1 addition & 1 deletion colossalai/auto_parallel/offload/amp_optimizer.py
@@ -126,7 +126,7 @@ def loss_scale(self):
return self.grad_scaler.scale.item()

def zero_grad(self, *args, **kwargs):
- self.module.overflow_counter = torch.cuda.IntTensor([0])
+ self.module.overflow_counter = torch.tensor([0], dtype=torch.int, device=get_accelerator().get_current_device())
return self.optim.zero_grad(set_to_none=True)

def step(self, *args, **kwargs):
4 changes: 2 additions & 2 deletions colossalai/auto_parallel/offload/base_offload_module.py
@@ -4,7 +4,7 @@
import torch
import torch.nn as nn

- from colossalai.utils import _cast_float
+ from colossalai.utils import _cast_float, get_current_device
from colossalai.utils.common import free_storage

from .region_manager import RegionManager
@@ -25,7 +25,7 @@ def __init__(self, model: nn.Module, region_manager: RegionManager, is_sync=True
self.model = model
self.region_manager = region_manager
self.grad_hook_list = []
- self.overflow_counter = torch.cuda.IntTensor([0])
+ self.overflow_counter = torch.tensor([0], dtype=torch.int, device=get_current_device())

self.grad_offload_stream = torch.cuda.current_stream() if is_sync else GlobalRuntimeInfo.d2h_stream

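Both overflow-counter hunks replace the CUDA-only `torch.cuda.IntTensor([0])` constructor with a plain `torch.tensor` placed on whatever device the active accelerator reports, so the offload modules no longer assume a CUDA backend. A minimal sketch of the pattern; the CPU fallback branch is an assumption for machines without ColossalAI installed:

```python
import torch

try:
    from colossalai.utils import get_current_device
    device = get_current_device()  # e.g. cuda:0, npu:0, or cpu, depending on the accelerator
except ImportError:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Device-agnostic replacement for torch.cuda.IntTensor([0]).
overflow_counter = torch.tensor([0], dtype=torch.int, device=device)
overflow_counter += 1  # incremented whenever a gradient overflow is detected
print(overflow_counter.item())  # 1
```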
76 changes: 76 additions & 0 deletions colossalai/booster/booster.py
@@ -8,9 +8,18 @@
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader

SUPPORT_PEFT = False
try:
import peft

SUPPORT_PEFT = True
except ImportError:
pass

import colossalai.interface.pretrained as pretrained_utils
from colossalai.checkpoint_io import GeneralCheckpointIO
from colossalai.interface import ModelWrapper, OptimizerWrapper
from colossalai.quantization import BnbQuantizationConfig

from .accelerator import Accelerator
from .mixed_precision import MixedPrecision, mixed_precision_factory
@@ -221,6 +230,56 @@ def no_sync(self, model: nn.Module = None, optimizer: OptimizerWrapper = None) -
assert self.plugin.support_no_sync(), f"The plugin {self.plugin.__class__.__name__} does not support no_sync."
return self.plugin.no_sync(model, optimizer)

def enable_lora(
self,
model: nn.Module,
pretrained_dir: Optional[str] = None,
lora_config: "peft.LoraConfig" = None,
bnb_quantization_config: Optional[BnbQuantizationConfig] = None,
quantize=False,
) -> nn.Module:
"""
Wrap the passed-in model with LoRA modules for training. If a pretrained directory is provided, LoRA configs and weights are loaded from that directory.
LoRA in ColossalAI is implemented with the Hugging Face peft library, so the arguments for LoRA configuration are the same as those of peft.
Args:
model (nn.Module): The model to be appended with LoRA modules.
pretrained_dir(str, optional): The path to the pretrained directory, can be a local directory
or model_id of a PEFT configuration hosted inside a model repo on the Hugging Face Hub.
When set to None, create new lora configs and weights for the model using the passed in lora_config. Defaults to None.
lora_config: (peft.LoraConfig, optional): Passed in LoraConfig for peft. Defaults to None.
"""
if not SUPPORT_PEFT:
raise ImportError("Please install Huggingface Peft library to enable lora features in ColossalAI!")

assert self.plugin is not None, f"Lora can only be enabled when a plugin is provided."
assert self.plugin.support_lora(), f"The plugin {self.plugin.__class__.__name__} does not support lora."
if pretrained_dir is None:
assert (
lora_config is not None
), "Please provide configuration for Lora when pretrained directory path isn't passed in."
assert isinstance(
lora_config, peft.LoraConfig
), "The passed in configuration should be an instance of peft.LoraConfig."
if lora_config is None:
assert (
pretrained_dir is not None
), "Please provide pretrained directory path if not passing in lora configuration."
if quantize is True:
if bnb_quantization_config is not None:
warnings.warn(
"User defined BnbQuantizationConfig is not fully tested in ColossalAI. Use it at your own risk."
)
else:
bnb_quantization_config = BnbQuantizationConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
)

return self.plugin.enable_lora(model, pretrained_dir, lora_config, bnb_quantization_config)

def load_model(self, model: Union[nn.Module, ModelWrapper], checkpoint: str, strict: bool = True) -> None:
"""Load model from checkpoint.
@@ -323,3 +382,20 @@ def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str) -> None:
checkpoint (str): Path to the checkpoint. It must be a local file path.
"""
self.checkpoint_io.load_lr_scheduler(lr_scheduler, checkpoint)

def save_lora_as_pretrained(
self, model: Union[nn.Module, ModelWrapper], checkpoint: str, use_safetensors: bool = False
) -> None:
"""
Save the lora adapters and adapter configuration file to a pretrained checkpoint directory.
Args:
model (Union[nn.Module, ModelWrapper]): A model boosted by Booster.
checkpoint (str): Path to the checkpoint directory. It must be a local path.
use_safetensors (bool, optional): Whether to use safe tensors when saving. Defaults to False.
"""
if not SUPPORT_PEFT:
raise ImportError("Please install Huggingface Peft library to enable lora features in ColossalAI!")
assert self.plugin is not None, f"Lora can only be enabled when a plugin is provided."
assert self.plugin.support_lora(), f"The plugin {self.plugin.__class__.__name__} does not support lora."
self.checkpoint_io.save_lora_as_pretrained(model, checkpoint, use_safetensors)
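The new `enable_lora` / `save_lora_as_pretrained` pair hooks ColossalAI into Hugging Face PEFT: adapters are injected before `boost`, and only the adapter weights plus config are saved afterwards. A usage sketch under stated assumptions: `peft` is installed, the chosen plugin reports `support_lora()` (the Gemini plugin below does not; Torch DDP is used purely for illustration), and the toy model and `./lora_ckpt` path are hypothetical:

```python
import torch.nn as nn
from peft import LoraConfig

import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import TorchDDPPlugin

colossalai.launch_from_torch()  # run under torchrun

model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))  # toy model
booster = Booster(plugin=TorchDDPPlugin())

# Inject LoRA adapters (PEFT-style config), then boost as usual.
lora_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05, target_modules=["0", "2"])
model = booster.enable_lora(model, lora_config=lora_config)
model, *_ = booster.boost(model)

# ... training loop ...

# Persist only the adapters and their adapter config, as PEFT does.
booster.save_lora_as_pretrained(model, "./lora_ckpt", use_safetensors=True)
```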
10 changes: 9 additions & 1 deletion colossalai/booster/plugin/gemini_plugin.py
@@ -4,7 +4,7 @@
import random
from pathlib import Path
from types import MethodType
- from typing import Callable, Iterator, List, Optional, Tuple
+ from typing import Callable, Dict, Iterator, List, Optional, Tuple

import numpy as np
import torch
@@ -446,6 +446,9 @@ def __del__(self):
def support_no_sync(self) -> bool:
return False

def support_lora(self) -> bool:
return False

def control_precision(self) -> bool:
return True

@@ -576,3 +579,8 @@ def get_checkpoint_io(self) -> CheckpointIO:

def no_sync(self, model: nn.Module, optimizer: OptimizerWrapper) -> Iterator[None]:
raise NotImplementedError

def enable_lora(
self, model: nn.Module, pretrained_dir: Optional[str] = None, lora_config: Optional[Dict] = None
) -> nn.Module:
raise NotImplementedError