[devops] remove post commit ci (hpcaitech#5566)
* [devops] remove post commit ci

* [misc] run pre-commit on all files

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
ver217 and pre-commit-ci[bot] authored Apr 8, 2024
1 parent 341263d · commit 641b1ee
Showing 82 changed files with 847 additions and 960 deletions.
1 change: 1 addition & 0 deletions .github/pull_request_template.md
@@ -3,6 +3,7 @@
- [ ] I have created an issue for this PR for traceability
- [ ] The title follows the standard format: `[doc/gemini/tensor/...]: A concise description`
- [ ] I have added relevant tags if possible for us to better distinguish different PRs
- [ ] I have installed pre-commit: `pip install pre-commit && pre-commit install`


## 🚨 Issue number
97 changes: 0 additions & 97 deletions .github/workflows/post_commit.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .gitignore
@@ -162,4 +162,4 @@ coverage.xml

# log, test files - ColossalChat
applications/ColossalChat/logs
applications/ColossalChat/tests/logs
applications/ColossalChat/tests/logs
2 changes: 1 addition & 1 deletion LICENSE
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
THE SOFTWARE.
(Next changed file; its path is not shown in the rendered diff.)
@@ -8,11 +8,10 @@

import numpy as np
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers import LlamaForCausalLM, LlamaTokenizer

from colossalai.logging import get_dist_logger


logger = get_dist_logger()


(Next changed file; its path is not shown in the rendered diff.)
@@ -10,8 +10,8 @@
from typing import Any, Dict, Tuple, Union

import torch
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer

from colossalai.booster import Booster
from colossalai.cluster import DistCoordinator
(Next changed file; its path is not shown in the rendered diff.)
@@ -1,20 +1,19 @@
from copy import deepcopy
from typing import Optional, List, Dict, Tuple, Callable, Any
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
from torch import nn

from transformers import PreTrainedTokenizer
from transformers.utils import logging
from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList

from transformers.utils import logging

logger = logging.get_logger(__name__)


def get_prompt_template(
input_query:str,
history:List[Dict]= None,
roles:list = ["", "Human", "Assistant"],
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
) -> str:
"""
Generates a prompt template for chat models based on input and history.
@@ -32,7 +31,7 @@ def get_prompt_template(
new_history = []
else:
new_history = deepcopy(history)

new_history.append({"role": roles[1], "message": input_query.strip()})
new_history.append({"role": roles[2], "message": None})

@@ -48,22 +47,23 @@ def get_prompt_template(
prompt += f"{role}: <s>"
return prompt


@torch.inference_mode()
def streaming_chat(
model: Any,
model: Any,
tokenizer: PreTrainedTokenizer,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
length_penalty: float = 1.2,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
**kwargs,
):
"""
@@ -87,7 +87,7 @@ def streaming_chat(
**kwargs: Additional keyword arguments for generation.
Yields:
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
optionally the updated past key values if `return_past_key_values` is True.
Ensures padding is on the left side for the tokenizer.
@@ -97,63 +97,68 @@ def streaming_chat(
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()

generation_kwargs = {
'temperature': temperature,
'top_p': top_p,
'top_k': top_k,
'do_sample': do_sample,
'max_new_tokens': max_new_tokens,
'length_penalty': length_penalty,
'use_cache': True,
**kwargs
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"do_sample": do_sample,
"max_new_tokens": max_new_tokens,
"length_penalty": length_penalty,
"use_cache": True,
**kwargs,
}

prompt_str = get_prompt_template(input_query, history=history, roles=roles)

eos_token_id = [tokenizer.eos_token_id]
inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
history.append({"role": roles[1], "message": input_query.strip()})
history.append({"role": roles[2], "message": None})

for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
**generation_kwargs):
for outputs in stream_generate(
model,
**inputs,
past_key_values=past_key_values,
eos_token_id=eos_token_id,
return_past_key_values=return_past_key_values,
**generation_kwargs,
):
if return_past_key_values:
outputs, past_key_values = outputs

outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
response = tokenizer.decode(outputs)

history[-1]["message"] = response.strip()
if return_past_key_values:
yield response, history, past_key_values
else:
yield response, history


@torch.inference_mode()
def stream_generate(
model: Any,
input_ids: torch.Tensor,
model: Any,
input_ids: torch.Tensor,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
return_past_key_values: bool = False,
return_past_key_values: bool = False,
**kwargs,
):
"""
Generates sequences of token ids using the specified model and generation parameters.
Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py
Args:
model (Any): The model used for generating sequences of token ids.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
and generation config.
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
and a generation config.
prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.
@@ -169,33 +174,33 @@ def stream_generate(
generation_config = model.generation_config
generation_config = deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)

eos_token_id = generation_config.eos_token_id
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
eos_token_id_tensor = torch.tensor(eos_token_id).to(input_ids.device) if eos_token_id is not None else None

if generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_len

if input_ids_len >= generation_config.max_length:
input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
logger.warning(
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

# prepare distribution pre_processing samplers
logits_processor = model._get_logits_processor(
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)

# prepare stopping criteria
stopping_criteria = model._get_stopping_criteria(
@@ -205,7 +210,7 @@ def stream_generate(
logits_warper = model._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None

while True:
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
# forward pass to get next token
@@ -244,4 +249,4 @@ def stream_generate(
yield input_ids
# stop when each sentence is finished, or if exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
break
break
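
The file diff above contains only formatting fixes applied by the pre-commit hooks; the behavior of get_prompt_template, streaming_chat, and stream_generate is unchanged. For readers unfamiliar with the API, here is a minimal usage sketch of the streaming interface. The module name, checkpoint path, and prompt are illustrative assumptions, not part of this commit.

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Hypothetical import: the rendered diff does not show this file's path.
from stream_chat_module import streaming_chat

checkpoint = "path/to/llama-style-checkpoint"  # placeholder path
tokenizer = LlamaTokenizer.from_pretrained(checkpoint)
tokenizer.padding_side = "left"  # the docstring expects left-side padding
model = LlamaForCausalLM.from_pretrained(checkpoint)
model = model.to("cuda" if torch.cuda.is_available() else "cpu").eval()

history = []
for response, history in streaming_chat(
    model,
    tokenizer,
    input_query="What does Colossal-AI do?",
    history=history,
    max_new_tokens=128,
):
    pass  # each yield carries the full decoded response so far and the updated history
print(response)

Each iteration yields the response decoded from all tokens generated so far, so a chat front end can render partial output as it streams; passing return_past_key_values=True additionally yields the key-value cache for incremental decoding.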
(Diffs for the remaining changed files are not rendered on this page.)
