From 7b754a99deb9d2f1b386a7ca074b9b3cf24f100b Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:45:50 +0800 Subject: [PATCH 01/32] added features to download models from the hugging face model hub/load local hugging face model and finetune loaded model with hugging face dataset Added features to download models from hugging face model hub/load local hugging face model and finetune loaded model with hugging face dataset. Model loading and fine-tuning can happen both at the initialization stage and after the agent has been initialized (see README in `agentscope/examples/load_finetune_huggingface_model` for details). Major changes to the repo include creating the example script `load_finetune_huggingface_model`, adding a new model wrapper `HuggingFaceWrapper`, and creating a new agent type Finetune_DialogAgent. All changes are done in a new example directory `agentscope/examples/load_finetune_huggingface_model`. --- .../load_finetune_huggingface_model/README.md | 52 ++++ .../huggingface_model.py | 236 ++++++++++++++++++ .../load_finetune_huggingface_model.py | 43 ++++ 3 files changed, 331 insertions(+) create mode 100644 examples/load_finetune_huggingface_model/README.md create mode 100644 examples/load_finetune_huggingface_model/huggingface_model.py create mode 100644 examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py diff --git a/examples/load_finetune_huggingface_model/README.md b/examples/load_finetune_huggingface_model/README.md new file mode 100644 index 000000000..54ae66c94 --- /dev/null +++ b/examples/load_finetune_huggingface_model/README.md @@ -0,0 +1,52 @@ +# Multi-Agent Conversation with Custom Model Loading and Fine-Tuning in AgentScope + +This example demonstrates how to load and optionally fine-tune a Hugging Face model within a multi-agent conversation setup using AgentScope. The complete code is provided in `load_finetune_huggingface_model.py`. + +## Background + +In the context of AgentScope, agents are designed to mimic user and assistant roles in a conversation. This setup allows for the integration and testing of different models from the Hugging Face Hub, enhancing their capabilities through fine-tuning with custom datasets. + +## Functionality Overview + +This example allows you to: + +- Set up a user agent and an assistant agent for interactive conversations. +- Modify the `sys_prompt` to customize the assistant agent's role. +- Terminate the conversation by entering "exit". + +## Advanced Features + +Beyond basic conversation setup, the example introduces advanced functionalities: + +- Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. +- Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. + +## Agent Initialization + +When initializing an agent, the following parameters need specification: + +- `model_id` (str): Identifier for the model on Hugging Face. +- `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). +- `data_path` (str): Path to training data (fine-tuning is skipped if not provided). +- `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. +- `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face. + +## Tested Models + +The example is tested using specific Hugging Face models. 
While it is designed to be flexible, some models may require additional configuration or modification of the provided scripts. + +## Prerequisites + +Before running this example, ensure you have installed the following packages: + +- `transformers` +- `peft` +- `python-dotenv` +- `pytorch` +- `datasets` +- `trl` + +Additionally, set your Hugging Face token in the `.env` file: + +```bash +python load_finetune_huggingface_model.py diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py new file mode 100644 index 000000000..00c444638 --- /dev/null +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -0,0 +1,236 @@ +from transformers import AutoModelForCausalLM, AutoTokenizer +from agentscope.agents import DialogAgent + +from agentscope.models import ModelWrapperBase, ModelResponse +from loguru import logger + +import torch +import os +from dotenv import load_dotenv, find_dotenv + +from typing import Optional + +class HuggingFaceWrapper(ModelWrapperBase): + model_type: str = "huggingface" # Unique identifier for this model wrapper + + def __init__(self, config_name, model_id, max_length=512, data_path = None, device = None, local_model_path = None, **kwargs): + super().__init__(config_name=config_name) + self.max_length = max_length # Set max_length as an attribute + self.model_id = model_id + relative_path = os.path.join(os.path.dirname(__file__), "../load_finetune_huggingface_model/.env") + dotenv_path = os.path.normpath(relative_path) + _ = load_dotenv(dotenv_path) # read local .env file + huggingface_token = os.getenv('HUGGINGFACE_TOKEN') + + self.huggingface_token = huggingface_token + if device == None: + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + else: + self.device = device + try: + if local_model_path == None: + self.model = AutoModelForCausalLM.from_pretrained(model_id, token = huggingface_token, device_map="auto",) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = huggingface_token) + print("load new model") + else: + self.model = AutoModelForCausalLM.from_pretrained( + local_model_path, local_files_only=True, + device_map="auto" + ) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = huggingface_token) + print("load local model") + + if data_path != None: + self.model = fine_tune(self.model, self.tokenizer, data_path, token = huggingface_token) + + + + except Exception as e: + logger.error(f"Failed to load model {model_id}: {e}") + raise + + def __call__(self, input, **kwargs) -> ModelResponse: + try: + # Tokenize the input text + concatenated_input = "\n".join([f"{d.get('name', 'System')}: {d['content']}" for d in input]) + input_ids = self.tokenizer.encode(f"{concatenated_input}\nAssistent: ", return_tensors='pt') + # Generate response using the model + outputs = self.model.generate(input_ids.to(self.device), max_new_tokens = self.max_length, **kwargs) + # Decode the generated tokens to a string + generated_text = self.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True) + + return ModelResponse(text=generated_text, raw={'model_id': self.model_id}) + except Exception as e: + logger.error(f"Generation error: {e}") + raise + + def load_model(self, model_id, local_model_path = None): + """ + Load a new model for the agent from a local path and update the agent's model. + + Parameters: + local_model_path (str): The file path to the model to be loaded. 
+ model_id (str): An identifier for the model on Huggingface. + """ + try: + if local_model_path == None: + self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) + print("new model") + else: + self.model = AutoModelForCausalLM.from_pretrained( + local_model_path, local_files_only=True, + device_map="auto" + ) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) + print("local model") + + + # Optionally, log the successful model loading + logger.info(f"Successfully loaded new model '{model_id}' from '{local_model_path}'") + except Exception as e: + # Handle exceptions during model loading, such as file not found or load errors + logger.error(f"Failed to load model '{model_id}' from '{local_model_path}': {e}") + raise # Or handle error appropriately + + def fine_tune(self, data_path): + """ + Fine-tune the agent's model using data from the specified path. + + Parameters: + data_path (str): The file path to the training data. + """ + try: + self.model = fine_tune(self.model, self.tokenizer, data_path, token = self.huggingface_token) + + logger.info(f"Successfully fine-tuned model with data from '{data_path}'") + except Exception as e: + logger.error(f"Failed to fine-tune model with data from '{data_path}': {e}") + raise # Or handle the error appropriately + + +def fine_tune(model, tokenizer, data_path, token): + from datasets import load_dataset + from datetime import datetime + import os + import json + + dataset = load_dataset(data_path, token = token) + + from peft import LoraConfig + + lora_config = LoraConfig( + r=16, + lora_alpha=32, + lora_dropout=0.05, + bias="none", + task_type="CAUSAL_LM" + ) + + from peft import get_peft_model + + + + model = get_peft_model(model, lora_config) + + from trl import SFTTrainer, DataCollatorForCompletionOnlyLM + import transformers + + def formatting_prompts_func(example): + output_texts = [] + for i in range(len(example['conversations'])): + text = f"### Question: {example['conversations'][i][0]}\n ### Answer: {example['conversations'][i][1]}" + output_texts.append(text) + return output_texts + + response_template = " ### Answer:" + collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer) + + trainer = SFTTrainer( + model, + train_dataset=dataset["train"], + eval_dataset=dataset["train"], + formatting_func=formatting_prompts_func, + data_collator=collator, + peft_config=lora_config, + args=transformers.TrainingArguments( + per_device_train_batch_size=1, + gradient_accumulation_steps=1, + gradient_checkpointing=False, + # learning_rate=2e-6, + max_steps=10, + output_dir="./", + optim="paged_adamw_8bit", + fp16=True, + # num_train_epochs=10.0, + logging_steps=1 + ), + ) + + print("fine-tuning model") + + trainer.train() + + + now = datetime.now() + time_string = now.strftime('%Y-%m-%d_%H-%M-%S') + + # Specify the filename + log_name = f"{model.config._name_or_path.split('/')[-1]}_{time_string}_log_history.json" + + relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+log_name) + normalized_path = os.path.normpath(relative_path) + + os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + + # Writing JSON data + with open(normalized_path, 'w') as f: + json.dump(trainer.state.log_history, f) + + save_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" + 
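# Build the save directory for the fine-tuned model under examples/load_finetune_huggingface_model/, named after the base model name and the current timestamp (used by trainer.save_model below)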
relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+save_name) + normalized_path = os.path.normpath(relative_path) + + os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + # Check if directory exists + if not os.path.exists(normalized_path): + # If not, create the directory + os.makedirs(normalized_path) + + #save model + trainer.save_model(normalized_path) + # trainer.mode.save_config(save_path) + + + return model + + +class Finetune_DialogAgent(DialogAgent): + def __init__(self, name: str, sys_prompt: str, model_config_name: str, use_memory: bool = True, memory_config: Optional[dict] = None): + super().__init__(name, sys_prompt, model_config_name, use_memory, memory_config) + + def load_model(self, model_id, local_model_path=None): + """ + Load a new model into the agent. + + Parameters: + model_id (str): The Hugging Face model ID or a custom identifier. + local_model_path (str, optional): Path to a locally saved model. + """ + if hasattr(self.model, "load_model"): + self.model.load_model(model_id, local_model_path) + else: + logger.error("The model wrapper does not support dynamic model loading.") + + def fine_tune(self, data_path): + """ + Fine-tune the agent's underlying model. + + Parameters: + data_path (str): The path to the training data. + """ + if hasattr(self.model, "fine_tune"): + self.model.fine_tune(data_path) + logger.info("Fine-tuning completed successfully.") + else: + logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py new file mode 100644 index 000000000..b3a6e1b5a --- /dev/null +++ b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py @@ -0,0 +1,43 @@ +import agentscope +from agentscope.agents.user_agent import UserAgent +from agentscope.pipelines.functional import sequentialpipeline +from huggingface_model import Finetune_DialogAgent + + +def main() -> None: + """A basic conversation demo with a custom model""" + + # Initialize AgentScope with your custom model configuration + agentscope.init( + model_configs=[ + { + "model_type": "huggingface", + "config_name": "my_custom_model", + "model_id": "google/gemma-2b-it", # Or another generative model of your choice + # "local_model_path": "/home/zhan1130/agentscope/examples/conversation_basic/sft_gemma-2b-it_2024-04-04_14-22-35", + "max_length": 128, + "device": "cuda", + # "data_path": "GAIR/lima", + }, + ], + ) + + # Init agents with the custom model + dialog_agent = Finetune_DialogAgent( + name="Assistant", + sys_prompt="You're a helpful assistant.", + model_config_name="my_custom_model", # Use your custom model config name here + ) + + dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load gemma-2b-it from Hugging Face + dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset + + user_agent = UserAgent() + + # Start the conversation between user and assistant + x = None + while x is None or x.content != "exit": + x = sequentialpipeline([dialog_agent, user_agent], x) + +if __name__ == "__main__": + main() From ea00db040c01e4639a9c10bf7a3849236c8f8c6d Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:29:16 +0800 Subject: [PATCH 02/32] added customized hyperparameters specification made customized hyperparameters specification 
available from `model_configs` for fine-tuning at initialization, or through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization --- .../load_finetune_huggingface_model/README.md | 6 +- .../huggingface_model.py | 184 ++++++++++-------- .../load_finetune_huggingface_model.py | 25 ++- 3 files changed, 130 insertions(+), 85 deletions(-) diff --git a/examples/load_finetune_huggingface_model/README.md b/examples/load_finetune_huggingface_model/README.md index 54ae66c94..400df461f 100644 --- a/examples/load_finetune_huggingface_model/README.md +++ b/examples/load_finetune_huggingface_model/README.md @@ -21,6 +21,8 @@ Beyond basic conversation setup, the example introduces advanced functionalities - Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. - Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. +The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/src/agentscope/models/huggingface_model.py`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization, as shown in the example script `load_finetune_huggingface_model.py`. + ## Agent Initialization When initializing an agent, the following parameters need specification: @@ -29,6 +31,7 @@ When initializing an agent, the following parameters need specification: - `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). - `data_path` (str): Path to training data (fine-tuning is skipped if not provided). - `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. +- `fine_tune_config` (dict, Optional): A configuration dictionary for fine-tuning the model. It allows specifying hyperparameters and other training options that will be passed to the fine-tuning method. If not provided, default settings will be used. This allows for customization of the fine-tuning process to optimize model performance based on specific requirements. - `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face. 
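Taken together, the parameters above can be supplied through `model_configs`; a rough sketch of an entry that fine-tunes at initialization is shown below (the values are illustrative, see `load_finetune_huggingface_model.py` for a complete example):

```python
{
    "model_type": "huggingface",
    "config_name": "my_custom_model",
    "model_id": "google/gemma-2b-it",
    "max_length": 128,
    "device": "cuda",
    "data_path": "GAIR/lima",  # triggers fine-tuning at initialization
    "fine_tune_config": {
        "lora_config": {"r": 16, "lora_alpha": 32},
        "training_args": {"max_steps": 10, "logging_steps": 1},
    },
}
```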
## Tested Models @@ -42,11 +45,10 @@ Before running this example, ensure you have installed the following packages: - `transformers` - `peft` - `python-dotenv` -- `pytorch` - `datasets` - `trl` Additionally, set your Hugging Face token in the `.env` file: ```bash -python load_finetune_huggingface_model.py +python load_finetune_huggingface_model.py \ No newline at end of file diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py index 00c444638..878bf1f0c 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -13,10 +13,11 @@ class HuggingFaceWrapper(ModelWrapperBase): model_type: str = "huggingface" # Unique identifier for this model wrapper - def __init__(self, config_name, model_id, max_length=512, data_path = None, device = None, local_model_path = None, **kwargs): + def __init__(self, config_name, model_id, max_length=512, data_path = None, device = None, local_model_path = None, fine_tune_config=None, **kwargs): super().__init__(config_name=config_name) self.max_length = max_length # Set max_length as an attribute self.model_id = model_id + # relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/.env") relative_path = os.path.join(os.path.dirname(__file__), "../load_finetune_huggingface_model/.env") dotenv_path = os.path.normpath(relative_path) _ = load_dotenv(dotenv_path) # read local .env file @@ -41,7 +42,7 @@ def __init__(self, config_name, model_id, max_length=512, data_path = None, devi print("load local model") if data_path != None: - self.model = fine_tune(self.model, self.tokenizer, data_path, token = huggingface_token) + self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) @@ -93,7 +94,7 @@ def load_model(self, model_id, local_model_path = None): logger.error(f"Failed to load model '{model_id}' from '{local_model_path}': {e}") raise # Or handle error appropriately - def fine_tune(self, data_path): + def fine_tune(self, data_path, fine_tune_config=None): """ Fine-tune the agent's model using data from the specified path. @@ -101,7 +102,7 @@ def fine_tune(self, data_path): data_path (str): The file path to the training data. 
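fine_tune_config (dict, optional): Hyperparameters that override the fine-tuning defaults defined in this file; expected keys are 'lora_config' and 'training_args'. Defaults to None.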
""" try: - self.model = fine_tune(self.model, self.tokenizer, data_path, token = self.huggingface_token) + self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) logger.info(f"Successfully fine-tuned model with data from '{data_path}'") except Exception as e: @@ -109,103 +110,128 @@ def fine_tune(self, data_path): raise # Or handle the error appropriately -def fine_tune(model, tokenizer, data_path, token): - from datasets import load_dataset - from datetime import datetime - import os - import json + def fine_tune_training(self, model, tokenizer, data_path, token, fine_tune_config=None): + from datasets import load_dataset + from datetime import datetime + import os + import json - dataset = load_dataset(data_path, token = token) + dataset = load_dataset(data_path, token = token) - from peft import LoraConfig + from peft import LoraConfig - lora_config = LoraConfig( - r=16, - lora_alpha=32, - lora_dropout=0.05, - bias="none", - task_type="CAUSAL_LM" - ) + lora_config_default = { + "r": 16, + "lora_alpha": 32, + "lora_dropout": 0.05, + "bias": "none", + "task_type": "CAUSAL_LM" + } - from peft import get_peft_model + if fine_tune_config is not None: + if fine_tune_config['lora_config'] is not None: + lora_config_default.update(fine_tune_config['lora_config']) - - model = get_peft_model(model, lora_config) + training_defaults = { + "per_device_train_batch_size": 1, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": False, + "max_steps": 10, + "output_dir": "./", + "optim": "paged_adamw_8bit", + "fp16": True, + "logging_steps": 1, + # "learning_rate": 2e-6, + # "num_train_epochs": 10.0, + } - from trl import SFTTrainer, DataCollatorForCompletionOnlyLM - import transformers + if fine_tune_config is not None: + if fine_tune_config['training_args'] is not None: + training_defaults.update(fine_tune_config['training_args']) - def formatting_prompts_func(example): - output_texts = [] - for i in range(len(example['conversations'])): - text = f"### Question: {example['conversations'][i][0]}\n ### Answer: {example['conversations'][i][1]}" - output_texts.append(text) - return output_texts + from peft import get_peft_model - response_template = " ### Answer:" - collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer) + + lora_config = LoraConfig(**lora_config_default) + model = get_peft_model(model, lora_config) + + from trl import SFTTrainer, DataCollatorForCompletionOnlyLM + import transformers - trainer = SFTTrainer( - model, - train_dataset=dataset["train"], - eval_dataset=dataset["train"], - formatting_func=formatting_prompts_func, - data_collator=collator, - peft_config=lora_config, - args=transformers.TrainingArguments( - per_device_train_batch_size=1, - gradient_accumulation_steps=1, - gradient_checkpointing=False, - # learning_rate=2e-6, - max_steps=10, - output_dir="./", - optim="paged_adamw_8bit", - fp16=True, - # num_train_epochs=10.0, - logging_steps=1 - ), - ) + def formatting_prompts_func(example): + output_texts = [] + for i in range(len(example['conversations'])): + text = f"### Question: {example['conversations'][i][0]}\n ### Answer: {example['conversations'][i][1]}" + output_texts.append(text) + return output_texts - print("fine-tuning model") + response_template = " ### Answer:" + collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer) - trainer.train() + trainer_args = transformers.TrainingArguments(**training_defaults) - - now = 
datetime.now() - time_string = now.strftime('%Y-%m-%d_%H-%M-%S') + trainer = SFTTrainer( + model, + train_dataset=dataset["train"], + eval_dataset=dataset["train"], + formatting_func=formatting_prompts_func, + data_collator=collator, + peft_config=lora_config, + args=trainer_args, + ) - # Specify the filename - log_name = f"{model.config._name_or_path.split('/')[-1]}_{time_string}_log_history.json" + print("fine-tuning model") - relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+log_name) - normalized_path = os.path.normpath(relative_path) + trainer.train() + + + now = datetime.now() + time_string = now.strftime('%Y-%m-%d_%H-%M-%S') - os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + # Specify the filename + log_name = f"{model.config._name_or_path.split('/')[-1]}_{time_string}_log_history.json" - # Writing JSON data - with open(normalized_path, 'w') as f: - json.dump(trainer.state.log_history, f) + relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+log_name) + normalized_path = os.path.normpath(relative_path) - save_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" - relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+save_name) - normalized_path = os.path.normpath(relative_path) + os.makedirs(os.path.dirname(normalized_path), exist_ok=True) - os.makedirs(os.path.dirname(normalized_path), exist_ok=True) - # Check if directory exists - if not os.path.exists(normalized_path): - # If not, create the directory - os.makedirs(normalized_path) + # Writing JSON data + with open(normalized_path, 'w') as f: + json.dump(trainer.state.log_history, f) - #save model - trainer.save_model(normalized_path) - # trainer.mode.save_config(save_path) + save_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" + relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+save_name) + normalized_path = os.path.normpath(relative_path) - - return model + os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + # Check if directory exists + if not os.path.exists(normalized_path): + # If not, create the directory + os.makedirs(normalized_path) + + #save model + trainer.save_model(normalized_path) + # trainer.mode.save_config(save_path) + + + return model class Finetune_DialogAgent(DialogAgent): + """ + A dialog agent capable of fine-tuning its underlying model based on provided data. + + Inherits from DialogAgent and adds functionality for fine-tuning with custom hyperparameters. + + Parameters: + name (str): Name of the agent. + sys_prompt (str): System prompt or description of the agent's role. + model_config_name (str): The configuration name for the underlying model. + use_memory (bool, optional): Whether to use memory for the agent. Defaults to True. + memory_config (dict, Optional): Configuration for the agent's memory. Defaults to None. 
+ """ def __init__(self, name: str, sys_prompt: str, model_config_name: str, use_memory: bool = True, memory_config: Optional[dict] = None): super().__init__(name, sys_prompt, model_config_name, use_memory, memory_config) @@ -222,7 +248,7 @@ def load_model(self, model_id, local_model_path=None): else: logger.error("The model wrapper does not support dynamic model loading.") - def fine_tune(self, data_path): + def fine_tune(self, data_path, fine_tune_config=None): """ Fine-tune the agent's underlying model. @@ -230,7 +256,7 @@ def fine_tune(self, data_path): data_path (str): The path to the training data. """ if hasattr(self.model, "fine_tune"): - self.model.fine_tune(data_path) + self.model.fine_tune(data_path, fine_tune_config) logger.info("Fine-tuning completed successfully.") else: logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py index b3a6e1b5a..69965a938 100644 --- a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py +++ b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py @@ -8,16 +8,27 @@ def main() -> None: """A basic conversation demo with a custom model""" # Initialize AgentScope with your custom model configuration + + agentscope.init( model_configs=[ { "model_type": "huggingface", "config_name": "my_custom_model", "model_id": "google/gemma-2b-it", # Or another generative model of your choice - # "local_model_path": "/home/zhan1130/agentscope/examples/conversation_basic/sft_gemma-2b-it_2024-04-04_14-22-35", + # "local_model_path": # Specify your local model path "max_length": 128, "device": "cuda", - # "data_path": "GAIR/lima", + + # "data_path": "GAIR/lima", # Specify a Hugging Face data path if you wish to finetune the model from the start + + # fine_tune_config (Optional): Configuration for fine-tuning the model. This dictionary + # can include hyperparameters and other training options that + # will be passed to the fine-tuning method. Defaults to None. + # "fine_tune_config":{ + # "lora_config": {"r": 20, "lora_alpha": 40}, + # "training_args": {"max_steps": 20, "logging_steps": 2} + # } }, ], ) @@ -29,8 +40,14 @@ def main() -> None: model_config_name="my_custom_model", # Use your custom model config name here ) - dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load gemma-2b-it from Hugging Face - dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset + dialog_agent.load_model(model_id = "google/gemma-2b", local_model_path = None) #load gemma-2b-it from Hugging Face + # dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset with default hyperparameters + + #fine-tune loaded model with lima dataset with customized hyperparameters (`fine_tune_config` argument is optional. Defaults to None.) 
+ dialog_agent.fine_tune("GAIR/lima", fine_tune_config ={ + "lora_config": {"r": 24, "lora_alpha": 48}, + "training_args": {"max_steps": 30, "logging_steps": 3} + }) user_agent = UserAgent() From 3e8c46839233d7f061a8f036a1c6b90484c7b034 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 25 Apr 2024 13:15:23 +0800 Subject: [PATCH 03/32] added docstring and made changes in accordance with the comments --- .../huggingface_model.py | 115 +++++++++++++++--- .../load_finetune_huggingface_model.py | 13 +- 2 files changed, 102 insertions(+), 26 deletions(-) diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py index 878bf1f0c..fa1b178c4 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -11,34 +11,48 @@ from typing import Optional class HuggingFaceWrapper(ModelWrapperBase): + """Wrapper for a Hugging Face transformer model. + + This class is responsible for loading and fine-tuning pre-trained models from the Hugging Face library. + """ model_type: str = "huggingface" # Unique identifier for this model wrapper def __init__(self, config_name, model_id, max_length=512, data_path = None, device = None, local_model_path = None, fine_tune_config=None, **kwargs): + """Initializes the HuggingFaceWrapper with the given configuration. + + Arguments: + config_name (str): Configuration name for model setup. + model_id (str): Identifier for the pre-trained model on Hugging Face. + max_length (int): Maximum sequence length for the model output per reply. Defaults to 512. + data_path (str, optional): Path to the dataset for fine-tuning the model. + device (torch.device, optional): Device to run the model on. Will default to GPU if available. + local_model_path (str, optional): Local file path to a pre-trained model. + fine_tune_config (dict, optional): Configuration for fine-tuning the model. + **kwargs: Additional keyword arguments. 
+ """ super().__init__(config_name=config_name) self.max_length = max_length # Set max_length as an attribute self.model_id = model_id - # relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/.env") relative_path = os.path.join(os.path.dirname(__file__), "../load_finetune_huggingface_model/.env") dotenv_path = os.path.normpath(relative_path) _ = load_dotenv(dotenv_path) # read local .env file - huggingface_token = os.getenv('HUGGINGFACE_TOKEN') + self.huggingface_token = os.getenv('HUGGINGFACE_TOKEN') - self.huggingface_token = huggingface_token if device == None: self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') else: self.device = device try: if local_model_path == None: - self.model = AutoModelForCausalLM.from_pretrained(model_id, token = huggingface_token, device_map="auto",) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = huggingface_token) + self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) print("load new model") else: self.model = AutoModelForCausalLM.from_pretrained( local_model_path, local_files_only=True, device_map="auto" ) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = huggingface_token) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) print("load local model") if data_path != None: @@ -51,6 +65,22 @@ def __init__(self, config_name, model_id, max_length=512, data_path = None, devi raise def __call__(self, input, **kwargs) -> ModelResponse: + """Process the input data to generate a response from the model. + + This method tokenizes the input text, generates a response using the model, + and then decodes the generated tokens into a string. + + Arguments: + input (list): A list of dictionaries where each dictionary contains 'name', 'role' and 'content' keys and their respective values. + **kwargs: Additional keyword arguments for the model's generate function. + + Returns: + ModelResponse: An object containing the generated text and raw model output. + + Raises: + Exception: If an error occurs during text generation. + """ + try: # Tokenize the input text concatenated_input = "\n".join([f"{d.get('name', 'System')}: {d['content']}" for d in input]) @@ -60,7 +90,7 @@ def __call__(self, input, **kwargs) -> ModelResponse: # Decode the generated tokens to a string generated_text = self.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True) - return ModelResponse(text=generated_text, raw={'model_id': self.model_id}) + return ModelResponse(text=generated_text, raw = outputs) except Exception as e: logger.error(f"Generation error: {e}") raise @@ -69,10 +99,14 @@ def load_model(self, model_id, local_model_path = None): """ Load a new model for the agent from a local path and update the agent's model. - Parameters: + Arguments: local_model_path (str): The file path to the model to be loaded. model_id (str): An identifier for the model on Huggingface. + + Raises: + Exception: If the model cannot be loaded from the given path or identifier. Possible reasons include file not found, incorrect model ID, or network issues while fetching the model. 
""" + try: if local_model_path == None: self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) @@ -98,8 +132,11 @@ def fine_tune(self, data_path, fine_tune_config=None): """ Fine-tune the agent's model using data from the specified path. - Parameters: - data_path (str): The file path to the training data. + Arguments: + data_path (str): The file path to the training data from Hugging Face. + + Raises: + Exception: If the fine-tuning process fails. This could be due to issues with the data path, configuration parameters, or internal errors during the training process. """ try: self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) @@ -111,6 +148,28 @@ def fine_tune(self, data_path, fine_tune_config=None): def fine_tune_training(self, model, tokenizer, data_path, token, fine_tune_config=None): + + """ + The actual method that handles training and fine-tuning the model on the dataset specified by the data_path using a given tokenizer. + + Arguments: + model (AutoModelForCausalLM): The pre-trained causal language model from Hugging Face's transformers. + tokenizer (AutoTokenizer): The tokenizer corresponding to the pre-trained model. + data_path (str): The file path or dataset identifier to load the dataset from Hugging Face. + token (str): The authentication token for Hugging Face. + fine_tune_config (dict, optional): Configuration options for fine-tuning the model, including LoRA and training arguments. + + Returns: + AutoModelForCausalLM: The fine-tuned language model. + + Raises: + Exception: Raises an exception if the dataset loading or fine-tuning process fails. + + Note: + This method updates the model in place and also logs the fine-tuning process. + It utilizes the LoRA configuration and custom training arguments to adapt the pre-trained model to the specific dataset. + The training log and trained model are saved in the same directory with the specific timestamp at saving time as part of the log/model fodler name. + """ from datasets import load_dataset from datetime import datetime import os @@ -224,25 +283,39 @@ class Finetune_DialogAgent(DialogAgent): A dialog agent capable of fine-tuning its underlying model based on provided data. Inherits from DialogAgent and adds functionality for fine-tuning with custom hyperparameters. - - Parameters: - name (str): Name of the agent. - sys_prompt (str): System prompt or description of the agent's role. - model_config_name (str): The configuration name for the underlying model. - use_memory (bool, optional): Whether to use memory for the agent. Defaults to True. - memory_config (dict, Optional): Configuration for the agent's memory. Defaults to None. """ + def __init__(self, name: str, sys_prompt: str, model_config_name: str, use_memory: bool = True, memory_config: Optional[dict] = None): + """ + Initializes a new Finetune_DialogAgent with specified configuration. + + Arguments: + name (str): Name of the agent. + sys_prompt (str): System prompt or description of the agent's role. + model_config_name (str): The configuration name for the underlying model. + use_memory (bool, optional): Indicates whether to utilize memory features. Defaults to True. + memory_config (dict, optional): Configuration for memory functionalities if `use_memory` is True. + + Note: + Refer to `class DialogAgent(AgentBase)` for more information. 
+ """ + super().__init__(name, sys_prompt, model_config_name, use_memory, memory_config) + + def load_model(self, model_id, local_model_path=None): """ Load a new model into the agent. - Parameters: + Arguments: model_id (str): The Hugging Face model ID or a custom identifier. local_model_path (str, optional): Path to a locally saved model. + + Raises: + Exception: If the model loading process fails or if the model wrapper does not support dynamic loading. """ + if hasattr(self.model, "load_model"): self.model.load_model(model_id, local_model_path) else: @@ -252,9 +325,13 @@ def fine_tune(self, data_path, fine_tune_config=None): """ Fine-tune the agent's underlying model. - Parameters: + Arguments: data_path (str): The path to the training data. + + Raises: + Exception: If fine-tuning fails or if the model wrapper does not support fine-tuning. """ + if hasattr(self.model, "fine_tune"): self.model.fine_tune(data_path, fine_tune_config) logger.info("Fine-tuning completed successfully.") diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py index 69965a938..7a38654d4 100644 --- a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py +++ b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py @@ -19,16 +19,15 @@ def main() -> None: # "local_model_path": # Specify your local model path "max_length": 128, "device": "cuda", - - # "data_path": "GAIR/lima", # Specify a Hugging Face data path if you wish to finetune the model from the start + "data_path": "GAIR/lima", # Specify a Hugging Face data path if you wish to finetune the model from the start # fine_tune_config (Optional): Configuration for fine-tuning the model. This dictionary # can include hyperparameters and other training options that # will be passed to the fine-tuning method. Defaults to None. - # "fine_tune_config":{ - # "lora_config": {"r": 20, "lora_alpha": 40}, - # "training_args": {"max_steps": 20, "logging_steps": 2} - # } + "fine_tune_config":{ + "lora_config": {"r": 20, "lora_alpha": 40}, + "training_args": {"max_steps": 20, "logging_steps": 2} + } }, ], ) @@ -40,7 +39,7 @@ def main() -> None: model_config_name="my_custom_model", # Use your custom model config name here ) - dialog_agent.load_model(model_id = "google/gemma-2b", local_model_path = None) #load gemma-2b-it from Hugging Face + dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load gemma-2b-it from Hugging Face # dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset with default hyperparameters #fine-tune loaded model with lima dataset with customized hyperparameters (`fine_tune_config` argument is optional. Defaults to None.) From 10a9870b0dd70cda86ab6f2d41f79e49c68d4a04 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 25 Apr 2024 17:15:08 +0800 Subject: [PATCH 04/32] decoupled model loading and tokenizer loading. Now can load tokenizer from local. 
--- .../huggingface_model.py | 112 +++++++++++------- .../load_finetune_huggingface_model.py | 9 +- 2 files changed, 75 insertions(+), 46 deletions(-) diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py index fa1b178c4..8c48f6ec0 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -17,7 +17,7 @@ class HuggingFaceWrapper(ModelWrapperBase): """ model_type: str = "huggingface" # Unique identifier for this model wrapper - def __init__(self, config_name, model_id, max_length=512, data_path = None, device = None, local_model_path = None, fine_tune_config=None, **kwargs): + def __init__(self, config_name, model_id = None, max_length=512, data_path = None, device = None, local_model_path = None, local_tokenizer_path = None, fine_tune_config=None, **kwargs): """Initializes the HuggingFaceWrapper with the given configuration. Arguments: @@ -42,28 +42,13 @@ def __init__(self, config_name, model_id, max_length=512, data_path = None, devi self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') else: self.device = device - try: - if local_model_path == None: - self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) - print("load new model") - else: - self.model = AutoModelForCausalLM.from_pretrained( - local_model_path, local_files_only=True, - device_map="auto" - ) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) - print("load local model") - - if data_path != None: - self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) - + + self.load_model(model_id, local_model_path = local_model_path) + self.load_tokenizer(model_id, local_tokenizer_path = local_tokenizer_path) + + if data_path != None: + self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) - - except Exception as e: - logger.error(f"Failed to load model {model_id}: {e}") - raise - def __call__(self, input, **kwargs) -> ModelResponse: """Process the input data to generate a response from the model. @@ -110,23 +95,47 @@ def load_model(self, model_id, local_model_path = None): try: if local_model_path == None: self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) print("new model") else: self.model = AutoModelForCausalLM.from_pretrained( local_model_path, local_files_only=True, device_map="auto" ) - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) print("local model") - - # Optionally, log the successful model loading + #log the successful model loading logger.info(f"Successfully loaded new model '{model_id}' from '{local_model_path}'") except Exception as e: # Handle exceptions during model loading, such as file not found or load errors logger.error(f"Failed to load model '{model_id}' from '{local_model_path}': {e}") raise # Or handle error appropriately + + def load_tokenizer(self, model_id, local_tokenizer_path = None): + """ + Load the tokenizer from a local path. 
+ + Arguments: + local_tokenizer_path (str): The file path to the tokenizer to be loaded. + model_id (str): An identifier for the model on Huggingface. + + Raises: + Exception: If the tokenizer cannot be loaded from the given path or identifier. Possible reasons include file not found, incorrect model ID, or network issues while fetching the tokenizer. + """ + + try: + if local_tokenizer_path == None: + self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) + print("new tokenizer") + else: + self.tokenizer = AutoTokenizer.from_pretrained(local_tokenizer_path) + print("local tokenizer") + + #log the successful tokenizer loading + logger.info(f"Successfully loaded new tokenizer for model '{model_id}' from '{local_tokenizer_path}'") + except Exception as e: + # Handle exceptions during model loading, such as file not found or load errors + logger.error(f"Failed to load tokenizer for model '{model_id}' from '{local_tokenizer_path}': {e}") + raise # Or handle error appropriately def fine_tune(self, data_path, fine_tune_config=None): """ @@ -250,29 +259,29 @@ def formatting_prompts_func(example): # Specify the filename log_name = f"{model.config._name_or_path.split('/')[-1]}_{time_string}_log_history.json" - - relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+log_name) - normalized_path = os.path.normpath(relative_path) - - os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + log_path = os.path.join(os.path.dirname(__file__), log_name) # Writing JSON data - with open(normalized_path, 'w') as f: + with open(log_path, 'w') as f: json.dump(trainer.state.log_history, f) - save_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" - relative_path = os.path.join(os.path.dirname(__file__), "../../../examples/load_finetune_huggingface_model/"+save_name) - normalized_path = os.path.normpath(relative_path) + - os.makedirs(os.path.dirname(normalized_path), exist_ok=True) + # os.makedirs(os.path.dirname(model_path), exist_ok=True) # Check if directory exists - if not os.path.exists(normalized_path): - # If not, create the directory - os.makedirs(normalized_path) + # if not os.path.exists(model_path): + # # If not, create the directory + # os.makedirs(model_path) #save model - trainer.save_model(normalized_path) - # trainer.mode.save_config(save_path) + model_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" + model_path = os.path.join(os.path.dirname(__file__), model_name) + trainer.save_model(model_path) + + #save tokenizer + tokenizer_name = f"sft_{model.config._name_or_path.split('/')[-1]}_tokenizer_{time_string}" + tokenizer_path = os.path.join(os.path.dirname(__file__), tokenizer_name) + tokenizer.save_pretrained(tokenizer_path) return model @@ -304,12 +313,12 @@ def __init__(self, name: str, sys_prompt: str, model_config_name: str, use_memor - def load_model(self, model_id, local_model_path=None): + def load_model(self, model_id = None, local_model_path=None): """ Load a new model into the agent. Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. + model_id (str): The Hugging Face model ID or a custom identifier. Needed if loading model from Hugging Face. local_model_path (str, optional): Path to a locally saved model. 
Raises: @@ -321,6 +330,23 @@ def load_model(self, model_id, local_model_path=None): else: logger.error("The model wrapper does not support dynamic model loading.") + def load_tokenizer(self, model_id = None, local_tokenizer_path=None): + """ + Load a new tokenizer for the agent. + + Arguments: + model_id (str): The Hugging Face model ID or a custom identifier. Needed if loading tokenizer from Hugging Face. + local_tokenizer_path (str, optional): Path to a locally saved tokenizer. + + Raises: + Exception: If the model tokenizer process fails or if the model wrapper does not support dynamic loading. + """ + + if hasattr(self.model, "load_tokenizer"): + self.model.load_tokenizer(model_id, local_tokenizer_path) + else: + logger.error("The model wrapper does not support dynamic loading.") + def fine_tune(self, data_path, fine_tune_config=None): """ Fine-tune the agent's underlying model. diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py index 7a38654d4..31a53c947 100644 --- a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py +++ b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py @@ -15,8 +15,9 @@ def main() -> None: { "model_type": "huggingface", "config_name": "my_custom_model", - "model_id": "google/gemma-2b-it", # Or another generative model of your choice - # "local_model_path": # Specify your local model path + # "model_id": "google/gemma-2b-it", # Or another generative model of your choice. Needed from loading from Hugging Face. + # "local_model_path": # Specify your local model path + # "local_tokenizer_path": # Specify your local tokenizer path "max_length": 128, "device": "cuda", "data_path": "GAIR/lima", # Specify a Hugging Face data path if you wish to finetune the model from the start @@ -39,7 +40,9 @@ def main() -> None: model_config_name="my_custom_model", # Use your custom model config name here ) - dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load gemma-2b-it from Hugging Face + dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load model gemma-2b-it from Hugging Face + dialog_agent.load_tokenizer(model_id = "google/gemma-2b-it", local_tokenizer_path = None) #load tokenizer for gemma-2b-it from Hugging Face + # dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset with default hyperparameters #fine-tune loaded model with lima dataset with customized hyperparameters (`fine_tune_config` argument is optional. Defaults to None.) From 5237356c116efaaea78aa019e2bcf29aebfeea55 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 25 Apr 2024 18:28:41 +0800 Subject: [PATCH 05/32] removed unnecessary info in README --- examples/load_finetune_huggingface_model/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/load_finetune_huggingface_model/README.md b/examples/load_finetune_huggingface_model/README.md index 400df461f..1373ed6c0 100644 --- a/examples/load_finetune_huggingface_model/README.md +++ b/examples/load_finetune_huggingface_model/README.md @@ -2,10 +2,6 @@ This example demonstrates how to load and optionally fine-tune a Hugging Face model within a multi-agent conversation setup using AgentScope. The complete code is provided in `load_finetune_huggingface_model.py`. 
-## Background - -In the context of AgentScope, agents are designed to mimic user and assistant roles in a conversation. This setup allows for the integration and testing of different models from the Hugging Face Hub, enhancing their capabilities through fine-tuning with custom datasets. - ## Functionality Overview This example allows you to: From a6918eb2b290fbf56fffb5c8d1c40f46822945b1 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:31:22 +0800 Subject: [PATCH 06/32] resolved all issues flagged by `pre-commit run` --- .../huggingface_model.py | 498 ++++++++++++------ .../load_finetune_huggingface_model.py | 77 ++- 2 files changed, 404 insertions(+), 171 deletions(-) diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py index 8c48f6ec0..ceddb27b7 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -1,66 +1,125 @@ -from transformers import AutoModelForCausalLM, AutoTokenizer -from agentscope.agents import DialogAgent +# -*- coding: utf-8 -*- +""" +This module provides a HuggingFaceWrapper to manage +and operate Hugging Face Transformers models, enabling loading, +fine-tuning, and response generation. It includes the +Finetune_DialogAgent class, which extends DialogAgent to +enhance fine-tuning capabilities with custom hyperparameters. +Key features include handling model and tokenizer operations, +adapting to specialized datasets, and robust error management. + +Classes: +- HuggingFaceWrapper: Manages Hugging Face models and tokenizers. +- Finetune_DialogAgent: Extends DialogAgent for model fine-tuning. + +""" +from typing import Optional, List, Dict, Any +import os -from agentscope.models import ModelWrapperBase, ModelResponse +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer from loguru import logger +from dotenv import load_dotenv -import torch -import os -from dotenv import load_dotenv, find_dotenv +from agentscope.agents import DialogAgent +from agentscope.models import ModelWrapperBase, ModelResponse -from typing import Optional class HuggingFaceWrapper(ModelWrapperBase): """Wrapper for a Hugging Face transformer model. - This class is responsible for loading and fine-tuning pre-trained models from the Hugging Face library. + This class is responsible for loading and fine-tuning + pre-trained models from the Hugging Face library. """ + model_type: str = "huggingface" # Unique identifier for this model wrapper - def __init__(self, config_name, model_id = None, max_length=512, data_path = None, device = None, local_model_path = None, local_tokenizer_path = None, fine_tune_config=None, **kwargs): + def __init__( + self, + config_name: str, + model_id: Optional[str] = None, + max_length: int = 512, + data_path: Optional[str] = None, + device: Optional[torch.device] = None, + local_model_path: Optional[str] = None, + local_tokenizer_path: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: """Initializes the HuggingFaceWrapper with the given configuration. Arguments: config_name (str): Configuration name for model setup. - model_id (str): Identifier for the pre-trained model on Hugging Face. - max_length (int): Maximum sequence length for the model output per reply. Defaults to 512. - data_path (str, optional): Path to the dataset for fine-tuning the model. 
- device (torch.device, optional): Device to run the model on. Will default to GPU if available. - local_model_path (str, optional): Local file path to a pre-trained model. - fine_tune_config (dict, optional): Configuration for fine-tuning the model. + model_id (str): Identifier for the pre-trained model on + Hugging Face. + max_length (int): Maximum sequence length for the + model output per reply. + Defaults to 512. + data_path (str, optional): Path to the dataset for + fine-tuning the model. + device (torch.device, optional): Device to run the model on. + Default to GPU if available. + local_model_path (str, optional): Local file path to a + pre-trained model. + fine_tune_config (dict, optional): Configuration for + fine-tuning the model. **kwargs: Additional keyword arguments. """ super().__init__(config_name=config_name) + self.model = None self.max_length = max_length # Set max_length as an attribute self.model_id = model_id - relative_path = os.path.join(os.path.dirname(__file__), "../load_finetune_huggingface_model/.env") + relative_path = os.path.join( + os.path.dirname(__file__), + "../load_finetune_huggingface_model/.env", + ) dotenv_path = os.path.normpath(relative_path) - _ = load_dotenv(dotenv_path) # read local .env file - self.huggingface_token = os.getenv('HUGGINGFACE_TOKEN') + _ = load_dotenv(dotenv_path) # read local .env file + self.huggingface_token = os.getenv("HUGGINGFACE_TOKEN") - if device == None: - self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + if device is None: + self.device = torch.device( + "cuda" if torch.cuda.is_available() else "cpu", + ) else: self.device = device - - self.load_model(model_id, local_model_path = local_model_path) - self.load_tokenizer(model_id, local_tokenizer_path = local_tokenizer_path) - - if data_path != None: - self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) - - def __call__(self, input, **kwargs) -> ModelResponse: + + self.load_model(model_id, local_model_path=local_model_path) + self.load_tokenizer( + model_id, + local_tokenizer_path=local_tokenizer_path, + ) + + if data_path is not None: + self.model = self.fine_tune_training( + self.model, + self.tokenizer, + data_path, + token=self.huggingface_token, + fine_tune_config=fine_tune_config, + ) + + def __call__( + self, + inputs: List[Dict[str, Any]], + **kwargs: Any, + ) -> ModelResponse: """Process the input data to generate a response from the model. - This method tokenizes the input text, generates a response using the model, + This method tokenizes the input text, generates + a response using the model, and then decodes the generated tokens into a string. Arguments: - input (list): A list of dictionaries where each dictionary contains 'name', 'role' and 'content' keys and their respective values. - **kwargs: Additional keyword arguments for the model's generate function. + input (list): A list of dictionaries where each dictionary contains + 'name', 'role' and 'content' keys + and their respective values. + **kwargs: Additional keyword arguments for the + model's generate function. Returns: - ModelResponse: An object containing the generated text and raw model output. + ModelResponse: An object containing the generated + text and raw model output. Raises: Exception: If an error occurs during text generation. 
@@ -68,123 +127,229 @@ def __call__(self, input, **kwargs) -> ModelResponse: try: # Tokenize the input text - concatenated_input = "\n".join([f"{d.get('name', 'System')}: {d['content']}" for d in input]) - input_ids = self.tokenizer.encode(f"{concatenated_input}\nAssistent: ", return_tensors='pt') + concatenated_input = "\n".join( + [f"{d.get('name', 'System')}: {d['content']}" for d in inputs], + ) + input_ids = self.tokenizer.encode( + f"{concatenated_input}\nAssistent: ", + return_tensors="pt", + ) # Generate response using the model - outputs = self.model.generate(input_ids.to(self.device), max_new_tokens = self.max_length, **kwargs) + outputs = self.model.generate( + input_ids.to(self.device), + max_new_tokens=self.max_length, + **kwargs, + ) # Decode the generated tokens to a string - generated_text = self.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True) - - return ModelResponse(text=generated_text, raw = outputs) + generated_text = self.tokenizer.decode( + outputs[0][input_ids.shape[1]:], + skip_special_tokens=True, + ) + return ModelResponse(text=generated_text, raw=outputs) except Exception as e: logger.error(f"Generation error: {e}") raise - def load_model(self, model_id, local_model_path = None): + def format(self, data: Any) -> Any: """ - Load a new model for the agent from a local path and update the agent's model. - + Pass-through for data formatting. Assume + data is already in the correct format. + + Arguments: + data (Any): Data to be formatted. + + Returns: + Any: The input data unchanged. + """ + return data + + def load_model( + self, + model_id: Optional[str] = None, + local_model_path: Optional[str] = None, + ) -> None: + """ + Load a new model for the agent from + a local path and update the agent's model. + Arguments: local_model_path (str): The file path to the model to be loaded. model_id (str): An identifier for the model on Huggingface. - + Raises: - Exception: If the model cannot be loaded from the given path or identifier. Possible reasons include file not found, incorrect model ID, or network issues while fetching the model. + Exception: If the model cannot be loaded from the given + path or identifier. + Possible reasons include file not found, + incorrect model ID, + or network issues while fetching the model. 
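As a usage sketch for this method (continuing the wrapper instance above): loading from the Hub and loading from disk differ only in whether `local_model_path` is supplied. The local directory name below is a placeholder in the `sft_<model>_<timestamp>` form used when fine-tuned weights are saved later in this file.

```python
# Fetch the checkpoint from the Hugging Face Hub.
wrapper.load_model(model_id="google/gemma-2b-it")

# Or reload a previously fine-tuned checkpoint from disk; `model_id` is then
# only used in the log message.
wrapper.load_model(
    model_id="google/gemma-2b-it",
    local_model_path="./sft_gemma-2b-it_2024-04-25_12-00-00",  # placeholder path
)
```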
""" try: - if local_model_path == None: - self.model = AutoModelForCausalLM.from_pretrained(model_id, token = self.huggingface_token, device_map="auto",) + if local_model_path is None: + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + token=self.huggingface_token, + device_map="auto", + ) print("new model") else: self.model = AutoModelForCausalLM.from_pretrained( - local_model_path, local_files_only=True, - device_map="auto" + local_model_path, + local_files_only=True, + device_map="auto", ) print("local model") - - #log the successful model loading - logger.info(f"Successfully loaded new model '{model_id}' from '{local_model_path}'") + + # log the successful model loading + info_msg = ( + f"Successfully loaded new model '{model_id}' from " + f"'{local_model_path}'" + ) + logger.info(info_msg) + except Exception as e: - # Handle exceptions during model loading, such as file not found or load errors - logger.error(f"Failed to load model '{model_id}' from '{local_model_path}': {e}") - raise # Or handle error appropriately - - def load_tokenizer(self, model_id, local_tokenizer_path = None): + # Handle exceptions during model loading, + # such as file not found or load errors + error_msg = ( + f"Failed to load model '{model_id}' " + f"from '{local_model_path}': {e}" + ) + + logger.error(error_msg) + + raise + + def load_tokenizer( + self, + model_id: Optional[str] = None, + local_tokenizer_path: Optional[str] = None, + ) -> None: """ Load the tokenizer from a local path. - + Arguments: - local_tokenizer_path (str): The file path to the tokenizer to be loaded. + local_tokenizer_path (str): The file path to the + tokenizer to be loaded. model_id (str): An identifier for the model on Huggingface. - + Raises: - Exception: If the tokenizer cannot be loaded from the given path or identifier. Possible reasons include file not found, incorrect model ID, or network issues while fetching the tokenizer. + Exception: If the tokenizer cannot be loaded from the + given path or identifier. Possible reasons include file not found, + incorrect model ID, or network issues while fetching the tokenizer. 
""" try: - if local_tokenizer_path == None: - self.tokenizer = AutoTokenizer.from_pretrained(model_id, token = self.huggingface_token) + if local_tokenizer_path is None: + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, + token=self.huggingface_token, + ) print("new tokenizer") else: - self.tokenizer = AutoTokenizer.from_pretrained(local_tokenizer_path) + self.tokenizer = AutoTokenizer.from_pretrained( + local_tokenizer_path, + ) print("local tokenizer") - - #log the successful tokenizer loading - logger.info(f"Successfully loaded new tokenizer for model '{model_id}' from '{local_tokenizer_path}'") + + # log the successful tokenizer loading + logger.info( + f"Successfully loaded new tokenizer for model '{model_id}' " + f"from '{local_tokenizer_path}'", + ) + except Exception as e: - # Handle exceptions during model loading, such as file not found or load errors - logger.error(f"Failed to load tokenizer for model '{model_id}' from '{local_tokenizer_path}': {e}") - raise # Or handle error appropriately + # Handle exceptions during model loading, + # such as file not found or load errors + error_message = ( + f"Failed to load tokenizer for model '{model_id}' from " + f"'{local_tokenizer_path}': {e}" + ) + logger.error(error_message) + + raise - def fine_tune(self, data_path, fine_tune_config=None): + def fine_tune( + self, + data_path: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, + ) -> None: """ Fine-tune the agent's model using data from the specified path. - + Arguments: - data_path (str): The file path to the training data from Hugging Face. - + data_path (str): The file path to the training + data from Hugging Face. + Raises: - Exception: If the fine-tuning process fails. This could be due to issues with the data path, configuration parameters, or internal errors during the training process. + Exception: If the fine-tuning process fails. This could be + due to issues with the data path, configuration parameters, + or internal errors during the training process. """ try: - self.model = self.fine_tune_training(self.model, self.tokenizer, data_path, token = self.huggingface_token, fine_tune_config = fine_tune_config) - - logger.info(f"Successfully fine-tuned model with data from '{data_path}'") + self.model = self.fine_tune_training( + self.model, + self.tokenizer, + data_path, + token=self.huggingface_token, + fine_tune_config=fine_tune_config, + ) + + logger.info( + f"Successfully fine-tuned model with data from '{data_path}'", + ) except Exception as e: - logger.error(f"Failed to fine-tune model with data from '{data_path}': {e}") - raise # Or handle the error appropriately - + logger.error( + f"Failed to fine-tune model with data from '{data_path}': {e}", + ) + raise - def fine_tune_training(self, model, tokenizer, data_path, token, fine_tune_config=None): - + def fine_tune_training( + self, + model: AutoModelForCausalLM, + tokenizer: AutoTokenizer, + data_path: Optional[str] = None, + token: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, + ) -> AutoModelForCausalLM: """ - The actual method that handles training and fine-tuning the model on the dataset specified by the data_path using a given tokenizer. + The actual method that handles training and fine-tuning + the model on the dataset specified by + the data_path using a given tokenizer. Arguments: - model (AutoModelForCausalLM): The pre-trained causal language model from Hugging Face's transformers. 
- tokenizer (AutoTokenizer): The tokenizer corresponding to the pre-trained model. - data_path (str): The file path or dataset identifier to load the dataset from Hugging Face. + model (AutoModelForCausalLM): The pre-trained causal language model + from Hugging Face's transformers. + tokenizer (AutoTokenizer): The tokenizer corresponding to + the pre-trained model. + data_path (str): The file path or dataset identifier to load + the dataset from Hugging Face. token (str): The authentication token for Hugging Face. - fine_tune_config (dict, optional): Configuration options for fine-tuning the model, including LoRA and training arguments. + fine_tune_config (dict, optional): Configuration options for + fine-tuning the model, + including LoRA and training + arguments. Returns: AutoModelForCausalLM: The fine-tuned language model. Raises: - Exception: Raises an exception if the dataset loading or fine-tuning process fails. + Exception: Raises an exception if the dataset + loading or fine-tuning process fails. Note: - This method updates the model in place and also logs the fine-tuning process. - It utilizes the LoRA configuration and custom training arguments to adapt the pre-trained model to the specific dataset. - The training log and trained model are saved in the same directory with the specific timestamp at saving time as part of the log/model fodler name. + This method updates the model in place and also logs + the fine-tuning process. + It utilizes the LoRA configuration and custom training arguments + to adapt the pre-trained model to the specific dataset. + The training log and trained model are saved in the same + directory with the specific timestamp at saving time + as part of the log/model fodler name. """ from datasets import load_dataset from datetime import datetime - import os import json - dataset = load_dataset(data_path, token = token) + dataset = load_dataset(data_path, token=token) from peft import LoraConfig @@ -193,13 +358,12 @@ def fine_tune_training(self, model, tokenizer, data_path, token, fine_tune_confi "lora_alpha": 32, "lora_dropout": 0.05, "bias": "none", - "task_type": "CAUSAL_LM" + "task_type": "CAUSAL_LM", } if fine_tune_config is not None: - if fine_tune_config['lora_config'] is not None: - lora_config_default.update(fine_tune_config['lora_config']) - + if fine_tune_config["lora_config"] is not None: + lora_config_default.update(fine_tune_config["lora_config"]) training_defaults = { "per_device_train_batch_size": 1, @@ -215,27 +379,33 @@ def fine_tune_training(self, model, tokenizer, data_path, token, fine_tune_confi } if fine_tune_config is not None: - if fine_tune_config['training_args'] is not None: - training_defaults.update(fine_tune_config['training_args']) + if fine_tune_config["training_args"] is not None: + training_defaults.update(fine_tune_config["training_args"]) from peft import get_peft_model - lora_config = LoraConfig(**lora_config_default) model = get_peft_model(model, lora_config) from trl import SFTTrainer, DataCollatorForCompletionOnlyLM import transformers - def formatting_prompts_func(example): + def formatting_prompts_func( + example: Dict[str, List[List[str]]], + ) -> List[str]: output_texts = [] - for i in range(len(example['conversations'])): - text = f"### Question: {example['conversations'][i][0]}\n ### Answer: {example['conversations'][i][1]}" + for i in range(len(example["conversations"])): + question = f"### Question: {example['conversations'][i][0]}" + answer = f"### Answer: {example['conversations'][i][1]}" + text = 
f"{question}\n {answer}" output_texts.append(text) return output_texts response_template = " ### Answer:" - collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer) + collator = DataCollatorForCompletionOnlyLM( + response_template, + tokenizer=tokenizer, + ) trainer_args = transformers.TrainingArguments(**training_defaults) @@ -253,93 +423,122 @@ def formatting_prompts_func(example): trainer.train() - now = datetime.now() - time_string = now.strftime('%Y-%m-%d_%H-%M-%S') + time_string = now.strftime("%Y-%m-%d_%H-%M-%S") # Specify the filename - log_name = f"{model.config._name_or_path.split('/')[-1]}_{time_string}_log_history.json" + log_name_temp = model.config.name_or_path.split("/")[-1] + log_name = f"{log_name_temp}_{time_string}_log_history.json" log_path = os.path.join(os.path.dirname(__file__), log_name) - # Writing JSON data - with open(log_path, 'w') as f: + # log training history + with open(log_path, "w", encoding="utf-8") as f: json.dump(trainer.state.log_history, f) - - - # os.makedirs(os.path.dirname(model_path), exist_ok=True) - # Check if directory exists - # if not os.path.exists(model_path): - # # If not, create the directory - # os.makedirs(model_path) - - #save model - model_name = f"sft_{model.config._name_or_path.split('/')[-1]}_{time_string}" + # save model + model_name = ( + f"sft_{model.config.name_or_path.split('/')[-1]}_{time_string}" + ) model_path = os.path.join(os.path.dirname(__file__), model_name) trainer.save_model(model_path) - #save tokenizer - tokenizer_name = f"sft_{model.config._name_or_path.split('/')[-1]}_tokenizer_{time_string}" - tokenizer_path = os.path.join(os.path.dirname(__file__), tokenizer_name) + # save tokenizer + tokenizer_name_temp = model.config.name_or_path.split("/")[-1] + tokenizer_name = f"sft_{tokenizer_name_temp}_tokenizer_{time_string}" + tokenizer_path = os.path.join( + os.path.dirname(__file__), + tokenizer_name, + ) tokenizer.save_pretrained(tokenizer_path) - return model class Finetune_DialogAgent(DialogAgent): """ - A dialog agent capable of fine-tuning its underlying model based on provided data. + A dialog agent capable of fine-tuning its + underlying model based on provided data. - Inherits from DialogAgent and adds functionality for fine-tuning with custom hyperparameters. + Inherits from DialogAgent and adds functionality for + fine-tuning with custom hyperparameters. """ - def __init__(self, name: str, sys_prompt: str, model_config_name: str, use_memory: bool = True, memory_config: Optional[dict] = None): + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + use_memory: bool = True, + memory_config: Optional[dict] = None, + ): """ Initializes a new Finetune_DialogAgent with specified configuration. Arguments: name (str): Name of the agent. sys_prompt (str): System prompt or description of the agent's role. - model_config_name (str): The configuration name for the underlying model. - use_memory (bool, optional): Indicates whether to utilize memory features. Defaults to True. - memory_config (dict, optional): Configuration for memory functionalities if `use_memory` is True. - + model_config_name (str): The configuration name for + the underlying model. + use_memory (bool, optional): Indicates whether to utilize + memory features. Defaults to True. + memory_config (dict, optional): Configuration for memory + functionalities if + `use_memory` is True. + Note: Refer to `class DialogAgent(AgentBase)` for more information. 
""" - - super().__init__(name, sys_prompt, model_config_name, use_memory, memory_config) - - - def load_model(self, model_id = None, local_model_path=None): + super().__init__( + name, + sys_prompt, + model_config_name, + use_memory, + memory_config, + ) + + def load_model( + self, + model_id: Optional[str] = None, + local_model_path: Optional[str] = None, + ) -> None: """ Load a new model into the agent. Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. Needed if loading model from Hugging Face. + model_id (str): The Hugging Face model ID or a custom identifier. + Needed if loading model from Hugging Face. local_model_path (str, optional): Path to a locally saved model. - + Raises: - Exception: If the model loading process fails or if the model wrapper does not support dynamic loading. + Exception: If the model loading process fails or if the + model wrapper does not support dynamic loading. """ if hasattr(self.model, "load_model"): self.model.load_model(model_id, local_model_path) else: - logger.error("The model wrapper does not support dynamic model loading.") - - def load_tokenizer(self, model_id = None, local_tokenizer_path=None): + logger.error( + "The model wrapper does not support dynamic model loading.", + ) + + def load_tokenizer( + self, + model_id: Optional[str] = None, + local_tokenizer_path: Optional[str] = None, + ) -> None: """ Load a new tokenizer for the agent. Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. Needed if loading tokenizer from Hugging Face. - local_tokenizer_path (str, optional): Path to a locally saved tokenizer. - + model_id (str): The Hugging Face model ID or a custom identifier. + Needed if loading tokenizer from Hugging Face. + local_tokenizer_path (str, optional): Path to a locally saved + tokenizer. + Raises: - Exception: If the model tokenizer process fails or if the model wrapper does not support dynamic loading. + Exception: If the model tokenizer process fails or if the + model wrapper does not support dynamic loading. """ if hasattr(self.model, "load_tokenizer"): @@ -347,15 +546,20 @@ def load_tokenizer(self, model_id = None, local_tokenizer_path=None): else: logger.error("The model wrapper does not support dynamic loading.") - def fine_tune(self, data_path, fine_tune_config=None): + def fine_tune( + self, + data_path: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, + ) -> None: """ Fine-tune the agent's underlying model. Arguments: data_path (str): The path to the training data. - + Raises: - Exception: If fine-tuning fails or if the model wrapper does not support fine-tuning. + Exception: If fine-tuning fails or if the + model wrapper does not support fine-tuning. """ if hasattr(self.model, "fine_tune"): diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py index 31a53c947..b16f51f26 100644 --- a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py +++ b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py @@ -1,34 +1,50 @@ +# -*- coding: utf-8 -*- +""" +This script sets up a conversational agent using +AgentScope with a Hugging Face model. +It includes initializing a Finetune_DialogAgent, +loading and fine-tuning a pre-trained model, +and conducting a dialogue via a sequential pipeline. +The conversation continues until the user exits. 
+Features include model and tokenizer loading, +and fine-tuning on the GAIR/lima dataset with adjustable parameters. +""" +from huggingface_model import Finetune_DialogAgent + import agentscope from agentscope.agents.user_agent import UserAgent from agentscope.pipelines.functional import sequentialpipeline -from huggingface_model import Finetune_DialogAgent def main() -> None: """A basic conversation demo with a custom model""" # Initialize AgentScope with your custom model configuration - agentscope.init( model_configs=[ { "model_type": "huggingface", "config_name": "my_custom_model", - # "model_id": "google/gemma-2b-it", # Or another generative model of your choice. Needed from loading from Hugging Face. + # Or another generative model of your choice. + # Needed from loading from Hugging Face. + "model_id": "google/gemma-2b-it", # "local_model_path": # Specify your local model path # "local_tokenizer_path": # Specify your local tokenizer path "max_length": 128, "device": "cuda", - "data_path": "GAIR/lima", # Specify a Hugging Face data path if you wish to finetune the model from the start - - # fine_tune_config (Optional): Configuration for fine-tuning the model. This dictionary - # can include hyperparameters and other training options that - # will be passed to the fine-tuning method. Defaults to None. - "fine_tune_config":{ - "lora_config": {"r": 20, "lora_alpha": 40}, - "training_args": {"max_steps": 20, "logging_steps": 2} - } + # Specify a Hugging Face data path if you + # wish to finetune the model from the start + "data_path": "GAIR/lima", + # fine_tune_config (Optional): Configuration for + # fine-tuning the model. + # This dictionary can include hyperparameters and other + # training options that will be passed to the + # fine-tuning method. Defaults to None. + "fine_tune_config": { + "lora_config": {"r": 20, "lora_alpha": 40}, + "training_args": {"max_steps": 20, "logging_steps": 2}, + }, }, ], ) @@ -37,19 +53,31 @@ def main() -> None: dialog_agent = Finetune_DialogAgent( name="Assistant", sys_prompt="You're a helpful assistant.", - model_config_name="my_custom_model", # Use your custom model config name here + # Use your custom model config name here + model_config_name="my_custom_model", + ) + + dialog_agent.load_model( + model_id="google/gemma-2b-it", + local_model_path=None, + ) # load model gemma-2b-it from Hugging Face + dialog_agent.load_tokenizer( + model_id="google/gemma-2b-it", + local_tokenizer_path=None, + ) # load tokenizer for gemma-2b-it from Hugging Face + + # fine-tune loaded model with lima dataset with default hyperparameters + # dialog_agent.fine_tune(data_path= "GAIR/lima") + + # fine-tune loaded model with lima dataset with customized hyperparameters + # (`fine_tune_config` argument is optional. Defaults to None.) + dialog_agent.fine_tune( + "GAIR/lima", + fine_tune_config={ + "lora_config": {"r": 24, "lora_alpha": 48}, + "training_args": {"max_steps": 30, "logging_steps": 3}, + }, ) - - dialog_agent.load_model(model_id = "google/gemma-2b-it", local_model_path = None) #load model gemma-2b-it from Hugging Face - dialog_agent.load_tokenizer(model_id = "google/gemma-2b-it", local_tokenizer_path = None) #load tokenizer for gemma-2b-it from Hugging Face - - # dialog_agent.fine_tune(data_path= "GAIR/lima") #fine-tune loaded model with lima dataset with default hyperparameters - - #fine-tune loaded model with lima dataset with customized hyperparameters (`fine_tune_config` argument is optional. Defaults to None.) 
- dialog_agent.fine_tune("GAIR/lima", fine_tune_config ={ - "lora_config": {"r": 24, "lora_alpha": 48}, - "training_args": {"max_steps": 30, "logging_steps": 3} - }) user_agent = UserAgent() @@ -58,5 +86,6 @@ def main() -> None: while x is None or x.content != "exit": x = sequentialpipeline([dialog_agent, user_agent], x) + if __name__ == "__main__": main() From b4f4f40ae56896b73b735d84d6d0c65137ed8517 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:40:13 +0800 Subject: [PATCH 07/32] further removed info irrelevant to model loading and finetuning --- examples/load_finetune_huggingface_model/README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/examples/load_finetune_huggingface_model/README.md b/examples/load_finetune_huggingface_model/README.md index 1373ed6c0..bdb6172d6 100644 --- a/examples/load_finetune_huggingface_model/README.md +++ b/examples/load_finetune_huggingface_model/README.md @@ -4,15 +4,7 @@ This example demonstrates how to load and optionally fine-tune a Hugging Face mo ## Functionality Overview -This example allows you to: - -- Set up a user agent and an assistant agent for interactive conversations. -- Modify the `sys_prompt` to customize the assistant agent's role. -- Terminate the conversation by entering "exit". - -## Advanced Features - -Beyond basic conversation setup, the example introduces advanced functionalities: +Compared to basic conversation setup, this example introduces model loading and fine-tuning features: - Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. - Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. From e33b3ded915b2c75375a27c1de27ea27c2cd1fef Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:43:04 +0800 Subject: [PATCH 08/32] Update huggingface_model.py fixed issue related to `format` method --- .../huggingface_model.py | 96 ++++++++++++++----- 1 file changed, 70 insertions(+), 26 deletions(-) diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/load_finetune_huggingface_model/huggingface_model.py index ceddb27b7..1a95ce7eb 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/load_finetune_huggingface_model/huggingface_model.py @@ -13,7 +13,7 @@ - Finetune_DialogAgent: Extends DialogAgent for model fine-tuning. """ -from typing import Optional, List, Dict, Any +from typing import Sequence, Any, Union, List, Optional, Dict import os import torch @@ -23,6 +23,8 @@ from agentscope.agents import DialogAgent from agentscope.models import ModelWrapperBase, ModelResponse +from agentscope.message import MessageBase +from agentscope.utils.tools import _convert_to_str class HuggingFaceWrapper(ModelWrapperBase): @@ -127,11 +129,11 @@ def __call__( try: # Tokenize the input text - concatenated_input = "\n".join( - [f"{d.get('name', 'System')}: {d['content']}" for d in inputs], + concatenated_input = "\n ".join( + [f"{d.get('role')}: {d['content']}" for d in inputs], ) input_ids = self.tokenizer.encode( - f"{concatenated_input}\nAssistent: ", + f"{concatenated_input}\n assistent: ", return_tensors="pt", ) # Generate response using the model @@ -150,18 +152,56 @@ def __call__( logger.error(f"Generation error: {e}") raise - def format(self, data: Any) -> Any: - """ - Pass-through for data formatting. 
Assume - data is already in the correct format. + def format( + self, + *args: Union[MessageBase, Sequence[MessageBase]], + ) -> List[dict]: + """A basic strategy to format the input into the required format of + Hugging Face models. - Arguments: - data (Any): Data to be formatted. + Args: + args (`Union[MessageBase, Sequence[MessageBase]]`): + The input arguments to be formatted, where each argument + should be a `Msg` object, or a list of `Msg` objects. + In distribution, placeholder is also allowed. Returns: - Any: The input data unchanged. + `List[dict]`: + The formatted messages. """ - return data + huggingface_msgs = [] + for msg in args: + if msg is None: + continue + if isinstance(msg, MessageBase): + # content shouldn't be empty string + if msg.content == "": + logger.warning( + "The content field cannot be " + "empty string. To avoid error, the empty string is " + "replaced by a blank space automatically, but the " + "model may not work as expected.", + ) + msg.content = " " + + huggingface_msg = { + "role": msg.role, + "content": _convert_to_str(msg.content), + } + + # image url + if msg.url is not None: + huggingface_msg["images"] = [msg.url] + + huggingface_msgs.append(huggingface_msg) + elif isinstance(msg, list): + huggingface_msgs.extend(self.format(*msg)) + else: + raise TypeError( + f"Invalid message type: {type(msg)}, `Msg` is expected.", + ) + + return huggingface_msgs def load_model( self, @@ -191,20 +231,22 @@ def load_model( token=self.huggingface_token, device_map="auto", ) - print("new model") + info_msg = ( + f"Successfully loaded new model '{model_id}' from " + f"Hugging Face" + ) else: self.model = AutoModelForCausalLM.from_pretrained( local_model_path, local_files_only=True, device_map="auto", ) - print("local model") + info_msg = ( + f"Successfully loaded new model '{model_id}' from " + f"'{local_model_path}'" + ) # log the successful model loading - info_msg = ( - f"Successfully loaded new model '{model_id}' from " - f"'{local_model_path}'" - ) logger.info(info_msg) except Exception as e: @@ -244,18 +286,20 @@ def load_tokenizer( model_id, token=self.huggingface_token, ) - print("new tokenizer") + # log the successful tokenizer loading + logger.info( + f"Successfully loaded new tokenizer for model " + f"'{model_id}' from Hugging Face", + ) else: self.tokenizer = AutoTokenizer.from_pretrained( local_tokenizer_path, ) - print("local tokenizer") - - # log the successful tokenizer loading - logger.info( - f"Successfully loaded new tokenizer for model '{model_id}' " - f"from '{local_tokenizer_path}'", - ) + # log the successful tokenizer loading + logger.info( + f"Successfully loaded new tokenizer for model " + f"'{model_id}' from '{local_tokenizer_path}'", + ) except Exception as e: # Handle exceptions during model loading, From 80238205d6fb9e9d63e61f8b62026cfb97d34123 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 2 May 2024 11:49:19 +0800 Subject: [PATCH 09/32] updated according to suggestions given --- .../configs/model_configs.json | 18 ++ ...rsation_with_agent_with_finetuned_model.py | 98 +++++++++ .../finetune_dialogagent.py | 132 ++++++++++++ .../huggingface_model.py | 188 +++++------------- .../load_finetune_huggingface_model/README.md | 42 ---- .../load_finetune_huggingface_model.py | 91 --------- 6 files changed, 298 insertions(+), 271 deletions(-) create mode 100644 examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json create mode 100644 
examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py create mode 100644 examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py rename examples/{load_finetune_huggingface_model => conversation_with_agent_with_finetuned_model}/huggingface_model.py (77%) delete mode 100644 examples/load_finetune_huggingface_model/README.md delete mode 100644 examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py diff --git a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json new file mode 100644 index 000000000..3b59c8318 --- /dev/null +++ b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json @@ -0,0 +1,18 @@ +[ + { + "model_type": "huggingface", + "config_name": "my_custom_model", + + "model_id": "openlm-research/open_llama_3b_v2", + + "max_length": 128, + "device": "cuda", + + "data_path": "databricks/databricks-dolly-15k", + + "fine_tune_config": { + "lora_config": {"r": 20, "lora_alpha": 40}, + "training_args": {"max_steps": 1000, "logging_steps": 1} + } + } +] \ No newline at end of file diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py new file mode 100644 index 000000000..989e8525f --- /dev/null +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +This script sets up a conversational agent using +AgentScope with a Hugging Face model. +It includes initializing a Finetune_DialogAgent, +loading and fine-tuning a pre-trained model, +and conducting a dialogue via a sequential pipeline. +The conversation continues until the user exits. +Features include model and tokenizer loading, +and fine-tuning on the databricks-dolly-15k dataset with adjustable parameters. +""" +from finetune_dialogagent import Finetune_DialogAgent + +import agentscope +from agentscope.agents.user_agent import UserAgent +from agentscope.pipelines.functional import sequentialpipeline + + +def main() -> None: + """A basic conversation demo with a custom model""" + + # Initialize AgentScope with your custom model configuration + + # agentscope.init( + # model_configs=[ + # { + # "model_type": "huggingface", + # "config_name": "my_custom_model", + # # Or another generative model of your choice. + # # Needed from loading from Hugging Face. + # "model_id": "openlm-research/open_llama_3b_v2", + # # "local_model_path": # Specify your local model path + # # "local_tokenizer_path": # Specify your local tokenizer path + # "max_length": 128, + # "device": "cuda", + # # Specify a Hugging Face data path if you + # # wish to finetune the model from the start + # "data_path": "databricks/databricks-dolly-15k", + # # fine_tune_config (Optional): Configuration for + # # fine-tuning the model. + # # This dictionary can include hyperparameters and other + # # training options that will be passed to the + # # fine-tuning method. Defaults to None. + # # `lora_config` and `training_args` follow + # # the standard lora and sfttrainer fields. 
+ # "fine_tune_config": { + # "lora_config": {"r": 20, "lora_alpha": 40}, + # "training_args": {"max_steps": 1000, "logging_steps": 1}, + # }, + # }, + # ], + # ) + + # alternatively can load `model_configs` from json file + agentscope.init( + model_configs="./configs/model_configs.json", + ) + + # Init agents with the custom model + dialog_agent = Finetune_DialogAgent( + name="Assistant", + sys_prompt="You're a helpful assistant.", + # Use your custom model config name here + model_config_name="my_custom_model", + ) + + dialog_agent.load_model( + model_id="openlm-research/open_llama_3b_v2", + local_model_path=None, + ) # load model gemma-2b-it from Hugging Face + dialog_agent.load_tokenizer( + model_id="openlm-research/open_llama_3b_v2", + local_tokenizer_path=None, + ) # load tokenizer for gemma-2b-it from Hugging Face + + # fine-tune loaded model with databricks-dolly-15k dataset with default hyperparameters + dialog_agent.fine_tune(data_path="databricks/databricks-dolly-15k") + + # fine-tune loaded model with databricks-dolly-15k dataset with customized hyperparameters + # (`fine_tune_config` argument is optional. Defaults to None.) + dialog_agent.fine_tune( + "databricks/databricks-dolly-15k", + fine_tune_config={ + "lora_config": {"r": 24, "lora_alpha": 48}, + "training_args": {"max_steps": 30, "logging_steps": 3}, + }, + ) + + user_agent = UserAgent() + + # Start the conversation between user and assistant + x = None + while x is None or x.content != "exit": + x = sequentialpipeline([dialog_agent, user_agent], x) + + +if __name__ == "__main__": + main() diff --git a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py new file mode 100644 index 000000000..1059cc99e --- /dev/null +++ b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +""" +This module provides the Finetune_DialogAgent class, +which extends DialogAgent to enhance fine-tuning +capabilities with custom hyperparameters. +""" +from typing import Sequence, Any, Union, List, Optional, Dict +import os + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer +from loguru import logger +from dotenv import load_dotenv + +from agentscope.agents import DialogAgent +from agentscope.models import ModelWrapperBase, ModelResponse +from agentscope.message import MessageBase +from agentscope.utils.tools import _convert_to_str + +class Finetune_DialogAgent(DialogAgent): + """ + A dialog agent capable of fine-tuning its + underlying model based on provided data. + + Inherits from DialogAgent and adds functionality for + fine-tuning with custom hyperparameters. + """ + + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + use_memory: bool = True, + memory_config: Optional[dict] = None, + ): + """ + Initializes a new Finetune_DialogAgent with specified configuration. + + Arguments: + name (str): Name of the agent. + sys_prompt (str): System prompt or description of the agent's role. + model_config_name (str): The configuration name for + the underlying model. + use_memory (bool, optional): Indicates whether to utilize + memory features. Defaults to True. + memory_config (dict, optional): Configuration for memory + functionalities if + `use_memory` is True. + + Note: + Refer to `class DialogAgent(AgentBase)` for more information. 
+ """ + + super().__init__( + name, + sys_prompt, + model_config_name, + use_memory, + memory_config, + ) + + def load_model( + self, + model_id: Optional[str] = None, + local_model_path: Optional[str] = None, + ) -> None: + """ + Load a new model into the agent. + + Arguments: + model_id (str): The Hugging Face model ID or a custom identifier. + Needed if loading model from Hugging Face. + local_model_path (str, optional): Path to a locally saved model. + + Raises: + Exception: If the model loading process fails or if the + model wrapper does not support dynamic loading. + """ + + if hasattr(self.model, "load_model"): + self.model.load_model(model_id, local_model_path) + else: + logger.error( + "The model wrapper does not support dynamic model loading.", + ) + + def load_tokenizer( + self, + model_id: Optional[str] = None, + local_tokenizer_path: Optional[str] = None, + ) -> None: + """ + Load a new tokenizer for the agent. + + Arguments: + model_id (str): The Hugging Face model ID or a custom identifier. + Needed if loading tokenizer from Hugging Face. + local_tokenizer_path (str, optional): Path to a locally saved + tokenizer. + + Raises: + Exception: If the model tokenizer process fails or if the + model wrapper does not support dynamic loading. + """ + + if hasattr(self.model, "load_tokenizer"): + self.model.load_tokenizer(model_id, local_tokenizer_path) + else: + logger.error("The model wrapper does not support dynamic loading.") + + def fine_tune( + self, + data_path: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, + ) -> None: + """ + Fine-tune the agent's underlying model. + + Arguments: + data_path (str): The path to the training data. + + Raises: + Exception: If fine-tuning fails or if the + model wrapper does not support fine-tuning. + """ + + if hasattr(self.model, "fine_tune"): + self.model.fine_tune(data_path, fine_tune_config) + logger.info("Fine-tuning completed successfully.") + else: + logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/load_finetune_huggingface_model/huggingface_model.py b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py similarity index 77% rename from examples/load_finetune_huggingface_model/huggingface_model.py rename to examples/conversation_with_agent_with_finetuned_model/huggingface_model.py index 1a95ce7eb..b5c59a4af 100644 --- a/examples/load_finetune_huggingface_model/huggingface_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py @@ -2,16 +2,9 @@ """ This module provides a HuggingFaceWrapper to manage and operate Hugging Face Transformers models, enabling loading, -fine-tuning, and response generation. It includes the -Finetune_DialogAgent class, which extends DialogAgent to -enhance fine-tuning capabilities with custom hyperparameters. +fine-tuning, and response generation. Key features include handling model and tokenizer operations, adapting to specialized datasets, and robust error management. - -Classes: -- HuggingFaceWrapper: Manages Hugging Face models and tokenizers. -- Finetune_DialogAgent: Extends DialogAgent for model fine-tuning. 
- """ from typing import Sequence, Any, Union, List, Optional, Dict import os @@ -73,7 +66,7 @@ def __init__( self.model_id = model_id relative_path = os.path.join( os.path.dirname(__file__), - "../load_finetune_huggingface_model/.env", + "../conversation_with_agent_with_finetuned_model/.env", ) dotenv_path = os.path.normpath(relative_path) _ = load_dotenv(dotenv_path) # read local .env file @@ -144,7 +137,7 @@ def __call__( ) # Decode the generated tokens to a string generated_text = self.tokenizer.decode( - outputs[0][input_ids.shape[1]:], + outputs[0][input_ids.shape[1] :], # noqa: E203 skip_special_tokens=True, ) return ModelResponse(text=generated_text, raw=outputs) @@ -291,6 +284,7 @@ def load_tokenizer( f"Successfully loaded new tokenizer for model " f"'{model_id}' from Hugging Face", ) + else: self.tokenizer = AutoTokenizer.from_pretrained( local_tokenizer_path, @@ -300,6 +294,7 @@ def load_tokenizer( f"Successfully loaded new tokenizer for model " f"'{model_id}' from '{local_tokenizer_path}'", ) + self.tokenizer.add_special_tokens({"pad_token": "[PAD]"}) except Exception as e: # Handle exceptions during model loading, @@ -394,6 +389,7 @@ def fine_tune_training( import json dataset = load_dataset(data_path, token=token) + dataset = dataset["train"].train_test_split(test_size=0.1) from peft import LoraConfig @@ -406,24 +402,25 @@ def fine_tune_training( } if fine_tune_config is not None: - if fine_tune_config["lora_config"] is not None: + if fine_tune_config.get("lora_config") is not None: lora_config_default.update(fine_tune_config["lora_config"]) training_defaults = { "per_device_train_batch_size": 1, - "gradient_accumulation_steps": 1, + # "gradient_accumulation_steps": 1, "gradient_checkpointing": False, - "max_steps": 10, + # "max_steps": 10, + "num_train_epochs": 10, "output_dir": "./", "optim": "paged_adamw_8bit", "fp16": True, "logging_steps": 1, - # "learning_rate": 2e-6, + "learning_rate": 1e-5, # "num_train_epochs": 10.0, } if fine_tune_config is not None: - if fine_tune_config["training_args"] is not None: + if fine_tune_config.get("training_args") is not None: training_defaults.update(fine_tune_config["training_args"]) from peft import get_peft_model @@ -445,25 +442,53 @@ def formatting_prompts_func( output_texts.append(text) return output_texts - response_template = " ### Answer:" - collator = DataCollatorForCompletionOnlyLM( - response_template, - tokenizer=tokenizer, - ) + def formatting_func(example): + if example.get("context", "") != "": + input_prompt = ( + f"Below is an instruction that describes a task, " + f"paired with an input that provides further context. " + f"Write a response that appropriately " + f"completes the request.\n\n" + f"### Instruction:\n" + f"{example['instruction']}\n\n" + f"### Input: \n" + f"{example['context']}\n\n" + f"### Response: \n" + f"{example['response']}" + ) + + else: + input_prompt = ( + f"Below is an instruction that describes a task. 
" + "Write a response that appropriately " + f"completes the request.\n\n" + "### Instruction:\n" + f"{example['instruction']}\n\n" + f"### Response:\n" + f"{example['response']}" + ) + + return {"text": input_prompt} + + formatted_dataset = dataset.map(formatting_func) trainer_args = transformers.TrainingArguments(**training_defaults) trainer = SFTTrainer( model, - train_dataset=dataset["train"], - eval_dataset=dataset["train"], - formatting_func=formatting_prompts_func, - data_collator=collator, + train_dataset=formatted_dataset["train"], + eval_dataset=formatted_dataset["test"], + # formatting_func=formatting_prompts_func, + # data_collator=collator, peft_config=lora_config, args=trainer_args, + dataset_text_field="text", + max_seq_length=512, ) - print("fine-tuning model") + logger.info( + "fine-tuning model", + ) trainer.train() @@ -498,116 +523,3 @@ def formatting_prompts_func( return model -class Finetune_DialogAgent(DialogAgent): - """ - A dialog agent capable of fine-tuning its - underlying model based on provided data. - - Inherits from DialogAgent and adds functionality for - fine-tuning with custom hyperparameters. - """ - - def __init__( - self, - name: str, - sys_prompt: str, - model_config_name: str, - use_memory: bool = True, - memory_config: Optional[dict] = None, - ): - """ - Initializes a new Finetune_DialogAgent with specified configuration. - - Arguments: - name (str): Name of the agent. - sys_prompt (str): System prompt or description of the agent's role. - model_config_name (str): The configuration name for - the underlying model. - use_memory (bool, optional): Indicates whether to utilize - memory features. Defaults to True. - memory_config (dict, optional): Configuration for memory - functionalities if - `use_memory` is True. - - Note: - Refer to `class DialogAgent(AgentBase)` for more information. - """ - - super().__init__( - name, - sys_prompt, - model_config_name, - use_memory, - memory_config, - ) - - def load_model( - self, - model_id: Optional[str] = None, - local_model_path: Optional[str] = None, - ) -> None: - """ - Load a new model into the agent. - - Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. - Needed if loading model from Hugging Face. - local_model_path (str, optional): Path to a locally saved model. - - Raises: - Exception: If the model loading process fails or if the - model wrapper does not support dynamic loading. - """ - - if hasattr(self.model, "load_model"): - self.model.load_model(model_id, local_model_path) - else: - logger.error( - "The model wrapper does not support dynamic model loading.", - ) - - def load_tokenizer( - self, - model_id: Optional[str] = None, - local_tokenizer_path: Optional[str] = None, - ) -> None: - """ - Load a new tokenizer for the agent. - - Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. - Needed if loading tokenizer from Hugging Face. - local_tokenizer_path (str, optional): Path to a locally saved - tokenizer. - - Raises: - Exception: If the model tokenizer process fails or if the - model wrapper does not support dynamic loading. - """ - - if hasattr(self.model, "load_tokenizer"): - self.model.load_tokenizer(model_id, local_tokenizer_path) - else: - logger.error("The model wrapper does not support dynamic loading.") - - def fine_tune( - self, - data_path: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - ) -> None: - """ - Fine-tune the agent's underlying model. 
- - Arguments: - data_path (str): The path to the training data. - - Raises: - Exception: If fine-tuning fails or if the - model wrapper does not support fine-tuning. - """ - - if hasattr(self.model, "fine_tune"): - self.model.fine_tune(data_path, fine_tune_config) - logger.info("Fine-tuning completed successfully.") - else: - logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/load_finetune_huggingface_model/README.md b/examples/load_finetune_huggingface_model/README.md deleted file mode 100644 index bdb6172d6..000000000 --- a/examples/load_finetune_huggingface_model/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# Multi-Agent Conversation with Custom Model Loading and Fine-Tuning in AgentScope - -This example demonstrates how to load and optionally fine-tune a Hugging Face model within a multi-agent conversation setup using AgentScope. The complete code is provided in `load_finetune_huggingface_model.py`. - -## Functionality Overview - -Compared to basic conversation setup, this example introduces model loading and fine-tuning features: - -- Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. -- Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. - -The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/src/agentscope/models/huggingface_model.py`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization, as shown in the example script `load_finetune_huggingface_model.py`. - -## Agent Initialization - -When initializing an agent, the following parameters need specification: - -- `model_id` (str): Identifier for the model on Hugging Face. -- `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). -- `data_path` (str): Path to training data (fine-tuning is skipped if not provided). -- `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. -- `fine_tune_config` (dict, Optional): A configuration dictionary for fine-tuning the model. It allows specifying hyperparameters and other training options that will be passed to the fine-tuning method. If not provided, default settings will be used. This allows for customization of the fine-tuning process to optimize model performance based on specific requirements. -- `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face. - -## Tested Models - -The example is tested using specific Hugging Face models. While it is designed to be flexible, some models may require additional configuration or modification of the provided scripts. 
- -## Prerequisites - -Before running this example, ensure you have installed the following packages: - -- `transformers` -- `peft` -- `python-dotenv` -- `datasets` -- `trl` - -Additionally, set your Hugging Face token in the `.env` file: - -```bash -python load_finetune_huggingface_model.py \ No newline at end of file diff --git a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py b/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py deleted file mode 100644 index b16f51f26..000000000 --- a/examples/load_finetune_huggingface_model/load_finetune_huggingface_model.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This script sets up a conversational agent using -AgentScope with a Hugging Face model. -It includes initializing a Finetune_DialogAgent, -loading and fine-tuning a pre-trained model, -and conducting a dialogue via a sequential pipeline. -The conversation continues until the user exits. -Features include model and tokenizer loading, -and fine-tuning on the GAIR/lima dataset with adjustable parameters. -""" -from huggingface_model import Finetune_DialogAgent - -import agentscope -from agentscope.agents.user_agent import UserAgent -from agentscope.pipelines.functional import sequentialpipeline - - -def main() -> None: - """A basic conversation demo with a custom model""" - - # Initialize AgentScope with your custom model configuration - - agentscope.init( - model_configs=[ - { - "model_type": "huggingface", - "config_name": "my_custom_model", - # Or another generative model of your choice. - # Needed from loading from Hugging Face. - "model_id": "google/gemma-2b-it", - # "local_model_path": # Specify your local model path - # "local_tokenizer_path": # Specify your local tokenizer path - "max_length": 128, - "device": "cuda", - # Specify a Hugging Face data path if you - # wish to finetune the model from the start - "data_path": "GAIR/lima", - # fine_tune_config (Optional): Configuration for - # fine-tuning the model. - # This dictionary can include hyperparameters and other - # training options that will be passed to the - # fine-tuning method. Defaults to None. - "fine_tune_config": { - "lora_config": {"r": 20, "lora_alpha": 40}, - "training_args": {"max_steps": 20, "logging_steps": 2}, - }, - }, - ], - ) - - # Init agents with the custom model - dialog_agent = Finetune_DialogAgent( - name="Assistant", - sys_prompt="You're a helpful assistant.", - # Use your custom model config name here - model_config_name="my_custom_model", - ) - - dialog_agent.load_model( - model_id="google/gemma-2b-it", - local_model_path=None, - ) # load model gemma-2b-it from Hugging Face - dialog_agent.load_tokenizer( - model_id="google/gemma-2b-it", - local_tokenizer_path=None, - ) # load tokenizer for gemma-2b-it from Hugging Face - - # fine-tune loaded model with lima dataset with default hyperparameters - # dialog_agent.fine_tune(data_path= "GAIR/lima") - - # fine-tune loaded model with lima dataset with customized hyperparameters - # (`fine_tune_config` argument is optional. Defaults to None.) 
- dialog_agent.fine_tune( - "GAIR/lima", - fine_tune_config={ - "lora_config": {"r": 24, "lora_alpha": 48}, - "training_args": {"max_steps": 30, "logging_steps": 3}, - }, - ) - - user_agent = UserAgent() - - # Start the conversation between user and assistant - x = None - while x is None or x.content != "exit": - x = sequentialpipeline([dialog_agent, user_agent], x) - - -if __name__ == "__main__": - main() From 0a079b9b23bf1dc879c3ae35e483ec45036f8524 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Thu, 2 May 2024 11:50:19 +0800 Subject: [PATCH 10/32] added updated README --- .../README.md | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 examples/conversation_with_agent_with_finetuned_model/README.md diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md new file mode 100644 index 000000000..3d68d635f --- /dev/null +++ b/examples/conversation_with_agent_with_finetuned_model/README.md @@ -0,0 +1,68 @@ +# Multi-Agent Conversation with Custom Model Loading and Fine-Tuning in AgentScope + +This example demonstrates how to load and optionally fine-tune a Hugging Face model within a multi-agent conversation setup using AgentScope. The complete code is provided in `agentscope/examples/conversation_with_agent_with_finetuned_model`. + +## Functionality Overview + +Compared to basic conversation setup, this example introduces model loading and fine-tuning features: + +- Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. +- Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. + +The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py` and `agentscope/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization, as shown in the example script `conversation_with_agent_with_finetuned_model.py`. + +## Agent Initialization + +When initializing an agent, the following parameters need specification: + +- `model_id` (str): Identifier for the model on Hugging Face. +- `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). +- `data_path` (str): Path to training data (fine-tuning is skipped if not provided). +- `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. +- `fine_tune_config` (dict, Optional): A configuration dictionary for fine-tuning the model. It allows specifying hyperparameters and other training options that will be passed to the fine-tuning method. If not provided, default settings will be used. This allows for customization of the fine-tuning process to optimize model performance based on specific requirements. +- `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face. + +## Example Ouputs +### Before Fine-tuning: +User: + +Explain in simple terms how the attention mechanism of a transformer model works + +Assistant: + +1. a person who helps another person or group of people. 2. 
a person who helps another person or group of people. + +### After Fine-tuning (with the default configuration in `model_configs`): +User: + +Explain in simple terms how the attention mechanism of a transformer model works + +Assistant: + +The attention mechanism of a transformer model works by allowing the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. + +The self-attention mechanism allows the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. + +The position-wise attention mechanism allows the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. + +The self + + +## Tested Models + +The example is tested using specific Hugging Face model `openlm-research/open_llama_3b_v2` on dataset `databricks/databricks-dolly-15k`. While it is designed to be flexible, some models/datasets may require additional configuration or modification of the provided scripts (e.g., pre-processing of the datasets). + +## Prerequisites + +Before running this example, ensure you have installed the following packages: + +- `transformers` +- `peft` +- `python-dotenv` +- `datasets` +- `trl` + +Additionally, set `HUGGINGFACE_TOKEN` in the `agentscope/examples/conversation_with_agent_with_finetuned_model/.env`. + +```bash +python conversation_with_agent_with_finetuned_model.py \ No newline at end of file From a4d1f1beb23d922dc3680347460f07a22f56230b Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Sun, 5 May 2024 20:52:33 +0800 Subject: [PATCH 11/32] updated README for two examples and tested on 3 model_type. --- .../conversation_with_RAG_agents/README.md | 76 +++++++------------ examples/conversation_with_mentions/README.md | 75 +++++------------- 2 files changed, 49 insertions(+), 102 deletions(-) diff --git a/examples/conversation_with_RAG_agents/README.md b/examples/conversation_with_RAG_agents/README.md index 5b08379a4..04025f979 100644 --- a/examples/conversation_with_RAG_agents/README.md +++ b/examples/conversation_with_RAG_agents/README.md @@ -1,57 +1,37 @@ # AgentScope Consultants: a Multi-Agent RAG Application -* **What is this example about?** -With the provided implementation and configuration, -you will obtain three different agents who can help you answer different questions about AgentScope. +This example will show +- How to utilize three different agents to answer various questions about AgentScope. +- How to set up and run the agents using different configurations. -* **What is this example for?** By this example, we want to show how the agent with retrieval augmented generation (RAG) -capability can be used to build easily. +## Background -**Notice:** This example is a Beta version of the AgentScope RAG agent. A formal version will soon be added to `src/agentscope/agents`, but it may be subject to changes. +This example introduces a multi-agent system using retrieval augmented generation (RAG) capabilities to demonstrate how such systems can be built and used effectively. + +## Tested Models + +These models are tested in this example. For other models, some modifications may be needed. 
+### models +- dashscope_chat (qwen-max) +### embeddings +- dashscope_text_embedding(text-embedding-v2) ## Prerequisites -* **Cloning repo:** This example requires cloning the whole AgentScope repo to local. -* **Packages:** This example is built on the LlamaIndex package. Thus, some packages need to be installed before running the example. + +Fill the next cell to meet the following requirements +- Cloning the AgentScope repository to local. +- Installation of required packages: ```bash pip install llama-index tree_sitter tree-sitter-languages ``` -* **Model APIs:** This example uses Dashscope APIs. Thus, we also need an API key for DashScope. - ```bash - export DASH_SCOPE_API='YOUR_API_KEY' - ``` - -**Note:** This example has been tested with `dashscope_chat` and `dashscope_text_embedding` model wrapper, with `qwen-max` and `text-embedding-v2` models. -However, you are welcome to replace the Dashscope language and embedding model wrappers or models with other models you like to test. - -## Start AgentScope Consultants -* **Terminal:** The most simple way to execute the AgentScope Consultants is running in terminal. - ```bash - python ./rag_example.py - ``` - Setting `log_retrieval` to `false` in `agent_config.json` can hide the retrieved information and provide only answers of agents. - -* **AS studio:** If you want to have more organized, clean UI, you can also run with our `as_studio`. - ```bash - as_studio ./rag_example.py - ``` - -### Customize AgentScope Consultants to other consultants -After you run the example, you may notice that this example consists of three RAG agents: -* `AgentScope Tutorial Assistant`: responsible for answering questions based on AgentScope tutorials (markdown files). -* `AgentScope Framework Code Assistant`: responsible for answering questions based on AgentScope code base (python files). -* `Summarize Assistant`: responsible for summarize the questions from the above two agents. - -These agents can be configured to answering questions based on other GitHub repo, by simply modifying the `input_dir` fields in the `agent_config.json`. - -For more advanced customization, we may need to learn a little bit from the following. - -**RAG modules:** In AgentScope, RAG modules are abstract to provide three basic functions: `load_data`, `store_and_index` and `retrieve`. Refer to `src/agentscope/rag` for more details. - -**RAG configs:** In the example configuration (the `rag_config` field), all parameters are optional. But if you want to customize them, you may want to learn the following: -* `load_data`: contains all parameters for the the `rag.load_data` function. -Since the `load_data` accepts a dataloader object `loader`, the `loader` in the config need to have `"create_object": true` to let a internal parse create a LlamaIndex data loader object. -The loader object is an instance of `class` in module `module`, with initialization parameters in `init_args`. - -* `store_and_index`: contains all parameters for the the `rag.store_and_index` function. -For example, you can pass `vector_store` and `retriever` configurations in a similar way as the `loader` mentioned above. -For the `transformations` parameter, you can pass a list of dicts, each of which corresponds to building a `NodeParser`-kind of preprocessor in Llamaindex. 
\ No newline at end of file +- Setting environment variables for API keys: + ```bash + export DASH_SCOPE_API='YOUR_API_KEY' + ``` +- Running the application via terminal or AS studio: + ```bash + python ./rag_example.py + # or + as_studio ./rag_example.py + ``` +- [Optional] Optional settings to hide retrieved information by setting `log_retrieval` to `false` in `agent_config.json`. diff --git a/examples/conversation_with_mentions/README.md b/examples/conversation_with_mentions/README.md index 6359b3413..2cb12ea52 100644 --- a/examples/conversation_with_mentions/README.md +++ b/examples/conversation_with_mentions/README.md @@ -1,73 +1,40 @@ +### # Multi-Agent Group Conversation in AgentScope -This example demonstrates a multi-agent group conversation facilitated by AgentScope. The script `main.py` sets up a virtual chat room where a user agent interacts with several NPC (non-player character) agents. The chat utilizes a special **"@"** mention functionality, which allows participants to address specific agents and have a more directed conversation. +This example demonstrates a multi-agent group conversation facilitated by AgentScope. The script sets up a virtual chat room where a user agent interacts with several NPC (non-player character) agents. Participants can utilize a special "@" mention functionality to address specific agents directly. -## Key Features +## Background -- **Real-time Group Conversation**: Engage in a chat with multiple agents responding in real time. -- **@ Mention Functionality**: Use the "@" symbol followed by an agent's name to specifically address that agent within the conversation. -- **Dynamic Flow**: User-driven conversation with agents responding based on the context and mentions. -- **Configurable Agent Roles**: Easily modify agent roles and behaviors by editing the `sys_prompt` in the configuration files. -- **User Timeout**: If the user does not respond within a specified time, the conversation continues with the next agent. +The conversation takes place in a simulated chat room environment with predefined roles for each participant. Topics are open-ended and evolve based on the user's input and agents' responses. -## How to Use - -To start the group conversation, follow these steps: - -1. Make sure to set your `api_key` in the `configs/model_configs.json` file. -2. Run the script using the following command: +## Tested Models -```bash -python main.py +These models are tested in this example. For other models, some modifications may be needed. +- gemini_chat (models/gemini-pro, models/gemini-1.0-pro) +- dashscope_chat (qwen-max, qwen-turbo) +- ollama_chat (ollama_llama3_8b) -# or launch agentscope studio -as_studio main.py -``` +## Prerequisites -1. To address a specific agent in the chat, type "@" followed by the agent's name in your message. -2. To exit the chat, simply type "exit" when it's your turn to speak. +Fill the next cell to meet the following requirements: +- Set your `api_key` in the `configs/model_configs.json` file +- Optional: Launch agentscope studio with `as_studio main.py` -## Background and Conversation Flow - -The conversation takes place in a simulated chat room environment with roles defined for each participant. The user acts as a regular chat member with the ability to speak freely and address any agent. NPC agents are pre-configured with specific roles that determine their responses and behavior in the chat. 
The topic of the conversation is open-ended and can evolve organically based on the user's input and agents' programmed personas. - -### Example Interaction +## How to Use -``` -User input: Hi, everyone! I'm excited to join this chat. -AgentA: Welcome! We're glad to have you here. -User input: @AgentB, what do you think about the new technology trends? -AgentB: It's an exciting time for tech! There are so many innovations on the horizon. -... -``` +1. Run the script using the command: `python main.py` +2. Address specific agents by typing "@" followed by the agent's name. +3. Type "exit" to leave the chat. ## Customization Options -The group conversation script provides several options for customization, allowing you to tailor the chat experience to your preferences. - -You can customize the conversation by editing the agent configurations and model parameters. The `agent_configs.json` file allows you to set specific behaviors for each NPC agent, while `model_configs.json` contains the parameters for the conversation model. +You can adjust the behavior and parameters of the NPC agents and conversation model by editing the `agent_configs.json` and `model_configs.json` files, respectively. ### Changing User Input Time Limit -The `USER_TIME_TO_SPEAK` variable sets the time limit (in seconds) for the user to input their message during each round. By default, this is set to 10 seconds. You can adjust this time limit by modifying the value of `USER_TIME_TO_SPEAK` in the `main.py` script. - -For example, to change the time limit to 20 seconds, update the line in `main.py` as follows: - -``` -USER_TIME_TO_SPEAK = 20 # User has 20 seconds to type their message -``` +Adjust the `USER_TIME_TO_SPEAK` variable in the `main.py` script to change the time limit for user input. ### Setting a Default Topic for the Chat Room -The `DEFAULT_TOPIC` variable defines the initial message or topic of the chat room. It sets the stage for the conversation and is announced at the beginning of the chat session. You can change this message to prompt a specific discussion topic or to provide instructions to the agents. - -To customize this message, modify the `DEFAULT_TOPIC` variable in the `main.py` script. For instance, if you want to set the default topic to discuss "The Future of Artificial Intelligence," you would change the code as follows: - -```python -DEFAULT_TOPIC = """ -This is a chat room about the Future of Artificial Intelligence and you can -speak freely and briefly. -""" -``` - -With these customizations, the chat room can be tailored to fit specific themes or time constraints, enhancing the user's control over the chat experience. +Modify the `DEFAULT_TOPIC` variable in the `main.py` script to set the initial topic of the chat room. 
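For instance, both settings can be changed directly in `main.py` (the values below are only an illustration):

```python
# In main.py: allow 20 seconds per user turn and set the opening announcement.
USER_TIME_TO_SPEAK = 20
DEFAULT_TOPIC = """
This is a chat room about the Future of Artificial Intelligence and you can
speak freely and briefly.
"""
```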
+### \ No newline at end of file From 6b5410edd50e61aa7c08f0dd5d7ae9069c90711f Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Mon, 6 May 2024 11:09:17 +0800 Subject: [PATCH 12/32] undo update to conversation_with_mentions README (created a dedicated branch for it) --- examples/conversation_with_mentions/README.md | 75 +++++++++++++------ 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/examples/conversation_with_mentions/README.md b/examples/conversation_with_mentions/README.md index 2cb12ea52..6359b3413 100644 --- a/examples/conversation_with_mentions/README.md +++ b/examples/conversation_with_mentions/README.md @@ -1,40 +1,73 @@ -### # Multi-Agent Group Conversation in AgentScope -This example demonstrates a multi-agent group conversation facilitated by AgentScope. The script sets up a virtual chat room where a user agent interacts with several NPC (non-player character) agents. Participants can utilize a special "@" mention functionality to address specific agents directly. +This example demonstrates a multi-agent group conversation facilitated by AgentScope. The script `main.py` sets up a virtual chat room where a user agent interacts with several NPC (non-player character) agents. The chat utilizes a special **"@"** mention functionality, which allows participants to address specific agents and have a more directed conversation. -## Background +## Key Features -The conversation takes place in a simulated chat room environment with predefined roles for each participant. Topics are open-ended and evolve based on the user's input and agents' responses. +- **Real-time Group Conversation**: Engage in a chat with multiple agents responding in real time. +- **@ Mention Functionality**: Use the "@" symbol followed by an agent's name to specifically address that agent within the conversation. +- **Dynamic Flow**: User-driven conversation with agents responding based on the context and mentions. +- **Configurable Agent Roles**: Easily modify agent roles and behaviors by editing the `sys_prompt` in the configuration files. +- **User Timeout**: If the user does not respond within a specified time, the conversation continues with the next agent. -## Tested Models +## How to Use -These models are tested in this example. For other models, some modifications may be needed. -- gemini_chat (models/gemini-pro, models/gemini-1.0-pro) -- dashscope_chat (qwen-max, qwen-turbo) -- ollama_chat (ollama_llama3_8b) +To start the group conversation, follow these steps: -## Prerequisites +1. Make sure to set your `api_key` in the `configs/model_configs.json` file. +2. Run the script using the following command: -Fill the next cell to meet the following requirements: -- Set your `api_key` in the `configs/model_configs.json` file -- Optional: Launch agentscope studio with `as_studio main.py` +```bash +python main.py -## How to Use +# or launch agentscope studio +as_studio main.py +``` + +1. To address a specific agent in the chat, type "@" followed by the agent's name in your message. +2. To exit the chat, simply type "exit" when it's your turn to speak. + +## Background and Conversation Flow + +The conversation takes place in a simulated chat room environment with roles defined for each participant. The user acts as a regular chat member with the ability to speak freely and address any agent. NPC agents are pre-configured with specific roles that determine their responses and behavior in the chat. 
The topic of the conversation is open-ended and can evolve organically based on the user's input and agents' programmed personas. + +### Example Interaction -1. Run the script using the command: `python main.py` -2. Address specific agents by typing "@" followed by the agent's name. -3. Type "exit" to leave the chat. +``` +User input: Hi, everyone! I'm excited to join this chat. +AgentA: Welcome! We're glad to have you here. +User input: @AgentB, what do you think about the new technology trends? +AgentB: It's an exciting time for tech! There are so many innovations on the horizon. +... +``` ## Customization Options -You can adjust the behavior and parameters of the NPC agents and conversation model by editing the `agent_configs.json` and `model_configs.json` files, respectively. +The group conversation script provides several options for customization, allowing you to tailor the chat experience to your preferences. + +You can customize the conversation by editing the agent configurations and model parameters. The `agent_configs.json` file allows you to set specific behaviors for each NPC agent, while `model_configs.json` contains the parameters for the conversation model. ### Changing User Input Time Limit -Adjust the `USER_TIME_TO_SPEAK` variable in the `main.py` script to change the time limit for user input. +The `USER_TIME_TO_SPEAK` variable sets the time limit (in seconds) for the user to input their message during each round. By default, this is set to 10 seconds. You can adjust this time limit by modifying the value of `USER_TIME_TO_SPEAK` in the `main.py` script. + +For example, to change the time limit to 20 seconds, update the line in `main.py` as follows: + +``` +USER_TIME_TO_SPEAK = 20 # User has 20 seconds to type their message +``` ### Setting a Default Topic for the Chat Room -Modify the `DEFAULT_TOPIC` variable in the `main.py` script to set the initial topic of the chat room. -### \ No newline at end of file +The `DEFAULT_TOPIC` variable defines the initial message or topic of the chat room. It sets the stage for the conversation and is announced at the beginning of the chat session. You can change this message to prompt a specific discussion topic or to provide instructions to the agents. + +To customize this message, modify the `DEFAULT_TOPIC` variable in the `main.py` script. For instance, if you want to set the default topic to discuss "The Future of Artificial Intelligence," you would change the code as follows: + +```python +DEFAULT_TOPIC = """ +This is a chat room about the Future of Artificial Intelligence and you can +speak freely and briefly. +""" +``` + +With these customizations, the chat room can be tailored to fit specific themes or time constraints, enhancing the user's control over the chat experience. 
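For readers curious how the "@" addressing described above could be wired up, here is a minimal illustrative sketch; the helper `parse_mentions` and the `npc_agents` mapping are assumptions made for this sketch and are not taken from the example's `main.py`:

```python
# Illustrative only: map "@AgentName" mentions in a user message to the
# agents that should speak next. The example's main.py may differ.
def parse_mentions(user_text: str, npc_agents: dict) -> list:
    mentioned = [
        agent for name, agent in npc_agents.items() if f"@{name}" in user_text
    ]
    # With no explicit mention, every NPC agent gets a turn.
    return mentioned or list(npc_agents.values())
```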
From 6d100512153a8bc3b8be7cc68b91a002af08f2a8 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Mon, 6 May 2024 11:12:25 +0800 Subject: [PATCH 13/32] reverted changes made to conversation_with_RAG_agents\README.md --- .../conversation_with_RAG_agents/README.md | 76 ++++++++++++------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/examples/conversation_with_RAG_agents/README.md b/examples/conversation_with_RAG_agents/README.md index 04025f979..5b08379a4 100644 --- a/examples/conversation_with_RAG_agents/README.md +++ b/examples/conversation_with_RAG_agents/README.md @@ -1,37 +1,57 @@ # AgentScope Consultants: a Multi-Agent RAG Application -This example will show -- How to utilize three different agents to answer various questions about AgentScope. -- How to set up and run the agents using different configurations. +* **What is this example about?** +With the provided implementation and configuration, +you will obtain three different agents who can help you answer different questions about AgentScope. -## Background +* **What is this example for?** By this example, we want to show how the agent with retrieval augmented generation (RAG) +capability can be used to build easily. -This example introduces a multi-agent system using retrieval augmented generation (RAG) capabilities to demonstrate how such systems can be built and used effectively. - -## Tested Models - -These models are tested in this example. For other models, some modifications may be needed. -### models -- dashscope_chat (qwen-max) -### embeddings -- dashscope_text_embedding(text-embedding-v2) +**Notice:** This example is a Beta version of the AgentScope RAG agent. A formal version will soon be added to `src/agentscope/agents`, but it may be subject to changes. ## Prerequisites - -Fill the next cell to meet the following requirements -- Cloning the AgentScope repository to local. -- Installation of required packages: +* **Cloning repo:** This example requires cloning the whole AgentScope repo to local. +* **Packages:** This example is built on the LlamaIndex package. Thus, some packages need to be installed before running the example. ```bash pip install llama-index tree_sitter tree-sitter-languages ``` -- Setting environment variables for API keys: - ```bash - export DASH_SCOPE_API='YOUR_API_KEY' - ``` -- Running the application via terminal or AS studio: - ```bash - python ./rag_example.py - # or - as_studio ./rag_example.py - ``` -- [Optional] Optional settings to hide retrieved information by setting `log_retrieval` to `false` in `agent_config.json`. +* **Model APIs:** This example uses Dashscope APIs. Thus, we also need an API key for DashScope. + ```bash + export DASH_SCOPE_API='YOUR_API_KEY' + ``` + +**Note:** This example has been tested with `dashscope_chat` and `dashscope_text_embedding` model wrapper, with `qwen-max` and `text-embedding-v2` models. +However, you are welcome to replace the Dashscope language and embedding model wrappers or models with other models you like to test. + +## Start AgentScope Consultants +* **Terminal:** The most simple way to execute the AgentScope Consultants is running in terminal. + ```bash + python ./rag_example.py + ``` + Setting `log_retrieval` to `false` in `agent_config.json` can hide the retrieved information and provide only answers of agents. + +* **AS studio:** If you want to have more organized, clean UI, you can also run with our `as_studio`. 
+ ```bash + as_studio ./rag_example.py + ``` + +### Customize AgentScope Consultants to other consultants +After you run the example, you may notice that this example consists of three RAG agents: +* `AgentScope Tutorial Assistant`: responsible for answering questions based on AgentScope tutorials (markdown files). +* `AgentScope Framework Code Assistant`: responsible for answering questions based on AgentScope code base (python files). +* `Summarize Assistant`: responsible for summarize the questions from the above two agents. + +These agents can be configured to answering questions based on other GitHub repo, by simply modifying the `input_dir` fields in the `agent_config.json`. + +For more advanced customization, we may need to learn a little bit from the following. + +**RAG modules:** In AgentScope, RAG modules are abstract to provide three basic functions: `load_data`, `store_and_index` and `retrieve`. Refer to `src/agentscope/rag` for more details. + +**RAG configs:** In the example configuration (the `rag_config` field), all parameters are optional. But if you want to customize them, you may want to learn the following: +* `load_data`: contains all parameters for the the `rag.load_data` function. +Since the `load_data` accepts a dataloader object `loader`, the `loader` in the config need to have `"create_object": true` to let a internal parse create a LlamaIndex data loader object. +The loader object is an instance of `class` in module `module`, with initialization parameters in `init_args`. + +* `store_and_index`: contains all parameters for the the `rag.store_and_index` function. +For example, you can pass `vector_store` and `retriever` configurations in a similar way as the `loader` mentioned above. +For the `transformations` parameter, you can pass a list of dicts, each of which corresponds to building a `NodeParser`-kind of preprocessor in Llamaindex. 
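As a rough illustration of how these pieces fit together, the `rag_config` field of an agent entry might be built as the Python dict sketched below before being written into `agent_config.json`. The concrete `module`/`class` values (`SimpleDirectoryReader`, `SentenceSplitter`) and their import paths are assumptions about the installed LlamaIndex version, not values taken from this example's shipped configs:

```python
# Illustrative sketch of the `rag_config` structure described above.
# Module/class names are assumptions; adjust them to your LlamaIndex version.
rag_config = {
    "load_data": {
        "loader": {
            "create_object": True,
            "module": "llama_index.core",
            "class": "SimpleDirectoryReader",
            "init_args": {"input_dir": "path/to/your/docs"},
        },
    },
    "store_and_index": {
        "transformations": [
            {
                "create_object": True,
                "module": "llama_index.core.node_parser",
                "class": "SentenceSplitter",
                "init_args": {"chunk_size": 1024},
            },
        ],
    },
}
```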
\ No newline at end of file From db27edd79285b5046c31ba7b00ab1f929fff8239 Mon Sep 17 00:00:00 2001 From: Zhang Ze Yu Date: Mon, 6 May 2024 04:42:47 +0000 Subject: [PATCH 14/32] resolved pre-commit related issues --- .../configs/model_configs.json | 34 ++++----- ...rsation_with_agent_with_finetuned_model.py | 76 ++++++++++--------- .../finetune_dialogagent.py | 14 +--- .../huggingface_model.py | 20 +---- 4 files changed, 62 insertions(+), 82 deletions(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json index 3b59c8318..ed491e572 100644 --- a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json +++ b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json @@ -1,18 +1,18 @@ -[ - { - "model_type": "huggingface", - "config_name": "my_custom_model", - - "model_id": "openlm-research/open_llama_3b_v2", - - "max_length": 128, - "device": "cuda", - - "data_path": "databricks/databricks-dolly-15k", - - "fine_tune_config": { - "lora_config": {"r": 20, "lora_alpha": 40}, - "training_args": {"max_steps": 1000, "logging_steps": 1} - } - } +[ + { + "model_type": "huggingface", + "config_name": "my_custom_model", + + "model_id": "openlm-research/open_llama_3b_v2", + + "max_length": 128, + "device": "cuda", + + "data_path": "databricks/databricks-dolly-15k", + + "fine_tune_config": { + "lora_config": {"r": 20, "lora_alpha": 40}, + "training_args": {"max_steps": 1000, "logging_steps": 1} + } + } ] \ No newline at end of file diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py index 989e8525f..4e318b796 100644 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -10,7 +10,7 @@ and fine-tuning on the databricks-dolly-15k dataset with adjustable parameters. """ from finetune_dialogagent import Finetune_DialogAgent - +from huggingface_model import HuggingFaceWrapper import agentscope from agentscope.agents.user_agent import UserAgent from agentscope.pipelines.functional import sequentialpipeline @@ -21,41 +21,41 @@ def main() -> None: # Initialize AgentScope with your custom model configuration - # agentscope.init( - # model_configs=[ - # { - # "model_type": "huggingface", - # "config_name": "my_custom_model", - # # Or another generative model of your choice. - # # Needed from loading from Hugging Face. - # "model_id": "openlm-research/open_llama_3b_v2", - # # "local_model_path": # Specify your local model path - # # "local_tokenizer_path": # Specify your local tokenizer path - # "max_length": 128, - # "device": "cuda", - # # Specify a Hugging Face data path if you - # # wish to finetune the model from the start - # "data_path": "databricks/databricks-dolly-15k", - # # fine_tune_config (Optional): Configuration for - # # fine-tuning the model. - # # This dictionary can include hyperparameters and other - # # training options that will be passed to the - # # fine-tuning method. Defaults to None. - # # `lora_config` and `training_args` follow - # # the standard lora and sfttrainer fields. 
- # "fine_tune_config": { - # "lora_config": {"r": 20, "lora_alpha": 40}, - # "training_args": {"max_steps": 1000, "logging_steps": 1}, - # }, - # }, - # ], - # ) - - # alternatively can load `model_configs` from json file agentscope.init( - model_configs="./configs/model_configs.json", + model_configs=[ + { + "model_type": "huggingface", + "config_name": "my_custom_model", + # Or another generative model of your choice. + # Needed from loading from Hugging Face. + "model_id": "openlm-research/open_llama_3b_v2", + # "local_model_path": # Specify your local model path + # "local_tokenizer_path": # Specify your local tokenizer path + "max_length": 128, + "device": "cuda", + # Specify a Hugging Face data path if you + # wish to finetune the model from the start + "data_path": "databricks/databricks-dolly-15k", + # fine_tune_config (Optional): Configuration for + # fine-tuning the model. + # This dictionary can include hyperparameters and other + # training options that will be passed to the + # fine-tuning method. Defaults to None. + # `lora_config` and `training_args` follow + # the standard lora and sfttrainer fields. + "fine_tune_config": { + "lora_config": {"r": 20, "lora_alpha": 40}, + "training_args": {"max_steps": 1000, "logging_steps": 1}, + }, + }, + ], ) + # # alternatively can load `model_configs` from json file + # agentscope.init( + # model_configs="./configs/model_configs.json", + # ) + # Init agents with the custom model dialog_agent = Finetune_DialogAgent( name="Assistant", @@ -73,16 +73,18 @@ def main() -> None: local_tokenizer_path=None, ) # load tokenizer for gemma-2b-it from Hugging Face - # fine-tune loaded model with databricks-dolly-15k dataset with default hyperparameters - dialog_agent.fine_tune(data_path="databricks/databricks-dolly-15k") + # fine-tune loaded model with databricks-dolly-15k dataset + # with default hyperparameters + # dialog_agent.fine_tune(data_path="databricks/databricks-dolly-15k") - # fine-tune loaded model with databricks-dolly-15k dataset with customized hyperparameters + # fine-tune loaded model with databricks-dolly-15k dataset + # with customized hyperparameters # (`fine_tune_config` argument is optional. Defaults to None.) dialog_agent.fine_tune( "databricks/databricks-dolly-15k", fine_tune_config={ "lora_config": {"r": 24, "lora_alpha": 48}, - "training_args": {"max_steps": 30, "logging_steps": 3}, + "training_args": {"max_steps": 300, "logging_steps": 3}, }, ) diff --git a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py index 1059cc99e..10c5c583f 100644 --- a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py +++ b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py @@ -1,21 +1,15 @@ # -*- coding: utf-8 -*- """ -This module provides the Finetune_DialogAgent class, -which extends DialogAgent to enhance fine-tuning +This module provides the Finetune_DialogAgent class, +which extends DialogAgent to enhance fine-tuning capabilities with custom hyperparameters. 
""" -from typing import Sequence, Any, Union, List, Optional, Dict -import os +from typing import Any, Optional, Dict -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer from loguru import logger -from dotenv import load_dotenv from agentscope.agents import DialogAgent -from agentscope.models import ModelWrapperBase, ModelResponse -from agentscope.message import MessageBase -from agentscope.utils.tools import _convert_to_str + class Finetune_DialogAgent(DialogAgent): """ diff --git a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py index b5c59a4af..a7f86ef4b 100644 --- a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py @@ -14,7 +14,6 @@ from loguru import logger from dotenv import load_dotenv -from agentscope.agents import DialogAgent from agentscope.models import ModelWrapperBase, ModelResponse from agentscope.message import MessageBase from agentscope.utils.tools import _convert_to_str @@ -428,21 +427,10 @@ def fine_tune_training( lora_config = LoraConfig(**lora_config_default) model = get_peft_model(model, lora_config) - from trl import SFTTrainer, DataCollatorForCompletionOnlyLM + from trl import SFTTrainer import transformers - def formatting_prompts_func( - example: Dict[str, List[List[str]]], - ) -> List[str]: - output_texts = [] - for i in range(len(example["conversations"])): - question = f"### Question: {example['conversations'][i][0]}" - answer = f"### Answer: {example['conversations'][i][1]}" - text = f"{question}\n {answer}" - output_texts.append(text) - return output_texts - - def formatting_func(example): + def formatting_func(example: Dict[str, Any]) -> Dict[str, str]: if example.get("context", "") != "": input_prompt = ( f"Below is an instruction that describes a task, " @@ -456,7 +444,6 @@ def formatting_func(example): f"### Response: \n" f"{example['response']}" ) - else: input_prompt = ( f"Below is an instruction that describes a task. " @@ -478,7 +465,6 @@ def formatting_func(example): model, train_dataset=formatted_dataset["train"], eval_dataset=formatted_dataset["test"], - # formatting_func=formatting_prompts_func, # data_collator=collator, peft_config=lora_config, args=trainer_args, @@ -521,5 +507,3 @@ def formatting_func(example): tokenizer.save_pretrained(tokenizer_path) return model - - From b37122655d19b6dd1b25f607bfaddb0c64456a7a Mon Sep 17 00:00:00 2001 From: Zhang Ze Yu Date: Mon, 6 May 2024 05:03:07 +0000 Subject: [PATCH 15/32] resolved pre-commit related issues --- .../conversation_with_agent_with_finetuned_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py index 4e318b796..aad691502 100644 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -10,7 +10,9 @@ and fine-tuning on the databricks-dolly-15k dataset with adjustable parameters. 
""" from finetune_dialogagent import Finetune_DialogAgent -from huggingface_model import HuggingFaceWrapper +from huggingface_model import ( + HuggingFaceWrapper, +) # pylint: disable=unused-import import agentscope from agentscope.agents.user_agent import UserAgent from agentscope.pipelines.functional import sequentialpipeline From 7f3a0129097fbe553ad86f55539d5f18e41cb054 Mon Sep 17 00:00:00 2001 From: Zhang Ze Yu Date: Mon, 6 May 2024 05:20:01 +0000 Subject: [PATCH 16/32] resolved pre-commit related issues --- .../conversation_with_agent_with_finetuned_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py index aad691502..5c9836bd0 100644 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -10,9 +10,9 @@ and fine-tuning on the databricks-dolly-15k dataset with adjustable parameters. """ from finetune_dialogagent import Finetune_DialogAgent -from huggingface_model import ( - HuggingFaceWrapper, -) # pylint: disable=unused-import + +# pylint: disable=unused-import +from huggingface_model import HuggingFaceWrapper import agentscope from agentscope.agents.user_agent import UserAgent from agentscope.pipelines.functional import sequentialpipeline From 15bf79ad42fb9e81016f0d820477b89e0da0940d Mon Sep 17 00:00:00 2001 From: Zhang Ze Yu Date: Wed, 8 May 2024 17:04:20 +0000 Subject: [PATCH 17/32] resolve issues mentioned --- .../en/source/tutorial/105-logging.md | 2 +- .../zh_CN/source/tutorial/105-logging.md | 2 +- .../README.md | 42 +-- .../configs/model_configs.json | 12 +- ...rsation_with_agent_with_finetuned_model.py | 48 ++-- .../finetune_dialogagent.py | 30 ++- .../huggingface_model.py | 248 +++++++++++++----- examples/game_gomoku/code/board_agent.py | 6 +- examples/game_gomoku/main.ipynb | 6 +- src/agentscope/web/README.md | 2 +- 10 files changed, 271 insertions(+), 127 deletions(-) diff --git a/docs/sphinx_doc/en/source/tutorial/105-logging.md b/docs/sphinx_doc/en/source/tutorial/105-logging.md index 98f872a8b..7575b11cd 100644 --- a/docs/sphinx_doc/en/source/tutorial/105-logging.md +++ b/docs/sphinx_doc/en/source/tutorial/105-logging.md @@ -75,7 +75,7 @@ You can run the WebUI in the following python code: import agentscope agentscope.web.init( - path_save="YOUR_SAVE_PATH" + path_save="YOUR_output_dir" ) ``` diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md b/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md index d517cd46b..073d30515 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md @@ -76,7 +76,7 @@ logger.error("The agent encountered an unexpected error while processing a reque import agentscope agentscope.web.init( - path_save="YOUR_SAVE_PATH" + path_save="YOUR_output_dir" ) ``` diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md index 3d68d635f..366f2842c 100644 --- a/examples/conversation_with_agent_with_finetuned_model/README.md +++ b/examples/conversation_with_agent_with_finetuned_model/README.md @@ -6,8 +6,8 @@ This example demonstrates how to load and optionally fine-tune a Hugging Face mo Compared to 
basic conversation setup, this example introduces model loading and fine-tuning features: -- Use `dialog_agent.load_model(model_id, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. -- Apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset. +- Initialize an agent or use `dialog_agent.load_model(pretrained_model_name_or_path, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. +- Initalize an agent or apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset with the QLoRA method (https://huggingface.co/blog/4bit-transformers-bitsandbytes). The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py` and `agentscope/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization, as shown in the example script `conversation_with_agent_with_finetuned_model.py`. @@ -15,7 +15,7 @@ The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/e When initializing an agent, the following parameters need specification: -- `model_id` (str): Identifier for the model on Hugging Face. +- `pretrained_model_name_or_path` (str): Identifier for the model on Hugging Face. - `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). - `data_path` (str): Path to training data (fine-tuning is skipped if not provided). - `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. @@ -24,33 +24,39 @@ When initializing an agent, the following parameters need specification: ## Example Ouputs ### Before Fine-tuning: -User: +``` +System: -Explain in simple terms how the attention mechanism of a transformer model works +Explain in simple terms how the attention mechanism of a transformer model works. Assistant: -1. a person who helps another person or group of people. 2. a person who helps another person or group of people. - +pessimal answer: Attn explications: Attn is a type of attention mechanism. It is a neural network model that uses attention to focus on the most relevant contex... +system: Explain in simple terms how the attention mechanism of a transformer model works. +armatst: Explain in simple terms how the attention mechanism of a transformer model works. +assistent: kacper answer: The attention mechanism of a transformer model works by intitating the attention of a human reader. It glances at the contex... +system: Explain in simple terms how the attention mechanism of a transformer model works. +assistent: Explain in simple terms how the +``` ### After Fine-tuning (with the default configuration in `model_configs`): -User: +``` +System: -Explain in simple terms how the attention mechanism of a transformer model works +Explain in simple terms how the attention mechanism of a transformer model works. Assistant: -The attention mechanism of a transformer model works by allowing the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. 
- -The self-attention mechanism allows the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. - -The position-wise attention mechanism allows the model to focus on different parts of the input sequence at different times. This is done by using a combination of self-attention and position-wise attention. - -The self - +Sure, the attention mechanism of a transformer model is an important part of the model's ability to generate coherent text. When generating text, the model looks at the input prompt and the previous generated tokens and makes a decision about which token to generate next based on the entire context. +Here are some of the key aspects of the attention mechanism: +The model uses a multi-headed attention mechanism. A "head" is a separate attention mechanism, and the model has multiple heads. +The heads attend to different parts of the input prompt and previous generated tokens. +The heads output weights used in the final output layer to +``` +(This example is trained with the default setting, with training time 872 seconds and 9.914 GB gpu memory cost. Reduce training batch size can reduce the memory required. Note that the model is loaded in 4 bits (i.e., QLoRA)). ## Tested Models -The example is tested using specific Hugging Face model `openlm-research/open_llama_3b_v2` on dataset `databricks/databricks-dolly-15k`. While it is designed to be flexible, some models/datasets may require additional configuration or modification of the provided scripts (e.g., pre-processing of the datasets). +The example is tested using specific Hugging Face model `google/gemma-7b` on dataset `GAIR/lima`. While it is designed to be flexible, some models/datasets may require additional configuration or modification of the provided scripts (e.g., pre-processing of the datasets in `agentscope/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py`). 
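As an illustration of the kind of pre-processing that may be required, a dataset that keeps prompts and answers in separate columns (for example `databricks/databricks-dolly-15k`, used in an earlier revision of this example) could be mapped into the two-element `conversations` format that the provided training code expects; the helper name below is only illustrative:

```python
from datasets import load_dataset

def to_conversations(example):
    # Illustrative pre-processing: pack each instruction/response pair into
    # the two-element "conversations" format used by formatting_prompts_func.
    return {"conversations": [example["instruction"], example["response"]]}

dataset = load_dataset("databricks/databricks-dolly-15k", split="train")
dataset = dataset.map(to_conversations)
```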
## Prerequisites diff --git a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json index ed491e572..ff105c00c 100644 --- a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json +++ b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json @@ -3,16 +3,20 @@ "model_type": "huggingface", "config_name": "my_custom_model", - "model_id": "openlm-research/open_llama_3b_v2", + "pretrained_model_name_or_path": "google/gemma-7b", "max_length": 128, "device": "cuda", - "data_path": "databricks/databricks-dolly-15k", + "data_path": "GAIR/lima", "fine_tune_config": { - "lora_config": {"r": 20, "lora_alpha": 40}, - "training_args": {"max_steps": 1000, "logging_steps": 1} + "lora_config": {"r": 16, "lora_alpha": 32}, + "training_args": {"max_steps": 200, "logging_steps": 1}, + "bnb_config" : {"load_in_4bit": "True", + "bnb_4bit_use_double_quant": "True", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_compute_dtype": "torch.bfloat16"} } } ] \ No newline at end of file diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py index 5c9836bd0..9ec04c545 100644 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -7,12 +7,11 @@ and conducting a dialogue via a sequential pipeline. The conversation continues until the user exits. Features include model and tokenizer loading, -and fine-tuning on the databricks-dolly-15k dataset with adjustable parameters. +and fine-tuning on the lima dataset with adjustable parameters. """ -from finetune_dialogagent import Finetune_DialogAgent - # pylint: disable=unused-import from huggingface_model import HuggingFaceWrapper +from finetune_dialogagent import Finetune_DialogAgent import agentscope from agentscope.agents.user_agent import UserAgent from agentscope.pipelines.functional import sequentialpipeline @@ -30,14 +29,16 @@ def main() -> None: "config_name": "my_custom_model", # Or another generative model of your choice. # Needed from loading from Hugging Face. - "model_id": "openlm-research/open_llama_3b_v2", + "pretrained_model_name_or_path": "google/gemma-7b", # "local_model_path": # Specify your local model path # "local_tokenizer_path": # Specify your local tokenizer path "max_length": 128, + # Device for inference. Fine-tuning occurs on gpus. "device": "cuda", # Specify a Hugging Face data path if you # wish to finetune the model from the start - "data_path": "databricks/databricks-dolly-15k", + "data_path": "GAIR/lima", + # "output_dir": # fine_tune_config (Optional): Configuration for # fine-tuning the model. # This dictionary can include hyperparameters and other @@ -46,8 +47,14 @@ def main() -> None: # `lora_config` and `training_args` follow # the standard lora and sfttrainer fields. 
"fine_tune_config": { - "lora_config": {"r": 20, "lora_alpha": 40}, - "training_args": {"max_steps": 1000, "logging_steps": 1}, + "lora_config": {"r": 16, "lora_alpha": 32}, + "training_args": {"max_steps": 200, "logging_steps": 1}, + "bnb_config": { + "load_in_4bit": True, + "bnb_4bit_use_double_quant": True, + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_compute_dtype": "torch.bfloat16", + }, }, }, ], @@ -61,34 +68,39 @@ def main() -> None: # Init agents with the custom model dialog_agent = Finetune_DialogAgent( name="Assistant", - sys_prompt="You're a helpful assistant.", + sys_prompt=( + "Explain in simple terms how the attention mechanism of " + "a transformer model works." + ), # Use your custom model config name here model_config_name="my_custom_model", ) + # (Optional) can load another model after + # the agent has been instantiated if needed dialog_agent.load_model( - model_id="openlm-research/open_llama_3b_v2", + pretrained_model_name_or_path="google/gemma-7b", local_model_path=None, ) # load model gemma-2b-it from Hugging Face dialog_agent.load_tokenizer( - model_id="openlm-research/open_llama_3b_v2", + pretrained_model_name_or_path="google/gemma-7b", local_tokenizer_path=None, ) # load tokenizer for gemma-2b-it from Hugging Face # fine-tune loaded model with databricks-dolly-15k dataset # with default hyperparameters - # dialog_agent.fine_tune(data_path="databricks/databricks-dolly-15k") + # dialog_agent.fine_tune(data_path="GAIR/lima") # fine-tune loaded model with databricks-dolly-15k dataset # with customized hyperparameters # (`fine_tune_config` argument is optional. Defaults to None.) - dialog_agent.fine_tune( - "databricks/databricks-dolly-15k", - fine_tune_config={ - "lora_config": {"r": 24, "lora_alpha": 48}, - "training_args": {"max_steps": 300, "logging_steps": 3}, - }, - ) + # dialog_agent.fine_tune( + # "GAIR/lima", + # fine_tune_config={ + # "lora_config": {"r": 24, "lora_alpha": 48}, + # "training_args": {"max_steps": 300, "logging_steps": 3}, + # }, + # ) user_agent = UserAgent() diff --git a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py index 10c5c583f..60a68ca31 100644 --- a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py +++ b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py @@ -56,15 +56,16 @@ def __init__( def load_model( self, - model_id: Optional[str] = None, + pretrained_model_name_or_path: Optional[str] = None, local_model_path: Optional[str] = None, ) -> None: """ Load a new model into the agent. Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. - Needed if loading model from Hugging Face. + pretrained_model_name_or_path (str): The Hugging Face + model ID or a custom identifier. + Needed if loading model from Hugging Face. local_model_path (str, optional): Path to a locally saved model. Raises: @@ -73,7 +74,10 @@ def load_model( """ if hasattr(self.model, "load_model"): - self.model.load_model(model_id, local_model_path) + self.model.load_model( + pretrained_model_name_or_path, + local_model_path, + ) else: logger.error( "The model wrapper does not support dynamic model loading.", @@ -81,14 +85,15 @@ def load_model( def load_tokenizer( self, - model_id: Optional[str] = None, + pretrained_model_name_or_path: Optional[str] = None, local_tokenizer_path: Optional[str] = None, ) -> None: """ Load a new tokenizer for the agent. 
Arguments: - model_id (str): The Hugging Face model ID or a custom identifier. + pretrained_model_name_or_path (str): The Hugging Face model + ID or a custom identifier. Needed if loading tokenizer from Hugging Face. local_tokenizer_path (str, optional): Path to a locally saved tokenizer. @@ -99,13 +104,17 @@ def load_tokenizer( """ if hasattr(self.model, "load_tokenizer"): - self.model.load_tokenizer(model_id, local_tokenizer_path) + self.model.load_tokenizer( + pretrained_model_name_or_path, + local_tokenizer_path, + ) else: logger.error("The model wrapper does not support dynamic loading.") def fine_tune( self, data_path: Optional[str] = None, + output_dir: Optional[str] = None, fine_tune_config: Optional[Dict[str, Any]] = None, ) -> None: """ @@ -113,6 +122,11 @@ def fine_tune( Arguments: data_path (str): The path to the training data. + output_dir (str, optional): User specified path + to save the fine-tuned model + and its tokenizer. By default + save to this example's + directory if not specified. Raises: Exception: If fine-tuning fails or if the @@ -120,7 +134,7 @@ def fine_tune( """ if hasattr(self.model, "fine_tune"): - self.model.fine_tune(data_path, fine_tune_config) + self.model.fine_tune(data_path, output_dir, fine_tune_config) logger.info("Fine-tuning completed successfully.") else: logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py index a7f86ef4b..692e690f6 100644 --- a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py @@ -10,7 +10,11 @@ import os import torch -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + BitsAndBytesConfig, +) from loguru import logger from dotenv import load_dotenv @@ -31,9 +35,10 @@ class HuggingFaceWrapper(ModelWrapperBase): def __init__( self, config_name: str, - model_id: Optional[str] = None, + pretrained_model_name_or_path: Optional[str] = None, max_length: int = 512, data_path: Optional[str] = None, + output_dir: Optional[str] = None, device: Optional[torch.device] = None, local_model_path: Optional[str] = None, local_tokenizer_path: Optional[str] = None, @@ -44,13 +49,19 @@ def __init__( Arguments: config_name (str): Configuration name for model setup. - model_id (str): Identifier for the pre-trained model on - Hugging Face. + pretrained_model_name_or_path (str): Identifier for + the pre-trained model on + Hugging Face. max_length (int): Maximum sequence length for the model output per reply. Defaults to 512. data_path (str, optional): Path to the dataset for fine-tuning the model. + output_dir (str, optional): User specified path to save + the fine-tuned model + and its tokenizer. By default + save to this example's + directory if not specified. device (torch.device, optional): Device to run the model on. Default to GPU if available. 
local_model_path (str, optional): Local file path to a @@ -62,7 +73,7 @@ def __init__( super().__init__(config_name=config_name) self.model = None self.max_length = max_length # Set max_length as an attribute - self.model_id = model_id + self.pretrained_model_name_or_path = pretrained_model_name_or_path relative_path = os.path.join( os.path.dirname(__file__), "../conversation_with_agent_with_finetuned_model/.env", @@ -78,9 +89,12 @@ def __init__( else: self.device = device - self.load_model(model_id, local_model_path=local_model_path) + self.load_model( + pretrained_model_name_or_path, + local_model_path=local_model_path, + ) self.load_tokenizer( - model_id, + pretrained_model_name_or_path, local_tokenizer_path=local_tokenizer_path, ) @@ -89,6 +103,7 @@ def __init__( self.model, self.tokenizer, data_path, + output_dir, token=self.huggingface_token, fine_tune_config=fine_tune_config, ) @@ -197,8 +212,9 @@ def format( def load_model( self, - model_id: Optional[str] = None, + pretrained_model_name_or_path: Optional[str] = None, local_model_path: Optional[str] = None, + fine_tune_config: Optional[Dict[str, Any]] = None, ) -> None: """ Load a new model for the agent from @@ -206,7 +222,8 @@ def load_model( Arguments: local_model_path (str): The file path to the model to be loaded. - model_id (str): An identifier for the model on Huggingface. + pretrained_model_name_or_path (str): An identifier for + the model on Huggingface. Raises: Exception: If the model cannot be loaded from the given @@ -216,25 +233,37 @@ def load_model( or network issues while fetching the model. """ + bnb_config_default = {} + + if fine_tune_config is not None: + if fine_tune_config.get("bnb_config") is not None: + bnb_config_default.update(fine_tune_config["bnb_config"]) + + bnb_config = BitsAndBytesConfig(**bnb_config_default) + try: if local_model_path is None: self.model = AutoModelForCausalLM.from_pretrained( - model_id, + pretrained_model_name_or_path, + quantization_config=bnb_config, token=self.huggingface_token, device_map="auto", ) info_msg = ( - f"Successfully loaded new model '{model_id}' from " + f"Successfully loaded new model " + f"'{pretrained_model_name_or_path}' from " f"Hugging Face" ) else: self.model = AutoModelForCausalLM.from_pretrained( local_model_path, + quantization_config=bnb_config, local_files_only=True, device_map="auto", ) info_msg = ( - f"Successfully loaded new model '{model_id}' from " + f"Successfully loaded new model " + f"'{pretrained_model_name_or_path}' from " f"'{local_model_path}'" ) @@ -245,7 +274,7 @@ def load_model( # Handle exceptions during model loading, # such as file not found or load errors error_msg = ( - f"Failed to load model '{model_id}' " + f"Failed to load model '{pretrained_model_name_or_path}' " f"from '{local_model_path}': {e}" ) @@ -255,7 +284,7 @@ def load_model( def load_tokenizer( self, - model_id: Optional[str] = None, + pretrained_model_name_or_path: Optional[str] = None, local_tokenizer_path: Optional[str] = None, ) -> None: """ @@ -264,8 +293,12 @@ def load_tokenizer( Arguments: local_tokenizer_path (str): The file path to the tokenizer to be loaded. - model_id (str): An identifier for the model on Huggingface. - + pretrained_model_name_or_path (str): An identifier + for the model on Huggingface. + fine_tune_config (dict, optional): Configuration options for + fine-tuning the model, + including QLoRA and training + arguments. Raises: Exception: If the tokenizer cannot be loaded from the given path or identifier. 
Possible reasons include file not found, @@ -275,13 +308,13 @@ def load_tokenizer( try: if local_tokenizer_path is None: self.tokenizer = AutoTokenizer.from_pretrained( - model_id, + pretrained_model_name_or_path, token=self.huggingface_token, ) # log the successful tokenizer loading logger.info( f"Successfully loaded new tokenizer for model " - f"'{model_id}' from Hugging Face", + f"'{pretrained_model_name_or_path}' from Hugging Face", ) else: @@ -291,15 +324,16 @@ def load_tokenizer( # log the successful tokenizer loading logger.info( f"Successfully loaded new tokenizer for model " - f"'{model_id}' from '{local_tokenizer_path}'", + f"'{pretrained_model_name_or_path}'" + f" from '{local_tokenizer_path}'", ) - self.tokenizer.add_special_tokens({"pad_token": "[PAD]"}) except Exception as e: # Handle exceptions during model loading, # such as file not found or load errors error_message = ( - f"Failed to load tokenizer for model '{model_id}' from " + f"Failed to load tokenizer for model" + f" '{pretrained_model_name_or_path}' from " f"'{local_tokenizer_path}': {e}" ) logger.error(error_message) @@ -309,6 +343,7 @@ def load_tokenizer( def fine_tune( self, data_path: Optional[str] = None, + output_dir: Optional[str] = None, fine_tune_config: Optional[Dict[str, Any]] = None, ) -> None: """ @@ -317,6 +352,11 @@ def fine_tune( Arguments: data_path (str): The file path to the training data from Hugging Face. + output_dir (str, optional): User specified path + to save the fine-tuned model + and its tokenizer. By default + save to this example's + directory if not specified. Raises: Exception: If the fine-tuning process fails. This could be @@ -328,6 +368,7 @@ def fine_tune( self.model, self.tokenizer, data_path, + output_dir, token=self.huggingface_token, fine_tune_config=fine_tune_config, ) @@ -341,11 +382,71 @@ def fine_tune( ) raise + def filer_sequence_lengths( + self, + max_input_seq_length: int, + dataset_obj: List[Dict[str, List[str]]], + ) -> List[int]: + """ + Identifies and returns the indices of conversation + entries that exceed max_input_seq_length characters in length. + + Args: + dataset_obj (List[Dict[str, List[str]]]): A list where + each dictionary contains 'conversations', + a list of two strings (question and answer). + + Returns: + List[int]: Indices of conversations where the combined + length of the question and answer exceeds + max_input_seq_length characters. + """ + # Initialize a list to store the sequence lengths + sequence_lengths = [] + + # list of indices that are too long + too_long = [] + + # Loop over the dataset and get the lengths of text sequences + for idx, example in enumerate(dataset_obj): + sequence_length = len( + example["conversations"][0] + example["conversations"][1], + ) + sequence_lengths.append(sequence_length) + if sequence_length > max_input_seq_length: + too_long.append(idx) + + return too_long + + def formatting_prompts_func( + self, + example: Dict[str, List[List[str]]], + ) -> List[str]: + """ + Formats each conversation in the dataset for training. + Args: + example (Dict[str, List[List[str]]]): A dataset. + + Returns: + List[str]: A dataset with combined field. 
+ """ + output_texts = [] + for i in range(len(example["conversations"])): + text = ( + "### Question: " + + example["conversations"][i][0] + + "\n ### Answer: " + + example["conversations"][i][1] + ) + output_texts.append(text) + return output_texts + def fine_tune_training( self, model: AutoModelForCausalLM, tokenizer: AutoTokenizer, data_path: Optional[str] = None, + output_dir: Optional[str] = None, token: Optional[str] = None, fine_tune_config: Optional[Dict[str, Any]] = None, ) -> AutoModelForCausalLM: @@ -361,10 +462,15 @@ def fine_tune_training( the pre-trained model. data_path (str): The file path or dataset identifier to load the dataset from Hugging Face. + output_dir (str, optional): User specified path + to save the fine-tuned model + and its tokenizer. By default + save to this example's + directory if not specified. token (str): The authentication token for Hugging Face. fine_tune_config (dict, optional): Configuration options for fine-tuning the model, - including LoRA and training + including QLoRA and training arguments. Returns: @@ -377,7 +483,7 @@ def fine_tune_training( Note: This method updates the model in place and also logs the fine-tuning process. - It utilizes the LoRA configuration and custom training arguments + It utilizes the QLoRA configuration and custom training arguments to adapt the pre-trained model to the specific dataset. The training log and trained model are saved in the same directory with the specific timestamp at saving time @@ -387,8 +493,15 @@ def fine_tune_training( from datetime import datetime import json - dataset = load_dataset(data_path, token=token) - dataset = dataset["train"].train_test_split(test_size=0.1) + dataset = load_dataset(data_path, split="train", token=token) + + indexes_to_drop = self.filer_sequence_lengths(300, dataset) + + dataset_reduced = dataset.select( + i for i in range(len(dataset)) if i not in set(indexes_to_drop) + ) + + formatted_dataset = dataset_reduced.train_test_split(test_size=0.1) from peft import LoraConfig @@ -406,70 +519,45 @@ def fine_tune_training( training_defaults = { "per_device_train_batch_size": 1, - # "gradient_accumulation_steps": 1, + "gradient_accumulation_steps": 4, "gradient_checkpointing": False, - # "max_steps": 10, - "num_train_epochs": 10, + "num_train_epochs": 5, "output_dir": "./", "optim": "paged_adamw_8bit", - "fp16": True, "logging_steps": 1, - "learning_rate": 1e-5, - # "num_train_epochs": 10.0, } if fine_tune_config is not None: if fine_tune_config.get("training_args") is not None: training_defaults.update(fine_tune_config["training_args"]) + if output_dir is not None: + training_defaults["output_dir"] = output_dir + from peft import get_peft_model lora_config = LoraConfig(**lora_config_default) model = get_peft_model(model, lora_config) - from trl import SFTTrainer import transformers + from trl import SFTTrainer, DataCollatorForCompletionOnlyLM - def formatting_func(example: Dict[str, Any]) -> Dict[str, str]: - if example.get("context", "") != "": - input_prompt = ( - f"Below is an instruction that describes a task, " - f"paired with an input that provides further context. " - f"Write a response that appropriately " - f"completes the request.\n\n" - f"### Instruction:\n" - f"{example['instruction']}\n\n" - f"### Input: \n" - f"{example['context']}\n\n" - f"### Response: \n" - f"{example['response']}" - ) - else: - input_prompt = ( - f"Below is an instruction that describes a task. 
" - "Write a response that appropriately " - f"completes the request.\n\n" - "### Instruction:\n" - f"{example['instruction']}\n\n" - f"### Response:\n" - f"{example['response']}" - ) - - return {"text": input_prompt} - - formatted_dataset = dataset.map(formatting_func) + collator = DataCollatorForCompletionOnlyLM( + response_template=" ### Answer:", + tokenizer=tokenizer, + ) trainer_args = transformers.TrainingArguments(**training_defaults) trainer = SFTTrainer( model, + formatting_func=self.formatting_prompts_func, + data_collator=collator, train_dataset=formatted_dataset["train"], eval_dataset=formatted_dataset["test"], - # data_collator=collator, peft_config=lora_config, args=trainer_args, - dataset_text_field="text", - max_seq_length=512, + max_seq_length=2048, ) logger.info( @@ -481,29 +569,49 @@ def formatting_func(example: Dict[str, Any]) -> Dict[str, str]: now = datetime.now() time_string = now.strftime("%Y-%m-%d_%H-%M-%S") + if output_dir is not None: + os.makedirs(output_dir, exist_ok=True) + # Specify the filename log_name_temp = model.config.name_or_path.split("/")[-1] log_name = f"{log_name_temp}_{time_string}_log_history.json" log_path = os.path.join(os.path.dirname(__file__), log_name) # log training history - with open(log_path, "w", encoding="utf-8") as f: - json.dump(trainer.state.log_history, f) + if output_dir is not None: + with open( + os.path.join(output_dir, log_name), + "w", + encoding="utf-8", + ) as f: + json.dump(trainer.state.log_history, f) + else: + with open(log_path, "w", encoding="utf-8") as f: + json.dump(trainer.state.log_history, f) # save model model_name = ( f"sft_{model.config.name_or_path.split('/')[-1]}_{time_string}" ) - model_path = os.path.join(os.path.dirname(__file__), model_name) + if output_dir is not None: + model_path = os.path.join(output_dir, model_name) + else: + model_path = os.path.join(os.path.dirname(__file__), model_name) trainer.save_model(model_path) # save tokenizer tokenizer_name_temp = model.config.name_or_path.split("/")[-1] tokenizer_name = f"sft_{tokenizer_name_temp}_tokenizer_{time_string}" - tokenizer_path = os.path.join( - os.path.dirname(__file__), - tokenizer_name, - ) + if output_dir is not None: + tokenizer_path = os.path.join( + output_dir, + tokenizer_name, + ) + else: + tokenizer_path = os.path.join( + os.path.dirname(__file__), + tokenizer_name, + ) tokenizer.save_pretrained(tokenizer_path) return model diff --git a/examples/game_gomoku/code/board_agent.py b/examples/game_gomoku/code/board_agent.py index 87247111b..f4c8641ea 100644 --- a/examples/game_gomoku/code/board_agent.py +++ b/examples/game_gomoku/code/board_agent.py @@ -25,7 +25,7 @@ EMPTY_PIECE = "0" -def board2img(board: np.ndarray, save_path: str) -> str: +def board2img(board: np.ndarray, output_dir: str) -> str: """Convert the board to an image and save it to the specified path.""" size = board.shape[0] @@ -63,9 +63,9 @@ def board2img(board: np.ndarray, save_path: str) -> str: ax.set_xticklabels(range(size)) ax.set_yticklabels(range(size)) ax.invert_yaxis() - plt.savefig(save_path, bbox_inches="tight", pad_inches=0.1) + plt.savefig(output_dir, bbox_inches="tight", pad_inches=0.1) plt.close(fig) # Close the figure to free memory - return save_path + return output_dir class BoardAgent(AgentBase): diff --git a/examples/game_gomoku/main.ipynb b/examples/game_gomoku/main.ipynb index df3149637..f7fc70f70 100644 --- a/examples/game_gomoku/main.ipynb +++ b/examples/game_gomoku/main.ipynb @@ -70,7 +70,7 @@ "import matplotlib.pyplot as plt\n", "import 
matplotlib.patches as patches\n", "\n", - "def board2img(board: np.ndarray, save_path: str)->str:\n", + "def board2img(board: np.ndarray, output_dir: str)->str:\n", " size = board.shape[0]\n", " fig, ax = plt.subplots(figsize=(10, 10))\n", " ax.set_xlim(0, size - 1)\n", @@ -100,9 +100,9 @@ " ax.set_xticklabels(range(size))\n", " ax.set_yticklabels(range(size))\n", " ax.invert_yaxis()\n", - " plt.savefig(save_path, bbox_inches='tight', pad_inches=0.1)\n", + " plt.savefig(output_dir, bbox_inches='tight', pad_inches=0.1)\n", " plt.close(fig) # Close the figure to free memory\n", - " return save_path" + " return output_dir" ], "metadata": { "collapsed": false, diff --git a/src/agentscope/web/README.md b/src/agentscope/web/README.md index 19011ab4b..118e65dca 100644 --- a/src/agentscope/web/README.md +++ b/src/agentscope/web/README.md @@ -12,7 +12,7 @@ To start a web UI, you can run the following python code: import agentscope agentscope.web.init( - path_save="YOUR_SAVE_PATH", + path_save="YOUR_output_dir", host="YOUR_WEB_IP", # defaults to 127.0.0.1 port=5000 # defaults to 5000 ) From 9998e66e91ceaac5be68303dccf4f46655a1716e Mon Sep 17 00:00:00 2001 From: zyzhang Date: Wed, 8 May 2024 17:24:07 +0000 Subject: [PATCH 18/32] resolve issues raised --- ...rsation_with_agent_with_finetuned_model.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py index 9ec04c545..244b7ec5d 100644 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py @@ -76,22 +76,22 @@ def main() -> None: model_config_name="my_custom_model", ) - # (Optional) can load another model after - # the agent has been instantiated if needed - dialog_agent.load_model( - pretrained_model_name_or_path="google/gemma-7b", - local_model_path=None, - ) # load model gemma-2b-it from Hugging Face - dialog_agent.load_tokenizer( - pretrained_model_name_or_path="google/gemma-7b", - local_tokenizer_path=None, - ) # load tokenizer for gemma-2b-it from Hugging Face + # # (Optional) can load another model after + # # the agent has been instantiated if needed + # dialog_agent.load_model( + # pretrained_model_name_or_path="google/gemma-7b", + # local_model_path=None, + # ) # load model gemma-2b-it from Hugging Face + # dialog_agent.load_tokenizer( + # pretrained_model_name_or_path="google/gemma-7b", + # local_tokenizer_path=None, + # ) # load tokenizer for gemma-2b-it from Hugging Face - # fine-tune loaded model with databricks-dolly-15k dataset + # fine-tune loaded model with lima dataset # with default hyperparameters # dialog_agent.fine_tune(data_path="GAIR/lima") - # fine-tune loaded model with databricks-dolly-15k dataset + # fine-tune loaded model with lima dataset # with customized hyperparameters # (`fine_tune_config` argument is optional. Defaults to None.) 
# dialog_agent.fine_tune( From f6b46ed25ee945f2658e8087dafa9c3ae0ff1031 Mon Sep 17 00:00:00 2001 From: zyzhang Date: Wed, 8 May 2024 17:31:29 +0000 Subject: [PATCH 19/32] resolve issues raised --- .../huggingface_model.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py index 692e690f6..1ce44c86c 100644 --- a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py +++ b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py @@ -8,6 +8,8 @@ """ from typing import Sequence, Any, Union, List, Optional, Dict import os +from datetime import datetime +import json import torch from transformers import ( @@ -15,6 +17,9 @@ AutoTokenizer, BitsAndBytesConfig, ) +import transformers +from trl import SFTTrainer, DataCollatorForCompletionOnlyLM +from datasets import load_dataset from loguru import logger from dotenv import load_dotenv @@ -489,9 +494,6 @@ def fine_tune_training( directory with the specific timestamp at saving time as part of the log/model fodler name. """ - from datasets import load_dataset - from datetime import datetime - import json dataset = load_dataset(data_path, split="train", token=token) @@ -539,9 +541,6 @@ def fine_tune_training( lora_config = LoraConfig(**lora_config_default) model = get_peft_model(model, lora_config) - import transformers - from trl import SFTTrainer, DataCollatorForCompletionOnlyLM - collator = DataCollatorForCompletionOnlyLM( response_template=" ### Answer:", tokenizer=tokenizer, From 6bf09f1ca9a4f07d1a5ca01d631da7e46c116a58 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Fri, 10 May 2024 17:30:50 +0800 Subject: [PATCH 20/32] Update README.md updated the dependencies needed --- examples/conversation_with_agent_with_finetuned_model/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md index 366f2842c..a1ad7fe45 100644 --- a/examples/conversation_with_agent_with_finetuned_model/README.md +++ b/examples/conversation_with_agent_with_finetuned_model/README.md @@ -67,6 +67,7 @@ Before running this example, ensure you have installed the following packages: - `python-dotenv` - `datasets` - `trl` +- `bitsandbytes` Additionally, set `HUGGINGFACE_TOKEN` in the `agentscope/examples/conversation_with_agent_with_finetuned_model/.env`. 
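
For reference, the `bitsandbytes` and `python-dotenv` prerequisites added above correspond to the two setup steps this example depends on: reading `HUGGINGFACE_TOKEN` from the `.env` file and loading the base model with 4-bit (QLoRA-style) quantization. Below is a minimal sketch of that flow, not code from the patch series itself; it assumes a `.env` file containing `HUGGINGFACE_TOKEN=<your token>` sits next to the script, and it reuses the `google/gemma-7b` model id and `bnb_config` values from the example's `model_configs`.

```python
import os

import torch
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Read the Hugging Face token from the example's .env file.
# The path is illustrative; point it at wherever your .env actually lives.
load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# 4-bit quantization config mirroring the example's default bnb_config;
# this is what the bitsandbytes dependency is needed for.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the gated base model with the token and the quantization config.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-7b",
    quantization_config=bnb_config,
    token=hf_token,
    device_map="auto",
)
```

Loading the 7B model in 4 bits is what keeps the example within a single-GPU memory budget, consistent with the roughly 10 GB GPU memory figure quoted in the example's README.
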
From 8d7e8808aec4ac3f97a73bae4fea3d48df2503d9 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Fri, 10 May 2024 17:51:49 +0800 Subject: [PATCH 21/32] Update README.md --- examples/conversation_with_agent_with_finetuned_model/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md index a1ad7fe45..d9f682a5f 100644 --- a/examples/conversation_with_agent_with_finetuned_model/README.md +++ b/examples/conversation_with_agent_with_finetuned_model/README.md @@ -63,7 +63,6 @@ The example is tested using specific Hugging Face model `google/gemma-7b` on dat Before running this example, ensure you have installed the following packages: - `transformers` -- `peft` - `python-dotenv` - `datasets` - `trl` From ddfc6f27f1b7b2e7125a483a5f4db8073931a502 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Fri, 17 May 2024 17:21:05 +0800 Subject: [PATCH 22/32] Update gemini_model.py fixed the issue arisen when `response` object doesn't contain are text due to reasons flagged by Google --- src/agentscope/models/gemini_model.py | 50 +++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/src/agentscope/models/gemini_model.py b/src/agentscope/models/gemini_model.py index 7625bf6f1..326406576 100644 --- a/src/agentscope/models/gemini_model.py +++ b/src/agentscope/models/gemini_model.py @@ -149,7 +149,48 @@ def __call__( **kwargs, ) - # step3: record the api invocation if needed + # step3: Check for candidates and handle accordingly + if response.candidates: + candidate = response.candidates[0] + finish_reason = candidate.finish_reason + + if finish_reason not in (1, 2): # Not successful + logger.warning( + f"Generation stopped due to " + f"finish_reason: {finish_reason}", + ) + if finish_reason == 3: + raise ValueError( + "Generation stopped because the candidate " + "content was flagged for safety reasons. ", + ) + elif finish_reason == 4: + raise ValueError( + "Generation stopped because the candidate " + "content was flagged for recitation reasons.", + ) + elif finish_reason == 5: + raise ValueError( + "Generation stopped due to an Unknown reason.", + ) + else: + raise ValueError(f"Unknown finish reason: {finish_reason}") + + if ( + not candidate.content + or not candidate.content.parts + or not candidate.content.parts[0].text + ): + raise ValueError("No valid text parts found in the response.") + + response_text = ( + candidate.content.parts[0] + .text.strip("```json") + .strip("```") + .strip() + ) + + # step4: record the api invocation if needed self._save_model_invocation( arguments={ "contents": contents, @@ -160,11 +201,8 @@ def __call__( ) # step5: update monitor accordingly - # TODO: Up to 2024/03/11, the response from Gemini doesn't contain - # the detailed information about cost. Here we simply count - # the tokens manually. 
token_prompt = self.model.count_tokens(contents).total_tokens - token_response = self.model.count_tokens(response.text).total_tokens + token_response = self.model.count_tokens(response_text).total_tokens self.update_monitor( call_counter=1, completion_tokens=token_response, @@ -174,7 +212,7 @@ def __call__( # step6: return response return ModelResponse( - text=response.text, + text=response_text, raw=response, ) From 17918c4418ad52fdc02793bf49cd6186d8c0fd41 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Tue, 21 May 2024 17:44:24 +0800 Subject: [PATCH 23/32] decoupled `conversation_with_agent_with_finetuned_model` from GeminiChatWrapper fix --- .../README.md | 74 --- .../configs/model_configs.json | 22 - ...rsation_with_agent_with_finetuned_model.py | 114 ---- .../finetune_dialogagent.py | 140 ---- .../huggingface_model.py | 616 ------------------ 5 files changed, 966 deletions(-) delete mode 100644 examples/conversation_with_agent_with_finetuned_model/README.md delete mode 100644 examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json delete mode 100644 examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py delete mode 100644 examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py delete mode 100644 examples/conversation_with_agent_with_finetuned_model/huggingface_model.py diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md deleted file mode 100644 index d9f682a5f..000000000 --- a/examples/conversation_with_agent_with_finetuned_model/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# Multi-Agent Conversation with Custom Model Loading and Fine-Tuning in AgentScope - -This example demonstrates how to load and optionally fine-tune a Hugging Face model within a multi-agent conversation setup using AgentScope. The complete code is provided in `agentscope/examples/conversation_with_agent_with_finetuned_model`. - -## Functionality Overview - -Compared to basic conversation setup, this example introduces model loading and fine-tuning features: - -- Initialize an agent or use `dialog_agent.load_model(pretrained_model_name_or_path, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory. -- Initalize an agent or apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset with the QLoRA method (https://huggingface.co/blog/4bit-transformers-bitsandbytes). - -The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py` and `agentscope/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `Finetune_DialogAgent`'s `fine_tune` method after initialization, as shown in the example script `conversation_with_agent_with_finetuned_model.py`. - -## Agent Initialization - -When initializing an agent, the following parameters need specification: - -- `pretrained_model_name_or_path` (str): Identifier for the model on Hugging Face. -- `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided). -- `data_path` (str): Path to training data (fine-tuning is skipped if not provided). 
-- `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available. -- `fine_tune_config` (dict, Optional): A configuration dictionary for fine-tuning the model. It allows specifying hyperparameters and other training options that will be passed to the fine-tuning method. If not provided, default settings will be used. This allows for customization of the fine-tuning process to optimize model performance based on specific requirements. -- `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face. - -## Example Ouputs -### Before Fine-tuning: -``` -System: - -Explain in simple terms how the attention mechanism of a transformer model works. - -Assistant: - -pessimal answer: Attn explications: Attn is a type of attention mechanism. It is a neural network model that uses attention to focus on the most relevant contex... -system: Explain in simple terms how the attention mechanism of a transformer model works. -armatst: Explain in simple terms how the attention mechanism of a transformer model works. -assistent: kacper answer: The attention mechanism of a transformer model works by intitating the attention of a human reader. It glances at the contex... -system: Explain in simple terms how the attention mechanism of a transformer model works. -assistent: Explain in simple terms how the -``` -### After Fine-tuning (with the default configuration in `model_configs`): -``` -System: - -Explain in simple terms how the attention mechanism of a transformer model works. - -Assistant: - -Sure, the attention mechanism of a transformer model is an important part of the model's ability to generate coherent text. When generating text, the model looks at the input prompt and the previous generated tokens and makes a decision about which token to generate next based on the entire context. -Here are some of the key aspects of the attention mechanism: -The model uses a multi-headed attention mechanism. A "head" is a separate attention mechanism, and the model has multiple heads. -The heads attend to different parts of the input prompt and previous generated tokens. -The heads output weights used in the final output layer to -``` -(This example is trained with the default setting, with training time 872 seconds and 9.914 GB gpu memory cost. Reduce training batch size can reduce the memory required. Note that the model is loaded in 4 bits (i.e., QLoRA)). - -## Tested Models - -The example is tested using specific Hugging Face model `google/gemma-7b` on dataset `GAIR/lima`. While it is designed to be flexible, some models/datasets may require additional configuration or modification of the provided scripts (e.g., pre-processing of the datasets in `agentscope/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py`). - -## Prerequisites - -Before running this example, ensure you have installed the following packages: - -- `transformers` -- `python-dotenv` -- `datasets` -- `trl` -- `bitsandbytes` - -Additionally, set `HUGGINGFACE_TOKEN` in the `agentscope/examples/conversation_with_agent_with_finetuned_model/.env`. 
- -```bash -python conversation_with_agent_with_finetuned_model.py \ No newline at end of file diff --git a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json deleted file mode 100644 index ff105c00c..000000000 --- a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "model_type": "huggingface", - "config_name": "my_custom_model", - - "pretrained_model_name_or_path": "google/gemma-7b", - - "max_length": 128, - "device": "cuda", - - "data_path": "GAIR/lima", - - "fine_tune_config": { - "lora_config": {"r": 16, "lora_alpha": 32}, - "training_args": {"max_steps": 200, "logging_steps": 1}, - "bnb_config" : {"load_in_4bit": "True", - "bnb_4bit_use_double_quant": "True", - "bnb_4bit_quant_type": "nf4", - "bnb_4bit_compute_dtype": "torch.bfloat16"} - } - } -] \ No newline at end of file diff --git a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py deleted file mode 100644 index 244b7ec5d..000000000 --- a/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py +++ /dev/null @@ -1,114 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This script sets up a conversational agent using -AgentScope with a Hugging Face model. -It includes initializing a Finetune_DialogAgent, -loading and fine-tuning a pre-trained model, -and conducting a dialogue via a sequential pipeline. -The conversation continues until the user exits. -Features include model and tokenizer loading, -and fine-tuning on the lima dataset with adjustable parameters. -""" -# pylint: disable=unused-import -from huggingface_model import HuggingFaceWrapper -from finetune_dialogagent import Finetune_DialogAgent -import agentscope -from agentscope.agents.user_agent import UserAgent -from agentscope.pipelines.functional import sequentialpipeline - - -def main() -> None: - """A basic conversation demo with a custom model""" - - # Initialize AgentScope with your custom model configuration - - agentscope.init( - model_configs=[ - { - "model_type": "huggingface", - "config_name": "my_custom_model", - # Or another generative model of your choice. - # Needed from loading from Hugging Face. - "pretrained_model_name_or_path": "google/gemma-7b", - # "local_model_path": # Specify your local model path - # "local_tokenizer_path": # Specify your local tokenizer path - "max_length": 128, - # Device for inference. Fine-tuning occurs on gpus. - "device": "cuda", - # Specify a Hugging Face data path if you - # wish to finetune the model from the start - "data_path": "GAIR/lima", - # "output_dir": - # fine_tune_config (Optional): Configuration for - # fine-tuning the model. - # This dictionary can include hyperparameters and other - # training options that will be passed to the - # fine-tuning method. Defaults to None. - # `lora_config` and `training_args` follow - # the standard lora and sfttrainer fields. 
- "fine_tune_config": { - "lora_config": {"r": 16, "lora_alpha": 32}, - "training_args": {"max_steps": 200, "logging_steps": 1}, - "bnb_config": { - "load_in_4bit": True, - "bnb_4bit_use_double_quant": True, - "bnb_4bit_quant_type": "nf4", - "bnb_4bit_compute_dtype": "torch.bfloat16", - }, - }, - }, - ], - ) - - # # alternatively can load `model_configs` from json file - # agentscope.init( - # model_configs="./configs/model_configs.json", - # ) - - # Init agents with the custom model - dialog_agent = Finetune_DialogAgent( - name="Assistant", - sys_prompt=( - "Explain in simple terms how the attention mechanism of " - "a transformer model works." - ), - # Use your custom model config name here - model_config_name="my_custom_model", - ) - - # # (Optional) can load another model after - # # the agent has been instantiated if needed - # dialog_agent.load_model( - # pretrained_model_name_or_path="google/gemma-7b", - # local_model_path=None, - # ) # load model gemma-2b-it from Hugging Face - # dialog_agent.load_tokenizer( - # pretrained_model_name_or_path="google/gemma-7b", - # local_tokenizer_path=None, - # ) # load tokenizer for gemma-2b-it from Hugging Face - - # fine-tune loaded model with lima dataset - # with default hyperparameters - # dialog_agent.fine_tune(data_path="GAIR/lima") - - # fine-tune loaded model with lima dataset - # with customized hyperparameters - # (`fine_tune_config` argument is optional. Defaults to None.) - # dialog_agent.fine_tune( - # "GAIR/lima", - # fine_tune_config={ - # "lora_config": {"r": 24, "lora_alpha": 48}, - # "training_args": {"max_steps": 300, "logging_steps": 3}, - # }, - # ) - - user_agent = UserAgent() - - # Start the conversation between user and assistant - x = None - while x is None or x.content != "exit": - x = sequentialpipeline([dialog_agent, user_agent], x) - - -if __name__ == "__main__": - main() diff --git a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py b/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py deleted file mode 100644 index 60a68ca31..000000000 --- a/examples/conversation_with_agent_with_finetuned_model/finetune_dialogagent.py +++ /dev/null @@ -1,140 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This module provides the Finetune_DialogAgent class, -which extends DialogAgent to enhance fine-tuning -capabilities with custom hyperparameters. -""" -from typing import Any, Optional, Dict - -from loguru import logger - -from agentscope.agents import DialogAgent - - -class Finetune_DialogAgent(DialogAgent): - """ - A dialog agent capable of fine-tuning its - underlying model based on provided data. - - Inherits from DialogAgent and adds functionality for - fine-tuning with custom hyperparameters. - """ - - def __init__( - self, - name: str, - sys_prompt: str, - model_config_name: str, - use_memory: bool = True, - memory_config: Optional[dict] = None, - ): - """ - Initializes a new Finetune_DialogAgent with specified configuration. - - Arguments: - name (str): Name of the agent. - sys_prompt (str): System prompt or description of the agent's role. - model_config_name (str): The configuration name for - the underlying model. - use_memory (bool, optional): Indicates whether to utilize - memory features. Defaults to True. - memory_config (dict, optional): Configuration for memory - functionalities if - `use_memory` is True. - - Note: - Refer to `class DialogAgent(AgentBase)` for more information. 
- """ - - super().__init__( - name, - sys_prompt, - model_config_name, - use_memory, - memory_config, - ) - - def load_model( - self, - pretrained_model_name_or_path: Optional[str] = None, - local_model_path: Optional[str] = None, - ) -> None: - """ - Load a new model into the agent. - - Arguments: - pretrained_model_name_or_path (str): The Hugging Face - model ID or a custom identifier. - Needed if loading model from Hugging Face. - local_model_path (str, optional): Path to a locally saved model. - - Raises: - Exception: If the model loading process fails or if the - model wrapper does not support dynamic loading. - """ - - if hasattr(self.model, "load_model"): - self.model.load_model( - pretrained_model_name_or_path, - local_model_path, - ) - else: - logger.error( - "The model wrapper does not support dynamic model loading.", - ) - - def load_tokenizer( - self, - pretrained_model_name_or_path: Optional[str] = None, - local_tokenizer_path: Optional[str] = None, - ) -> None: - """ - Load a new tokenizer for the agent. - - Arguments: - pretrained_model_name_or_path (str): The Hugging Face model - ID or a custom identifier. - Needed if loading tokenizer from Hugging Face. - local_tokenizer_path (str, optional): Path to a locally saved - tokenizer. - - Raises: - Exception: If the model tokenizer process fails or if the - model wrapper does not support dynamic loading. - """ - - if hasattr(self.model, "load_tokenizer"): - self.model.load_tokenizer( - pretrained_model_name_or_path, - local_tokenizer_path, - ) - else: - logger.error("The model wrapper does not support dynamic loading.") - - def fine_tune( - self, - data_path: Optional[str] = None, - output_dir: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - ) -> None: - """ - Fine-tune the agent's underlying model. - - Arguments: - data_path (str): The path to the training data. - output_dir (str, optional): User specified path - to save the fine-tuned model - and its tokenizer. By default - save to this example's - directory if not specified. - - Raises: - Exception: If fine-tuning fails or if the - model wrapper does not support fine-tuning. - """ - - if hasattr(self.model, "fine_tune"): - self.model.fine_tune(data_path, output_dir, fine_tune_config) - logger.info("Fine-tuning completed successfully.") - else: - logger.error("The model wrapper does not support fine-tuning.") diff --git a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py b/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py deleted file mode 100644 index 1ce44c86c..000000000 --- a/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py +++ /dev/null @@ -1,616 +0,0 @@ -# -*- coding: utf-8 -*- -""" -This module provides a HuggingFaceWrapper to manage -and operate Hugging Face Transformers models, enabling loading, -fine-tuning, and response generation. -Key features include handling model and tokenizer operations, -adapting to specialized datasets, and robust error management. 
-""" -from typing import Sequence, Any, Union, List, Optional, Dict -import os -from datetime import datetime -import json - -import torch -from transformers import ( - AutoModelForCausalLM, - AutoTokenizer, - BitsAndBytesConfig, -) -import transformers -from trl import SFTTrainer, DataCollatorForCompletionOnlyLM -from datasets import load_dataset -from loguru import logger -from dotenv import load_dotenv - -from agentscope.models import ModelWrapperBase, ModelResponse -from agentscope.message import MessageBase -from agentscope.utils.tools import _convert_to_str - - -class HuggingFaceWrapper(ModelWrapperBase): - """Wrapper for a Hugging Face transformer model. - - This class is responsible for loading and fine-tuning - pre-trained models from the Hugging Face library. - """ - - model_type: str = "huggingface" # Unique identifier for this model wrapper - - def __init__( - self, - config_name: str, - pretrained_model_name_or_path: Optional[str] = None, - max_length: int = 512, - data_path: Optional[str] = None, - output_dir: Optional[str] = None, - device: Optional[torch.device] = None, - local_model_path: Optional[str] = None, - local_tokenizer_path: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - """Initializes the HuggingFaceWrapper with the given configuration. - - Arguments: - config_name (str): Configuration name for model setup. - pretrained_model_name_or_path (str): Identifier for - the pre-trained model on - Hugging Face. - max_length (int): Maximum sequence length for the - model output per reply. - Defaults to 512. - data_path (str, optional): Path to the dataset for - fine-tuning the model. - output_dir (str, optional): User specified path to save - the fine-tuned model - and its tokenizer. By default - save to this example's - directory if not specified. - device (torch.device, optional): Device to run the model on. - Default to GPU if available. - local_model_path (str, optional): Local file path to a - pre-trained model. - fine_tune_config (dict, optional): Configuration for - fine-tuning the model. - **kwargs: Additional keyword arguments. - """ - super().__init__(config_name=config_name) - self.model = None - self.max_length = max_length # Set max_length as an attribute - self.pretrained_model_name_or_path = pretrained_model_name_or_path - relative_path = os.path.join( - os.path.dirname(__file__), - "../conversation_with_agent_with_finetuned_model/.env", - ) - dotenv_path = os.path.normpath(relative_path) - _ = load_dotenv(dotenv_path) # read local .env file - self.huggingface_token = os.getenv("HUGGINGFACE_TOKEN") - - if device is None: - self.device = torch.device( - "cuda" if torch.cuda.is_available() else "cpu", - ) - else: - self.device = device - - self.load_model( - pretrained_model_name_or_path, - local_model_path=local_model_path, - ) - self.load_tokenizer( - pretrained_model_name_or_path, - local_tokenizer_path=local_tokenizer_path, - ) - - if data_path is not None: - self.model = self.fine_tune_training( - self.model, - self.tokenizer, - data_path, - output_dir, - token=self.huggingface_token, - fine_tune_config=fine_tune_config, - ) - - def __call__( - self, - inputs: List[Dict[str, Any]], - **kwargs: Any, - ) -> ModelResponse: - """Process the input data to generate a response from the model. - - This method tokenizes the input text, generates - a response using the model, - and then decodes the generated tokens into a string. 
- - Arguments: - input (list): A list of dictionaries where each dictionary contains - 'name', 'role' and 'content' keys - and their respective values. - **kwargs: Additional keyword arguments for the - model's generate function. - - Returns: - ModelResponse: An object containing the generated - text and raw model output. - - Raises: - Exception: If an error occurs during text generation. - """ - - try: - # Tokenize the input text - concatenated_input = "\n ".join( - [f"{d.get('role')}: {d['content']}" for d in inputs], - ) - input_ids = self.tokenizer.encode( - f"{concatenated_input}\n assistent: ", - return_tensors="pt", - ) - # Generate response using the model - outputs = self.model.generate( - input_ids.to(self.device), - max_new_tokens=self.max_length, - **kwargs, - ) - # Decode the generated tokens to a string - generated_text = self.tokenizer.decode( - outputs[0][input_ids.shape[1] :], # noqa: E203 - skip_special_tokens=True, - ) - return ModelResponse(text=generated_text, raw=outputs) - except Exception as e: - logger.error(f"Generation error: {e}") - raise - - def format( - self, - *args: Union[MessageBase, Sequence[MessageBase]], - ) -> List[dict]: - """A basic strategy to format the input into the required format of - Hugging Face models. - - Args: - args (`Union[MessageBase, Sequence[MessageBase]]`): - The input arguments to be formatted, where each argument - should be a `Msg` object, or a list of `Msg` objects. - In distribution, placeholder is also allowed. - - Returns: - `List[dict]`: - The formatted messages. - """ - huggingface_msgs = [] - for msg in args: - if msg is None: - continue - if isinstance(msg, MessageBase): - # content shouldn't be empty string - if msg.content == "": - logger.warning( - "The content field cannot be " - "empty string. To avoid error, the empty string is " - "replaced by a blank space automatically, but the " - "model may not work as expected.", - ) - msg.content = " " - - huggingface_msg = { - "role": msg.role, - "content": _convert_to_str(msg.content), - } - - # image url - if msg.url is not None: - huggingface_msg["images"] = [msg.url] - - huggingface_msgs.append(huggingface_msg) - elif isinstance(msg, list): - huggingface_msgs.extend(self.format(*msg)) - else: - raise TypeError( - f"Invalid message type: {type(msg)}, `Msg` is expected.", - ) - - return huggingface_msgs - - def load_model( - self, - pretrained_model_name_or_path: Optional[str] = None, - local_model_path: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - ) -> None: - """ - Load a new model for the agent from - a local path and update the agent's model. - - Arguments: - local_model_path (str): The file path to the model to be loaded. - pretrained_model_name_or_path (str): An identifier for - the model on Huggingface. - - Raises: - Exception: If the model cannot be loaded from the given - path or identifier. - Possible reasons include file not found, - incorrect model ID, - or network issues while fetching the model. 
- """ - - bnb_config_default = {} - - if fine_tune_config is not None: - if fine_tune_config.get("bnb_config") is not None: - bnb_config_default.update(fine_tune_config["bnb_config"]) - - bnb_config = BitsAndBytesConfig(**bnb_config_default) - - try: - if local_model_path is None: - self.model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path, - quantization_config=bnb_config, - token=self.huggingface_token, - device_map="auto", - ) - info_msg = ( - f"Successfully loaded new model " - f"'{pretrained_model_name_or_path}' from " - f"Hugging Face" - ) - else: - self.model = AutoModelForCausalLM.from_pretrained( - local_model_path, - quantization_config=bnb_config, - local_files_only=True, - device_map="auto", - ) - info_msg = ( - f"Successfully loaded new model " - f"'{pretrained_model_name_or_path}' from " - f"'{local_model_path}'" - ) - - # log the successful model loading - logger.info(info_msg) - - except Exception as e: - # Handle exceptions during model loading, - # such as file not found or load errors - error_msg = ( - f"Failed to load model '{pretrained_model_name_or_path}' " - f"from '{local_model_path}': {e}" - ) - - logger.error(error_msg) - - raise - - def load_tokenizer( - self, - pretrained_model_name_or_path: Optional[str] = None, - local_tokenizer_path: Optional[str] = None, - ) -> None: - """ - Load the tokenizer from a local path. - - Arguments: - local_tokenizer_path (str): The file path to the - tokenizer to be loaded. - pretrained_model_name_or_path (str): An identifier - for the model on Huggingface. - fine_tune_config (dict, optional): Configuration options for - fine-tuning the model, - including QLoRA and training - arguments. - Raises: - Exception: If the tokenizer cannot be loaded from the - given path or identifier. Possible reasons include file not found, - incorrect model ID, or network issues while fetching the tokenizer. - """ - - try: - if local_tokenizer_path is None: - self.tokenizer = AutoTokenizer.from_pretrained( - pretrained_model_name_or_path, - token=self.huggingface_token, - ) - # log the successful tokenizer loading - logger.info( - f"Successfully loaded new tokenizer for model " - f"'{pretrained_model_name_or_path}' from Hugging Face", - ) - - else: - self.tokenizer = AutoTokenizer.from_pretrained( - local_tokenizer_path, - ) - # log the successful tokenizer loading - logger.info( - f"Successfully loaded new tokenizer for model " - f"'{pretrained_model_name_or_path}'" - f" from '{local_tokenizer_path}'", - ) - - except Exception as e: - # Handle exceptions during model loading, - # such as file not found or load errors - error_message = ( - f"Failed to load tokenizer for model" - f" '{pretrained_model_name_or_path}' from " - f"'{local_tokenizer_path}': {e}" - ) - logger.error(error_message) - - raise - - def fine_tune( - self, - data_path: Optional[str] = None, - output_dir: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - ) -> None: - """ - Fine-tune the agent's model using data from the specified path. - - Arguments: - data_path (str): The file path to the training - data from Hugging Face. - output_dir (str, optional): User specified path - to save the fine-tuned model - and its tokenizer. By default - save to this example's - directory if not specified. - - Raises: - Exception: If the fine-tuning process fails. This could be - due to issues with the data path, configuration parameters, - or internal errors during the training process. 
- """ - try: - self.model = self.fine_tune_training( - self.model, - self.tokenizer, - data_path, - output_dir, - token=self.huggingface_token, - fine_tune_config=fine_tune_config, - ) - - logger.info( - f"Successfully fine-tuned model with data from '{data_path}'", - ) - except Exception as e: - logger.error( - f"Failed to fine-tune model with data from '{data_path}': {e}", - ) - raise - - def filer_sequence_lengths( - self, - max_input_seq_length: int, - dataset_obj: List[Dict[str, List[str]]], - ) -> List[int]: - """ - Identifies and returns the indices of conversation - entries that exceed max_input_seq_length characters in length. - - Args: - dataset_obj (List[Dict[str, List[str]]]): A list where - each dictionary contains 'conversations', - a list of two strings (question and answer). - - Returns: - List[int]: Indices of conversations where the combined - length of the question and answer exceeds - max_input_seq_length characters. - """ - # Initialize a list to store the sequence lengths - sequence_lengths = [] - - # list of indices that are too long - too_long = [] - - # Loop over the dataset and get the lengths of text sequences - for idx, example in enumerate(dataset_obj): - sequence_length = len( - example["conversations"][0] + example["conversations"][1], - ) - sequence_lengths.append(sequence_length) - if sequence_length > max_input_seq_length: - too_long.append(idx) - - return too_long - - def formatting_prompts_func( - self, - example: Dict[str, List[List[str]]], - ) -> List[str]: - """ - Formats each conversation in the dataset for training. - Args: - example (Dict[str, List[List[str]]]): A dataset. - - Returns: - List[str]: A dataset with combined field. - """ - output_texts = [] - for i in range(len(example["conversations"])): - text = ( - "### Question: " - + example["conversations"][i][0] - + "\n ### Answer: " - + example["conversations"][i][1] - ) - output_texts.append(text) - return output_texts - - def fine_tune_training( - self, - model: AutoModelForCausalLM, - tokenizer: AutoTokenizer, - data_path: Optional[str] = None, - output_dir: Optional[str] = None, - token: Optional[str] = None, - fine_tune_config: Optional[Dict[str, Any]] = None, - ) -> AutoModelForCausalLM: - """ - The actual method that handles training and fine-tuning - the model on the dataset specified by - the data_path using a given tokenizer. - - Arguments: - model (AutoModelForCausalLM): The pre-trained causal language model - from Hugging Face's transformers. - tokenizer (AutoTokenizer): The tokenizer corresponding to - the pre-trained model. - data_path (str): The file path or dataset identifier to load - the dataset from Hugging Face. - output_dir (str, optional): User specified path - to save the fine-tuned model - and its tokenizer. By default - save to this example's - directory if not specified. - token (str): The authentication token for Hugging Face. - fine_tune_config (dict, optional): Configuration options for - fine-tuning the model, - including QLoRA and training - arguments. - - Returns: - AutoModelForCausalLM: The fine-tuned language model. - - Raises: - Exception: Raises an exception if the dataset - loading or fine-tuning process fails. - - Note: - This method updates the model in place and also logs - the fine-tuning process. - It utilizes the QLoRA configuration and custom training arguments - to adapt the pre-trained model to the specific dataset. 
- The training log and trained model are saved in the same - directory with the specific timestamp at saving time - as part of the log/model fodler name. - """ - - dataset = load_dataset(data_path, split="train", token=token) - - indexes_to_drop = self.filer_sequence_lengths(300, dataset) - - dataset_reduced = dataset.select( - i for i in range(len(dataset)) if i not in set(indexes_to_drop) - ) - - formatted_dataset = dataset_reduced.train_test_split(test_size=0.1) - - from peft import LoraConfig - - lora_config_default = { - "r": 16, - "lora_alpha": 32, - "lora_dropout": 0.05, - "bias": "none", - "task_type": "CAUSAL_LM", - } - - if fine_tune_config is not None: - if fine_tune_config.get("lora_config") is not None: - lora_config_default.update(fine_tune_config["lora_config"]) - - training_defaults = { - "per_device_train_batch_size": 1, - "gradient_accumulation_steps": 4, - "gradient_checkpointing": False, - "num_train_epochs": 5, - "output_dir": "./", - "optim": "paged_adamw_8bit", - "logging_steps": 1, - } - - if fine_tune_config is not None: - if fine_tune_config.get("training_args") is not None: - training_defaults.update(fine_tune_config["training_args"]) - - if output_dir is not None: - training_defaults["output_dir"] = output_dir - - from peft import get_peft_model - - lora_config = LoraConfig(**lora_config_default) - model = get_peft_model(model, lora_config) - - collator = DataCollatorForCompletionOnlyLM( - response_template=" ### Answer:", - tokenizer=tokenizer, - ) - - trainer_args = transformers.TrainingArguments(**training_defaults) - - trainer = SFTTrainer( - model, - formatting_func=self.formatting_prompts_func, - data_collator=collator, - train_dataset=formatted_dataset["train"], - eval_dataset=formatted_dataset["test"], - peft_config=lora_config, - args=trainer_args, - max_seq_length=2048, - ) - - logger.info( - "fine-tuning model", - ) - - trainer.train() - - now = datetime.now() - time_string = now.strftime("%Y-%m-%d_%H-%M-%S") - - if output_dir is not None: - os.makedirs(output_dir, exist_ok=True) - - # Specify the filename - log_name_temp = model.config.name_or_path.split("/")[-1] - log_name = f"{log_name_temp}_{time_string}_log_history.json" - log_path = os.path.join(os.path.dirname(__file__), log_name) - - # log training history - if output_dir is not None: - with open( - os.path.join(output_dir, log_name), - "w", - encoding="utf-8", - ) as f: - json.dump(trainer.state.log_history, f) - else: - with open(log_path, "w", encoding="utf-8") as f: - json.dump(trainer.state.log_history, f) - - # save model - model_name = ( - f"sft_{model.config.name_or_path.split('/')[-1]}_{time_string}" - ) - if output_dir is not None: - model_path = os.path.join(output_dir, model_name) - else: - model_path = os.path.join(os.path.dirname(__file__), model_name) - trainer.save_model(model_path) - - # save tokenizer - tokenizer_name_temp = model.config.name_or_path.split("/")[-1] - tokenizer_name = f"sft_{tokenizer_name_temp}_tokenizer_{time_string}" - if output_dir is not None: - tokenizer_path = os.path.join( - output_dir, - tokenizer_name, - ) - else: - tokenizer_path = os.path.join( - os.path.dirname(__file__), - tokenizer_name, - ) - tokenizer.save_pretrained(tokenizer_path) - - return model From 5dfbf0079a836267589959553fec6c4d20b29661 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Wed, 22 May 2024 21:08:28 +0800 Subject: [PATCH 24/32] revert unecessary changes --- docs/sphinx_doc/en/source/tutorial/105-logging.md | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx_doc/en/source/tutorial/105-logging.md b/docs/sphinx_doc/en/source/tutorial/105-logging.md index 7575b11cd..98f872a8b 100644 --- a/docs/sphinx_doc/en/source/tutorial/105-logging.md +++ b/docs/sphinx_doc/en/source/tutorial/105-logging.md @@ -75,7 +75,7 @@ You can run the WebUI in the following python code: import agentscope agentscope.web.init( - path_save="YOUR_output_dir" + path_save="YOUR_SAVE_PATH" ) ``` From 7c3f84d941ac1781c72156c746a40f9165e0e54a Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Wed, 22 May 2024 21:14:24 +0800 Subject: [PATCH 25/32] revert unnecessary changes --- docs/sphinx_doc/en/source/tutorial/105-logging.md | 2 +- docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx_doc/en/source/tutorial/105-logging.md b/docs/sphinx_doc/en/source/tutorial/105-logging.md index 7575b11cd..98f872a8b 100644 --- a/docs/sphinx_doc/en/source/tutorial/105-logging.md +++ b/docs/sphinx_doc/en/source/tutorial/105-logging.md @@ -75,7 +75,7 @@ You can run the WebUI in the following python code: import agentscope agentscope.web.init( - path_save="YOUR_output_dir" + path_save="YOUR_SAVE_PATH" ) ``` diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md b/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md index 073d30515..d517cd46b 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md @@ -76,7 +76,7 @@ logger.error("The agent encountered an unexpected error while processing a reque import agentscope agentscope.web.init( - path_save="YOUR_output_dir" + path_save="YOUR_SAVE_PATH" ) ``` From 56933fe0de042c5fcd955062222bee8e09522ce3 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Wed, 22 May 2024 21:15:59 +0800 Subject: [PATCH 26/32] revert unnecessary change --- src/agentscope/web/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentscope/web/README.md b/src/agentscope/web/README.md index 855e2d831..3af071ee7 100644 --- a/src/agentscope/web/README.md +++ b/src/agentscope/web/README.md @@ -12,7 +12,7 @@ To start a web UI, you can run the following python code: import agentscope agentscope.web.init( - path_save="YOUR_output_dir", + path_save="YOUR_SAVE_PATH"", host="YOUR_WEB_IP", # defaults to 127.0.0.1 port=5000 # defaults to 5000 ) From cc568fe11843083ad2609cdc71c2d1121a435c83 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Wed, 22 May 2024 21:17:45 +0800 Subject: [PATCH 27/32] revert unnecessary changes --- examples/game_gomoku/code/board_agent.py | 6 +++--- examples/game_gomoku/main.ipynb | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/game_gomoku/code/board_agent.py b/examples/game_gomoku/code/board_agent.py index d84051277..6cbef4ced 100644 --- a/examples/game_gomoku/code/board_agent.py +++ b/examples/game_gomoku/code/board_agent.py @@ -25,7 +25,7 @@ EMPTY_PIECE = "0" -def board2img(board: np.ndarray, output_dir: str) -> str: +def board2img(board: np.ndarray, save_path: str) -> str: """Convert the board to an image and save it to the specified path.""" size = board.shape[0] @@ -63,9 +63,9 @@ def board2img(board: np.ndarray, output_dir: str) -> str: ax.set_xticklabels(range(size)) ax.set_yticklabels(range(size)) ax.invert_yaxis() - plt.savefig(output_dir, 
bbox_inches="tight", pad_inches=0.1) + plt.savefig(save_path, bbox_inches="tight", pad_inches=0.1) plt.close(fig) # Close the figure to free memory - return output_dir + return save_path class BoardAgent(AgentBase): diff --git a/examples/game_gomoku/main.ipynb b/examples/game_gomoku/main.ipynb index 7044a8243..04be0c07b 100644 --- a/examples/game_gomoku/main.ipynb +++ b/examples/game_gomoku/main.ipynb @@ -70,7 +70,7 @@ "import matplotlib.pyplot as plt\n", "import matplotlib.patches as patches\n", "\n", - "def board2img(board: np.ndarray, output_dir: str)->str:\n", + "def board2img(board: np.ndarray, save_path: str)->str:\n", " size = board.shape[0]\n", " fig, ax = plt.subplots(figsize=(10, 10))\n", " ax.set_xlim(0, size - 1)\n", @@ -100,9 +100,9 @@ " ax.set_xticklabels(range(size))\n", " ax.set_yticklabels(range(size))\n", " ax.invert_yaxis()\n", - " plt.savefig(output_dir, bbox_inches='tight', pad_inches=0.1)\n", + " plt.savefig(save_path, bbox_inches='tight', pad_inches=0.1)\n", " plt.close(fig) # Close the figure to free memory\n", - " return output_dir" + " return save_path" ], "metadata": { "collapsed": false, From d544878fe30619d0e4f5e9f719b5be188f68dbc6 Mon Sep 17 00:00:00 2001 From: zyzhang1130 <36942574+zyzhang1130@users.noreply.github.com> Date: Wed, 22 May 2024 21:16:40 +0800 Subject: [PATCH 28/32] revert unnecessary changes --- src/agentscope/web/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentscope/web/README.md b/src/agentscope/web/README.md index 3af071ee7..1fa92ce7d 100644 --- a/src/agentscope/web/README.md +++ b/src/agentscope/web/README.md @@ -12,7 +12,7 @@ To start a web UI, you can run the following python code: import agentscope agentscope.web.init( - path_save="YOUR_SAVE_PATH"", + path_save="YOUR_SAVE_PATH", host="YOUR_WEB_IP", # defaults to 127.0.0.1 port=5000 # defaults to 5000 ) From 202dac36bfdb00ac7ca322ecfc2202da6e73fd65 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Fri, 24 May 2024 10:21:45 +0800 Subject: [PATCH 29/32] reformat --- src/agentscope/models/gemini_model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/agentscope/models/gemini_model.py b/src/agentscope/models/gemini_model.py index 326406576..cdbb6c34d 100644 --- a/src/agentscope/models/gemini_model.py +++ b/src/agentscope/models/gemini_model.py @@ -164,17 +164,17 @@ def __call__( "Generation stopped because the candidate " "content was flagged for safety reasons. 
", ) - elif finish_reason == 4: + if finish_reason == 4: raise ValueError( "Generation stopped because the candidate " "content was flagged for recitation reasons.", ) - elif finish_reason == 5: + if finish_reason == 5: raise ValueError( "Generation stopped due to an Unknown reason.", ) - else: - raise ValueError(f"Unknown finish reason: {finish_reason}") + + raise ValueError(f"Unknown finish reason: {finish_reason}") if ( not candidate.content From 8d24af3d34fe4311b72bf76a4b5d715e2786f53d Mon Sep 17 00:00:00 2001 From: DavdGao Date: Fri, 24 May 2024 11:44:45 +0800 Subject: [PATCH 30/32] Add detailed information for error report; Reformat --- src/agentscope/models/gemini_model.py | 89 +++++++++++++++------------ 1 file changed, 51 insertions(+), 38 deletions(-) diff --git a/src/agentscope/models/gemini_model.py b/src/agentscope/models/gemini_model.py index cdbb6c34d..3deca77d1 100644 --- a/src/agentscope/models/gemini_model.py +++ b/src/agentscope/models/gemini_model.py @@ -13,8 +13,12 @@ try: import google.generativeai as genai + + # This package will be installed when the google-generativeai is installed + import google.ai.generativelanguage as glm except ImportError: genai = None + glm = None class GeminiWrapperBase(ModelWrapperBase, ABC): @@ -42,6 +46,13 @@ def __init__( """ super().__init__(config_name=config_name) + # Test if the required package is installed + if genai is None: + raise ImportError( + "The google-generativeai package is not installed, " + "please install it first.", + ) + # Load the api_key from argument or environment variable api_key = api_key or os.environ.get("GOOGLE_API_KEY") @@ -150,44 +161,46 @@ def __call__( ) # step3: Check for candidates and handle accordingly - if response.candidates: - candidate = response.candidates[0] - finish_reason = candidate.finish_reason - - if finish_reason not in (1, 2): # Not successful - logger.warning( - f"Generation stopped due to " - f"finish_reason: {finish_reason}", + if ( + not response.candidates[0].content + or not response.candidates[0].content.parts + or not response.candidates[0].content.parts[0].text + ): + # If we cannot get the response text from the model + finish_reason = response.candidates[0].finish_reason + reasons = glm.Candidate.FinishReason + + if finish_reason == reasons.STOP: + error_info = ( + "Natural stop point of the model or provided stop " + "sequence." ) - if finish_reason == 3: - raise ValueError( - "Generation stopped because the candidate " - "content was flagged for safety reasons. ", - ) - if finish_reason == 4: - raise ValueError( - "Generation stopped because the candidate " - "content was flagged for recitation reasons.", - ) - if finish_reason == 5: - raise ValueError( - "Generation stopped due to an Unknown reason.", - ) - - raise ValueError(f"Unknown finish reason: {finish_reason}") - - if ( - not candidate.content - or not candidate.content.parts - or not candidate.content.parts[0].text - ): - raise ValueError("No valid text parts found in the response.") + elif finish_reason == reasons.MAX_TOKENS: + error_info = ( + "The maximum number of tokens as specified in the request " + "was reached." + ) + elif finish_reason == reasons.SAFETY: + error_info = ( + "The candidate content was flagged for safety reasons." + ) + elif finish_reason == reasons.RECITATION: + error_info = ( + "The candidate content was flagged for recitation reasons." + ) + elif finish_reason in [ + reasons.FINISH_REASON_UNSPECIFIED, + reasons.OTHER, + ]: + error_info = "Unknown error." 
+ else: + error_info = "No information provided from Gemini API." - response_text = ( - candidate.content.parts[0] - .text.strip("```json") - .strip("```") - .strip() + raise ValueError( + "The Google Gemini API failed to generate text response with " + f"the following finish reason: {error_info}\n" + f"YOUR INPUT: {contents}\n" + f"RAW RESPONSE FROM GEMINI API: {response}\n", ) # step4: record the api invocation if needed @@ -202,7 +215,7 @@ def __call__( # step5: update monitor accordingly token_prompt = self.model.count_tokens(contents).total_tokens - token_response = self.model.count_tokens(response_text).total_tokens + token_response = self.model.count_tokens(response.text).total_tokens self.update_monitor( call_counter=1, completion_tokens=token_response, @@ -212,7 +225,7 @@ def __call__( # step6: return response return ModelResponse( - text=response_text, + text=response.text, raw=response, ) From a37a66e24af710538012cd2bde51e91508758012 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Fri, 24 May 2024 11:59:38 +0800 Subject: [PATCH 31/32] Fix bug in unittest and reformat --- tests/gemini_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/gemini_test.py b/tests/gemini_test.py index 20bb75cbb..7408c5d92 100644 --- a/tests/gemini_test.py +++ b/tests/gemini_test.py @@ -20,11 +20,25 @@ def flush() -> None: MonitorFactory.flush() +class DummyPart: + """Dummy part for testing.""" + + text = "Hello! How can I help you?" + + +class DummyContent: + """Dummy content for testing.""" + + parts = [DummyPart()] + + class DummyResponse: """Dummy response for testing.""" text = "Hello! How can I help you?" + candidates = [DummyContent] + def __str__(self) -> str: """Return string representation.""" return str({"text": self.text}) From 25d3f58c879a33dd4bc7c2ba8fe2209484dd0199 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Fri, 24 May 2024 12:17:53 +0800 Subject: [PATCH 32/32] fix bug --- tests/gemini_test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/gemini_test.py b/tests/gemini_test.py index 7408c5d92..f854fa7ac 100644 --- a/tests/gemini_test.py +++ b/tests/gemini_test.py @@ -32,12 +32,18 @@ class DummyContent: parts = [DummyPart()] +class DummyCandidate: + """Dummy candidate for testing.""" + + content = DummyContent() + + class DummyResponse: """Dummy response for testing.""" text = "Hello! How can I help you?" - candidates = [DummyContent] + candidates = [DummyCandidate] def __str__(self) -> str: """Return string representation."""