Merge pull request modelscope#7 from ZiTao-Li/zitao/dev_copilot

Reformat code
FredericW · Apr 28, 2024 · 32adc04 · 32adc04
2 parents df7a79e + 82eff0c
commit 32adc04
Show file tree

Hide file tree

Showing 8 changed files with 135 additions and 105 deletions.
diff --git a/examples/conversation_with_RAG_agents/configs/agent_config.json b/examples/conversation_with_RAG_agents/configs/agent_config.json
@@ -30,7 +30,7 @@
             "similarity_top_k": 5,
             "log_retrieval": false,
             "recent_n_mem": 1,
-            "persist_dir": "../../rag_storage/tutorial_assist"
+            "persist_dir": "./rag_storage/tutorial_assist"
       }
     }
   },
@@ -79,8 +79,8 @@
             "similarity_top_k": 5,
             "log_retrieval": false,
             "recent_n_mem": 1,
-            "persist_dir": "../../rag_storage/code_assist"
-          }
+            "persist_dir": "./rag_storage/code_assist"
+      }
     }
   },
   {
@@ -115,7 +115,8 @@
           "log_retrieval": true,
           "recent_n_mem": 1,
           "persist_dir": "../../rag_storage/api_assist",
-          "repo_base": "../../"
+          "repo_base": "../../",
+          "file_dir": "../../docs/docstring_html/"
           }
     }
   },
@@ -189,7 +190,7 @@
             "similarity_top_k": 5,
             "log_retrieval": false,
             "recent_n_mem": 1,
-            "persist_dir": "../../rag_storage/searching_assist"
+            "persist_dir": "./rag_storage/searching_assist"
           }
     }
   }

diff --git a/examples/conversation_with_RAG_agents/rag_example.py b/examples/conversation_with_RAG_agents/rag_example.py
@@ -6,12 +6,29 @@
 import json
 import os
 
-from rag_agents import LlamaIndexAgent
 from groupchat_utils import filter_agents
 
 import agentscope
-from agentscope.agents import UserAgent
-from agentscope.agents import DialogAgent
+from agentscope.agents import UserAgent, DialogAgent, LlamaIndexAgent
+
+
+AGENT_CHOICE_PROMPT = """
+There are following available agents. You need to choose the most appropriate
+agent(s) to answer the user's question.
+
+agent descriptions:{}
+
+First, rephrase the user's question, which must contain the key information.
+Then you need to think step by step. If you believe some of the agents are
+good candidates to answer the question (e.g., AGENT_1 and AGENT_2), then
+you need to follow the following format to generate your output:
+
+'
+Because $YOUR_REASONING.
+I believe @AGENT_1 and @AGENT_2 are the most appropriate agents to answer
+your question.
+'
+"""
 
 
 def prepare_docstring_html(repo_path: str, html_dir: str) -> None:
@@ -49,20 +66,14 @@ def main() -> None:
     tutorial_agent = LlamaIndexAgent(**agent_configs[0]["args"])
     code_explain_agent = LlamaIndexAgent(**agent_configs[1]["args"])
 
-    # NOTE: before defining api-assist, we need to prepare the docstring html
-    # first
+    # prepare html for api agent
     prepare_docstring_html(
-        "../../",
-        "../../docs/docstring_html/",
+        agent_configs[2]["args"]["rag_config"]["repo_base"],
+        agent_configs[2]["args"]["rag_config"]["file_dir"],
     )
     # define an API agent
     api_agent = LlamaIndexAgent(**agent_configs[2]["args"])
 
-    # define a guide agent
-    agent_configs[3]["args"].pop("description")
-    guide_agent = DialogAgent(**agent_configs[3]["args"])
-
-    # define a searching agent
     searching_agent = LlamaIndexAgent(**agent_configs[4]["args"])
 
     rag_agents = [
@@ -73,6 +84,24 @@ def main() -> None:
     ]
     rag_agent_names = [agent.name for agent in rag_agents]
 
+    # define a guide agent
+    rag_agent_descriptions = [
+        "agent name: "
+        + agent.name
+        + "\n agent description："
+        + agent.description
+        + "\n"
+        for agent in rag_agents
+    ]
+    agent_configs[3]["args"].pop("description")
+    agent_configs[3]["args"]["sys_prompt"] = agent_configs[3]["args"][
+        "sys_prompt"
+    ] + AGENT_CHOICE_PROMPT.format(
+        "".join(rag_agent_descriptions),
+    )
+
+    guide_agent = DialogAgent(**agent_configs[3]["args"])
+
     user_agent = UserAgent()
     while True:
         # The workflow is the following:

diff --git a/src/agentscope/agents/__init__.py b/src/agentscope/agents/__init__.py
@@ -8,6 +8,7 @@
 from .text_to_image_agent import TextToImageAgent
 from .rpc_agent import RpcAgent, RpcAgentServerLauncher
 from .react_agent import ReActAgent
+from .rag_agents import RAGAgentBase, LlamaIndexAgent
 
 
 __all__ = [
@@ -20,4 +21,6 @@
     "ReActAgent",
     "RpcAgent",
     "RpcAgentServerLauncher",
+    "RAGAgentBase",
+    "LlamaIndexAgent",
 ]
diff --git a/...onversation_with_RAG_agents/rag_agents.py → src/agentscope/agents/rag_agents.py b/...onversation_with_RAG_agents/rag_agents.py → src/agentscope/agents/rag_agents.py
@@ -8,10 +8,9 @@
 
 from abc import ABC, abstractmethod
 from typing import Optional, Any
-import importlib
 from loguru import logger
 
-from rag import RAGBase, LlamaIndexRAG
+from agentscope.rag import RAGBase, LlamaIndexRAG
 
 from agentscope.agents.agent import AgentBase
 from agentscope.message import Msg
@@ -255,13 +254,13 @@ def init_rag(self) -> LlamaIndexRAG:
         # NOTE: as each selected file type may need to use a different loader
         # and transformations, the length of the list depends on
         # the total count of loaded data.
-        for index_config_i in range(len(index_config)):
-            docs = rag.load_docs(index_config = index_config[index_config_i])
+        for index_config_i, _ in enumerate(index_config):
+            docs = rag.load_docs(index_config=index_config[index_config_i])
             docs_list.append(docs)
 
             # store and indexing for each file type
             if "store_and_index" in index_config[index_config_i]:
-                store_and_index_args = self._prepare_args_from_config(
+                store_and_index_args = rag.prepare_args_from_config(
                     index_config[index_config_i]["store_and_index"],
                 )
             else:
@@ -272,8 +271,10 @@ def init_rag(self) -> LlamaIndexRAG:
         logger.info(f"store_and_index_args args: {store_and_index_args_list}")
 
         # pass the loaded documents and arguments to store_and_index
-        rag.store_and_index(docs_list=docs_list,
-                            store_and_index_args_list=store_and_index_args_list)
+        rag.store_and_index(
+            docs_list=docs_list,
+            store_and_index_args_list=store_and_index_args_list,
+        )
         return rag
 
     def reply(

diff --git a/...versation_with_RAG_agents/rag/__init__.py → src/agentscope/rag/__init__.py b/...versation_with_RAG_agents/rag/__init__.py → src/agentscope/rag/__init__.py
@@ -4,7 +4,6 @@
 
 from .llama_index_rag import LlamaIndexRAG
 
-
 try:
     from .langchain_rag import LangChainRAG
 except Exception:

diff --git a/...tion_with_RAG_agents/rag/langchain_rag.py → src/agentscope/rag/langchain_rag.py b/...tion_with_RAG_agents/rag/langchain_rag.py → src/agentscope/rag/langchain_rag.py
diff --git a/...on_with_RAG_agents/rag/llama_index_rag.py → src/agentscope/rag/llama_index_rag.py b/...on_with_RAG_agents/rag/llama_index_rag.py → src/agentscope/rag/llama_index_rag.py
@@ -3,21 +3,15 @@
 This module is an integration of the Llama index RAG
 into AgentScope package
 """
-
+import os.path
 from typing import Any, Optional, List, Union
 from loguru import logger
-import importlib
-import os.path
 
 try:
     from llama_index.core.readers.base import BaseReader
     from llama_index.core.base.base_retriever import BaseRetriever
     from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
     from llama_index.core.ingestion import IngestionPipeline
-    from llama_index.core.vector_stores.types import (
-        BasePydanticVectorStore,
-        VectorStore,
-    )
     from llama_index.core.bridge.pydantic import PrivateAttr
     from llama_index.core.node_parser.interface import NodeParser
     from llama_index.core.node_parser import SentenceSplitter
@@ -35,8 +29,8 @@
     load_index_from_storage = None
     PrivateAttr = None
 
-from rag import RAGBase
-from rag.rag import (
+from agentscope.rag import RAGBase
+from agentscope.rag.rag import (
     DEFAULT_CHUNK_SIZE,
     DEFAULT_CHUNK_OVERLAP,
     DEFAULT_TOP_K,
@@ -206,9 +200,8 @@ def load_data(
     def store_and_index(
         self,
         docs_list: Any,
-        retriever: Optional[BaseRetriever] = None,
-        transformations: Optional[list[NodeParser]] = None,
-        store_and_index_args_list: Optional[list] = None,
+        retriever: Any = None,
+        store_and_index_args_list: list[dict] = None,
         **kwargs: Any,
     ) -> Any:
         """
@@ -243,11 +236,13 @@ def store_and_index(
             # nodes (also called chunks) are a representation of the documents
             nodes = []
             # we build nodes by using the IngestionPipeline for each document
-            for i in range(len(docs_list)):
+            for i, doc in enumerate(docs_list):
                 nodes = nodes + self.docs_to_nodes(
-                    docs=docs_list[i],
+                    docs=doc,
                     transformations=store_and_index_args_list[i].get(
-                        "transformations", None)
+                        "transformations",
+                        None,
+                    ),
                 )
 
             # feed all the nodes to embedding model to calculate index
@@ -261,7 +256,7 @@ def store_and_index(
             # load the storage_context
             storage_context = StorageContext.from_defaults(
                 persist_dir=self.persist_dir,
-                )
+            )
             # construct the index from the loaded storage context
             self.index = load_index_from_storage(
                 storage_context=storage_context,
@@ -285,7 +280,7 @@ def store_and_index(
             self.retriever = retriever
         return self.index
 
-    def persist_to_dir(self):
+    def persist_to_dir(self) -> None:
         """
         Persist the index to the directory.
         """
@@ -302,7 +297,7 @@ def load_docs(self, index_config: dict) -> Any:
         """
 
         if "load_data" in index_config:
-            load_data_args = self._prepare_args_from_config(
+            load_data_args = self.prepare_args_from_config(
                 index_config["load_data"],
             )
         else:
@@ -315,16 +310,17 @@ def load_docs(self, index_config: dict) -> Any:
                 ) from exc_inner
             load_data_args = {
                 "loader": SimpleDirectoryReader(
-                    index_config["set_default_data_path"]),
+                    index_config["set_default_data_path"],
+                ),
             }
         logger.info(f"rag.load_data args: {load_data_args}")
         docs = self.load_data(**load_data_args)
         return docs
 
     def docs_to_nodes(
-            self,
-            docs: Any,
-            transformations: Optional[list[NodeParser]] = None
+        self,
+        docs: Any,
+        transformations: Optional[list[NodeParser]] = None,
     ) -> Any:
         """
         Convert the documents to nodes.
@@ -397,59 +393,3 @@ def retrieve(self, query: str, to_list_strs: bool = False) -> list[Any]:
                 results.append(node.get_text())
             return results
         return retrieved
-
-    def _prepare_args_from_config(
-        self,
-        config: dict,
-    ) -> Any:
-        """
-        Helper function to build args for the two functions:
-        load_data(...) and store_and_index(docs, ...)
-        in RAG classes.
-        Args:
-            config (dict): a dictionary containing configurations
-
-        Returns:
-            Any: an object that is parsed/built to be an element
-                of input to the function of RAG module.
-        """
-        if not isinstance(config, dict):
-            return config
-
-        if "create_object" in config:
-            # if a term in args is a object,
-            # recursively create object with args from config
-            module_name = config.get("module", "")
-            class_name = config.get("class", "")
-            init_args = config.get("init_args", {})
-            try:
-                cur_module = importlib.import_module(module_name)
-                cur_class = getattr(cur_module, class_name)
-                init_args = self._prepare_args_from_config(init_args)
-                logger.info(
-                    f"load and build object{cur_module, cur_class, init_args}",
-                )
-                return cur_class(**init_args)
-            except ImportError as exc_inner:
-                logger.error(
-                    f"Fail to load class {class_name} "
-                    f"from module {module_name}",
-                )
-                raise ImportError(
-                    f"Fail to load class {class_name} "
-                    f"from module {module_name}",
-                ) from exc_inner
-        else:
-            prepared_args = {}
-            for key, value in config.items():
-                if isinstance(value, list):
-                    prepared_args[key] = []
-                    for c in value:
-                        prepared_args[key].append(
-                            self._prepare_args_from_config(c),
-                        )
-                elif isinstance(value, dict):
-                    prepared_args[key] = self._prepare_args_from_config(value)
-                else:
-                    prepared_args[key] = value
-            return prepared_args