Merge pull request emrgnt-cmplxty#157 from maks-ivanov/feature/add-code-index

Feature/add code index
Huntemall · May 30, 2023 · 45082ef · 45082ef
2 parents 1bd43f0 + b75f4cd
commit 45082ef
Show file tree

Hide file tree

Showing 54 changed files with 1,333 additions and 1,121 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,4 @@ __pycache__/
 /.coverage*
 /coverage_analyzer_report.xml
 jobs/*
+/test_path/core/agent/automata_agent.py
diff --git a/automata/cli/cli.md b/automata/cli/cli.md
@@ -17,7 +17,7 @@ Common options shared by both `main` and `evaluator` commands:
 - `--instructions`: The initial instructions for the agent.
 - `--model`: The model to use across the framework (default: "gpt-4").
 - `--session_id`: The session ID for the agent.
-- `--llm_toolkits`: A comma-separated list of toolkits to be used by the main agent (default: "python_indexer,python_writer,codebase_oracle").
+- `--llm_toolkits`: A comma-separated list of toolkits to be used by the main agent (default: "python_inspector,python_writer,codebase_oracle").
 - `--main_config_name`: The config version of the agent (default: AgentConfigName.AUTOMATA_MAIN_DEV.value).
 - `--helper_agent_names`: A comma-separated list of config names for the helper agents (default: AgentConfigName.AUTOMATA_INDEXER_DEV.value,AgentConfigName.AUTOMATA_WRITER_DEV.value).
 - `--stream`: Whether to stream the responses (default: True).

diff --git a/automata/cli/cli_utils.py b/automata/cli/cli_utils.py
@@ -5,8 +5,8 @@
 from automata.configs.automata_agent_config_utils import AutomataAgentConfigFactory
 from automata.configs.automata_agent_configs import AutomataAgentConfig
 from automata.configs.config_enums import AgentConfigName
+from automata.core.code_indexing.utils import build_repository_overview
 from automata.core.utils import get_logging_config, root_py_path
-from automata.tools.python_tools.python_indexer import PythonIndexer
 
 logger = logging.getLogger(__name__)
 
@@ -69,7 +69,7 @@ def create_config_from_kwargs(**kwargs) -> AutomataAgentConfig:
 
     if kwargs.get("include_overview"):
         instruction_payload = kwargs.get("instruction_payload", {})
-        instruction_payload["overview"] = PythonIndexer.build_overview(root_py_path())
+        instruction_payload["overview"] = build_repository_overview(root_py_path())
         kwargs["instruction_payload"] = instruction_payload
 
     return AutomataAgentConfigFactory.create_config(None, **kwargs)
diff --git a/automata/cli/click_options.py b/automata/cli/click_options.py
@@ -13,7 +13,7 @@ def common_options(command: click.Command, *args, **kwargs) -> click.Command:
         click.option(
             "--llm_toolkits",
             type=str,
-            default="python_indexer,python_writer",
+            default="python_retriever,python_writer",
             help="Comma-separated list of toolkits to be used main agent.",
         ),
         click.option(

diff --git a/automata/cli/scripts/run_evaluator.py b/automata/cli/scripts/run_evaluator.py
@@ -1,15 +1,14 @@
-import logging
 import logging.config
 from typing import Dict, List, Union
 
 from automata.configs.automata_agent_config_utils import build_agent_message
 from automata.configs.automata_agent_configs import AutomataAgentConfig, AutomataInstructionPayload
 from automata.configs.config_enums import AgentConfigName, ConfigCategory
 from automata.core.agent.automata_actions import ResultAction, ToolAction
+from automata.core.code_indexing.utils import build_repository_overview
 from automata.core.utils import load_config, root_py_path
 from automata.evals.eval import Eval
 from automata.evals.eval_helpers import EvalAction, EvalResult
-from automata.tools.python_tools.python_indexer import PythonIndexer
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +34,7 @@ def main(args):
         instruction = sample["instruction"]
         expected_actions = sample["expected_actions"]
 
-        overview = PythonIndexer.build_overview(root_py_path())
+        overview = build_repository_overview(root_py_path())
         # TODO - Fix this..
         agent_messages = build_agent_message()
         instruction_payload = AutomataInstructionPayload(

diff --git a/automata/config.py b/automata/config.py
@@ -24,4 +24,4 @@
 REPOSITORY_PATH = os.getenv("REPOSITORY_PATH", ".")
 REPOSITORY_NAME = os.getenv("REPOSITORY_NAME", "maks-ivanov/automata")
 TASK_DB_PATH = os.getenv("TASK_DB_PATH", "tasks.sqlite3")
-TASKS_DIR_PATH = os.getenv("TASKS_DIR_PATH")
+TASKS_DIR_PATH = os.getenv("TASKS_DIR_PATH", "tasks")
diff --git a/automata/configs/agent_configs/automata_indexer_dev.yaml b/automata/configs/agent_configs/automata_indexer_dev.yaml
@@ -88,19 +88,19 @@ system_instruction_template: >
             - tool_name
               - python-indexer-retrieve-docstring
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer
           - tool_query_2
             - tool_name
               - python-indexer-retrieve-docstring
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer.retrieve_code
           - tool_query_3
             - tool_name
               - python-indexer-retrieve-code
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer.retrieve_code
 
       *User*
@@ -224,6 +224,6 @@ description: >
 number_of_expected_actions: 11
 
 tools: >
-  python_indexer
+  python_inspector
 
 template_format: "f-string"
diff --git a/automata/configs/agent_configs/automata_indexer_prod.yaml b/automata/configs/agent_configs/automata_indexer_prod.yaml
@@ -88,19 +88,19 @@ system_instruction_template: >
             - tool_name
               - python-indexer-retrieve-docstring
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer
           - tool_query_2
             - tool_name
               - python-indexer-retrieve-docstring
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer.retrieve_code
           - tool_query_3
             - tool_name
               - python-indexer-retrieve-code
             - tool_args
-              - tools.python_tools.python_indexer
+              - tools.python_tools.python_inspector
               - PythonIndexer.retrieve_code
 
       *User*
@@ -224,6 +224,6 @@ description: >
 number_of_expected_actions: 11
 
 tools: >
-  python_indexer
+  python_inspector
 
 template_format: "f-string"
diff --git a/automata/configs/automata_agent_configs.py b/automata/configs/automata_agent_configs.py
@@ -8,6 +8,7 @@
 
 from automata.configs.config_enums import AgentConfigName, ConfigCategory, InstructionConfigVersion
 from automata.core.base.tool import Toolkit, ToolkitType
+from automata.core.code_indexing.utils import build_repository_overview
 
 
 @dataclass
@@ -120,10 +121,9 @@ def load(cls, config_name: AgentConfigName) -> "AutomataAgentConfig":
     def _add_overview_to_instruction_payload(cls, config: "AutomataAgentConfig") -> None:
         """Handles the overview input for the agent."""
         from automata.core.utils import root_py_path
-        from automata.tools.python_tools.python_indexer import PythonIndexer
 
         if "overview" in config.instruction_input_variables:
-            config.instruction_payload.overview = PythonIndexer.build_overview(root_py_path())
+            config.instruction_payload.overview = build_repository_overview(root_py_path())
 
     @staticmethod
     def _format_prompt(format_variables: AutomataInstructionPayload, input_text: str) -> str:

diff --git a/automata/configs/symbols/index.scip b/automata/configs/symbols/index.scip
diff --git a/automata/configs/symbols/symbol_embedding.json b/automata/configs/symbols/symbol_embedding.json
diff --git a/automata/core/agent/automata_agent.py b/automata/core/agent/automata_agent.py
@@ -73,7 +73,6 @@ def iter_task(self) -> Optional[Tuple[OpenAIChatMessage, OpenAIChatMessage]]:
         Returns:
             Optional[Tuple[OpenAIChatMessage, OpenAIChatMessage]]: Latest assistant and user messages, or None if the task is completed.
         """
-
         if self.completed:
             raise ValueError("Cannot run an agent that has already completed.")
 
@@ -253,6 +252,26 @@ def _has_helper_agents(self) -> bool:
         """
         return self.coordinator is not None
 
+    def _extract_outputs(self, pattern: str, messages: list) -> dict:
+        """
+        Extract outputs from the given messages based on the provided regex pattern.
+
+        Args:
+            pattern (str): The regex pattern to use for extraction.
+            messages (list): The list of messages to process.
+
+        Returns:
+            dict: A dictionary where the keys are the names of the tools or agents and the values are their outputs.
+        """
+        outputs = {}
+        for message in messages:
+            matches = re.finditer(pattern, message.content, re.DOTALL)
+            for match in matches:
+                output_name, output_value = match.group(1), match.group(2).strip()
+                outputs[output_name] = output_value
+
+        return outputs
+
     def _parse_completion_message(self, completion_message: str) -> str:
         """
         Parses the completion message and replaces placeholders with actual tool outputs.
@@ -263,30 +282,19 @@ def _parse_completion_message(self, completion_message: str) -> str:
         Returns:
             str: The parsed completion message with placeholders replaced by tool outputs.
         """
-        outputs = {}
-        for message in self.messages:
-            pattern = r"-\s(tool_output_\d+)\s+-\s(.*?)(?=-\s(tool_output_\d+)|$)"
-            matches = re.finditer(pattern, message.content, re.DOTALL)
-            for match in matches:
-                tool_name, tool_output = match.group(1), match.group(2).strip()
-                outputs[tool_name] = tool_output
+        tool_pattern = r"-\s(tool_output_\d+)\s+-\s(.*?)(?=-\s(tool_output_\d+)|$)"
+        agent_pattern = r"-\s(agent_output_\d+)\s+-\s(.*?)(?=-\s(agent_output_\d+)|$)"
+        outputs = self._extract_outputs(tool_pattern, self.messages)
+
         if self._has_helper_agents():
-            for message in self.messages:
-                pattern = r"-\s(agent_output_\d+)\s+-\s(.*?)(?=-\s(agent_output_\d+)|$)"
-                matches = re.finditer(pattern, message.content, re.DOTALL)
-                for match in matches:
-                    agent_version, agent_output = match.group(1), match.group(2).strip()
-                    outputs[agent_version] = agent_output
-
-            for output_name in outputs:
-                completion_message = completion_message.replace(
-                    f"{{{output_name}}}", outputs[output_name]
-                )
+            agent_outputs = self._extract_outputs(agent_pattern, self.messages)
+            outputs.update(agent_outputs)
 
         for output_name in outputs:
             completion_message = completion_message.replace(
                 f"{{{output_name}}}", outputs[output_name]
             )
+
         return completion_message
 
     def _build_initial_messages(self, formatters: Dict[str, str]) -> List[OpenAIChatMessage]:

diff --git a/automata/core/agent/tests/conftest.py b/automata/core/agent/tests/conftest.py
@@ -8,7 +8,7 @@
 
 @pytest.fixture
 def automata_agent():
-    tool_list = ["python_indexer"]
+    tool_list = ["python_retriever"]
     mock_llm_toolkits = build_llm_toolkits(tool_list)
 
     instruction_payload = AutomataInstructionPayload(agents_message="", overview="", tools="")

diff --git a/automata/core/agent/tests/test_automata_agent.py b/automata/core/agent/tests/test_automata_agent.py
@@ -12,7 +12,7 @@
 
 
 def test_build_tool_message(automata_agent_config_builder):
-    tool_list = ["python_indexer", "python_writer"]
+    tool_list = ["python_retriever", "python_writer"]
     mock_llm_toolkits = build_llm_toolkits(tool_list)
 
     config = automata_agent_config_builder.with_llm_toolkits(mock_llm_toolkits).build()

diff --git a/automata/core/agent/tests/test_automata_agent_builder.py b/automata/core/agent/tests/test_automata_agent_builder.py
@@ -49,7 +49,7 @@ def test_builder_provided_parameters_override_defaults(automata_agent_config_bui
 
 
 def test_builder_accepts_all_fields(automata_agent_config_builder):
-    tool_list = ["python_indexer", "python_writer"]
+    tool_list = ["python_retriever", "python_writer"]
     mock_llm_toolkits = build_llm_toolkits(tool_list)
 
     config = (

diff --git a/automata/core/base/tests/test_tool.py b/automata/core/base/tests/test_tool.py
@@ -91,7 +91,8 @@ def test_toolkit():
 
 
 def test_toolkit_type():
-    assert len(ToolkitType) == 3
-    assert ToolkitType.PYTHON_INDEXER.name == "PYTHON_INDEXER"
+    assert len(ToolkitType) == 4
+    assert ToolkitType.PYTHON_RETRIEVER.name == "PYTHON_RETRIEVER"
     assert ToolkitType.PYTHON_WRITER.name == "PYTHON_WRITER"
     assert ToolkitType.COVERAGE_PROCESSOR.name == "COVERAGE_PROCESSOR"
+    assert ToolkitType.SYMBOL_SEARCHER.name == "SYMBOL_SEARCHER"
diff --git a/automata/core/base/tool.py b/automata/core/base/tool.py
@@ -119,8 +119,7 @@ def __repr__(self) -> str:
 
 
 class ToolkitType(Enum):
-    """An enum representing the different types of toolkits that can be built."""
-
-    PYTHON_INDEXER = auto()
+    PYTHON_RETRIEVER = auto()
     PYTHON_WRITER = auto()
     COVERAGE_PROCESSOR = auto()
+    SYMBOL_SEARCHER = auto()
diff --git a/automata/core/code_indexing/__init__.py b/automata/core/code_indexing/__init__.py