feat(agents-api): Add doc search system tool (julep-ai#604)

> [!IMPORTANT]
> Adds document creation and search functionalities for agents and users, updates `list_docs` with a new parameter, and refactors `base_evaluate` for complex dictionaries.
>
> - **Behavior**:
>   - Adds support for document creation and search operations in `execute_system.py` for both agents and users.
>   - Introduces `create_agent_doc`, `create_user_doc`, `search_agent_docs`, and `search_user_docs` functions.
>   - Handles `text`, `vector`, and `hybrid` search requests using `HybridDocSearchRequest`, `TextOnlyDocSearchRequest`, and `VectorDocSearchRequest`.
>   - Replaces `developer_id` with `x_developer_id` for certain operations.
> - **Models**:
>   - Updates `list_docs` in `list_docs.py` to include `include_without_embeddings` parameter.
> - **Misc**:
>   - Refactors `base_evaluate` in `base_evaluate.py` to handle nested dictionaries with string values.
>
> <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup> for 8432e95. It will automatically update as commits are pushed.</sup>

---------

Signed-off-by: Diwank Singh Tomer <[email protected]>
Co-authored-by: Diwank Singh Tomer <[email protected]>
Co-authored-by: HamadaSalhab <[email protected]>
Bhabuk10 · Oct 7, 2024 · e36f33b · e36f33b
1 parent 00af85d
commit e36f33b
Show file tree

Hide file tree

Showing 3 changed files with 100 additions and 15 deletions.
diff --git a/agents-api/agents_api/activities/execute_system.py b/agents-api/agents_api/activities/execute_system.py
@@ -2,8 +2,15 @@
 from uuid import UUID
 
 from beartype import beartype
+from fastapi.background import BackgroundTasks
 from temporalio import activity
 
+from ..autogen.Docs import (
+    CreateDocRequest,
+    HybridDocSearchRequest,
+    TextOnlyDocSearchRequest,
+    VectorDocSearchRequest,
+)
 from ..autogen.Tools import SystemDef
 from ..common.protocol.tasks import StepContext
 from ..env import testing
@@ -31,6 +38,8 @@
 from ..models.user.get_user import get_user as get_user_query
 from ..models.user.list_users import list_users as list_users_query
 from ..models.user.update_user import update_user as update_user_query
+from ..routers.docs.create_doc import create_agent_doc, create_user_doc
+from ..routers.docs.search_docs import search_agent_docs, search_user_docs
 
 
 @beartype
@@ -63,17 +72,54 @@ async def execute_system(
                 agent_doc_args = {
                     **{
                         "owner_type": "agent",
-                        "owner_id": arguments.pop("agent_id"),
+                        "owner_id": arguments["agent_id"],
                     },
                     **arguments,
                 }
+                agent_doc_args.pop("agent_id")
+
                 if system.operation == "list":
                     return list_docs_query(**agent_doc_args)
+
                 elif system.operation == "create":
-                    return create_doc_query(**agent_doc_args)
+                    # The `create_agent_doc` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+                    return await create_agent_doc(
+                        data=CreateDocRequest(**arguments.pop("data")),
+                        background_tasks=BackgroundTasks(),
+                        **arguments,
+                    )
+
                 elif system.operation == "delete":
                     return delete_doc_query(**agent_doc_args)
 
+                elif system.operation == "search":
+                    # The `search_agent_docs` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+
+                    if "text" in arguments and "vector" in arguments:
+                        search_params = HybridDocSearchRequest(
+                            text=arguments.pop("text"),
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    elif "text" in arguments:
+                        search_params = TextOnlyDocSearchRequest(
+                            text=arguments.pop("text"),
+                            limit=arguments.get("limit", 10),
+                        )
+                    elif "vector" in arguments:
+                        search_params = VectorDocSearchRequest(
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    return await search_agent_docs(
+                        search_params=search_params,
+                        **arguments,
+                    )
+
             # NO SUBRESOURCE
             elif system.subresource == None:
                 if system.operation == "list":
@@ -95,17 +141,54 @@ async def execute_system(
                 user_doc_args = {
                     **{
                         "owner_type": "user",
-                        "owner_id": arguments.pop("user_id"),
+                        "owner_id": arguments["user_id"],
                     },
                     **arguments,
                 }
+                user_doc_args.pop("user_id")
+
                 if system.operation == "list":
                     return list_docs_query(**user_doc_args)
+
                 elif system.operation == "create":
-                    return create_doc_query(**user_doc_args)
+                    # The `create_user_doc` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+                    return await create_user_doc(
+                        data=CreateDocRequest(**arguments.pop("data")),
+                        background_tasks=BackgroundTasks(),
+                        **arguments,
+                    )
+
                 elif system.operation == "delete":
                     return delete_doc_query(**user_doc_args)
 
+                elif system.operation == "search":
+                    # The `search_user_docs` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+
+                    if "text" in arguments and "vector" in arguments:
+                        search_params = HybridDocSearchRequest(
+                            text=arguments.pop("text"),
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    elif "text" in arguments:
+                        search_params = TextOnlyDocSearchRequest(
+                            text=arguments.pop("text"),
+                            limit=arguments.get("limit", 10),
+                        )
+                    elif "vector" in arguments:
+                        search_params = VectorDocSearchRequest(
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    return await search_user_docs(
+                        search_params=search_params,
+                        **arguments,
+                    )
+
             # NO SUBRESOURCE
             elif system.subresource == None:
                 if system.operation == "list":

diff --git a/agents-api/agents_api/activities/task_steps/base_evaluate.py b/agents-api/agents_api/activities/task_steps/base_evaluate.py
@@ -46,29 +46,29 @@ async def base_evaluate(
     evaluator = get_evaluator(names=values, extra_functions=extra_lambdas)
 
     try:
+        result = None
         match exprs:
             case str():
-                return evaluator.eval(exprs)
-
+                result = evaluator.eval(exprs)
             case list():
-                return [evaluator.eval(expr) for expr in exprs]
-
-            case dict() as d if all(isinstance(v, dict) for v in d.values()):
-                return {
+                result = [evaluator.eval(expr) for expr in exprs]
+            case dict() as d if all(
+                isinstance(v, dict) or isinstance(v, str) for v in d.values()
+            ):
+                result = {
                     k: {ik: evaluator.eval(iv) for ik, iv in v.items()}
+                    if isinstance(v, dict)
+                    else evaluator.eval(v)
                     for k, v in d.items()
                 }
-
-            case dict():
-                return {k: evaluator.eval(v) for k, v in exprs.items()}
-
             case _:
                 raise ValueError(f"Invalid expression: {exprs}")
 
+        return result
+
     except BaseException as e:
         if activity.in_activity():
             activity.logger.error(f"Error in base_evaluate: {e}")
-
         raise
 
 

diff --git a/agents-api/agents_api/models/docs/list_docs.py b/agents-api/agents_api/models/docs/list_docs.py
@@ -50,6 +50,7 @@ def list_docs(
     sort_by: Literal["created_at"] = "created_at",
     direction: Literal["asc", "desc"] = "desc",
     metadata_filter: dict[str, Any] = {},
+    include_without_embeddings: bool = False,
 ) -> tuple[list[str], dict]:
     # Transforms the metadata_filter dictionary into a string representation for the datalog query.
     metadata_filter_str = ", ".join(
@@ -70,6 +71,7 @@ def list_docs(
                 content,
                 embedding,
             }},
+            {"" if include_without_embeddings else "not is_null(embedding),"}
             snippet_data = [index, content, embedding]
 
         ?[