From e36f33b785f0301999695636be2b37793f6d981b Mon Sep 17 00:00:00 2001
From: Hamada Salhab <hamada.a.salhab@gmail.com>
Date: Mon, 7 Oct 2024 14:32:37 +0300
Subject: [PATCH] feat(agents-api): Add doc search system tool (#604)

<!-- ELLIPSIS_HIDDEN -->



> [!IMPORTANT]
> Adds document creation and search functionalities for agents and
users, updates `list_docs` with a new parameter, and refactors
`base_evaluate` for complex dictionaries.
>
>   - **Behavior**:
>     - Adds support for document creation and search operations in
>       `execute_system.py` for both agents and users.
>     - Uses the `create_agent_doc`, `create_user_doc`,
>       `search_agent_docs`, and `search_user_docs` route handlers from
>       `routers/docs`.
>     - Handles `text`, `vector`, and `hybrid` search requests using
>       `HybridDocSearchRequest`, `TextOnlyDocSearchRequest`, and
>       `VectorDocSearchRequest`.
>     - Renames the `developer_id` argument to `x_developer_id` for the
>       doc `create` and `search` operations, because the route handlers
>       expect that parameter name.
>   - **Models**:
>     - Updates `list_docs` in `list_docs.py` to include an
>       `include_without_embeddings` parameter (default `False`); unless
>       it is set, the query adds a `not is_null(embedding)` filter.
>   - **Misc**:
>     - Refactors `base_evaluate` in `base_evaluate.py` so that a
>       dictionary may mix string expressions with nested dictionaries
>       of expressions.
>
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup>
for 8432e9584eb68dfd4f474386933271b6e1f601ca. It will automatically
update as commits are pushed.</sup>

<!-- ELLIPSIS_HIDDEN -->

---------

Signed-off-by: Diwank Singh Tomer <diwank.singh@gmail.com>
Co-authored-by: Diwank Singh Tomer <diwank.singh@gmail.com>
Co-authored-by: HamadaSalhab <HamadaSalhab@users.noreply.github.com>
---
 .../agents_api/activities/execute_system.py   | 91 ++++++++++++++++++-
 .../activities/task_steps/base_evaluate.py    | 22 ++---
 .../agents_api/models/docs/list_docs.py       |  2 +
 3 files changed, 100 insertions(+), 15 deletions(-)
diff --git a/agents-api/agents_api/activities/execute_system.py b/agents-api/agents_api/activities/execute_system.py
index 8e4d71274..8ffb85814 100644
--- a/agents-api/agents_api/activities/execute_system.py
+++ b/agents-api/agents_api/activities/execute_system.py
@@ -2,8 +2,15 @@
 from uuid import UUID
 
 from beartype import beartype
+from fastapi.background import BackgroundTasks
 from temporalio import activity
 
+from ..autogen.Docs import (
+    CreateDocRequest,
+    HybridDocSearchRequest,
+    TextOnlyDocSearchRequest,
+    VectorDocSearchRequest,
+)
 from ..autogen.Tools import SystemDef
 from ..common.protocol.tasks import StepContext
 from ..env import testing
@@ -31,6 +38,8 @@
 from ..models.user.get_user import get_user as get_user_query
 from ..models.user.list_users import list_users as list_users_query
 from ..models.user.update_user import update_user as update_user_query
+from ..routers.docs.create_doc import create_agent_doc, create_user_doc
+from ..routers.docs.search_docs import search_agent_docs, search_user_docs
 
 
 @beartype
@@ -63,17 +72,54 @@ async def execute_system(
                 agent_doc_args = {
                     **{
                         "owner_type": "agent",
-                        "owner_id": arguments.pop("agent_id"),
+                        "owner_id": arguments["agent_id"],
                     },
                     **arguments,
                 }
+                agent_doc_args.pop("agent_id")
+
                 if system.operation == "list":
                     return list_docs_query(**agent_doc_args)
+
                 elif system.operation == "create":
-                    return create_doc_query(**agent_doc_args)
+                    # The `create_agent_doc` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+                    return await create_agent_doc(
+                        data=CreateDocRequest(**arguments.pop("data")),
+                        background_tasks=BackgroundTasks(),
+                        **arguments,
+                    )
+
                 elif system.operation == "delete":
                     return delete_doc_query(**agent_doc_args)
 
+                elif system.operation == "search":
+                    # The `search_agent_docs` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+
+                    if "text" in arguments and "vector" in arguments:
+                        search_params = HybridDocSearchRequest(
+                            text=arguments.pop("text"),
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    elif "text" in arguments:
+                        search_params = TextOnlyDocSearchRequest(
+                            text=arguments.pop("text"),
+                            limit=arguments.get("limit", 10),
+                        )
+                    elif "vector" in arguments:
+                        search_params = VectorDocSearchRequest(
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    return await search_agent_docs(
+                        search_params=search_params,
+                        **arguments,
+                    )
+
             # NO SUBRESOURCE
             elif system.subresource == None:
                 if system.operation == "list":
@@ -95,17 +141,54 @@ async def execute_system(
                 user_doc_args = {
                     **{
                         "owner_type": "user",
-                        "owner_id": arguments.pop("user_id"),
+                        "owner_id": arguments["user_id"],
                     },
                     **arguments,
                 }
+                user_doc_args.pop("user_id")
+
                 if system.operation == "list":
                     return list_docs_query(**user_doc_args)
+
                 elif system.operation == "create":
-                    return create_doc_query(**user_doc_args)
+                    # The `create_user_doc` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+                    return await create_user_doc(
+                        data=CreateDocRequest(**arguments.pop("data")),
+                        background_tasks=BackgroundTasks(),
+                        **arguments,
+                    )
+
                 elif system.operation == "delete":
                     return delete_doc_query(**user_doc_args)
 
+                elif system.operation == "search":
+                    # The `search_user_docs` function requires `x_developer_id` instead of `developer_id`.
+                    arguments["x_developer_id"] = arguments.pop("developer_id")
+
+                    if "text" in arguments and "vector" in arguments:
+                        search_params = HybridDocSearchRequest(
+                            text=arguments.pop("text"),
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    elif "text" in arguments:
+                        search_params = TextOnlyDocSearchRequest(
+                            text=arguments.pop("text"),
+                            limit=arguments.get("limit", 10),
+                        )
+                    elif "vector" in arguments:
+                        search_params = VectorDocSearchRequest(
+                            vector=arguments.pop("vector"),
+                            limit=arguments.get("limit", 10),
+                        )
+
+                    return await search_user_docs(
+                        search_params=search_params,
+                        **arguments,
+                    )
+
             # NO SUBRESOURCE
             elif system.subresource == None:
                 if system.operation == "list":
diff --git a/agents-api/agents_api/activities/task_steps/base_evaluate.py b/agents-api/agents_api/activities/task_steps/base_evaluate.py
index 3fcbf2f73..c6b83ba89 100644
--- a/agents-api/agents_api/activities/task_steps/base_evaluate.py
+++ b/agents-api/agents_api/activities/task_steps/base_evaluate.py
@@ -46,29 +46,29 @@ async def base_evaluate(
     evaluator = get_evaluator(names=values, extra_functions=extra_lambdas)
 
     try:
+        result = None
         match exprs:
             case str():
-                return evaluator.eval(exprs)
-
+                result = evaluator.eval(exprs)
             case list():
-                return [evaluator.eval(expr) for expr in exprs]
-
-            case dict() as d if all(isinstance(v, dict) for v in d.values()):
-                return {
+                result = [evaluator.eval(expr) for expr in exprs]
+            case dict() as d if all(
+                isinstance(v, dict) or isinstance(v, str) for v in d.values()
+            ):
+                result = {
                     k: {ik: evaluator.eval(iv) for ik, iv in v.items()}
+                    if isinstance(v, dict)
+                    else evaluator.eval(v)
                     for k, v in d.items()
                 }
-
-            case dict():
-                return {k: evaluator.eval(v) for k, v in exprs.items()}
-
             case _:
                 raise ValueError(f"Invalid expression: {exprs}")
 
+        return result
+
     except BaseException as e:
         if activity.in_activity():
             activity.logger.error(f"Error in base_evaluate: {e}")
-
         raise
 
 
diff --git a/agents-api/agents_api/models/docs/list_docs.py b/agents-api/agents_api/models/docs/list_docs.py
index 4dad7ec06..8f8d8c7a0 100644
--- a/agents-api/agents_api/models/docs/list_docs.py
+++ b/agents-api/agents_api/models/docs/list_docs.py
@@ -50,6 +50,7 @@ def list_docs(
     sort_by: Literal["created_at"] = "created_at",
     direction: Literal["asc", "desc"] = "desc",
     metadata_filter: dict[str, Any] = {},
+    include_without_embeddings: bool = False,
 ) -> tuple[list[str], dict]:
     # Transforms the metadata_filter dictionary into a string representation for the datalog query.
     metadata_filter_str = ", ".join(
@@ -70,6 +71,7 @@ def list_docs(
                 content,
                 embedding,
             }},
+            {"" if include_without_embeddings else "not is_null(embedding),"}
             snippet_data = [index, content, embedding]
 
         ?[