From e36f33b785f0301999695636be2b37793f6d981b Mon Sep 17 00:00:00 2001
From: Hamada Salhab
Date: Mon, 7 Oct 2024 14:32:37 +0300
Subject: [PATCH] feat(agents-api): Add doc search system tool (#604)

> [!IMPORTANT]
> Adds document creation and search functionality for agents and users, updates `list_docs` with a new parameter, and refactors `base_evaluate` for complex dictionaries.
>
> - **Behavior**:
>   - Adds support for document creation and search operations in `execute_system.py` for both agents and users.
>   - Introduces `create_agent_doc`, `create_user_doc`, `search_agent_docs`, and `search_user_docs` functions.
>   - Handles `text`, `vector`, and `hybrid` search requests using `HybridDocSearchRequest`, `TextOnlyDocSearchRequest`, and `VectorDocSearchRequest`.
>   - Replaces `developer_id` with `x_developer_id` for certain operations.
> - **Models**:
>   - Updates `list_docs` in `list_docs.py` to include `include_without_embeddings` parameter.
> - **Misc**:
>   - Refactors `base_evaluate` in `base_evaluate.py` to handle nested dictionaries with string values.
>
> This description was created by [Ellipsis](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral) for 8432e9584eb68dfd4f474386933271b6e1f601ca. It will automatically update as commits are pushed.

--------- Signed-off-by: Diwank Singh Tomer Co-authored-by: Diwank Singh Tomer Co-authored-by: HamadaSalhab --- .../agents_api/activities/execute_system.py | 91 ++++++++++++++++++- .../activities/task_steps/base_evaluate.py | 22 ++--- .../agents_api/models/docs/list_docs.py | 2 + 3 files changed, 100 insertions(+), 15 deletions(-) diff --git a/agents-api/agents_api/activities/execute_system.py b/agents-api/agents_api/activities/execute_system.py index 8e4d71274..8ffb85814 100644 --- a/agents-api/agents_api/activities/execute_system.py +++ b/agents-api/agents_api/activities/execute_system.py @@ -2,8 +2,15 @@ from uuid import UUID from beartype import beartype +from fastapi.background import BackgroundTasks from temporalio import activity +from ..autogen.Docs import ( + CreateDocRequest, + HybridDocSearchRequest, + TextOnlyDocSearchRequest, + VectorDocSearchRequest, +) from ..autogen.Tools import SystemDef from ..common.protocol.tasks import StepContext from ..env import testing @@ -31,6 +38,8 @@ from ..models.user.get_user import get_user as get_user_query from ..models.user.list_users import list_users as list_users_query from ..models.user.update_user import update_user as update_user_query +from ..routers.docs.create_doc import create_agent_doc, create_user_doc +from ..routers.docs.search_docs import search_agent_docs, search_user_docs @beartype @@ -63,17 +72,54 @@ async def execute_system( agent_doc_args = { **{ "owner_type": "agent", - "owner_id": arguments.pop("agent_id"), + "owner_id": arguments["agent_id"], }, **arguments, } + agent_doc_args.pop("agent_id") + if system.operation == "list": return list_docs_query(**agent_doc_args) + elif system.operation == "create": - return create_doc_query(**agent_doc_args) + # The `create_agent_doc` function requires `x_developer_id` instead of `developer_id`. 
+ arguments["x_developer_id"] = arguments.pop("developer_id") + return await create_agent_doc( + data=CreateDocRequest(**arguments.pop("data")), + background_tasks=BackgroundTasks(), + **arguments, + ) + elif system.operation == "delete": return delete_doc_query(**agent_doc_args) + elif system.operation == "search": + # The `search_agent_docs` function requires `x_developer_id` instead of `developer_id`. + arguments["x_developer_id"] = arguments.pop("developer_id") + + if "text" in arguments and "vector" in arguments: + search_params = HybridDocSearchRequest( + text=arguments.pop("text"), + vector=arguments.pop("vector"), + limit=arguments.get("limit", 10), + ) + + elif "text" in arguments: + search_params = TextOnlyDocSearchRequest( + text=arguments.pop("text"), + limit=arguments.get("limit", 10), + ) + elif "vector" in arguments: + search_params = VectorDocSearchRequest( + vector=arguments.pop("vector"), + limit=arguments.get("limit", 10), + ) + + return await search_agent_docs( + search_params=search_params, + **arguments, + ) + # NO SUBRESOURCE elif system.subresource == None: if system.operation == "list": @@ -95,17 +141,54 @@ async def execute_system( user_doc_args = { **{ "owner_type": "user", - "owner_id": arguments.pop("user_id"), + "owner_id": arguments["user_id"], }, **arguments, } + user_doc_args.pop("user_id") + if system.operation == "list": return list_docs_query(**user_doc_args) + elif system.operation == "create": - return create_doc_query(**user_doc_args) + # The `create_user_doc` function requires `x_developer_id` instead of `developer_id`. + arguments["x_developer_id"] = arguments.pop("developer_id") + return await create_user_doc( + data=CreateDocRequest(**arguments.pop("data")), + background_tasks=BackgroundTasks(), + **arguments, + ) + elif system.operation == "delete": return delete_doc_query(**user_doc_args) + elif system.operation == "search": + # The `search_user_docs` function requires `x_developer_id` instead of `developer_id`. 
+ arguments["x_developer_id"] = arguments.pop("developer_id") + + if "text" in arguments and "vector" in arguments: + search_params = HybridDocSearchRequest( + text=arguments.pop("text"), + vector=arguments.pop("vector"), + limit=arguments.get("limit", 10), + ) + + elif "text" in arguments: + search_params = TextOnlyDocSearchRequest( + text=arguments.pop("text"), + limit=arguments.get("limit", 10), + ) + elif "vector" in arguments: + search_params = VectorDocSearchRequest( + vector=arguments.pop("vector"), + limit=arguments.get("limit", 10), + ) + + return await search_user_docs( + search_params=search_params, + **arguments, + ) + # NO SUBRESOURCE elif system.subresource == None: if system.operation == "list": diff --git a/agents-api/agents_api/activities/task_steps/base_evaluate.py b/agents-api/agents_api/activities/task_steps/base_evaluate.py index 3fcbf2f73..c6b83ba89 100644 --- a/agents-api/agents_api/activities/task_steps/base_evaluate.py +++ b/agents-api/agents_api/activities/task_steps/base_evaluate.py @@ -46,29 +46,29 @@ async def base_evaluate( evaluator = get_evaluator(names=values, extra_functions=extra_lambdas) try: + result = None match exprs: case str(): - return evaluator.eval(exprs) - + result = evaluator.eval(exprs) case list(): - return [evaluator.eval(expr) for expr in exprs] - - case dict() as d if all(isinstance(v, dict) for v in d.values()): - return { + result = [evaluator.eval(expr) for expr in exprs] + case dict() as d if all( + isinstance(v, dict) or isinstance(v, str) for v in d.values() + ): + result = { k: {ik: evaluator.eval(iv) for ik, iv in v.items()} + if isinstance(v, dict) + else evaluator.eval(v) for k, v in d.items() } - - case dict(): - return {k: evaluator.eval(v) for k, v in exprs.items()} - case _: raise ValueError(f"Invalid expression: {exprs}") + return result + except BaseException as e: if activity.in_activity(): activity.logger.error(f"Error in base_evaluate: {e}") - raise diff --git 
a/agents-api/agents_api/models/docs/list_docs.py b/agents-api/agents_api/models/docs/list_docs.py index 4dad7ec06..8f8d8c7a0 100644 --- a/agents-api/agents_api/models/docs/list_docs.py +++ b/agents-api/agents_api/models/docs/list_docs.py @@ -50,6 +50,7 @@ def list_docs( sort_by: Literal["created_at"] = "created_at", direction: Literal["asc", "desc"] = "desc", metadata_filter: dict[str, Any] = {}, + include_without_embeddings: bool = False, ) -> tuple[list[str], dict]: # Transforms the metadata_filter dictionary into a string representation for the datalog query. metadata_filter_str = ", ".join( @@ -70,6 +71,7 @@ def list_docs( content, embedding, }}, + {"" if include_without_embeddings else "not is_null(embedding),"} snippet_data = [index, content, embedding] ?[