Skip to content

Commit

Permalink
feat(agents-api): Implement doc* models (#442)
Browse files Browse the repository at this point in the history
* refactor(agents-api): Minor refactors

Signed-off-by: Diwank Tomer <[email protected]>

* feat(typespec): Add create-doc endpoint

Signed-off-by: Diwank Tomer <[email protected]>

* feat(agents-api): Add migrations for unifying the owner-docs tables

Signed-off-by: Diwank Tomer <[email protected]>

* feat(agents-api): Implement doc* models

Signed-off-by: Diwank Tomer <[email protected]>

---------

Signed-off-by: Diwank Tomer <[email protected]>
Co-authored-by: Diwank Tomer <[email protected]>
  • Loading branch information
creatorrr and Diwank Tomer authored Aug 4, 2024
1 parent 59f3a3f commit 10d3600
Show file tree
Hide file tree
Showing 57 changed files with 1,780 additions and 570 deletions.
55 changes: 42 additions & 13 deletions agents-api/agents_api/autogen/Docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,30 @@ class BaseDocSearchRequest(BaseModel):
"""


class CreateDocRequest(BaseModel):
    """
    Payload for creating a doc
    """

    model_config = ConfigDict(populate_by_name=True)

    # Optional free-form metadata attached to the doc; defaults to None.
    metadata: dict[str, Any] | None = None

    # Raw-string spelling of the same pattern (no doubled backslashes).
    title: Annotated[
        str,
        Field(
            pattern=r"^[\p{L}\p{Nl}\p{Pattern_Syntax}\p{Pattern_White_Space}]+[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Pattern_Syntax}\p{Pattern_White_Space}]*$"
        ),
    ]
    """Title describing what this document contains"""

    content: str | list[str]
    """Contents of the document"""


class Doc(BaseModel):
model_config = ConfigDict(
populate_by_name=True,
Expand Down Expand Up @@ -76,12 +100,9 @@ class DocReference(BaseModel):
"""
ID of the document
"""
snippet_index: list[int]
"""
Snippets referred to of the document
"""
title: str | None = None
snippet: str | None = None
snippets: Annotated[list[Snippet], Field(min_length=1)]
distance: float | None = None


class EmbedQueryRequest(BaseModel):
Expand All @@ -108,31 +129,39 @@ class HybridDocSearchRequest(BaseDocSearchRequest):
model_config = ConfigDict(
populate_by_name=True,
)
text: str | list[str]
text: str
"""
Text or texts to use in the search. In `hybrid` search mode, either `text` or both `text` and `vector` fields are required.
Text to use in the search. In `hybrid` search mode, either `text` or both `text` and `vector` fields are required.
"""
vector: list[float] | list[list[float]]
vector: list[float]
"""
Vector or vectors to use in the search. Must be the same dimensions as the embedding model or else an error will be thrown.
Vector to use in the search. Must be the same dimensions as the embedding model or else an error will be thrown.
"""


class Snippet(BaseModel):
    # Allow population by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)

    # Position of this snippet and its text.
    index: int
    content: str


# NOTE(review): this span looks like a rendered diff rather than final source —
# the two consecutive `text` annotations (and the two sentences in the string
# below) are presumably the removed line followed by its replacement.
# Confirm against the real file before relying on either annotation.
class TextOnlyDocSearchRequest(BaseDocSearchRequest):
model_config = ConfigDict(
populate_by_name=True,
)
text: str | list[str]
text: str
"""
Text or texts to use in the search.
Text to use in the search.
"""


# NOTE(review): this span looks like a rendered diff rather than final source —
# the two consecutive `vector` annotations (and the two sentences in the string
# below) are presumably the removed line followed by its replacement.
# Confirm against the real file before relying on either annotation.
class VectorDocSearchRequest(BaseDocSearchRequest):
model_config = ConfigDict(
populate_by_name=True,
)
vector: list[float] | list[list[float]]
vector: list[float]
"""
Vector or vectors to use in the search. Must be the same dimensions as the embedding model or else an error will be thrown.
Vector to use in the search. Must be the same dimensions as the embedding model or else an error will be thrown.
"""
11 changes: 11 additions & 0 deletions agents-api/agents_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,14 @@
This module also integrates with the `common` module for exception handling and utility functions, ensuring robust error management and providing reusable components for data processing and query construction.
"""

# ruff: noqa: F401, F403, F405

import agents_api.models.agent as agent
import agents_api.models.docs as docs
import agents_api.models.entry as entry
import agents_api.models.execution as execution
import agents_api.models.session as session
import agents_api.models.task as task
import agents_api.models.tools as tools
import agents_api.models.user as user
10 changes: 10 additions & 0 deletions agents-api/agents_api/models/agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,13 @@
This module serves as the backbone for agent management within the CozoDB ecosystem, facilitating a wide range of operations necessary for the effective handling of agent data.
"""

# ruff: noqa: F401, F403, F405

from .create_agent import create_agent
from .create_or_update_agent import create_or_update_agent
from .delete_agent import delete_agent
from .get_agent import get_agent
from .list_agents import list_agents
from .patch_agent import patch_agent
from .update_agent import update_agent
9 changes: 9 additions & 0 deletions agents-api/agents_api/models/docs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,12 @@
This documentation aims to provide clear, concise, and sufficient context for new developers or contributors to understand the module's role without needing to dive deep into the code immediately.
"""

# ruff: noqa: F401, F403, F405

from .create_doc import create_doc
from .delete_doc import delete_doc
from .embed_snippets import embed_snippets
from .get_doc import get_doc
from .list_docs import list_docs
from .search_docs import search_docs_by_embedding
117 changes: 117 additions & 0 deletions agents-api/agents_api/models/docs/create_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from typing import Literal
from uuid import UUID, uuid4

from beartype import beartype
from fastapi import HTTPException
from pycozo.client import QueryException
from pydantic import ValidationError

from ...autogen.openapi_model import CreateDocRequest, Doc
from ...common.utils.cozo import cozo_process_mutate_data
from ..utils import (
cozo_query,
partialclass,
rewrap_exceptions,
verify_developer_id_query,
verify_developer_owns_resource_query,
wrap_in_class,
)


@rewrap_exceptions(
    {
        QueryException: partialclass(HTTPException, status_code=400),
        ValidationError: partialclass(HTTPException, status_code=400),
        TypeError: partialclass(HTTPException, status_code=400),
    }
)
@wrap_in_class(
    Doc,
    one=True,
    transform=lambda d: {
        "id": UUID(d["doc_id"]),
        "content": [],  # <-- Note: we do not return content on creation
        **d,
    },
)
@cozo_query
@beartype
def create_doc(
    *,
    developer_id: UUID,
    owner_type: Literal["user", "agent"],
    owner_id: UUID,
    doc_id: UUID | None = None,
    data: CreateDocRequest,
) -> tuple[list[str], dict]:
    """
    Build the datalog queries that insert a new doc and its snippets.

    Parameters:
    - developer_id (UUID): The developer on whose behalf the doc is created;
      used by the verification queries.
    - owner_type (Literal["user", "agent"]): The type of the owner of the document.
    - owner_id (UUID): The UUID of the document owner.
    - doc_id (UUID | None): The UUID of the document to be created; a random
      UUID4 is generated when omitted.
    - data (CreateDocRequest): Title, metadata and content of the document.
      It is read but not modified.

    Returns:
    - tuple[list[str], dict]: The ordered list of query strings and the
      parameters (`doc_rows`, `snippet_rows`) they reference.
    """

    # String forms are what go into the rows; keep the typed arguments intact.
    doc_id_str = str(doc_id or uuid4())
    owner_id_str = str(owner_id)

    # Normalise on the dumped dict rather than on `data`, so the caller's
    # request object is left untouched.
    doc_data = data.model_dump()

    content = doc_data.pop("content")
    if isinstance(content, str):
        content = [content]

    doc_data["metadata"] = doc_data.get("metadata") or {}
    doc_data["owner_type"] = owner_type
    doc_data["owner_id"] = owner_id_str
    doc_data["doc_id"] = doc_id_str

    doc_cols, doc_rows = cozo_process_mutate_data(doc_data)

    # NOTE(review): with an empty `content` list `snippet_cols` stays "" and the
    # snippets query below is built without columns — presumably rejected by the
    # database; confirm whether empty content should be refused earlier.
    snippet_cols, snippet_rows = "", []

    # One row per content snippet, keyed by its position in the list.
    for snippet_idx, snippet in enumerate(content):
        snippet_cols, new_snippet_rows = cozo_process_mutate_data(
            dict(
                doc_id=doc_id_str,
                index=snippet_idx,
                content=snippet,
            )
        )
        snippet_rows += new_snippet_rows

    create_snippets_query = f"""
        ?[{snippet_cols}] <- $snippet_rows
        :insert snippets {{ {snippet_cols} }}
        :returning
    """

    create_doc_query = f"""
        ?[{doc_cols}] <- $doc_rows
        :insert docs {{ {doc_cols} }}
        :returning
    """

    # Verification queries come first; the doc insert is last.
    queries = [
        verify_developer_id_query(developer_id),
        verify_developer_owns_resource_query(
            developer_id, f"{owner_type}s", **{f"{owner_type}_id": owner_id_str}
        ),
        create_snippets_query,
        create_doc_query,
    ]

    # Nothing is executed here: the queries and their parameters are returned
    # (presumably to be run by the `cozo_query` decorator).
    return (
        queries,
        {
            "doc_rows": doc_rows,
            "snippet_rows": snippet_rows,
        },
    )
97 changes: 0 additions & 97 deletions agents-api/agents_api/models/docs/create_docs.py

This file was deleted.

Loading

0 comments on commit 10d3600

Please sign in to comment.