Skip to content

Commit

Permalink
Merge pull request #703 from dimagi/cs/save_image_files
Browse files Browse the repository at this point in the history
Save image files
  • Loading branch information
snopoke authored Oct 4, 2024
2 parents 99d4131 + 1c2e70c commit 9b1b195
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 61 deletions.
57 changes: 48 additions & 9 deletions apps/service_providers/llm_service/runnables.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,8 @@ def _save_response_annotations(self, output, thread_id, run_id) -> tuple[str, di
from apps.assistants.sync import get_and_store_openai_file

client = self.state.raw_client
generated_files = []
chat = self.state.session.chat
session_id = self.state.session.id

if isinstance(output, str):
output_message = output
Expand All @@ -350,11 +351,15 @@ def _save_response_annotations(self, output, thread_id, run_id) -> tuple[str, di
).values_list("external_id", flat=True)

file_ids = set()
image_file_attachments = []
file_path_attachments = []
for message in client.beta.threads.messages.list(thread_id, run_id=run_id):
for message_content in message.content:
if message_content.type == "image_file":
# Ignore these for now. Typically, they are also referenced in the text content
pass
if created_file := self._create_image_file_from_image_message(client, message_content.image_file):
image_file_attachments.append(created_file)
file_ids.add(created_file.external_id)

elif message_content.type == "text":
annotations = message_content.text.annotations
for idx, annotation in enumerate(annotations):
Expand Down Expand Up @@ -382,22 +387,56 @@ def _save_response_annotations(self, output, thread_id, run_id) -> tuple[str, di
# Original citation text example: sandbox:/mnt/data/the_file.csv.
# This is the link part in what looks like
# [Download the CSV file](sandbox:/mnt/data/the_file.csv)
session_id = self.state.session.id
output_message = output_message.replace(
file_ref_text, f"file:{team.slug}:{session_id}:{created_file.id}"
)
generated_files.append(created_file)

file_path_attachments.append(created_file)
file_ids.add(file_id)
else:
# Ignore any other type for now
pass

# Attach the generated files to the chat object as an annotation
if generated_files:
chat = self.state.session.chat
if file_path_attachments:
resource, _created = chat.attachments.get_or_create(tool_type="file_path")
resource.files.add(*generated_files)
resource.files.add(*file_path_attachments)

if image_file_attachments:
resource, _created = chat.attachments.get_or_create(tool_type="image_file")
resource.files.add(*image_file_attachments)

return output_message, list(file_ids)

def _create_image_file_from_image_message(self, client, image_file_message) -> File | None:
    """
    Create a File record for an OpenAI `image_file` content block.

    The file metadata is fetched with `client.files.retrieve` and the file is stored through
    `get_and_store_openai_file`. Typically, these files don't have extensions, so the extension is
    guessed from the file content and appended to the stored file's name. We know it will be an
    image, but not which extension to use.

    Args:
        client: OpenAI client used to retrieve the file.
        image_file_message: The `image_file` payload of the content block; only `file_id` is read.

    Returns:
        The created File, or None if any step fails (the exception is logged, not raised).
    """
    from mimetypes import guess_extension

    import magic

    from apps.assistants.sync import get_and_store_openai_file

    try:
        file_id = image_file_message.file_id
        openai_file = client.files.retrieve(file_id=file_id)
        created_file = get_and_store_openai_file(
            client=client,
            file_name=openai_file.filename,
            file_id=file_id,
            team_id=self.state.experiment.team_id,
        )
        # Only the leading bytes are needed to sniff the type; the context manager makes sure the
        # handle is closed again instead of being leaked.
        with created_file.file.open() as file_handle:
            mimetype = magic.from_buffer(file_handle.read(2048), mime=True)

        # guess_extension returns None for unknown mimetypes; never append the literal "None".
        extension = guess_extension(mimetype) or ""
        if extension:
            # extension looks like '.png'
            created_file.name = f"{created_file.name}{extension}"
            created_file.save()
        return created_file
    except Exception as ex:
        # Best effort: a failed image download must not break the assistant response.
        logger.exception(ex)
        return None

def _get_file_name_and_link_for_citation(self, file_id: str, forbidden_file_ids: list[str]) -> tuple[str, str]:
"""Returns a file name and a link constructor for `file_id`. If `file_id` is a member of
`forbidden_file_ids`, the link will be empty to prevent unauthorized access.
Expand Down
45 changes: 45 additions & 0 deletions apps/service_providers/tests/test_assistant_runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,51 @@ def test_assistant_response_with_annotations(
assert "openai-file-2" in message.metadata["openai_file_ids"]


@pytest.mark.django_db()
@patch("openai.resources.files.Files.retrieve")
@patch("apps.assistants.sync.get_and_store_openai_file")
@patch("openai.resources.beta.threads.runs.Runs.retrieve")
@patch("openai.resources.beta.Threads.create_and_run")
@patch("openai.resources.beta.threads.messages.Messages.list")
def test_assistant_response_with_image_file_content_block(
    list_messages,
    create_and_run,
    retrieve_run,
    get_and_store_openai_file,
    retrieve_openai_file,
    db_session,
):
    """
    An ImageFileContentBlock entry in the content array of an OpenAI message response should result in the file
    being saved under a new "image_file" tool type attachment on the chat.
    """
    # Thread / run plumbing returned by the patched OpenAI endpoints
    thread_id = "test_thread_id"
    run = _create_run(ASSISTANT_ID, thread_id)
    create_and_run.return_value = run
    retrieve_run.return_value = run
    list_messages.return_value = _create_thread_messages(ASSISTANT_ID, run.id, thread_id, [{"assistant": "Ola"}])

    # File metadata as reported by OpenAI; note the filename has no extension
    retrieve_openai_file.return_value = FileObject(
        id="local_file_openai_id",
        filename="3fac0517-6367-4f92-a1f3-c9d9087c9085",
        object="file",
        purpose="assistants",
        bytes=1,
        created_at=1,
        status="processed",
        status_details=None,
    )
    # Local record handed back when the runnable stores the OpenAI file
    get_and_store_openai_file.return_value = FileFactory(external_id="openai-file-1", id=10)

    # Run assistant
    runnable = create_experiment_runnable(db_session.experiment, db_session)
    result = runnable.invoke("test", attachments=[])

    assert result.output == "Ola"
    chat_attachments = db_session.chat.attachments
    assert chat_attachments.filter(tool_type="image_file").exists() is True
    assert chat_attachments.get(tool_type="image_file").files.count() == 1


@pytest.mark.parametrize(
("messages", "thread_id", "thread_created", "messages_created"),
[
Expand Down
1 change: 1 addition & 0 deletions requirements/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ twilio
whitenoise[brotli]
phonenumberslite
emoji
python-magic
Loading

0 comments on commit 9b1b195

Please sign in to comment.