Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DashScope API based multimodal service functions in AgentScope library #255

Merged
merged 11 commits into from
Jun 5, 2024
8 changes: 4 additions & 4 deletions examples/conversation_with_customized_services/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@
" weather_data = weather.run(f\"{city},{country}\")\n",
" return ServiceResponse(ServiceExecStatus.SUCCESS, weather_data)\n",
" except Exception as e:\n",
" return ServiceResponse(ServiceExecStatus.FAILURE, str(e))"
" return ServiceResponse(ServiceExecStatus.ERROR, str(e))"
]
},
{
Expand Down Expand Up @@ -408,7 +408,7 @@
" return ServiceResponse(ServiceExecStatus.SUCCESS, {\"urls\": urls})\n",
" else:\n",
" err_msg = f\"status_code: {response.status_code}, code: {response.code}, message: {response.message}\"\n",
" return ServiceResponse(ServiceExecStatus.FAILURE, err_msg)"
" return ServiceResponse(ServiceExecStatus.ERROR, err_msg)"
]
},
{
Expand Down Expand Up @@ -457,7 +457,7 @@
" return ServiceResponse(ServiceExecStatus.SUCCESS, description)\n",
" else:\n",
" err_msg = f\"status_code: {response.status_code}, code: {response.code}, message: {response.message}\"\n",
" return ServiceResponse(ServiceExecStatus.FAILURE, err_msg) \n",
" return ServiceResponse(ServiceExecStatus.ERROR, err_msg) \n",
" "
]
},
Expand Down Expand Up @@ -500,7 +500,7 @@
" f.write(result.get_audio_data())\n",
" return ServiceResponse(ServiceExecStatus.SUCCESS, 'output.wav')\n",
" else:\n",
" return ServiceResponse(ServiceExecStatus.FAILURE, \"Failed to generate audio file\")"
" return ServiceResponse(ServiceExecStatus.ERROR, \"Failed to generate audio file\")"
]
},
{
Expand Down
8 changes: 8 additions & 0 deletions src/agentscope/service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
dblp_search_authors,
dblp_search_venues,
)
from .multi_modality.dash_scope import (
dashscope_image_to_text,
dashscope_text_to_image,
dashscope_text_to_audio,
)
from .service_response import ServiceResponse
from .service_toolkit import ServiceToolkit
from .service_toolkit import ServiceFactory
Expand Down Expand Up @@ -78,6 +83,9 @@ def get_help() -> None:
"dblp_search_publications",
"dblp_search_authors",
"dblp_search_venues",
"dashscope_image_to_text",
"dashscope_text_to_image",
"dashscope_text_to_audio",
# to be deprecated
"ServiceFactory",
]
229 changes: 229 additions & 0 deletions src/agentscope/service/multi_modality/dash_scope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# -*- coding: utf-8 -*-
"""Use DashScope API to generate images,
convert text to audio, and convert images to text.
Please refer to the official documentation for more details:
https://dashscope.aliyun.com/
"""
from http import HTTPStatus
from typing import Union, Tuple

import os
import requests


import dashscope
from dashscope.audio.tts import SpeechSynthesizer


from agentscope.service.service_response import ServiceResponse
from agentscope.service.service_status import ServiceExecStatus


def dashscope_text_to_image(
    prompt: str,
    api_key: str,
    number_of_images: int = 1,
    size: str = "1024*1024",
    model: str = "wanx-v1",
    saved_dir: str = "./figs",
) -> ServiceResponse:
    """Generate images from a text prompt and save them to disk.

    Args:
        prompt (`str`): the text prompt.
        api_key (`str`): The api key for the dashscope api.
        number_of_images (`int`, defaults to `1`): the number of images
            to generate.
        size (`str`, defaults to `1024*1024`): size of the image.
        model (`str`, defaults to 'wanx-v1'): the model to use.
        saved_dir (`str`, defaults to './figs'): the directory to save the
            generated images. It is created if it does not exist.
    Returns:
        ServiceResponse:
            A dictionary with two variables: `status` and `content`.
            If `status` is ServiceExecStatus.SUCCESS,
            the `content` is a dict with key "file_paths" whose
            value is a list of the paths to the generated images.
            Otherwise the `content` is a dict with key "error" whose
            value describes the failure.
    Example:
        prompt = "A beautiful sunset in the mountains"
        print(dashscope_text_to_image(prompt, api_key)) gives:
        {'status': 'SUCCESS',
         'content': {'file_paths':
            ['FIG_RELATIVE_PATH1',
             'FIG_RELATIVE_PATH2']}}
    """
    dashscope.api_key = api_key
    res = dashscope.ImageSynthesis.call(
        model=model,
        prompt=prompt,
        n=number_of_images,
        size=size,
    )

    if res.status_code != HTTPStatus.OK:
        # Format instead of concatenating so a missing message
        # cannot raise TypeError
        err_msg = f"Error in calling the API: {res.status_code}, {res.message}"
        return ServiceResponse(
            ServiceExecStatus.ERROR,
            {"error": err_msg},
        )

    if res.output.task_status != "SUCCEEDED":
        err_msg = (
            f"Task failed with status {res.output.task_status}, "
            f"{res.message}"
        )
        return ServiceResponse(
            ServiceExecStatus.ERROR,
            {"error": err_msg},
        )

    # exist_ok avoids the race between checking for and creating the dir
    os.makedirs(saved_dir, exist_ok=True)

    # The prompt is free text: replace characters that are path separators
    # or invalid in file names, and cap the length so that the final name
    # stays below the common 255-byte file name limit.
    unsafe_chars = '<>:"/\\|?*'
    file_stem = "".join(
        "_" if ch in unsafe_chars or ord(ch) < 32 else ch for ch in prompt
    )[:60]

    fig_paths = []
    for i, result in enumerate(res.output.results):
        # Without a timeout a stalled download would block forever
        response = requests.get(result.url, timeout=60)
        if not response.ok:
            # Do not write an HTTP error body to disk as if it were an image
            err_msg = (
                f"Failed to download image from {result.url}: "
                f"HTTP {response.status_code}"
            )
            return ServiceResponse(
                ServiceExecStatus.ERROR,
                {"error": err_msg},
            )
        fig_path = f"{saved_dir}/{file_stem}_{i}.png"
        with open(fig_path, "wb") as f:
            f.write(response.content)
        fig_paths.append(fig_path)

    return ServiceResponse(
        ServiceExecStatus.SUCCESS,
        {"file_paths": fig_paths},
    )


def dashscope_image_to_text(
    image_urls: Union[str, Tuple[str, ...]],
    prompt: str,
    api_key: str,
    model: str = "qwen-vl-plus",
) -> ServiceResponse:
    """Generate text based on one or more images and a text prompt.

    Args:
        image_urls (`Union[str, Tuple[str, ...]]`): the url or local path
            of a single image, or a sequence of urls / local paths for
            multiple images.
        prompt (`str`): the text prompt.
        api_key (`str`): The api key for the dashscope api.
        model (`str`, defaults to 'qwen-vl-plus'): the model to use.
    Returns:
        ServiceResponse:
            A dictionary with two variables: `status` and `content`.
            If `status` is ServiceExecStatus.SUCCESS,
            the `content` is the generated text.
            Otherwise the `content` is a dict with key "error" whose
            value describes the failure.
    Example:
        image_url = "image.jpg"
        prompt = "Describe the image"
        print(dashscope_image_to_text(image_url, prompt, api_key)) gives:
        {'status': 'SUCCESS', 'content': 'A beautiful sunset in the mountains'}
    """
    dashscope.api_key = api_key

    # A bare string is one image; any other sequence (tuple or list) is
    # iterated as-is instead of being wrapped into a one-element tuple.
    if isinstance(image_urls, str):
        image_urls = (image_urls,)

    contents = []
    for image_url in image_urls:
        # Anything that is not an http(s) url is treated as a local file
        if not image_url.startswith(("http://", "https://")):
            if not os.path.exists(image_url):
                return ServiceResponse(
                    ServiceExecStatus.ERROR,
                    {"error": f"File {image_url} does not exist"},
                )
            image_path = str(os.path.abspath(image_url))
            image_url = f"file://{image_path}"
        contents.append({"image": image_url})
    # The text prompt follows the images in the same user message
    contents.append({"text": prompt})

    # currently only support one round of conversation
    # if multiple rounds of conversation are needed,
    # it would be better to implement an Agent class
    messages = [
        {
            "role": "system",
            "content": [{"text": "You are a helpful assistant"}],
        },
        {
            "role": "user",
            "content": contents,
        },
    ]
    res = dashscope.MultiModalConversation.call(
        model=model,
        messages=messages,
    )

    if res.status_code != HTTPStatus.OK:
        # Format instead of concatenating so a missing message
        # cannot raise TypeError
        err_msg = f"Error in calling the API: {res.status_code}, {res.message}"
        return ServiceResponse(
            ServiceExecStatus.ERROR,
            {"error": err_msg},
        )

    description = res.output.choices[0].message.content[0]["text"]
    return ServiceResponse(
        ServiceExecStatus.SUCCESS,
        description,
    )


def dashscope_text_to_audio(
    text: str,
    api_key: str,
    model: str = "sambert-zhichu-v1",
    sample_rate: int = 48000,
    saved_dir: str = "./audio",
) -> ServiceResponse:
    """Convert text to audio and save it as a wav file.

    Args:
        text (`str`): the text to convert.
        api_key (`str`): The api key for the dashscope api.
        model (`str`, defaults to 'sambert-zhichu-v1'): the model to use.
        sample_rate (`int`, defaults to 48000): sample rate of the audio.
        saved_dir (`str`, defaults to './audio'): the directory
            to save the generated audio. It is created if it does not
            exist.
    Returns:
        ServiceResponse:
            A dictionary with two variables: `status` and `content`.
            If `status` is ServiceExecStatus.SUCCESS,
            the `content` contains a dictionary with key "audio_path"
            whose value is the path to the generated audio.
            Otherwise the `content` is a dict with key "error" whose
            value describes the failure.
    Example:
        text = "How is the weather today?"
        print(dashscope_text_to_audio(text, api_key)) gives:
        {'status': 'SUCCESS',
         'content': {"audio_path": "AUDIO_RELATIVE_PATH"}}
    """
    dashscope.api_key = api_key
    res = SpeechSynthesizer.call(
        model=model,
        text=text,
        sample_rate=sample_rate,
    )

    # Fetch the payload once and reuse it for the check and the write
    audio_data = res.get_audio_data()
    if audio_data is None:
        return ServiceResponse(
            ServiceExecStatus.ERROR,
            {"error": "Failed to generate audio"},
        )

    # exist_ok avoids the race between checking for and creating the dir
    os.makedirs(saved_dir, exist_ok=True)

    # The text is free-form: replace characters that are path separators
    # or invalid in file names, and cap the length so that the final name
    # stays below the common 255-byte file name limit.
    unsafe_chars = '<>:"/\\|?*'
    file_stem = "".join(
        "_" if ch in unsafe_chars or ord(ch) < 32 else ch for ch in text
    )[:60]

    audio_path = f"{saved_dir}/{file_stem}.wav"
    with open(audio_path, "wb") as f:
        f.write(audio_data)
    return ServiceResponse(
        ServiceExecStatus.SUCCESS,
        {"audio_path": audio_path},
    )