modelscope · DavdGao · Jun 5, 2024 · May 24, 2024 · May 24, 2024 · May 30, 2024
diff --git a/examples/conversation_with_customized_services/main.ipynb b/examples/conversation_with_customized_services/main.ipynb
@@ -241,7 +241,7 @@
     "        weather_data = weather.run(f\"{city},{country}\")\n",
     "        return ServiceResponse(ServiceExecStatus.SUCCESS, weather_data)\n",
     "    except Exception as e:\n",
-    "        return ServiceResponse(ServiceExecStatus.FAILURE, str(e))"
+    "        return ServiceResponse(ServiceExecStatus.ERROR, str(e))"
    ]
   },
   {
@@ -408,7 +408,7 @@
     "        return ServiceResponse(ServiceExecStatus.SUCCESS, {\"urls\": urls})\n",
     "    else:\n",
     "        err_msg = f\"status_code: {response.status_code}, code: {response.code}, message: {response.message}\"\n",
-    "        return ServiceResponse(ServiceExecStatus.FAILURE, err_msg)"
+    "        return ServiceResponse(ServiceExecStatus.ERROR, err_msg)"
    ]
   },
   {
@@ -457,7 +457,7 @@
     "        return ServiceResponse(ServiceExecStatus.SUCCESS, description)\n",
     "    else:\n",
     "        err_msg = f\"status_code: {response.status_code}, code: {response.code}, message: {response.message}\"\n",
-    "        return ServiceResponse(ServiceExecStatus.FAILURE, err_msg)    \n",
+    "        return ServiceResponse(ServiceExecStatus.ERROR, err_msg)    \n",
     "                  "
    ]
   },
@@ -500,7 +500,7 @@
     "            f.write(result.get_audio_data())\n",
     "        return ServiceResponse(ServiceExecStatus.SUCCESS, 'output.wav')\n",
     "    else:\n",
-    "        return ServiceResponse(ServiceExecStatus.FAILURE, \"Failed to generate audio file\")"
+    "        return ServiceResponse(ServiceExecStatus.ERROR, \"Failed to generate audio file\")"
    ]
   },
   {

diff --git a/src/agentscope/models/dashscope_model.py b/src/agentscope/models/dashscope_model.py
@@ -606,7 +606,9 @@ def __call__(
             messages=messages,
             **kwargs,
         )
-
+        # NOTE: unhandled code path here.
+        # `response` could be a generator if streaming is enabled;
+        # consider adding a check before accessing `status_code`.
         if response.status_code != HTTPStatus.OK:
             error_msg = (
                 f" Request id: {response.request_id},"
@@ -770,7 +772,7 @@ def format(
         for i, unit in enumerate(input_msgs):
             if i == 0 and unit.role == "system":
                 # system prompt
-                content = self._convert_url(unit.url)
+                content = self.convert_url(unit.url)
                 content.append({"text": _convert_to_str(unit.content)})
 
                 messages.append(
@@ -785,7 +787,7 @@ def format(
                     f"{unit.name}: {_convert_to_str(unit.content)}",
                 )
                 # image and audio
-                image_or_audio_dicts.extend(self._convert_url(unit.url))
+                image_or_audio_dicts.extend(self.convert_url(unit.url))
 
         dialogue_history = "\n".join(dialogue)
 
@@ -808,7 +810,7 @@ def format(
 
         return messages
 
-    def _convert_url(self, url: Union[str, Sequence[str], None]) -> List[dict]:
+    def convert_url(self, url: Union[str, Sequence[str], None]) -> List[dict]:
         """Convert the url to the format of DashScope API. Note for local
         files, a prefix "file://" will be added.
 
@@ -841,7 +843,7 @@ def _convert_url(self, url: Union[str, Sequence[str], None]) -> List[dict]:
         elif isinstance(url, list):
             dicts = []
             for _ in url:
-                dicts.extend(self._convert_url(_))
+                dicts.extend(self.convert_url(_))
             return dicts
         else:
             raise TypeError(

diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py
@@ -26,6 +26,11 @@
     dblp_search_authors,
     dblp_search_venues,
 )
+from .multi_modality.dashscope_services import (
+    dashscope_image_to_text,
+    dashscope_text_to_image,
+    dashscope_text_to_audio,
+)
 from .service_response import ServiceResponse
 from .service_toolkit import ServiceToolkit
 from .service_toolkit import ServiceFactory
@@ -78,6 +83,9 @@ def get_help() -> None:
     "dblp_search_publications",
     "dblp_search_authors",
     "dblp_search_venues",
+    "dashscope_image_to_text",
+    "dashscope_text_to_image",
+    "dashscope_text_to_audio",
     # to be deprecated
     "ServiceFactory",
 ]
diff --git a/src/agentscope/service/multi_modality/dashscope_services.py b/src/agentscope/service/multi_modality/dashscope_services.py
@@ -0,0 +1,289 @@
+# -*- coding: utf-8 -*-
+"""Use DashScope API to generate images,
+convert text to audio, and convert images to text.
+Please refer to the official documentation for more details:
+https://dashscope.aliyun.com/
+"""
+
+from typing import Union, Optional, Literal, Sequence
+
+import os
+
+import dashscope
+from dashscope.audio.tts import SpeechSynthesizer
+
+from agentscope.models import (
+    DashScopeImageSynthesisWrapper,
+    DashScopeMultiModalWrapper,
+)
+
+# SpeechSynthesizerWrapper is currently not available
+
+
+from agentscope.service.service_response import (
+    ServiceResponse,
+    ServiceExecStatus,
+)
+from agentscope.utils.tools import _download_file
+
+
+def dashscope_text_to_image(
+    prompt: str,
+    api_key: str,
+    n: int = 1,
+    size: Literal["1024*1024", "720*1280", "1280*720"] = "1024*1024",
+    model: str = "wanx-v1",
+    save_dir: Optional[str] = None,
+) -> ServiceResponse:
+    """Generate image(s) based on the given prompt, and return image url(s).
+
+    Args:
+        prompt (`str`):
+            The text prompt to generate image.
+        api_key (`str`):
+            The api key for the dashscope api.
+        n (`int`, defaults to `1`):
+            The number of images to generate.
+        size (`Literal["1024*1024", "720*1280", "1280*720"]`, defaults to
+        `"1024*1024"`):
+            Size of the image.
+        model (`str`, defaults to '"wanx-v1"'):
+            The model to use.
+        save_dir (`Optional[str]`, defaults to 'None'):
+            The directory to save the generated images. If not specified,
+            will return the web urls.
+
+    Returns:
+        `ServiceResponse`:
+            A response with two fields: `status` and `content`. On
+            `ServiceExecStatus.SUCCESS`, `content` is a dict whose key
+            "image_urls" maps to a list of web urls, or of local file
+            paths when `save_dir` is given. Otherwise it is an error string.
+
+    Example:
+
+        .. code-block:: python
+
+            prompt = "A beautiful sunset in the mountains"
+            print(dashscope_text_to_image(prompt, "{api_key}", n=2))
+
+    > {
+    >     'status': 'SUCCESS',
+    >     'content': {'image_urls': ['IMAGE_URL1', 'IMAGE_URL2']}
+    > }
+
+    """
+    text2img = DashScopeImageSynthesisWrapper(
+        config_name="dashscope-text-to-image-service",  # Just a placeholder
+        model_name=model,
+        api_key=api_key,
+    )
+    try:
+        res = text2img(
+            prompt=prompt,
+            n=n,
+            size=size,
+        )
+        urls = res.image_urls
+
+        # save images to save_dir
+        if urls is not None:
+            if save_dir:
+                os.makedirs(save_dir, exist_ok=True)
+                urls_local = []
+                # Drop any query string or fragment before taking the file name
+                for url in urls:
+                    image_name = url.split("?")[0].split("#")[0].split("/")[-1]
+                    image_path = os.path.join(save_dir, image_name)
+                    # Download the image
+                    _download_file(url, image_path)
+                    urls_local.append(image_path)
+
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls_local},
+                )
+            else:
+                # Return the web urls
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls},
+                )
+        else:
+            return ServiceResponse(
+                ServiceExecStatus.ERROR,
+                "Error: Failed to generate images",
+            )
+    except Exception as e:
+        return ServiceResponse(
+            ServiceExecStatus.ERROR,
+            str(e),
+        )
+
+
+def dashscope_image_to_text(
+    image_urls: Union[str, Sequence[str]],
+    api_key: str,
+    prompt: str = "Describe the image",
+    model: str = "qwen-vl-plus",
+) -> ServiceResponse:
+    """Generate text based on the given images.
+
+    Args:
+        image_urls (`Union[str, Sequence[str]]`):
+            The url of single or multiple images.
+        api_key (`str`):
+            The api key for the dashscope api.
+        prompt (`str`, defaults to 'Describe the image' ):
+            The text prompt.
+        model (`str`, defaults to 'qwen-vl-plus'):
+            The model to use in DashScope MultiModal API.
+
+    Returns:
+        `ServiceResponse`:
+            A response with two fields: `status` and `content`.
+            If `status` is ServiceExecStatus.SUCCESS, the `content` is the
+            generated text. Otherwise it is an error message.
+
+    Example:
+
+        .. code-block:: python
+
+            image_url = "image.jpg"
+            prompt = "Describe the image"
+            print(dashscope_image_to_text(image_url, "{api_key}", prompt))
+
+    > {'status': 'SUCCESS', 'content': 'A beautiful sunset in the mountains'}
+
+    """
+
+    img2text = DashScopeMultiModalWrapper(
+        config_name="dashscope-image-to-text-service",  # Just a placeholder
+        model_name=model,
+        api_key=api_key,
+    )
+
+    if isinstance(image_urls, str):
+        image_urls = [image_urls]
+
+    # Check if the local url is valid
+    img_abs_urls = []
+    for url in image_urls:
+        if os.path.exists(url):
+            if os.path.isfile(url):
+                img_abs_urls.append(os.path.abspath(url))
+            else:
+                return ServiceResponse(
+                    ServiceExecStatus.ERROR,
+                    f'Error: The input image url "{url}" is not a file.',
+                )
+        else:
+            # Maybe a web url or an invalid url, we leave it to the API
+            # to handle
+            img_abs_urls.append(url)
+
+    # currently only support one round of conversation
+    # if multiple rounds of conversation are needed,
+    # it would be better to implement an Agent class
+    sys_message = {
+        "role": "system",
+        "content": [{"text": "You are a helpful assistant."}],
+    }
+    try:
+        # Convert image paths according to the model requirements; done
+        # inside the `try` so a conversion failure is reported as ERROR
+        contents = img2text.convert_url(img_abs_urls)
+        contents.append({"text": prompt})
+        user_message = {
+            "role": "user",
+            "content": contents,
+        }
+        res = img2text([sys_message, user_message], stream=False)
+        description = res.text
+        if description is not None:
+            return ServiceResponse(
+                ServiceExecStatus.SUCCESS,
+                description,
+            )
+        else:
+            return ServiceResponse(
+                ServiceExecStatus.ERROR,
+                "Error: Failed to generate text",
+            )
+    except Exception as e:
+        return ServiceResponse(
+            ServiceExecStatus.ERROR,
+            str(e),
+        )
+
+
+def dashscope_text_to_audio(
+    text: str,
+    api_key: str,
+    save_dir: str,
+    model: str = "sambert-zhichu-v1",
+    sample_rate: int = 48000,
+) -> ServiceResponse:
+    """Convert the given text to audio.
+
+    Args:
+        text (`str`):
+            The text to be converted into audio.
+        api_key (`str`):
+            The api key for the dashscope API.
+        save_dir (`str`):
+            The directory to save the generated audio. It is created if
+            it does not exist.
+        model (`str`, defaults to 'sambert-zhichu-v1'):
+            The model to use. Full model list can be found in
+            https://help.aliyun.com/zh/dashscope/model-list
+        sample_rate (`int`, defaults to 48000):
+            Sample rate of the audio.
+
+    Returns:
+        `ServiceResponse`:
+            A response with two fields: `status` and `content`. If
+            `status` is ServiceExecStatus.SUCCESS, the `content` contains
+            a dictionary with key "audio_path" and value is the path to
+            the generated audio. Otherwise it is an error message.
+
+    Example:
+
+        .. code-block:: python
+
+            text = "How is the weather today?"
+            print(dashscope_text_to_audio(text, "{api_key}", "./audio"))
+
+    > {'status': 'SUCCESS', 'content': {"audio_path": "AUDIO_PATH"}}
+
+    """
+    dashscope.api_key = api_key
+
+    try:
+        res = SpeechSynthesizer.call(
+            model=model,
+            text=text,
+            sample_rate=sample_rate,
+            format="wav",
+        )
+        audio_data = res.get_audio_data()
+        if audio_data is None:
+            return ServiceResponse(
+                ServiceExecStatus.ERROR,
+                "Error: Failed to generate audio",
+            )
+        os.makedirs(save_dir, exist_ok=True)
+        # Build a file name from the text prefix without path-unsafe characters
+        stem = "".join(
+            "_" if c in '\\/:*?"<>|' or not c.isprintable() else c
+            for c in text[:15]
+        ).strip() or "audio"
+        audio_path = os.path.join(save_dir, f"{stem}.wav")
+        with open(audio_path, "wb") as f:
+            f.write(audio_data)
+        return ServiceResponse(
+            ServiceExecStatus.SUCCESS,
+            {"audio_path": audio_path},
+        )
+    except Exception as e:
+        return ServiceResponse(ServiceExecStatus.ERROR, str(e))