Fix audio format conversion

swiss-ai-center · Apr 27, 2024 · 069f1e3 · 069f1e3
1 parent 3c53318
commit 069f1e3
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 7 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,9 +1,7 @@
 # Base image
 FROM python:3.11
 
-# Install all required packages to run the model
-# TODO: 1. Add any additional packages required to run your model
-# RUN apt update && apt install --yes package1 package2 ...
+RUN apt update && apt install --yes ffmpeg
 
 # Work directory
 WORKDIR /app

diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 common-code[test] @ git+https://github.com/swiss-ai-center/common-code.git@main
 requests==2.31.0
+pydub==0.25.1
diff --git a/src/main.py b/src/main.py
@@ -2,6 +2,7 @@
 import json
 import time
 
+import uvicorn
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import RedirectResponse
@@ -22,6 +23,7 @@
 
 # Imports required by the service's model
 import requests
+from pydub import AudioSegment
 import zipfile
 import io
 
@@ -42,7 +44,7 @@ def __init__(self):
         super().__init__(
             name="Hugging Face",
             slug="hugging-face",
-            url=settings.service_url,
+            url="http://localhost:9090",
             summary=api_summary,
             description=api_description,
             status=ServiceStatus.AVAILABLE,
@@ -148,10 +150,11 @@ def audio_or_image_input_query(file_bytes):
             case FieldDescriptionType.IMAGE_JPEG:
                 processed_data = create_zip_from_bytes(result_data.content, "result.jpg")
             case FieldDescriptionType.AUDIO_MP3:
-                # TODO Find a way to convert this to mp3 as requested
-                processed_data = create_zip_from_bytes(result_data.content, "result.wav")
+                audio_segment = AudioSegment.from_file(io.BytesIO(result_data.content))
+                processed_data = create_zip_from_bytes(audio_segment.export(format='mp3').read(), "result.mp3")
             case FieldDescriptionType.AUDIO_OGG:
-                processed_data = create_zip_from_bytes(result_data.content, "result.ogg")
+                audio_segment = AudioSegment.from_file(io.BytesIO(result_data.content))
+                processed_data = create_zip_from_bytes(audio_segment.export(format='ogg').read(), "result.ogg")
 
         return {
             "result": TaskData(data=processed_data,
@@ -289,3 +292,7 @@ async def announce():
 @app.get("/", include_in_schema=False)
 async def root():
     return RedirectResponse("/docs", status_code=301)
+
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=9090)