Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added descriptions to the pipeline parameters. #144

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
12 changes: 8 additions & 4 deletions runner/app/routes/audio_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.pipelines.utils.audio import AudioConversionError
from app.routes.util import (HTTPError, TextResponse, file_exceeds_max_size,
http_error)
from app.routes.util import HTTPError, TextResponse, file_exceeds_max_size, http_error
from fastapi import APIRouter, Depends, File, Form, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
Expand Down Expand Up @@ -56,8 +55,13 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
include_in_schema=False,
)
async def audio_to_text(
audio: Annotated[UploadFile, File()],
model_id: Annotated[str, Form()] = "",
audio: Annotated[
UploadFile, File(description="Uploaded audio file to be transcribed.")
],
model_id: Annotated[
str,
Form(description="Hugging Face model ID used for transcription."),
] = "",
pipeline: Pipeline = Depends(get_pipeline),
token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
):
Expand Down
65 changes: 52 additions & 13 deletions runner/app/routes/image_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import (HTTPError, ImageResponse, http_error,
image_to_data_url)
from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url
from fastapi import APIRouter, Depends, File, Form, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
Expand Down Expand Up @@ -36,19 +35,59 @@
include_in_schema=False,
)
async def image_to_image(
prompt: Annotated[str, Form()],
image: Annotated[UploadFile, File()],
model_id: Annotated[str, Form()] = "",
strength: Annotated[float, Form()] = 0.8,
guidance_scale: Annotated[float, Form()] = 7.5,
image_guidance_scale: Annotated[float, Form()] = 1.5,
negative_prompt: Annotated[str, Form()] = "",
safety_check: Annotated[bool, Form()] = True,
seed: Annotated[int, Form()] = None,
prompt: Annotated[
str,
Form(description="Text prompt(s) to guide image generation."),
],
image: Annotated[
UploadFile,
File(description="Uploaded image to modify with the pipeline."),
],
model_id: Annotated[
str,
Form(description="Hugging Face model ID used for image generation."),
] = "",
strength: Annotated[
float,
Form(
description="Degree of transformation applied to the reference image (0 to 1)."
),
] = 0.8,
guidance_scale: Annotated[
float,
Form(
description="Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality)."
),
] = 7.5,
image_guidance_scale: Annotated[
float,
Form(
description="Degree to which the generated image is pushed towards the initial image."
),
] = 1.5,
negative_prompt: Annotated[
str,
Form(
description="Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1."
),
] = "",
safety_check: Annotated[
bool,
Form(
description="Perform a safety check to estimate if generated images could be offensive or harmful."
),
] = True,
seed: Annotated[int, Form(description="Seed for random number generation.")] = None,
num_inference_steps: Annotated[
int, Form()
int,
Form(
description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength."
),
] = 100, # NOTE: Hardcoded due to varying pipeline values.
num_images_per_prompt: Annotated[int, Form()] = 1,
num_images_per_prompt: Annotated[
int,
Form(description="Number of images to generate per prompt."),
] = 1,
pipeline: Pipeline = Depends(get_pipeline),
token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
):
Expand Down
52 changes: 40 additions & 12 deletions runner/app/routes/image_to_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import (HTTPError, VideoResponse, http_error,
image_to_data_url)
from app.routes.util import HTTPError, VideoResponse, http_error, image_to_data_url
from fastapi import APIRouter, Depends, File, Form, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
Expand Down Expand Up @@ -35,17 +34,46 @@
include_in_schema=False,
)
async def image_to_video(
image: Annotated[UploadFile, File()],
model_id: Annotated[str, Form()] = "",
height: Annotated[int, Form()] = 576,
width: Annotated[int, Form()] = 1024,
fps: Annotated[int, Form()] = 6,
motion_bucket_id: Annotated[int, Form()] = 127,
noise_aug_strength: Annotated[float, Form()] = 0.02,
seed: Annotated[int, Form()] = None,
safety_check: Annotated[bool, Form()] = True,
image: Annotated[
UploadFile,
File(description="Uploaded image to generate a video from."),
],
model_id: Annotated[
str, Form(description="Hugging Face model ID used for video generation.")
] = "",
height: Annotated[
int, Form(description="The height in pixels of the generated video.")
] = 576,
width: Annotated[
int, Form(description="The width in pixels of the generated video.")
] = 1024,
fps: Annotated[
int, Form(description="The frames per second of the generated video.")
] = 6,
motion_bucket_id: Annotated[
int,
Form(
description="Used for conditioning the amount of motion for the generation. The higher the number the more motion will be in the video."
),
] = 127,
noise_aug_strength: Annotated[
float,
Form(
description="Amount of noise added to the conditioning image. Higher values reduce resemblance to the conditioning image and increase motion."
),
] = 0.02,
safety_check: Annotated[
bool,
Form(
description="Perform a safety check to estimate if generated images could be offensive or harmful."
),
] = True,
seed: Annotated[int, Form(description="Seed for random number generation.")] = None,
num_inference_steps: Annotated[
int, Form()
int,
Form(
description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength."
),
] = 25, # NOTE: Hardcoded due to varying pipeline values.
pipeline: Pipeline = Depends(get_pipeline),
token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
Expand Down
65 changes: 53 additions & 12 deletions runner/app/routes/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import (HTTPError, ImageResponse, http_error,
image_to_data_url)
from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url
from fastapi import APIRouter, Depends, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
Expand All @@ -22,17 +21,59 @@ class TextToImageParams(BaseModel):
# supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373
model_id: Annotated[
str,
Field(default="", description=""),
Field(
default="", description="Hugging Face model ID used for image generation."
),
]
prompt: Annotated[
str,
Field(
description="Text prompt(s) to guide image generation. Separate multiple prompts with '|' if supported by the model."
),
]
height: Annotated[
int,
Field(default=576, description="The height in pixels of the generated image."),
]
width: Annotated[
int,
Field(default=1024, description="The width in pixels of the generated image."),
]
guidance_scale: Annotated[
float,
Field(
default=7.5,
description="Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality).",
),
]
negative_prompt: Annotated[
str,
Field(
default="",
description="Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1.",
),
]
safety_check: Annotated[
bool,
Field(
default=True,
description="Perform a safety check to estimate if generated images could be offensive or harmful.",
),
]
seed: Annotated[
int, Field(default=None, description="Seed for random number generation.")
]
num_inference_steps: Annotated[
int,
Field(
default=50,
description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength.",
),
]
num_images_per_prompt: Annotated[
int,
Field(default=1, description="Number of images to generate per prompt."),
]
prompt: Annotated[str, Field(description="")]
height: Annotated[int, Field(default=576, description="")]
width: Annotated[int, Field(default=1024, description="")]
guidance_scale: Annotated[float, Field(default=7.5, description="")]
negative_prompt: Annotated[str, Field(default="", description="")]
safety_check: Annotated[bool, Field(default=True, description="")]
seed: Annotated[int, Field(default=None, description="")]
num_inference_steps: Annotated[int, Field(default=50, description="")]
num_images_per_prompt: Annotated[int, Field(default=1, description="")]


RESPONSES = {
Expand Down
32 changes: 24 additions & 8 deletions runner/app/routes/upscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

from app.dependencies import get_pipeline
from app.pipelines.base import Pipeline
from app.routes.util import (HTTPError, ImageResponse, http_error,
image_to_data_url)
from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url
from fastapi import APIRouter, Depends, File, Form, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
Expand Down Expand Up @@ -36,13 +35,30 @@
include_in_schema=False,
)
async def upscale(
prompt: Annotated[str, Form()],
image: Annotated[UploadFile, File()],
model_id: Annotated[str, Form()] = "",
safety_check: Annotated[bool, Form()] = True,
seed: Annotated[int, Form()] = None,
prompt: Annotated[
str,
Form(description="Text prompt(s) to guide upscaled image generation."),
],
image: Annotated[
UploadFile,
File(description="Uploaded image to modify with the pipeline."),
],
model_id: Annotated[
str,
Form(description="Hugging Face model ID used for upscaled image generation."),
] = "",
safety_check: Annotated[
bool,
Form(
description="Perform a safety check to estimate if generated images could be offensive or harmful."
),
] = True,
seed: Annotated[int, Form(description="Seed for random number generation.")] = None,
num_inference_steps: Annotated[
int, Form()
int,
Form(
description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength."
),
] = 75, # NOTE: Hardcoded due to varying pipeline values.
pipeline: Pipeline = Depends(get_pipeline),
token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
Expand Down
Loading