refactor: improve out of memory error handling

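This commit improves out-of-memory error handling by checking for PyTorch's native `torch.cuda.OutOfMemoryError` instead of matching the "CUDA out of memory" message string or the custom `OutOfMemoryError` class. The route error handlers now also call `torch.cuda.empty_cache()` when an out-of-memory error is detected, and pipeline error logging moves from `handle_pipeline_error` to the calling route functions.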
livepeer · Oct 14, 2024 · 53d76eb · 53d76eb
1 parent c7b759a
commit 53d76eb
Show file tree

Hide file tree

Showing 8 changed files with 26 additions and 19 deletions.
diff --git a/runner/app/pipelines/segment_anything_2.py b/runner/app/pipelines/segment_anything_2.py
@@ -3,7 +3,7 @@
 
 import PIL
 from app.pipelines.base import Pipeline
-from app.pipelines.utils import get_model_dir, get_torch_device
+from app.pipelines.utils import get_torch_device, get_model_dir
 from app.utils.errors import InferenceError
 from PIL import ImageFile
 from sam2.sam2_image_predictor import SAM2ImagePredictor

diff --git a/runner/app/pipelines/upscale.py b/runner/app/pipelines/upscale.py
@@ -117,6 +117,8 @@ def __call__(
 
         try:
             outputs = self.ldm(prompt, image=image, **kwargs)
+        except torch.cuda.OutOfMemoryError as e:
+            raise e
         except Exception as e:
             raise InferenceError(original_exception=e)
 

diff --git a/runner/app/routes/audio_to_text.py b/runner/app/routes/audio_to_text.py
@@ -2,6 +2,7 @@
 import os
 from typing import Annotated
 
+import torch
 from app.dependencies import get_pipeline
 from app.pipelines.base import Pipeline
 from app.pipelines.utils.audio import AudioConversionError
@@ -42,15 +43,15 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(f"AudioToText pipeline error: {str(e)}")  # Log the detailed error
     if "Soundfile is either not in the correct format or is malformed" in str(
         e
     ) or isinstance(e, AudioConversionError):
         status_code = status.HTTP_415_UNSUPPORTED_MEDIA_TYPE
         error_message = "Unsupported audio format or malformed file."
-    elif "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError):
+    elif isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = "Out of memory error."
+        torch.cuda.empty_cache()
     elif isinstance(e, InferenceError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = str(e)
@@ -118,4 +119,5 @@ async def audio_to_text(
     try:
         return pipeline(audio=audio)
     except Exception as e:
+        logger.error(f"AudioToText pipeline error: {str(e)}")
         return handle_pipeline_error(e)
diff --git a/runner/app/routes/image_to_image.py b/runner/app/routes/image_to_image.py
@@ -30,11 +30,7 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(
-        f"ImageToImagePipeline pipeline error: {str(e)}"
-    )  # Log the detailed error
-    logger.exception(e) # TODO: Check if needed.
-    if "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError) or isinstance(torch.cuda.OutOfMemoryError): # TODO: simplify condition.
+    if isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = "Out of memory error. Try reducing input image resolution."
         torch.cuda.empty_cache()
@@ -215,6 +211,7 @@ async def image_to_image(
                 num_inference_steps=num_inference_steps,
             )
         except Exception as e:
+            logger.error(f"ImageToImagePipeline pipeline error: {str(e)}")
             return handle_pipeline_error(e)
         images.extend(imgs)
         has_nsfw_concept.extend(nsfw_checks)

diff --git a/runner/app/routes/image_to_video.py b/runner/app/routes/image_to_video.py
@@ -4,9 +4,10 @@
 from typing import Annotated
 
 from app.dependencies import get_pipeline
+import torch
 from app.pipelines.base import Pipeline
 from app.routes.utils import HTTPError, VideoResponse, http_error, image_to_data_url
-from app.utils.errors import InferenceError, OutOfMemoryError
+from app.utils.errors import InferenceError
 from fastapi import APIRouter, Depends, File, Form, UploadFile, status
 from fastapi.responses import JSONResponse
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
@@ -28,12 +29,12 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(f"ImageToVideo pipeline error: {str(e)}")  # Log the detailed error
-    if "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError):
+    if isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = (
             "Out of memory error. Try reducing input or output video resolution."
         )
+        torch.cuda.empty_cache()
     elif isinstance(e, InferenceError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = str(e)
@@ -181,6 +182,7 @@ async def image_to_video(
             seed=seed,
         )
     except Exception as e:
+        logger.error(f"ImageToVideo pipeline error: {str(e)}")
         return handle_pipeline_error(e)
 
     output_frames = []

diff --git a/runner/app/routes/segment_anything_2.py b/runner/app/routes/segment_anything_2.py
@@ -3,6 +3,7 @@
 from typing import Annotated
 
 import numpy as np
+import torch
 from app.dependencies import get_pipeline
 from app.pipelines.base import Pipeline
 from app.routes.utils import HTTPError, MasksResponse, http_error, json_str_to_np_array
@@ -28,10 +29,10 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(f"SegmentAnything2 pipeline error: {str(e)}")  # Log the detailed error
-    if "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError):
+    if isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = "Out of memory error. Try reducing input image resolution."
+        torch.cuda.empty_cache()
     elif isinstance(e, InferenceError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = str(e)
@@ -192,6 +193,7 @@ async def segment_anything_2(
             normalize_coords=normalize_coords,
         )
     except Exception as e:
+        logger.error(f"SegmentAnything2 pipeline error: {str(e)}")
         return handle_pipeline_error(e)
 
     # Return masks sorted by descending score as string.

diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py
@@ -7,7 +7,7 @@
 from app.dependencies import get_pipeline
 from app.pipelines.base import Pipeline
 from app.routes.utils import HTTPError, ImageResponse, http_error, image_to_data_url
-from app.utils.errors import InferenceError, OutOfMemoryError
+from app.utils.errors import InferenceError
 from app.pipelines.utils.utils import LoraLoadingError
 from fastapi import APIRouter, Depends, status
 from fastapi.responses import JSONResponse
@@ -28,8 +28,7 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(f"TextToImage pipeline error: {str(e)}")  # Log the detailed error
-    if "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError) or isinstance(e, torch.cuda.OutOfMemoryError): # TODO: Simplify.
+    if isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = "Out of memory error. Try reducing output image resolution."
         torch.cuda.empty_cache()
@@ -204,6 +203,7 @@ async def text_to_image(
         try:
             imgs, nsfw_check = pipeline(**kwargs)
         except Exception as e:
+            logger.error(f"TextToImage pipeline error: {str(e)}")
             return handle_pipeline_error(e)
         images.extend(imgs)
         has_nsfw_concept.extend(nsfw_check)

diff --git a/runner/app/routes/upscale.py b/runner/app/routes/upscale.py
@@ -3,7 +3,8 @@
 import random
 from typing import Annotated
 
-from app.utils.errors import InferenceError, OutOfMemoryError
+import torch
+from app.utils.errors import InferenceError
 from app.dependencies import get_pipeline
 from app.pipelines.base import Pipeline
 from app.routes.utils import HTTPError, ImageResponse, http_error, image_to_data_url
@@ -28,10 +29,10 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     Returns:
         A JSONResponse with the appropriate error message and status code.
     """
-    logger.error(f"TextToImage pipeline error: {str(e)}")  # Log the detailed error
-    if "CUDA out of memory" in str(e) or isinstance(e, OutOfMemoryError):
+    if isinstance(e, torch.cuda.OutOfMemoryError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = "Out of memory error. Try reducing input image resolution."
+        torch.cuda.empty_cache()
     elif isinstance(e, InferenceError):
         status_code = status.HTTP_400_BAD_REQUEST
         error_message = str(e)
@@ -145,6 +146,7 @@ async def upscale(
             seed=seed,
         )
     except Exception as e:
+        logger.error(f"TextToImage pipeline error: {str(e)}")
         return handle_pipeline_error(e)
 
     seeds = [seed]