Skip to content

Commit

Permalink
Added video support (working)
Browse files Browse the repository at this point in the history
  • Loading branch information
valentinfrlch committed Jul 27, 2024
1 parent 8796342 commit b73507f
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 38 deletions.
10 changes: 8 additions & 2 deletions custom_components/llmvision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
MESSAGE,
IMAGE_FILE,
IMAGE_ENTITY,
VIDEO_FILE,
INTERVAL,
TEMPERATURE,
DETAIL,
INCLUDE_FILENAME
Expand Down Expand Up @@ -76,6 +78,9 @@ def __init__(self, data_call):
self.image_paths = data_call.data.get(IMAGE_FILE, "").split(
"\n") if data_call.data.get(IMAGE_FILE) else None
self.image_entities = data_call.data.get(IMAGE_ENTITY)
self.video_paths = data_call.data.get(VIDEO_FILE, "").split(
"\n") if data_call.data.get(VIDEO_FILE) else None
self.interval = int(data_call.data.get(INTERVAL, 3))
self.target_width = data_call.data.get(TARGET_WIDTH, 1280)
self.temperature = float(data_call.data.get(TEMPERATURE, 0.5))
self.max_tokens = int(data_call.data.get(MAXTOKENS, 100))
Expand Down Expand Up @@ -114,7 +119,7 @@ async def image_analyzer(data_call):
# Fetch and preprocess images
processor = MediaProcessor(hass, client)
# Send images to RequestHandler client
client = await processor.add_image(call.image_entities, call.image_paths, call.target_width, call.include_filename)
client = await processor.add_images(call.image_entities, call.image_paths, call.target_width, call.include_filename)

# Validate configuration, input data and make the call
try:
Expand All @@ -127,13 +132,14 @@ async def image_analyzer(data_call):
async def video_analyzer(data_call):
"""Handle the service call to analyze a video (future implementation)"""
call = ServiceCallData(data_call).get_service_call_data()
call.message = "The attached images are frames from a video." + call.message
client = RequestHandler(hass,
message=call.message,
max_tokens=call.max_tokens,
temperature=call.temperature,
detail=call.detail)
processor = MediaProcessor(hass, client)
client = await processor.add_video(call.video_paths, call.interval, call.target_width, call.include_filename)
client = await processor.add_videos(call.video_paths, call.interval, call.target_width, call.include_filename)
try:
response = await client.make_request(call)
except ServiceValidationError as e:
Expand Down
2 changes: 2 additions & 0 deletions custom_components/llmvision/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
MESSAGE = 'message'
IMAGE_FILE = 'image_file'
IMAGE_ENTITY = 'image_entity'
VIDEO_FILE = 'video_file'
INTERVAL = 'interval'
DETAIL = 'detail'
TEMPERATURE = 'temperature'
INCLUDE_FILENAME = 'include_filename'
Expand Down
59 changes: 39 additions & 20 deletions custom_components/llmvision/media_handlers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import base64
import io
import os
import shutil
import logging
from homeassistant.helpers.network import get_url
# TODO: Use ffmpeg instead of moviepy
from PIL import Image
from homeassistant.exceptions import ServiceValidationError

_LOGGER = logging.getLogger(__name__)


class MediaProcessor:
def __init__(self, hass, client):
Expand All @@ -15,14 +18,7 @@ def __init__(self, hass, client):
self.filenames = []

async def resize_image(self, target_width, image_path=None, image_data=None, img=None):
"""Encode image as base64
Args:
image_path (string): path where image is stored e.g.: "/config/www/tmp/image.jpg"
Returns:
string: image encoded as base64
"""
"""Resize image to target_width"""
loop = self.hass.loop
if image_path:
# Open the image file
Expand Down Expand Up @@ -70,13 +66,15 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
return base64_image

async def _encode_image(self, img):
    """Return ``img`` serialized as PNG and encoded as a base64 string.

    Args:
        img: object exposing ``save(file_obj, format=...)``; the caller
            passes an opened image here.

    Returns:
        str: the PNG bytes, base64-encoded and decoded as UTF-8 text.
    """
    # Render the image into an in-memory buffer instead of touching disk.
    png_buffer = io.BytesIO()
    img.save(png_buffer, format='PNG')
    png_bytes = png_buffer.getvalue()
    return str(base64.b64encode(png_bytes), 'utf-8')

async def add_image(self, image_entities, image_paths, target_width, include_filename):
async def add_images(self, image_entities, image_paths, target_width, include_filename):
"""Wrapper for client.add_image"""
if image_entities:
for image_entity in image_entities:
try:
Expand Down Expand Up @@ -124,26 +122,47 @@ async def add_image(self, image_entities, image_paths, target_width, include_fil
raise ServiceValidationError(f"Error: {e}")
return self.client

async def add_videos(self, video_paths, interval, target_width, include_filename):
    """Wrapper for client.add_image for videos.

    For every path in ``video_paths`` ffmpeg extracts still frames into a
    private temporary directory; each frame is resized via
    ``self.resize_image`` and handed to ``self.client.add_image``.

    Args:
        video_paths: iterable of video file paths, or None/empty for no-op.
        interval: sampling interval passed to the ffmpeg filter (>= 1).
        target_width: width forwarded to ``resize_image``.
        include_filename: when truthy, frames are labelled with the video's
            base name; otherwise with a generic "Video frame N" label.

    Returns:
        The client passed to the constructor (``self.client``).

    Raises:
        ServiceValidationError: if a file is missing, ``interval`` is
            invalid, ffmpeg fails, or frame processing raises.
    """
    # Local imports: only this method needs them.
    import subprocess
    import tempfile

    # Nothing to do - return before touching hass/loop state.
    if not video_paths:
        return self.client

    if interval < 1:
        raise ServiceValidationError(
            f"Error: interval must be >= 1, got {interval}")

    def _run_ffmpeg(command):
        # Argument list + no shell: paths with spaces or shell
        # metacharacters are passed to ffmpeg verbatim.
        return subprocess.run(
            command,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )

    _LOGGER.debug("Processing videos: %s", video_paths)
    loop = self.hass.loop
    for video_path in video_paths:
        video_path = video_path.strip()
        if not os.path.exists(video_path):
            raise ServiceValidationError(
                f"File {video_path} does not exist")

        # One fresh directory per video so frames of different videos
        # never mix and leftovers of a failed run cannot be picked up.
        tmp_dir = await loop.run_in_executor(
            None, tempfile.mkdtemp, None, "llmvision_frames_")
        try:
            # Same filter string the shell used to hand to ffmpeg.
            # NOTE(review): fps=1/interval followed by the select
            # expression looks like it decimates twice - confirm the
            # intended sampling rate.
            frame_filter = (
                f"fps=1/{interval},"
                f"select=eq(n\\,0)+not(mod(n\\,{interval}))"
            )
            ffmpeg_cmd = [
                "ffmpeg",
                "-i", video_path,
                "-vf", frame_filter,
                os.path.join(tmp_dir, "frame%04d.png"),
            ]
            result = await loop.run_in_executor(None, _run_ffmpeg, ffmpeg_cmd)
            if result.returncode != 0:
                stderr_tail = result.stderr.decode(
                    "utf-8", errors="replace")[-500:]
                raise ServiceValidationError(
                    f"Error: ffmpeg failed for {video_path}: {stderr_tail}")

            # listdir order is arbitrary; sort so frames stay chronological.
            frame_files = sorted(
                await loop.run_in_executor(None, os.listdir, tmp_dir))
            video_name = os.path.splitext(os.path.basename(video_path))[0]
            for frame_counter, frame_file in enumerate(frame_files):
                _LOGGER.debug("Adding frame %s", frame_file)
                frame_path = os.path.join(tmp_dir, frame_file)
                if include_filename:
                    label = f"{video_name} (frame {frame_counter})"
                else:
                    label = f"Video frame {frame_counter}"
                self.client.add_image(
                    base64_image=await self.resize_image(
                        image_path=frame_path, target_width=target_width),
                    filename=label,
                )
        except ServiceValidationError:
            # Already user-facing; do not wrap a second time.
            raise
        except Exception as e:
            raise ServiceValidationError(f"Error: {e}") from e
        finally:
            # Always clean up, also when extraction or encoding failed.
            await loop.run_in_executor(None, shutil.rmtree, tmp_dir, True)
    return self.client
16 changes: 3 additions & 13 deletions custom_components/llmvision/request_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, hass, message, max_tokens, temperature, detail):
self.filenames = []

async def make_request(self, call):
_LOGGER.debug(f"Base64 Images: {self.base64_images}")
_LOGGER.debug(f"Base64 Images: {sanitize_data(self.base64_images)}")
if call.provider == 'OpenAI':
api_key = self.hass.data.get(DOMAIN).get(CONF_OPENAI_API_KEY)
model = call.model
Expand Down Expand Up @@ -338,17 +338,7 @@ async def _fetch(self, url):
return data

def _validate_call(self, provider, api_key, base64_images, ip_address=None, port=None):
"""Validate the configuration for the component
Args:
mode (string): "OpenAI" or "LocalAI"
api_key (string): OpenAI API key
ip_address (string): LocalAI server IP address
port (string): LocalAI server port
Raises:
ServiceValidationError: if configuration is invalid
"""
"""Validate the service call data"""
# Checks for OpenAI
if provider == 'OpenAI':
if not api_key:
Expand All @@ -368,7 +358,7 @@ def _validate_call(self, provider, api_key, base64_images, ip_address=None, port
elif provider == 'Ollama':
if not ip_address or not port:
raise ServiceValidationError(ERROR_OLLAMA_NOT_CONFIGURED)
# File path validation
# Check media input
if base64_images == []:
raise ServiceValidationError(ERROR_NO_IMAGE_INPUT)

Expand Down
6 changes: 3 additions & 3 deletions custom_components/llmvision/services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ video_analyzer:
text:
multiline: true
video_file:
name: Image File
name: Video File
required: true
description: 'Local path to video'
example: "/config/www/recordings/front_door.mp4"
Expand All @@ -147,8 +147,8 @@ video_analyzer:
default: 3
selector:
number:
min: 0.1
max: 100.0
min: 1
max: 60
include_filename:
name: Include Filename
required: false
Expand Down

0 comments on commit b73507f

Please sign in to comment.