-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(api): add source image filters for controlnet and others
- Loading branch information
Showing
11 changed files
with
722 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from .utils import ( | ||
expand_image, | ||
valid_image, | ||
) | ||
from .mask_filter import ( | ||
mask_filter_gaussian_multiply, | ||
mask_filter_gaussian_screen, | ||
mask_filter_none, | ||
) | ||
from .noise_source import ( | ||
noise_source_fill_edge, | ||
noise_source_fill_mask, | ||
noise_source_gaussian, | ||
noise_source_histogram, | ||
noise_source_normal, | ||
noise_source_uniform, | ||
) | ||
from .source_filter import ( | ||
source_filter_canny, | ||
source_filter_depth, | ||
source_filter_face, | ||
source_filter_gaussian, | ||
source_filter_hed, | ||
source_filter_mlsd, | ||
source_filter_noise, | ||
source_filter_normal, | ||
source_filter_openpose, | ||
source_filter_scribble, | ||
source_filter_segment, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
# from https://github.com/ForserX/StableDiffusionUI/blob/main/data/repo/diffusion_scripts/modules/controlnet/laion_face_common.py | ||
|
||
from typing import Mapping | ||
|
||
import mediapipe as mp | ||
import numpy | ||
|
||
mp_drawing = mp.solutions.drawing_utils | ||
mp_drawing_styles = mp.solutions.drawing_styles | ||
mp_face_detection = mp.solutions.face_detection # Only for counting faces. | ||
mp_face_mesh = mp.solutions.face_mesh | ||
mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION | ||
mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS | ||
mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS | ||
|
||
DrawingSpec = mp.solutions.drawing_styles.DrawingSpec | ||
PoseLandmark = mp.solutions.drawing_styles.PoseLandmark | ||
|
||
min_face_size_pixels: int = 64 | ||
f_thick = 2 | ||
f_rad = 1 | ||
right_iris_draw = DrawingSpec( | ||
color=(10, 200, 250), thickness=f_thick, circle_radius=f_rad | ||
) | ||
right_eye_draw = DrawingSpec( | ||
color=(10, 200, 180), thickness=f_thick, circle_radius=f_rad | ||
) | ||
right_eyebrow_draw = DrawingSpec( | ||
color=(10, 220, 180), thickness=f_thick, circle_radius=f_rad | ||
) | ||
left_iris_draw = DrawingSpec( | ||
color=(250, 200, 10), thickness=f_thick, circle_radius=f_rad | ||
) | ||
left_eye_draw = DrawingSpec( | ||
color=(180, 200, 10), thickness=f_thick, circle_radius=f_rad | ||
) | ||
left_eyebrow_draw = DrawingSpec( | ||
color=(180, 220, 10), thickness=f_thick, circle_radius=f_rad | ||
) | ||
mouth_draw = DrawingSpec(color=(10, 180, 10), thickness=f_thick, circle_radius=f_rad) | ||
head_draw = DrawingSpec(color=(10, 200, 10), thickness=f_thick, circle_radius=f_rad) | ||
|
||
# mp_face_mesh.FACEMESH_CONTOURS has all the items we care about. | ||
face_connection_spec = {} | ||
for edge in mp_face_mesh.FACEMESH_FACE_OVAL: | ||
face_connection_spec[edge] = head_draw | ||
for edge in mp_face_mesh.FACEMESH_LEFT_EYE: | ||
face_connection_spec[edge] = left_eye_draw | ||
for edge in mp_face_mesh.FACEMESH_LEFT_EYEBROW: | ||
face_connection_spec[edge] = left_eyebrow_draw | ||
# for edge in mp_face_mesh.FACEMESH_LEFT_IRIS: | ||
# face_connection_spec[edge] = left_iris_draw | ||
for edge in mp_face_mesh.FACEMESH_RIGHT_EYE: | ||
face_connection_spec[edge] = right_eye_draw | ||
for edge in mp_face_mesh.FACEMESH_RIGHT_EYEBROW: | ||
face_connection_spec[edge] = right_eyebrow_draw | ||
# for edge in mp_face_mesh.FACEMESH_RIGHT_IRIS: | ||
# face_connection_spec[edge] = right_iris_draw | ||
for edge in mp_face_mesh.FACEMESH_LIPS: | ||
face_connection_spec[edge] = mouth_draw | ||
iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw} | ||
|
||
|
||
def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2): | ||
"""We have a custom function to draw the pupils because the mp.draw_landmarks method requires a parameter for all | ||
landmarks. Until our PR is merged into mediapipe, we need this separate method.""" | ||
if len(image.shape) != 3: | ||
raise ValueError("Input image must be H,W,C.") | ||
image_rows, image_cols, image_channels = image.shape | ||
if image_channels != 3: # BGR channels | ||
raise ValueError("Input image must contain three channel bgr data.") | ||
for idx, landmark in enumerate(landmark_list.landmark): | ||
if (landmark.HasField("visibility") and landmark.visibility < 0.9) or ( | ||
landmark.HasField("presence") and landmark.presence < 0.5 | ||
): | ||
continue | ||
if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0: | ||
continue | ||
image_x = int(image_cols * landmark.x) | ||
image_y = int(image_rows * landmark.y) | ||
draw_color = None | ||
if isinstance(drawing_spec, Mapping): | ||
if drawing_spec.get(idx) is None: | ||
continue | ||
else: | ||
draw_color = drawing_spec[idx].color | ||
elif isinstance(drawing_spec, DrawingSpec): | ||
draw_color = drawing_spec.color | ||
image[ | ||
image_y - halfwidth : image_y + halfwidth, | ||
image_x - halfwidth : image_x + halfwidth, | ||
:, | ||
] = draw_color | ||
|
||
|
||
def reverse_channels(image): | ||
"""Given a numpy array in RGB form, convert to BGR. Will also convert from BGR to RGB.""" | ||
# im[:,:,::-1] is a neat hack to convert BGR to RGB by reversing the indexing order. | ||
# im[:,:,::[2,1,0]] would also work but makes a copy of the data. | ||
return image[:, :, ::-1] | ||
|
||
|
||
def generate_annotation(img_rgb, max_faces: int, min_confidence: float): | ||
""" | ||
Find up to 'max_faces' inside the provided input image. | ||
If min_face_size_pixels is provided and nonzero it will be used to filter faces that occupy less than this many | ||
pixels in the image. | ||
""" | ||
with mp_face_mesh.FaceMesh( | ||
static_image_mode=True, | ||
max_num_faces=max_faces, | ||
refine_landmarks=True, | ||
min_detection_confidence=min_confidence, | ||
) as facemesh: | ||
img_height, img_width, img_channels = img_rgb.shape | ||
assert img_channels == 3 | ||
|
||
results = facemesh.process(img_rgb).multi_face_landmarks | ||
|
||
if results is None: | ||
print("No faces detected in controlnet image for Mediapipe face annotator.") | ||
return numpy.zeros_like(img_rgb) | ||
|
||
# Filter faces that are too small | ||
filtered_landmarks = [] | ||
for lm in results: | ||
landmarks = lm.landmark | ||
face_rect = [ | ||
landmarks[0].x, | ||
landmarks[0].y, | ||
landmarks[0].x, | ||
landmarks[0].y, | ||
] # Left, up, right, down. | ||
for i in range(len(landmarks)): | ||
face_rect[0] = min(face_rect[0], landmarks[i].x) | ||
face_rect[1] = min(face_rect[1], landmarks[i].y) | ||
face_rect[2] = max(face_rect[2], landmarks[i].x) | ||
face_rect[3] = max(face_rect[3], landmarks[i].y) | ||
if min_face_size_pixels > 0: | ||
face_width = abs(face_rect[2] - face_rect[0]) | ||
face_height = abs(face_rect[3] - face_rect[1]) | ||
face_width_pixels = face_width * img_width | ||
face_height_pixels = face_height * img_height | ||
face_size = min(face_width_pixels, face_height_pixels) | ||
if face_size >= min_face_size_pixels: | ||
filtered_landmarks.append(lm) | ||
else: | ||
filtered_landmarks.append(lm) | ||
|
||
# Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start. | ||
empty = numpy.zeros_like(img_rgb) | ||
|
||
# Draw detected faces: | ||
for face_landmarks in filtered_landmarks: | ||
mp_drawing.draw_landmarks( | ||
empty, | ||
face_landmarks, | ||
connections=face_connection_spec.keys(), | ||
landmark_drawing_spec=None, | ||
connection_drawing_spec=face_connection_spec, | ||
) | ||
draw_pupils(empty, face_landmarks, iris_landmark_spec, 2) | ||
|
||
# Flip BGR back to RGB. | ||
empty = reverse_channels(empty).copy() | ||
|
||
return empty |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from PIL import Image, ImageChops, ImageFilter | ||
|
||
from .params import Point | ||
|
||
|
||
def mask_filter_none( | ||
mask: Image.Image, dims: Point, origin: Point, fill="white", **kw | ||
) -> Image.Image: | ||
width, height = dims | ||
|
||
noise = Image.new("RGB", (width, height), fill) | ||
noise.paste(mask, origin) | ||
|
||
return noise | ||
|
||
|
||
def mask_filter_gaussian_multiply( | ||
mask: Image.Image, dims: Point, origin: Point, rounds=3, **kw | ||
) -> Image.Image: | ||
""" | ||
Gaussian blur with multiply, source image centered on white canvas. | ||
""" | ||
noise = mask_filter_none(mask, dims, origin) | ||
|
||
for _i in range(rounds): | ||
blur = noise.filter(ImageFilter.GaussianBlur(5)) | ||
noise = ImageChops.multiply(noise, blur) | ||
|
||
return noise | ||
|
||
|
||
def mask_filter_gaussian_screen( | ||
mask: Image.Image, dims: Point, origin: Point, rounds=3, **kw | ||
) -> Image.Image: | ||
""" | ||
Gaussian blur, source image centered on white canvas. | ||
""" | ||
noise = mask_filter_none(mask, dims, origin) | ||
|
||
for _i in range(rounds): | ||
blur = noise.filter(ImageFilter.GaussianBlur(5)) | ||
noise = ImageChops.screen(noise, blur) | ||
|
||
return noise |
Oops, something went wrong.