Commit

Merge branch 'main' into lora_loader_optional
skunkworxdark authored Jan 21, 2025
2 parents e52be5a + 83e33a4 commit 32766ab
Showing 8 changed files with 327 additions and 41 deletions.
7 changes: 7 additions & 0 deletions invokeai/app/invocations/fields.py
@@ -300,6 +300,13 @@ def check_coords(self):
raise ValueError(f"y_min ({self.y_min}) is greater than y_max ({self.y_max}).")
return self

def tuple(self) -> Tuple[int, int, int, int]:
"""
Returns the bounding box as a tuple suitable for use with PIL's `Image.crop()` method.
This method returns a tuple of the form (left, upper, right, lower) == (x_min, y_min, x_max, y_max).
"""
return (self.x_min, self.y_min, self.x_max, self.y_max)


class MetadataField(RootModel[dict[str, Any]]):
"""
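For reference, a minimal sketch of how the `tuple()` helper added above is meant to be consumed. The `Box` model below is a hypothetical stand-in for `BoundingBoxField` (same fields, same helper), not the InvokeAI class itself.

from PIL import Image
from pydantic import BaseModel


class Box(BaseModel):
    # Hypothetical stand-in for BoundingBoxField, defined here so the sketch is self-contained.
    x_min: int
    y_min: int
    x_max: int
    y_max: int

    def tuple(self) -> tuple[int, int, int, int]:
        # (left, upper, right, lower) -- the order PIL's Image.crop() expects.
        return (self.x_min, self.y_min, self.x_max, self.y_max)


image = Image.new("RGB", (640, 480), "white")
box = Box(x_min=100, y_min=50, x_max=300, y_max=250)
cropped = image.crop(box.tuple())
print(cropped.size)  # (200, 200)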
13 changes: 9 additions & 4 deletions invokeai/app/invocations/ideal_size.py
@@ -21,7 +21,7 @@ class IdealSizeOutput(BaseInvocationOutput):
"ideal_size",
title="Ideal Size",
tags=["latents", "math", "ideal_size"],
version="1.0.3",
version="1.0.4",
)
class IdealSizeInvocation(BaseInvocation):
"""Calculates the ideal size for generation to avoid duplication"""
@@ -41,11 +41,16 @@ def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR
def invoke(self, context: InvocationContext) -> IdealSizeOutput:
unet_config = context.models.get_config(self.unet.unet.key)
aspect = self.width / self.height
- dimension: float = 512
- if unet_config.base == BaseModelType.StableDiffusion2:

+ if unet_config.base == BaseModelType.StableDiffusion1:
+ dimension = 512
+ elif unet_config.base == BaseModelType.StableDiffusion2:
dimension = 768
- elif unet_config.base == BaseModelType.StableDiffusionXL:
+ elif unet_config.base in (BaseModelType.StableDiffusionXL, BaseModelType.Flux, BaseModelType.StableDiffusion3):
dimension = 1024
+ else:
+ raise ValueError(f"Unsupported model type: {unet_config.base}")

dimension = dimension * self.multiplier
min_dimension = math.floor(dimension * 0.5)
model_area = dimension * dimension # hardcoded for now since all models are trained on square images
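As a standalone illustration of the updated base-dimension selection, here is a hedged sketch. The string keys stand in for BaseModelType members, and the width/height derivation at the end is an assumption for illustration; only the dimension table mirrors the change in this commit.

import math


def ideal_dimensions(width: int, height: int, base: str, multiplier: float = 1.0) -> tuple[int, int]:
    # Pick the training resolution for the model family (mirrors the branch added above).
    if base == "sd-1":
        dimension = 512
    elif base == "sd-2":
        dimension = 768
    elif base in ("sdxl", "flux", "sd-3"):
        dimension = 1024
    else:
        raise ValueError(f"Unsupported model type: {base}")

    dimension *= multiplier
    model_area = dimension * dimension  # models are trained on square images
    aspect = width / height

    # Assumed derivation: keep roughly the training area, match the requested aspect
    # ratio, and round down to a multiple of 8 (the usual latent scale factor).
    ideal_height = math.sqrt(model_area / aspect)
    ideal_width = ideal_height * aspect
    return int(ideal_width) // 8 * 8, int(ideal_height) // 8 * 8


print(ideal_dimensions(1920, 1080, "sdxl"))  # roughly 1360x768 for a 16:9 request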
91 changes: 66 additions & 25 deletions invokeai/app/invocations/image.py
@@ -13,6 +13,7 @@
)
from invokeai.app.invocations.constants import IMAGE_MODES
from invokeai.app.invocations.fields import (
BoundingBoxField,
ColorField,
FieldDescriptions,
ImageField,
@@ -997,10 +998,10 @@ def invoke(self, context: InvocationContext) -> ImageOutput:

@invocation(
"mask_from_id",
title="Mask from ID",
title="Mask from Segmented Image",
tags=["image", "mask", "id"],
category="image",
version="1.0.0",
version="1.0.1",
)
class MaskFromIDInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generate a mask for a particular color in an ID Map"""
@@ -1010,40 +1011,24 @@ class MaskFromIDInvocation(BaseInvocation, WithMetadata, WithBoard):
threshold: int = InputField(default=100, description="Threshold for color detection")
invert: bool = InputField(default=False, description="Whether or not to invert the mask")

- def rgba_to_hex(self, rgba_color: tuple[int, int, int, int]):
- r, g, b, a = rgba_color
- hex_code = "#{:02X}{:02X}{:02X}{:02X}".format(r, g, b, int(a * 255))
- return hex_code

- def id_to_mask(self, id_mask: Image.Image, color: tuple[int, int, int, int], threshold: int = 100):
- if id_mask.mode != "RGB":
- id_mask = id_mask.convert("RGB")
+ def invoke(self, context: InvocationContext) -> ImageOutput:
+ image = context.images.get_pil(self.image.image_name, mode="RGBA")

- # Can directly just use the tuple but I'll leave this rgba_to_hex here
- # incase anyone prefers using hex codes directly instead of the color picker
- hex_color_str = self.rgba_to_hex(color)
- rgb_color = numpy.array([int(hex_color_str[i : i + 2], 16) for i in (1, 3, 5)])
+ np_color = numpy.array(self.color.tuple())

# Maybe there's a faster way to calculate this distance but I can't think of any right now.
- color_distance = numpy.linalg.norm(id_mask - rgb_color, axis=-1)
+ color_distance = numpy.linalg.norm(image - np_color, axis=-1)

# Create a mask based on the threshold and the distance calculated above
- binary_mask = (color_distance < threshold).astype(numpy.uint8) * 255
+ binary_mask = (color_distance < self.threshold).astype(numpy.uint8) * 255

# Convert the mask back to PIL
binary_mask_pil = Image.fromarray(binary_mask)

- return binary_mask_pil

- def invoke(self, context: InvocationContext) -> ImageOutput:
- image = context.images.get_pil(self.image.image_name)

- mask = self.id_to_mask(image, self.color.tuple(), self.threshold)

if self.invert:
- mask = ImageOps.invert(mask)
+ binary_mask_pil = ImageOps.invert(binary_mask_pil)

- image_dto = context.images.save(image=mask, image_category=ImageCategory.MASK)
+ image_dto = context.images.save(image=binary_mask_pil, image_category=ImageCategory.MASK)

return ImageOutput.build(image_dto)

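The rewritten invocation boils down to a per-pixel colour-distance threshold. A self-contained sketch of that idea follows, written as a plain function rather than an invocation class and assuming an RGBA input image.

import numpy as np
from PIL import Image, ImageOps


def mask_from_color(image: Image.Image, color: tuple[int, int, int, int],
                    threshold: int = 100, invert: bool = False) -> Image.Image:
    rgba = np.asarray(image.convert("RGBA"), dtype=np.float32)
    # Euclidean distance from every pixel to the target colour, thresholded into a binary mask.
    distance = np.linalg.norm(rgba - np.array(color, dtype=np.float32), axis=-1)
    mask = Image.fromarray((distance < threshold).astype(np.uint8) * 255)
    return ImageOps.invert(mask) if invert else mask


segments = Image.new("RGBA", (64, 64), (255, 0, 0, 255))
print(mask_from_color(segments, (255, 0, 0, 255)).getextrema())  # (255, 255): every pixel matched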
@@ -1154,3 +1139,59 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
image_dto = context.images.save(image=noisy_image)

return ImageOutput.build(image_dto)


@invocation(
"crop_image_to_bounding_box",
title="Crop Image to Bounding Box",
category="image",
version="1.0.0",
tags=["image", "crop"],
classification=Classification.Beta,
)
class CropImageToBoundingBoxInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Crop an image to the given bounding box. If the bounding box is omitted, the image is cropped to the non-transparent pixels."""

image: ImageField = InputField(description="The image to crop")
bounding_box: BoundingBoxField | None = InputField(
default=None, description="The bounding box to crop the image to"
)

def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name)

bounding_box = self.bounding_box.tuple() if self.bounding_box is not None else image.getbbox()

cropped_image = image.crop(bounding_box)

image_dto = context.images.save(image=cropped_image)
return ImageOutput.build(image_dto)

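A sketch of the crop behaviour described in the docstring, using plain PIL calls; the interesting part is the fallback to getbbox() over non-transparent pixels when no box is supplied.

from PIL import Image


def crop_to_box(image: Image.Image, box: tuple[int, int, int, int] | None = None) -> Image.Image:
    # When no box is supplied, PIL's getbbox() gives the bounds of the non-zero
    # (here: non-transparent) pixels, which is what the invocation falls back to.
    bounding_box = box if box is not None else image.getbbox()
    return image.crop(bounding_box)


canvas = Image.new("RGBA", (128, 128), (0, 0, 0, 0))
canvas.paste(Image.new("RGBA", (32, 32), (255, 255, 255, 255)), (10, 20))
print(crop_to_box(canvas).size)  # (32, 32): auto-cropped to the opaque patch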

@invocation(
"paste_image_into_bounding_box",
title="Paste Image into Bounding Box",
category="image",
version="1.0.0",
tags=["image", "crop"],
classification=Classification.Beta,
)
class PasteImageIntoBoundingBoxInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Paste the source image into the target image at the given bounding box.
The source image must be the same size as the bounding box, and the bounding box must fit within the target image."""

source_image: ImageField = InputField(description="The image to paste")
target_image: ImageField = InputField(description="The image to paste into")
bounding_box: BoundingBoxField = InputField(description="The bounding box to paste the image into")

def invoke(self, context: InvocationContext) -> ImageOutput:
source_image = context.images.get_pil(self.source_image.image_name, mode="RGBA")
target_image = context.images.get_pil(self.target_image.image_name, mode="RGBA")

bounding_box = self.bounding_box.tuple()

target_image.paste(source_image, bounding_box, source_image)

image_dto = context.images.save(image=target_image)
return ImageOutput.build(image_dto)
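The paste step relies on PIL's three-argument paste(), where the source image doubles as its own alpha mask. A minimal sketch, with sizes chosen so the 64x64 source exactly fills the box, as the docstring requires:

from PIL import Image

target = Image.new("RGBA", (256, 256), (0, 0, 0, 255))
source = Image.new("RGBA", (64, 64), (255, 0, 0, 128))

box = (32, 32, 96, 96)  # must match the source size (64x64)
# The third argument is the mask; passing the RGBA source uses its alpha channel,
# so semi-transparent source pixels are blended over the target.
target.paste(source, box, source)

print(target.getpixel((33, 33)))  # red blended over black at roughly half opacity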
64 changes: 61 additions & 3 deletions invokeai/app/invocations/mask.py
@@ -2,9 +2,22 @@
import torch
from PIL import Image

- from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
- from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithBoard, WithMetadata
- from invokeai.app.invocations.primitives import ImageOutput, MaskOutput
+ from invokeai.app.invocations.baseinvocation import (
+ BaseInvocation,
+ Classification,
+ InvocationContext,
+ invocation,
+ )
+ from invokeai.app.invocations.fields import (
+ BoundingBoxField,
+ ColorField,
+ ImageField,
+ InputField,
+ TensorField,
+ WithBoard,
+ WithMetadata,
+ )
+ from invokeai.app.invocations.primitives import BoundingBoxOutput, ImageOutput, MaskOutput
from invokeai.backend.image_util.util import pil_to_np


@@ -201,3 +214,48 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
image_dto = context.images.save(image=masked_image)

return ImageOutput.build(image_dto)


WHITE = ColorField(r=255, g=255, b=255, a=255)


@invocation(
"get_image_mask_bounding_box",
title="Get Image Mask Bounding Box",
tags=["mask"],
category="mask",
version="1.0.0",
classification=Classification.Beta,
)
class GetMaskBoundingBoxInvocation(BaseInvocation):
"""Gets the bounding box of the given mask image."""

mask: ImageField = InputField(description="The mask to crop.")
margin: int = InputField(default=0, description="Margin to add to the bounding box.")
mask_color: ColorField = InputField(default=WHITE, description="Color of the mask in the image.")

def invoke(self, context: InvocationContext) -> BoundingBoxOutput:
mask = context.images.get_pil(self.mask.image_name, mode="RGBA")
mask_np = np.array(mask)

# Convert mask_color to RGBA tuple
mask_color_rgb = self.mask_color.tuple()

# Find the bounding box of the mask color
y, x = np.where(np.all(mask_np == mask_color_rgb, axis=-1))

if len(x) == 0 or len(y) == 0:
# No pixels found with the given color
return BoundingBoxOutput(bounding_box=BoundingBoxField(x_min=0, y_min=0, x_max=0, y_max=0))

left, upper, right, lower = x.min(), y.min(), x.max(), y.max()

# Add the margin
left = max(0, left - self.margin)
upper = max(0, upper - self.margin)
right = min(mask_np.shape[1], right + self.margin)
lower = min(mask_np.shape[0], lower + self.margin)

bounding_box = BoundingBoxField(x_min=left, y_min=upper, x_max=right, y_max=lower)

return BoundingBoxOutput(bounding_box=bounding_box)
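A standalone sketch of the bounding-box extraction above: exact colour match with numpy, min/max of the matching coordinates, then a margin clamped to the image bounds. The helper is illustrative, not the invocation itself.

import numpy as np
from PIL import Image


def mask_bbox(mask: Image.Image, color=(255, 255, 255, 255), margin: int = 0) -> tuple[int, int, int, int]:
    arr = np.array(mask.convert("RGBA"))
    # Coordinates where every channel equals the mask colour exactly.
    ys, xs = np.where(np.all(arr == np.array(color), axis=-1))
    if xs.size == 0:
        return (0, 0, 0, 0)  # no pixels of that colour
    left = max(0, int(xs.min()) - margin)
    upper = max(0, int(ys.min()) - margin)
    right = min(arr.shape[1], int(xs.max()) + margin)
    lower = min(arr.shape[0], int(ys.max()) + margin)
    return (left, upper, right, lower)


m = Image.new("RGBA", (100, 100), (0, 0, 0, 255))
m.paste(Image.new("RGBA", (10, 10), (255, 255, 255, 255)), (40, 40))
print(mask_bbox(m, margin=2))  # (38, 38, 51, 51)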
8 changes: 7 additions & 1 deletion invokeai/frontend/web/public/locales/en.json
@@ -2196,7 +2196,13 @@
},
"whatsNew": {
"whatsNewInInvoke": "What's New in Invoke",
"items": ["Low-VRAM mode", "Dynamic memory management", "Faster model loading times", "Fewer memory errors"],
"items": [
"Low-VRAM mode",
"Dynamic memory management",
"Faster model loading times",
"Fewer memory errors",
"Expanded workflow batch capabilities"
],
"readReleaseNotes": "Read Release Notes",
"watchRecentReleaseVideos": "Watch Recent Release Videos",
"watchUiUpdatesOverview": "Watch UI Updates Overview"
@@ -38,10 +38,13 @@ const FieldHandle = (props: FieldHandleProps) => {
borderColor: color,
borderRadius: isModelType || type.batch ? 4 : '100%',
zIndex: 1,
- transform: type.batch ? 'rotate(45deg) translateX(-0.3rem) translateY(-0.3rem)' : 'none',
transformOrigin: 'center',
};

+ if (type.batch) {
+ s.transform = 'rotate(45deg) translateX(-0.3rem) translateY(-0.3rem)';
+ }

if (handleType === 'target') {
s.insetInlineStart = '-1rem';
} else {