Commit

Merge branch 'main' into lora_loader_optional
skunkworxdark authored Jan 21, 2025
2 parents e52be5a + 83e33a4 commit 32766ab
Showing 8 changed files with 327 additions and 41 deletions.
7 changes: 7 additions & 0 deletions invokeai/app/invocations/fields.py
@@ -300,6 +300,13 @@ def check_coords(self):
raise ValueError(f"y_min ({self.y_min}) is greater than y_max ({self.y_max}).")
return self

def tuple(self) -> Tuple[int, int, int, int]:
"""
Returns the bounding box as a tuple suitable for use with PIL's `Image.crop()` method.
This method returns a tuple of the form (left, upper, right, lower) == (x_min, y_min, x_max, y_max).
"""
return (self.x_min, self.y_min, self.x_max, self.y_max)


class MetadataField(RootModel[dict[str, Any]]):
"""
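For reference, a minimal sketch of how the `tuple()` helper added above is meant to be consumed. The `Box` model below is a hypothetical stand-in for `BoundingBoxField` (same fields, same helper), not the InvokeAI class itself.

from PIL import Image
from pydantic import BaseModel


class Box(BaseModel):
    # Hypothetical stand-in for BoundingBoxField, defined here so the sketch is self-contained.
    x_min: int
    y_min: int
    x_max: int
    y_max: int

    def tuple(self) -> tuple[int, int, int, int]:
        # (left, upper, right, lower) -- the order PIL's Image.crop() expects.
        return (self.x_min, self.y_min, self.x_max, self.y_max)


image = Image.new("RGB", (640, 480), "white")
box = Box(x_min=100, y_min=50, x_max=300, y_max=250)
cropped = image.crop(box.tuple())
print(cropped.size)  # (200, 200)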
13 changes: 9 additions & 4 deletions invokeai/app/invocations/ideal_size.py
@@ -21,7 +21,7 @@ class IdealSizeOutput(BaseInvocationOutput):
"ideal_size",
title="Ideal Size",
tags=["latents", "math", "ideal_size"],
version="1.0.3",
version="1.0.4",
)
class IdealSizeInvocation(BaseInvocation):
"""Calculates the ideal size for generation to avoid duplication"""
@@ -41,11 +41,16 @@ def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR
def invoke(self, context: InvocationContext) -> IdealSizeOutput:
unet_config = context.models.get_config(self.unet.unet.key)
aspect = self.width / self.height
- dimension: float = 512
- if unet_config.base == BaseModelType.StableDiffusion2:

+ if unet_config.base == BaseModelType.StableDiffusion1:
+ dimension = 512
+ elif unet_config.base == BaseModelType.StableDiffusion2:
dimension = 768
- elif unet_config.base == BaseModelType.StableDiffusionXL:
+ elif unet_config.base in (BaseModelType.StableDiffusionXL, BaseModelType.Flux, BaseModelType.StableDiffusion3):
dimension = 1024
+ else:
+ raise ValueError(f"Unsupported model type: {unet_config.base}")

dimension = dimension * self.multiplier
min_dimension = math.floor(dimension * 0.5)
model_area = dimension * dimension # hardcoded for now since all models are trained on square images
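As a standalone illustration of the updated base-dimension selection, here is a hedged sketch. The string keys stand in for BaseModelType members, and the width/height derivation at the end is an assumption for illustration; only the dimension table mirrors the change in this commit.

import math


def ideal_dimensions(width: int, height: int, base: str, multiplier: float = 1.0) -> tuple[int, int]:
    # Pick the training resolution for the model family (mirrors the branch added above).
    if base == "sd-1":
        dimension = 512
    elif base == "sd-2":
        dimension = 768
    elif base in ("sdxl", "flux", "sd-3"):
        dimension = 1024
    else:
        raise ValueError(f"Unsupported model type: {base}")

    dimension *= multiplier
    model_area = dimension * dimension  # models are trained on square images
    aspect = width / height

    # Assumed derivation: keep roughly the training area, match the requested aspect
    # ratio, and round down to a multiple of 8 (the usual latent scale factor).
    ideal_height = math.sqrt(model_area / aspect)
    ideal_width = ideal_height * aspect
    return int(ideal_width) // 8 * 8, int(ideal_height) // 8 * 8


print(ideal_dimensions(1920, 1080, "sdxl"))  # roughly 1360x768 for a 16:9 request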
91 changes: 66 additions & 25 deletions invokeai/app/invocations/image.py
@@ -13,6 +13,7 @@
)
from invokeai.app.invocations.constants import IMAGE_MODES
from invokeai.app.invocations.fields import (
BoundingBoxField,
ColorField,
FieldDescriptions,
ImageField,
@@ -997,10 +998,10 @@ def invoke(self, context: InvocationContext) -> ImageOutput:

@invocation(
"mask_from_id",
title="Mask from ID",
title="Mask from Segmented Image",
tags=["image", "mask", "id"],
category="image",
version="1.0.0",
version="1.0.1",
)
class MaskFromIDInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generate a mask for a particular color in an ID Map"""
@@ -1010,40 +1011,24 @@ class MaskFromIDInvocation(BaseInvocation, WithMetadata, WithBoard):
threshold: int = InputField(default=100, description="Threshold for color detection")
invert: bool = InputField(default=False, description="Whether or not to invert the mask")

- def rgba_to_hex(self, rgba_color: tuple[int, int, int, int]):
- r, g, b, a = rgba_color
- hex_code = "#{:02X}{:02X}{:02X}{:02X}".format(r, g, b, int(a * 255))
- return hex_code

- def id_to_mask(self, id_mask: Image.Image, color: tuple[int, int, int, int], threshold: int = 100):
- if id_mask.mode != "RGB":
- id_mask = id_mask.convert("RGB")
+ def invoke(self, context: InvocationContext) -> ImageOutput:
+ image = context.images.get_pil(self.image.image_name, mode="RGBA")

- # Can directly just use the tuple but I'll leave this rgba_to_hex here
- # incase anyone prefers using hex codes directly instead of the color picker
- hex_color_str = self.rgba_to_hex(color)
- rgb_color = numpy.array([int(hex_color_str[i : i + 2], 16) for i in (1, 3, 5)])
+ np_color = numpy.array(self.color.tuple())

# Maybe there's a faster way to calculate this distance but I can't think of any right now.
- color_distance = numpy.linalg.norm(id_mask - rgb_color, axis=-1)
+ color_distance = numpy.linalg.norm(image - np_color, axis=-1)

# Create a mask based on the threshold and the distance calculated above
- binary_mask = (color_distance < threshold).astype(numpy.uint8) * 255
+ binary_mask = (color_distance < self.threshold).astype(numpy.uint8) * 255

# Convert the mask back to PIL
binary_mask_pil = Image.fromarray(binary_mask)

- return binary_mask_pil

- def invoke(self, context: InvocationContext) -> ImageOutput:
- image = context.images.get_pil(self.image.image_name)

- mask = self.id_to_mask(image, self.color.tuple(), self.threshold)

if self.invert:
- mask = ImageOps.invert(mask)
+ binary_mask_pil = ImageOps.invert(binary_mask_pil)

- image_dto = context.images.save(image=mask, image_category=ImageCategory.MASK)
+ image_dto = context.images.save(image=binary_mask_pil, image_category=ImageCategory.MASK)

return ImageOutput.build(image_dto)

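The rewritten invocation boils down to a per-pixel colour-distance threshold. A self-contained sketch of that idea follows, written as a plain function rather than an invocation class and assuming an RGBA input image.

import numpy as np
from PIL import Image, ImageOps


def mask_from_color(image: Image.Image, color: tuple[int, int, int, int],
                    threshold: int = 100, invert: bool = False) -> Image.Image:
    rgba = np.asarray(image.convert("RGBA"), dtype=np.float32)
    # Euclidean distance from every pixel to the target colour, thresholded into a binary mask.
    distance = np.linalg.norm(rgba - np.array(color, dtype=np.float32), axis=-1)
    mask = Image.fromarray((distance < threshold).astype(np.uint8) * 255)
    return ImageOps.invert(mask) if invert else mask


segments = Image.new("RGBA", (64, 64), (255, 0, 0, 255))
print(mask_from_color(segments, (255, 0, 0, 255)).getextrema())  # (255, 255): every pixel matched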
@@ -1154,3 +1139,59 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
image_dto = context.images.save(image=noisy_image)

return ImageOutput.build(image_dto)


@invocation(
"crop_image_to_bounding_box",
title="Crop Image to Bounding Box",
category="image",
version="1.0.0",
tags=["image", "crop"],
classification=Classification.Beta,
)
class CropImageToBoundingBoxInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Crop an image to the given bounding box. If the bounding box is omitted, the image is cropped to the non-transparent pixels."""

image: ImageField = InputField(description="The image to crop")
bounding_box: BoundingBoxField | None = InputField(
default=None, description="The bounding box to crop the image to"
)

def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name)

bounding_box = self.bounding_box.tuple() if self.bounding_box is not None else image.getbbox()

cropped_image = image.crop(bounding_box)

image_dto = context.images.save(image=cropped_image)
return ImageOutput.build(image_dto)

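A sketch of the crop behaviour described in the docstring, using plain PIL calls; the interesting part is the fallback to getbbox() over non-transparent pixels when no box is supplied.

from PIL import Image


def crop_to_box(image: Image.Image, box: tuple[int, int, int, int] | None = None) -> Image.Image:
    # When no box is supplied, PIL's getbbox() gives the bounds of the non-zero
    # (here: non-transparent) pixels, which is what the invocation falls back to.
    bounding_box = box if box is not None else image.getbbox()
    return image.crop(bounding_box)


canvas = Image.new("RGBA", (128, 128), (0, 0, 0, 0))
canvas.paste(Image.new("RGBA", (32, 32), (255, 255, 255, 255)), (10, 20))
print(crop_to_box(canvas).size)  # (32, 32): auto-cropped to the opaque patch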

@invocation(
"paste_image_into_bounding_box",
title="Paste Image into Bounding Box",
category="image",
version="1.0.0",
tags=["image", "crop"],
classification=Classification.Beta,
)
class PasteImageIntoBoundingBoxInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Paste the source image into the target image at the given bounding box.
The source image must be the same size as the bounding box, and the bounding box must fit within the target image."""

source_image: ImageField = InputField(description="The image to paste")
target_image: ImageField = InputField(description="The image to paste into")
bounding_box: BoundingBoxField = InputField(description="The bounding box to paste the image into")

def invoke(self, context: InvocationContext) -> ImageOutput:
source_image = context.images.get_pil(self.source_image.image_name, mode="RGBA")
target_image = context.images.get_pil(self.target_image.image_name, mode="RGBA")

bounding_box = self.bounding_box.tuple()

target_image.paste(source_image, bounding_box, source_image)

image_dto = context.images.save(image=target_image)
return ImageOutput.build(image_dto)
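The paste step relies on PIL's three-argument paste(), where the source image doubles as its own alpha mask. A minimal sketch, with sizes chosen so the 64x64 source exactly fills the box, as the docstring requires:

from PIL import Image

target = Image.new("RGBA", (256, 256), (0, 0, 0, 255))
source = Image.new("RGBA", (64, 64), (255, 0, 0, 128))

box = (32, 32, 96, 96)  # must match the source size (64x64)
# The third argument is the mask; passing the RGBA source uses its alpha channel,
# so semi-transparent source pixels are blended over the target.
target.paste(source, box, source)

print(target.getpixel((33, 33)))  # red blended over black at roughly half opacity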
64 changes: 61 additions & 3 deletions invokeai/app/invocations/mask.py
@@ -2,9 +2,22 @@
import torch
from PIL import Image

- from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
- from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithBoard, WithMetadata
- from invokeai.app.invocations.primitives import ImageOutput, MaskOutput
+ from invokeai.app.invocations.baseinvocation import (
+ BaseInvocation,
+ Classification,
+ InvocationContext,
+ invocation,
+ )
+ from invokeai.app.invocations.fields import (
+ BoundingBoxField,
+ ColorField,
+ ImageField,
+ InputField,
+ TensorField,
+ WithBoard,
+ WithMetadata,
+ )
+ from invokeai.app.invocations.primitives import BoundingBoxOutput, ImageOutput, MaskOutput
from invokeai.backend.image_util.util import pil_to_np


@@ -201,3 +214,48 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
image_dto = context.images.save(image=masked_image)

return ImageOutput.build(image_dto)


WHITE = ColorField(r=255, g=255, b=255, a=255)


@invocation(
"get_image_mask_bounding_box",
title="Get Image Mask Bounding Box",
tags=["mask"],
category="mask",
version="1.0.0",
classification=Classification.Beta,
)
class GetMaskBoundingBoxInvocation(BaseInvocation):
"""Gets the bounding box of the given mask image."""

mask: ImageField = InputField(description="The mask to crop.")
margin: int = InputField(default=0, description="Margin to add to the bounding box.")
mask_color: ColorField = InputField(default=WHITE, description="Color of the mask in the image.")

def invoke(self, context: InvocationContext) -> BoundingBoxOutput:
mask = context.images.get_pil(self.mask.image_name, mode="RGBA")
mask_np = np.array(mask)

# Convert mask_color to RGBA tuple
mask_color_rgb = self.mask_color.tuple()

# Find the bounding box of the mask color
y, x = np.where(np.all(mask_np == mask_color_rgb, axis=-1))

if len(x) == 0 or len(y) == 0:
# No pixels found with the given color
return BoundingBoxOutput(bounding_box=BoundingBoxField(x_min=0, y_min=0, x_max=0, y_max=0))

left, upper, right, lower = x.min(), y.min(), x.max(), y.max()

# Add the margin
left = max(0, left - self.margin)
upper = max(0, upper - self.margin)
right = min(mask_np.shape[1], right + self.margin)
lower = min(mask_np.shape[0], lower + self.margin)

bounding_box = BoundingBoxField(x_min=left, y_min=upper, x_max=right, y_max=lower)

return BoundingBoxOutput(bounding_box=bounding_box)
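A standalone sketch of the bounding-box extraction above: exact colour match with numpy, min/max of the matching coordinates, then a margin clamped to the image bounds. The helper is illustrative, not the invocation itself.

import numpy as np
from PIL import Image


def mask_bbox(mask: Image.Image, color=(255, 255, 255, 255), margin: int = 0) -> tuple[int, int, int, int]:
    arr = np.array(mask.convert("RGBA"))
    # Coordinates where every channel equals the mask colour exactly.
    ys, xs = np.where(np.all(arr == np.array(color), axis=-1))
    if xs.size == 0:
        return (0, 0, 0, 0)  # no pixels of that colour
    left = max(0, int(xs.min()) - margin)
    upper = max(0, int(ys.min()) - margin)
    right = min(arr.shape[1], int(xs.max()) + margin)
    lower = min(arr.shape[0], int(ys.max()) + margin)
    return (left, upper, right, lower)


m = Image.new("RGBA", (100, 100), (0, 0, 0, 255))
m.paste(Image.new("RGBA", (10, 10), (255, 255, 255, 255)), (40, 40))
print(mask_bbox(m, margin=2))  # (38, 38, 51, 51)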
8 changes: 7 additions & 1 deletion invokeai/frontend/web/public/locales/en.json
@@ -2196,7 +2196,13 @@
},
"whatsNew": {
"whatsNewInInvoke": "What's New in Invoke",
"items": ["Low-VRAM mode", "Dynamic memory management", "Faster model loading times", "Fewer memory errors"],
"items": [
"Low-VRAM mode",
"Dynamic memory management",
"Faster model loading times",
"Fewer memory errors",
"Expanded workflow batch capabilities"
],
"readReleaseNotes": "Read Release Notes",
"watchRecentReleaseVideos": "Watch Recent Release Videos",
"watchUiUpdatesOverview": "Watch UI Updates Overview"
@@ -38,10 +38,13 @@ const FieldHandle = (props: FieldHandleProps) => {
borderColor: color,
borderRadius: isModelType || type.batch ? 4 : '100%',
zIndex: 1,
- transform: type.batch ? 'rotate(45deg) translateX(-0.3rem) translateY(-0.3rem)' : 'none',
transformOrigin: 'center',
};

+ if (type.batch) {
+ s.transform = 'rotate(45deg) translateX(-0.3rem) translateY(-0.3rem)';
+ }

if (handleType === 'target') {
s.insetInlineStart = '-1rem';
} else {