From f279a3f790ac0c3cdda723c37ac3f6731d56d4ea Mon Sep 17 00:00:00 2001
From: Anatoly Belikov
Date: Wed, 21 Aug 2024 22:25:00 +0300
Subject: [PATCH] use StableDiffusion(XL)ControlNetImg2ImgPipeline in
 Cond2ImPipe

---
 examples/cim2im.py | 28 ++++++++++++++++++
 multigen/pipes.py  | 71 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 84 insertions(+), 15 deletions(-)
 create mode 100755 examples/cim2im.py

diff --git a/examples/cim2im.py b/examples/cim2im.py
new file mode 100755
index 0000000..00fb389
--- /dev/null
+++ b/examples/cim2im.py
@@ -0,0 +1,28 @@
+"""
+First run promp2im.py.
+
+This script expects https://huggingface.co/lllyasviel/control_v11p_sd15_softedge
+to be placed in ./models-cn/control_v11p_sd15_softedge
+
+
+Modified versions of the input image will be placed in ./_projects/biolab/controlnet/
+"""
+
+from multigen.prompting import Cfgen
+from multigen.sessions import GenSession
+from multigen.pipes import Im2ImPipe, CIm2ImPipe, ModelType
+
+
+model_dir = "./models-sd/"
+model_id = "icbinp"
+
+prompt = ["bioinformatics lab with flasks and exotic flowers",
+          "happy vibrant", "green colors", "artwork", "high tech"]
+
+nprompt = "jpeg artifacts, blur, distortion, watermark, extra fingers, fewer fingers, lowres, bad hands, duplicate heads, bad anatomy"
+
+
+pipe = CIm2ImPipe(model_dir + model_id, model_type=ModelType.SD, ctypes=['soft'])
+pipe.setup("./_projects/biolab/00000.png", strength=0.6, steps=25)
+gs = GenSession("./_projects/biolab/controlnet/", pipe, Cfgen(prompt, nprompt))
+gs.gen_sess(add_count=10)
diff --git a/multigen/pipes.py b/multigen/pipes.py
index 2bec70b..f1fe39b 100755
--- a/multigen/pipes.py
+++ b/multigen/pipes.py
@@ -16,6 +16,7 @@
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler, StableDiffusionXLControlNetPipeline
 from diffusers import StableDiffusionControlNetInpaintPipeline, StableDiffusionXLControlNetInpaintPipeline, DDIMScheduler
 from diffusers.schedulers import KarrasDiffusionSchedulers
+from diffusers import StableDiffusionXLControlNetImg2ImgPipeline, StableDiffusionControlNetImg2ImgPipeline
 from .pipelines.masked_stable_diffusion_img2img import MaskedStableDiffusionImg2ImgPipeline
 from .pipelines.masked_stable_diffusion_xl_img2img import MaskedStableDiffusionXLImg2ImgPipeline
 
@@ -559,8 +560,8 @@ def gen(self, inputs):
 
 
 class Cond2ImPipe(BasePipe):
-    _class = StableDiffusionControlNetPipeline
-    _classxl = StableDiffusionXLControlNetPipeline
+    _class = StableDiffusionControlNetImg2ImgPipeline
+    _classxl = StableDiffusionXLControlNetImg2ImgPipeline
     _autopipeline = DiffusionPipeline
 
     # TODO: set path
@@ -686,7 +687,7 @@ def get_cpath(self):
         return cpath
 
     def setup(self, fimage, width=None, height=None,
-              image=None, cscales=None, guess_mode=False, **args):
+              image=None, cscales=None, guess_mode=False, strength=1, **args):
         """
         Set up the pipeline with the given parameters.
 
@@ -701,9 +702,11 @@
                 The input image. Defaults to None. fimage should be None if this argument is provided.
             cscales (list, optional):
                 The list of conditioning scales. Defaults to None.
-            guess_mode (bool, optional):
+            guess_mode (bool, *optional*):
                 Whether to use guess mode. Defaults to False.
                 it enables image generation without text prompt.
+            strength (float, *optional*):
+                Strength of image modification. Defaults to 1. Lower strength values keep the result closer to the input image; a value of 1 means the input image is more or less ignored.
             **args: Additional arguments for the pipeline setup.
""" super().setup(**args) @@ -711,6 +714,7 @@ def setup(self, fimage, width=None, height=None, self.fname = fimage image = Image.open(fimage) if image is None else image self._condition_image = [image] + self._input_image = [image] if cscales is None: cscales = [self.get_default_cond_scales()[c] for c in self.ctypes] self.pipe_params.update({ @@ -718,6 +722,7 @@ def setup(self, fimage, width=None, height=None, "height": image.size[1] if height is None else height, "controlnet_conditioning_scale": cscales, "guess_mode": guess_mode, + "strength": strength, }) def get_default_cond_scales(self): @@ -750,7 +755,8 @@ def gen(self, inputs): """ inputs = self.prepare_inputs(inputs) inputs.update(self.pipe_params) - inputs.update({"image": self._condition_image}) + inputs.update({"image": self._input_image, + "control_image": self._condition_image}) image = self.pipe(**inputs).images[0] return image @@ -779,24 +785,59 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] = Additional arguments passed to the Cond2ImPipe constructor. """ super().__init__(model_id=model_id, pipe=pipe, ctypes=ctypes, model_type=model_type, **args) + self.processor = None + self.body_estimation = None + self.draw_bodypose = None + self.dprocessor = None + self.dmodel = None + for c in self.ctypes: + self.load_processor(c) - if "soft" in ctypes: + def load_processor(self, ctype): + if "soft" == ctype: from controlnet_aux import PidiNetDetector, HEDdetector - self.processor = HEDdetector.from_pretrained('lllyasviel/Annotators') + if self.processor is None: + self.processor = HEDdetector.from_pretrained('lllyasviel/Annotators') #processor = PidiNetDetector.from_pretrained('lllyasviel/Annotators') - if "pose" in ctypes: + if "pose" == ctype: from pytorch_openpose.src.body import Body from pytorch_openpose.src import util - self.body_estimation = Body('pytorch_openpose/model/body_pose_model.pth') - self.draw_bodypose = util.draw_bodypose + if self.body_estimation is None: + self.body_estimation = Body('pytorch_openpose/model/body_pose_model.pth') + self.draw_bodypose = util.draw_bodypose #hand_estimation = Hand('model/hand_pose_model.pth') - if "depth" in ctypes: + if "depth" == ctype: from transformers import DPTImageProcessor, DPTForDepthEstimation - self.dprocessor = DPTImageProcessor.from_pretrained("./models-other/dpt-large") - self.dmodel = DPTForDepthEstimation.from_pretrained("./models-other/dpt-large") + if self.dprocessor is None: + self.dprocessor = DPTImageProcessor.from_pretrained("./models-other/dpt-large") + self.dmodel = DPTForDepthEstimation.from_pretrained("./models-other/dpt-large") + + def setup(self, fimage, width=None, height=None, image=None, + cscales=None, guess_mode=False, strength=0.75, **args): + """ + Set up the pipeline with the given parameters. - def setup(self, fimage, width=None, height=None, image=None, cscales=None, guess_mode=False, **args): - super().setup(fimage, width, height, image, cscales, guess_mode, **args) + Args: + fimage (str): + The path to the input image file. + width (int, *optional*): + The width of the generated image. Defaults to the width of the input image. + height (int, *optional*): + The height of the generated image. Defaults to the height of the input image. + image (PIL.Image.Image, *optional*): + The input image. Defaults to None. fimage should be None if this argument is provided. + cscales (list, optional): + The list of conditioning scales. Defaults to None. + guess_mode (bool, *optional*): + Whether to use guess mode. 
+                It enables image generation without a text prompt.
+            strength (float, *optional*):
+                Strength of image modification. Defaults to 0.75. Lower strength values keep the result closer to the input image; a value of 1 means the input image is more or less ignored.
+            **args: Additional arguments for the pipeline setup.
+        """
+        if 'ctypes' in args:
+            raise RuntimeError("ctypes can be used only in the constructor")
+        super().setup(fimage, width, height, image, cscales, guess_mode, strength=strength, **args)
         # Additionally process the input image
         # REM: CIm2ImPipe expects only one image, which can be the base for multiple control images
         self._condition_image = self._proc_cimg(np.asarray(self._condition_image[0]))
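
Below is a minimal standalone sketch of the diffusers call that Cond2ImPipe.gen() performs after this change. It is an illustration, not part of the patch: the base model id, file names, and the conditioning scale are placeholder values, while image/control_image/strength are the standard StableDiffusionControlNetImg2ImgPipeline arguments that gen() now fills from _input_image, _condition_image, and pipe_params.

import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
from controlnet_aux import HEDdetector

# ControlNet checkpoint placed as described in examples/cim2im.py.
controlnet = ControlNetModel.from_pretrained(
    "./models-cn/control_v11p_sd15_softedge", torch_dtype=torch.float16)
# Placeholder base model; multigen loads whatever model is given to Cond2ImPipe.
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet,
    torch_dtype=torch.float16).to("cuda")

init_image = Image.open("./_projects/biolab/00000.png").convert("RGB")
# For ctype 'soft', CIm2ImPipe derives the control image from the same input.
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
control_image = hed(init_image)

result = pipe(
    prompt="bioinformatics lab with flasks and exotic flowers",
    image=init_image,                   # img2img base, blended in via strength
    control_image=control_image,        # ControlNet conditioning (soft edges)
    strength=0.75,                      # lower values stay closer to the input
    controlnet_conditioning_scale=1.0,  # placeholder; multigen uses cscales
    num_inference_steps=25,
).images[0]
result.save("out.png")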
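
And a usage sketch of the new strength parameter through the multigen API itself, reusing the model and paths from examples/cim2im.py (assumed to be prepared as in that example); lower values preserve more of the input image:

from multigen.prompting import Cfgen
from multigen.sessions import GenSession
from multigen.pipes import CIm2ImPipe, ModelType

pipe = CIm2ImPipe("./models-sd/icbinp", model_type=ModelType.SD, ctypes=['soft'])
prompt = ["bioinformatics lab with flasks and exotic flowers"]
# Sweep strength: 0.3 stays close to the input, 0.9 mostly ignores it.
for strength in (0.3, 0.6, 0.9):
    pipe.setup("./_projects/biolab/00000.png", strength=strength, steps=25)
    gs = GenSession(f"./_projects/biolab/strength_{strength}/", pipe,
                    Cfgen(prompt, ""))
    gs.gen_sess(add_count=1)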