diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a974d5c..7434201f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,20 @@ # Change Log for SD.Next -## Update for 2025-01-13 +## Update for 2025-01-15 + +### Highlights for 2025-01-15 Two weeks since last release, time for update! +This time a bit shorter highlight reel as this is primarily a service release, but there are still more than a few updates + *What's New?* - Large [Wiki](https://github.com/vladmandic/automatic/wiki)/[Docs](https://vladmandic.github.io/sdnext-docs/) updates -- New models: **Allegro Video**, new pipelines: **PixelSmith**, updates: **Hunyuan-Video**, **LTX-Video** -- New schedulers (TDD) -- Improvements to **Detailer**, **XYZ grid**, **Sysinfo**, **Logging** +- New models: **Allegro Video**, new pipelines: **PixelSmith**, updates: **Hunyuan-Video**, **LTX-Video**, **Sana 4k** +- New version of **ZLUDA** +- New features in **Detailer**, **XYZ grid**, **Sysinfo**, **Logging**, **Schedulers**, **Video save/create** - And tons of hotfixes... -### Details for 2025-01-13 +### Details for 2025-01-15 - [Wiki/Docs](https://vladmandic.github.io/sdnext-docs/): - updated: Detailer, Install, Update, Debug, Control-HowTo, ZLUDA @@ -26,6 +30,10 @@ Two weeks since last release, time for update! - example: - [LTX Video](https://github.com/Lightricks/LTX-Video) framewise decoding - enabled by default, allows generating longer videos with reduced memory requirements +- [Sana 4k](https://huggingface.co/Efficient-Large-Model/Sana_1600M_4Kpx_BF16_diffusers) + - new Sana variation with support for directly generating 4k images + - simply select from *networks -> models -> reference* + - tip: enable VAE tiling when generating very large images - **Logging**: - reverted enable debug by default - updated [debug wiki](https://github.com/vladmandic/automatic/wiki/debug) @@ -38,6 +46,7 @@ Two weeks since last release, time for update! - startup tracing and optimizations - threading load locks on model loads - refactor native vs legacy model loader + - video save/create - **Schedulers**: - [TDD](https://github.com/RedAIGC/Target-Driven-Distillation) new super-fast scheduler that can generate images in 4-8 steps; recommended for use with [TDD LoRA](https://huggingface.co/RED-AIGC/TDD/tree/main) @@ -51,6 +60,11 @@ 
- since different TAESD versions produce different results and the latest is not necessarily the greatest, you can choose the TAESD version in settings -> live preview; also added is support for another finetuned version of TAESD: [Hybrid TinyVAE](https://huggingface.co/cqyan/hybrid-sd-tinyvae-xl) +- **Video** + - all video create/save code is now unified + - added support for video formats: GIF, PNG, MP4/MP4V, MP4/AVC1, MP4/JVT3, MKV/H264, AVI/DIVX, AVI/RGBA, MJPEG/MJPG, MPG/MPG1, AVR/AVR1 + - *note*: video format support is platform dependent and not all formats may be available on all platforms + - *note*: AVC1 and H264 require a custom OpenCV build due to OSS licensing issues - **ZLUDA** v3.8.7 - new runtime compiler implementation: complex types and JIT are now available - fast Fourier transform is now implemented diff --git a/html/previews.json b/html/previews.json index a4ddfa7c9..3b59d376c 100644 --- a/html/previews.json +++ b/html/previews.json @@ -11,6 +11,7 @@ "THUDM--CogVideoX-5b-I2V": "models/Reference/THUDM--CogView3-Plus-3B.jpg", "Efficient-Large-Model--Sana_1600M_1024px_BF16_diffusers": "models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", "Efficient-Large-Model--Sana_1600M_2Kpx_BF16_diffusers": "models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", + "Efficient-Large-Model--Sana_1600M_4Kpx_BF16_diffusers": "models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", "Efficient-Large-Model--Sana_600M_1024px_diffusers": "models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", "stabilityai--stable-video-diffusion-img2vid-xt-1-1": "models/Reference/stabilityai--stable-video-diffusion-img2vid-xt.jpg", "shuttleai--shuttle-3-diffusion": "models/Reference/shuttleai--shuttle-3-diffusion.jpg" diff --git a/html/reference.json b/html/reference.json index 43115c549..ca55081a9 100644 --- a/html/reference.json +++ b/html/reference.json @@ -180,19 +180,25 @@ "extras": "sampler: Default, cfg_scale: 3.5" }, - "NVLabs Sana 1.6B 2048px": { + "NVLabs Sana 1.6B 4k": { + "path": "Efficient-Large-Model/Sana_1600M_4Kpx_BF16_diffusers", + "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.", + "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", + "skip": true + }, + "NVLabs Sana 1.6B 2k": { "path": "Efficient-Large-Model/Sana_1600M_2Kpx_BF16_diffusers", "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.", "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", "skip": true }, - "NVLabs Sana 1.6B 1024px": { + "NVLabs Sana 1.6B 1k": { "path": "Efficient-Large-Model/Sana_1600M_1024px_diffusers", "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. 
Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.", "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", "skip": true }, - "NVLabs Sana 0.6B 512px": { + "NVLabs Sana 0.6B 0.5k": { "path": "Efficient-Large-Model/Sana_600M_512px_diffusers", "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.", "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg", diff --git a/modules/images.py b/modules/images.py index c3a8cee54..00e5969ce 100644 --- a/modules/images.py +++ b/modules/images.py @@ -15,6 +15,7 @@ from modules.images_grid import image_grid, get_grid_size, split_grid, combine_grid, check_grid_size, get_font, draw_grid_annotations, draw_prompt_matrix, GridAnnotation, Grid # pylint: disable=unused-import from modules.images_resize import resize_image # pylint: disable=unused-import from modules.images_namegen import FilenameGenerator, get_next_sequence_number # pylint: disable=unused-import +from modules.video import save_video # pylint: disable=unused-import debug = errors.log.trace if os.environ.get('SD_PATH_DEBUG', None) is not None else lambda *args, **kwargs: None @@ -190,76 +191,6 @@ def save_image(image, return params.filename, filename_txt, exifinfo -def save_video_atomic(images, filename, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3): - try: - import cv2 - except Exception as e: - shared.log.error(f'Save video: cv2: {e}') - return - os.makedirs(os.path.dirname(filename), exist_ok=True) - if video_type.lower() == 'mp4': - frames = images - if interpolate > 0: - try: - import modules.rife - frames = modules.rife.interpolate(images, count=interpolate, scale=scale, pad=pad, change=change) - except Exception as e: - shared.log.error(f'RIFE interpolation: {e}') - errors.display(e, 'RIFE interpolation') - video_frames = [np.array(frame) for frame in frames] - fourcc = "mp4v" - h, w, _c = video_frames[0].shape - video_writer = cv2.VideoWriter(filename, fourcc=cv2.VideoWriter_fourcc(*fourcc), fps=len(frames)/duration, frameSize=(w, h)) - for i in range(len(video_frames)): - img = cv2.cvtColor(video_frames[i], cv2.COLOR_RGB2BGR) - video_writer.write(img) - size = os.path.getsize(filename) - shared.log.info(f'Save video: file="{filename}" frames={len(frames)} duration={duration} fourcc={fourcc} size={size}') - if video_type.lower() == 'gif' or video_type.lower() == 'png': - append = images.copy() - image = append.pop(0) - if loop: - append += append[::-1] - frames=len(append) + 1 - image.save( - filename, - save_all = True, - append_images = append, - optimize = False, - duration = 1000.0 * duration / frames, - loop = 0 if loop else 1, - ) - size = os.path.getsize(filename) - shared.log.info(f'Save video: file="{filename}" frames={len(append) + 1} duration={duration} loop={loop} size={size}') - - -def save_video(p, images, filename = None, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3, sync: bool = False): - if images is None or len(images) < 2 or video_type is None or video_type.lower() == 'none': - return None - image = images[0] - if p is not None: - seed = p.all_seeds[0] if 
getattr(p, 'all_seeds', None) is not None else p.seed - prompt = p.all_prompts[0] if getattr(p, 'all_prompts', None) is not None else p.prompt - namegen = FilenameGenerator(p, seed=seed, prompt=prompt, image=image) - else: - namegen = FilenameGenerator(None, seed=0, prompt='', image=image) - if filename is None and p is not None: - filename = namegen.apply(shared.opts.samples_filename_pattern if shared.opts.samples_filename_pattern and len(shared.opts.samples_filename_pattern) > 0 else "[seq]-[prompt_words]") - filename = os.path.join(shared.opts.outdir_video, filename) - filename = namegen.sequence(filename, shared.opts.outdir_video, '') - else: - if os.pathsep not in filename: - filename = os.path.join(shared.opts.outdir_video, filename) - if not filename.lower().endswith(video_type.lower()): - filename += f'.{video_type.lower()}' - filename = namegen.sanitize(filename) - if not sync: - threading.Thread(target=save_video_atomic, args=(images, filename, video_type, duration, loop, interpolate, scale, pad, change)).start() - else: - save_video_atomic(images, filename, video_type, duration, loop, interpolate, scale, pad, change) - return filename - - def safe_decode_string(s: bytes): remove_prefix = lambda text, prefix: text[len(prefix):] if text.startswith(prefix) else text # pylint: disable=unnecessary-lambda-assignment for encoding in ['utf-8', 'utf-16', 'ascii', 'latin_1', 'cp1252', 'cp437']: # try different encodings diff --git a/modules/processing_args.py b/modules/processing_args.py index 4e51d6d4f..81f4ae5d3 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -179,6 +179,9 @@ def set_pipeline_args(p, model, prompts:list, negative_prompts:list, prompts_2:t p.extra_generation_params["CHI"] = chi if not chi: args['complex_human_instruction'] = None + if 'use_resolution_binning' in possible: + args['use_resolution_binning'] = True + p.extra_generation_params["Binning"] = True if prompt_parser_diffusers.embedder is not None and not prompt_parser_diffusers.embedder.scheduled_prompt: # not scheduled so we dont need it anymore prompt_parser_diffusers.embedder = None diff --git a/modules/shared.py b/modules/shared.py index 707657d5d..95a94b621 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -768,6 +768,7 @@ def get_default_modes(): "autolaunch": OptionInfo(False, "Autolaunch browser upon startup"), "font_size": OptionInfo(14, "Font size", gr.Slider, {"minimum": 8, "maximum": 32, "step": 1, "visible": True}), "aspect_ratios": OptionInfo("1:1, 4:3, 3:2, 16:9, 16:10, 21:9, 2:3, 3:4, 9:16, 10:16, 9:21", "Allowed aspect ratios"), + "logmonitor_show": OptionInfo(True, "Show log view"), "motd": OptionInfo(False, "Show MOTD"), "compact_view": OptionInfo(False, "Compact view"), "return_grid": OptionInfo(True, "Show grid in results"), @@ -787,7 +788,6 @@ def get_default_modes(): "taesd_layers": OptionInfo(3, "TAESD decode layers", gr.Slider, {"minimum": 1, "maximum": 3, "step": 1}), "live_preview_downscale": OptionInfo(True, "Downscale high resolution live previews"), - "logmonitor_show": OptionInfo(True, "Show log view"), "logmonitor_refresh_period": OptionInfo(5000, "Log view update period", gr.Slider, {"minimum": 0, "maximum": 30000, "step": 25}), "notification_audio_enable": OptionInfo(False, "Play a notification upon completion"), "notification_audio_path": OptionInfo("html/notification.mp3","Path to notification sound", component_args=hide_dirs, folder=True), diff --git a/modules/ui_control.py b/modules/ui_control.py index 952b956f8..f577ad4c2 100644 --- 
a/modules/ui_control.py +++ b/modules/ui_control.py @@ -168,13 +168,8 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(): video_skip_frames = gr.Slider(minimum=0, maximum=100, step=1, label='Skip input frames', value=0, elem_id="control_video_skip_frames") with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None', elem_id="control_video_type") - video_duration = gr.Slider(label='Duration', minimum=0.25, maximum=300, step=0.25, value=2, visible=False, elem_id="control_video_duration") - with gr.Row(): - video_loop = gr.Checkbox(label='Loop', value=True, visible=False, elem_id="control_video_loop") - video_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False, elem_id="control_video_pad") - video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False, elem_id="control_video_interpolate") - video_type.change(fn=helpers.video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, video_duration, video_loop, video_pad, video_interpolate = create_video_inputs() enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('control') detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('control') diff --git a/modules/ui_control_helpers.py b/modules/ui_control_helpers.py index cb25582c6..553b5fbac 100644 --- a/modules/ui_control_helpers.py +++ b/modules/ui_control_helpers.py @@ -181,15 +181,6 @@ def select_input(input_mode, input_image, init_image, init_type, input_resize, i return res -def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - - def copy_input(mode_from, mode_to, input_image, input_resize, input_inpaint): debug_log(f'Control transfter input: from={mode_from} to={mode_to} image={input_image} resize={input_resize} inpaint={input_inpaint}') def getimg(ctrl): diff --git a/modules/ui_sections.py b/modules/ui_sections.py index 346c53783..a9bbe4ef2 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -131,6 +131,26 @@ def create_seed_inputs(tab, reuse_visible=True): return seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w +def create_video_inputs(): + def video_type_change(video_type): + return [ + gr.update(visible=video_type != 'None'), + gr.update(visible=video_type in ['GIF', 'PNG']), + gr.update(visible=video_type not in ['None', 'GIF', 'PNG']), + gr.update(visible=video_type not in ['None', 'GIF', 'PNG']), + ] + with gr.Column(): + video_codecs = ['None', 'GIF', 'PNG', 'MP4/MP4V', 'MP4/AVC1', 'MP4/JVT3', 'MKV/H264', 'AVI/DIVX', 'AVI/RGBA', 'MJPEG/MJPG', 'MPG/MPG1', 'AVR/AVR1'] + video_type = gr.Dropdown(label='Video type', choices=video_codecs, value='None') + with gr.Column(): + video_duration = gr.Slider(label='Duration', minimum=0.25, maximum=300, step=0.25, value=2, visible=False) + video_loop = gr.Checkbox(label='Loop', value=True, visible=False, elem_id="control_video_loop") + video_pad = gr.Slider(label='Pad frames', 
minimum=0, maximum=24, step=1, value=1, visible=False) + video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + video_type.change(fn=video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) + return video_type, video_duration, video_loop, video_pad, video_interpolate + + def create_cfg_inputs(tab): with gr.Row(): cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_cfg_scale") diff --git a/modules/video.py b/modules/video.py new file mode 100644 index 000000000..d9e40a27f --- /dev/null +++ b/modules/video.py @@ -0,0 +1,85 @@ +import os +import threading +import numpy as np +from modules import shared, errors +from modules.images_namegen import FilenameGenerator + + +def interpolate_frames(images, count: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3): + if images is None: + return [] + if not isinstance(images, list): + images = [images] + if count > 0: + try: + import modules.rife + frames = modules.rife.interpolate(images, count=count, scale=scale, pad=pad, change=change) + if len(frames) > 0: + images = frames + except Exception as e: + shared.log.error(f'RIFE interpolation: {e}') + errors.display(e, 'RIFE interpolation') + return [np.array(image) for image in images] + + +def save_video_atomic(images, filename, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3): + try: + import cv2 + except Exception as e: + shared.log.error(f'Save video: cv2: {e}') + return + os.makedirs(os.path.dirname(filename), exist_ok=True) + if video_type.lower() in ['gif', 'png']: + append = images.copy() + image = append.pop(0) + if loop: + append += append[::-1] + frames = len(append) + 1 + image.save( + filename, + save_all = True, + append_images = append, + optimize = False, + duration = 1000.0 * duration / frames, + loop = 0 if loop else 1, + ) + size = os.path.getsize(filename) + shared.log.info(f'Save video: file="{filename}" frames={len(append) + 1} duration={duration} loop={loop} size={size}') + elif video_type.lower() != 'none': + frames = interpolate_frames(images, count=interpolate, scale=scale, pad=pad, change=change) + fourcc = video_type.lower().split('/')[1] if '/' in video_type else 'mp4v' # codec from 'container/codec' pair, e.g. 'MKV/H264' -> 'h264' + h, w, _c = frames[0].shape + video_writer = cv2.VideoWriter(filename, fourcc=cv2.VideoWriter_fourcc(*fourcc), fps=len(frames)/duration, frameSize=(w, h)) + for i in range(len(frames)): + img = cv2.cvtColor(frames[i], cv2.COLOR_RGB2BGR) + video_writer.write(img) + size = os.path.getsize(filename) + shared.log.info(f'Save video: file="{filename}" frames={len(frames)} duration={duration} fourcc={fourcc} size={size}') + + +def save_video(p, images, filename = None, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3, sync: bool = False): + if images is None or len(images) < 2 or video_type is None or video_type.lower() == 'none': + return None + image = images[0] + if p is not None: + seed = p.all_seeds[0] if getattr(p, 'all_seeds', None) is not None else p.seed + prompt = p.all_prompts[0] if getattr(p, 'all_prompts', None) is not None else p.prompt + namegen = FilenameGenerator(p, seed=seed, prompt=prompt, image=image) + else: + namegen = FilenameGenerator(None, seed=0, prompt='', image=image) + if filename is None and p is not None: + filename = 
namegen.apply(shared.opts.samples_filename_pattern if shared.opts.samples_filename_pattern and len(shared.opts.samples_filename_pattern) > 0 else "[seq]-[prompt_words]") + filename = os.path.join(shared.opts.outdir_video, filename) + filename = namegen.sequence(filename, shared.opts.outdir_video, '') + else: + if os.path.sep not in filename: # bare filename without directory component + filename = os.path.join(shared.opts.outdir_video, filename) + ext = video_type.lower().split('/')[0] if '/' in video_type else video_type.lower() + if not filename.lower().endswith(ext): + filename += f'.{ext}' + filename = namegen.sanitize(filename) + if not sync: + threading.Thread(target=save_video_atomic, args=(images, filename, video_type, duration, loop, interpolate, scale, pad, change)).start() + else: + save_video_atomic(images, filename, video_type, duration, loop, interpolate, scale, pad, change) + return filename diff --git a/scripts/allegrovideo.py b/scripts/allegrovideo.py index 675c6fb06..b340e4962 100644 --- a/scripts/allegrovideo.py +++ b/scripts/allegrovideo.py @@ -38,14 +38,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML('  Allegro Video
') with gr.Row(): @@ -53,13 +45,8 @@ def video_type_change(video_type): with gr.Row(): override_scheduler = gr.Checkbox(label='Override scheduler', value=True) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [num_frames, override_scheduler, video_type, duration, gif_loop, mp4_pad, mp4_interpolate] def run(self, p: processing.StableDiffusionProcessing, num_frames, override_scheduler, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/animatediff.py b/scripts/animatediff.py index f44c85bb7..a704baae7 100644 --- a/scripts/animatediff.py +++ b/scripts/animatediff.py @@ -197,14 +197,6 @@ def show(self, is_img2img): def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML("  AnimateDiff
") with gr.Row(): @@ -217,9 +209,6 @@ def video_type_change(video_type): strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.05, value=1.0) with gr.Row(): latent_mode = gr.Checkbox(label='Latent mode', value=True, visible=False) - with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) with gr.Accordion('FreeInit', open=False): with gr.Row(): fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') @@ -231,10 +220,8 @@ def video_type_change(video_type): fi_spatial = gr.Slider(label='Spatial frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) fi_temporal = gr.Slider(label='Temporal frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal] def run(self, p: processing.StableDiffusionProcessing, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py index e689a5e3f..7b7a557f8 100644 --- a/scripts/cogvideo.py +++ b/scripts/cogvideo.py @@ -29,14 +29,6 @@ def show(self, is_img2img): def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML("  CogVideoX
") with gr.Row(): @@ -48,18 +40,13 @@ def video_type_change(video_type): with gr.Row(): offload = gr.Dropdown(label='Offload', choices=['none', 'balanced', 'model', 'sequential'], value='balanced') override = gr.Checkbox(label='Override resolution', value=True) - with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=30, step=0.25, value=8, visible=False) with gr.Accordion('Optional init image or video', open=False): with gr.Row(): image = gr.Image(value=None, label='Image', type='pil', source='upload', width=256, height=256) video = gr.Video(value=None, label='Video', source='upload', width=256, height=256) with gr.Row(): - loop = gr.Checkbox(label='Loop', value=True, visible=False) - pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, loop, pad, interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, loop, pad, interpolate = create_video_inputs() return [model, sampler, frames, guidance, offload, override, video_type, duration, loop, pad, interpolate, image, video] def load(self, model): diff --git a/scripts/hunyuanvideo.py b/scripts/hunyuanvideo.py index 0e99a26a9..874e20fc0 100644 --- a/scripts/hunyuanvideo.py +++ b/scripts/hunyuanvideo.py @@ -61,14 +61,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML('  Hunyuan Video
') with gr.Row(): @@ -79,13 +71,8 @@ def video_type_change(video_type): with gr.Row(): template = gr.TextArea(label='Prompt processor', lines=3, value=default_template) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [num_frames, tile_frames, override_scheduler, template, video_type, duration, gif_loop, mp4_pad, mp4_interpolate] def run(self, p: processing.StableDiffusionProcessing, num_frames, tile_frames, override_scheduler, template, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/image2video.py b/scripts/image2video.py index ad6615f67..6ef27412c 100644 --- a/scripts/image2video.py +++ b/scripts/image2video.py @@ -21,15 +21,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - - def video_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - def model_change(model_name): model = next(m for m in MODELS if m['name'] == model_name) return gr.update(value=model['info']), gr.update(visible=model_name == 'PIA'), gr.update(visible=model_name == 'VGen') @@ -40,9 +31,6 @@ def model_change(model_name): model_info = gr.HTML() with gr.Row(): num_frames = gr.Slider(label='Frames', minimum=0, maximum=50, step=1, value=16) - with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) with gr.Accordion('FreeInit', open=False, visible=False) as fi_accordion: with gr.Row(): fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') @@ -58,11 +46,9 @@ def model_change(model_name): vg_chunks = gr.Slider(label='Decode chunks', minimum=0.1, maximum=1.0, step=0.1, value=0.5) vg_fps = gr.Slider(label='Change rate', minimum=0.1, maximum=1.0, step=0.1, value=0.5) with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() model_name.change(fn=model_change, inputs=[model_name], outputs=[model_info, fi_accordion, vgen_accordion]) - video_type.change(fn=video_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) return [model_name, num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, 
fi_method, fi_iters, fi_order, fi_spatial, fi_temporal, vg_chunks, vg_fps] def run(self, p: processing.StableDiffusionProcessing, model_name, num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal, vg_chunks, vg_fps): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/ltxvideo.py b/scripts/ltxvideo.py index 37038d7ba..3f6cad08e 100644 --- a/scripts/ltxvideo.py +++ b/scripts/ltxvideo.py @@ -66,13 +66,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] def model_change(model): return gr.update(visible=model == 'custom') @@ -90,13 +83,8 @@ def model_change(model): with gr.Row(): model_custom = gr.Textbox(value='', label='Path to model file', visible=False) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() model.change(fn=model_change, inputs=[model], outputs=[model_custom]) return [model, model_custom, decode, sampler, num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, teacache_enable, teacache_threshold] diff --git a/scripts/mochivideo.py b/scripts/mochivideo.py index f85616a5e..cb2950eda 100644 --- a/scripts/mochivideo.py +++ b/scripts/mochivideo.py @@ -17,26 +17,13 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML('  Mochi.1 Video
') with gr.Row(): num_frames = gr.Slider(label='Frames', minimum=9, maximum=257, step=1, value=45) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate] def run(self, p: processing.StableDiffusionProcessing, num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/stablevideodiffusion.py b/scripts/stablevideodiffusion.py index c1283e1b6..59cba1e96 100644 --- a/scripts/stablevideodiffusion.py +++ b/scripts/stablevideodiffusion.py @@ -23,14 +23,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - with gr.Row(): gr.HTML('  Stable Video Diffusion
') with gr.Row(): @@ -46,13 +38,8 @@ def video_type_change(video_type): with gr.Row(): override_resolution = gr.Checkbox(label='Override resolution', value=True) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [model, num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate] def run(self, p: processing.StableDiffusionProcessing, model, num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument @@ -75,7 +62,7 @@ def run(self, p: processing.StableDiffusionProcessing, model, num_frames, overri if model_name != model_loaded or c != 'StableVideoDiffusionPipeline': shared.opts.sd_model_checkpoint = model_path sd_models.reload_model_weights() - shared.sd_model = shared.sd_model.to(torch.float32) # TODO svd: runs in fp32 causing dtype mismatch + shared.sd_model = shared.sd_model.to(torch.float32) # TODO svd: runs in fp32 due to dtype mismatch # set params if override_resolution: diff --git a/scripts/text2video.py b/scripts/text2video.py index c7b3d1c05..a58340f60 100644 --- a/scripts/text2video.py +++ b/scripts/text2video.py @@ -31,14 +31,6 @@ def show(self, is_img2img): # return signature is array of gradio components def ui(self, _is_img2img): - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - def model_info_change(model_name): if model_name == 'None': return gr.update(value='') @@ -57,13 +49,8 @@ def model_info_change(model_name): use_default = gr.Checkbox(label='Use defaults', value=True) num_frames = gr.Slider(label='Frames', minimum=1, maximum=50, step=1, value=0) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + from modules.ui_sections import create_video_inputs + video_type, duration, gif_loop, mp4_pad, mp4_interpolate = create_video_inputs() return [model_name, use_default, num_frames, video_type, duration, 
gif_loop, mp4_pad, mp4_interpolate] def run(self, p: processing.StableDiffusionProcessing, model_name, use_default, num_frames, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 8bf149777..38ee177e1 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -15,6 +15,7 @@ from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing # pylint: disable=no-name-in-module, unused-import from modules import shared, errors, scripts, images, processing from modules.ui_components import ToolButton +from modules.ui_sections import create_video_inputs import modules.ui_symbols as symbols @@ -64,23 +65,8 @@ def ui(self, is_img2img): create_video = gr.Checkbox(label='Create video', value=False, elem_id=self.elem_id("xyz_create_video"), container=False) with gr.Row(visible=False) as ui_video: - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - - with gr.Column(): - video_type = gr.Dropdown(label='Video type', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - with gr.Column(): - video_duration = gr.Slider(label='Duration', minimum=0.25, maximum=300, step=0.25, value=2, visible=False) - video_loop = gr.Checkbox(label='Loop', value=True, visible=False, elem_id="control_video_loop") - video_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) - create_video.change(fn=lambda x: gr.update(visible=x), inputs=[create_video], outputs=[ui_video]) + video_type, video_duration, video_loop, video_pad, video_interpolate = create_video_inputs() + create_video.change(fn=lambda x: gr.update(visible=x), inputs=[create_video], outputs=[ui_video]) with gr.Row(): margin_size = gr.Slider(label="Grid margins", minimum=0, maximum=500, value=0, step=2, elem_id=self.elem_id("margin_size")) diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py index 5e03e1c5c..2a8f22aba 100644 --- a/scripts/xyz_grid_on.py +++ b/scripts/xyz_grid_on.py @@ -14,6 +14,7 @@ from scripts.xyz_grid_draw import draw_xyz_grid # pylint: disable=no-name-in-module from modules import shared, errors, scripts, images, processing from modules.ui_components import ToolButton +from modules.ui_sections import create_video_inputs import modules.ui_symbols as symbols @@ -70,23 +71,8 @@ def ui(self, is_img2img): create_video = gr.Checkbox(label='Create video', value=False, elem_id=self.elem_id("xyz_create_video"), container=False) with gr.Row(visible=False) as ui_video: - def video_type_change(video_type): - return [ - gr.update(visible=video_type != 'None'), - gr.update(visible=video_type == 'GIF' or video_type == 
'PNG'), - gr.update(visible=video_type == 'MP4'), - gr.update(visible=video_type == 'MP4'), - ] - - with gr.Column(): - video_type = gr.Dropdown(label='Video type', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - with gr.Column(): - video_duration = gr.Slider(label='Duration', minimum=0.25, maximum=300, step=0.25, value=2, visible=False) - video_loop = gr.Checkbox(label='Loop', value=True, visible=False, elem_id="control_video_loop") - video_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) - create_video.change(fn=lambda x: gr.update(visible=x), inputs=[create_video], outputs=[ui_video]) + video_type, video_duration, video_loop, video_pad, video_interpolate = create_video_inputs() + create_video.change(fn=lambda x: gr.update(visible=x), inputs=[create_video], outputs=[ui_video]) with gr.Row(): margin_size = gr.Slider(label="Grid margins", minimum=0, maximum=500, value=0, step=2, elem_id=self.elem_id("margin_size"))
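
As an aside to the Sana 4k entry in the changelog above: a minimal diffusers-level sketch of the "enable VAE tiling when generating very large images" tip. This is illustrative only, not SD.Next code; the prompt and output filename are placeholders.

```python
import torch
from diffusers import SanaPipeline

# load the new 4k Sana variant referenced in html/reference.json
pipe = SanaPipeline.from_pretrained(
    'Efficient-Large-Model/Sana_1600M_4Kpx_BF16_diffusers',
    torch_dtype=torch.bfloat16,
).to('cuda')
pipe.vae.enable_tiling()  # decode the 4k latent in tiles to reduce peak VRAM

image = pipe(
    prompt='a mountain lake at sunrise',  # placeholder prompt
    width=4096,
    height=4096,
    use_resolution_binning=True,  # same flag processing_args.py now sets when the pipeline supports it
).images[0]
image.save('sana-4k.png')
```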
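Since video format support is platform dependent (see the *note* in the Video section of the changelog), a quick probe of the local OpenCV build shows which container/codec pairs will actually encode. This helper is hypothetical and not part of the PR:

```python
import os
import tempfile

import cv2
import numpy as np

# the cv2-backed pairs from the changelog; GIF and PNG are written via PIL instead
CODECS = ['MP4/MP4V', 'MP4/AVC1', 'MP4/JVT3', 'MKV/H264', 'AVI/DIVX', 'AVI/RGBA', 'MJPEG/MJPG', 'MPG/MPG1', 'AVR/AVR1']

def probe(video_type: str) -> bool:
    container, codec = video_type.lower().split('/')
    path = os.path.join(tempfile.mkdtemp(), f'probe.{container}')
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*codec), 24.0, (64, 64))
    ok = writer.isOpened()  # False when this build/platform lacks the codec
    if ok:
        writer.write(np.zeros((64, 64, 3), dtype=np.uint8))  # one black frame
    writer.release()
    return ok

for video_type in CODECS:
    print(f'{video_type}: {"available" if probe(video_type) else "unavailable"}')
```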
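Finally, a usage sketch for the unified save path in the new modules/video.py (also re-exported through modules.images). Here `frames` is assumed to be a list of at least two PIL images produced by one of the video scripts:

```python
from modules.video import save_video  # same symbol as modules.images.save_video

filename = save_video(
    p=None,                   # no processing object: namegen falls back to defaults
    images=frames,            # fewer than 2 frames returns None without saving
    filename='my-animation',  # bare name, so it lands under shared.opts.outdir_video
    video_type='MKV/H264',    # container/codec pair; H264 needs the custom OpenCV build
    duration=4.0,             # total clip length in seconds; fps is derived from frame count
    interpolate=2,            # RIFE-interpolated frames inserted between each pair
    sync=True,                # write in the foreground instead of a background thread
)
print(filename)               # resolved path with the container extension appended, e.g. '.mkv'
```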