From 1748ca8c04994d40dc9af67734cd2d3296c2db41 Mon Sep 17 00:00:00 2001
From: Anatoly Belikov
Date: Tue, 3 Dec 2024 18:53:22 +0300
Subject: [PATCH] more low-level tests

---
 .github/workflows/python-app.yml |   9 ++
 multigen/pipes.py                |  17 ++--
 tests/base_test.py               |   1 +
 tests/pipe_test.py               |  11 ++-
 tests/test_loader.py             | 152 +++++++++++++++++++++++++++++--
 5 files changed, 171 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 5478cff..99de4c1 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -53,9 +53,18 @@ jobs:
         METAFUSION_MODELS_DIR: models-full
       run: |
         cd tests && python pipe_test.py
+    - name: Test loader
+      env:
+        METAFUSION_MODELS_DIR: models-full
+      run: |
+        cd tests && python test_loader.py
     - name: Test worker
+      env:
+        METAFUSION_MODELS_DIR: models-full
       run: |
         cd tests && python test_worker.py
     - name: Test worker flux
+      env:
+        METAFUSION_MODELS_DIR: models-full
       run: |
         cd tests && python test_worker_flux.py
diff --git a/multigen/pipes.py b/multigen/pipes.py
index cb9d6f8..421b391 100755
--- a/multigen/pipes.py
+++ b/multigen/pipes.py
@@ -150,7 +150,7 @@ def _get_model_type(self):
         elif module.startswith('diffusers.pipelines.flux.pipeline_flux'):
             return ModelType.FLUX
         else:
-            raise RuntimeError("unsuported model type {self.pipe.__class__}")
+            raise RuntimeError(f"unsupported model type {self.pipe.__class__}")
 
     def _initialize_pipe(self, device, offload_device):
         # sometimes text encoder is on a different device
@@ -744,7 +744,8 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] =
             else:
                 raise RuntimeError(f"Unexpected model type {type(self.pipe)}")
             self.model_type = t_model_type
-            logging.debug(f"from_pipe source dtype {self.pipe.dtype}")
+            device = self.pipe.device
+            logging.debug(f"from_pipe source dtype {self.pipe.dtype} {device}")
             cnets = self._load_cnets(cnets, cnet_ids, args.get('offload_device', None), self.pipe.dtype)
             prev_dtype = self.pipe.dtype
             if self.model_type == ModelType.SDXL:
@@ -754,11 +755,11 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] =
             else:
                 self.pipe = self._class.from_pipe(self.pipe, controlnet=cnets)
             logging.debug(f"after from_pipe result dtype {self.pipe.dtype}")
-            for cnet in cnets:
-                cnet.to(prev_dtype)
-                logging.debug(f'moving cnet {id(cnet)} to self.pipe.dtype {prev_dtype}')
-                if 'offload_device' not in args:
-                    cnet.to(self.pipe.device)
+            for cnet in cnets:
+                cnet.to(prev_dtype)
+                logging.debug(f'moving cnet {id(cnet)} to self.pipe.dtype {prev_dtype}')
+                if 'offload_device' not in args:
+                    cnet.to(device)
         else:
             # don't load anything, just reuse pipe
             super().__init__(model_id=model_id, pipe=pipe, **args)
@@ -1052,7 +1053,7 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] =
         """
         dtype = torch.float32
         if torch.cuda.is_available():
-            dtype = torch.float16
+            dtype = torch.bfloat16
         dtype = args.get('torch_type', dtype)
         cnet = ControlNetModel.from_pretrained(
             Cond2ImPipe.cpath+Cond2ImPipe.cmodels["inpaint"], torch_dtype=dtype)
diff --git a/tests/base_test.py b/tests/base_test.py
index cccb256..48aec92 100644
--- a/tests/base_test.py
+++ b/tests/base_test.py
@@ -30,6 +30,7 @@ def compute_diff(self, im1: PIL.Image.Image, im2: PIL.Image.Image) -> float:
         return diff
 
     def setUp(self):
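+        # initialize the parent TestCase before setting test defaults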
+        super().setUp()
         self._pipeline = None
         self._img_count = 0
         self.schedulers = 'DPMSolverMultistepScheduler', 'DDIMScheduler', 'EulerAncestralDiscreteScheduler'
diff --git a/tests/pipe_test.py b/tests/pipe_test.py
index 9192ba1..d4f6bd2 100644
--- a/tests/pipe_test.py
+++ b/tests/pipe_test.py
@@ -16,6 +16,8 @@
 
 
 class MyTestCase(TestCase):
+    def setUp(self):
+        TestCase.setUp(self)
 
     def test_basic_txt2im(self):
         model = self.get_model()
@@ -105,12 +107,14 @@ def test_maskedimg2img_basic(self):
                            scheduler=scheduler, clip_skip=0, blur=blur, blur_compose=3, steps=5, guidance_scale=7.6)
         pipe.setup(**param_3_3)
         self.assertEqual(3.3, pipe.pipe_params['guidance_scale'])
-        image = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed)))
+        image = pipe.gen(dict(prompt="cube planet cartoon style",
+                              generator=torch.Generator().manual_seed(seed)))
         self.assertEqual(image.width, img.width)
         self.assertEqual(image.height, img.height)
         image.save('test_img2img_basic.png')
         pipe.setup(**param_7_6)
-        image1 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed)))
+        image1 = pipe.gen(dict(prompt="cube planet cartoon style",
+                               generator=torch.Generator().manual_seed(seed)))
         diff = self.compute_diff(image1, image)
         # check that difference is large
         self.assertGreater(diff, 1000)
@@ -217,7 +221,6 @@ def get_model(self):
         return "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
 
 
-
 class TestFlux(MyTestCase):
 
     def setUp(self):
@@ -225,7 +228,7 @@ def setUp(self):
         self._pipeline = None
         self.schedulers = ['FlowMatchEulerDiscreteScheduler']
         self.device_args = dict()
-        self.device_args['device'] = torch.device('cpu', 0)
+        self.device_args['device'] = torch.device('cpu')
         if torch.cuda.is_available():
             self.device_args['offload_device'] = 0
 
diff --git a/tests/test_loader.py b/tests/test_loader.py
index 807d4fe..b7494ac 100644
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@@ -1,8 +1,23 @@
+import unittest
+import os
+import logging
+import shutil
+import PIL
+import torch
+import numpy
+
+from PIL import Image
+from multigen import Prompt2ImPipe, Im2ImPipe, Cfgen, GenSession, Loader, MaskedIm2ImPipe, CIm2ImPipe
+from multigen.log import setup_logger
+from multigen.pipes import ModelType
+from dummy import DummyDiffusionPipeline
+from base_test import TestCase, found_models
+from multigen.util import quantize, weightshare_copy
 
 
 class LoaderTestCase(TestCase):
 
-    def test_loader(self):
+    def test_loader_same_weights(self):
         """
         Test that weights are shared for different pipelines
         loaded from the same checkpoint
@@ -15,9 +30,12 @@ def test_loader(self):
             device = torch.device('cuda', 0)
         if 'device' not in self.device_args:
             self.device_args['device'] = device
+        if 'offload_device' in self.device_args:
+            del self.device_args['offload_device']
         classes = self.get_cls_by_type(MaskedIm2ImPipe)
         # load inpainting pipe
         cls = classes[model_type]
+        logging.info(f'loading {cls} from {model_id} {self.device_args}')
         pipeline = loader.load_pipeline(cls, model_id, **self.device_args)
         inpaint = MaskedIm2ImPipe(model_id, pipe=pipeline, **self.device_args)
 
@@ -30,13 +48,133 @@ def test_loader(self):
         if torch.cuda.is_available():
             device = torch.device('cuda', 0)
         else:
-            device = torch.device('cpu', 0)
+            device = torch.device('cpu')
         device_args['device'] = device
-
+
         pipeline = loader.load_pipeline(cls, model_id, **device_args)
         prompt2image = Prompt2ImPipe(model_id, pipe=pipeline, **device_args)
         prompt2image.setup(width=512, height=512, scheduler=self.schedulers[0], clip_skip=2, steps=5)
-        if device.type == 'cuda':
-            self.assertEqual(inpaint.pipe.unet.conv_out.weight.data_ptr(),
-                             prompt2image.pipe.unet.conv_out.weight.data_ptr(),
-                             "unets are different")
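+        # identical data pointers mean both pipelines reference the same weight tensors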
+        self.assertEqual(inpaint.pipe.vae.decoder.conv_out.weight.data_ptr(),
+                         prompt2image.pipe.vae.decoder.conv_out.weight.data_ptr(),
+                         "vaes are different")
+
+    def test_different_id(self):
+        """
+        Check that the loader returns a new pipeline with new components when loading the same checkpoint
+        """
+        model_id = self.get_model()
+        model_type = self.model_type()
+        classes = self.get_cls_by_type(MaskedIm2ImPipe)
+        cls = classes[model_type]
+        loader = Loader()
+        load_device = torch.device('cpu')
+        pipe11 = loader.load_pipeline(cls, model_id,
+                                      torch_dtype=torch.bfloat16,
+                                      device=load_device)
+        for value in loader._cpu_pipes.values():
+            assert id(value) != id(pipe11)
+        pipe1 = Prompt2ImPipe('flux', pipe=pipe11, device=load_device, offload_device=0)
+        pipe22 = loader.load_pipeline(cls, model_id,
+                                      torch_dtype=torch.bfloat16,
+                                      device=load_device)
+
+        pipe2 = Prompt2ImPipe('flux', pipe=pipe22, device=load_device, offload_device=1)
+
+        for comp_name in pipe2.pipe.components.keys():
+            comp1 = pipe1.pipe.components[comp_name]
+            comp2 = pipe2.pipe.components[comp_name]
+            if comp_name not in ['tokenizer', 'tokenizer_2']:
+                assert id(comp1) != id(comp2)
+
+    def test_weightshare(self):
+        """
+        Check that pipes after weightshare copy share weights but are otherwise
+        independent. Check that enable_sequential_cpu_offload doesn't modify the copy's device
+        """
+        model_id = self.get_model()
+        model_type = self.model_type()
+        cuda0 = torch.device('cuda', 0)
+        cuda1 = torch.device('cuda', 1)
+        prompt_classes = self.get_cls_by_type(Prompt2ImPipe)
+        # create prompt2im pipe
+        cls = prompt_classes[model_type]
+        offload_device = 0
+        cpu = torch.device('cpu')
+        pipe0 = cls.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(cpu)
+        pipe1 = weightshare_copy(pipe0)
+        self.assertNotEqual(id(pipe1.scheduler), id(pipe0.scheduler))
+        self.assertNotEqual(id(pipe1.vae), id(pipe0.vae))
+        self.assertEqual(pipe1.vae.decoder.conv_in.weight.data_ptr(),
+                         pipe0.vae.decoder.conv_in.weight.data_ptr())
+        pipe0.enable_sequential_cpu_offload(offload_device)
+        self.assertNotEqual(pipe1.device.type, 'meta', "Check that enable_sequential_cpu_offload doesn't modify copy's device")
+
+    def test_quantized(self):
+        model_id = self.get_model()
+        model_type = self.model_type()
+        cuda0 = torch.device('cuda', 0)
+        cuda1 = torch.device('cuda', 1)
+        prompt_classes = self.get_cls_by_type(Prompt2ImPipe)
+        # create prompt2im pipe
+        cls = prompt_classes[model_type]
+        offload_device = 1
+        cpu = torch.device('cpu')
+        pipe0 = cls.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(cpu)
+        quantize(pipe0)
+        pipe0.to(cuda0)
+        pipe1 = weightshare_copy(pipe0)
+        self.assertNotEqual(id(pipe1.scheduler), id(pipe0.scheduler))
+        self.assertNotEqual(id(pipe1.vae), id(pipe0.vae))
+        self.assertEqual(pipe1.vae.decoder.conv_in.weight.data_ptr(),
+                         pipe0.vae.decoder.conv_in.weight.data_ptr())
+
+
+class TestFlux(LoaderTestCase):
+
+    def setUp(self):
+        super().setUp()
+        self._pipeline = None
+        self.schedulers = ['FlowMatchEulerDiscreteScheduler']
+        self.device_args = dict()
+        self.device_args['device'] = torch.device('cpu')
+        if torch.cuda.is_available():
+            self.device_args['offload_device'] = 0
+            self.device_args['torch_dtype'] = torch.bfloat16
+
+    def model_type(self):
+        return ModelType.FLUX
+
+    def get_model(self):
+        models_dir = os.environ.get('METAFUSION_MODELS_DIR', None)
+        if models_dir is not None:
+            return models_dir + '/flux-1-dev'
+        return './models-sd/' + "flux/tiny-flux-pipe"
+
+    @unittest.skip('flux does not need test')
+    def test_lpw_turned_off(self):
+        pass
+
+
+class TestSDXL(LoaderTestCase):
+
+    def get_model(self):
+        models_dir = os.environ.get('METAFUSION_MODELS_DIR', None)
+        if models_dir is not None:
+            return models_dir + '/SDXL/stable-diffusion-xl-base-1.0'
+        return "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
+
+
+def get_test_cases():
+    suites = []
+    # collect every test case class derived from LoaderTestCase
+    for subclass in LoaderTestCase.__subclasses__():
+        suite = unittest.TestLoader().loadTestsFromTestCase(subclass)
+        suites.append(suite)
+    return unittest.TestSuite(suites)
+
+
+if __name__ == '__main__':
+    setup_logger('test_loader.log')
+    runner = unittest.TextTestRunner()
+    result = runner.run(get_test_cases())