diff --git a/multigen/loader.py b/multigen/loader.py index 8dd8e97..cbe9ed8 100644 --- a/multigen/loader.py +++ b/multigen/loader.py @@ -45,6 +45,8 @@ def weightshare_copy(pipe): obj = getattr(copy, key) if hasattr(obj, 'load_state_dict'): obj.load_state_dict(getattr(pipe, key).state_dict(), assign=True) + # some buffers might not be transfered from pipe to copy + copy.to(pipe.device) return copy diff --git a/multigen/pipes.py b/multigen/pipes.py index 7e9a846..1298d0a 100755 --- a/multigen/pipes.py +++ b/multigen/pipes.py @@ -102,6 +102,7 @@ def __init__(self, model_id: str, """ if device is None: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + pipe_passed = pipe is not None self.pipe = pipe self._scheduler = None self._hypernets = [] @@ -125,7 +126,8 @@ def __init__(self, model_id: str, if mt != model_type: raise RuntimeError(f"passed model type {self.model_type} doesn't match actual type {mt}") - self._initialize_pipe(device, offload_device) + if not pipe_passed: + self._initialize_pipe(device, offload_device) self.lpw = lpw self._loras = [] @@ -155,6 +157,7 @@ def _get_model_type(self): def _initialize_pipe(self, device, offload_device): # sometimes text encoder is on a different device # if self.pipe.device != device: + logging.debug(f"initialising pipe to device {device}: offload_device {offload_device}") self.pipe.to(device) # self.pipe.enable_attention_slicing() # self.pipe.enable_vae_slicing() @@ -164,6 +167,7 @@ def _initialize_pipe(self, device, offload_device): if self.model_type == ModelType.FLUX: if offload_device is not None: self.pipe.enable_sequential_cpu_offload(offload_device) + logging.debug(f'enable_sequential_cpu_offload for pipe dtype {self.pipe.dtype}') else: try: import xformers @@ -172,19 +176,21 @@ def _initialize_pipe(self, device, offload_device): logging.warning("xformers not found, can't use efficient attention") def _load_pipeline(self, sd_pipe_class, model_type, args): + logging.debug(f"loading pipeline from {self._model_id} with {args}") if sd_pipe_class is None: if self._model_id.endswith('.safetensors'): if model_type is None: raise RuntimeError(f"model_type is not specified for safetensors file {self._model_id}") pipe_class = self._class if model_type == ModelType.SD else self._classxl - return pipe_class.from_single_file(self._model_id, **args) + result = pipe_class.from_single_file(self._model_id, **args) else: - return self._autopipeline.from_pretrained(self._model_id, **args) + result = self._autopipeline.from_pretrained(self._model_id, **args) else: if self._model_id.endswith('.safetensors'): - return sd_pipe_class.from_single_file(self._model_id, **args) + result = sd_pipe_class.from_single_file(self._model_id, **args) else: - return sd_pipe_class.from_pretrained(self._model_id, **args) + result = sd_pipe_class.from_pretrained(self._model_id, **args) + return result @property def scheduler(self): @@ -724,7 +730,7 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] = if model_id.endswith('.safetensors'): if self.model_type is None: raise RuntimeError(f"model type is not specified for safetensors file {model_id}") - cnets = self._load_cnets(cnets, cnet_ids, args.get('offload_device', None)) + cnets = self._load_cnets(cnets, cnet_ids, args.get('offload_device', None), args.get('torch_dtype', None)) super().__init__(model_id=model_id, pipe=pipe, controlnet=cnets, model_type=model_type, **args) else: super().__init__(model_id=model_id, pipe=pipe, controlnet=cnets, model_type=model_type, **args) @@ -738,22 +744,26 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] = else: raise RuntimeError(f"Unexpected model type {type(self.pipe)}") self.model_type = t_model_type - cnets = self._load_cnets(cnets, cnet_ids, args.get('offload_device', None)) + logging.debug(f"from_pipe source dtype {self.pipe.dtype}") + cnets = self._load_cnets(cnets, cnet_ids, args.get('offload_device', None), self.pipe.dtype) + prev_dtype = self.pipe.dtype if self.model_type == ModelType.SDXL: self.pipe = self._classxl.from_pipe(self.pipe, controlnet=cnets) elif self.model_type == ModelType.FLUX: self.pipe = self._classflux.from_pipe(self.pipe, controlnet=cnets[0]) else: self.pipe = self._class.from_pipe(self.pipe, controlnet=cnets) + logging.debug(f"after from_pipe result dtype {self.pipe.dtype}") for cnet in cnets: - cnet.to(self.pipe.dtype) + cnet.to(prev_dtype) + logging.debug(f'moving cnet {id(cnet)} to self.pipe.dtype {prev_dtype}') if 'offload_device' not in args: cnet.to(self.pipe.device) else: # don't load anything, just reuse pipe super().__init__(model_id=model_id, pipe=pipe, **args) - def _load_cnets(self, cnets, cnet_ids, offload_device=None): + def _load_cnets(self, cnets, cnet_ids, offload_device=None, dtype=None): if self.model_type == ModelType.FLUX: ControlNet = FluxControlNetModel else: @@ -773,9 +783,18 @@ def _load_cnets(self, cnets, cnet_ids, offload_device=None): else: cnets.append(ControlNet.from_pretrained(c, torch_dtype=torch_dtype)) if offload_device is not None: + # controlnet should be on the same device where main model is working dev = torch.device('cuda', offload_device) + logging.debug(f'moving cnets to offload device {dev}') for cnet in cnets: cnet.to(dev) + else: + logging.debug('offload device is None') + for cnet in cnets: + logging.debug(f"cnet dtype {cnet.dtype}") + if dtype is not None: + logging.debug(f"changing to {dtype}") + cnet.to(dtype) return cnets def get_cmodels(self): @@ -832,6 +851,8 @@ def setup(self, fimage, width=None, height=None, self._input_image = [image] if cscales is None: cscales = [self.get_default_cond_scales()[c] for c in self.ctypes] + if self.model_type == ModelType.FLUX and hasattr(cscales, '__len__'): + cscales = cscales[0] # multiple controlnets are not yet supported self.pipe_params.update({ "width": image.size[0] if width is None else width, "height": image.size[1] if height is None else height, @@ -905,6 +926,9 @@ def __init__(self, model_id, pipe: Optional[StableDiffusionControlNetPipeline] = Additional arguments passed to the Cond2ImPipe constructor. """ super().__init__(model_id=model_id, pipe=pipe, ctypes=ctypes, model_type=model_type, **args) + logging.debug("CIm2Im backend pipe was constructed") + logging.debug(f"self.pipe.dtype = {self.pipe.dtype}") + logging.debug(f"self.pipe.controlnet.dtype = {self.pipe.controlnet.dtype}") self.processor = None self.body_estimation = None self.draw_bodypose = None diff --git a/multigen/sessions.py b/multigen/sessions.py index 503117c..99696e5 100755 --- a/multigen/sessions.py +++ b/multigen/sessions.py @@ -3,6 +3,7 @@ import json from . import util from .prompting import Cfgen +import logging class GenSession: @@ -84,12 +85,14 @@ def gen_sess(self, add_count = 0, save_img=True, # collecting images to return if requested or images are not saved if not save_img or force_collect: images = [] + logging.info(f"add count = {add_count}") + jk = 0 for inputs in self.confg: self.last_index = self.confg.count - 1 self.last_conf = {**inputs} # TODO: multiple inputs? inputs['generator'] = torch.Generator().manual_seed(inputs['generator']) - + logging.debug("start generation") image = self.pipe.gen(inputs) if save_img: self.last_img_name = self.get_last_file_prefix() + ".png" @@ -103,5 +106,8 @@ def gen_sess(self, add_count = 0, save_img=True, if save_img and not drop_cfg: self.save_last_conf() if callback is not None: + logging.debug("call callback after generation") callback() + jk += 1 + logging.debug(f"done iteration {jk}") return images diff --git a/multigen/worker.py b/multigen/worker.py index 23a1b06..ac7b517 100755 --- a/multigen/worker.py +++ b/multigen/worker.py @@ -58,16 +58,16 @@ def _get_pipeline(self, pipe_class, model_id, model_type, cnet=None): if model_type == ModelType.SDXL: cls = pipe_class._classxl elif model_type == ModelType.FLUX: - cls = pipe_class._flux + # use offload by default for now + cls = pipe_class._classflux if device.type == 'cuda': offload_device = device.index - device = torch.device('cpu') + device = torch.device('cpu', 0) else: cls = pipe_class._class pipeline = self._loader.load_pipeline(cls, model_id, torch_dtype=torch.bfloat16, device=device) self.logger.debug(f'requested {cls} {model_id} on device {device}, got {pipeline.device}') - assert pipeline.device == device pipe = pipe_class(model_id, pipe=pipeline, device=device, offload_device=offload_device) if offload_device is None: assert pipeline.device == device @@ -164,7 +164,8 @@ def _update(sess, job, gs): data['finish_callback']() except (RuntimeError, TypeError, NotImplementedError) as e: self.logger.error("error in generation", exc_info=e) - self.logger.error(f"offload_device {pipe.pipe._offload_gpu_id}") + if hasattr(pipe.pipe, '_offload_gpu_id'): + self.logger.error(f"offload_device {pipe.pipe._offload_gpu_id}") if 'finish_callback' in data: data['finish_callback']("Can't generate image due to error") except Exception as e: diff --git a/tests/pipe_test.py b/tests/pipe_test.py index 8b6890e..a020b21 100644 --- a/tests/pipe_test.py +++ b/tests/pipe_test.py @@ -36,6 +36,8 @@ class MyTestCase(TestCase): def setUp(self): self._pipeline = None self._img_count = 0 + self.schedulers = 'DPMSolverMultistepScheduler', 'DDIMScheduler', 'EulerAncestralDiscreteScheduler' + self.device_args = dict() def get_model(self): models_dir = os.environ.get('METAFUSION_MODELS_DIR', None) @@ -57,17 +59,20 @@ def model_type(self): def test_basic_txt2im(self): model = self.get_model() # create pipe - pipe = Prompt2ImPipe(model, pipe=self._pipeline, model_type=self.model_type()) - pipe.setup(width=512, height=512, guidance_scale=7, scheduler="DPMSolverMultistepScheduler", steps=5) + pipe = Prompt2ImPipe(model, pipe=self._pipeline, model_type=self.model_type(), **self.device_args) + pipe.setup(width=512, height=512, guidance_scale=7, scheduler=self.schedulers[0], steps=5) seed = 49045438434843 params = dict(prompt="a cube planet, cube-shaped, space photo, masterpiece", negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image = pipe.gen(params) image.save("cube_test.png") # generate with different scheduler - params.update(scheduler="DDIMScheduler") + if self.model_type() == ModelType.FLUX: + params.update(generator=torch.Generator().manual_seed(seed + 1)) + else: + params.update(scheduler=self.schedulers[1]) image_ddim = pipe.gen(params) image_ddim.save("cube_test2_dimm.png") diff = self.compute_diff(image_ddim, image) @@ -85,8 +90,8 @@ def test_with_session(self): ["green colors", "dream colors", "neon glowing"], ["8k RAW photo, masterpiece, super quality", "artwork", "unity 3D"], ["surrealism", "impressionism", "high tech", "cyberpunk"]] - pipe = Prompt2ImPipe(model, pipe=self._pipeline, model_type=self.model_type()) - pipe.setup(width=512, height=512, scheduler="DPMSolverMultistepScheduler", steps=5) + pipe = Prompt2ImPipe(model, pipe=self._pipeline, model_type=self.model_type(), **self.device_args) + pipe.setup(width=512, height=512, scheduler=self.schedulers[0], steps=5) # remove directory if it exists dirname = "./gen_batch" if os.path.exists(dirname): @@ -98,57 +103,70 @@ def test_with_session(self): # each images goes with a txt file self.assertEqual(len(os.listdir(dirname)), 4) + def get_cls_by_type(self, pipe): + classes = dict() + classes[ModelType.SDXL] = pipe._classxl + classes[ModelType.SD] = pipe._class + classes[ModelType.FLUX] = pipe._classflux + return classes + def test_loader(self): loader = Loader() model_id = self.get_model() - - # load inpainting pipe - is_xl = 'TestSDXL' in str(self.__class__) - if is_xl: - cls = MaskedIm2ImPipe._classxl - else: - cls = MaskedIm2ImPipe._class + model_type = self.model_type() device = torch.device('cpu') if torch.cuda.is_available(): device = torch.device('cuda', 0) - pipeline = loader.load_pipeline(cls, model_id, device=device) + if 'device' not in self.device_args: + self.device_args['device'] = device + classes = self.get_cls_by_type(MaskedIm2ImPipe) + # load inpainting pipe + cls = classes[model_type] + pipeline = loader.load_pipeline(cls, model_id, **self.device_args) inpaint = MaskedIm2ImPipe(model_id, pipe=pipeline) + + prompt_classes = self.get_cls_by_type(Prompt2ImPipe) # create prompt2im pipe - if is_xl: - cls = Prompt2ImPipe._classxl - else: - cls = Prompt2ImPipe._class - pipeline = loader.load_pipeline(cls, model_id, device=device) + cls = prompt_classes[model_type] + device_args = dict(**self.device_args) + device = device_args.get('device', None) + if device is None: + if torch.cuda.is_available(): + device = torch.device('cuda', 0) + else: + device = torch.device('cpu', 0) + device_args['device'] = device + pipeline = loader.load_pipeline(cls, model_id, **device_args) prompt2image = Prompt2ImPipe(model_id, pipe=pipeline) - prompt2image.setup(width=512, height=512, scheduler="DPMSolverMultistepScheduler", clip_skip=2, steps=5) + prompt2image.setup(width=512, height=512, scheduler=self.schedulers[0], clip_skip=2, steps=5) if device.type == 'cuda': self.assertEqual(inpaint.pipe.unet.conv_out.weight.data_ptr(), prompt2image.pipe.unet.conv_out.weight.data_ptr(), "unets are different") def test_img2img_basic(self): - pipe = Im2ImPipe(self.get_model(), model_type=self.model_type()) + pipe = Im2ImPipe(self.get_model(), model_type=self.model_type(), **self.device_args) dw, dh = -1, 1 im = self.get_ref_image(dw, dh) seed = 49045438434843 pipe.setup(im, strength=0.7, steps=5, guidance_scale=3.3) self.assertEqual(3.3, pipe.pipe_params['guidance_scale']) - image = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) + image = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) image.save('test_img2img_basic.png') pipe.setup(im, strength=0.7, steps=5, guidance_scale=7.6) - image1 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) + image1 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) diff = self.compute_diff(image1, image) # check that difference is large self.assertGreater(diff, 1000) pipe.setup(im, strength=0.7, steps=5, guidance_scale=3.3) - image2 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) + image2 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) diff = self.compute_diff(image2, image) # check that difference is small self.assertLess(diff, 1) def test_maskedimg2img_basic(self): - pipe = MaskedIm2ImPipe(self.get_model(), model_type=self.model_type()) + pipe = MaskedIm2ImPipe(self.get_model(), model_type=self.model_type(), **self.device_args) img = PIL.Image.open("./mech_beard_sigm.png") dw, dh = -1, -1 img = img.crop((0, 0, img.width + dw, img.height + dh)) @@ -159,7 +177,7 @@ def test_maskedimg2img_basic(self): img_paint = img_paint.crop((0, 0, img_paint.width + dw, img_paint.height + dh)) img_paint = numpy.asarray(img_paint) - scheduler = "EulerAncestralDiscreteScheduler" + scheduler = self.schedulers[-1] seed = 49045438434843 blur = 48 param_3_3 = dict(image=img, image_painted=img_paint, strength=0.96, @@ -168,17 +186,17 @@ def test_maskedimg2img_basic(self): scheduler=scheduler, clip_skip=0, blur=blur, blur_compose=3, steps=5, guidance_scale=7.6) pipe.setup(**param_3_3) self.assertEqual(3.3, pipe.pipe_params['guidance_scale']) - image = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) - self.assertEquals(image.width, img.width) - self.assertEquals(image.height, img.height) + image = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) + self.assertEqual(image.width, img.width) + self.assertEqual(image.height, img.height) image.save('test_img2img_basic.png') pipe.setup(**param_7_6) - image1 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) + image1 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) diff = self.compute_diff(image1, image) # check that difference is large self.assertGreater(diff, 1000) pipe.setup(**param_3_3) - image2 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator(pipe.pipe.device).manual_seed(seed))) + image2 = pipe.gen(dict(prompt="cube planet cartoon style", generator=torch.Generator().manual_seed(seed))) diff = self.compute_diff(image2, image) # check that difference is small self.assertLess(diff, 1) @@ -190,18 +208,18 @@ def test_lpw(self): """ Check that last part of long prompt affect the generation """ - pipe = Prompt2ImPipe(self.get_model(), model_type=self.model_type(), lpw=True) + pipe = Prompt2ImPipe(self.get_model(), model_type=self.model_type(), lpw=True, **self.device_args) prompt = ' a cubic planet with atmoshere as seen from low orbit, each side of the cubic planet is ocuppied by an ocean, oceans have islands, but no continents, atmoshere of the planet has usual sperical shape, corners of the cube are above the atmoshere, but edges largely are covered by the atomosphere, there are cyclones in the atmoshere, the photo is made from low-orbit, famous sci-fi illustration' - pipe.setup(width=512, height=512, guidance_scale=7, scheduler="DPMSolverMultistepScheduler", steps=5) + pipe.setup(width=512, height=512, guidance_scale=7, scheduler=self.schedulers[0], steps=5) seed = 49045438434843 params = dict(prompt=prompt, negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image = pipe.gen(params) image.save("cube_test_lpw.png") params = dict(prompt=prompt + " , best quality, famous photo", negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image1 = pipe.gen(params) image.save("cube_test_lpw1.png") diff = self.compute_diff(image1, image) @@ -215,16 +233,16 @@ def test_lpw_turned_off(self): """ pipe = Prompt2ImPipe(self.get_model(), model_type=self.model_type(), lpw=False) prompt = ' a cubic planet with atmoshere as seen from low orbit, each side of the cubic planet is ocuppied by an ocean, oceans have islands, but no continents, atmoshere of the planet has usual sperical shape, corners of the cube are above the atmoshere, but edges largely are covered by the atomosphere, there are cyclones in the atmoshere, the photo is made from low-orbit, famous sci-fi illustration' - pipe.setup(width=512, height=512, guidance_scale=7, scheduler="DPMSolverMultistepScheduler", steps=5) + pipe.setup(width=512, height=512, guidance_scale=7, scheduler=self.schedulers[0], steps=5) seed = 49045438434843 params = dict(prompt=prompt, negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image = pipe.gen(params) image.save("cube_test_no_lpw.png") params = dict(prompt=prompt + " , best quality, famous photo", negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image1 = pipe.gen(params) image.save("cube_test_no_lpw1.png") diff = self.compute_diff(image1, image) @@ -234,29 +252,43 @@ def test_lpw_turned_off(self): @unittest.skipIf(not found_models(), "can't run on tiny version of SD") def test_controlnet(self): model = self.get_model() + model_type = self.model_type() # create pipe - pipe = CIm2ImPipe(model, model_type=self.model_type(), ctypes=['soft']) + if model_type == ModelType.FLUX: + # pass + canny_path = os.path.join(os.environ.get('METAFUSION_MODELS_DIR'), "ControlNetFlux/FLUX.1-dev-Controlnet-Canny-alpha/") + canny_path = "InstantX/FLUX.1-dev-Controlnet-Canny" + pipe = CIm2ImPipe(model, model_type=self.model_type(), cnet_ids=[canny_path], ctypes=['soft'], **self.device_args) + else: + pipe = CIm2ImPipe(model, model_type=self.model_type(), ctypes=['soft'], **self.device_args) + + logging.info(f"pipe's device {pipe.pipe.device}") dw, dh = 1, -1 imgpth = self.get_ref_image(dw, dh) - pipe.setup(imgpth, cscales=[0.3], guidance_scale=7, scheduler="DPMSolverMultistepScheduler", steps=5) + pipe.setup(imgpth, cscales=[0.3], guidance_scale=7, scheduler=self.schedulers[0], steps=5) seed = 49045438434843 params = dict(prompt="cube planet minecraft style", negative_prompt="spherical", - generator=torch.Generator(pipe.pipe.device).manual_seed(seed)) + generator=torch.Generator().manual_seed(seed)) image = pipe.gen(params) image.save("mech_test.png") img_ref = PIL.Image.open(imgpth) self.assertEqual(image.width, img_ref.width) self.assertEqual(image.height, img_ref.height) - # generate with different scheduler - params.update(scheduler="DDIMScheduler") + if self.model_type() == ModelType.FLUX: + # generate with different generator + params.update(generator=torch.Generator().manual_seed(seed + 1)) + else: + # generate with different scheduler + params.update(scheduler=self.schedulers[1]) image_ddim = pipe.gen(params) image_ddim.save("cube_test2_dimm.png") diff = self.compute_diff(image_ddim, image) # check that difference is large self.assertGreater(diff, 1000) + class TestSDXL(MyTestCase): def get_model(self): @@ -267,10 +299,16 @@ def get_model(self): -class TestFlux(TestCase): +class TestFlux(MyTestCase): def setUp(self): + super().setUp() self._pipeline = None + self.schedulers = ['FlowMatchEulerDiscreteScheduler'] + self.device_args = dict() + self.device_args['device'] = torch.device('cpu', 0) + if torch.cuda.is_available(): + self.device_args['offload_device'] = 0 def model_type(self): return ModelType.FLUX @@ -278,11 +316,14 @@ def model_type(self): def get_model(self): models_dir = os.environ.get('METAFUSION_MODELS_DIR', None) if models_dir is not None: - return models_dir + '/flux.1-schnell' - return './models-sd/' + "/tiny-flux-pipe" + return models_dir + '/flux-1-dev' + return './models-sd/' + "flux/tiny-flux-pipe" + @unittest.skip('flux does not need test') + def test_lpw_turned_off(self): + pass - def test_basic_txt2im(self): + def est_basic_txt2im(self): model = self.get_model() device = torch.device('cpu', 0) # create pipe diff --git a/tests/test_worker.py b/tests/test_worker.py index ceebbc4..3eb4102 100755 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -12,8 +12,6 @@ from multigen.worker import ServiceThread from multigen.log import setup_logger -setup_logger() - nprompt = "jpeg artifacts, blur, distortion, watermark, signature, extra fingers, fewer fingers, lowres, bad hands, duplicate heads, bad anatomy, bad crop" @@ -86,4 +84,5 @@ def tearDown(self): if __name__ == '__main__': + setup_logger('test_worker.log') unittest.main()