wooyeolbaek
diff --git a/‎assets/0-<|startoftext|>.png
-2.83 KB b/‎assets/0-<|startoftext|>.png
-2.83 KB
diff --git a/‎assets/1-<a>.png
-1.2 KB b/‎assets/1-<a>.png
-1.2 KB
diff --git a/‎assets/10-<hello>.png
-956 Bytes b/‎assets/10-<hello>.png
-956 Bytes
diff --git a/‎assets/11-<world>.png
-834 Bytes b/‎assets/11-<world>.png
-834 Bytes
diff --git a/‎assets/12-<.>.png
-803 Bytes b/‎assets/12-<.>.png
-803 Bytes
diff --git a/‎assets/13-<|endoftext|>.png
-1.4 KB b/‎assets/13-<|endoftext|>.png
-1.4 KB
diff --git a/‎assets/2-<cap-.png
-921 Bytes b/‎assets/2-<cap-.png
-921 Bytes
diff --git a/‎assets/4--bara>.png
-1.99 KB b/‎assets/4--bara>.png
-1.99 KB
diff --git a/‎assets/5-<holding>.png
-1.2 KB b/‎assets/5-<holding>.png
-1.2 KB
diff --git a/‎assets/6-<a>.png
-1.25 KB b/‎assets/6-<a>.png
-1.25 KB
diff --git a/‎assets/7-<sign>.png
-1.26 KB b/‎assets/7-<sign>.png
-1.26 KB
diff --git a/‎assets/8-<that>.png
-954 Bytes b/‎assets/8-<that>.png
-954 Bytes
diff --git a/‎assets/9-<reads>.png
-982 Bytes b/‎assets/9-<reads>.png
-982 Bytes
diff --git a/‎attention_map_diffusers/modules.py
+463-8 b/‎attention_map_diffusers/modules.py
+463-8
diff --git a/‎attention_map_diffusers/utils.py
+23-2 b/‎attention_map_diffusers/utils.py
+23-2
diff --git a/‎demo/demo-flux-dev.py
+37 b/‎demo/demo-flux-dev.py
+37
diff --git a/‎demo/demo-flux-schnell.py
+37 b/‎demo/demo-flux-schnell.py
+37
diff --git a/‎demo/demo-sana.py
+42 b/‎demo/demo-sana.py
+42
diff --git a/‎demo/demo-sd3-5.py
+36 b/‎demo/demo-sd3-5.py
+36
@@ -6,10 +6,11 @@
 
 from diffusers.models import Transformer2DModel
 from diffusers.models.unets import UNet2DConditionModel
-from diffusers.models.transformers import SD3Transformer2DModel, FluxTransformer2DModel
+from diffusers.models.transformers import SD3Transformer2DModel, FluxTransformer2DModel, SanaTransformer2DModel
+from diffusers.models.transformers.sana_transformer import SanaTransformerBlock
 from diffusers.models.transformers.transformer_flux import FluxTransformerBlock
 from diffusers.models.attention import BasicTransformerBlock, JointTransformerBlock
-from diffusers import FluxPipeline
+from diffusers import FluxPipeline, SanaPipeline
 from diffusers.models.attention_processor import (
     AttnProcessor,
     AttnProcessor2_0,
@@ -46,6 +47,7 @@ def register_cross_attention_hook(model, hook_function, target_name):
             module.processor.store_attn_map = True
         elif isinstance(module.processor, AttnProcessor2_0):
             module.processor.store_attn_map = True
+            print(f'registered at {name}')  # f-prefix added so {name} is interpolated; assumes `name` is bound by the enclosing loop -- confirm
         elif isinstance(module.processor, LoRAAttnProcessor):
             module.processor.store_attn_map = True
         elif isinstance(module.processor, LoRAAttnProcessor2_0):
@@ -77,6 +79,20 @@ def replace_call_method_for_unet(model):
     return model
 
 
+def replace_call_method_for_sana(model):
+    """Recursively bind the Sana replacement forward functions onto `model` and its children; returns `model`."""
+    # Modules are matched by class name (string comparison), the same way replace_call_method_for_sd3 does.
+    if model.__class__.__name__ == 'SanaTransformer2DModel':
+        model.forward = SanaTransformer2DModelForward.__get__(model, SanaTransformer2DModel)
+
+    for _child_name, child in model.named_children():
+        if child.__class__.__name__ == 'SanaTransformerBlock':
+            child.forward = SanaTransformerBlockForward.__get__(child, SanaTransformerBlock)
+        replace_call_method_for_sana(child)  # descend regardless of whether this child was patched
+
+    return model
+
+
 def replace_call_method_for_sd3(model):
     if model.__class__.__name__ == 'SD3Transformer2DModel':
         model.forward = SD3Transformer2DModelForward.__get__(model, SD3Transformer2DModel)
@@ -122,6 +138,11 @@ def init_pipeline(pipeline):
             pipeline.transformer = register_cross_attention_hook(pipeline.transformer, hook_function, 'attn')
             pipeline.transformer = replace_call_method_for_flux(pipeline.transformer)
 
+        elif pipeline.transformer.__class__.__name__ == 'SanaTransformer2DModel':
+            SanaPipeline.__call__ = SanaPipeline_call  # assignment; the previous `==` was a discarded comparison, so __call__ was never replaced
+            pipeline.transformer = register_cross_attention_hook(pipeline.transformer, hook_function, 'attn2')  # hooks modules selected by the 'attn2' target name
+            pipeline.transformer = replace_call_method_for_sana(pipeline.transformer)  # rebinds forward() on the Sana model and its blocks
+
     else:
         if pipeline.unet.__class__.__name__ == 'UNet2DConditionModel':
             pipeline.unet = register_cross_attention_hook(pipeline.unet, hook_function, 'attn2')
 
@@ -0,0 +1,37 @@
+"""Demo: generate an image with FLUX.1-dev, then save its attention maps under attn_maps/."""
+import torch
+from diffusers import FluxPipeline
+from attention_map_diffusers import attn_maps, init_pipeline, save_attention_maps
+
+MODEL_ID = "black-forest-labs/FLUX.1-dev"
+
+pipe = FluxPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+# pipe.enable_model_cpu_offload()  # saves VRAM by offloading the model to CPU; leave disabled if the GPU is large enough
+pipe.to('cuda')
+
+##### 1. Replace modules and Register hook #####
+# init_pipeline returns the pipeline with the attention hooks registered.
+pipe = init_pipeline(pipe)
+################################################
+
+# Keep the batch small: the original note (written for sd3) warns that batched
+# prompts can exceed CPU memory; presumably the same holds for Flux -- confirm.
+prompts = [
+    # "A photo of a puppy wearing a hat.",
+    "A capybara holding a sign that reads Hello World.",
+]
+
+output = pipe(prompts, num_inference_steps=15, guidance_scale=4.5)
+images = output.images
+
+# One PNG per generated image, named by its index in the batch.
+for batch, image in enumerate(images):
+    image.save(f'{batch}-flux-dev.png')
+
+##### 2. Process and Save attention map #####
+# NOTE(review): unconditional=False presumably means no unconditional batch is run for Flux -- confirm.
+save_attention_maps(
+    attn_maps, pipe.tokenizer, prompts,
+    base_dir='attn_maps', unconditional=False,
+)
+#############################################
@@ -0,0 +1,37 @@
+"""Demo: generate an image with FLUX.1-schnell, then save its attention maps under attn_maps/."""
+import torch
+from diffusers import FluxPipeline
+from attention_map_diffusers import attn_maps, init_pipeline, save_attention_maps
+
+MODEL_ID = "black-forest-labs/FLUX.1-schnell"
+
+pipe = FluxPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+# pipe.enable_model_cpu_offload()  # saves VRAM by offloading the model to CPU; leave disabled if the GPU is large enough
+pipe.to('cuda')
+
+##### 1. Replace modules and Register hook #####
+# init_pipeline returns the pipeline with the attention hooks registered.
+pipe = init_pipeline(pipe)
+################################################
+
+# Keep the batch small: the original note (written for sd3) warns that batched
+# prompts can exceed CPU memory; presumably the same holds for Flux -- confirm.
+prompts = [
+    # "A photo of a puppy wearing a hat.",
+    "A capybara holding a sign that reads Hello World.",
+]
+
+output = pipe(prompts, num_inference_steps=15, guidance_scale=4.5)
+images = output.images
+
+# One PNG per generated image, named by its index in the batch.
+for batch, image in enumerate(images):
+    image.save(f'{batch}-flux-schnell.png')
+
+##### 2. Process and Save attention map #####
+# NOTE(review): unconditional=False presumably means no unconditional batch is run for Flux -- confirm.
+save_attention_maps(
+    attn_maps, pipe.tokenizer, prompts,
+    base_dir='attn_maps', unconditional=False,
+)
+#############################################
@@ -0,0 +1,42 @@
+"""Demo: generate a 1024x1024 image with Sana, then save its attention maps under attn_maps/."""
+import torch
+from diffusers import SanaPipeline
+from attention_map_diffusers import attn_maps, init_pipeline, save_attention_maps
+
+pipe = SanaPipeline.from_pretrained(
+    "Efficient-Large-Model/Sana_1600M_1024px_diffusers", variant="fp16", torch_dtype=torch.float16,
+)
+pipe.to("cuda")
+
+# Only the VAE and text encoder are cast to bfloat16; other components keep the float16 dtype requested above.
+for component in (pipe.vae, pipe.text_encoder):
+    component.to(torch.bfloat16)
+
+##### 1. Replace modules and Register hook #####
+# TODO: not implemented yet.
+# NOTE(review): init_pipeline has a SanaTransformer2DModel branch -- confirm whether the TODO above is stale.
+pipe = init_pipeline(pipe)
+################################################
+
+prompts = [
+    "a cyberpunk cat with a neon sign that says 'Sana'",
+    # "A capybara holding a sign that reads Hello World.",
+]
+seeded_generator = torch.Generator(device="cuda").manual_seed(42)  # fixed seed for the sampling call below
+output = pipe(
+    prompt=prompts,
+    height=1024, width=1024,
+    guidance_scale=5.0,
+    num_inference_steps=20,
+    generator=seeded_generator,
+)
+images = output.images
+
+for batch, image in enumerate(images):
+    image.save(f'{batch}-sana.png')
+
+##### 2. Process and Save attention map #####
+save_attention_maps(
+    attn_maps, pipe.tokenizer, prompts, base_dir='attn_maps', unconditional=True,
+)
+#############################################
@@ -0,0 +1,36 @@
+"""Demo: generate an image with Stable Diffusion 3.5 medium, then save its attention maps under attn_maps/."""
+import torch
+from diffusers import StableDiffusion3Pipeline
+from attention_map_diffusers import attn_maps, init_pipeline, save_attention_maps
+
+MODEL_ID = "stabilityai/stable-diffusion-3.5-medium"
+
+pipe = StableDiffusion3Pipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+
+##### 1. Replace modules and Register hook #####
+# init_pipeline returns the pipeline with the attention hooks registered.
+pipe = init_pipeline(pipe)
+################################################
+
+# Batched prompts are not recommended for sd3: CPU memory could be exceeded,
+# so only a single prompt is enabled here.
+prompts = [
+    # "A photo of a puppy wearing a hat.",
+    "A capybara holding a sign that reads Hello World.",
+]
+
+output = pipe(prompts, num_inference_steps=15, guidance_scale=4.5)
+images = output.images
+
+# One PNG per generated image, named by its index in the batch.
+for batch, image in enumerate(images):
+    image.save(f'{batch}-sd3-5.png')
+
+##### 2. Process and Save attention map #####
+# NOTE(review): unconditional=True presumably reflects an unconditional batch from guidance -- confirm.
+save_attention_maps(
+    attn_maps, pipe.tokenizer, prompts, base_dir='attn_maps', unconditional=True,
+)
+#############################################