Skip to content

Commit 83ba712

Browse files
sayakpaul and anijain2305
authored and committed
Merge branch 'main' into compile_utils
2 parents 75e665b + f20b83a commit 83ba712

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+171
-102
lines changed

src/diffusers/models/modeling_utils.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1414,15 +1414,10 @@ def compile_repeated_blocks(self, *args, **kwargs):
14141414
can reduce end-to-end compile time substantially, while preserving the
14151415
runtime speed-ups you would expect from a full `torch.compile`.
14161416
1417-
The set of sub-modules to compile is discovered in one of two ways:
1418-
1419-
1. **`_repeated_blocks`** – Preferred. Define this attribute on your
1420-
subclass as a list/tuple of class names (strings). Every module whose
1421-
class name matches will be compiled.
1422-
1423-
2. **`_no_split_modules`** – Fallback. If the preferred attribute is
1424-
missing or empty, we fall back to the legacy Diffusers attribute
1425-
`_no_split_modules`.
1417+
The set of sub-modules to compile is discovered by the presence of
1418+
**`_repeated_blocks`** attribute in the model definition. Define this
1419+
attribute on your model subclass as a list/tuple of class names
1420+
(strings). Every module whose class name matches will be compiled.
14261421
14271422
Once discovered, each matching sub-module is compiled by calling
14281423
`submodule.compile(*args, **kwargs)`. Any positional or keyword
@@ -1431,22 +1426,16 @@ class name matches will be compiled.
14311426
"""
14321427
repeated_blocks = getattr(self, "_repeated_blocks", None)
14331428

1434-
if not repeated_blocks:
1435-
logger.warning("`_repeated_blocks` attribute is empty. Using `_no_split_modules` to find compile regions.")
1436-
1437-
repeated_blocks = getattr(self, "_no_split_modules", None)
1438-
14391429
if not repeated_blocks:
14401430
raise ValueError(
1441-
"Both `_repeated_blocks` and `_no_split_modules` attribute are empty. "
1442-
"Set `_repeated_blocks` for the model to benefit from faster compilation. "
1431+
"`_repeated_blocks` attribute is empty. "
1432+
f"Set `_repeated_blocks` for the class `{self.__class__.__name__}` to benefit from faster compilation. "
14431433
)
1444-
14451434
has_compiled_region = False
14461435
for submod in self.modules():
14471436
if submod.__class__.__name__ in repeated_blocks:
1448-
has_compiled_region = True
14491437
submod.compile(*args, **kwargs)
1438+
has_compiled_region = True
14501439

14511440
if not has_compiled_region:
14521441
raise ValueError(

src/diffusers/models/transformers/transformer_chroma.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ class ChromaTransformer2DModel(
407407

408408
_supports_gradient_checkpointing = True
409409
_no_split_modules = ["ChromaTransformerBlock", "ChromaSingleTransformerBlock"]
410+
_repeated_blocks = ["ChromaTransformerBlock", "ChromaSingleTransformerBlock"]
410411
_skip_layerwise_casting_patterns = ["pos_embed", "norm"]
411412

412413
@register_to_config

src/diffusers/models/transformers/transformer_flux.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ class FluxTransformer2DModel(
227227
_supports_gradient_checkpointing = True
228228
_no_split_modules = ["FluxTransformerBlock", "FluxSingleTransformerBlock"]
229229
_skip_layerwise_casting_patterns = ["pos_embed", "norm"]
230-
_repeated_blocks = _no_split_modules
230+
_repeated_blocks = ["FluxTransformerBlock", "FluxSingleTransformerBlock"]
231231

232232
@register_to_config
233233
def __init__(

src/diffusers/models/transformers/transformer_hunyuan_video.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,12 @@ class HunyuanVideoTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin,
870870
"HunyuanVideoPatchEmbed",
871871
"HunyuanVideoTokenRefiner",
872872
]
873+
_repeated_blocks = [
874+
"HunyuanVideoTransformerBlock",
875+
"HunyuanVideoSingleTransformerBlock",
876+
"HunyuanVideoPatchEmbed",
877+
"HunyuanVideoTokenRefiner",
878+
]
873879

874880
@register_to_config
875881
def __init__(

src/diffusers/models/transformers/transformer_ltx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ class LTXVideoTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin
328328

329329
_supports_gradient_checkpointing = True
330330
_skip_layerwise_casting_patterns = ["norm"]
331+
_repeated_blocks = ["LTXVideoTransformerBlock"]
331332

332333
@register_to_config
333334
def __init__(

src/diffusers/models/transformers/transformer_wan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ class WanTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOrigi
345345
_no_split_modules = ["WanTransformerBlock"]
346346
_keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"]
347347
_keys_to_ignore_on_load_unexpected = ["norm_added_q"]
348-
_repeated_blocks = _no_split_modules
348+
_repeated_blocks = ["WanTransformerBlock"]
349349

350350
@register_to_config
351351
def __init__(

src/diffusers/models/unets/unet_2d_condition.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class conditioning with `class_embed_type` equal to `None`.
167167
_supports_gradient_checkpointing = True
168168
_no_split_modules = ["BasicTransformerBlock", "ResnetBlock2D", "CrossAttnUpBlock2D"]
169169
_skip_layerwise_casting_patterns = ["norm"]
170+
_repeated_blocks = ["BasicTransformerBlock"]
170171

171172
@register_to_config
172173
def __init__(

src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
replace_example_docstring,
4242
)
4343
from ...utils.import_utils import is_transformers_version
44-
from ...utils.torch_utils import randn_tensor
44+
from ...utils.torch_utils import empty_device_cache, randn_tensor
4545
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
4646
from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
4747

@@ -267,9 +267,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
267267

268268
if self.device.type != "cpu":
269269
self.to("cpu", silence_dtype_warnings=True)
270-
device_mod = getattr(torch, device.type, None)
271-
if hasattr(device_mod, "empty_cache") and device_mod.is_available():
272-
device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
270+
empty_device_cache(device.type)
273271

274272
model_sequence = [
275273
self.text_encoder.text_model,

src/diffusers/pipelines/consisid/consisid_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def prepare_face_models(model_path, device, dtype):
294294
295295
Parameters:
296296
- model_path: Path to the directory containing model files.
297-
- device: The device (e.g., 'cuda', 'cpu') where models will be loaded.
297+
- device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
298298
- dtype: Data type (e.g., torch.float32) for model inference.
299299
300300
Returns:

src/diffusers/pipelines/controlnet/pipeline_controlnet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
scale_lora_layers,
3838
unscale_lora_layers,
3939
)
40-
from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
40+
from ...utils.torch_utils import empty_device_cache, is_compiled_module, is_torch_version, randn_tensor
4141
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
4242
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
4343
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -1339,7 +1339,7 @@ def __call__(
13391339
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
13401340
self.unet.to("cpu")
13411341
self.controlnet.to("cpu")
1342-
torch.cuda.empty_cache()
1342+
empty_device_cache()
13431343

13441344
if not output_type == "latent":
13451345
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[

0 commit comments

Comments (0)