2121
2222from ...callbacks import MultiPipelineCallbacks , PipelineCallback
2323from ...loaders import Mochi1LoraLoaderMixin
24- from ...models .autoencoders import AutoencoderKL
24+ from ...models .autoencoders import AutoencoderKLMochi
2525from ...models .transformers import MochiTransformer3DModel
2626from ...schedulers import FlowMatchEulerDiscreteScheduler
2727from ...utils import (
@@ -151,8 +151,8 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
151151 Conditional Transformer architecture to denoise the encoded video latents.
152152 scheduler ([`FlowMatchEulerDiscreteScheduler`]):
153153 A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
154- vae ([`AutoencoderKL`]):
155- Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
154+ vae ([`AutoencoderKLMochi`]):
155+ Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.
156156 text_encoder ([`T5EncoderModel`]):
157157 [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically
158158 the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
@@ -171,7 +171,7 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
171171 def __init__ (
172172 self ,
173173 scheduler : FlowMatchEulerDiscreteScheduler ,
174- vae : AutoencoderKL ,
174+ vae : AutoencoderKLMochi ,
175175 text_encoder : T5EncoderModel ,
176176 tokenizer : T5TokenizerFast ,
177177 transformer : MochiTransformer3DModel ,
0 commit comments