@@ -71,11 +71,11 @@ def retrieve_latents(
     Examples:
         ```python
         >>> import torch
-        >>> from diffusers import Cosmos2_5_PredictBase
+        >>> from diffusers import Cosmos2_5_PredictBasePipeline
         >>> from diffusers.utils import export_to_video, load_image, load_video

         >>> model_id = "nvidia/Cosmos-Predict2.5-Base-2B"
-        >>> pipe = Cosmos2_5_PredictBase.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+        >>> pipe = Cosmos2_5_PredictBasePipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
         >>> pipe = pipe.to("cuda")

         >>> # Common negative prompt reused across modes.
@@ -163,7 +163,7 @@ def retrieve_latents(
 """


-class Cosmos2_5_PredictBase(DiffusionPipeline):
+class Cosmos2_5_PredictBasePipeline(DiffusionPipeline):
     r"""
     Pipeline for [Cosmos Predict2.5](https://github.com/nvidia-cosmos/cosmos-predict2.5) base model.

@@ -233,20 +233,6 @@ def __init__(
         if self.latents_mean is None or self.latents_std is None:
             raise ValueError("VAE configuration must define both `latents_mean` and `latents_std`.")

-
-    @property
-    def _execution_device(self):
-        device = super()._execution_device
-        if isinstance(device, torch.device) and device.type == "cpu":
-            for module_name in ("transformer", "text_encoder", "vae"):
-                module = getattr(self, module_name, None)
-                if module is None or not isinstance(module, torch.nn.Module):
-                    continue
-                module_device = getattr(module, "device", None)
-                if isinstance(module_device, torch.device) and module_device.type != "cpu":
-                    return module_device
-        return device
-
     # Copied from diffusers.pipelines.cosmos.pipeline_cosmos_text2world.CosmosTextToWorldPipeline._get_prompt_embeds
     def _get_prompt_embeds(
         self,
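The override removed above manually probed submodule devices whenever the base class reported CPU. A minimal sketch of the expected behavior without it, assuming a CUDA-capable machine and relying on the base `DiffusionPipeline._execution_device` property (the model id is the one used in the example docstring):

```python
import torch
from diffusers import Cosmos2_5_PredictBasePipeline

# Minimal sketch, assuming a CUDA device is available; device resolution is left
# to the base DiffusionPipeline._execution_device property rather than the
# override removed in the hunk above.
pipe = Cosmos2_5_PredictBasePipeline.from_pretrained(
    "nvidia/Cosmos-Predict2.5-Base-2B", torch_dtype=torch.bfloat16
)
pipe = pipe.to("cuda")
print(pipe._execution_device)  # expected: the CUDA device, matching pipe.device
```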
@@ -398,6 +384,8 @@ def encode_prompt(

         return prompt_embeds, negative_prompt_embeds

+    # Modified from diffusers.pipelines.cosmos.pipeline_cosmos2_video2world.Cosmos2VideoToWorldPipeline.prepare_latents and
+    # diffusers.pipelines.cosmos.pipeline_cosmos2_text2image.Cosmos2TextToImagePipeline.prepare_latents
     def prepare_latents(
         self,
         video: Optional[torch.Tensor],
@@ -458,8 +446,6 @@ def prepare_latents(

         cond_latents = torch.cat(cond_latents, dim=0).to(dtype)

-        if self.latents_mean is None or self.latents_std is None:
-            raise ValueError("VAE configuration must define `latents_mean` and `latents_std`.")
         latents_mean = self.latents_mean.to(device=device, dtype=dtype)
         latents_std = self.latents_std.to(device=device, dtype=dtype)
         cond_latents = (cond_latents - latents_mean) / latents_std
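For reference, a minimal sketch of the conditioning-latent normalization shown in the context lines above, using hypothetical stand-ins for the per-channel `latents_mean` / `latents_std` buffers derived from the VAE config (shapes and values are illustrative only):

```python
import torch

# Hypothetical per-channel statistics standing in for the values read from the VAE config.
latents_mean = torch.zeros(1, 16, 1, 1, 1)
latents_std = torch.ones(1, 16, 1, 1, 1)

# (batch, channels, frames, height, width) conditioning latents from the VAE encoder.
cond_latents = torch.randn(1, 16, 4, 32, 32)

# Shift and scale into the normalized latent space consumed by the transformer.
cond_latents = (cond_latents - latents_mean) / latents_std
```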