Skip to content

Commit 4133c68

Browse files
Address comments
1 parent 9010c93 commit 4133c68

File tree

3 files changed

+10
-18
lines changed

3 files changed

+10
-18
lines changed

docs/source/en/api/pipelines/cosmos.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ output.save("output.png")
7070
- all
7171
- __call__
7272

73+
## Cosmos2_5_PredictBasePipeline
74+
75+
[[autodoc]] Cosmos2_5_PredictBasePipeline
76+
- all
77+
- __call__
78+
7379
## CosmosPipelineOutput
7480

7581
[[autodoc]] pipelines.cosmos.pipeline_output.CosmosPipelineOutput

scripts/convert_cosmos_to_diffusers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
FlowMatchEulerDiscreteScheduler,
6464
UniPCMultistepScheduler,
6565
)
66-
from diffusers.pipelines.cosmos.pipeline_cosmos2_5_predict import Cosmos2_5_PredictBase
66+
from diffusers.pipelines.cosmos.pipeline_cosmos2_5_predict import Cosmos2_5_PredictBasePipeline
6767

6868

6969
def remove_keys_(key: str, state_dict: Dict[str, Any]):
@@ -545,7 +545,7 @@ def save_pipeline_cosmos2_5(args, transformer, vae):
545545
sigma_min=0.01,
546546
)
547547

548-
pipe = Cosmos2_5_PredictBase(
548+
pipe = Cosmos2_5_PredictBasePipeline(
549549
text_encoder=text_encoder,
550550
tokenizer=tokenizer,
551551
transformer=transformer,

src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -233,20 +233,6 @@ def __init__(
233233
if self.latents_mean is None or self.latents_std is None:
234234
raise ValueError("VAE configuration must define both `latents_mean` and `latents_std`.")
235235

236-
237-
@property
238-
def _execution_device(self):
239-
device = super()._execution_device
240-
if isinstance(device, torch.device) and device.type == "cpu":
241-
for module_name in ("transformer", "text_encoder", "vae"):
242-
module = getattr(self, module_name, None)
243-
if module is None or not isinstance(module, torch.nn.Module):
244-
continue
245-
module_device = getattr(module, "device", None)
246-
if isinstance(module_device, torch.device) and module_device.type != "cpu":
247-
return module_device
248-
return device
249-
250236
# Copied from diffusers.pipelines.cosmos.pipeline_cosmos_text2world.CosmosTextToWorldPipeline._get_prompt_embeds
251237
def _get_prompt_embeds(
252238
self,
@@ -398,6 +384,8 @@ def encode_prompt(
398384

399385
return prompt_embeds, negative_prompt_embeds
400386

387+
# Modified from diffusers.pipelines.cosmos.pipeline_cosmos2_video2world.Cosmos2VideoToWorldPipeline.prepare_latents and
388+
# diffusers.pipelines.cosmos.pipeline_cosmos2_text2image.Cosmos2TextToImagePipeline.prepare_latents
401389
def prepare_latents(
402390
self,
403391
video: Optional[torch.Tensor],
@@ -458,8 +446,6 @@ def prepare_latents(
458446

459447
cond_latents = torch.cat(cond_latents, dim=0).to(dtype)
460448

461-
if self.latents_mean is None or self.latents_std is None:
462-
raise ValueError("VAE configuration must define `latents_mean` and `latents_std`.")
463449
latents_mean = self.latents_mean.to(device=device, dtype=dtype)
464450
latents_std = self.latents_std.to(device=device, dtype=dtype)
465451
cond_latents = (cond_latents - latents_mean) / latents_std

0 commit comments

Comments (0)