From 17b8fee758e8ab23c6aaf311f1e84df054b5832e Mon Sep 17 00:00:00 2001 From: hlky Date: Thu, 18 Dec 2025 12:04:28 +0000 Subject: [PATCH 1/2] Detect 2.0 vs 2.1 ZImageControlNetModel --- src/diffusers/loaders/single_file_utils.py | 8 ++++++-- .../pipelines/z_image/pipeline_z_image_controlnet.py | 3 +-- .../z_image/pipeline_z_image_controlnet_inpaint.py | 3 +-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index aac4835fe849..b07a0e479137 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -223,7 +223,8 @@ "cosmos-2.0-v2w-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-14B-Video2World"}, "z-image-turbo": {"pretrained_model_name_or_path": "Tongyi-MAI/Z-Image-Turbo"}, "z-image-turbo-controlnet": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union"}, - "z-image-turbo-controlnet-2.x": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.1"}, + "z-image-turbo-controlnet-2.0": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.0"}, + "z-image-turbo-controlnet-2.1": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.1"}, } # Use to configure model sample size when original config is provided @@ -784,7 +785,10 @@ def infer_diffusers_model_type(checkpoint): raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 2.0 model.") elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet-2.x"] in checkpoint: - model_type = "z-image-turbo-controlnet-2.x" + if torch.all(checkpoint["control_noise_refiner.0.before_proj.weight"] == 0.0): + model_type = "z-image-turbo-controlnet-2.0" + else: + model_type = "z-image-turbo-controlnet-2.1" elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet"] in checkpoint: model_type = "z-image-turbo-controlnet" diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py index 5e26862b018e..08fc4da0e7ba 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet.py @@ -58,14 +58,13 @@ >>> # torch_dtype=torch.bfloat16, >>> # ) - >>> # 2.0 - `config` is required + >>> # 2.0 >>> # controlnet = ZImageControlNetModel.from_single_file( >>> # hf_hub_download( >>> # "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.0", >>> # filename="Z-Image-Turbo-Fun-Controlnet-Union-2.0.safetensors", >>> # ), >>> # torch_dtype=torch.bfloat16, - >>> # config="hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.0", >>> # ) >>> pipe = ZImageControlNetPipeline.from_pretrained( diff --git a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py index 73ea7d0fddec..3b0f8dc288d3 100644 --- a/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py +++ b/src/diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py @@ -50,14 +50,13 @@ ... torch_dtype=torch.bfloat16, ... ) - >>> # 2.0 - `config` is required + >>> # 2.0 >>> # controlnet = ZImageControlNetModel.from_single_file( >>> # hf_hub_download( >>> # "alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union-2.0", >>> # filename="Z-Image-Turbo-Fun-Controlnet-Union-2.0.safetensors", >>> # ), >>> # torch_dtype=torch.bfloat16, - >>> # config="hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.0", >>> # ) >>> pipe = ZImageControlNetInpaintPipeline.from_pretrained( From caf49b86415ebb49acb38ae6984aecc6234880df Mon Sep 17 00:00:00 2001 From: hlky Date: Thu, 18 Dec 2025 12:14:42 +0000 Subject: [PATCH 2/2] Possibility of control_noise_refiner being removed --- src/diffusers/loaders/single_file_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index b07a0e479137..80fbd840907a 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -785,7 +785,10 @@ def infer_diffusers_model_type(checkpoint): raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 2.0 model.") elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet-2.x"] in checkpoint: - if torch.all(checkpoint["control_noise_refiner.0.before_proj.weight"] == 0.0): + before_proj_weight = checkpoint.get("control_noise_refiner.0.before_proj.weight", None) + if before_proj_weight is None: + model_type = "z-image-turbo-controlnet-2.0" + elif before_proj_weight is not None and torch.all(before_proj_weight == 0.0): model_type = "z-image-turbo-controlnet-2.0" else: model_type = "z-image-turbo-controlnet-2.1"