From 8f6d81e6d8c43e5edcdd533be6b3c940e5080d11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joel=20Andr=C3=A9s=20Navarro=20Navarro?=
 <navarrojoelandres@gmail.com>
Date: Thu, 27 Nov 2025 17:13:30 -0500
Subject: [PATCH] Adjust Qwen Image quant type

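For the first and last transformer blocks, raise the minimum quant type
to Q5_K for all low-quant ftypes (Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_0,
Q4_1, Q4_K_S, Q4_K_M). Previously Q2_K/Q3_K_* used Q4_K, and Q4_0, Q4_1
and Q4_K_S were not covered by this override. The existing
Q5_K_M -> Q6_K promotion for these blocks is unchanged.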
---
 tools/lcpp.patch | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/lcpp.patch b/tools/lcpp.patch
index a341a4d..4481632 100644
--- a/tools/lcpp.patch
+++ b/tools/lcpp.patch
@@ -126,7 +126,7 @@ index 24e1f1f0..ee68edfd 100644
      // get hparams kv
      ml.get_key(LLM_KV_VOCAB_SIZE, hparams.n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab);
  
-@@ -18016,6 +18072,158 @@ static void llama_tensor_dequantize_internal(
+@@ -18016,6 +18072,159 @@ static void llama_tensor_dequantize_internal(
      workers.clear();
  }
  
@@ -237,14 +237,15 @@ index 24e1f1f0..ee68edfd 100644
 +            (name.find("transformer_blocks.0.") != std::string::npos) ||
 +            (name.find("transformer_blocks.59.") != std::string::npos) // this should be dynamic
 +        ) {
-+            if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
-+                new_type = GGML_TYPE_Q4_K;
-+            }
-+            else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) {
-+                new_type = GGML_TYPE_Q4_K;
-+            }
-+            else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) {
-+                new_type = GGML_TYPE_Q5_K;
++            if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S ||
++                ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) {
++                new_type = GGML_TYPE_Q5_K;  // Minimum Q5_K for low quants
 +            }
 +            else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) {
 +                new_type = GGML_TYPE_Q6_K;