From be0240749982c5d11ba5b0380686d927a3675667 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Mon, 15 Dec 2025 12:31:24 +0200
Subject: [PATCH 1/2] Math: FFT: Move coefficients to same directory as FFT
 library

This patch moves the twiddle factor tables to the src/math/fft/coef
directory. The purpose is to make the FFT library more modular. The
coefficient data is only used locally by the FFT functions, so there
is no need to keep the headers under src/include.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 .../sof/audio/coefficients/fft => math/fft/coef}/twiddle_16.h  | 0
 .../audio/coefficients/fft => math/fft/coef}/twiddle_3072_32.h | 0
 .../sof/audio/coefficients/fft => math/fft/coef}/twiddle_32.h  | 0
 src/math/fft/fft_16.c                                          | 2 +-
 src/math/fft/fft_16_hifi3.c                                    | 2 +-
 src/math/fft/fft_32.c                                          | 3 +--
 src/math/fft/fft_32_hifi3.c                                    | 2 +-
 src/math/fft/fft_common.c                                      | 2 ++
 src/math/fft/fft_multi.c                                       | 2 +-
 9 files changed, 7 insertions(+), 6 deletions(-)
 rename src/{include/sof/audio/coefficients/fft => math/fft/coef}/twiddle_16.h (100%)
 rename src/{include/sof/audio/coefficients/fft => math/fft/coef}/twiddle_3072_32.h (100%)
 rename src/{include/sof/audio/coefficients/fft => math/fft/coef}/twiddle_32.h (100%)

diff --git a/src/include/sof/audio/coefficients/fft/twiddle_16.h b/src/math/fft/coef/twiddle_16.h
similarity index 100%
rename from src/include/sof/audio/coefficients/fft/twiddle_16.h
rename to src/math/fft/coef/twiddle_16.h
diff --git a/src/include/sof/audio/coefficients/fft/twiddle_3072_32.h b/src/math/fft/coef/twiddle_3072_32.h
similarity index 100%
rename from src/include/sof/audio/coefficients/fft/twiddle_3072_32.h
rename to src/math/fft/coef/twiddle_3072_32.h
diff --git a/src/include/sof/audio/coefficients/fft/twiddle_32.h b/src/math/fft/coef/twiddle_32.h
similarity index 100%
rename from src/include/sof/audio/coefficients/fft/twiddle_32.h
rename to src/math/fft/coef/twiddle_32.h
diff --git a/src/math/fft/fft_16.c b/src/math/fft/fft_16.c
index 7af6c1eb2768..7d707a00e5ea 100644
--- a/src/math/fft/fft_16.c
+++ b/src/math/fft/fft_16.c
@@ -10,7 +10,7 @@
 #include <sof/math/fft.h>
 
 #ifdef FFT_GENERIC
-#include <sof/audio/coefficients/fft/twiddle_16.h>
+#include "coef/twiddle_16.h"
 
 /*
  * Helpers for 16 bit FFT calculation
diff --git a/src/math/fft/fft_16_hifi3.c b/src/math/fft/fft_16_hifi3.c
index 54e9cebc4a23..049061d6b3de 100644
--- a/src/math/fft/fft_16_hifi3.c
+++ b/src/math/fft/fft_16_hifi3.c
@@ -9,8 +9,8 @@
 #include <sof/math/fft.h>
 
 #ifdef FFT_HIFI3
-#include <sof/audio/coefficients/fft/twiddle_16.h>
 #include <xtensa/tie/xt_hifi3.h>
+#include "coef/twiddle_16.h"
 
 /**
  * \brief Execute the 16-bits Fast Fourier Transform (FFT) or Inverse FFT (IFFT)
diff --git a/src/math/fft/fft_32.c b/src/math/fft/fft_32.c
index 25dc71f57e2e..40bee9550683 100644
--- a/src/math/fft/fft_32.c
+++ b/src/math/fft/fft_32.c
@@ -13,9 +13,8 @@
 #include <stdio.h>
 
 #ifdef FFT_GENERIC
-#include <sof/audio/coefficients/fft/twiddle_32.h>
-
 #include "fft_32.h"
+#include "coef/twiddle_32.h"
 
 
 /**
diff --git a/src/math/fft/fft_32_hifi3.c b/src/math/fft/fft_32_hifi3.c
index 1ca88c48268d..c9b8c7769a8c 100644
--- a/src/math/fft/fft_32_hifi3.c
+++ b/src/math/fft/fft_32_hifi3.c
@@ -10,8 +10,8 @@
 #include <sof/math/fft.h>
 
 #ifdef FFT_HIFI3
-#include <sof/audio/coefficients/fft/twiddle_32.h>
 #include <xtensa/tie/xt_hifi3.h>
+#include "coef/twiddle_32.h"
 
 void fft_execute_32(struct fft_plan *plan, bool ifft)
 {
diff --git a/src/math/fft/fft_common.c b/src/math/fft/fft_common.c
index 5ce47acd025a..e6fb4d76daac 100644
--- a/src/math/fft/fft_common.c
+++ b/src/math/fft/fft_common.c
@@ -13,7 +13,9 @@
 #include <sof/common.h>
 #include <rtos/alloc.h>
 #include <sof/math/fft.h>
+
 #include "fft_common.h"
+#include "fft_32.h"
 
 LOG_MODULE_REGISTER(math_fft, CONFIG_SOF_LOG_LEVEL);
 SOF_DEFINE_REG_UUID(math_fft);
diff --git a/src/math/fft/fft_multi.c b/src/math/fft/fft_multi.c
index baa404315380..4a4f66e42872 100644
--- a/src/math/fft/fft_multi.c
+++ b/src/math/fft/fft_multi.c
@@ -4,7 +4,6 @@
 //
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
-#include <sof/audio/coefficients/fft/twiddle_3072_32.h>
 #include <sof/audio/module_adapter/module/generic.h>
 #include <sof/audio/format.h>
 #include <sof/trace/trace.h>
@@ -14,6 +13,7 @@
 #include <sof/math/fft.h>
 #include "fft_common.h"
 #include "fft_32.h"
+#include "coef/twiddle_3072_32.h"
 
 LOG_MODULE_REGISTER(math_fft_multi, CONFIG_SOF_LOG_LEVEL);
 SOF_DEFINE_REG_UUID(math_fft_multi);

From ca0d3afe2872614aaa32656539e6073ef3fb5479 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Tue, 23 Dec 2025 13:36:01 +0200
Subject: [PATCH 2/2] Math: FFT: Move twiddle factors data to DRAM

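This patch sets the twiddle factors data as __cold_rodata
when MATH_FFT_COLD_TWIDDLE_FACTORS is set to y. The twiddle
factors for the maximum FFT size are linked to DRAM and the
needed coefficients are copied to SRAM when the FFT plan is
initialized.

The copy is made regardless of the Kconfig option. It is
decimated to the FFT size of the plan and packed as interleaved
real and imaginary values, so the FFT execute functions now
index the twiddle factors with the plan size instead of
FFT_SIZE_MAX.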

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 src/include/sof/math/fft.h          |   3 +
 src/math/Kconfig                    |  10 +++
 src/math/fft/coef/twiddle_16.h      |   4 +-
 src/math/fft/coef/twiddle_3072_32.h |   4 +-
 src/math/fft/coef/twiddle_32.h      |   4 +-
 src/math/fft/fft_16.c               |   8 +-
 src/math/fft/fft_16_hifi3.c         |  10 +--
 src/math/fft/fft_32.c               |   9 +-
 src/math/fft/fft_32_hifi3.c         |  11 ++-
 src/math/fft/fft_common.c           |  54 +++++++++++-
 src/math/fft/fft_common.h           |  28 ++++++
 src/math/fft/fft_multi.c            | 129 ++++++++++++++++++----------
 12 files changed, 202 insertions(+), 72 deletions(-)

diff --git a/src/include/sof/math/fft.h b/src/include/sof/math/fft.h
index b3f0fd7faec7..66e2189d55f4 100644
--- a/src/include/sof/math/fft.h
+++ b/src/include/sof/math/fft.h
@@ -52,6 +52,7 @@ struct fft_plan {
 	struct icomplex32 *outb32;	/* pointer to output integer complex buffer */
 	struct icomplex16 *inb16;	/* pointer to input integer complex buffer */
 	struct icomplex16 *outb16;	/* pointer to output integer complex buffer */
+	void *twiddle;			/* packed real/imag twiddle factors, int16_t or int32_t */
 };
 
 struct fft_multi_plan {
@@ -60,6 +61,8 @@ struct fft_multi_plan {
 	struct icomplex32 *tmp_o32[FFT_MULTI_COUNT_MAX]; /* pointer to output buffer */
 	struct icomplex32 *inb32;	/* pointer to input integer complex buffer */
 	struct icomplex32 *outb32;	/* pointer to output integer complex buffer */
+	int32_t *multi_twiddle;		/* twiddle factors for combine FFT */
+	int32_t *shared_twiddle;	/* twiddle factors for three FFTs */
 	uint16_t *bit_reverse_idx;
 	uint32_t total_size;
 	uint32_t fft_size;
diff --git a/src/math/Kconfig b/src/math/Kconfig
index a513f88799fc..357b313dbfe8 100644
--- a/src/math/Kconfig
+++ b/src/math/Kconfig
@@ -111,6 +111,16 @@ config MATH_FFT_MULTI
 	  this should not be selected directly, please select it from other
 	  audio components where need it.
 
+config MATH_FFT_COLD_TWIDDLE_FACTORS
+	bool "FFT cold twiddle factors"
+	depends on MATH_FFT
+	default y
+	help
+	  Link twiddle factors for the maximum FFT size to
+	  DRAM and copy the needed set to SRAM when the FFT
+	  plan is initialized. The twiddle factors data
+	  size is currently 29 kB.
+
 menu "Supported FFT word lengths"
 	visible if MATH_FFT
 
diff --git a/src/math/fft/coef/twiddle_16.h b/src/math/fft/coef/twiddle_16.h
index e67df9d929a7..83eea4464e87 100644
--- a/src/math/fft/coef/twiddle_16.h
+++ b/src/math/fft/coef/twiddle_16.h
@@ -14,7 +14,7 @@
 #define FFT_SIZE_MAX	1024
 
 /* in Q1.15, generated from cos(i * 2 * pi / FFT_SIZE_MAX) */
-const int16_t twiddle_real_16[FFT_SIZE_MAX] = {
+SOF_MATH_FFT_COLD_RODATA static const int16_t twiddle_real_16[FFT_SIZE_MAX] = {
 	32767,
 	32767,
 	32766,
@@ -1042,7 +1042,7 @@ const int16_t twiddle_real_16[FFT_SIZE_MAX] = {
 };
 
 /* in Q1.15, generated from sin(i * 2 * pi / FFT_SIZE_MAX) */
-const int16_t twiddle_imag_16[FFT_SIZE_MAX] = {
+SOF_MATH_FFT_COLD_RODATA static const int16_t twiddle_imag_16[FFT_SIZE_MAX] = {
 	0,
 	-201,
 	-402,
diff --git a/src/math/fft/coef/twiddle_3072_32.h b/src/math/fft/coef/twiddle_3072_32.h
index 3024c7b49cff..0fadfb198411 100644
--- a/src/math/fft/coef/twiddle_3072_32.h
+++ b/src/math/fft/coef/twiddle_3072_32.h
@@ -14,7 +14,7 @@
 #define FFT_MULTI_TWIDDLE_SIZE	2048
 
 /* in Q1.31, generated from cos(i * 2 * pi / FFT_SIZE_MAX) */
-const int32_t multi_twiddle_real_32[FFT_MULTI_TWIDDLE_SIZE] = {
+SOF_MATH_FFT_COLD_RODATA static const int32_t multi_twiddle_real_32[FFT_MULTI_TWIDDLE_SIZE] = {
 	2147483647,
 	2147479156,
 	2147465681,
@@ -2066,7 +2066,7 @@ const int32_t multi_twiddle_real_32[FFT_MULTI_TWIDDLE_SIZE] = {
 };
 
 /* in Q1.31, generated from sin(i * 2 * pi / FFT_SIZE_MAX) */
-const int32_t multi_twiddle_imag_32[FFT_MULTI_TWIDDLE_SIZE] = {
+SOF_MATH_FFT_COLD_RODATA static const int32_t multi_twiddle_imag_32[FFT_MULTI_TWIDDLE_SIZE] = {
 	0,
 	-4392262,
 	-8784505,
diff --git a/src/math/fft/coef/twiddle_32.h b/src/math/fft/coef/twiddle_32.h
index 2d73dda1d0f4..b1ae97210870 100644
--- a/src/math/fft/coef/twiddle_32.h
+++ b/src/math/fft/coef/twiddle_32.h
@@ -14,7 +14,7 @@
 #define FFT_SIZE_MAX	1024
 
 /* in Q1.31, generated from cos(i * 2 * pi / FFT_SIZE_MAX) */
-const int32_t twiddle_real_32[FFT_SIZE_MAX] = {
+SOF_MATH_FFT_COLD_RODATA static const int32_t twiddle_real_32[FFT_SIZE_MAX] = {
 	2147483647,
 	2147443222,
 	2147321946,
@@ -1042,7 +1042,7 @@ const int32_t twiddle_real_32[FFT_SIZE_MAX] = {
 };
 
 /* in Q1.31, generated from sin(i * 2 * pi / FFT_SIZE_MAX) */
-const int32_t twiddle_imag_32[FFT_SIZE_MAX] = {
+SOF_MATH_FFT_COLD_RODATA static const int32_t twiddle_imag_32[FFT_SIZE_MAX] = {
 	0,
 	-13176712,
 	-26352928,
diff --git a/src/math/fft/fft_16.c b/src/math/fft/fft_16.c
index 7d707a00e5ea..55a8edcdc8b6 100644
--- a/src/math/fft/fft_16.c
+++ b/src/math/fft/fft_16.c
@@ -10,7 +10,6 @@
 #include <sof/math/fft.h>
 
 #ifdef FFT_GENERIC
-#include "coef/twiddle_16.h"
 
 /*
  * Helpers for 16 bit FFT calculation
@@ -75,6 +74,7 @@ void fft_execute_16(struct fft_plan *plan, bool ifft)
 	struct icomplex16 tmp2;
 	struct icomplex16 *inb;
 	struct icomplex16 *outb;
+	struct icomplex16 *twiddle;
 	int depth;
 	int top;
 	int bottom;
@@ -104,10 +104,11 @@ void fft_execute_16(struct fft_plan *plan, bool ifft)
 		icomplex16_shift(&inb[i], -(plan->len), &outb[plan->bit_reverse_idx[i]]);
 
 	/* step 2: loop to do FFT transform in smaller size */
+	twiddle = plan->twiddle;
 	for (depth = 1; depth <= plan->len; ++depth) {
 		m = 1 << depth;
 		n = m >> 1;
-		i = FFT_SIZE_MAX >> depth;
+		i = plan->size >> depth;
 
 		/* doing FFT transforms in size m */
 		for (k = 0; k < plan->size; k += m) {
@@ -116,8 +117,7 @@ void fft_execute_16(struct fft_plan *plan, bool ifft)
 				index = i * j;
 				top = k + j;
 				bottom = top + n;
-				tmp1.real = twiddle_real_16[index];
-				tmp1.imag = twiddle_imag_16[index];
+				tmp1 = twiddle[index];
 				/* calculate the accumulator: twiddle * bottom */
 				icomplex16_mul(&tmp1, &outb[bottom], &tmp2);
 				tmp1 = outb[top];
diff --git a/src/math/fft/fft_16_hifi3.c b/src/math/fft/fft_16_hifi3.c
index 049061d6b3de..11ab2064b709 100644
--- a/src/math/fft/fft_16_hifi3.c
+++ b/src/math/fft/fft_16_hifi3.c
@@ -10,7 +10,6 @@
 
 #ifdef FFT_HIFI3
 #include <xtensa/tie/xt_hifi3.h>
-#include "coef/twiddle_16.h"
 
 /**
  * \brief Execute the 16-bits Fast Fourier Transform (FFT) or Inverse FFT (IFFT)
@@ -31,6 +30,7 @@ void fft_execute_16(struct fft_plan *plan, bool ifft)
 	ae_int16x4 *out16x4;
 	ae_valign inu = AE_ZALIGN64();
 	ae_valign outu = AE_ZALIGN64();
+	int16_t *twiddle;
 	int depth, top, bottom, index;
 	int i, j, k, m, n;
 	int size = plan->size;
@@ -67,22 +67,22 @@ void fft_execute_16(struct fft_plan *plan, bool ifft)
 	}
 
 	/* step 2: loop to do FFT transform in smaller size */
+	twiddle = plan->twiddle;
 	for (depth = 1; depth <= plan->len; ++depth) {
 		m = 1 << depth;
 		n = m >> 1;
-		i = FFT_SIZE_MAX >> depth;
+		i = size >> depth;
 
 		/* doing FFT transforms in size m */
 		for (k = 0; k < plan->size; k += m) {
 			/* doing one FFT transform for size m */
 			for (j = 0; j < n; ++j) {
-				index = i * j;
+				index = 2 * i * j;
 				top = k + j;
 				bottom = top + n;
 				/* store twiddle and bottom as Q9.23*/
 				temp1 = AE_CVTP24A16X2_LL(outb[bottom].real, outb[bottom].imag);
-				temp2 = AE_CVTP24A16X2_LL(twiddle_real_16[index],
-							  twiddle_imag_16[index]);
+				temp2 = AE_CVTP24A16X2_LL(twiddle[index], twiddle[index + 1]);
 				/* calculate the accumulator: twiddle * bottom */
 				res = AE_MULFC24RA(temp1, temp2);
 				/* saturate and round the result to 16bit and put it in
diff --git a/src/math/fft/fft_32.c b/src/math/fft/fft_32.c
index 40bee9550683..732af9608e3e 100644
--- a/src/math/fft/fft_32.c
+++ b/src/math/fft/fft_32.c
@@ -14,8 +14,6 @@
 
 #ifdef FFT_GENERIC
 #include "fft_32.h"
-#include "coef/twiddle_32.h"
-
 
 /**
  * \brief Execute the 32-bits Fast Fourier Transform (FFT) or Inverse FFT (IFFT)
@@ -29,6 +27,7 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
 	struct icomplex32 tmp2;
 	struct icomplex32 *inb;
 	struct icomplex32 *outb;
+	struct icomplex32 *twiddle;
 	int depth;
 	int top;
 	int bottom;
@@ -58,10 +57,11 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
 		icomplex32_shift(&inb[i], -(plan->len), &outb[plan->bit_reverse_idx[i]]);
 
 	/* step 2: loop to do FFT transform in smaller size */
+	twiddle = plan->twiddle;
 	for (depth = 1; depth <= plan->len; ++depth) {
 		m = 1 << depth;
 		n = m >> 1;
-		i = FFT_SIZE_MAX >> depth;
+		i = plan->size >> depth;
 
 		/* doing FFT transforms in size m */
 		for (k = 0; k < plan->size; k += m) {
@@ -70,8 +70,7 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
 				index = i * j;
 				top = k + j;
 				bottom = top + n;
-				tmp1.real = twiddle_real_32[index];
-				tmp1.imag = twiddle_imag_32[index];
+				tmp1 = twiddle[index];
 				/* calculate the accumulator: twiddle * bottom */
 				icomplex32_mul(&tmp1, &outb[bottom], &tmp2);
 				tmp1 = outb[top];
diff --git a/src/math/fft/fft_32_hifi3.c b/src/math/fft/fft_32_hifi3.c
index c9b8c7769a8c..da7bc2b5525f 100644
--- a/src/math/fft/fft_32_hifi3.c
+++ b/src/math/fft/fft_32_hifi3.c
@@ -11,7 +11,6 @@
 
 #ifdef FFT_HIFI3
 #include <xtensa/tie/xt_hifi3.h>
-#include "coef/twiddle_32.h"
 
 void fft_execute_32(struct fft_plan *plan, bool ifft)
 {
@@ -23,6 +22,7 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
 	ae_int32x2 *outx = (ae_int32x2 *)plan->outb32;
 	ae_int32x2 *outtop;
 	ae_int32x2 *outbottom;
+	ae_int32x2 *twiddle;
 	uint16_t *idx = &plan->bit_reverse_idx[0];
 	int depth, top, bottom, index;
 	int i, j, k, m, n;
@@ -55,23 +55,22 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
 	}
 
 	/* step 2: loop to do FFT transform in smaller size */
+	twiddle = plan->twiddle;
 	for (depth = 1; depth <= len; ++depth) {
 		m = 1 << depth;
 		n = m >> 1;
-		i = FFT_SIZE_MAX >> depth;
+		i = size >> depth;
 
 		/* doing FFT transforms in size m */
 		for (k = 0; k < size; k += m) {
 			/* doing one FFT transform for size m */
 			for (j = 0; j < n; ++j) {
-				index = i * j;
+				index = i * j * sizeof(ae_int32x2);
 				top = k + j;
 				bottom = top + n;
 
 				/* load twiddle factor to sample1 */
-				sample1 = twiddle_real_32[index];
-				sample2 = twiddle_imag_32[index];
-				sample1 = AE_SEL32_LH(sample1, sample2);
+				sample1 = AE_L32X2_X(twiddle, index);
 
 				/* calculate the accumulator: twiddle * bottom */
 				sample2 = outx[bottom];
diff --git a/src/math/fft/fft_common.c b/src/math/fft/fft_common.c
index e6fb4d76daac..ae7e8a35b038 100644
--- a/src/math/fft/fft_common.c
+++ b/src/math/fft/fft_common.c
@@ -16,6 +16,8 @@
 
 #include "fft_common.h"
 #include "fft_32.h"
+#include "coef/twiddle_32.h"
+#include "coef/twiddle_16.h"
 
 LOG_MODULE_REGISTER(math_fft, CONFIG_SOF_LOG_LEVEL);
 SOF_DEFINE_REG_UUID(math_fft);
@@ -80,6 +82,37 @@ void fft_plan_init_bit_reverse(uint16_t *bit_reverse_idx, int size, int len)
 		bit_reverse_idx[i] = (bit_reverse_idx[i >> 1] >> 1) | ((i & 1) << (len - 1));
 }
 
+void *fft_plan_allocate_twiddle(struct processing_module *mod, int size, int bits)
+{
+	/* One real and imaginary word per FFT bin, any bits other than 32 is sized as 16 bit */
+	return mod_alloc_align(mod, ((bits == 32) ? sizeof(int32_t) : sizeof(int16_t)) * 2 * size,
+			       2 * sizeof(int32_t));
+}
+
+void fft_plan_init_twiddle(void *twiddle, int size, int bits)
+{
+	/* Source tables cover FFT_SIZE_MAX points, pick every step-th entry for this size */
+	const int step = FFT_SIZE_MAX / size;
+	int16_t *dst16 = twiddle;
+	int32_t *dst32 = twiddle;
+	int src = 0;
+	int n;
+
+	/* Any word length other than 32 is packed as 16 bit real, imaginary pairs */
+	if (bits != 32) {
+		for (n = 0; n < size; n++, src += step) {
+			*dst16++ = twiddle_real_16[src];
+			*dst16++ = twiddle_imag_16[src];
+		}
+		return;
+	}
+
+	for (n = 0; n < size; n++, src += step) {
+		*dst32++ = twiddle_real_32[src];
+		*dst32++ = twiddle_imag_32[src];
+	}
+}
+
 struct fft_plan *mod_fft_plan_new(struct processing_module *mod, void *inb,
 				  void *outb, uint32_t size, int bits)
 {
@@ -97,12 +130,29 @@ struct fft_plan *mod_fft_plan_new(struct processing_module *mod, void *inb,
 	plan->bit_reverse_idx = mod_zalloc(mod,	plan->size * sizeof(uint16_t));
 	if (!plan->bit_reverse_idx) {
 		comp_cl_err(mod->dev, "Failed to allocate bit reverse table.");
-		mod_free(mod, plan);
-		return NULL;
+		goto err;
 	}
 
 	fft_plan_init_bit_reverse(plan->bit_reverse_idx, plan->size, plan->len);
+
+	/* Allocate memory for packed twiddle factors */
+	plan->twiddle = fft_plan_allocate_twiddle(mod, size, bits);
+	if (!plan->twiddle) {
+		comp_cl_err(mod->dev, "Failed to allocate twiddle factors.");
+		goto err_free_bit_reverse;
+	}
+
+	/* Pack twiddle factors from sparse real and imaginary tables to complex pairs */
+	fft_plan_init_twiddle(plan->twiddle, size, bits);
+
 	return plan;
+
+err_free_bit_reverse:
+	mod_free(mod, plan->bit_reverse_idx);
+
+err:
+	mod_free(mod, plan);
+	return NULL;
 }
 
 void mod_fft_plan_free(struct processing_module *mod, struct fft_plan *plan)
diff --git a/src/math/fft/fft_common.h b/src/math/fft/fft_common.h
index 9ea9998073ca..addaf57819de 100644
--- a/src/math/fft/fft_common.h
+++ b/src/math/fft/fft_common.h
@@ -4,6 +4,15 @@
 //
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
+#ifndef __SOF_FFT_COMMON_H__
+#define __SOF_FFT_COMMON_H__
+
+#if CONFIG_MATH_FFT_COLD_TWIDDLE_FACTORS
+#define SOF_MATH_FFT_COLD_RODATA __cold_rodata
+#else
+#define SOF_MATH_FFT_COLD_RODATA
+#endif
+
 /**
  * fft_plan_common_new() - Common FFT prepare function
  * @param mod: Pointer to module
@@ -23,3 +32,22 @@ struct fft_plan *fft_plan_common_new(struct processing_module *mod, void *inb,
  * @param len: Power of two value equals FFT size
  */
 void fft_plan_init_bit_reverse(uint16_t *bit_reverse_idx, int size, int len);
+
+/**
+ * fft_plan_allocate_twiddle - Allocates memory for twiddle factors
+ * @param mod: pointer to module
+ * @param size: size of FFT
+ * @param bits: word length of FFT, 32 or 16
+ * @return Pointer to allocated memory
+ */
+void *fft_plan_allocate_twiddle(struct processing_module *mod, int size, int bits);
+
+/**
+ * fft_plan_init_twiddle - sets up a decimated and re-arranged twiddle factors table
+ * @param twiddle: pointer to destination twiddle factors
+ * @param size: size of FFT
+ * @param bits: word length of FFT, 32 or 16
+ */
+void fft_plan_init_twiddle(void *twiddle, int size, int bits);
+
+#endif /* __SOF_FFT_COMMON_H__ */
diff --git a/src/math/fft/fft_multi.c b/src/math/fft/fft_multi.c
index 4a4f66e42872..5986af6597c4 100644
--- a/src/math/fft/fft_multi.c
+++ b/src/math/fft/fft_multi.c
@@ -24,6 +24,23 @@ DECLARE_TR_CTX(math_fft_multi_tr, SOF_UUID(math_fft_multi_uuid), LOG_LEVEL_INFO)
 #define DFT3_COEFI	1859775393	/* int32(sqrt(3) / 2 * 2^31) */
 #define DFT3_SCALE	715827883	/* int32(1/3*2^31) */
 
+static void init_multi_fft_twiddle(struct fft_multi_plan *plan)
+{
+	/* Source table stride for this sub-FFT size */
+	const int step = FFT_MULTI_TWIDDLE_SIZE / 2 / plan->fft_size;
+	int32_t *dst = plan->multi_twiddle;
+	int fft, bin, src;
+
+	/* Pack real, imaginary pairs in the order fft_multi_execute_32() steps through them */
+	for (fft = 1; fft < plan->num_ffts; fft++) {
+		for (bin = 0; bin < plan->fft_size; bin++) {
+			src = fft * bin * step;
+			*dst++ = multi_twiddle_real_32[src];
+			*dst++ = multi_twiddle_imag_32[src];
+		}
+	}
+}
+
 struct fft_multi_plan *mod_fft_multi_plan_new(struct processing_module *mod, void *inb,
 					      void *outb, uint32_t size, int bits)
 {
@@ -32,6 +49,11 @@ struct fft_multi_plan *mod_fft_multi_plan_new(struct processing_module *mod, voi
 	const int size_div3 = size / 3;
 	int i;
 
+	if (bits != 32) {
+		comp_cl_err(mod->dev, "Not supported word length %d", bits);
+		return NULL;
+	}
+
 	if (!inb || !outb) {
 		comp_cl_err(mod->dev, "Null buffers");
 		return NULL;
@@ -46,6 +68,9 @@ struct fft_multi_plan *mod_fft_multi_plan_new(struct processing_module *mod, voi
 	if (!plan)
 		return NULL;
 
+	plan->inb32 = inb;
+	plan->outb32 = outb;
+
 	if (is_power_of_2(size)) {
 		plan->num_ffts = 1;
 	} else if (size_div3 * 3 == size) {
@@ -69,55 +94,72 @@ struct fft_multi_plan *mod_fft_multi_plan_new(struct processing_module *mod, voi
 		goto err;
 	}
 
-	switch (bits) {
-	case 32:
-		plan->inb32 = inb;
-		plan->outb32 = outb;
-
-		if (plan->num_ffts > 1) {
-			/* Allocate input/output buffers for FFTs */
-			tmp_size = 2 * plan->num_ffts * plan->fft_size * sizeof(struct icomplex32);
-			plan->tmp_i32[0] = mod_balloc(mod, tmp_size);
-			if (!plan->tmp_i32[0]) {
-				comp_cl_err(mod->dev, "Failed to allocate FFT buffers");
-				goto err_free_bit_reverse;
-			}
-
-			/* Set up buffers */
-			plan->tmp_o32[0] = plan->tmp_i32[0] + plan->fft_size;
-			for (i = 1; i < plan->num_ffts; i++) {
-				plan->tmp_i32[i] = plan->tmp_o32[i - 1] + plan->fft_size;
-				plan->tmp_o32[i] = plan->tmp_i32[i] + plan->fft_size;
-			}
-		} else {
-			plan->tmp_i32[0] = inb;
-			plan->tmp_o32[0] = outb;
-		}
+	/* Allocate twiddle factors, 2x int32_t for real and imaginary parts */
+	plan->multi_twiddle = mod_alloc_align(mod, size * sizeof(int64_t), sizeof(int64_t));
+	if (!plan->multi_twiddle) {
+		comp_cl_err(mod->dev, "Failed to allocate twiddle factors buffer");
+		goto err_free_bit_reverse;
+	}
 
-		for (i = 0; i < plan->num_ffts; i++) {
-			plan->fft_plan[i] = fft_plan_common_new(mod,
-								plan->tmp_i32[i],
-								plan->tmp_o32[i],
-								plan->fft_size, 32);
-			if (!plan->fft_plan[i])
-				goto err_free_buffer;
+	init_multi_fft_twiddle(plan);
 
-			plan->fft_plan[i]->bit_reverse_idx = plan->bit_reverse_idx;
+	/* Allocate memory for packed twiddle factors */
+	plan->shared_twiddle = fft_plan_allocate_twiddle(mod, plan->fft_size, bits);
+	if (!plan->shared_twiddle) {
+		comp_cl_err(mod->dev, "Failed to allocate twiddle factors.");
+		goto err_free_multi_twiddle;
+	}
+
+	/* Pack twiddle factors from sparse real and imaginary tables to complex pairs */
+	fft_plan_init_twiddle(plan->shared_twiddle, plan->fft_size, bits);
+
+	if (plan->num_ffts > 1) {
+		/* Allocate input/output buffers for FFTs */
+		tmp_size = 2 * plan->num_ffts * plan->fft_size * sizeof(struct icomplex32);
+		plan->tmp_i32[0] = mod_balloc(mod, tmp_size);
+		if (!plan->tmp_i32[0]) {
+			comp_cl_err(mod->dev, "Failed to allocate FFT buffers");
+			goto err_free_shared_twiddle;
 		}
-		break;
-	default:
-		comp_cl_err(mod->dev, "Not supported word length %d", bits);
-		goto err;
+
+		/* Set up buffers */
+		plan->tmp_o32[0] = plan->tmp_i32[0] + plan->fft_size;
+		for (i = 1; i < plan->num_ffts; i++) {
+			plan->tmp_i32[i] = plan->tmp_o32[i - 1] + plan->fft_size;
+			plan->tmp_o32[i] = plan->tmp_i32[i] + plan->fft_size;
+		}
+	} else {
+		plan->tmp_i32[0] = inb;
+		plan->tmp_o32[0] = outb;
+	}
+
+	for (i = 0; i < plan->num_ffts; i++) {
+		plan->fft_plan[i] = fft_plan_common_new(mod,
+							plan->tmp_i32[i],
+							plan->tmp_o32[i],
+							plan->fft_size, 32);
+		if (!plan->fft_plan[i])
+			goto err_free_buffer;
+
+		plan->fft_plan[i]->bit_reverse_idx = plan->bit_reverse_idx;
+		plan->fft_plan[i]->twiddle = plan->shared_twiddle;
 	}
 
 	/* Set up common bit index reverse table */
 	fft_plan_init_bit_reverse(plan->bit_reverse_idx, plan->fft_plan[0]->size,
 				  plan->fft_plan[0]->len);
+
 	return plan;
 
 err_free_buffer:
 	mod_free(mod, plan->tmp_i32[0]);
 
+err_free_shared_twiddle:
+	mod_free(mod, plan->shared_twiddle);
+
+err_free_multi_twiddle:
+	mod_free(mod, plan->multi_twiddle);
+
 err_free_bit_reverse:
 	mod_free(mod, plan->bit_reverse_idx);
 
@@ -141,6 +183,9 @@ void mod_fft_multi_plan_free(struct processing_module *mod, struct fft_multi_pla
 		mod_free(mod, plan->tmp_i32[0]);
 
 	mod_free(mod, plan->bit_reverse_idx);
+	mod_free(mod, plan->multi_twiddle);
+	/* The sub-plans only borrow shared_twiddle, so it is released once here */
+	mod_free(mod, plan->shared_twiddle);
 	mod_free(mod, plan);
 }
 
@@ -188,8 +231,9 @@ void fft_multi_execute_32(struct fft_multi_plan *plan, bool ifft)
 {
 	struct icomplex32 x[FFT_MULTI_COUNT_MAX];
 	struct icomplex32 y[FFT_MULTI_COUNT_MAX];
-	struct icomplex32 t, c;
-	int i, j, k, m;
+	struct icomplex32 c;
+	struct icomplex32 *t;
+	int i, j, k;
 
 	/* Handle 2^N FFT */
 	if (plan->num_ffts == 1) {
@@ -230,15 +274,12 @@ void fft_multi_execute_32(struct fft_multi_plan *plan, bool ifft)
 #endif
 
 	/* Multiply with twiddle factors */
-	m = FFT_MULTI_TWIDDLE_SIZE / 2 / plan->fft_size;
+	t = (struct icomplex32 *)plan->multi_twiddle;
 	for (j = 1; j < plan->num_ffts; j++) {
 		for (i = 0; i < plan->fft_size; i++) {
 			c = plan->tmp_o32[j][i];
-			k = j * i * m;
-			t.real = multi_twiddle_real_32[k];
-			t.imag = multi_twiddle_imag_32[k];
-			//fprintf(fh3, "%d %d\n", t.real, t.imag);
-			icomplex32_mul(&t, &c, &plan->tmp_o32[j][i]);
+			icomplex32_mul(t, &c, &plan->tmp_o32[j][i]);
+			t++;
 		}
 	}