From 75dc55dbd92d24a5211b143fc8ded8deb2c08fe6 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Wed, 19 Nov 2025 09:23:56 +0100
Subject: [PATCH 01/11] dp: call module_is_ready_to_process() from DP thread context

module_is_ready_to_process() can call the .is_ready_to_process() module
method; therefore it can only be called from the DP thread context itself,
not from the scheduler.

Signed-off-by: Guennadi Liakhovetski
---
 src/schedule/zephyr_dp_schedule_application.c | 60 ++++++++++---------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/src/schedule/zephyr_dp_schedule_application.c b/src/schedule/zephyr_dp_schedule_application.c
index f5b53d426b01..d41a5b47ca7e 100644
--- a/src/schedule/zephyr_dp_schedule_application.c
+++ b/src/schedule/zephyr_dp_schedule_application.c
@@ -46,28 +46,12 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_
 	}
 
 	if (curr_task->state == SOF_TASK_STATE_QUEUED) {
-		bool mod_ready;
-
-		mod_ready = module_is_ready_to_process(mod, mod->sources,
-						       mod->num_of_sources,
-						       mod->sinks,
-						       mod->num_of_sinks);
-		if (mod_ready) {
-			/* trigger the task */
-			curr_task->state = SOF_TASK_STATE_RUNNING;
-			if (mod->dp_startup_delay && !pdata->ll_cycles_to_start) {
-				/* first time run - use delayed start */
-				pdata->ll_cycles_to_start =
-					module_get_lpt(pdata->mod) / LL_TIMER_PERIOD_US;
-
-				/* in case LPT < LL cycle - delay at least cycle */
-				if (!pdata->ll_cycles_to_start)
-					pdata->ll_cycles_to_start = 1;
-			}
-			trigger_task = true;
-			k_event_post(pdata->event, DP_TASK_EVENT_PROCESS);
-		}
+		/* trigger the task */
+		curr_task->state = SOF_TASK_STATE_RUNNING;
+		trigger_task = true;
+		k_event_post(pdata->event, DP_TASK_EVENT_PROCESS);
 	}
+
 	if (curr_task->state == SOF_TASK_STATE_RUNNING) {
 		/* (re) calculate deadline for all running tasks */
 		/* get module deadline in us*/
@@ -103,13 +87,15 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_
 void dp_thread_fn(void *p1, void *p2, void *p3)
 {
 	struct task *task = p1;
-	(void)p2;
-	(void)p3;
 	struct task_dp_pdata *task_pdata = task->priv_data;
+	struct processing_module *pmod = task_pdata->mod;
 	unsigned int lock_key;
 	enum task_state state;
 	bool task_stop;
 
+	ARG_UNUSED(p2);
+	ARG_UNUSED(p3);
+
 	do {
 		/*
 		 * the thread is started immediately after creation, it will stop on event.
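For context between the two hunks: the readiness callback that this patch moves onto the DP
thread typically walks the module's sources and sinks and compares fill levels against
per-cycle requirements. A minimal editorial sketch of such a callback, assuming SOF's
sink/source accessors (source_get_data_available(), source_get_min_available(),
sink_get_free_size(), sink_get_min_free_space()); the function name is hypothetical:

static bool my_mod_is_ready_to_process(struct processing_module *mod,
				       struct sof_source **sources, int num_of_sources,
				       struct sof_sink **sinks, int num_of_sinks)
{
	int i;

	/* every source must have queued at least one processing period of data */
	for (i = 0; i < num_of_sources; i++)
		if (source_get_data_available(sources[i]) <
		    source_get_min_available(sources[i]))
			return false;

	/* every sink must have room for at least one processing period of output */
	for (i = 0; i < num_of_sinks; i++)
		if (sink_get_free_size(sinks[i]) < sink_get_min_free_space(sinks[i]))
			return false;

	return true;
}

Because such a callback dereferences module-owned state, calling it from the scheduler
would race with the module's own thread; hence the move into dp_thread_fn() below.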
@@ -118,17 +104,37 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 		k_event_wait_safe(task_pdata->event, DP_TASK_EVENT_PROCESS | DP_TASK_EVENT_CANCEL,
 				  false, K_FOREVER);
 
-		if (task->state == SOF_TASK_STATE_RUNNING)
-			state = task_run(task);
-		else
+		bool ready;
+
+		if (task->state == SOF_TASK_STATE_RUNNING) {
+			ready = module_is_ready_to_process(pmod, pmod->sources,
+							   pmod->num_of_sources,
+							   pmod->sinks, pmod->num_of_sinks);
+		} else {
 			state = task->state; /* to avoid undefined variable warning */
+			ready = false;
+		}
+
+		if (ready) {
+			if (pmod->dp_startup_delay && !task_pdata->ll_cycles_to_start) {
+				/* first time run - use delayed start */
+				task_pdata->ll_cycles_to_start =
+					module_get_lpt(pmod) / LL_TIMER_PERIOD_US;
+
+				/* in case LPT < LL cycle - delay at least cycle */
+				if (!task_pdata->ll_cycles_to_start)
+					task_pdata->ll_cycles_to_start = 1;
+			}
+
+			state = task_run(task);
+		}
 
 		lock_key = scheduler_dp_lock(task->core);
 		/*
 		 * check if task is still running, may have been canceled by external call
 		 * if not, set the state returned by run procedure
 		 */
-		if (task->state == SOF_TASK_STATE_RUNNING) {
+		if (ready && task->state == SOF_TASK_STATE_RUNNING) {
 			task->state = state;
 			switch (state) {
 			case SOF_TASK_STATE_RESCHEDULE:

From ceca79cda1e03639c96c5e3934aff18631a67177 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Wed, 19 Nov 2025 15:35:12 +0100
Subject: [PATCH 02/11] dp: application: move IPC execution to the thread

With the application DP version, IPCs should be processed on the thread
itself. Prepare the scheduler for the transition.

Signed-off-by: Guennadi Liakhovetski
---
 src/include/sof/schedule/dp_schedule.h        |  39 +++
 src/schedule/zephyr_dp_schedule_application.c | 288 +++++++++++++++---
 2 files changed, 279 insertions(+), 48 deletions(-)

diff --git a/src/include/sof/schedule/dp_schedule.h b/src/include/sof/schedule/dp_schedule.h
index 99f1dcd2b16f..5b42a4282896 100644
--- a/src/include/sof/schedule/dp_schedule.h
+++ b/src/include/sof/schedule/dp_schedule.h
@@ -13,6 +13,8 @@
 #include
 #include
 #include
+#include
+#include
 
 struct processing_module;
 
@@ -89,6 +91,43 @@ void scheduler_get_task_info_dp(struct scheduler_props *scheduler_props,
 enum {
 	DP_TASK_EVENT_PROCESS = BIT(0),	/* Need to process data */
 	DP_TASK_EVENT_CANCEL = BIT(1),	/* Thread cancellation */
+	DP_TASK_EVENT_IPC = BIT(2),	/* IPC message */
 };
 
+struct bind_info;
+struct sof_source;
+struct sof_sink;
+
+/*
+ * Keeps the scheduler_dp_thread_ipc() flow simple - just one call that does all
+ * the IPC-message specific parameter packing internally. This is slightly
+ * suboptimal because IPC parameters first have to be collected in this
+ * structure and then packed in DP-accessible memory inside
+ * scheduler_dp_thread_ipc(). This could be split into two levels, by adding
+ * IPC-specific functions like ipc_flatten_pipeline_state() and similar, but
+ * that would add multiple functions to the API.
+ */
+union scheduler_dp_thread_ipc_param {
+	struct bind_info *bind_data;
+	struct {
+		unsigned int trigger_cmd;
+		enum ipc4_pipeline_state state;
+		int n_sources;
+		struct sof_source **sources;
+		int n_sinks;
+		struct sof_sink **sinks;
+	} pipeline_state;
+};
+
+#if CONFIG_ZEPHYR_DP_SCHEDULER
+int scheduler_dp_thread_ipc(struct processing_module *pmod, enum sof_ipc4_module_type cmd,
+			    const union scheduler_dp_thread_ipc_param *param);
+#else
+static inline int scheduler_dp_thread_ipc(struct processing_module *pmod,
+					  enum sof_ipc4_module_type cmd,
+					  const union scheduler_dp_thread_ipc_param *param)
+{
+	return 0;
+}
+#endif
+
 #endif /* __SOF_SCHEDULE_DP_SCHEDULE_H__ */
diff --git a/src/schedule/zephyr_dp_schedule_application.c b/src/schedule/zephyr_dp_schedule_application.c
index d41a5b47ca7e..2c1f158b2f9f 100644
--- a/src/schedule/zephyr_dp_schedule_application.c
+++ b/src/schedule/zephyr_dp_schedule_application.c
@@ -10,8 +10,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 
 #include
 
@@ -20,6 +20,179 @@
 #include "zephyr_dp_schedule.h"
 
+LOG_MODULE_DECLARE(dp_schedule, CONFIG_SOF_LOG_LEVEL);
+extern struct tr_ctx dp_tr;
+
+/* Synchronization semaphore for the scheduler thread to wait for DP startup */
+#define DP_SYNC_INIT(i, _) Z_SEM_INITIALIZER(dp_sync[i], 0, 1)
+#define DP_SYNC_INIT_LIST LISTIFY(CONFIG_CORE_COUNT, DP_SYNC_INIT, (,))
+static STRUCT_SECTION_ITERABLE_ARRAY(k_sem, dp_sync, CONFIG_CORE_COUNT) = { DP_SYNC_INIT_LIST };
+
+/* TODO: make this a shared kernel->module buffer for IPC parameters */
+static uint8_t ipc_buf[4096];
+
+struct ipc4_flat {
+	unsigned int cmd;
+	int ret;
+	union {
+		struct {
+			struct ipc4_module_bind_unbind bu;
+			enum bind_type type;
+		} bind;
+		struct {
+			unsigned int trigger_cmd;
+			enum ipc4_pipeline_state state;
+			int n_sources;
+			int n_sinks;
+			void *source_sink[];
+		} pipeline_state;
+	};
+};
+
+/* Pack IPC input data */
+static int ipc_thread_flatten(unsigned int cmd, const union scheduler_dp_thread_ipc_param *param,
+			      struct ipc4_flat *flat)
+{
+	flat->cmd = cmd;
+
+	/*
+	 * FIXME: SOF_IPC4_MOD_* and SOF_IPC4_GLB_* aren't fully orthogonal, but
+	 * so far none of the used ones overlap
+	 */
+	switch (cmd) {
+	case SOF_IPC4_MOD_BIND:
+	case SOF_IPC4_MOD_UNBIND:
+		flat->bind.bu = *param->bind_data->ipc4_data;
+		flat->bind.type = param->bind_data->bind_type;
+		break;
+	case SOF_IPC4_GLB_SET_PIPELINE_STATE:
+		flat->pipeline_state.trigger_cmd = param->pipeline_state.trigger_cmd;
+		switch (param->pipeline_state.trigger_cmd) {
+		case COMP_TRIGGER_STOP:
+			break;
+		case COMP_TRIGGER_PREPARE:
+			if (sizeof(flat->cmd) + sizeof(flat->ret) + sizeof(flat->pipeline_state) +
+			    sizeof(void *) * (param->pipeline_state.n_sources +
+					      param->pipeline_state.n_sinks) >
+			    sizeof(ipc_buf))
+				return -ENOMEM;
+
+			flat->pipeline_state.state = param->pipeline_state.state;
+			flat->pipeline_state.n_sources = param->pipeline_state.n_sources;
+			flat->pipeline_state.n_sinks = param->pipeline_state.n_sinks;
+			memcpy(flat->pipeline_state.source_sink, param->pipeline_state.sources,
+			       flat->pipeline_state.n_sources *
+			       sizeof(flat->pipeline_state.source_sink[0]));
+			memcpy(flat->pipeline_state.source_sink + flat->pipeline_state.n_sources,
+			       param->pipeline_state.sinks,
+			       flat->pipeline_state.n_sinks *
+			       sizeof(flat->pipeline_state.source_sink[0]));
+		}
+	}
+
+	return 0;
+}
+
+/* Unpack IPC data and execute a callback */
+static void ipc_thread_unflatten_run(struct processing_module *pmod, struct ipc4_flat *flat)
+{
+	const struct module_interface *const ops =
+		pmod->dev->drv->adapter_ops;
+
+	switch (flat->cmd) {
+	case SOF_IPC4_MOD_BIND:
+		if (ops->bind) {
+			struct bind_info bind_data = {
+				.ipc4_data = &flat->bind.bu,
+				.bind_type = flat->bind.type,
+			};
+
+			flat->ret = ops->bind(pmod, &bind_data);
+		} else {
+			flat->ret = 0;
+		}
+		break;
+	case SOF_IPC4_MOD_UNBIND:
+		if (ops->unbind) {
+			struct bind_info bind_data = {
+				.ipc4_data = &flat->bind.bu,
+				.bind_type = flat->bind.type,
+			};
+
+			flat->ret = ops->unbind(pmod, &bind_data);
+		} else {
+			flat->ret = 0;
+		}
+		break;
+	case SOF_IPC4_MOD_DELETE_INSTANCE:
+		flat->ret = ops->free(pmod);
+		break;
+	case SOF_IPC4_MOD_INIT_INSTANCE:
+		flat->ret = ops->init(pmod);
+		break;
+	case SOF_IPC4_GLB_SET_PIPELINE_STATE:
+		switch (flat->pipeline_state.trigger_cmd) {
+		case COMP_TRIGGER_STOP:
+			flat->ret = ops->reset(pmod);
+			break;
+		case COMP_TRIGGER_PREPARE:
+			flat->ret = ops->prepare(pmod,
+				(struct sof_source **)flat->pipeline_state.source_sink,
+				flat->pipeline_state.n_sources,
+				(struct sof_sink **)(flat->pipeline_state.source_sink +
+						     flat->pipeline_state.n_sources),
+				flat->pipeline_state.n_sinks);
+		}
+	}
+}
+
+#define DP_THREAD_IPC_TIMEOUT K_MSEC(100)
+
+/* Signal an IPC and wait for processing completion */
+int scheduler_dp_thread_ipc(struct processing_module *pmod, enum sof_ipc4_module_type cmd,
+			    const union scheduler_dp_thread_ipc_param *param)
+{
+	struct task_dp_pdata *pdata = pmod->dev->task->priv_data;
+	int ret;
+
+	if (!pmod) {
+		tr_err(&dp_tr, "no thread module");
+		return -EINVAL;
+	}
+
+	if (cmd == SOF_IPC4_MOD_INIT_INSTANCE) {
+		/* Wait for the DP thread to start */
+		ret = k_sem_take(&dp_sync[pmod->dev->task->core], DP_THREAD_IPC_TIMEOUT);
+		if (ret < 0) {
+			tr_err(&dp_tr, "Failed waiting for DP thread to start: %d", ret);
+			return ret;
+		}
+	}
+
+	unsigned int lock_key = scheduler_dp_lock(pmod->dev->task->core);
+
+	struct ipc4_flat *flat = (struct ipc4_flat *)ipc_buf;
+
+	/* IPCs are serialised */
+	flat->ret = -ENOSYS;
+
+	ret = ipc_thread_flatten(cmd, param, flat);
+	if (!ret)
+		k_event_post(pdata->event, DP_TASK_EVENT_IPC);
+
+	scheduler_dp_unlock(lock_key);
+
+	if (!ret) {
+		/* Wait for completion */
+		ret = k_sem_take(&dp_sync[cpu_get_id()], DP_THREAD_IPC_TIMEOUT);
+		if (ret < 0)
+			tr_err(&dp_tr, "Failed waiting for DP thread: %d", ret);
+		else
+			ret = flat->ret;
+	}
+
+	return ret;
+}
+
 /* Go through all DP tasks and recalculate their readiness and deadlines
  * NOT REENTRANT, should be called with scheduler_dp_lock()
  */
@@ -45,7 +218,8 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_
 			mod->dp_startup_delay = false;
 	}
 
-	if (curr_task->state == SOF_TASK_STATE_QUEUED) {
+	if (curr_task->state == SOF_TASK_STATE_QUEUED &&
+	    mod->dev->state >= COMP_STATE_ACTIVE) {
 		/* trigger the task */
 		curr_task->state = SOF_TASK_STATE_RUNNING;
 		trigger_task = true;
@@ -54,7 +228,7 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_
 
 	if (curr_task->state == SOF_TASK_STATE_RUNNING) {
 		/* (re) calculate deadline for all running tasks */
-		/* get module deadline in us*/
+		/* get module deadline in us */
 		uint32_t deadline = module_get_deadline(mod);
 
 		/* if a deadline cannot be calculated, use a fixed value relative to its
@@ -96,62 +270,80 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 	ARG_UNUSED(p2);
 	ARG_UNUSED(p3);
 
+	/*
+	 * The IPC thread is waiting for the thread to be
+	 * started, it can proceed now.
+	 */
+	k_sem_give(&dp_sync[task->core]);
+
 	do {
 		/*
-		 * the thread is started immediately after creation, it will stop on event.
-		 * Event will be signalled once the task is ready to process.
+		 * The thread is started immediately after creation, it stops on event.
+		 * An event is signalled to handle IPC or process audio data.
 		 */
-		k_event_wait_safe(task_pdata->event, DP_TASK_EVENT_PROCESS | DP_TASK_EVENT_CANCEL,
-				  false, K_FOREVER);
+		uint32_t mask = k_event_wait_safe(task_pdata->event,
+						  DP_TASK_EVENT_PROCESS | DP_TASK_EVENT_CANCEL |
+						  DP_TASK_EVENT_IPC, false, K_FOREVER);
 
-		bool ready;
-
-		if (task->state == SOF_TASK_STATE_RUNNING) {
-			ready = module_is_ready_to_process(pmod, pmod->sources,
-							   pmod->num_of_sources,
-							   pmod->sinks, pmod->num_of_sinks);
-		} else {
-			state = task->state; /* to avoid undefined variable warning */
-			ready = false;
+		if (mask & DP_TASK_EVENT_IPC) {
+			/* handle IPC */
+			tr_dbg(&dp_tr, "got IPC wake up for %p state %d", pmod, task->state);
+			ipc_thread_unflatten_run(pmod, (struct ipc4_flat *)ipc_buf);
+			k_sem_give(&dp_sync[task->core]);
 		}
 
-		if (ready) {
-			if (pmod->dp_startup_delay && !task_pdata->ll_cycles_to_start) {
-				/* first time run - use delayed start */
-				task_pdata->ll_cycles_to_start =
-					module_get_lpt(pmod) / LL_TIMER_PERIOD_US;
+		if (mask & DP_TASK_EVENT_PROCESS) {
+			bool ready;
 
-				/* in case LPT < LL cycle - delay at least cycle */
-				if (!task_pdata->ll_cycles_to_start)
-					task_pdata->ll_cycles_to_start = 1;
+			if (task->state == SOF_TASK_STATE_RUNNING) {
+				ready = module_is_ready_to_process(pmod, pmod->sources,
+								   pmod->num_of_sources,
+								   pmod->sinks, pmod->num_of_sinks);
+			} else {
+				state = task->state; /* to avoid undefined variable warning */
+				ready = false;
 			}
 
-			state = task_run(task);
-		}
+			if (ready) {
+				if (pmod->dp_startup_delay && !task_pdata->ll_cycles_to_start) {
+					/* first time run - use delayed start */
+					task_pdata->ll_cycles_to_start =
+						module_get_lpt(pmod) / LL_TIMER_PERIOD_US;
 
-		lock_key = scheduler_dp_lock(task->core);
-		/*
-		 * check if task is still running, may have been canceled by external call
-		 * if not, set the state returned by run procedure
-		 */
-		if (ready && task->state == SOF_TASK_STATE_RUNNING) {
-			task->state = state;
-			switch (state) {
-			case SOF_TASK_STATE_RESCHEDULE:
-				/* mark to reschedule, schedule time is already calculated */
-				task->state = SOF_TASK_STATE_QUEUED;
-				break;
-
-			case SOF_TASK_STATE_CANCEL:
-			case SOF_TASK_STATE_COMPLETED:
-				/* remove from scheduling */
-				list_item_del(&task->list);
-				break;
-
-			default:
-				/* illegal state, serious defect, won't happen */
-				k_panic();
+					/* in case LPT < LL cycle - delay at least cycle */
+					if (!task_pdata->ll_cycles_to_start)
+						task_pdata->ll_cycles_to_start = 1;
+				}
+
+				state = task_run(task);
 			}
+
+			lock_key = scheduler_dp_lock(task->core);
+			/*
+			 * check if task is still running, may have been canceled by external call
+			 * if not, set the state returned by run procedure
+			 */
+			if (ready && task->state == SOF_TASK_STATE_RUNNING) {
+				task->state = state;
+				switch (state) {
+				case SOF_TASK_STATE_RESCHEDULE:
+					/* mark to reschedule, schedule time is already calculated */
+					task->state = SOF_TASK_STATE_QUEUED;
+					break;
+
+				case SOF_TASK_STATE_CANCEL:
+				case SOF_TASK_STATE_COMPLETED:
+					/* remove from scheduling */
+					list_item_del(&task->list);
+					break;
+
+				default:
+					/* illegal state, serious defect, won't happen */
+					k_oops();
+				}
+			}
+		} else {
+			lock_key = scheduler_dp_lock(task->core);
 		}
 
 		/* if true exit the while loop, terminate the thread */

From 5950bf8a85fc9053198014be4d19bd7101395734 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Thu, 2 Oct 2025 17:14:11 +0200
Subject: [PATCH 03/11] module-adapter: switch to DP signaling

Instead of calling module hooks inline, signal the DP thread to call them
in its thread context for DP-scheduled modules.

Signed-off-by: Guennadi Liakhovetski
---
 src/audio/module_adapter/module/generic.c | 85 +++++++++++++++++++----
 src/audio/module_adapter/module_adapter.c | 27 ++++++-
 src/include/sof/audio/component_ext.h     | 22 +++---
 3 files changed, 112 insertions(+), 22 deletions(-)

diff --git a/src/audio/module_adapter/module/generic.c b/src/audio/module_adapter/module/generic.c
index 5b8eb8e43ee6..b9afa8d9d62c 100644
--- a/src/audio/module_adapter/module/generic.c
+++ b/src/audio/module_adapter/module/generic.c
@@ -12,15 +12,22 @@
  */
 
 #include
-
 #include
 #include
 #include
+#include
+#if CONFIG_IPC_MAJOR_4
+#include
+#include
+#include
+#endif
 
 /* The __ZEPHYR__ condition is to keep cmocka tests working */
 #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__)
-#define MEM_API_CHECK_THREAD(res) __ASSERT((res)->rsrc_mngr == k_current_get(), \
-					   "Module memory API operation from wrong thread")
+#define MEM_API_CHECK_THREAD(res) do { \
+	if ((res)->rsrc_mngr != k_current_get()) \
+		LOG_WRN("mngr %p != cur %p", (res)->rsrc_mngr, k_current_get()); \
+} while (0)
 #else
 #define MEM_API_CHECK_THREAD(res)
 #endif
@@ -114,7 +121,13 @@ int module_init(struct processing_module *mod)
 	mod->priv.resources.rsrc_mngr = k_current_get();
 #endif
 	/* Now we can proceed with module specific initialization */
-	ret = interface->init(mod);
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+	if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP)
+		ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_INIT_INSTANCE, NULL);
+	else
+#endif
+		ret = interface->init(mod);
+
 	if (ret) {
 		comp_err(dev, "error %d: module specific init failed", ret);
 		mod_free_all(mod);
@@ -433,7 +446,24 @@ int module_prepare(struct processing_module *mod,
 		return -EPERM;
 #endif
 	if (ops->prepare) {
-		int ret = ops->prepare(mod, sources, num_of_sources, sinks, num_of_sinks);
+		int ret;
+
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		if (dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) {
+			const union scheduler_dp_thread_ipc_param param = {
+				.pipeline_state = {
+					.trigger_cmd = COMP_TRIGGER_PREPARE,
+					.state = SOF_IPC4_PIPELINE_STATE_RUNNING,
+					.n_sources = num_of_sources,
+					.sources = sources,
+					.n_sinks = num_of_sinks,
+					.sinks = sinks,
+				},
+			};
+			ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE, &param);
+		} else
+#endif
+			ret = ops->prepare(mod, sources, num_of_sources, sinks, num_of_sinks);
 
 		if (ret) {
 			comp_err(dev, "error %d: module specific prepare failed", ret);
@@ -552,11 +582,23 @@ int module_reset(struct processing_module *mod)
 	if (md->state < MODULE_IDLE)
 		return 0;
 #endif
+
 	/* cancel task if DP task*/
-	if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP && mod->dev->task)
+	if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP && mod->dev->task &&
+	    (IS_ENABLED(CONFIG_SOF_USERSPACE_PROXY) || !IS_ENABLED(CONFIG_USERSPACE)))
 		schedule_task_cancel(mod->dev->task);
+
 	if (ops->reset) {
-		ret = ops->reset(mod);
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) {
+			const union scheduler_dp_thread_ipc_param param = {
+				.pipeline_state.trigger_cmd = COMP_TRIGGER_STOP,
+			};
+			ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE, &param);
+		} else
+#endif
+			ret = ops->reset(mod);
+
 		if (ret) {
 			if (ret != PPL_STATUS_PATH_STOP)
 				comp_err(mod->dev,
@@ -627,7 +669,8 @@ int module_free(struct processing_module *mod)
 	struct module_data *md = &mod->priv;
 	int ret = 0;
 
-	if (ops->free) {
+	if (ops->free && (mod->dev->ipc_config.proc_domain != COMP_PROCESSING_DOMAIN_DP ||
+			  IS_ENABLED(CONFIG_SOF_USERSPACE_PROXY) || !IS_ENABLED(CONFIG_USERSPACE))) {
 		ret = ops->free(mod);
 		if (ret)
 			comp_warn(mod->dev, "error: %d", ret);
@@ -772,8 +815,17 @@ int module_bind(struct processing_module *mod, struct bind_info *bind_data)
 	if (ret)
 		return ret;
 
-	if (ops->bind)
-		ret = ops->bind(mod, bind_data);
+	if (ops->bind) {
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) {
+			const union scheduler_dp_thread_ipc_param param = {
+				.bind_data = bind_data,
+			};
+			ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_BIND, &param);
+		} else
+#endif
+			ret = ops->bind(mod, bind_data);
+	}
 
 	return ret;
 }
@@ -796,8 +848,17 @@ int module_unbind(struct processing_module *mod, struct bind_info *unbind_data)
 	if (ret)
 		return ret;
 
-	if (ops->unbind)
-		ret = ops->unbind(mod, unbind_data);
+	if (ops->unbind) {
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) {
+			const union scheduler_dp_thread_ipc_param param = {
+				.bind_data = unbind_data,
+			};
+			ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_UNBIND, &param);
+		} else
+#endif
+			ret = ops->unbind(mod, unbind_data);
+	}
 
 	return ret;
 }
diff --git a/src/audio/module_adapter/module_adapter.c b/src/audio/module_adapter/module_adapter.c
index 6d63fb561d01..6c72f3f7228f 100644
--- a/src/audio/module_adapter/module_adapter.c
+++ b/src/audio/module_adapter/module_adapter.c
@@ -18,10 +18,16 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#if CONFIG_IPC_MAJOR_4
+#include
+#include
+#include
+#endif
 #include
 #include
 #include
@@ -1296,8 +1302,20 @@ int module_adapter_trigger(struct comp_dev *dev, int cmd)
 		dev->state = COMP_STATE_ACTIVE;
 		return PPL_STATUS_PATH_STOP;
 	}
-	if (interface->trigger)
+
+	if (interface->trigger) {
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		if (dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) {
+			/* Process DP module's trigger */
+			const union scheduler_dp_thread_ipc_param param = {
+				.pipeline_state.trigger_cmd = cmd,
+			};
+			return scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE,
+						       &param);
+		}
+#endif
 		return interface->trigger(mod, cmd);
+	}
 
 	return module_adapter_set_state(mod, dev, cmd);
 }
@@ -1359,8 +1377,13 @@ void module_adapter_free(struct comp_dev *dev)
 
 	comp_dbg(dev, "start");
 
-	if (dev->task)
+	if (dev->task) {
+		/* Run DP module's .free() method in its thread context */
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY
+		scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_DELETE_INSTANCE, NULL);
+#endif
 		schedule_task_cancel(dev->task);
+	}
 
 	ret = module_free(mod);
 	if (ret)
diff --git a/src/include/sof/audio/component_ext.h b/src/include/sof/audio/component_ext.h
index 55b8dc9e4f40..1c48619c2459 100644
--- a/src/include/sof/audio/component_ext.h
+++ b/src/include/sof/audio/component_ext.h
@@ -44,17 +44,23 @@ struct comp_dev *comp_new_ipc4(struct ipc4_module_init_instance *module_init);
 /** See comp_ops::free */
 static inline void comp_free(struct comp_dev *dev)
 {
-	assert(dev->drv->ops.free);
+	struct task *task = dev->is_shared ||
+		dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP ? dev->task : NULL;
+	const struct comp_driver *drv = dev->drv;
+
+	assert(drv->ops.free);
+
+	/*
+	 * In DP case this will run in DP thread context, so the task can only
+	 * be freed after this.
+	 */
+	drv->ops.free(dev);
 
 	/* free task if shared component or DP task*/
-	if ((dev->is_shared || dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) &&
-	    dev->task) {
-		schedule_task_free(dev->task);
-		sof_heap_free(dev->drv->user_heap, dev->task);
-		dev->task = NULL;
+	if (task) {
+		schedule_task_free(task);
+		sof_heap_free(drv->user_heap, task);
 	}
-
-	dev->drv->ops.free(dev);
 }
 
 /**

From cf7fed9de162207ba6dbfcc90133705846350a20 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Fri, 26 Sep 2025 16:52:20 +0200
Subject: [PATCH 04/11] mod_alloc: convert to syscalls

The only functions that have to be converted to syscalls are
mod_alloc_ext() and mod_free(); the rest of the API is implemented using
inline functions.

Signed-off-by: Guennadi Liakhovetski
---
 src/audio/module_adapter/module/generic.c          | 53 ++++++++++++++++---
 .../sof/audio/module_adapter/module/generic.h      | 11 ++--
 zephyr/CMakeLists.txt                              |  1 +
 3 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/src/audio/module_adapter/module/generic.c b/src/audio/module_adapter/module/generic.c
index b9afa8d9d62c..503e435cddac 100644
--- a/src/audio/module_adapter/module/generic.c
+++ b/src/audio/module_adapter/module/generic.c
@@ -243,7 +243,8 @@ EXPORT_SYMBOL(mod_balloc_align);
  *
  * The allocated memory is automatically freed when the module is unloaded.
  */
-void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, size_t alignment)
+void *z_impl_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
+			   size_t alignment)
 {
 	struct module_resources *res = &mod->priv.resources;
 	struct module_resource *container;
@@ -281,7 +282,7 @@ void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
 
 	return ptr;
 }
-EXPORT_SYMBOL(mod_alloc_ext);
+EXPORT_SYMBOL(z_impl_mod_alloc_ext);
 
 /**
  * Creates a blob handler and releases it when the module is unloaded
@@ -327,7 +328,8 @@ EXPORT_SYMBOL(mod_data_blob_handler_new);
  * Like fast_get() but the handler is automatically freed.
 */
 #if CONFIG_FAST_GET
-const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr, size_t size)
+const void *z_impl_mod_fast_get(struct processing_module *mod, const void * const dram_ptr,
+				size_t size)
 {
 	struct module_resources *res = &mod->priv.resources;
 	struct module_resource *container;
@@ -352,7 +354,7 @@ const void *mod_fast_get(struct processing_module *mod, const void * const dram_
 
 	return ptr;
 }
-EXPORT_SYMBOL(mod_fast_get);
+EXPORT_SYMBOL(z_impl_mod_fast_get);
 #endif
 
 static int free_contents(struct processing_module *mod, struct module_resource *container)
@@ -385,7 +387,7 @@ static int free_contents(struct processing_module *mod, struct module_resource
  *
  * @param mod Pointer to module this memory block was allocated for.
  * @param ptr Pointer to the memory block.
 */
-int mod_free(struct processing_module *mod, const void *ptr)
+int z_impl_mod_free(struct processing_module *mod, const void *ptr)
 {
 	struct module_resources *res = &mod->priv.resources;
 	struct module_resource *container;
@@ -411,7 +413,46 @@
 
 	return -EINVAL;
 }
-EXPORT_SYMBOL(mod_free);
+EXPORT_SYMBOL(z_impl_mod_free);
+
+#ifdef CONFIG_USERSPACE
+#include
+const void *z_vrfy_mod_fast_get(struct processing_module *mod, const void * const dram_ptr,
+				size_t size)
+{
+	struct module_resources *res = &mod->priv.resources;
+
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod)));
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap)));
+	K_OOPS(K_SYSCALL_MEMORY_READ(dram_ptr, size));
+
+	return z_impl_mod_fast_get(mod, dram_ptr, size);
+}
+#include
+
+void *z_vrfy_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
+			   size_t alignment)
+{
+	struct module_resources *res = &mod->priv.resources;
+
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod)));
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap)));
+
+	return z_impl_mod_alloc_ext(mod, flags, size, alignment);
+}
+#include
+
+int z_vrfy_mod_free(struct processing_module *mod, const void *ptr)
+{
+	struct module_resources *res = &mod->priv.resources;
+
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod)));
+	K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap)));
+
+	return z_impl_mod_free(mod, ptr);
+}
+#include
+#endif
 
 #if CONFIG_COMP_BLOB
 void mod_data_blob_handler_free(struct processing_module *mod, struct comp_data_blob_handler *dbh)
diff --git a/src/include/sof/audio/module_adapter/module/generic.h b/src/include/sof/audio/module_adapter/module/generic.h
index f339ac158ab0..47d068827924 100644
--- a/src/include/sof/audio/module_adapter/module/generic.h
+++ b/src/include/sof/audio/module_adapter/module/generic.h
@@ -23,6 +23,7 @@
 #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__)
 #include
 #endif
+#include
 
 /*
  * helpers to determine processing type
@@ -190,7 +191,9 @@
 int module_load_config(struct comp_dev *dev, const void *cfg, size_t size);
 int module_init(struct processing_module *mod);
 void *mod_balloc_align(struct processing_module *mod, size_t size, size_t alignment);
-void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, size_t alignment);
+__syscall void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
+			      size_t alignment);
+__syscall int mod_free(struct processing_module *mod, const void *ptr);
 
 /**
  * Allocates aligned memory block for module.
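A quick recap of the Zephyr syscall convention these declarations rely on: a __syscall
prototype is backed by a z_impl_-prefixed implementation, and under CONFIG_USERSPACE a
z_vrfy_ handler validates caller-supplied memory before forwarding, followed by the
generated marshalling include. A minimal editorial sketch with a hypothetical mod_ping()
call (only the names are invented; K_OOPS()/K_SYSCALL_MEMORY_WRITE() and the
<zephyr/syscalls/*_mrsh.c> include pattern are standard Zephyr):

__syscall int mod_ping(struct processing_module *mod);

/* the implementation always runs in kernel mode */
int z_impl_mod_ping(struct processing_module *mod)
{
	return mod ? 0 : -EINVAL;
}

#ifdef CONFIG_USERSPACE
/* verification handler: check the user-supplied pointer, then forward */
int z_vrfy_mod_ping(struct processing_module *mod)
{
	K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod)));

	return z_impl_mod_ping(mod);
}
#include <zephyr/syscalls/mod_ping_mrsh.c>
#endif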
@@ -226,13 +229,13 @@ static inline void *mod_zalloc(struct processing_module *mod, size_t size)
 	return ret;
 }
 
-int mod_free(struct processing_module *mod, const void *ptr);
 #if CONFIG_COMP_BLOB
 struct comp_data_blob_handler *mod_data_blob_handler_new(struct processing_module *mod);
 void mod_data_blob_handler_free(struct processing_module *mod, struct comp_data_blob_handler *dbh);
 #endif
 #if CONFIG_FAST_GET
-const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr, size_t size);
+__syscall const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr,
+				   size_t size);
 void mod_fast_put(struct processing_module *mod, const void *sram_ptr);
 #endif
 void mod_free_all(struct processing_module *mod);
@@ -240,6 +243,8 @@
 int module_prepare(struct processing_module *mod,
 		   struct sof_source **sources, int num_of_sources,
 		   struct sof_sink **sinks, int num_of_sinks);
 
+#include
+
 static inline bool
 generic_module_is_ready_to_process(struct processing_module *mod,
 				   struct sof_source **sources,
diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index b1af82ae856b..ab5d9525ae6f 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -534,6 +534,7 @@ zephyr_library_sources_ifdef(CONFIG_SHELL
 	sof_shell.c
 )
 
+zephyr_syscall_header(${SOF_SRC_PATH}/include/sof/audio/module_adapter/module/generic.h)
 zephyr_syscall_header(${SOF_SRC_PATH}/include/sof/lib/fast-get.h)
 
 zephyr_library_link_libraries(SOF)

From bd1035339e9006746af70fb1544ec87c740a0916 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Wed, 8 Oct 2025 14:08:34 +0200
Subject: [PATCH 05/11] dp: convert to user-space

Run DP threads in user-space. Move all the respective memory and kobjects
to a dedicated memory domain. Work around Zephyr's inability to remove
memory domains on Xtensa.
Signed-off-by: Guennadi Liakhovetski
---
 src/audio/buffers/comp_buffer.c                    |  11 +-
 src/audio/module_adapter/module/generic.c          |  16 +-
 src/audio/module_adapter/module_adapter.c          |  64 +++-
 src/include/sof/audio/component_ext.h              |   8 -
 .../sof/audio/module_adapter/module/generic.h      |  22 +-
 src/include/sof/schedule/dp_schedule.h             |  10 +-
 src/ipc/ipc4/helper.c                              |   9 +
 src/schedule/zephyr_dp_schedule.c                  | 155 ++--------
 src/schedule/zephyr_dp_schedule.h                  |  34 ++-
 src/schedule/zephyr_dp_schedule_application.c      | 273 ++++++++++++++++--
 src/schedule/zephyr_dp_schedule_thread.c           | 139 +++++++++
 11 files changed, 552 insertions(+), 189 deletions(-)

diff --git a/src/audio/buffers/comp_buffer.c b/src/audio/buffers/comp_buffer.c
index d3734b8c5906..6d0113e96252 100644
--- a/src/audio/buffers/comp_buffer.c
+++ b/src/audio/buffers/comp_buffer.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -158,8 +159,16 @@ static void comp_buffer_free(struct sof_audio_buffer *audio_buffer)
 	/* In case some listeners didn't unregister from buffer's callbacks */
 	notifier_unregister_all(NULL, buffer);
 
+	struct k_heap *heap = buffer->audio_buffer.heap;
+
 	rfree(buffer->stream.addr);
-	sof_heap_free(buffer->audio_buffer.heap, buffer);
+	sof_heap_free(heap, buffer);
+	if (heap) {
+		struct dp_heap_user *mod_heap_user = container_of(heap, struct dp_heap_user, heap);
+
+		if (!--mod_heap_user->client_count)
+			rfree(mod_heap_user);
+	}
 }
 
 APP_TASK_DATA static const struct source_ops comp_buffer_source_ops = {
diff --git a/src/audio/module_adapter/module/generic.c b/src/audio/module_adapter/module/generic.c
index 503e435cddac..892422b5a7d4 100644
--- a/src/audio/module_adapter/module/generic.c
+++ b/src/audio/module_adapter/module/generic.c
@@ -78,7 +78,7 @@ int module_load_config(struct comp_dev *dev, const void *cfg, size_t size)
 	return ret;
 }
 
-static void mod_resource_init(struct processing_module *mod)
+void mod_resource_init(struct processing_module *mod)
 {
 	struct module_data *md = &mod->priv;
 	/* Init memory list */
@@ -116,7 +116,6 @@ int module_init(struct processing_module *mod)
 		return -EIO;
 	}
 
-	mod_resource_init(mod);
 #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__)
 	mod->priv.resources.rsrc_mngr = k_current_get();
 #endif
@@ -181,6 +180,19 @@ static void container_put(struct processing_module *mod, struct module_resource
 	list_item_append(&container->list, &res->free_cont_list);
 }
 
+#if CONFIG_USERSPACE
+void mod_heap_info(struct processing_module *mod, size_t *size, uintptr_t *start)
+{
+	struct module_resources *res = &mod->priv.resources;
+
+	if (size)
+		*size = res->heap->heap.init_bytes;
+
+	if (start)
+		*start = (uintptr_t)container_of(res->heap, struct dp_heap_user, heap);
+}
+#endif
+
 /**
  * Allocates aligned buffer memory block for module.
  * @param mod Pointer to the module this memory block is allocated for.
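The client_count protocol visible in comp_buffer_free() above, and used again in the
module_adapter.c and ipc4/helper.c hunks below, amounts to a manual reference count on the
heap's enclosing allocation. Two illustrative helpers summarizing it (the patch open-codes
this logic at each site; these helper names are not part of the series):

static inline void dp_heap_user_get(struct k_heap *heap)
{
	struct dp_heap_user *user = container_of(heap, struct dp_heap_user, heap);

	user->client_count++;	/* relies on serialized IPC processing, no locking */
}

static inline void dp_heap_user_put(struct k_heap *heap)
{
	struct dp_heap_user *user = container_of(heap, struct dp_heap_user, heap);

	/* the last client frees the whole block, k_heap header included */
	if (!--user->client_count)
		rfree(user);
}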
diff --git a/src/audio/module_adapter/module_adapter.c b/src/audio/module_adapter/module_adapter.c
index 6c72f3f7228f..82d6b2706159 100644
--- a/src/audio/module_adapter/module_adapter.c
+++ b/src/audio/module_adapter/module_adapter.c
@@ -58,30 +58,33 @@ struct comp_dev *module_adapter_new(const struct comp_driver *drv,
 #define PAGE_SZ HOST_PAGE_SIZE
 #endif
 
-static struct k_heap *module_adapter_dp_heap_new(const struct comp_ipc_config *config)
+static struct dp_heap_user *module_adapter_dp_heap_new(const struct comp_ipc_config *config,
+						       size_t *heap_size)
 {
 	/* src-lite with 8 channels has been seen allocating 14k in one go */
 	/* FIXME: the size will be derived from configuration */
-	const size_t heap_size = 20 * 1024;
+	const size_t buf_size = 20 * 1024;
 
 	/* Keep uncached to match the default SOF heap! */
 	uint8_t *mod_heap_mem = rballoc_align(SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT,
-					      heap_size, PAGE_SZ);
+					      buf_size, PAGE_SZ);
 
 	if (!mod_heap_mem)
 		return NULL;
 
-	struct k_heap *mod_heap = (struct k_heap *)mod_heap_mem;
-	const size_t heap_prefix_size = ALIGN_UP(sizeof(*mod_heap), 8);
+	struct dp_heap_user *mod_heap_user = (struct dp_heap_user *)mod_heap_mem;
+	struct k_heap *mod_heap = &mod_heap_user->heap;
+	const size_t heap_prefix_size = ALIGN_UP(sizeof(*mod_heap_user), 4);
 	void *mod_heap_buf = mod_heap_mem + heap_prefix_size;
 
-	k_heap_init(mod_heap, mod_heap_buf, heap_size - heap_prefix_size);
+	*heap_size = buf_size - heap_prefix_size;
+	k_heap_init(mod_heap, mod_heap_buf, *heap_size);
 
 #ifdef __ZEPHYR__
 	mod_heap->heap.init_mem = mod_heap_buf;
-	mod_heap->heap.init_bytes = heap_size - heap_prefix_size;
+	mod_heap->heap.init_bytes = *heap_size;
 #endif
 
-	return mod_heap;
+	return mod_heap_user;
 }
 
 static struct processing_module *module_adapter_mem_alloc(const struct comp_driver *drv,
@@ -97,16 +100,21 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv
 	 */
 	uint32_t flags = config->proc_domain == COMP_PROCESSING_DOMAIN_DP ?
		SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT : SOF_MEM_FLAG_USER;
+	struct dp_heap_user *mod_heap_user;
+	size_t heap_size;
 
 	if (config->proc_domain == COMP_PROCESSING_DOMAIN_DP && IS_ENABLED(CONFIG_USERSPACE) &&
 	    !IS_ENABLED(CONFIG_SOF_USERSPACE_USE_DRIVER_HEAP)) {
-		mod_heap = module_adapter_dp_heap_new(config);
-		if (!mod_heap) {
+		mod_heap_user = module_adapter_dp_heap_new(config, &heap_size);
+		if (!mod_heap_user) {
 			comp_cl_err(drv, "Failed to allocate DP module heap");
 			return NULL;
 		}
+		mod_heap = &mod_heap_user->heap;
 	} else {
 		mod_heap = drv->user_heap;
+		mod_heap_user = NULL;
+		heap_size = 0;
 	}
 
 	struct processing_module *mod = sof_heap_alloc(mod_heap, flags, sizeof(*mod), 0);
@@ -118,6 +126,7 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv
 	memset(mod, 0, sizeof(*mod));
 
 	mod->priv.resources.heap = mod_heap;
+	mod_resource_init(mod);
 
 	/*
 	 * Would be difficult to optimize the allocation to use cache. Only if
Only if @@ -138,13 +147,15 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv mod->dev = dev; dev->mod = mod; + if (mod_heap_user) + mod_heap_user->client_count++; + return mod; err: sof_heap_free(mod_heap, mod); emod: - if (mod_heap != drv->user_heap) - rfree(mod_heap); + rfree(mod_heap_user); return NULL; } @@ -152,12 +163,24 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv static void module_adapter_mem_free(struct processing_module *mod) { struct k_heap *mod_heap = mod->priv.resources.heap; + unsigned int domain = mod->dev->ipc_config.proc_domain; + /* + * In principle it shouldn't even be needed to free individual objects + * on the module heap since we're freeing the heap itself too + */ #if CONFIG_IPC_MAJOR_4 sof_heap_free(mod_heap, mod->priv.cfg.input_pins); #endif sof_heap_free(mod_heap, mod->dev); sof_heap_free(mod_heap, mod); + if (domain == COMP_PROCESSING_DOMAIN_DP) { + struct dp_heap_user *mod_heap_user = container_of(mod_heap, struct dp_heap_user, + heap); + + if (mod_heap && !--mod_heap_user->client_count) + rfree(mod_heap_user); + } } /* @@ -571,6 +594,14 @@ int module_adapter_prepare(struct comp_dev *dev) goto free; } + if (md->resources.heap && md->resources.heap != dev->drv->user_heap) { + struct dp_heap_user *dp_user = container_of(md->resources.heap, + struct dp_heap_user, + heap); + + dp_user->client_count++; + } + irq_local_disable(flags); list_item_prepend(&buffer->buffers_list, &mod->raw_data_buffers_list); irq_local_enable(flags); @@ -1378,11 +1409,16 @@ void module_adapter_free(struct comp_dev *dev) comp_dbg(dev, "start"); if (dev->task) { - /* Run DP module's .free() method in its thread context */ + /* + * Run DP module's .free() method in its thread context. + * Unlike with other IPCs we first run module's .free() in + * thread context, then cancel the thread, and then execute + * final clean up + */ #if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_DELETE_INSTANCE, NULL); #endif - schedule_task_cancel(dev->task); + schedule_task_free(dev->task); } ret = module_free(mod); diff --git a/src/include/sof/audio/component_ext.h b/src/include/sof/audio/component_ext.h index 1c48619c2459..f0a1ad7e8512 100644 --- a/src/include/sof/audio/component_ext.h +++ b/src/include/sof/audio/component_ext.h @@ -44,8 +44,6 @@ struct comp_dev *comp_new_ipc4(struct ipc4_module_init_instance *module_init); /** See comp_ops::free */ static inline void comp_free(struct comp_dev *dev) { - struct task *task = dev->is_shared || - dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP ? dev->task : NULL; const struct comp_driver *drv = dev->drv; assert(drv->ops.free); @@ -55,12 +53,6 @@ static inline void comp_free(struct comp_dev *dev) * be freed after this. 
 	 */
 	drv->ops.free(dev);
-
-	/* free task if shared component or DP task*/
-	if (task) {
-		schedule_task_free(task);
-		sof_heap_free(drv->user_heap, task);
-	}
 }
 
 /**
diff --git a/src/include/sof/audio/module_adapter/module/generic.h b/src/include/sof/audio/module_adapter/module/generic.h
index 47d068827924..a283b6830e32 100644
--- a/src/include/sof/audio/module_adapter/module/generic.h
+++ b/src/include/sof/audio/module_adapter/module/generic.h
@@ -191,9 +191,19 @@
 int module_load_config(struct comp_dev *dev, const void *cfg, size_t size);
 int module_init(struct processing_module *mod);
 void *mod_balloc_align(struct processing_module *mod, size_t size, size_t alignment);
+void mod_resource_init(struct processing_module *mod);
+void mod_heap_info(struct processing_module *mod, size_t *size, uintptr_t *start);
+#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION)
 __syscall void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
 			      size_t alignment);
 __syscall int mod_free(struct processing_module *mod, const void *ptr);
+#else
+void *z_impl_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size,
+			   size_t alignment);
+int z_impl_mod_free(struct processing_module *mod, const void *ptr);
+#define mod_alloc_ext z_impl_mod_alloc_ext
+#define mod_free z_impl_mod_free
+#endif
 
 /**
  * Allocates aligned memory block for module.
@@ -234,8 +244,14 @@
 void mod_data_blob_handler_free(struct processing_module *mod, struct comp_data_blob_handler *dbh);
 #endif
 #if CONFIG_FAST_GET
+#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION)
 __syscall const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr,
 				   size_t size);
+#else
+const void *z_impl_mod_fast_get(struct processing_module *mod, const void * const dram_ptr,
+				size_t size);
+#define mod_fast_get z_impl_mod_fast_get
+#endif
 void mod_fast_put(struct processing_module *mod, const void *sram_ptr);
 #endif
 void mod_free_all(struct processing_module *mod);
@@ -243,8 +259,6 @@
 int module_prepare(struct processing_module *mod,
 		   struct sof_source **sources, int num_of_sources,
 		   struct sof_sink **sinks, int num_of_sinks);
 
-#include
-
 static inline bool
 generic_module_is_ready_to_process(struct processing_module *mod,
 				   struct sof_source **sources,
@@ -454,4 +468,8 @@ static inline uint32_t module_get_lpt(struct processing_module *mod)
 	return mod->dev->period;
 }
 
+#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION)
+#include
+#endif
+
 #endif /* __SOF_AUDIO_MODULE_GENERIC__ */
diff --git a/src/include/sof/schedule/dp_schedule.h b/src/include/sof/schedule/dp_schedule.h
index 5b42a4282896..27afcf9e5d01 100644
--- a/src/include/sof/schedule/dp_schedule.h
+++ b/src/include/sof/schedule/dp_schedule.h
@@ -118,12 +118,18 @@ union scheduler_dp_thread_ipc_param {
 	} pipeline_state;
 };
 
+struct dp_heap_user {
+	struct k_heap heap;
+	/* So far relying on linear processing of serialized IPCs, but might need protection */
+	unsigned int client_count;	/* devices and buffers */
+};
+
 #if CONFIG_ZEPHYR_DP_SCHEDULER
-int scheduler_dp_thread_ipc(struct processing_module *pmod, enum sof_ipc4_module_type cmd,
+int scheduler_dp_thread_ipc(struct processing_module *pmod, unsigned int cmd,
 			    const union scheduler_dp_thread_ipc_param *param);
 #else
 static inline int scheduler_dp_thread_ipc(struct processing_module *pmod,
-					  enum sof_ipc4_module_type cmd,
+					  unsigned int cmd,
 					  const union scheduler_dp_thread_ipc_param *param)
 {
 	return 0;
diff --git a/src/ipc/ipc4/helper.c b/src/ipc/ipc4/helper.c
index 55ec4483ff1d..0bc49a7f0df3 100644
--- a/src/ipc/ipc4/helper.c
+++ b/src/ipc/ipc4/helper.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -601,6 +602,14 @@ __cold int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
 		return IPC4_OUT_OF_MEMORY;
 	}
 
+#if CONFIG_ZEPHYR_DP_SCHEDULER
+	if (dp_heap) {
+		struct dp_heap_user *dp_user = container_of(dp_heap, struct dp_heap_user, heap);
+
+		dp_user->client_count++;
+	}
+#endif
+
 	/*
 	 * set min_free_space and min_available in sink/src api of created buffer.
 	 * buffer is connected like:
diff --git a/src/schedule/zephyr_dp_schedule.c b/src/schedule/zephyr_dp_schedule.c
index 1f006d50e52c..fc394066b6b1 100644
--- a/src/schedule/zephyr_dp_schedule.c
+++ b/src/schedule/zephyr_dp_schedule.c
@@ -7,6 +7,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -57,7 +58,7 @@ void scheduler_dp_unlock(unsigned int key)
 	k_sem_give(&dp_lock[key]);
 }
 
-static inline void scheduler_dp_grant(k_tid_t thread_id, uint16_t core)
+void scheduler_dp_grant(k_tid_t thread_id, uint16_t core)
 {
 #if CONFIG_USERSPACE
 	k_thread_access_grant(thread_id, &dp_lock[core]);
@@ -239,6 +240,7 @@ void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void *
 	scheduler_dp_unlock(lock_key);
 }
 
+// FIXME: is .cancel() always followed by .free()? Where should we free stack and thread?
 static int scheduler_dp_task_cancel(void *data, struct task *task)
 {
 	unsigned int lock_key;
@@ -256,8 +258,12 @@ static int scheduler_dp_task_cancel(void *data, struct task *task)
 	if (list_is_empty(&dp_sch->tasks))
 		schedule_task_cancel(&dp_sch->ll_tick_src);
 
-	/* if the task is waiting on a event - let it run and self-terminate */
+	/* if the task is waiting - let it run and self-terminate */
+#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE
 	k_event_set(pdata->event, DP_TASK_EVENT_CANCEL);
+#else
+	k_sem_give(pdata->sem);
+#endif
 	scheduler_dp_unlock(lock_key);
 
 	/* wait till the task has finished, if there was any task created */
@@ -283,16 +289,23 @@ static int scheduler_dp_task_free(void *data, struct task *task)
 	}
 
 #ifdef CONFIG_USERSPACE
+#if CONFIG_SOF_USERSPACE_PROXY
 	if (pdata->event != &pdata->event_struct)
 		k_object_free(pdata->event);
+#else
+	if (pdata->sem != &pdata->sem_struct)
+		k_object_free(pdata->sem);
+#endif
 	if (pdata->thread != &pdata->thread_struct)
 		k_object_free(pdata->thread);
 #endif
 
 	/* free task stack */
-	ret = user_stack_free((__sparse_force void *)pdata->p_stack);
+	ret = user_stack_free(pdata->p_stack);
 	pdata->p_stack = NULL;
 
+	scheduler_dp_domain_free(pdata->mod);
+
 	/* all other memory has been allocated as a single malloc, will be freed later by caller */
 	return ret;
 }
@@ -358,143 +371,9 @@ int scheduler_dp_init(void)
 
 	notifier_register(NULL, NULL, NOTIFIER_ID_LL_POST_RUN, scheduler_dp_ll_tick, 0);
 
-	return 0;
-}
-
-int scheduler_dp_task_init(struct task **task,
-			   const struct sof_uuid_entry *uid,
-			   const struct task_ops *ops,
-			   struct processing_module *mod,
-			   uint16_t core,
-			   size_t stack_size,
-			   uint32_t options)
-{
-	void __sparse_cache *p_stack = NULL;
-	struct k_heap *const user_heap = mod->dev->drv->user_heap;
-
-	/* memory allocation helper structure */
-	struct {
-		struct task task;
-		struct task_dp_pdata pdata;
-	} *task_memory;
-
-	int ret;
-
-	/* must be called on the same core the task will be binded to */
-	assert(cpu_get_id() == core);
-
-	/*
-	 * allocate memory
-	 * to avoid multiple malloc operations allocate all required memory as a single structure
-	 * and return pointer to task_memory->task
-	 * As the structure contains zephyr kernel specific data, it must be located in
-	 * shared, non cached memory
-	 */
-	task_memory = sof_heap_alloc(user_heap, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT,
-				     sizeof(*task_memory), 0);
-	if (!task_memory) {
-		tr_err(&dp_tr, "memory alloc failed");
-		return -ENOMEM;
-	}
-
-	memset(task_memory, 0, sizeof(*task_memory));
-	/* allocate stack - must be aligned and cached so a separate alloc */
-	p_stack = user_stack_allocate(stack_size, options);
-	if (!p_stack) {
-		tr_err(&dp_tr, "stack alloc failed");
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	/* internal SOF task init */
-	ret = schedule_task_init(&task_memory->task, uid, SOF_SCHEDULE_DP, 0, ops->run,
-				 mod, core, options);
-	if (ret < 0) {
-		tr_err(&dp_tr, "schedule_task_init failed");
-		goto err;
-	}
-
-	struct task_dp_pdata *pdata = &task_memory->pdata;
-
-	/* Point to event_struct event for kernel threads synchronization */
-	/* It will be overwritten for K_USER threads to dynamic ones. */
-	pdata->event = &pdata->event_struct;
-	pdata->thread = &pdata->thread_struct;
-
-#ifdef CONFIG_USERSPACE
-	if (options & K_USER) {
-		pdata->event = k_object_alloc(K_OBJ_EVENT);
-		if (!pdata->event) {
-			tr_err(&dp_tr, "Event object allocation failed");
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		pdata->thread = k_object_alloc(K_OBJ_THREAD);
-		if (!pdata->thread) {
-			tr_err(&dp_tr, "Thread object allocation failed");
-			ret = -ENOMEM;
-			goto err;
-		}
-	}
-#endif /* CONFIG_USERSPACE */
-
-	/* initialize other task structures */
-	task_memory->task.ops.complete = ops->complete;
-	task_memory->task.ops.get_deadline = ops->get_deadline;
-	task_memory->task.state = SOF_TASK_STATE_INIT;
-	task_memory->task.core = core;
-	task_memory->task.priv_data = pdata;
-
-	/* success, fill the structures */
-	pdata->p_stack = p_stack;
-	pdata->stack_size = stack_size;
-	pdata->mod = mod;
-	*task = &task_memory->task;
-
-	/* create a zephyr thread for the task */
-	pdata->thread_id = k_thread_create(pdata->thread, (__sparse_force void *)p_stack,
-					   stack_size, dp_thread_fn, *task, NULL, NULL,
-					   CONFIG_DP_THREAD_PRIORITY, (*task)->flags, K_FOREVER);
-
-	k_thread_access_grant(pdata->thread_id, pdata->event);
-	scheduler_dp_grant(pdata->thread_id, cpu_get_id());
-
-	/* pin the thread to specific core */
-	ret = k_thread_cpu_pin(pdata->thread_id, core);
-	if (ret < 0) {
-		tr_err(&dp_tr, "zephyr task pin to core failed");
-		goto e_thread;
-	}
-
-#ifdef CONFIG_USERSPACE
-	if ((*task)->flags & K_USER) {
-		ret = user_memory_init_shared(pdata->thread_id, pdata->mod);
-		if (ret < 0) {
-			tr_err(&dp_tr, "user_memory_init_shared() failed");
-			goto e_thread;
-		}
-	}
-#endif /* CONFIG_USERSPACE */
-
-	/* start the thread, it should immediately stop at an event */
-	k_event_init(pdata->event);
-	k_thread_start(pdata->thread_id);
+	scheduler_dp_domain_init();
 
 	return 0;
-
-e_thread:
-	k_thread_abort(pdata->thread_id);
-err:
-	/* cleanup - free all allocated resources */
-	if (user_stack_free((__sparse_force void *)p_stack))
-		tr_err(&dp_tr, "user_stack_free failed!");
-
-	/* k_object_free looks for a pointer in the list, any invalid value can be passed */
-	k_object_free(task_memory->pdata.event);
-	k_object_free(task_memory->pdata.thread);
-	sof_heap_free(user_heap, task_memory);
-	return ret;
 }
 
 void scheduler_get_task_info_dp(struct scheduler_props *scheduler_props, uint32_t *data_off_size)
diff --git a/src/schedule/zephyr_dp_schedule.h b/src/schedule/zephyr_dp_schedule.h
index d146afab65cf..9a5d30077e15 100644
--- a/src/schedule/zephyr_dp_schedule.h
+++ b/src/schedule/zephyr_dp_schedule.h
@@ -10,6 +10,8 @@
 #include
 #include
+#include
+
 #include
 #include
 
@@ -19,7 +21,13 @@ struct scheduler_dp_data {
 	uint32_t last_ll_tick_timestamp;/* a timestamp as k_cycle_get_32 of last LL tick,
 					 * "NOW" for DP deadline calculation
 					 */
+};
 
+enum sof_dp_part_type {
+	SOF_DP_PART_HEAP,
+	SOF_DP_PART_IPC,
+	SOF_DP_PART_CFG,
+	SOF_DP_PART_TYPE_COUNT,
 };
 
 struct task_dp_pdata {
 	k_tid_t thread_id;
 	struct k_thread *thread;	/* pointer to the kernels' thread object */
 	struct k_thread thread_struct;	/* thread object for kernel threads */
 	uint32_t deadline_clock_ticks;	/* dp module deadline in Zephyr ticks */
-	k_thread_stack_t __sparse_cache *p_stack;	/* pointer to thread stack */
-	size_t stack_size;		/* size of the stack in bytes */
-	struct k_event *event;		/* pointer to event for task scheduling */
-	struct k_event event_struct;	/* event for task scheduling for kernel threads */
+	k_thread_stack_t *p_stack;	/* pointer to thread stack */
 	struct processing_module *mod;	/* the module to be scheduled */
 	uint32_t ll_cycles_to_start;	/* current number of LL cycles till delayed start */
+#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE
+	struct k_event *event;		/* pointer to event for task scheduling */
+	struct k_event event_struct;	/* event for task scheduling for kernel threads */
+#else
+	struct k_sem *sem;		/* pointer to semaphore for task scheduling */
+	struct k_sem sem_struct;	/* semaphore for task scheduling for kernel threads */
+	unsigned char pend_ipc;
+	unsigned char pend_proc;
+	struct k_mem_partition mpart[SOF_DP_PART_TYPE_COUNT];
+#endif
 };
 
 void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_run);
 void dp_thread_fn(void *p1, void *p2, void *p3);
 unsigned int scheduler_dp_lock(uint16_t core);
 void scheduler_dp_unlock(unsigned int key);
+void scheduler_dp_grant(k_tid_t thread_id, uint16_t core);
+int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid,
+			   const struct task_ops *ops, struct processing_module *mod,
+			   uint16_t core, size_t stack_size, uint32_t options);
+#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE
+static inline void scheduler_dp_domain_free(struct processing_module *pmod) {}
+static inline int scheduler_dp_domain_init(void) {return 0;}
+#else
+void scheduler_dp_domain_free(struct processing_module *pmod);
+int scheduler_dp_domain_init(void);
+#endif
diff --git a/src/schedule/zephyr_dp_schedule_application.c b/src/schedule/zephyr_dp_schedule_application.c
index 2c1f158b2f9f..4614da3c1fd7 100644
--- a/src/schedule/zephyr_dp_schedule_application.c
+++ b/src/schedule/zephyr_dp_schedule_application.c
@@ -10,10 +10,14 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
+#include
+#include
 
 #include
 #include
@@ -23,13 +27,17 @@
 LOG_MODULE_DECLARE(dp_schedule, CONFIG_SOF_LOG_LEVEL);
 extern struct tr_ctx dp_tr;
 
+#if CONFIG_USERSPACE
+static struct k_mem_domain dp_mdom[CONFIG_CORE_COUNT];
+#endif
+
 /* Synchronization semaphore for the scheduler thread to wait for DP startup */
 #define DP_SYNC_INIT(i, _) Z_SEM_INITIALIZER(dp_sync[i], 0, 1)
 #define DP_SYNC_INIT_LIST LISTIFY(CONFIG_CORE_COUNT, DP_SYNC_INIT, (,))
 static STRUCT_SECTION_ITERABLE_ARRAY(k_sem, dp_sync, CONFIG_CORE_COUNT) = { DP_SYNC_INIT_LIST };
 
 /* TODO: make this a shared kernel->module buffer for IPC parameters */
-static uint8_t ipc_buf[4096];
+static uint8_t ipc_buf[4096] __aligned(4096);
 
 struct ipc4_flat {
 	unsigned int cmd;
@@ -148,7 +156,7 @@
 #define DP_THREAD_IPC_TIMEOUT K_MSEC(100)
 
 /* Signal an IPC and wait for processing completion */
-int scheduler_dp_thread_ipc(struct processing_module *pmod, enum sof_ipc4_module_type cmd,
+int scheduler_dp_thread_ipc(struct processing_module *pmod, unsigned int cmd,
 			    const union scheduler_dp_thread_ipc_param *param)
 {
 	struct task_dp_pdata *pdata = pmod->dev->task->priv_data;
@@ -176,8 +184,10 @@
 	flat->ret = -ENOSYS;
 
 	ret = ipc_thread_flatten(cmd, param, flat);
-	if (!ret)
-		k_event_post(pdata->event, DP_TASK_EVENT_IPC);
+	if (!ret) {
+		pdata->pend_ipc++;
+		k_sem_give(pdata->sem);
+	}
 
 	scheduler_dp_unlock(lock_key);
 
@@ -223,7 +233,8 @@
 		/* trigger the task */
 		curr_task->state = SOF_TASK_STATE_RUNNING;
 		trigger_task = true;
-		k_event_post(pdata->event, DP_TASK_EVENT_PROCESS);
+		pdata->pend_proc++;
+		k_sem_give(pdata->sem);
 	}
 
 	if (curr_task->state == SOF_TASK_STATE_RUNNING) {
@@ -270,29 +281,45 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 	ARG_UNUSED(p2);
 	ARG_UNUSED(p3);
 
-	/*
-	 * The IPC thread is waiting for the thread to be
-	 * started, it can proceed now.
-	 */
+	/* The IPC thread is waiting for the thread to be started, it can proceed now. */
 	k_sem_give(&dp_sync[task->core]);
 
 	do {
 		/*
-		 * The thread is started immediately after creation, it stops on event.
-		 * An event is signalled to handle IPC or process audio data.
+		 * The thread is started immediately after creation, it stops here and waits
+		 * for the semaphore to be signalled to handle IPC or process audio data.
 		 */
-		uint32_t mask = k_event_wait_safe(task_pdata->event,
-						  DP_TASK_EVENT_PROCESS | DP_TASK_EVENT_CANCEL |
-						  DP_TASK_EVENT_IPC, false, K_FOREVER);
+		k_sem_take(task_pdata->sem, K_FOREVER);
+
+		lock_key = scheduler_dp_lock(task->core);
+
+		unsigned char pend_ipc = task_pdata->pend_ipc,
+			pend_proc = task_pdata->pend_proc;
+
+		task_pdata->pend_proc = 0;
+		task_pdata->pend_ipc = 0;
+
+		scheduler_dp_unlock(lock_key);
 
-		if (mask & DP_TASK_EVENT_IPC) {
+		/*
+		 * Only 0:1, 1:0 and 1:1 are valid. 0:0 is also possible if IPC and audio
+		 * were signalled in a quick succession before we took the lock above. Any
+		 * value > 1 would mean that we've missed IPCs or LL ticks while in queued /
+		 * idle state, which shouldn't happen.
+		 */
+		if (pend_ipc > 1 || pend_proc > 1) {
+			tr_err(&dp_tr, "Invalid wake up %u:%u", pend_proc, pend_ipc);
+			continue;
+		}
+
+		if (pend_ipc) {
 			/* handle IPC */
 			tr_dbg(&dp_tr, "got IPC wake up for %p state %d", pmod, task->state);
 			ipc_thread_unflatten_run(pmod, (struct ipc4_flat *)ipc_buf);
 			k_sem_give(&dp_sync[task->core]);
 		}
 
-		if (mask & DP_TASK_EVENT_PROCESS) {
+		if (pend_proc) {
 			bool ready;
 
 			if (task->state == SOF_TASK_STATE_RUNNING) {
@@ -333,14 +360,15 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 
 				case SOF_TASK_STATE_CANCEL:
 				case SOF_TASK_STATE_COMPLETED:
-					/* remove from scheduling */
-					list_item_del(&task->list);
+					/* task already removed from scheduling */
 					break;
 
 				default:
 					/* illegal state, serious defect, won't happen */
 					k_oops();
 				}
+			} else {
+				task->state = SOF_TASK_STATE_QUEUED;
 			}
 		} else {
 			lock_key = scheduler_dp_lock(task->core);
@@ -357,3 +385,212 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 	if (task->state == SOF_TASK_STATE_COMPLETED)
 		task_complete(task);
 }
+
+/*
+ * Safe to call with partial successful initialisation,
+ * k_mem_domain_remove_partition() then just returns -ENOENT
+ */
+void scheduler_dp_domain_free(struct processing_module *pmod)
+{
+#if CONFIG_USERSPACE
+	unsigned int core = pmod->dev->task->core;
+
+	llext_manager_rm_domain(pmod->dev->ipc_config.id, dp_mdom + core);
+
+	struct task_dp_pdata *pdata = pmod->dev->task->priv_data;
+
+	k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_HEAP);
+	k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_IPC);
+	k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_CFG);
+#endif
+}
+
+int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid,
+			   const struct task_ops *ops, struct processing_module *mod,
+			   uint16_t core, size_t stack_size, uint32_t options)
+{
+	k_thread_stack_t *p_stack;
+	/* memory allocation helper structure */
+	struct {
+		struct task task;
+		struct task_dp_pdata pdata;
+		struct comp_driver drv;
+		struct module_interface ops;
+	} *task_memory;
+
+	int ret;
+
+	/* must be called on the same core the task will be bound to */
+	assert(cpu_get_id() == core);
+
+	/*
+	 * allocate memory
+	 * to avoid multiple malloc operations allocate all required memory as a single structure
+	 * and return pointer to task_memory->task
+	 * As the structure contains zephyr kernel specific data, it must be located in
+	 * shared, non cached memory
+	 */
+	task_memory = mod_alloc_ext(mod, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT,
+				    sizeof(*task_memory), 0);
+	if (!task_memory) {
+		tr_err(&dp_tr, "memory alloc failed");
+		return -ENOMEM;
+	}
+
+	memset(task_memory, 0, sizeof(*task_memory));
+
+	task_memory->drv = *mod->dev->drv;
+	task_memory->ops = *mod->dev->drv->adapter_ops; // FIXME: is this needed?
+	task_memory->drv.adapter_ops = &task_memory->ops;
+	mod->dev->drv = &task_memory->drv;
+
+	/* allocate stack - must be aligned and cached so a separate alloc */
+	p_stack = user_stack_allocate(stack_size, options);
+	if (!p_stack) {
+		tr_err(&dp_tr, "stack alloc failed");
+		ret = -ENOMEM;
+		goto e_tmem;
+	}
+
+	struct task *ptask = &task_memory->task;
+
+	/* internal SOF task init */
+	ret = schedule_task_init(ptask, uid, SOF_SCHEDULE_DP, 0, ops->run, mod, core, options);
+	if (ret < 0) {
+		tr_err(&dp_tr, "schedule_task_init failed");
+		goto e_stack;
+	}
+
+	struct task_dp_pdata *pdata = &task_memory->pdata;
+
+	/* Point to the embedded semaphore for kernel-thread synchronization. */
+	/* For K_USER threads it will be overwritten with a dynamically allocated object. */
+	pdata->sem = &pdata->sem_struct;
+	pdata->thread = &pdata->thread_struct;
+
+#ifdef CONFIG_USERSPACE
+	if (options & K_USER) {
+		pdata->sem = k_object_alloc(K_OBJ_SEM);
+		if (!pdata->sem) {
+			tr_err(&dp_tr, "Semaphore object allocation failed");
+			ret = -ENOMEM;
+			goto e_stack;
+		}
+
+		pdata->thread = k_object_alloc(K_OBJ_THREAD);
+		if (!pdata->thread) {
+			tr_err(&dp_tr, "Thread object allocation failed");
+			ret = -ENOMEM;
+			goto e_kobj;
+		}
+		memset(&pdata->thread->arch, 0, sizeof(pdata->thread->arch));
+	}
+#endif /* CONFIG_USERSPACE */
+
+	/* success, fill the structures */
+	pdata->p_stack = p_stack;
+	pdata->mod = mod;
+
+	/* initialize other task structures */
+	ptask->ops.complete = ops->complete;
+	ptask->ops.get_deadline = ops->get_deadline;
+	ptask->priv_data = pdata;
+	list_init(&ptask->list);
+	*task = ptask;
+
+	/* create a zephyr thread for the task */
+	pdata->thread_id = k_thread_create(pdata->thread, p_stack,
+					   stack_size, dp_thread_fn, ptask, NULL, NULL,
+					   CONFIG_DP_THREAD_PRIORITY, ptask->flags, K_FOREVER);
+
+	/* pin the thread to a specific core */
+	ret = k_thread_cpu_pin(pdata->thread_id, core);
+	if (ret < 0) {
+		tr_err(&dp_tr, "zephyr task pin to core failed");
+		goto e_thread;
+	}
+
+#if CONFIG_USERSPACE
+	k_thread_access_grant(pdata->thread_id, pdata->sem, &dp_sync[core]);
+	scheduler_dp_grant(pdata->thread_id, core);
+
+	unsigned int pidx;
+	size_t size;
+	uintptr_t start;
+	struct k_mem_partition *ppart[SOF_DP_PART_TYPE_COUNT];
+
+	for (pidx = 0; pidx < ARRAY_SIZE(ppart); pidx++)
+		ppart[pidx] = pdata->mpart + pidx;
+
+	/* Module heap partition */
+	mod_heap_info(mod, &size, &start);
+	pdata->mpart[SOF_DP_PART_HEAP] = (struct k_mem_partition){
+		.start = start,
+		.size = size,
+		.attr = K_MEM_PARTITION_P_RW_U_RW,
+	};
+	/* IPC flattening buffer partition */
+	pdata->mpart[SOF_DP_PART_IPC] = (struct k_mem_partition){
+		.start = (uintptr_t)&ipc_buf,
+		.size = sizeof(ipc_buf),
+		.attr = K_MEM_PARTITION_P_RW_U_RW,
+	};
+	/* Host mailbox partition for additional IPC parameters: read-only */
+	pdata->mpart[SOF_DP_PART_CFG] = (struct k_mem_partition){
+		.start = (uintptr_t)MAILBOX_HOSTBOX_BASE,
+		.size = 4096,
+		.attr = K_MEM_PARTITION_P_RO_U_RO,
+	};
+
+	for (pidx = 0; pidx < SOF_DP_PART_TYPE_COUNT; pidx++) {
+		ret = k_mem_domain_add_partition(dp_mdom + core, pdata->mpart + pidx);
+		if (ret < 0)
+			goto e_dom;
+	}
+
+	ret = llext_manager_add_domain(mod->dev->ipc_config.id, dp_mdom + core);
+	if (ret < 0) {
+		tr_err(&dp_tr, "failed to add LLEXT to domain %d", ret);
+		goto e_dom;
+	}
+
+	/*
+	 * Keep this as the last call that can fail; otherwise the domain would
+	 * be removed before its thread.
+	 */
+	ret = k_mem_domain_add_thread(dp_mdom + core, pdata->thread_id);
+	if (ret < 0) {
+		tr_err(&dp_tr, "failed to add thread to domain %d", ret);
+		goto e_dom;
+	}
+#endif /* CONFIG_USERSPACE */
+
+	/* start the thread; it should immediately stop at the semaphore */
+	k_sem_init(pdata->sem, 0, 1);
+	k_thread_start(pdata->thread_id);
+
+	return 0;
+
+#ifdef CONFIG_USERSPACE
+e_dom:
+	scheduler_dp_domain_free(mod);
+#endif
+e_thread:
+	k_thread_abort(pdata->thread_id);
+#ifdef CONFIG_USERSPACE
+e_kobj:
+	/* k_object_free() looks the pointer up in the kernel object list, so any invalid value can safely be passed */
+	k_object_free(pdata->thread);
+	k_object_free(pdata->sem);
+#endif
+e_stack:
+	user_stack_free(p_stack);
+e_tmem:
+	mod_free(mod, task_memory);
+	return ret;
+}
+
+int scheduler_dp_domain_init(void)
+{
+	return k_mem_domain_init(dp_mdom + cpu_get_id(), 0, NULL);
+}
diff --git a/src/schedule/zephyr_dp_schedule_thread.c b/src/schedule/zephyr_dp_schedule_thread.c
index 7a79e214eaca..c9874155893c 100644
--- a/src/schedule/zephyr_dp_schedule_thread.c
+++ b/src/schedule/zephyr_dp_schedule_thread.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -20,6 +21,9 @@
 
 #include "zephyr_dp_schedule.h"
 
+LOG_MODULE_DECLARE(dp_schedule, CONFIG_SOF_LOG_LEVEL);
+extern struct tr_ctx dp_tr;
+
 /* Go through all DP tasks and recalculate their readiness and deadlines
  * NOT REENTRANT, should be called with scheduler_dp_lock()
  */
@@ -169,3 +173,138 @@ void dp_thread_fn(void *p1, void *p2, void *p3)
 	if (task->state == SOF_TASK_STATE_COMPLETED)
 		task_complete(task);
 }
+
+int scheduler_dp_task_init(struct task **task,
+			   const struct sof_uuid_entry *uid,
+			   const struct task_ops *ops,
+			   struct processing_module *mod,
+			   uint16_t core,
+			   size_t stack_size,
+			   uint32_t options)
+{
+	void __sparse_cache *p_stack = NULL;
+	struct k_heap *const user_heap = mod->dev->drv->user_heap;
+
+	/* memory allocation helper structure */
+	struct {
+		struct task task;
+		struct task_dp_pdata pdata;
+	} *task_memory;
+
+	int ret;
+
+	/* must be called on the same core the task will be bound to */
+	assert(cpu_get_id() == core);
+
+	/*
+	 * Allocate memory. To avoid multiple malloc operations allocate all
+	 * required memory as a single structure and return a pointer to
+	 * task_memory->task. As the structure contains Zephyr kernel-specific
+	 * data, it must be located in shared, non-cached memory.
+	 */
+	task_memory = sof_heap_alloc(user_heap, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT,
+				     sizeof(*task_memory), 0);
+	if (!task_memory) {
+		tr_err(&dp_tr, "memory alloc failed");
+		return -ENOMEM;
+	}
+
+	memset(task_memory, 0, sizeof(*task_memory));
+
+	/* allocate stack - must be aligned and cached so a separate alloc */
+	p_stack = user_stack_allocate(stack_size, options);
+	if (!p_stack) {
+		tr_err(&dp_tr, "stack alloc failed");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* internal SOF task init */
+	ret = schedule_task_init(&task_memory->task, uid, SOF_SCHEDULE_DP, 0, ops->run,
+				 mod, core, options);
+	if (ret < 0) {
+		tr_err(&dp_tr, "schedule_task_init failed");
+		goto err;
+	}
+
+	struct task_dp_pdata *pdata = &task_memory->pdata;
+
+	/* Point to the embedded event_struct for kernel-thread synchronization. */
+	/* For K_USER threads it will be overwritten with dynamically allocated objects,
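+	 * because a kernel object embedded in a plain heap allocation cannot be
+	 * granted to a userspace thread.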
+	 */
+	pdata->event = &pdata->event_struct;
+	pdata->thread = &pdata->thread_struct;
+
+#ifdef CONFIG_USERSPACE
+	if (options & K_USER) {
+		pdata->event = k_object_alloc(K_OBJ_EVENT);
+		if (!pdata->event) {
+			tr_err(&dp_tr, "Event object allocation failed");
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		pdata->thread = k_object_alloc(K_OBJ_THREAD);
+		if (!pdata->thread) {
+			tr_err(&dp_tr, "Thread object allocation failed");
+			ret = -ENOMEM;
+			goto err;
+		}
+	}
+#endif /* CONFIG_USERSPACE */
+
+	/* initialize other task structures */
+	task_memory->task.ops.complete = ops->complete;
+	task_memory->task.ops.get_deadline = ops->get_deadline;
+	task_memory->task.state = SOF_TASK_STATE_INIT;
+	task_memory->task.core = core;
+	task_memory->task.priv_data = pdata;
+
+	/* success, fill the structures */
+	pdata->p_stack = p_stack;
+	pdata->mod = mod;
+	*task = &task_memory->task;
+
+	/* create a zephyr thread for the task */
+	pdata->thread_id = k_thread_create(pdata->thread, (__sparse_force void *)p_stack,
+					   stack_size, dp_thread_fn, *task, NULL, NULL,
+					   CONFIG_DP_THREAD_PRIORITY, (*task)->flags, K_FOREVER);
+
+	k_thread_access_grant(pdata->thread_id, pdata->event);
+	scheduler_dp_grant(pdata->thread_id, cpu_get_id());
+
+	/* pin the thread to a specific core */
+	ret = k_thread_cpu_pin(pdata->thread_id, core);
+	if (ret < 0) {
+		tr_err(&dp_tr, "zephyr task pin to core failed");
+		goto e_thread;
+	}
+
+#ifdef CONFIG_USERSPACE
+	if ((*task)->flags & K_USER) {
+		ret = user_memory_init_shared(pdata->thread_id, pdata->mod);
+		if (ret < 0) {
+			tr_err(&dp_tr, "user_memory_init_shared() failed");
+			goto e_thread;
+		}
+	}
+#endif /* CONFIG_USERSPACE */
+
+	/* start the thread; it should immediately stop at an event */
+	k_event_init(pdata->event);
+	k_thread_start(pdata->thread_id);
+
+	return 0;
+
+e_thread:
+	k_thread_abort(pdata->thread_id);
+err:
+	/* cleanup - free all allocated resources */
+	if (user_stack_free((__sparse_force void *)p_stack))
+		tr_err(&dp_tr, "user_stack_free failed!");
+
+	/* k_object_free() looks the pointer up in the kernel object list, so any invalid value can safely be passed */
+	k_object_free(task_memory->pdata.event);
+	k_object_free(task_memory->pdata.thread);
+	sof_heap_free(user_heap, task_memory);
+	return ret;
+}
From f56a11cf557f8a6baa59587faff2634158ae5e98 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Thu, 11 Dec 2025 15:15:46 +0100
Subject: [PATCH 06/11] dp: disable the .cancel() method when unused

The DP scheduler's .cancel() method is so far only used with the system
agent and with the proxy. Make it panic in other configurations to
avoid accidental use.

Signed-off-by: Guennadi Liakhovetski
---
 src/schedule/zephyr_dp_schedule.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/schedule/zephyr_dp_schedule.c b/src/schedule/zephyr_dp_schedule.c
index fc394066b6b1..73c0e0275d94 100644
--- a/src/schedule/zephyr_dp_schedule.c
+++ b/src/schedule/zephyr_dp_schedule.c
@@ -240,14 +240,21 @@ void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void *
 	scheduler_dp_unlock(lock_key);
 }
 
-// FIXME: is .cancel() always followed by .free()? Where should we free stack and thread?
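+/*
+ * .cancel() is so far only used with the system agent and with the proxy;
+ * in other configurations it must never be reached.
+ */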
+#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY static int scheduler_dp_task_cancel(void *data, struct task *task) +{ + /* Should never be called */ + k_panic(); + return -EOPNOTSUPP; +} +#endif + +static int scheduler_dp_task_stop(void *data, struct task *task) { unsigned int lock_key; struct scheduler_dp_data *dp_sch = (struct scheduler_dp_data *)data; struct task_dp_pdata *pdata = task->priv_data; - /* this is asyn cancel - mark the task as canceled and remove it from scheduling */ lock_key = scheduler_dp_lock(cpu_get_id()); @@ -278,7 +285,7 @@ static int scheduler_dp_task_free(void *data, struct task *task) struct task_dp_pdata *pdata = task->priv_data; int ret; - scheduler_dp_task_cancel(data, task); + scheduler_dp_task_stop(data, task); /* the thread should be terminated at this moment, * abort is safe and will ensure no use after free @@ -343,7 +350,11 @@ static int scheduler_dp_task_shedule(void *data, struct task *task, uint64_t sta static struct scheduler_ops schedule_dp_ops = { .schedule_task = scheduler_dp_task_shedule, +#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE + .schedule_task_cancel = scheduler_dp_task_stop, +#else .schedule_task_cancel = scheduler_dp_task_cancel, +#endif .schedule_task_free = scheduler_dp_task_free, }; From f1cd7f61f88e922b8f73dbb5ffc2abd8ab0f6ac3 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 15 Dec 2025 17:33:49 +0100 Subject: [PATCH 07/11] userspace: llext: add support for cold code and data Add userspace mapping for cold module code and data. Signed-off-by: Guennadi Liakhovetski --- src/include/sof/lib_manager.h | 2 + src/library_manager/llext_manager.c | 112 +++++++++++++++++++++++++++- zephyr/include/sof/lib/memory.h | 3 + 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/src/include/sof/lib_manager.h b/src/include/sof/lib_manager.h index 83c3efb4d890..f1ead3028984 100644 --- a/src/include/sof/lib_manager.h +++ b/src/include/sof/lib_manager.h @@ -91,6 +91,8 @@ enum { LIB_MANAGER_DATA, LIB_MANAGER_RODATA, LIB_MANAGER_BSS, + LIB_MANAGER_COLD, + LIB_MANAGER_COLDRODATA, LIB_MANAGER_N_SEGMENTS, }; diff --git a/src/library_manager/llext_manager.c b/src/library_manager/llext_manager.c index 41b8bb4c9246..b044b1fcfac9 100644 --- a/src/library_manager/llext_manager.c +++ b/src/library_manager/llext_manager.c @@ -125,7 +125,7 @@ static int llext_manager_load_data_from_storage(const struct sys_mm_drv_region * int ret = llext_get_section_info(ldr, ext, i, &shdr, &s_region, &s_offset); if (ret < 0) { - tr_err(lib_manager_tr, "no section info: %d", ret); + tr_err(&lib_manager_tr, "no section info: %d", ret); continue; } @@ -760,6 +760,8 @@ int llext_manager_add_domain(const uint32_t component_id, struct k_mem_domain *d const uint32_t entry_index = LIB_MANAGER_GET_MODULE_INDEX(module_id); const unsigned int mod_idx = llext_manager_mod_find(ctx, entry_index); struct lib_manager_module *mctx = ctx->mod + mod_idx; + const struct llext *ext = mctx->llext; + const struct llext_loader *ldr = &mctx->ebl->loader; /* Executable code (.text) */ uintptr_t va_base_text = mctx->segment[LIB_MANAGER_TEXT].addr; @@ -793,7 +795,90 @@ int llext_manager_add_domain(const uint32_t component_id, struct k_mem_domain *d goto e_rodata; } + elf_shdr_t shdr_cold, shdr_coldrodata; + bool rodata = false, text = false; + const void *rodata_addr = NULL, *text_addr = NULL; + size_t text_offset = 0, rodata_offset = 0; + + shdr_cold.sh_size = 0; + shdr_coldrodata.sh_size = 0; + + ret = llext_get_section_header(ldr, ext, ".cold", &shdr_cold); + if (ret < 
0) + tr_warn(&lib_manager_tr, "couldn't get .cold header"); + else + llext_get_region_info(ldr, ext, LLEXT_MEM_TEXT, NULL, &text_addr, NULL); + + ret = llext_get_section_header(ldr, ext, ".coldrodata", &shdr_coldrodata); + if (ret < 0) + tr_warn(&lib_manager_tr, "couldn't get .coldrodata header"); + else + llext_get_region_info(ldr, ext, LLEXT_MEM_RODATA, NULL, &rodata_addr, NULL); + + for (unsigned int i = 0; i < llext_section_count(ext) && (!rodata || !text); i++) { + const elf_shdr_t *shdr; + enum llext_mem s_region = LLEXT_MEM_COUNT; + size_t s_offset = 0; + + ret = llext_get_section_info(ldr, ext, i, &shdr, &s_region, &s_offset); + if (ret < 0) + continue; + + switch (s_region) { + case LLEXT_MEM_TEXT: + if (shdr_cold.sh_size && + shdr->sh_name == shdr_cold.sh_name && + shdr->sh_offset == shdr_cold.sh_offset && !text) { + text = true; + text_offset = s_offset; + } + break; + case LLEXT_MEM_RODATA: + if (shdr_coldrodata.sh_size && + shdr->sh_name == shdr_coldrodata.sh_name && + shdr->sh_offset == shdr_coldrodata.sh_offset && !rodata) { + rodata = true; + rodata_offset = s_offset; + } + break; + default: + break; + } + } + + if (text) { + tr_dbg(&lib_manager_tr, ".cold %#x @ %#lx", + shdr_cold.sh_size, (uintptr_t)text_addr + text_offset); + ret = llext_manager_add_partition(domain, (uintptr_t)text_addr + text_offset, + shdr_cold.sh_size, + K_MEM_PARTITION_P_RX_U_RX); + if (ret < 0) + goto e_data; + mctx->segment[LIB_MANAGER_COLD].addr = (uintptr_t)text_addr + text_offset; + mctx->segment[LIB_MANAGER_COLD].size = shdr_cold.sh_size; + } + + if (rodata) { + tr_dbg(&lib_manager_tr, ".coldrodata %#x @ %#lx", + shdr_coldrodata.sh_size, (uintptr_t)rodata_addr + rodata_offset); + ret = llext_manager_add_partition(domain, (uintptr_t)rodata_addr + rodata_offset, + shdr_coldrodata.sh_size, + K_MEM_PARTITION_P_RO_U_RO); + if (ret < 0) + goto e_cold; + mctx->segment[LIB_MANAGER_COLDRODATA].addr = (uintptr_t)rodata_addr + rodata_offset; + mctx->segment[LIB_MANAGER_COLDRODATA].size = shdr_coldrodata.sh_size; + } + return 0; + +e_cold: + llext_manager_rm_partition(domain, (uintptr_t)text_addr + text_offset, shdr_cold.sh_size, + K_MEM_PARTITION_P_RX_U_RX); + mctx->segment[LIB_MANAGER_COLD].addr = 0; + mctx->segment[LIB_MANAGER_COLD].size = 0; +e_data: + llext_manager_rm_partition(domain, va_base_data, data_size, K_MEM_PARTITION_P_RW_U_RW); e_rodata: llext_manager_rm_partition(domain, va_base_rodata, rodata_size, K_MEM_PARTITION_P_RO_U_RO); e_text: @@ -848,6 +933,31 @@ int llext_manager_rm_domain(const uint32_t component_id, struct k_mem_domain *do } } + if (mctx->segment[LIB_MANAGER_COLD].addr) { + err = llext_manager_rm_partition(domain, + mctx->segment[LIB_MANAGER_COLD].addr, + mctx->segment[LIB_MANAGER_COLD].size, + K_MEM_PARTITION_P_RX_U_RX); + if (err < 0) { + tr_err(&lib_manager_tr, "failed to remove .cold memory partition: %d", err); + if (!ret) + ret = err; + } + } + + if (mctx->segment[LIB_MANAGER_COLDRODATA].addr) { + err = llext_manager_rm_partition(domain, + mctx->segment[LIB_MANAGER_COLDRODATA].addr, + mctx->segment[LIB_MANAGER_COLDRODATA].size, + K_MEM_PARTITION_P_RO_U_RO); + if (err < 0) { + tr_err(&lib_manager_tr, + "failed to remove .coldrodata memory partition: %d", err); + if (!ret) + ret = err; + } + } + return ret; } #endif diff --git a/zephyr/include/sof/lib/memory.h b/zephyr/include/sof/lib/memory.h index be01675951d5..6fa6a8ef558d 100644 --- a/zephyr/include/sof/lib/memory.h +++ b/zephyr/include/sof/lib/memory.h @@ -32,6 +32,9 @@ void dbg_path_cold_enter(const char *fn); static 
inline void __assert_can_be_cold(const char *fn)
 {
+	if (k_is_user_context())
+		return;
+
 	__ASSERT(!ll_sch_is_current(), "%s() called from an LL thread!", fn);
 	dbg_path_cold_enter(fn);
 }
From d30457fa7eed9cb31a249aa16bc9ec8780c764e0 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Mon, 15 Dec 2025 17:36:50 +0100
Subject: [PATCH 08/11] llext: temporary compilation fix waiting for Zephyr PR

While waiting for
https://github.com/zephyrproject-rtos/zephyr/pull/101073 to be merged,
we need to drop the const attribute.

Signed-off-by: Guennadi Liakhovetski
---
 src/library_manager/llext_manager.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/library_manager/llext_manager.c b/src/library_manager/llext_manager.c
index b044b1fcfac9..b5f912fb16b9 100644
--- a/src/library_manager/llext_manager.c
+++ b/src/library_manager/llext_manager.c
@@ -803,13 +803,15 @@ int llext_manager_add_domain(const uint32_t component_id, struct k_mem_domain *d
 	shdr_cold.sh_size = 0;
 	shdr_coldrodata.sh_size = 0;
 
-	ret = llext_get_section_header(ldr, ext, ".cold", &shdr_cold);
+	ret = llext_get_section_header((struct llext_loader *)ldr, (struct llext *)ext,
+				       ".cold", &shdr_cold);
 	if (ret < 0)
 		tr_warn(&lib_manager_tr, "couldn't get .cold header");
 	else
 		llext_get_region_info(ldr, ext, LLEXT_MEM_TEXT, NULL, &text_addr, NULL);
 
-	ret = llext_get_section_header(ldr, ext, ".coldrodata", &shdr_coldrodata);
+	ret = llext_get_section_header((struct llext_loader *)ldr, (struct llext *)ext,
+				       ".coldrodata", &shdr_coldrodata);
 	if (ret < 0)
 		tr_warn(&lib_manager_tr, "couldn't get .coldrodata header");
 	else
From 16212dfd3450bf6290bc32609dc5320968a39935 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Fri, 5 Dec 2025 16:50:31 +0100
Subject: [PATCH 09/11] dp: switch all threads to userspace unless PROXY is used

If CONFIG_SOF_USERSPACE_PROXY isn't used, all DP threads should run in
userspace mode.

Signed-off-by: Guennadi Liakhovetski
---
 src/audio/pipeline/pipeline-schedule.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/audio/pipeline/pipeline-schedule.c b/src/audio/pipeline/pipeline-schedule.c
index a13e95982ccc..74f04f88d1dc 100644
--- a/src/audio/pipeline/pipeline-schedule.c
+++ b/src/audio/pipeline/pipeline-schedule.c
@@ -383,7 +383,6 @@ static enum task_state dp_task_run(void *data)
 
 int pipeline_comp_dp_task_init(struct comp_dev *comp)
 {
-	int ret;
 	/* DP tasks are guaranteed to have a module_adapter */
 	struct processing_module *mod = comp_mod(comp);
 	struct task_ops ops = {
@@ -392,22 +391,17 @@ int pipeline_comp_dp_task_init(struct comp_dev *comp)
 		.complete = NULL
 	};
 
-	if (!comp->task) {
-		ret = scheduler_dp_task_init(&comp->task,
-					     SOF_UUID(dp_task_uuid),
-					     &ops,
-					     mod,
-					     comp->ipc_config.core,
-					     TASK_DP_STACK_SIZE,
-#if CONFIG_USERSPACE
-					     mod->user_ctx ? K_USER :
-#endif /* CONFIG_USERSPACE */
-					     0);
-		if (ret < 0)
-			return ret;
-	}
+	if (comp->task)
+		return 0;
 
-	return 0;
+#if CONFIG_SOF_USERSPACE_PROXY
+	unsigned int flags = mod->user_ctx ? K_USER : 0;
+#else
+	unsigned int flags = IS_ENABLED(CONFIG_USERSPACE) ? K_USER : 0;
+#endif
+
+	return scheduler_dp_task_init(&comp->task, SOF_UUID(dp_task_uuid), &ops, mod,
+				      comp->ipc_config.core, TASK_DP_STACK_SIZE, flags);
 }
 
 #endif /* CONFIG_ZEPHYR_DP_SCHEDULER */
From ecfa1e2ecc1acfcdcd7a16a66314f943cbe7075a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Fri, 19 Dec 2025 11:49:50 +0100
Subject: [PATCH 10/11] ptl: increase the number of L2 page tables

With userspace enabled we easily exhaust the 64 L2 page tables that
Zephyr currently configures for ACE 3.0 when running tests with
multiple pipelines on multiple cores. Double the number to cover the
current test cases.

Signed-off-by: Guennadi Liakhovetski
---
 app/boards/intel_adsp_ace30_ptl.conf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/boards/intel_adsp_ace30_ptl.conf b/app/boards/intel_adsp_ace30_ptl.conf
index 2a77e29cf86b..ac78c331b83b 100644
--- a/app/boards/intel_adsp_ace30_ptl.conf
+++ b/app/boards/intel_adsp_ace30_ptl.conf
@@ -53,6 +53,7 @@ CONFIG_DMA_DW_LLI_POOL_SIZE=50
 CONFIG_MEMORY_WIN_2_SIZE=12288
 CONFIG_MM_DRV_INTEL_ADSP_TLB_REMAP_UNUSED_RAM=y
 CONFIG_MM_DRV_INTEL_VIRTUAL_REGION_COUNT=2
+CONFIG_XTENSA_MMU_NUM_L2_TABLES=128
 CONFIG_SYS_CLOCK_TICKS_PER_SEC=12000
 
 # Zephyr / power settings
From 9d9102c32835a71646529571c85368d2cba9af94 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Tue, 9 Dec 2025 15:11:06 +0100
Subject: [PATCH 11/11] DP: userspace: switch to "application" mode for Linux
 builds

Linux SOF builds should use the "application" userspace implementation
for DP.

Signed-off-by: Guennadi Liakhovetski
---
 app/os_linux_overlay.conf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/os_linux_overlay.conf b/app/os_linux_overlay.conf
index a1399d4ebe2c..e862a1cfdcc8 100644
--- a/app/os_linux_overlay.conf
+++ b/app/os_linux_overlay.conf
@@ -6,3 +6,4 @@
 # SOF Linux driver does not require FW to retain its
 # state, so context save can be disabled
 CONFIG_ADSP_IMR_CONTEXT_SAVE=n
+CONFIG_SOF_USERSPACE_PROXY=n
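
Reviewer note, not part of the series: the k_event to k_sem conversion in patch 02 is the subtlest change above, so here is a distilled, self-contained sketch of the hand-off it implements. This is illustrative only and assumes plain Zephyr kernel APIs; dp_wake, dp_lock, dp_signal() and dp_wait() are invented names, not symbols from the patches, and the real code uses scheduler_dp_lock()/scheduler_dp_unlock() where a private spinlock stands in here.

#include <zephyr/kernel.h>

/* One-time setup: k_sem_init(&dp_wake, 0, 1), as scheduler_dp_task_init() does */
static struct k_sem dp_wake;
static struct k_spinlock dp_lock;
static unsigned char pend_ipc, pend_proc;

/* Producer side (IPC sender or LL tick): record the reason, then wake the thread */
static void dp_signal(bool is_ipc)
{
	k_spinlock_key_t key = k_spin_lock(&dp_lock);

	if (is_ipc)
		pend_ipc++;
	else
		pend_proc++;

	k_spin_unlock(&dp_lock, key);
	k_sem_give(&dp_wake);
}

/* Consumer side (DP thread): sleep, then atomically drain all pending reasons */
static void dp_wait(unsigned char *n_ipc, unsigned char *n_proc)
{
	k_sem_take(&dp_wake, K_FOREVER);

	k_spinlock_key_t key = k_spin_lock(&dp_lock);

	*n_ipc = pend_ipc;
	*n_proc = pend_proc;
	pend_ipc = 0;
	pend_proc = 0;

	k_spin_unlock(&dp_lock, key);
}

Because the semaphore count is capped at one, several gives in quick succession collapse into a single wake-up. Draining both counters per cycle is what makes that safe, and it is also why the thread can legitimately observe a 0:0 pair when a previous iteration already consumed the work, exactly as the comment in dp_thread_fn() describes.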