diff --git a/app/boards/intel_adsp_ace30_ptl.conf b/app/boards/intel_adsp_ace30_ptl.conf index 2a77e29cf86b..ac78c331b83b 100644 --- a/app/boards/intel_adsp_ace30_ptl.conf +++ b/app/boards/intel_adsp_ace30_ptl.conf @@ -53,6 +53,7 @@ CONFIG_DMA_DW_LLI_POOL_SIZE=50 CONFIG_MEMORY_WIN_2_SIZE=12288 CONFIG_MM_DRV_INTEL_ADSP_TLB_REMAP_UNUSED_RAM=y CONFIG_MM_DRV_INTEL_VIRTUAL_REGION_COUNT=2 +CONFIG_XTENSA_MMU_NUM_L2_TABLES=128 CONFIG_SYS_CLOCK_TICKS_PER_SEC=12000 # Zephyr / power settings diff --git a/app/os_linux_overlay.conf b/app/os_linux_overlay.conf index a1399d4ebe2c..e862a1cfdcc8 100644 --- a/app/os_linux_overlay.conf +++ b/app/os_linux_overlay.conf @@ -6,3 +6,4 @@ # SOF Linux driver does not require FW to retain its # state, so context save can be disabled CONFIG_ADSP_IMR_CONTEXT_SAVE=n +CONFIG_SOF_USERSPACE_PROXY=n diff --git a/src/audio/buffers/comp_buffer.c b/src/audio/buffers/comp_buffer.c index d3734b8c5906..6d0113e96252 100644 --- a/src/audio/buffers/comp_buffer.c +++ b/src/audio/buffers/comp_buffer.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -158,8 +159,16 @@ static void comp_buffer_free(struct sof_audio_buffer *audio_buffer) /* In case some listeners didn't unregister from buffer's callbacks */ notifier_unregister_all(NULL, buffer); + struct k_heap *heap = buffer->audio_buffer.heap; + rfree(buffer->stream.addr); - sof_heap_free(buffer->audio_buffer.heap, buffer); + sof_heap_free(heap, buffer); + if (heap) { + struct dp_heap_user *mod_heap_user = container_of(heap, struct dp_heap_user, heap); + + if (!--mod_heap_user->client_count) + rfree(mod_heap_user); + } } APP_TASK_DATA static const struct source_ops comp_buffer_source_ops = { diff --git a/src/audio/module_adapter/module/generic.c b/src/audio/module_adapter/module/generic.c index 5b8eb8e43ee6..892422b5a7d4 100644 --- a/src/audio/module_adapter/module/generic.c +++ b/src/audio/module_adapter/module/generic.c @@ -12,15 +12,22 @@ */ #include - #include #include #include +#include +#if CONFIG_IPC_MAJOR_4 +#include +#include +#include +#endif /* The __ZEPHYR__ condition is to keep cmocka tests working */ #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__) -#define MEM_API_CHECK_THREAD(res) __ASSERT((res)->rsrc_mngr == k_current_get(), \ - "Module memory API operation from wrong thread") +#define MEM_API_CHECK_THREAD(res) do { \ + if ((res)->rsrc_mngr != k_current_get()) \ + LOG_WRN("mngr %p != cur %p", (res)->rsrc_mngr, k_current_get()); \ +} while (0) #else #define MEM_API_CHECK_THREAD(res) #endif @@ -71,7 +78,7 @@ int module_load_config(struct comp_dev *dev, const void *cfg, size_t size) return ret; } -static void mod_resource_init(struct processing_module *mod) +void mod_resource_init(struct processing_module *mod) { struct module_data *md = &mod->priv; /* Init memory list */ @@ -109,12 +116,17 @@ int module_init(struct processing_module *mod) return -EIO; } - mod_resource_init(mod); #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__) mod->priv.resources.rsrc_mngr = k_current_get(); #endif /* Now we can proceed with module specific initialization */ - ret = interface->init(mod); +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) + ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_INIT_INSTANCE, NULL); + else +#endif + ret = interface->init(mod); + if (ret) { comp_err(dev, "error %d: module specific init failed", ret); mod_free_all(mod); @@ -168,6 +180,19 @@ static void container_put(struct processing_module 
*mod, struct module_resource list_item_append(&container->list, &res->free_cont_list); } +#if CONFIG_USERSPACE +void mod_heap_info(struct processing_module *mod, size_t *size, uintptr_t *start) +{ + struct module_resources *res = &mod->priv.resources; + + if (size) + *size = res->heap->heap.init_bytes; + + if (start) + *start = (uintptr_t)container_of(res->heap, struct dp_heap_user, heap); +} +#endif + /** * Allocates aligned buffer memory block for module. * @param mod Pointer to the module this memory block is allocated for. @@ -230,7 +255,8 @@ EXPORT_SYMBOL(mod_balloc_align); * * The allocated memory is automatically freed when the module is unloaded. */ -void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, size_t alignment) +void *z_impl_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, + size_t alignment) { struct module_resources *res = &mod->priv.resources; struct module_resource *container; @@ -268,7 +294,7 @@ void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, return ptr; } -EXPORT_SYMBOL(mod_alloc_ext); +EXPORT_SYMBOL(z_impl_mod_alloc_ext); /** * Creates a blob handler and releases it when the module is unloaded @@ -314,7 +340,8 @@ EXPORT_SYMBOL(mod_data_blob_handler_new); * Like fast_get() but the handler is automatically freed. */ #if CONFIG_FAST_GET -const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr, size_t size) +const void *z_impl_mod_fast_get(struct processing_module *mod, const void * const dram_ptr, + size_t size) { struct module_resources *res = &mod->priv.resources; struct module_resource *container; @@ -339,7 +366,7 @@ const void *mod_fast_get(struct processing_module *mod, const void * const dram_ return ptr; } -EXPORT_SYMBOL(mod_fast_get); +EXPORT_SYMBOL(z_impl_mod_fast_get); #endif static int free_contents(struct processing_module *mod, struct module_resource *container) @@ -372,7 +399,7 @@ static int free_contents(struct processing_module *mod, struct module_resource * * @param mod Pointer to module this memory block was allocated for. * @param ptr Pointer to the memory block. 
*/ -int mod_free(struct processing_module *mod, const void *ptr) +int z_impl_mod_free(struct processing_module *mod, const void *ptr) { struct module_resources *res = &mod->priv.resources; struct module_resource *container; @@ -398,7 +425,46 @@ int mod_free(struct processing_module *mod, const void *ptr) return -EINVAL; } -EXPORT_SYMBOL(mod_free); +EXPORT_SYMBOL(z_impl_mod_free); + +#ifdef CONFIG_USERSPACE +#include +const void *z_vrfy_mod_fast_get(struct processing_module *mod, const void * const dram_ptr, + size_t size) +{ + struct module_resources *res = &mod->priv.resources; + + K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod))); + K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap))); + K_OOPS(K_SYSCALL_MEMORY_READ(dram_ptr, size)); + + return z_impl_mod_fast_get(mod, dram_ptr, size); +} +#include + +void *z_vrfy_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, + size_t alignment) +{ + struct module_resources *res = &mod->priv.resources; + + K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod))); + K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap))); + + return z_impl_mod_alloc_ext(mod, flags, size, alignment); +} +#include + +int z_vrfy_mod_free(struct processing_module *mod, const void *ptr) +{ + struct module_resources *res = &mod->priv.resources; + + K_OOPS(K_SYSCALL_MEMORY_WRITE(mod, sizeof(*mod))); + K_OOPS(K_SYSCALL_MEMORY_WRITE(res->heap, sizeof(*res->heap))); + + return z_impl_mod_free(mod, ptr); +} +#include +#endif #if CONFIG_COMP_BLOB void mod_data_blob_handler_free(struct processing_module *mod, struct comp_data_blob_handler *dbh) @@ -433,7 +499,24 @@ int module_prepare(struct processing_module *mod, return -EPERM; #endif if (ops->prepare) { - int ret = ops->prepare(mod, sources, num_of_sources, sinks, num_of_sinks); + int ret; + +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) { + const union scheduler_dp_thread_ipc_param param = { + .pipeline_state = { + .trigger_cmd = COMP_TRIGGER_PREPARE, + .state = SOF_IPC4_PIPELINE_STATE_RUNNING, + .n_sources = num_of_sources, + .sources = sources, + .n_sinks = num_of_sinks, + .sinks = sinks, + }, + }; + ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE, ¶m); + } else +#endif + ret = ops->prepare(mod, sources, num_of_sources, sinks, num_of_sinks); if (ret) { comp_err(dev, "error %d: module specific prepare failed", ret); @@ -552,11 +635,23 @@ int module_reset(struct processing_module *mod) if (md->state < MODULE_IDLE) return 0; #endif + /* cancel task if DP task*/ - if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP && mod->dev->task) + if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP && mod->dev->task && + (IS_ENABLED(CONFIG_SOF_USERSPACE_PROXY) || !IS_ENABLED(CONFIG_USERSPACE))) schedule_task_cancel(mod->dev->task); + if (ops->reset) { - ret = ops->reset(mod); +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) { + const union scheduler_dp_thread_ipc_param param = { + .pipeline_state.trigger_cmd = COMP_TRIGGER_STOP, + }; + ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE, ¶m); + } else +#endif + ret = ops->reset(mod); + if (ret) { if (ret != PPL_STATUS_PATH_STOP) comp_err(mod->dev, @@ -627,7 +722,8 @@ int module_free(struct processing_module *mod) struct module_data *md = &mod->priv; int ret = 0; - if (ops->free) { + if (ops->free && (mod->dev->ipc_config.proc_domain != 
COMP_PROCESSING_DOMAIN_DP || + IS_ENABLED(CONFIG_SOF_USERSPACE_PROXY) || !IS_ENABLED(CONFIG_USERSPACE))) { ret = ops->free(mod); if (ret) comp_warn(mod->dev, "error: %d", ret); @@ -772,8 +868,17 @@ int module_bind(struct processing_module *mod, struct bind_info *bind_data) if (ret) return ret; - if (ops->bind) - ret = ops->bind(mod, bind_data); + if (ops->bind) { +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) { + const union scheduler_dp_thread_ipc_param param = { + .bind_data = bind_data, + }; + ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_BIND, ¶m); + } else +#endif + ret = ops->bind(mod, bind_data); + } return ret; } @@ -796,8 +901,17 @@ int module_unbind(struct processing_module *mod, struct bind_info *unbind_data) if (ret) return ret; - if (ops->unbind) - ret = ops->unbind(mod, unbind_data); + if (ops->unbind) { +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) { + const union scheduler_dp_thread_ipc_param param = { + .bind_data = unbind_data, + }; + ret = scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_UNBIND, ¶m); + } else +#endif + ret = ops->unbind(mod, unbind_data); + } return ret; } diff --git a/src/audio/module_adapter/module_adapter.c b/src/audio/module_adapter/module_adapter.c index 6d63fb561d01..82d6b2706159 100644 --- a/src/audio/module_adapter/module_adapter.c +++ b/src/audio/module_adapter/module_adapter.c @@ -18,10 +18,16 @@ #include #include #include +#include #include #include #include #include +#if CONFIG_IPC_MAJOR_4 +#include +#include +#include +#endif #include #include #include @@ -52,30 +58,33 @@ struct comp_dev *module_adapter_new(const struct comp_driver *drv, #define PAGE_SZ HOST_PAGE_SIZE #endif -static struct k_heap *module_adapter_dp_heap_new(const struct comp_ipc_config *config) +static struct dp_heap_user *module_adapter_dp_heap_new(const struct comp_ipc_config *config, + size_t *heap_size) { /* src-lite with 8 channels has been seen allocating 14k in one go */ /* FIXME: the size will be derived from configuration */ - const size_t heap_size = 20 * 1024; + const size_t buf_size = 20 * 1024; /* Keep uncached to match the default SOF heap! */ uint8_t *mod_heap_mem = rballoc_align(SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT, - heap_size, PAGE_SZ); + buf_size, PAGE_SZ); if (!mod_heap_mem) return NULL; - struct k_heap *mod_heap = (struct k_heap *)mod_heap_mem; - const size_t heap_prefix_size = ALIGN_UP(sizeof(*mod_heap), 8); + struct dp_heap_user *mod_heap_user = (struct dp_heap_user *)mod_heap_mem; + struct k_heap *mod_heap = &mod_heap_user->heap; + const size_t heap_prefix_size = ALIGN_UP(sizeof(*mod_heap_user), 4); void *mod_heap_buf = mod_heap_mem + heap_prefix_size; - k_heap_init(mod_heap, mod_heap_buf, heap_size - heap_prefix_size); + *heap_size = buf_size - heap_prefix_size; + k_heap_init(mod_heap, mod_heap_buf, *heap_size); #ifdef __ZEPHYR__ mod_heap->heap.init_mem = mod_heap_buf; - mod_heap->heap.init_bytes = heap_size - heap_prefix_size; + mod_heap->heap.init_bytes = *heap_size; #endif - return mod_heap; + return mod_heap_user; } static struct processing_module *module_adapter_mem_alloc(const struct comp_driver *drv, @@ -91,16 +100,21 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv */ uint32_t flags = config->proc_domain == COMP_PROCESSING_DOMAIN_DP ? 
SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT : SOF_MEM_FLAG_USER; + struct dp_heap_user *mod_heap_user; + size_t heap_size; if (config->proc_domain == COMP_PROCESSING_DOMAIN_DP && IS_ENABLED(CONFIG_USERSPACE) && !IS_ENABLED(CONFIG_SOF_USERSPACE_USE_DRIVER_HEAP)) { - mod_heap = module_adapter_dp_heap_new(config); - if (!mod_heap) { + mod_heap_user = module_adapter_dp_heap_new(config, &heap_size); + if (!mod_heap_user) { comp_cl_err(drv, "Failed to allocate DP module heap"); return NULL; } + mod_heap = &mod_heap_user->heap; } else { mod_heap = drv->user_heap; + mod_heap_user = NULL; + heap_size = 0; } struct processing_module *mod = sof_heap_alloc(mod_heap, flags, sizeof(*mod), 0); @@ -112,6 +126,7 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv memset(mod, 0, sizeof(*mod)); mod->priv.resources.heap = mod_heap; + mod_resource_init(mod); /* * Would be difficult to optimize the allocation to use cache. Only if @@ -132,13 +147,15 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv mod->dev = dev; dev->mod = mod; + if (mod_heap_user) + mod_heap_user->client_count++; + return mod; err: sof_heap_free(mod_heap, mod); emod: - if (mod_heap != drv->user_heap) - rfree(mod_heap); + rfree(mod_heap_user); return NULL; } @@ -146,12 +163,24 @@ static struct processing_module *module_adapter_mem_alloc(const struct comp_driv static void module_adapter_mem_free(struct processing_module *mod) { struct k_heap *mod_heap = mod->priv.resources.heap; + unsigned int domain = mod->dev->ipc_config.proc_domain; + /* + * In principle it shouldn't even be needed to free individual objects + * on the module heap since we're freeing the heap itself too + */ #if CONFIG_IPC_MAJOR_4 sof_heap_free(mod_heap, mod->priv.cfg.input_pins); #endif sof_heap_free(mod_heap, mod->dev); sof_heap_free(mod_heap, mod); + if (domain == COMP_PROCESSING_DOMAIN_DP) { + struct dp_heap_user *mod_heap_user = container_of(mod_heap, struct dp_heap_user, + heap); + + if (mod_heap && !--mod_heap_user->client_count) + rfree(mod_heap_user); + } } /* @@ -565,6 +594,14 @@ int module_adapter_prepare(struct comp_dev *dev) goto free; } + if (md->resources.heap && md->resources.heap != dev->drv->user_heap) { + struct dp_heap_user *dp_user = container_of(md->resources.heap, + struct dp_heap_user, + heap); + + dp_user->client_count++; + } + irq_local_disable(flags); list_item_prepend(&buffer->buffers_list, &mod->raw_data_buffers_list); irq_local_enable(flags); @@ -1296,8 +1333,20 @@ int module_adapter_trigger(struct comp_dev *dev, int cmd) dev->state = COMP_STATE_ACTIVE; return PPL_STATUS_PATH_STOP; } - if (interface->trigger) + + if (interface->trigger) { +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + if (dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) { + /* Process DP module's trigger */ + const union scheduler_dp_thread_ipc_param param = { + .pipeline_state.trigger_cmd = cmd, + }; + return scheduler_dp_thread_ipc(mod, SOF_IPC4_GLB_SET_PIPELINE_STATE, + ¶m); + } +#endif return interface->trigger(mod, cmd); + } return module_adapter_set_state(mod, dev, cmd); } @@ -1359,8 +1408,18 @@ void module_adapter_free(struct comp_dev *dev) comp_dbg(dev, "start"); - if (dev->task) - schedule_task_cancel(dev->task); + if (dev->task) { + /* + * Run DP module's .free() method in its thread context. 
+ * Unlike with other IPCs we first run module's .free() in + * thread context, then cancel the thread, and then execute + * final clean up + */ +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY + scheduler_dp_thread_ipc(mod, SOF_IPC4_MOD_DELETE_INSTANCE, NULL); +#endif + schedule_task_free(dev->task); + } ret = module_free(mod); if (ret) diff --git a/src/audio/pipeline/pipeline-schedule.c b/src/audio/pipeline/pipeline-schedule.c index a13e95982ccc..74f04f88d1dc 100644 --- a/src/audio/pipeline/pipeline-schedule.c +++ b/src/audio/pipeline/pipeline-schedule.c @@ -383,7 +383,6 @@ static enum task_state dp_task_run(void *data) int pipeline_comp_dp_task_init(struct comp_dev *comp) { - int ret; /* DP tasks are guaranteed to have a module_adapter */ struct processing_module *mod = comp_mod(comp); struct task_ops ops = { @@ -392,22 +391,17 @@ int pipeline_comp_dp_task_init(struct comp_dev *comp) .complete = NULL }; - if (!comp->task) { - ret = scheduler_dp_task_init(&comp->task, - SOF_UUID(dp_task_uuid), - &ops, - mod, - comp->ipc_config.core, - TASK_DP_STACK_SIZE, -#if CONFIG_USERSPACE - mod->user_ctx ? K_USER : -#endif /* CONFIG_USERSPACE */ - 0); - if (ret < 0) - return ret; - } + if (comp->task) + return 0; - return 0; +#if CONFIG_SOF_USERSPACE_PROXY + unsigned int flags = mod->user_ctx ? K_USER : 0; +#else + unsigned int flags = IS_ENABLED(CONFIG_USERSPACE) ? K_USER : 0; +#endif + + return scheduler_dp_task_init(&comp->task, SOF_UUID(dp_task_uuid), &ops, mod, + comp->ipc_config.core, TASK_DP_STACK_SIZE, flags); } #endif /* CONFIG_ZEPHYR_DP_SCHEDULER */ diff --git a/src/include/sof/audio/component_ext.h b/src/include/sof/audio/component_ext.h index 55b8dc9e4f40..f0a1ad7e8512 100644 --- a/src/include/sof/audio/component_ext.h +++ b/src/include/sof/audio/component_ext.h @@ -44,17 +44,15 @@ struct comp_dev *comp_new_ipc4(struct ipc4_module_init_instance *module_init); /** See comp_ops::free */ static inline void comp_free(struct comp_dev *dev) { - assert(dev->drv->ops.free); - - /* free task if shared component or DP task*/ - if ((dev->is_shared || dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_DP) && - dev->task) { - schedule_task_free(dev->task); - sof_heap_free(dev->drv->user_heap, dev->task); - dev->task = NULL; - } + const struct comp_driver *drv = dev->drv; + + assert(drv->ops.free); - dev->drv->ops.free(dev); + /* + * In DP case this will run in DP thread context, so the task can only + * be freed after this. 
+ */ + drv->ops.free(dev); } /** diff --git a/src/include/sof/audio/module_adapter/module/generic.h b/src/include/sof/audio/module_adapter/module/generic.h index f339ac158ab0..a283b6830e32 100644 --- a/src/include/sof/audio/module_adapter/module/generic.h +++ b/src/include/sof/audio/module_adapter/module/generic.h @@ -23,6 +23,7 @@ #if CONFIG_MODULE_MEMORY_API_DEBUG && defined(__ZEPHYR__) #include #endif +#include /* * helpers to determine processing type @@ -190,7 +191,19 @@ struct module_processing_data { int module_load_config(struct comp_dev *dev, const void *cfg, size_t size); int module_init(struct processing_module *mod); void *mod_balloc_align(struct processing_module *mod, size_t size, size_t alignment); -void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, size_t alignment); +void mod_resource_init(struct processing_module *mod); +void mod_heap_info(struct processing_module *mod, size_t *size, uintptr_t *start); +#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION) +__syscall void *mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, + size_t alignment); +__syscall int mod_free(struct processing_module *mod, const void *ptr); +#else +void *z_impl_mod_alloc_ext(struct processing_module *mod, uint32_t flags, size_t size, + size_t alignment); +int z_impl_mod_free(struct processing_module *mod, const void *ptr); +#define mod_alloc_ext z_impl_mod_alloc_ext +#define mod_free z_impl_mod_free +#endif /** * Allocates aligned memory block for module. @@ -226,13 +239,19 @@ static inline void *mod_zalloc(struct processing_module *mod, size_t size) return ret; } -int mod_free(struct processing_module *mod, const void *ptr); #if CONFIG_COMP_BLOB struct comp_data_blob_handler *mod_data_blob_handler_new(struct processing_module *mod); void mod_data_blob_handler_free(struct processing_module *mod, struct comp_data_blob_handler *dbh); #endif #if CONFIG_FAST_GET -const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr, size_t size); +#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION) +__syscall const void *mod_fast_get(struct processing_module *mod, const void * const dram_ptr, + size_t size); +#else +const void *z_impl_mod_fast_get(struct processing_module *mod, const void * const dram_ptr, + size_t size); +#define mod_fast_get z_impl_mod_fast_get +#endif void mod_fast_put(struct processing_module *mod, const void *sram_ptr); #endif void mod_free_all(struct processing_module *mod); @@ -449,4 +468,8 @@ static inline uint32_t module_get_lpt(struct processing_module *mod) return mod->dev->period; } +#if defined(__ZEPHYR__) && defined(CONFIG_SOF_FULL_ZEPHYR_APPLICATION) +#include +#endif + #endif /* __SOF_AUDIO_MODULE_GENERIC__ */ diff --git a/src/include/sof/lib_manager.h b/src/include/sof/lib_manager.h index 83c3efb4d890..f1ead3028984 100644 --- a/src/include/sof/lib_manager.h +++ b/src/include/sof/lib_manager.h @@ -91,6 +91,8 @@ enum { LIB_MANAGER_DATA, LIB_MANAGER_RODATA, LIB_MANAGER_BSS, + LIB_MANAGER_COLD, + LIB_MANAGER_COLDRODATA, LIB_MANAGER_N_SEGMENTS, }; diff --git a/src/include/sof/schedule/dp_schedule.h b/src/include/sof/schedule/dp_schedule.h index 99f1dcd2b16f..27afcf9e5d01 100644 --- a/src/include/sof/schedule/dp_schedule.h +++ b/src/include/sof/schedule/dp_schedule.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include struct processing_module; @@ -89,6 +91,49 @@ void scheduler_get_task_info_dp(struct scheduler_props *scheduler_props, enum { 
DP_TASK_EVENT_PROCESS = BIT(0), /* Need to process data */ DP_TASK_EVENT_CANCEL = BIT(1), /* Thread cancellation */ + DP_TASK_EVENT_IPC = BIT(2), /* IPC message */ }; +struct bind_info; +struct sof_source; +struct sof_sink; +/* + * Keeps the scheduler_dp_thread_ipc() flow simple - just one call that does all + * the IPC-message specific parameter packing internally. This is slightly + * suboptimal because IPC parameters first have to be collected in this + * structure and then packed in DP-accessible memory inside + * scheduler_dp_thread_ipc(). This could be split into two levels, by adding + * IPC-specific functions like ipc_flatten_pipeline_state() and similar, but + * that would add multiple functions to the API. + */ +union scheduler_dp_thread_ipc_param { + struct bind_info *bind_data; + struct { + unsigned int trigger_cmd; + enum ipc4_pipeline_state state; + int n_sources; + struct sof_source **sources; + int n_sinks; + struct sof_sink **sinks; + } pipeline_state; +}; + +struct dp_heap_user { + struct k_heap heap; + /* So far relying on linear processing of serialized IPCs, but might need protection */ + unsigned int client_count; /* devices and buffers */ +}; + +#if CONFIG_ZEPHYR_DP_SCHEDULER +int scheduler_dp_thread_ipc(struct processing_module *pmod, unsigned int cmd, + const union scheduler_dp_thread_ipc_param *param); +#else +static inline int scheduler_dp_thread_ipc(struct processing_module *pmod, + unsigned int cmd, + const union scheduler_dp_thread_ipc_param *param) +{ + return 0; +} +#endif + #endif /* __SOF_SCHEDULE_DP_SCHEDULE_H__ */ diff --git a/src/ipc/ipc4/helper.c b/src/ipc/ipc4/helper.c index 55ec4483ff1d..0bc49a7f0df3 100644 --- a/src/ipc/ipc4/helper.c +++ b/src/ipc/ipc4/helper.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,14 @@ __cold int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect) return IPC4_OUT_OF_MEMORY; } +#if CONFIG_ZEPHYR_DP_SCHEDULER + if (dp_heap) { + struct dp_heap_user *dp_user = container_of(dp_heap, struct dp_heap_user, heap); + + dp_user->client_count++; + } +#endif + /* * set min_free_space and min_available in sink/src api of created buffer. 
* buffer is connected like: diff --git a/src/library_manager/llext_manager.c b/src/library_manager/llext_manager.c index 41b8bb4c9246..b5f912fb16b9 100644 --- a/src/library_manager/llext_manager.c +++ b/src/library_manager/llext_manager.c @@ -125,7 +125,7 @@ static int llext_manager_load_data_from_storage(const struct sys_mm_drv_region * int ret = llext_get_section_info(ldr, ext, i, &shdr, &s_region, &s_offset); if (ret < 0) { - tr_err(lib_manager_tr, "no section info: %d", ret); + tr_err(&lib_manager_tr, "no section info: %d", ret); continue; } @@ -760,6 +760,8 @@ int llext_manager_add_domain(const uint32_t component_id, struct k_mem_domain *d const uint32_t entry_index = LIB_MANAGER_GET_MODULE_INDEX(module_id); const unsigned int mod_idx = llext_manager_mod_find(ctx, entry_index); struct lib_manager_module *mctx = ctx->mod + mod_idx; + const struct llext *ext = mctx->llext; + const struct llext_loader *ldr = &mctx->ebl->loader; /* Executable code (.text) */ uintptr_t va_base_text = mctx->segment[LIB_MANAGER_TEXT].addr; @@ -793,7 +795,92 @@ int llext_manager_add_domain(const uint32_t component_id, struct k_mem_domain *d goto e_rodata; } + elf_shdr_t shdr_cold, shdr_coldrodata; + bool rodata = false, text = false; + const void *rodata_addr = NULL, *text_addr = NULL; + size_t text_offset = 0, rodata_offset = 0; + + shdr_cold.sh_size = 0; + shdr_coldrodata.sh_size = 0; + + ret = llext_get_section_header((struct llext_loader *)ldr, (struct llext *)ext, + ".cold", &shdr_cold); + if (ret < 0) + tr_warn(&lib_manager_tr, "couldn't get .cold header"); + else + llext_get_region_info(ldr, ext, LLEXT_MEM_TEXT, NULL, &text_addr, NULL); + + ret = llext_get_section_header((struct llext_loader *)ldr, (struct llext *)ext, + ".coldrodata", &shdr_coldrodata); + if (ret < 0) + tr_warn(&lib_manager_tr, "couldn't get .coldrodata header"); + else + llext_get_region_info(ldr, ext, LLEXT_MEM_RODATA, NULL, &rodata_addr, NULL); + + for (unsigned int i = 0; i < llext_section_count(ext) && (!rodata || !text); i++) { + const elf_shdr_t *shdr; + enum llext_mem s_region = LLEXT_MEM_COUNT; + size_t s_offset = 0; + + ret = llext_get_section_info(ldr, ext, i, &shdr, &s_region, &s_offset); + if (ret < 0) + continue; + + switch (s_region) { + case LLEXT_MEM_TEXT: + if (shdr_cold.sh_size && + shdr->sh_name == shdr_cold.sh_name && + shdr->sh_offset == shdr_cold.sh_offset && !text) { + text = true; + text_offset = s_offset; + } + break; + case LLEXT_MEM_RODATA: + if (shdr_coldrodata.sh_size && + shdr->sh_name == shdr_coldrodata.sh_name && + shdr->sh_offset == shdr_coldrodata.sh_offset && !rodata) { + rodata = true; + rodata_offset = s_offset; + } + break; + default: + break; + } + } + + if (text) { + tr_dbg(&lib_manager_tr, ".cold %#x @ %#lx", + shdr_cold.sh_size, (uintptr_t)text_addr + text_offset); + ret = llext_manager_add_partition(domain, (uintptr_t)text_addr + text_offset, + shdr_cold.sh_size, + K_MEM_PARTITION_P_RX_U_RX); + if (ret < 0) + goto e_data; + mctx->segment[LIB_MANAGER_COLD].addr = (uintptr_t)text_addr + text_offset; + mctx->segment[LIB_MANAGER_COLD].size = shdr_cold.sh_size; + } + + if (rodata) { + tr_dbg(&lib_manager_tr, ".coldrodata %#x @ %#lx", + shdr_coldrodata.sh_size, (uintptr_t)rodata_addr + rodata_offset); + ret = llext_manager_add_partition(domain, (uintptr_t)rodata_addr + rodata_offset, + shdr_coldrodata.sh_size, + K_MEM_PARTITION_P_RO_U_RO); + if (ret < 0) + goto e_cold; + mctx->segment[LIB_MANAGER_COLDRODATA].addr = (uintptr_t)rodata_addr + rodata_offset; + 
mctx->segment[LIB_MANAGER_COLDRODATA].size = shdr_coldrodata.sh_size; + } + return 0; + +e_cold: + llext_manager_rm_partition(domain, (uintptr_t)text_addr + text_offset, shdr_cold.sh_size, + K_MEM_PARTITION_P_RX_U_RX); + mctx->segment[LIB_MANAGER_COLD].addr = 0; + mctx->segment[LIB_MANAGER_COLD].size = 0; +e_data: + llext_manager_rm_partition(domain, va_base_data, data_size, K_MEM_PARTITION_P_RW_U_RW); e_rodata: llext_manager_rm_partition(domain, va_base_rodata, rodata_size, K_MEM_PARTITION_P_RO_U_RO); e_text: @@ -848,6 +935,31 @@ int llext_manager_rm_domain(const uint32_t component_id, struct k_mem_domain *do } } + if (mctx->segment[LIB_MANAGER_COLD].addr) { + err = llext_manager_rm_partition(domain, + mctx->segment[LIB_MANAGER_COLD].addr, + mctx->segment[LIB_MANAGER_COLD].size, + K_MEM_PARTITION_P_RX_U_RX); + if (err < 0) { + tr_err(&lib_manager_tr, "failed to remove .cold memory partition: %d", err); + if (!ret) + ret = err; + } + } + + if (mctx->segment[LIB_MANAGER_COLDRODATA].addr) { + err = llext_manager_rm_partition(domain, + mctx->segment[LIB_MANAGER_COLDRODATA].addr, + mctx->segment[LIB_MANAGER_COLDRODATA].size, + K_MEM_PARTITION_P_RO_U_RO); + if (err < 0) { + tr_err(&lib_manager_tr, + "failed to remove .coldrodata memory partition: %d", err); + if (!ret) + ret = err; + } + } + return ret; } #endif diff --git a/src/schedule/zephyr_dp_schedule.c b/src/schedule/zephyr_dp_schedule.c index 1f006d50e52c..73c0e0275d94 100644 --- a/src/schedule/zephyr_dp_schedule.c +++ b/src/schedule/zephyr_dp_schedule.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ void scheduler_dp_unlock(unsigned int key) k_sem_give(&dp_lock[key]); } -static inline void scheduler_dp_grant(k_tid_t thread_id, uint16_t core) +void scheduler_dp_grant(k_tid_t thread_id, uint16_t core) { #if CONFIG_USERSPACE k_thread_access_grant(thread_id, &dp_lock[core]); @@ -239,13 +240,21 @@ void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void * scheduler_dp_unlock(lock_key); } +#if CONFIG_USERSPACE && !CONFIG_SOF_USERSPACE_PROXY static int scheduler_dp_task_cancel(void *data, struct task *task) +{ + /* Should never be called */ + k_panic(); + return -EOPNOTSUPP; +} +#endif + +static int scheduler_dp_task_stop(void *data, struct task *task) { unsigned int lock_key; struct scheduler_dp_data *dp_sch = (struct scheduler_dp_data *)data; struct task_dp_pdata *pdata = task->priv_data; - /* this is asyn cancel - mark the task as canceled and remove it from scheduling */ lock_key = scheduler_dp_lock(cpu_get_id()); @@ -256,8 +265,12 @@ static int scheduler_dp_task_cancel(void *data, struct task *task) if (list_is_empty(&dp_sch->tasks)) schedule_task_cancel(&dp_sch->ll_tick_src); - /* if the task is waiting on a event - let it run and self-terminate */ + /* if the task is waiting - let it run and self-terminate */ +#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE k_event_set(pdata->event, DP_TASK_EVENT_CANCEL); +#else + k_sem_give(pdata->sem); +#endif scheduler_dp_unlock(lock_key); /* wait till the task has finished, if there was any task created */ @@ -272,7 +285,7 @@ static int scheduler_dp_task_free(void *data, struct task *task) struct task_dp_pdata *pdata = task->priv_data; int ret; - scheduler_dp_task_cancel(data, task); + scheduler_dp_task_stop(data, task); /* the thread should be terminated at this moment, * abort is safe and will ensure no use after free @@ -283,16 +296,23 @@ static int scheduler_dp_task_free(void *data, struct task *task) } #ifdef CONFIG_USERSPACE 
+#if CONFIG_SOF_USERSPACE_PROXY if (pdata->event != &pdata->event_struct) k_object_free(pdata->event); +#else + if (pdata->sem != &pdata->sem_struct) + k_object_free(pdata->sem); +#endif if (pdata->thread != &pdata->thread_struct) k_object_free(pdata->thread); #endif /* free task stack */ - ret = user_stack_free((__sparse_force void *)pdata->p_stack); + ret = user_stack_free(pdata->p_stack); pdata->p_stack = NULL; + scheduler_dp_domain_free(pdata->mod); + /* all other memory has been allocated as a single malloc, will be freed later by caller */ return ret; } @@ -330,7 +350,11 @@ static int scheduler_dp_task_shedule(void *data, struct task *task, uint64_t sta static struct scheduler_ops schedule_dp_ops = { .schedule_task = scheduler_dp_task_shedule, +#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE + .schedule_task_cancel = scheduler_dp_task_stop, +#else .schedule_task_cancel = scheduler_dp_task_cancel, +#endif .schedule_task_free = scheduler_dp_task_free, }; @@ -358,143 +382,9 @@ int scheduler_dp_init(void) notifier_register(NULL, NULL, NOTIFIER_ID_LL_POST_RUN, scheduler_dp_ll_tick, 0); - return 0; -} - -int scheduler_dp_task_init(struct task **task, - const struct sof_uuid_entry *uid, - const struct task_ops *ops, - struct processing_module *mod, - uint16_t core, - size_t stack_size, - uint32_t options) -{ - void __sparse_cache *p_stack = NULL; - struct k_heap *const user_heap = mod->dev->drv->user_heap; - - /* memory allocation helper structure */ - struct { - struct task task; - struct task_dp_pdata pdata; - } *task_memory; - - int ret; - - /* must be called on the same core the task will be binded to */ - assert(cpu_get_id() == core); - - /* - * allocate memory - * to avoid multiple malloc operations allocate all required memory as a single structure - * and return pointer to task_memory->task - * As the structure contains zephyr kernel specific data, it must be located in - * shared, non cached memory - */ - task_memory = sof_heap_alloc(user_heap, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT, - sizeof(*task_memory), 0); - if (!task_memory) { - tr_err(&dp_tr, "memory alloc failed"); - return -ENOMEM; - } - - memset(task_memory, 0, sizeof(*task_memory)); - /* allocate stack - must be aligned and cached so a separate alloc */ - p_stack = user_stack_allocate(stack_size, options); - if (!p_stack) { - tr_err(&dp_tr, "stack alloc failed"); - ret = -ENOMEM; - goto err; - } - - /* internal SOF task init */ - ret = schedule_task_init(&task_memory->task, uid, SOF_SCHEDULE_DP, 0, ops->run, - mod, core, options); - if (ret < 0) { - tr_err(&dp_tr, "schedule_task_init failed"); - goto err; - } - - struct task_dp_pdata *pdata = &task_memory->pdata; - - /* Point to event_struct event for kernel threads synchronization */ - /* It will be overwritten for K_USER threads to dynamic ones. 
*/ - pdata->event = &pdata->event_struct; - pdata->thread = &pdata->thread_struct; - -#ifdef CONFIG_USERSPACE - if (options & K_USER) { - pdata->event = k_object_alloc(K_OBJ_EVENT); - if (!pdata->event) { - tr_err(&dp_tr, "Event object allocation failed"); - ret = -ENOMEM; - goto err; - } - - pdata->thread = k_object_alloc(K_OBJ_THREAD); - if (!pdata->thread) { - tr_err(&dp_tr, "Thread object allocation failed"); - ret = -ENOMEM; - goto err; - } - } -#endif /* CONFIG_USERSPACE */ - - /* initialize other task structures */ - task_memory->task.ops.complete = ops->complete; - task_memory->task.ops.get_deadline = ops->get_deadline; - task_memory->task.state = SOF_TASK_STATE_INIT; - task_memory->task.core = core; - task_memory->task.priv_data = pdata; - - /* success, fill the structures */ - pdata->p_stack = p_stack; - pdata->stack_size = stack_size; - pdata->mod = mod; - *task = &task_memory->task; - - /* create a zephyr thread for the task */ - pdata->thread_id = k_thread_create(pdata->thread, (__sparse_force void *)p_stack, - stack_size, dp_thread_fn, *task, NULL, NULL, - CONFIG_DP_THREAD_PRIORITY, (*task)->flags, K_FOREVER); - - k_thread_access_grant(pdata->thread_id, pdata->event); - scheduler_dp_grant(pdata->thread_id, cpu_get_id()); - - /* pin the thread to specific core */ - ret = k_thread_cpu_pin(pdata->thread_id, core); - if (ret < 0) { - tr_err(&dp_tr, "zephyr task pin to core failed"); - goto e_thread; - } - -#ifdef CONFIG_USERSPACE - if ((*task)->flags & K_USER) { - ret = user_memory_init_shared(pdata->thread_id, pdata->mod); - if (ret < 0) { - tr_err(&dp_tr, "user_memory_init_shared() failed"); - goto e_thread; - } - } -#endif /* CONFIG_USERSPACE */ - - /* start the thread, it should immediately stop at an event */ - k_event_init(pdata->event); - k_thread_start(pdata->thread_id); + scheduler_dp_domain_init(); return 0; - -e_thread: - k_thread_abort(pdata->thread_id); -err: - /* cleanup - free all allocated resources */ - if (user_stack_free((__sparse_force void *)p_stack)) - tr_err(&dp_tr, "user_stack_free failed!"); - - /* k_object_free looks for a pointer in the list, any invalid value can be passed */ - k_object_free(task_memory->pdata.event); - k_object_free(task_memory->pdata.thread); - sof_heap_free(user_heap, task_memory); - return ret; } void scheduler_get_task_info_dp(struct scheduler_props *scheduler_props, uint32_t *data_off_size) diff --git a/src/schedule/zephyr_dp_schedule.h b/src/schedule/zephyr_dp_schedule.h index d146afab65cf..9a5d30077e15 100644 --- a/src/schedule/zephyr_dp_schedule.h +++ b/src/schedule/zephyr_dp_schedule.h @@ -10,6 +10,8 @@ #include #include +#include + #include #include @@ -19,7 +21,13 @@ struct scheduler_dp_data { uint32_t last_ll_tick_timestamp;/* a timestamp as k_cycle_get_32 of last LL tick, * "NOW" for DP deadline calculation */ +}; +enum sof_dp_part_type { + SOF_DP_PART_HEAP, + SOF_DP_PART_IPC, + SOF_DP_PART_CFG, + SOF_DP_PART_TYPE_COUNT, }; struct task_dp_pdata { @@ -27,15 +35,33 @@ struct task_dp_pdata { struct k_thread *thread; /* pointer to the kernels' thread object */ struct k_thread thread_struct; /* thread object for kernel threads */ uint32_t deadline_clock_ticks; /* dp module deadline in Zephyr ticks */ - k_thread_stack_t __sparse_cache *p_stack; /* pointer to thread stack */ - size_t stack_size; /* size of the stack in bytes */ - struct k_event *event; /* pointer to event for task scheduling */ - struct k_event event_struct; /* event for task scheduling for kernel threads */ + k_thread_stack_t *p_stack; /* pointer to thread 
stack */ struct processing_module *mod; /* the module to be scheduled */ uint32_t ll_cycles_to_start; /* current number of LL cycles till delayed start */ +#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE + struct k_event *event; /* pointer to event for task scheduling */ + struct k_event event_struct; /* event for task scheduling for kernel threads */ +#else + struct k_sem *sem; /* pointer to semaphore for task scheduling */ + struct k_sem sem_struct; /* semaphore for task scheduling for kernel threads */ + unsigned char pend_ipc; + unsigned char pend_proc; + struct k_mem_partition mpart[SOF_DP_PART_TYPE_COUNT]; +#endif }; void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_run); void dp_thread_fn(void *p1, void *p2, void *p3); unsigned int scheduler_dp_lock(uint16_t core); void scheduler_dp_unlock(unsigned int key); +void scheduler_dp_grant(k_tid_t thread_id, uint16_t core); +int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid, + const struct task_ops *ops, struct processing_module *mod, + uint16_t core, size_t stack_size, uint32_t options); +#if CONFIG_SOF_USERSPACE_PROXY || !CONFIG_USERSPACE +static inline void scheduler_dp_domain_free(struct processing_module *pmod) {} +static inline int scheduler_dp_domain_init(void) {return 0;} +#else +void scheduler_dp_domain_free(struct processing_module *pmod); +int scheduler_dp_domain_init(void); +#endif diff --git a/src/schedule/zephyr_dp_schedule_application.c b/src/schedule/zephyr_dp_schedule_application.c index f5b53d426b01..4614da3c1fd7 100644 --- a/src/schedule/zephyr_dp_schedule_application.c +++ b/src/schedule/zephyr_dp_schedule_application.c @@ -10,16 +10,199 @@ #include #include #include -#include +#include #include +#include +#include #include +#include +#include #include #include #include "zephyr_dp_schedule.h" +LOG_MODULE_DECLARE(dp_schedule, CONFIG_SOF_LOG_LEVEL); +extern struct tr_ctx dp_tr; + +#if CONFIG_USERSPACE +static struct k_mem_domain dp_mdom[CONFIG_CORE_COUNT]; +#endif + +/* Synchronization semaphore for the scheduler thread to wait for DP startup */ +#define DP_SYNC_INIT(i, _) Z_SEM_INITIALIZER(dp_sync[i], 0, 1) +#define DP_SYNC_INIT_LIST LISTIFY(CONFIG_CORE_COUNT, DP_SYNC_INIT, (,)) +static STRUCT_SECTION_ITERABLE_ARRAY(k_sem, dp_sync, CONFIG_CORE_COUNT) = { DP_SYNC_INIT_LIST }; + +/* TODO: make this a shared kernel->module buffer for IPC parameters */ +static uint8_t ipc_buf[4096] __aligned(4096); + +struct ipc4_flat { + unsigned int cmd; + int ret; + union { + struct { + struct ipc4_module_bind_unbind bu; + enum bind_type type; + } bind; + struct { + unsigned int trigger_cmd; + enum ipc4_pipeline_state state; + int n_sources; + int n_sinks; + void *source_sink[]; + } pipeline_state; + }; +}; + +/* Pack IPC input data */ +static int ipc_thread_flatten(unsigned int cmd, const union scheduler_dp_thread_ipc_param *param, + struct ipc4_flat *flat) +{ + flat->cmd = cmd; + + /* + * FIXME: SOF_IPC4_MOD_* and SOF_IPC4_GLB_* aren't fully orthogonal, but + * so far none of the used ones overlap + */ + switch (cmd) { + case SOF_IPC4_MOD_BIND: + case SOF_IPC4_MOD_UNBIND: + flat->bind.bu = *param->bind_data->ipc4_data; + flat->bind.type = param->bind_data->bind_type; + break; + case SOF_IPC4_GLB_SET_PIPELINE_STATE: + flat->pipeline_state.trigger_cmd = param->pipeline_state.trigger_cmd; + switch (param->pipeline_state.trigger_cmd) { + case COMP_TRIGGER_STOP: + break; + case COMP_TRIGGER_PREPARE: + if (sizeof(flat->cmd) + sizeof(flat->ret) + sizeof(flat->pipeline_state) + + 
sizeof(void *) * (param->pipeline_state.n_sources + + param->pipeline_state.n_sinks) > + sizeof(ipc_buf)) + return -ENOMEM; + + flat->pipeline_state.state = param->pipeline_state.state; + flat->pipeline_state.n_sources = param->pipeline_state.n_sources; + flat->pipeline_state.n_sinks = param->pipeline_state.n_sinks; + memcpy(flat->pipeline_state.source_sink, param->pipeline_state.sources, + flat->pipeline_state.n_sources * + sizeof(flat->pipeline_state.source_sink[0])); + memcpy(flat->pipeline_state.source_sink + flat->pipeline_state.n_sources, + param->pipeline_state.sinks, + flat->pipeline_state.n_sinks * + sizeof(flat->pipeline_state.source_sink[0])); + } + } + + return 0; +} + +/* Unpack IPC data and execute a callback */ +static void ipc_thread_unflatten_run(struct processing_module *pmod, struct ipc4_flat *flat) +{ + const struct module_interface *const ops = pmod->dev->drv->adapter_ops; + + switch (flat->cmd) { + case SOF_IPC4_MOD_BIND: + if (ops->bind) { + struct bind_info bind_data = { + .ipc4_data = &flat->bind.bu, + .bind_type = flat->bind.type, + }; + + flat->ret = ops->bind(pmod, &bind_data); + } else { + flat->ret = 0; + } + break; + case SOF_IPC4_MOD_UNBIND: + if (ops->unbind) { + struct bind_info bind_data = { + .ipc4_data = &flat->bind.bu, + .bind_type = flat->bind.type, + }; + + flat->ret = ops->unbind(pmod, &bind_data); + } else { + flat->ret = 0; + } + break; + case SOF_IPC4_MOD_DELETE_INSTANCE: + flat->ret = ops->free(pmod); + break; + case SOF_IPC4_MOD_INIT_INSTANCE: + flat->ret = ops->init(pmod); + break; + case SOF_IPC4_GLB_SET_PIPELINE_STATE: + switch (flat->pipeline_state.trigger_cmd) { + case COMP_TRIGGER_STOP: + flat->ret = ops->reset(pmod); + break; + case COMP_TRIGGER_PREPARE: + flat->ret = ops->prepare(pmod, + (struct sof_source **)flat->pipeline_state.source_sink, + flat->pipeline_state.n_sources, + (struct sof_sink **)(flat->pipeline_state.source_sink + + flat->pipeline_state.n_sources), + flat->pipeline_state.n_sinks); + } + } +} + +#define DP_THREAD_IPC_TIMEOUT K_MSEC(100) + +/* Signal an IPC and wait for processing completion */ +int scheduler_dp_thread_ipc(struct processing_module *pmod, unsigned int cmd, + const union scheduler_dp_thread_ipc_param *param) +{ + struct task_dp_pdata *pdata = pmod->dev->task->priv_data; + int ret; + + if (!pmod) { + tr_err(&dp_tr, "no thread module"); + return -EINVAL; + } + + if (cmd == SOF_IPC4_MOD_INIT_INSTANCE) { + /* Wait for the DP thread to start */ + ret = k_sem_take(&dp_sync[pmod->dev->task->core], DP_THREAD_IPC_TIMEOUT); + if (ret < 0) { + tr_err(&dp_tr, "Failed waiting for DP thread to start: %d", ret); + return ret; + } + } + + unsigned int lock_key = scheduler_dp_lock(pmod->dev->task->core); + + struct ipc4_flat *flat = (struct ipc4_flat *)ipc_buf; + + /* IPCs are serialised */ + flat->ret = -ENOSYS; + + ret = ipc_thread_flatten(cmd, param, flat); + if (!ret) { + pdata->pend_ipc++; + k_sem_give(pdata->sem); + } + + scheduler_dp_unlock(lock_key); + + if (!ret) { + /* Wait for completion */ + ret = k_sem_take(&dp_sync[cpu_get_id()], DP_THREAD_IPC_TIMEOUT); + if (ret < 0) + tr_err(&dp_tr, "Failed waiting for DP thread: %d", ret); + else + ret = flat->ret; + } + + return ret; +} + /* Go through all DP tasks and recalculate their readiness and deadlines * NOT REENTRANT, should be called with scheduler_dp_lock() */ @@ -45,32 +228,18 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_ mod->dp_startup_delay = false; } - if (curr_task->state == SOF_TASK_STATE_QUEUED) { - bool mod_ready; 
- - mod_ready = module_is_ready_to_process(mod, mod->sources, - mod->num_of_sources, - mod->sinks, - mod->num_of_sinks); - if (mod_ready) { - /* trigger the task */ - curr_task->state = SOF_TASK_STATE_RUNNING; - if (mod->dp_startup_delay && !pdata->ll_cycles_to_start) { - /* first time run - use delayed start */ - pdata->ll_cycles_to_start = - module_get_lpt(pdata->mod) / LL_TIMER_PERIOD_US; - - /* in case LPT < LL cycle - delay at least cycle */ - if (!pdata->ll_cycles_to_start) - pdata->ll_cycles_to_start = 1; - } - trigger_task = true; - k_event_post(pdata->event, DP_TASK_EVENT_PROCESS); - } + if (curr_task->state == SOF_TASK_STATE_QUEUED && + mod->dev->state >= COMP_STATE_ACTIVE) { + /* trigger the task */ + curr_task->state = SOF_TASK_STATE_RUNNING; + trigger_task = true; + pdata->pend_proc++; + k_sem_give(pdata->sem); } + if (curr_task->state == SOF_TASK_STATE_RUNNING) { /* (re) calculate deadline for all running tasks */ - /* get module deadline in us*/ + /* get module deadline in us */ uint32_t deadline = module_get_deadline(mod); /* if a deadline cannot be calculated, use a fixed value relative to its @@ -103,49 +272,106 @@ void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_ void dp_thread_fn(void *p1, void *p2, void *p3) { struct task *task = p1; - (void)p2; - (void)p3; struct task_dp_pdata *task_pdata = task->priv_data; + struct processing_module *pmod = task_pdata->mod; unsigned int lock_key; enum task_state state; bool task_stop; + ARG_UNUSED(p2); + ARG_UNUSED(p3); + + /* The IPC thread is waiting for the thread to be started, it can proceed now. */ + k_sem_give(&dp_sync[task->core]); + do { /* - * the thread is started immediately after creation, it will stop on event. - * Event will be signalled once the task is ready to process. + * The thread is started immediately after creation, it stops here and waits + * for the semaphore to be signalled to handle IPC or process audio data. */ - k_event_wait_safe(task_pdata->event, DP_TASK_EVENT_PROCESS | DP_TASK_EVENT_CANCEL, - false, K_FOREVER); - - if (task->state == SOF_TASK_STATE_RUNNING) - state = task_run(task); - else - state = task->state; /* to avoid undefined variable warning */ + k_sem_take(task_pdata->sem, K_FOREVER); lock_key = scheduler_dp_lock(task->core); + + unsigned char pend_ipc = task_pdata->pend_ipc, + pend_proc = task_pdata->pend_proc; + + task_pdata->pend_proc = 0; + task_pdata->pend_ipc = 0; + + scheduler_dp_unlock(lock_key); + /* - * check if task is still running, may have been canceled by external call - * if not, set the state returned by run procedure + * Only 0:1, 1:0 and 1:1 are valid. 0:0 is also possible if IPC and audio + * were signalled in a quick succession before we took the lock above. Any + * value > 1 would mean that we've missed IPCs or LL ticks while in queued / + * idle state, which shouldn't happen. 
*/ - if (task->state == SOF_TASK_STATE_RUNNING) { - task->state = state; - switch (state) { - case SOF_TASK_STATE_RESCHEDULE: - /* mark to reschedule, schedule time is already calculated */ - task->state = SOF_TASK_STATE_QUEUED; - break; + if (pend_ipc > 1 || pend_proc > 1) { + tr_err(&dp_tr, "Invalid wake up %u:%u", pend_proc, pend_ipc); + continue; + } + + if (pend_ipc) { + /* handle IPC */ + tr_dbg(&dp_tr, "got IPC wake up for %p state %d", pmod, task->state); + ipc_thread_unflatten_run(pmod, (struct ipc4_flat *)ipc_buf); + k_sem_give(&dp_sync[task->core]); + } - case SOF_TASK_STATE_CANCEL: - case SOF_TASK_STATE_COMPLETED: - /* remove from scheduling */ - list_item_del(&task->list); - break; + if (pend_proc) { + bool ready; - default: - /* illegal state, serious defect, won't happen */ - k_panic(); + if (task->state == SOF_TASK_STATE_RUNNING) { + ready = module_is_ready_to_process(pmod, pmod->sources, + pmod->num_of_sources, + pmod->sinks, pmod->num_of_sinks); + } else { + state = task->state; /* to avoid undefined variable warning */ + ready = false; } + + if (ready) { + if (pmod->dp_startup_delay && !task_pdata->ll_cycles_to_start) { + /* first time run - use delayed start */ + task_pdata->ll_cycles_to_start = + module_get_lpt(pmod) / LL_TIMER_PERIOD_US; + + /* in case LPT < LL cycle - delay at least cycle */ + if (!task_pdata->ll_cycles_to_start) + task_pdata->ll_cycles_to_start = 1; + } + + state = task_run(task); + } + + lock_key = scheduler_dp_lock(task->core); + /* + * check if task is still running, may have been canceled by external call + * if not, set the state returned by run procedure + */ + if (ready && task->state == SOF_TASK_STATE_RUNNING) { + task->state = state; + switch (state) { + case SOF_TASK_STATE_RESCHEDULE: + /* mark to reschedule, schedule time is already calculated */ + task->state = SOF_TASK_STATE_QUEUED; + break; + + case SOF_TASK_STATE_CANCEL: + case SOF_TASK_STATE_COMPLETED: + /* task already removed from scheduling */ + break; + + default: + /* illegal state, serious defect, won't happen */ + k_oops(); + } + } else { + task->state = SOF_TASK_STATE_QUEUED; + } + } else { + lock_key = scheduler_dp_lock(task->core); } /* if true exit the while loop, terminate the thread */ @@ -159,3 +385,212 @@ void dp_thread_fn(void *p1, void *p2, void *p3) if (task->state == SOF_TASK_STATE_COMPLETED) task_complete(task); } + +/* + * Safe to call with partial successful initialisation, + * k_mem_domain_remove_partition() then just returns -ENOENT + */ +void scheduler_dp_domain_free(struct processing_module *pmod) +{ +#if CONFIG_USERSPACE + unsigned int core = pmod->dev->task->core; + + llext_manager_rm_domain(pmod->dev->ipc_config.id, dp_mdom + core); + + struct task_dp_pdata *pdata = pmod->dev->task->priv_data; + + k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_HEAP); + k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_IPC); + k_mem_domain_remove_partition(dp_mdom + core, pdata->mpart + SOF_DP_PART_CFG); +#endif +} + +int scheduler_dp_task_init(struct task **task, const struct sof_uuid_entry *uid, + const struct task_ops *ops, struct processing_module *mod, + uint16_t core, size_t stack_size, uint32_t options) +{ + k_thread_stack_t *p_stack; + /* memory allocation helper structure */ + struct { + struct task task; + struct task_dp_pdata pdata; + struct comp_driver drv; + struct module_interface ops; + } *task_memory; + + int ret; + + /* must be called on the same core the task will be binded to */ + assert(cpu_get_id() == 
core); + + /* + * allocate memory + * to avoid multiple malloc operations allocate all required memory as a single structure + * and return pointer to task_memory->task + * As the structure contains zephyr kernel specific data, it must be located in + * shared, non cached memory + */ + task_memory = mod_alloc_ext(mod, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT, + sizeof(*task_memory), 0); + if (!task_memory) { + tr_err(&dp_tr, "memory alloc failed"); + return -ENOMEM; + } + + memset(task_memory, 0, sizeof(*task_memory)); + + task_memory->drv = *mod->dev->drv; + task_memory->ops = *mod->dev->drv->adapter_ops; // FIXME: is this needed? + task_memory->drv.adapter_ops = &task_memory->ops; + mod->dev->drv = &task_memory->drv; + + /* allocate stack - must be aligned and cached so a separate alloc */ + p_stack = user_stack_allocate(stack_size, options); + if (!p_stack) { + tr_err(&dp_tr, "stack alloc failed"); + ret = -ENOMEM; + goto e_tmem; + } + + struct task *ptask = &task_memory->task; + + /* internal SOF task init */ + ret = schedule_task_init(ptask, uid, SOF_SCHEDULE_DP, 0, ops->run, mod, core, options); + if (ret < 0) { + tr_err(&dp_tr, "schedule_task_init failed"); + goto e_stack; + } + + struct task_dp_pdata *pdata = &task_memory->pdata; + + /* Point to event_struct event for kernel threads synchronization */ + /* It will be overwritten for K_USER threads to dynamic ones. */ + pdata->sem = &pdata->sem_struct; + pdata->thread = &pdata->thread_struct; + +#ifdef CONFIG_USERSPACE + if (options & K_USER) { + pdata->sem = k_object_alloc(K_OBJ_SEM); + if (!pdata->sem) { + tr_err(&dp_tr, "Event object allocation failed"); + ret = -ENOMEM; + goto e_stack; + } + + pdata->thread = k_object_alloc(K_OBJ_THREAD); + if (!pdata->thread) { + tr_err(&dp_tr, "Thread object allocation failed"); + ret = -ENOMEM; + goto e_kobj; + } + memset(&pdata->thread->arch, 0, sizeof(pdata->thread->arch)); + } +#endif /* CONFIG_USERSPACE */ + + /* success, fill the structures */ + pdata->p_stack = p_stack; + pdata->mod = mod; + + /* initialize other task structures */ + ptask->ops.complete = ops->complete; + ptask->ops.get_deadline = ops->get_deadline; + ptask->priv_data = pdata; + list_init(&ptask->list); + *task = ptask; + + /* create a zephyr thread for the task */ + pdata->thread_id = k_thread_create(pdata->thread, p_stack, + stack_size, dp_thread_fn, ptask, NULL, NULL, + CONFIG_DP_THREAD_PRIORITY, ptask->flags, K_FOREVER); + + /* pin the thread to specific core */ + ret = k_thread_cpu_pin(pdata->thread_id, core); + if (ret < 0) { + tr_err(&dp_tr, "zephyr task pin to core failed"); + goto e_thread; + } + +#if CONFIG_USERSPACE + k_thread_access_grant(pdata->thread_id, pdata->sem, &dp_sync[core]); + scheduler_dp_grant(pdata->thread_id, core); + + unsigned int pidx; + size_t size; + uintptr_t start; + struct k_mem_partition *ppart[SOF_DP_PART_TYPE_COUNT]; + + for (pidx = 0; pidx < ARRAY_SIZE(ppart); pidx++) + ppart[pidx] = pdata->mpart + pidx; + + /* Module heap partition */ + mod_heap_info(mod, &size, &start); + pdata->mpart[SOF_DP_PART_HEAP] = (struct k_mem_partition){ + .start = start, + .size = size, + .attr = K_MEM_PARTITION_P_RW_U_RW, + }; + /* IPC flattening buffer partition */ + pdata->mpart[SOF_DP_PART_IPC] = (struct k_mem_partition){ + .start = (uintptr_t)&ipc_buf, + .size = sizeof(ipc_buf), + .attr = K_MEM_PARTITION_P_RW_U_RW, + }; + /* Host mailbox partition for additional IPC parameters: read-only */ + pdata->mpart[SOF_DP_PART_CFG] = (struct k_mem_partition){ + .start = (uintptr_t)MAILBOX_HOSTBOX_BASE, + 
.size = 4096, + .attr = K_MEM_PARTITION_P_RO_U_RO, + }; + + for (pidx = 0; pidx < SOF_DP_PART_TYPE_COUNT; pidx++) { + ret = k_mem_domain_add_partition(dp_mdom + core, pdata->mpart + pidx); + if (ret < 0) + goto e_dom; + } + + ret = llext_manager_add_domain(mod->dev->ipc_config.id, dp_mdom + core); + if (ret < 0) { + tr_err(&dp_tr, "failed to add LLEXT to domain %d", ret); + goto e_dom; + } + + /* + * Keep this call last, able to fail, otherwise domain will be removed + * before its thread + */ + ret = k_mem_domain_add_thread(dp_mdom + core, pdata->thread_id); + if (ret < 0) { + tr_err(&dp_tr, "failed to add thread to domain %d", ret); + goto e_dom; + } +#endif /* CONFIG_USERSPACE */ + + /* start the thread, it should immediately stop at the semaphore */ + k_sem_init(pdata->sem, 0, 1); + k_thread_start(pdata->thread_id); + + return 0; + +#ifdef CONFIG_USERSPACE +e_dom: + scheduler_dp_domain_free(mod); +#endif +e_thread: + k_thread_abort(pdata->thread_id); +#ifdef CONFIG_USERSPACE +e_kobj: + /* k_object_free looks for a pointer in the list, any invalid value can be passed */ + k_object_free(pdata->thread); + k_object_free(pdata->sem); +#endif +e_stack: + user_stack_free(p_stack); +e_tmem: + mod_free(mod, task_memory); + return ret; +} + +int scheduler_dp_domain_init(void) +{ + return k_mem_domain_init(dp_mdom + cpu_get_id(), 0, NULL); +} diff --git a/src/schedule/zephyr_dp_schedule_thread.c b/src/schedule/zephyr_dp_schedule_thread.c index 7a79e214eaca..c9874155893c 100644 --- a/src/schedule/zephyr_dp_schedule_thread.c +++ b/src/schedule/zephyr_dp_schedule_thread.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -20,6 +21,9 @@ #include "zephyr_dp_schedule.h" +LOG_MODULE_DECLARE(dp_schedule, CONFIG_SOF_LOG_LEVEL); +extern struct tr_ctx dp_tr; + /* Go through all DP tasks and recalculate their readiness and deadlines * NOT REENTRANT, should be called with scheduler_dp_lock() */ @@ -169,3 +173,138 @@ void dp_thread_fn(void *p1, void *p2, void *p3) if (task->state == SOF_TASK_STATE_COMPLETED) task_complete(task); } + +int scheduler_dp_task_init(struct task **task, + const struct sof_uuid_entry *uid, + const struct task_ops *ops, + struct processing_module *mod, + uint16_t core, + size_t stack_size, + uint32_t options) +{ + void __sparse_cache *p_stack = NULL; + struct k_heap *const user_heap = mod->dev->drv->user_heap; + + /* memory allocation helper structure */ + struct { + struct task task; + struct task_dp_pdata pdata; + } *task_memory; + + int ret; + + /* must be called on the same core the task will be binded to */ + assert(cpu_get_id() == core); + + /* + * allocate memory + * to avoid multiple malloc operations allocate all required memory as a single structure + * and return pointer to task_memory->task + * As the structure contains zephyr kernel specific data, it must be located in + * shared, non cached memory + */ + task_memory = sof_heap_alloc(user_heap, SOF_MEM_FLAG_USER | SOF_MEM_FLAG_COHERENT, + sizeof(*task_memory), 0); + if (!task_memory) { + tr_err(&dp_tr, "memory alloc failed"); + return -ENOMEM; + } + + memset(task_memory, 0, sizeof(*task_memory)); + /* allocate stack - must be aligned and cached so a separate alloc */ + p_stack = user_stack_allocate(stack_size, options); + if (!p_stack) { + tr_err(&dp_tr, "stack alloc failed"); + ret = -ENOMEM; + goto err; + } + + /* internal SOF task init */ + ret = schedule_task_init(&task_memory->task, uid, SOF_SCHEDULE_DP, 0, ops->run, + mod, core, options); + if (ret < 0) { + tr_err(&dp_tr, "schedule_task_init 
failed"); + goto err; + } + + struct task_dp_pdata *pdata = &task_memory->pdata; + + /* Point to event_struct event for kernel threads synchronization */ + /* It will be overwritten for K_USER threads to dynamic ones. */ + pdata->event = &pdata->event_struct; + pdata->thread = &pdata->thread_struct; + +#ifdef CONFIG_USERSPACE + if (options & K_USER) { + pdata->event = k_object_alloc(K_OBJ_EVENT); + if (!pdata->event) { + tr_err(&dp_tr, "Event object allocation failed"); + ret = -ENOMEM; + goto err; + } + + pdata->thread = k_object_alloc(K_OBJ_THREAD); + if (!pdata->thread) { + tr_err(&dp_tr, "Thread object allocation failed"); + ret = -ENOMEM; + goto err; + } + } +#endif /* CONFIG_USERSPACE */ + + /* initialize other task structures */ + task_memory->task.ops.complete = ops->complete; + task_memory->task.ops.get_deadline = ops->get_deadline; + task_memory->task.state = SOF_TASK_STATE_INIT; + task_memory->task.core = core; + task_memory->task.priv_data = pdata; + + /* success, fill the structures */ + pdata->p_stack = p_stack; + pdata->mod = mod; + *task = &task_memory->task; + + /* create a zephyr thread for the task */ + pdata->thread_id = k_thread_create(pdata->thread, (__sparse_force void *)p_stack, + stack_size, dp_thread_fn, *task, NULL, NULL, + CONFIG_DP_THREAD_PRIORITY, (*task)->flags, K_FOREVER); + + k_thread_access_grant(pdata->thread_id, pdata->event); + scheduler_dp_grant(pdata->thread_id, cpu_get_id()); + + /* pin the thread to specific core */ + ret = k_thread_cpu_pin(pdata->thread_id, core); + if (ret < 0) { + tr_err(&dp_tr, "zephyr task pin to core failed"); + goto e_thread; + } + +#ifdef CONFIG_USERSPACE + if ((*task)->flags & K_USER) { + ret = user_memory_init_shared(pdata->thread_id, pdata->mod); + if (ret < 0) { + tr_err(&dp_tr, "user_memory_init_shared() failed"); + goto e_thread; + } + } +#endif /* CONFIG_USERSPACE */ + + /* start the thread, it should immediately stop at an event */ + k_event_init(pdata->event); + k_thread_start(pdata->thread_id); + + return 0; + +e_thread: + k_thread_abort(pdata->thread_id); +err: + /* cleanup - free all allocated resources */ + if (user_stack_free((__sparse_force void *)p_stack)) + tr_err(&dp_tr, "user_stack_free failed!"); + + /* k_object_free looks for a pointer in the list, any invalid value can be passed */ + k_object_free(task_memory->pdata.event); + k_object_free(task_memory->pdata.thread); + sof_heap_free(user_heap, task_memory); + return ret; +} diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt index b1af82ae856b..ab5d9525ae6f 100644 --- a/zephyr/CMakeLists.txt +++ b/zephyr/CMakeLists.txt @@ -534,6 +534,7 @@ zephyr_library_sources_ifdef(CONFIG_SHELL sof_shell.c ) +zephyr_syscall_header(${SOF_SRC_PATH}/include/sof/audio/module_adapter/module/generic.h) zephyr_syscall_header(${SOF_SRC_PATH}/include/sof/lib/fast-get.h) zephyr_library_link_libraries(SOF) diff --git a/zephyr/include/sof/lib/memory.h b/zephyr/include/sof/lib/memory.h index be01675951d5..6fa6a8ef558d 100644 --- a/zephyr/include/sof/lib/memory.h +++ b/zephyr/include/sof/lib/memory.h @@ -32,6 +32,9 @@ void dbg_path_cold_enter(const char *fn); static inline void __assert_can_be_cold(const char *fn) { + if (k_is_user_context()) + return; + __ASSERT(!ll_sch_is_current(), "%s() called from an LL thread!", fn); dbg_path_cold_enter(fn); }