Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/audio/buffers/audio_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include <rtos/panic.h>
#include <rtos/alloc.h>
#include <ipc/stream.h>
#include <sof/audio/module_adapter/module/generic.h>
#include <module/ipc4/base-config.h>
#include <sof/audio/component.h>
#include <module/module/base.h>
#include <sof/audio/audio_buffer.h>
#include <sof/audio/sink_api.h>
#include <sof/audio/source_api.h>
Expand Down Expand Up @@ -182,6 +186,38 @@ int audio_buffer_source_set_alignment_constants(struct sof_source *source,
return 0;
}

uint32_t audio_buffer_sink_get_lft(struct sof_sink *sink)
{
struct sof_audio_buffer *buffer = sof_audio_buffer_from_sink(sink);
/* get number of ms in the buffer */
size_t bytes_per_sec = sink_get_frame_bytes(&buffer->_sink_api) *
sink_get_rate(&buffer->_sink_api);
size_t bytes_per_ms = bytes_per_sec / 1000;

/* round up for frequencies like 44100 */
if (bytes_per_ms * 1000 != bytes_per_sec)
bytes_per_ms++;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SOF_DIV_ROUND_UP()

uint32_t us_in_buffer =
1000 * source_get_data_available(&buffer->_source_api) / bytes_per_ms;

return us_in_buffer;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove the code below, we can add it when needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to leave it, it has been tested, just cannot be enabled because of cross core and comp_buffer
I'm afraid when I remove it it will be lost - at least very hard to find

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marcinszkudlinski I'd then put it under some #ifdef GOOD_CODE_JUST_NOT_ENABLED_YET or similar macro with a comment. As is it's confusing both people and compilers IMHO


/*
* TODO, Currently there's no DP to DP connection
* >>> the code below is never accessible and won't work because of cache incoherence <<<
*
* to make DP to DP connection possible:
*
* 1) module data must be ALWAYS located in non cached memory alias, allowing
* cross core access to params like period (needed below) and calling
* module_get_deadline for the next module, regardless of cores the modules are
* running on
* 2) comp_buffer must be removed from all pipeline code, replaced with a generic abstract
* class audio_buffer - allowing using comp_buffer and ring_buffer without current
* "hybrid buffer" solution
*/
}

void audio_buffer_init(struct sof_audio_buffer *buffer, uint32_t buffer_type, bool is_shared,
const struct source_ops *source_ops, const struct sink_ops *sink_ops,
const struct audio_buffer_ops *audio_buffer_ops,
Expand Down
3 changes: 2 additions & 1 deletion src/audio/buffers/comp_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,15 @@ APP_TASK_DATA static const struct sink_ops comp_buffer_sink_ops = {
.audio_set_ipc_params = audio_buffer_sink_set_ipc_params,
.on_audio_format_set = audio_buffer_sink_on_audio_format_set,
.set_alignment_constants = audio_buffer_sink_set_alignment_constants,
.get_lft = audio_buffer_sink_get_lft,
};

static const struct audio_buffer_ops audio_buffer_ops = {
.free = comp_buffer_free,
.reset = comp_buffer_reset,
.audio_set_ipc_params = comp_buffer_set_ipc_params,
.on_audio_format_set = comp_buffer_format_set,
.set_alignment_constants = comp_buffer_set_alignment_constants
.set_alignment_constants = comp_buffer_set_alignment_constants,
};

static struct comp_buffer *buffer_alloc_struct(void *stream_addr, size_t size,
Expand Down
3 changes: 2 additions & 1 deletion src/audio/buffers/ring_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,11 +273,12 @@ static const struct sink_ops ring_buffer_sink_ops = {
.audio_set_ipc_params = audio_buffer_sink_set_ipc_params,
.on_audio_format_set = audio_buffer_sink_on_audio_format_set,
.set_alignment_constants = audio_buffer_sink_set_alignment_constants,
.get_lft = audio_buffer_sink_get_lft,
};

static const struct audio_buffer_ops audio_buffer_ops = {
.free = ring_buffer_free,
.reset = ring_buffer_reset
.reset = ring_buffer_reset,
};

struct ring_buffer *ring_buffer_create(struct comp_dev *dev, size_t min_available,
Expand Down
24 changes: 24 additions & 0 deletions src/audio/module_adapter/module/generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,27 @@ void module_update_buffer_position(struct input_stream_buffer *input_buffers,
output_buffers->size += audio_stream_frame_bytes(sink) * frames;
}
EXPORT_SYMBOL(module_update_buffer_position);

uint32_t module_get_deadline(struct processing_module *mod)
{
uint32_t deadline;

/* LL modules have no deadline - it is always "now" */
if (mod->dev->ipc_config.proc_domain == COMP_PROCESSING_DOMAIN_LL)
return 0;

/* startup condition - set deadline to "unknown" */
if (mod->dp_startup_delay)
return UINT32_MAX / 2;

deadline = UINT32_MAX;
/* calculate the shortest LFT for all sinks */
for (size_t i = 0; i < mod->num_of_sinks; i++) {
uint32_t sink_lft = sink_get_last_feeding_time(mod->sinks[i]);

deadline = MIN(deadline, sink_lft);
}

return deadline;
}
EXPORT_SYMBOL(module_get_deadline);
11 changes: 11 additions & 0 deletions src/include/module/audio/sink_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ struct sink_ops {
*/
int (*commit_buffer)(struct sof_sink *sink, size_t commit_size);

/**
* get latest feeding time for this sink, result is a number of microseconds since "NOW"
* where "now" means a start of last LL cycle, as described in zephyr_dp_schedule.c
*/
uint32_t (*get_lft)(struct sof_sink *sink);

/**
* OPTIONAL: Notification to the sink implementation about changes in audio format
*
Expand Down Expand Up @@ -348,4 +354,9 @@ static inline struct processing_module *sink_get_bound_module(struct sof_sink *s
return sink->bound_module;
}

static inline uint32_t sink_get_last_feeding_time(struct sof_sink *sink)
{
return sink->ops->get_lft(sink);
}

#endif /* __MODULE_AUDIO_SINK_API_H__ */
1 change: 1 addition & 0 deletions src/include/sof/audio/audio_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,5 +337,6 @@ int audio_buffer_sink_on_audio_format_set(struct sof_sink *sink);
int audio_buffer_sink_set_alignment_constants(struct sof_sink *sink,
const uint32_t byte_align,
const uint32_t frame_align_req);
uint32_t audio_buffer_sink_get_lft(struct sof_sink *sink);

#endif /* __SOF_AUDIO_BUFFER__ */
25 changes: 25 additions & 0 deletions src/include/sof/audio/module_adapter/module/generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,29 @@ void module_adapter_set_params(struct processing_module *mod, struct sof_ipc_str
int module_adapter_set_state(struct processing_module *mod, struct comp_dev *dev,
int cmd);
int module_adapter_sink_src_prepare(struct comp_dev *dev);

/**
* Get a deadline based on current LFT reported by all sinks
* it returns a value >= UINT32_MAX / 2 in case the deadline cannot be calculated:
* - if a module is in a dealayed start
* - if there's no sink - i.e. DP module is a pure data consumer (like key phrare detector)
*
* @return a deadline the module must finish processing since NOW [in us]
*/
uint32_t module_get_deadline(struct processing_module *mod);

/**
* Get a Longest Processing Time estimation for the module, in us
* It is either
* - a value taken from the manifest or estimated from module period (TODO)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should also be able to take from topology.

* - or a value taken from IPC call (TODO)
* - a worst case - module period
* note that module period may be calculated reasonably late, i.e. in prepare() method
*/
static inline uint32_t module_get_lpt(struct processing_module *mod)
{
/* return worst case of LPT - a module period. See zephyr_dp_schedule.c for description */
return mod->dev->period;
}

#endif /* __SOF_AUDIO_MODULE_GENERIC__ */
1 change: 1 addition & 0 deletions src/include/sof/lib/notifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ enum notify_id {
NOTIFIER_ID_BUFFER_FREE, /* struct buffer_cb_free* */
NOTIFIER_ID_DMA_COPY, /* struct dma_cb_data* */
NOTIFIER_ID_LL_POST_RUN, /* NULL */
NOTIFIER_ID_LL_PRE_RUN, /* NULL */
NOTIFIER_ID_DMA_IRQ, /* struct dma_chan_data * */
NOTIFIER_ID_DAI_TRIGGER, /* struct dai_group * */
NOTIFIER_ID_MIC_PRIVACY_STATE_CHANGE, /* struct mic_privacy_settings * */
Expand Down
3 changes: 3 additions & 0 deletions src/schedule/ll_schedule.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,9 @@ static void schedule_ll_tasks_run(void *data)

perf_cnt_init(&sch->pcd);

notifier_event(sch, NOTIFIER_ID_LL_PRE_RUN,
NOTIFIER_TARGET_CORE_LOCAL, NULL, 0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this for cmocka / tb simulations?


/* run tasks if there are any pending */
if (schedule_ll_is_pending(sch))
schedule_ll_tasks_execute(sch);
Expand Down
96 changes: 74 additions & 22 deletions src/schedule/zephyr_dp_schedule.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ DECLARE_TR_CTX(dp_tr, SOF_UUID(dp_sched_uuid), LOG_LEVEL_INFO);
struct scheduler_dp_data {
struct list_item tasks; /* list of active dp tasks */
struct task ll_tick_src; /* LL task - source of DP tick */
uint32_t last_ll_tick_timestamp;/* a timestamp as k_cycle_get_32 of last LL tick,
* "NOW" for DP deadline calculation
*/

};

struct task_dp_pdata {
Expand Down Expand Up @@ -258,31 +262,31 @@ static enum task_state scheduler_dp_ll_tick_dummy(void *data)
* Now - pipeline is in stable state, CPU used almost in 100% (it would be 100% if DP3
* needed 1.2ms for processing - but the example would be too complicated)
*/
void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void *caller_data)

/* Go through all DP tasks and recalculate their readness and dedlines
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"readiness and deadlines"

* NOT REENTRANT, should be called with scheduler_dp_lock()
*/
static void scheduler_dp_recalculate(struct scheduler_dp_data *dp_sch, bool is_ll_post_run)
{
(void)receiver_data;
(void)event_type;
(void)caller_data;
struct list_item *tlist;
struct task *curr_task;
struct task_dp_pdata *pdata;
unsigned int lock_key;
struct scheduler_dp_data *dp_sch = scheduler_get_data(SOF_SCHEDULE_DP);

lock_key = scheduler_dp_lock(cpu_get_id());
list_for_item(tlist, &dp_sch->tasks) {
curr_task = container_of(tlist, struct task, list);
pdata = curr_task->priv_data;
struct processing_module *mod = pdata->mod;
bool trigger_task = false;

/* decrease number of LL ticks/cycles left till the module reaches its deadline */
if (pdata->ll_cycles_to_start) {
if (mod->dp_startup_delay && is_ll_post_run && pdata->ll_cycles_to_start) {
pdata->ll_cycles_to_start--;
if (!pdata->ll_cycles_to_start)
/* deadline reached, clear startup delay flag.
/* delayed start complete, clear startup delay flag.
* see dp_startup_delay comment for details
*/
mod->dp_startup_delay = false;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not needed

}

if (curr_task->state == SOF_TASK_STATE_QUEUED) {
Expand All @@ -293,16 +297,66 @@ void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void *
mod->sinks,
mod->num_of_sinks);
if (mod_ready) {
/* set a deadline for given num of ticks, starting now */
k_thread_deadline_set(pdata->thread_id,
pdata->deadline_clock_ticks);

/* trigger the task */
curr_task->state = SOF_TASK_STATE_RUNNING;
if (mod->dp_startup_delay && !pdata->ll_cycles_to_start) {
/* first time run - use delayed start */
pdata->ll_cycles_to_start =
module_get_lpt(pdata->mod) / LL_TIMER_PERIOD_US;

/* in case LPT < LL cycle - delay at least cycle */
if (!pdata->ll_cycles_to_start)
pdata->ll_cycles_to_start = 1;
}
trigger_task = true;
k_sem_give(pdata->sem);
}
}
if (curr_task->state == SOF_TASK_STATE_RUNNING) {
/* (re) calculate deadline for all running tasks */
/* get module deadline in us*/
uint32_t deadline = module_get_deadline(mod);

/* if a deadline cannot be calculated, use a fixed value relative to its
* first start
*/
if (deadline >= UINT32_MAX / 2 && trigger_task)
deadline = module_get_lpt(mod);

if (deadline < UINT32_MAX) {
/* round down to 1ms */
deadline = deadline / 1000;

/* calculate number of ticks */
deadline = deadline * (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC / 1000);

/* add to "NOW", overflows are OK */
deadline = dp_sch->last_ll_tick_timestamp + deadline;

/* set in Zephyr. Note that it may be in past, it does not matter,
* Zephyr still will schedule the thread with earlier deadline
* first
*/
k_thread_absolute_deadline_set(pdata->thread_id, deadline);
}
}
}
}

void scheduler_dp_ll_tick(void *receiver_data, enum notify_id event_type, void *caller_data)
{
(void)receiver_data;
(void)event_type;
(void)caller_data;
unsigned int lock_key;
struct scheduler_dp_data *dp_sch = scheduler_get_data(SOF_SCHEDULE_DP);

if (event_type == NOTIFIER_ID_LL_PRE_RUN)
/* remember current timestamp as "NOW" */
dp_sch->last_ll_tick_timestamp = k_cycle_get_32();

lock_key = scheduler_dp_lock(cpu_get_id());
scheduler_dp_recalculate(dp_sch, event_type == NOTIFIER_ID_LL_POST_RUN);
scheduler_dp_unlock(lock_key);
}

Expand Down Expand Up @@ -374,6 +428,7 @@ static void dp_thread_fn(void *p1, void *p2, void *p3)
unsigned int lock_key;
enum task_state state;
bool task_stop;
struct scheduler_dp_data *dp_sch = scheduler_get_data(SOF_SCHEDULE_DP);

do {
/*
Expand Down Expand Up @@ -415,6 +470,11 @@ static void dp_thread_fn(void *p1, void *p2, void *p3)
/* if true exit the while loop, terminate the thread */
task_stop = task->state == SOF_TASK_STATE_COMPLETED ||
task->state == SOF_TASK_STATE_CANCEL;
/* recalculate all DP tasks readiness and deadlines
* TODO: it should be for all tasks, for all cores
* currently its limited to current core only
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it not enough to let each core recalculate its DP tasks locally with its own scheduling rhythm? And what are the implications if this is indeed insufficient?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it is not. DP is core independent, and any change on any core may affect its readiness or deadline. Therefore there should be one single list of DP modules. Recalculation od deadlines should be peformed:

  • before LL starts. As LLs on every core start at the same time, it enough to do it on 1st core only
  • when LL finishes - this should be done on every core. When LL finishes, it is very likely - almost certain - deadlines will change.
  • when any DP finishes processing. As above - when it produces data, next DP may get readiness etc

of course starting calculations once at start of LL will also work, just less optimal. Will introduce delay of 1 cycle on every DP input and output.

i.e

LL1 core1 ----> DP1 core2 (1ms period) --> DP2 core3 (1ms period) --> LL2 core1

all LL start, DP is not ready
LL1 finishes -> recalculation. DP1 starts.
DP1 finishes -> recalculation. DP2 starts.

assuming DP1 and DP2 finish before next LL cycle

all LL start - LL1 may process, LL2 may process.

delay of whole pipeline is 1 ms.

if recalculation will be performed at LL start and separate for each core, the delay would be 4ms

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marcinszkudlinski but why do we also need to relalculate at the beginning of LL? Wouldn't it be enough to recalculate at the end of each LL and DP? Because that's when data or buffer space can become available?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lyakh ok, true. There's no point for recalculation at LL start

*/
scheduler_dp_recalculate(dp_sch, false);

scheduler_dp_unlock(lock_key);
} while (!task_stop);
Expand All @@ -430,7 +490,6 @@ static int scheduler_dp_task_shedule(void *data, struct task *task, uint64_t sta
struct scheduler_dp_data *dp_sch = (struct scheduler_dp_data *)data;
struct task_dp_pdata *pdata = task->priv_data;
unsigned int lock_key;
uint64_t deadline_clock_ticks;
int ret;

lock_key = scheduler_dp_lock(cpu_get_id());
Expand Down Expand Up @@ -483,14 +542,6 @@ static int scheduler_dp_task_shedule(void *data, struct task *task, uint64_t sta
task->state = SOF_TASK_STATE_QUEUED;
list_item_prepend(&task->list, &dp_sch->tasks);

deadline_clock_ticks = period * CONFIG_SYS_CLOCK_TICKS_PER_SEC;
/* period/deadline is in us - convert to seconds in next step
* or it always will be zero because of integer calculation
*/
deadline_clock_ticks /= 1000000;

pdata->deadline_clock_ticks = deadline_clock_ticks;
pdata->ll_cycles_to_start = period / LL_TIMER_PERIOD_US;
pdata->mod->dp_startup_delay = true;
scheduler_dp_unlock(lock_key);

Expand Down Expand Up @@ -532,6 +583,7 @@ int scheduler_dp_init(void)
if (ret)
return ret;

notifier_register(NULL, NULL, NOTIFIER_ID_LL_PRE_RUN, scheduler_dp_ll_tick, 0);
notifier_register(NULL, NULL, NOTIFIER_ID_LL_POST_RUN, scheduler_dp_ll_tick, 0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in fact I'm just noticing this - the DP scheduler registers an LL task, but its callback is a NOP, while the actual processing is done in a notifier, which also is running inline in the LL scheduler thread. Can we not just use the LL task callback and remove notifiers? Just use the lowest priority to run it last?

Copy link
Contributor Author

@marcinszkudlinski marcinszkudlinski Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we can do it in several ways, but notifications are really for such things like DP recalculations. It is not an "LL task", but a schedule decision operation deep in SOF internals
Notifier is a right place in my opinion

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marcinszkudlinski I actually think that running notifiers inline isn't a good idea. Notifiers should be asynchronous - something like a work queue or a semaphore. Triggering a notifier should be atomic and have a well bounded duration, while processing takes place later and can take a long time. In particular this isn't good because it's asymmetric whether the handler is on the same or a different core, while in fact it should be symmetric. We've discussed it before, and I still find this wrong now...

Copy link
Contributor Author

@marcinszkudlinski marcinszkudlinski Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introducing extra LL task(s) for DP calculations won't change a bit. DP calculations will be executed in exactly same way - in LL context, before first LL processing task and after last LL processing task. Timings will be the same. Just more complicated and less clear how it goes - because it will be hidden under priorities
I would even think about hard-coded calls to DP, not by notifications. @softwarecki - you're the owner now ;)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marcinszkudlinski I'd prefer a hard-coded call to DP over notifiers, I don't think this is a good use-case for them. But an ordinary LL task still looks like a good solution for this too to me, potentially a better one.


return 0;
Expand Down
3 changes: 3 additions & 0 deletions src/schedule/zephyr_ll.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ static void zephyr_ll_run(void *data)
struct list_item *list, *tmp, task_head = LIST_INIT(task_head);
uint32_t flags;

notifier_event(sch, NOTIFIER_ID_LL_PRE_RUN,
NOTIFIER_TARGET_CORE_LOCAL, NULL, 0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this notifier is only directed to the same core as the one this LL scheduler is running on. So the notification handler will be called inline. Is this really what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as for now - yes.
Later we need to unify all DP schedulers for all cores - as one DP may affect other regardless of a core they are located on.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marcinszkudlinski but that also means then that those notification handlers on this core will run in the LL scheduler context, thus delaying LL tasks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, - and what's wrong about it? LL has one principle - it must FINISH within 1ms. It does not matter how

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is indeed wrong that the context and the priority of the callback depend on the core it happens to run on.


zephyr_ll_lock(sch, &flags);

/*
Expand Down
Loading