From: Jianlin Lv <iecedge@gmail.com>
This commit excludes IRQ time from the total execution duration of BPF
programs. When CONFIG_IRQ_TIME_ACCOUNTING is enabled, IRQ time is
accounted for separately, offering a more accurate assessment of CPU
usage for BPF programs.
Signed-off-by: Jianlin Lv <iecedge@gmail.com>
---
include/linux/filter.h | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f5cf4d35d83e..3e0f975176a6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -703,12 +703,32 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
cant_migrate();
if (static_branch_unlikely(&bpf_stats_enabled_key)) {
struct bpf_prog_stats *stats;
- u64 duration, start = sched_clock();
+ u64 duration, start, start_time, end_time, irq_delta;
unsigned long flags;
+ unsigned int cpu;
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ if (in_task()) {
+ cpu = get_cpu();
+ put_cpu();
+ start_time = irq_time_read(cpu);
+ }
+ #endif
+ start = sched_clock();
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
duration = sched_clock() - start;
+
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ if (in_task()) {
+ end_time = irq_time_read(cpu);
+ if (end_time > start_time) {
+ irq_delta = end_time - start_time;
+ duration -= irq_delta;
+ }
+ }
+ #endif
+
stats = this_cpu_ptr(prog->stats);
flags = u64_stats_update_begin_irqsave(&stats->syncp);
u64_stats_inc(&stats->cnt);
--
2.34.1
On Tue, Apr 22, 2025 at 6:47 AM Jianlin Lv <iecedge@gmail.com> wrote:
>
> From: Jianlin Lv <iecedge@gmail.com>
>
> This commit excludes IRQ time from the total execution duration of BPF
> programs. When CONFIG_IRQ_TIME_ACCOUNTING is enabled, IRQ time is
> accounted for separately, offering a more accurate assessment of CPU
> usage for BPF programs.
>
> Signed-off-by: Jianlin Lv <iecedge@gmail.com>
> ---
> include/linux/filter.h | 24 ++++++++++++++++++++++--
> 1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index f5cf4d35d83e..3e0f975176a6 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -703,12 +703,32 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
> cant_migrate();
> if (static_branch_unlikely(&bpf_stats_enabled_key)) {
> struct bpf_prog_stats *stats;
> - u64 duration, start = sched_clock();
> + u64 duration, start, start_time, end_time, irq_delta;
> unsigned long flags;
> + unsigned int cpu;
>
> - ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> + #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> + if (in_task()) {
> + cpu = get_cpu();
> + put_cpu();
> + start_time = irq_time_read(cpu);
> + }
> + #endif
>
> + start = sched_clock();
> + ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> duration = sched_clock() - start;
> +
> + #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> + if (in_task()) {
> + end_time = irq_time_read(cpu);
> + if (end_time > start_time) {
> + irq_delta = end_time - start_time;
> + duration -= irq_delta;
> + }
> + }
> + #endif
> +
This is way too much overhead.
This timing loop is optimized to measure bpf prog runtime.
See commit ce09cbdd9888 ("bpf: Improve program stats run-time calculation")
IRQ can happen and distort the numbers, but you shouldn't
be running with bpf_stats_enabled for a long time.
You need to sample it instead.
Every couple minutes turn it on for a second, capture the stats
and aggregate over time. Filter out outliers due to IRQ or whatever.
On Tue, Apr 22, 2025 at 09:47:26PM +0800, Jianlin Lv wrote:
> From: Jianlin Lv <iecedge@gmail.com>
>
> This commit excludes IRQ time from the total execution duration of BPF
> programs. When CONFIG_IRQ_TIME_ACCOUNTING is enabled, IRQ time is
> accounted for separately, offering a more accurate assessment of CPU
> usage for BPF programs.
>
> Signed-off-by: Jianlin Lv <iecedge@gmail.com>
> ---
> include/linux/filter.h | 24 ++++++++++++++++++++++--
> 1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index f5cf4d35d83e..3e0f975176a6 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -703,12 +703,32 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
> cant_migrate();
> if (static_branch_unlikely(&bpf_stats_enabled_key)) {
> struct bpf_prog_stats *stats;
> - u64 duration, start = sched_clock();
> + u64 duration, start, start_time, end_time, irq_delta;
> unsigned long flags;
> + unsigned int cpu;
>
> - ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> + #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> + if (in_task()) {
> + cpu = get_cpu();
> + put_cpu();
> + start_time = irq_time_read(cpu);
This is all sorts of daft.. you don't need get_cpu()/put_cpu().
> + }
> + #endif
>
> + start = sched_clock();
> + ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> duration = sched_clock() - start;
> +
> + #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> + if (in_task()) {
> + end_time = irq_time_read(cpu);
> + if (end_time > start_time) {
> + irq_delta = end_time - start_time;
> + duration -= irq_delta;
> + }
> + }
> + #endif
This is really dodgy coding style. Please keep the preprocessor
directives at column 0.
What do you think about steal-time, do you want to remove that from your
BPF runtime too?
If so, perhaps expose the scheduler's clock_task, which does both things
already?
© 2016 - 2025 Red Hat, Inc.