desc_set_defaults() has a loop to clear the per-cpu kstat_irqs counters.

This is only needed in free_desc(), which is used with non-sparse IRQs
so that the irq_desc can be recycled. For a newly allocated irq_desc,
the memory comes from alloc_percpu() and is already zeroed out.

Move the loop to free_desc() to avoid doing the work unnecessarily.

This is especially important on large servers with 100+ CPUs: each
write results in a cache miss, and the write buffer can only hold a
limited number of outstanding transactions.
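For intuition, a minimal user-space sketch of the access pattern the
loop generates: one store per possible CPU, each landing on a separate
cache line because each CPU's per-cpu area is distinct. NCPUS and LINE
are illustrative assumptions, and a warm cache will not reproduce the
misses seen in the kernel, where the per-cpu lines are cold and often
remote; this only models the store pattern, it is not kernel code.

/* Stand-alone model of the per-cpu clearing store pattern. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NCPUS 480	/* assumed CPU count, matching the host below */
#define LINE  64	/* assumed cache line size */

int main(void)
{
	/* One cache line per "CPU", mimicking distinct per-cpu areas. */
	unsigned char *slots = aligned_alloc(LINE, (size_t)NCPUS * LINE);
	struct timespec t0, t1;

	if (!slots)
		return 1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int cpu = 0; cpu < NCPUS; cpu++)
		slots[(size_t)cpu * LINE] = 0;	/* one store, one line */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("%ld ns for %d stores\n",
	       (long)(t1.tv_sec - t0.tv_sec) * 1000000000L +
	       (t1.tv_nsec - t0.tv_nsec),
	       NCPUS);
	free(slots);
	return 0;
}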
Below is an example of the cost on a host with 480 CPUs, measured with
local_irq_save()/local_irq_restore() around the code to avoid
interference. Measurements were taken with kstats:
https://github.com/luigirizzo/lr-cstats/tree/main/kstats
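A sketch of the timing pattern described above (kernel-style,
simplified; the real samples went through the kstats module, whose API
is not reproduced here):

/* Sketch only: bracket the code under test with irqs disabled. */
static u64 time_desc_set_defaults(unsigned int irq, struct irq_desc *desc,
				  int node)
{
	unsigned long flags;
	u64 t0, dt;

	local_irq_save(flags);		/* keep interrupts out of the sample */
	t0 = ktime_get_ns();
	desc_set_defaults(irq, desc, node, NULL, NULL);
	dt = ktime_get_ns() - t0;
	local_irq_restore(flags);
	return dt;			/* nanoseconds, fed into the histogram */
}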
BUCKET  SAMPLES  AVG TIME(ns)  PERCENTILE
    40        3          2432    0.000366
    41        3          3000    0.000732
    42       24          3241    0.003662
    43       33          3971    0.007690
    44      963          4742    0.125244
    45     1071          5545    0.255981
    46      494          6644    0.316284
    47      352          7661    0.359252
    48      816          9447    0.458862
    49     2214         11493    0.729125
    50     1440         13027    0.904907
    51      428         15219    0.957153
    52      275         18211    0.990722
    53       69         21396    0.999145
    54        4         26125    0.999633
    55        1         28996    0.999755
    56        2         37253    1.000000
Signed-off-by: Luigi Rizzo <lrizzo@google.com>
---
kernel/irq/irqdesc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f8e4e13dbe339..ec963174e7e27 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -115,8 +115,6 @@ static inline void free_masks(struct irq_desc *desc) { }
 static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 			      const struct cpumask *affinity, struct module *owner)
 {
-	int cpu;
-
 	desc->irq_common_data.handler_data = NULL;
 	desc->irq_common_data.msi_desc = NULL;
 
@@ -134,8 +132,6 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	desc->tot_count = 0;
 	desc->name = NULL;
 	desc->owner = owner;
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
 	desc_smp_init(desc, node, affinity);
 }
 
@@ -621,9 +617,13 @@ EXPORT_SYMBOL(irq_to_desc);
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	int cpu;
 
-	scoped_guard(raw_spinlock_irqsave, &desc->lock)
+	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
 		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
+		for_each_possible_cpu(cpu)
+			*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
+	}
 	delete_irq_desc(irq);
 }
 
--
2.52.0.457.g6b5491de43-goog
On Mon, Jan 12, 2026 at 9:32 AM Luigi Rizzo <lrizzo@google.com> wrote:
> [...]
> -	scoped_guard(raw_spinlock_irqsave, &desc->lock)
> +	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
>  		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
> +		for_each_possible_cpu(cpu)
> +			*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
> +	}
It seems that for_each_possible_cpu(cpu) could be done outside of the
desc->lock protection.

This would shorten hard-irq blocking by N cache-line misses.
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f8e4e13dbe33965b8ede1872515596eb64dfdb74..577fb0ff4a328d44cef93922f41f8d200d12bbb1 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -621,9 +621,14 @@ EXPORT_SYMBOL(irq_to_desc);
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	int cpu;
 
 	scoped_guard(raw_spinlock_irqsave, &desc->lock)
 		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
+
 	delete_irq_desc(irq);
 }
On Mon, Jan 12 2026 at 21:18, Eric Dumazet wrote:
>> -	scoped_guard(raw_spinlock_irqsave, &desc->lock)
>> +	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
>>  		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
>> +		for_each_possible_cpu(cpu)
>> +			*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
>> +	}
>
>
> It seems that for_each_possible_cpu(cpu) could be done outside of the
> desc->lock protection.
>
> This would shorten hard-irq blocking by N cache line misses.
Right, though it really does not matter unless you deeply care about
alpha, arc, hexagon, m68k, mips, parisc, sparc, xtensa and random old
ARM32 platforms :) (the remaining users of non-sparse IRQs, which is
the only configuration where this recycling free_desc() variant runs).
The following commit has been merged into the irq/core branch of tip:
Commit-ID: fb11a2493e685d0b733c2346f5b26f2e372584fb
Gitweb: https://git.kernel.org/tip/fb11a2493e685d0b733c2346f5b26f2e372584fb
Author: Luigi Rizzo <lrizzo@google.com>
AuthorDate: Mon, 12 Jan 2026 08:32:33
Committer: Thomas Gleixner <tglx@kernel.org>
CommitterDate: Tue, 13 Jan 2026 10:16:29 +01:00
genirq: Move clear of kstat_irqs to free_desc()
desc_set_defaults() has a loop to clear the per-cpu kstat_irqs counters.

This is only needed in free_desc(), which is used with non-sparse IRQs so
that the interrupt descriptor can be recycled. For newly allocated
descriptors, the memory comes from alloc_percpu() and is already zeroed
out.

Move the loop to free_desc() to avoid doing the work unnecessarily.
Signed-off-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260112083234.2665832-1-lrizzo@google.com
---
kernel/irq/irqdesc.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f8e4e13..c3bc00e 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -115,8 +115,6 @@ static inline void free_masks(struct irq_desc *desc) { }
 static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 			      const struct cpumask *affinity, struct module *owner)
 {
-	int cpu;
-
 	desc->irq_common_data.handler_data = NULL;
 	desc->irq_common_data.msi_desc = NULL;
 
@@ -134,8 +132,6 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	desc->tot_count = 0;
 	desc->name = NULL;
 	desc->owner = owner;
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
 	desc_smp_init(desc, node, affinity);
 }
 
@@ -621,9 +617,14 @@ EXPORT_SYMBOL(irq_to_desc);
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	int cpu;
 
 	scoped_guard(raw_spinlock_irqsave, &desc->lock)
 		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
+
 	delete_irq_desc(irq);
 }
 
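For reference, free_desc() as it reads after this commit, reconstructed
from the diff above (this is the non-sparse-IRQ variant that recycles
descriptors; the comments are added here and are not in the kernel
source):

static void free_desc(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);
	int cpu;

	scoped_guard(raw_spinlock_irqsave, &desc->lock)
		desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);

	/*
	 * Per Eric's suggestion, the per-cpu clearing happens outside
	 * the lock: the interrupt is already shut down at this point,
	 * so nothing updates kstat_irqs concurrently, and the N
	 * cache-line misses no longer extend the hard-irq-disabled
	 * critical section.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };

	delete_irq_desc(irq);
}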