[patch V6 00/16] Improve /proc/interrupts further

Thomas Gleixner posted 16 patches 1 week ago
[patch V6 00/16] Improve /proc/interrupts further
Posted by Thomas Gleixner 1 week ago
This is a follow up to v5 which can be found here:

  https://lore.kernel.org/20260401195625.213446764@kernel.org

The v1 cover letter contains a full analysis, explanation and numbers:

  https://lore.kernel.org/20260303150539.513068586@kernel.org

TLDR:

  - The performance of reading /proc/interrupts has been improved
    piecewise over the years, but most of the low hanging fruit has been
    left on the table.

Changes vs. V5:

  - Rebased against v7.1-rc2

  - Addressed some formatting/alignment details - Radu, Michael

  - Fixed some 0-day fallout vs. various Kconfig combinations

  - Picked up tags where appropriate

Delta patch against v5 is below.

The series applies on top of v7.1-rc2 and is also available via git:

    git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v6

Thanks,

	tglx
---
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c67047c5d830..4a6a8b1d5a8b 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -72,16 +72,16 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	int j;
 
 #ifdef CONFIG_SMP
-	seq_puts(p, "IPI: ");
+	seq_puts(p, " IPI: ");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10lu ", cpu_data[j].ipi_count);
 	seq_putc(p, '\n');
 #endif
-	seq_puts(p, "PMI: ");
+	seq_puts(p, " PMI: ");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10lu ", per_cpu(irq_pmi_count, j));
-	seq_puts(p, "          Performance Monitoring\n");
-	seq_printf(p, "ERR: %10lu\n", irq_err_count);
+	seq_puts(p, " Performance Monitoring\n");
+	seq_printf(p, " ERR: %10lu\n", irq_err_count);
 	return 0;
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 4e8e89a26ca3..b5fb4697bc3f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -551,8 +551,7 @@ void show_ipi_list(struct seq_file *p, int prec)
 		if (!ipi_desc[i])
 			continue;
 
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 1aa324104afb..1d0e0e6a5b92 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -833,11 +833,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	unsigned int cpu, i;
 
 	for (i = 0; i < MAX_IPI; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
-		seq_printf(p, "      %s\n", ipi_types[i]);
+		seq_printf(p, " %s\n", ipi_types[i]);
 	}
 
 	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 64a048f1b880..50922610758b 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -88,7 +88,7 @@ void show_ipi_list(struct seq_file *p, int prec)
 	unsigned int cpu, i;
 
 	for (i = 0; i < NR_IPI; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10);
 		seq_printf(p, " LoongArch  %d  %s\n", i + 1, ipi_types[i]);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 5ed5095320e6..fa66f9c97d74 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -226,8 +226,7 @@ void show_ipi_stats(struct seq_file *p, int prec)
 	unsigned int cpu, i;
 
 	for (i = 0; i < IPI_MAX; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
 		seq_printf(p, " %s\n", ipi_names[i]);
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 9022d8af9d68..03c39b5da50f 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -46,7 +46,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat.__nmi_count, j), 10);
-	seq_printf(p, "  Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index 5210991429d5..22db727652ba 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -199,19 +199,19 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	int j;
 
 #ifdef CONFIG_SMP
-	seq_printf(p, "RES:");
+	seq_printf(p, "%*s:", prec, "RES");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).irq_resched_count, 10);
-	seq_printf(p, "     IPI rescheduling interrupts\n");
-	seq_printf(p, "CAL:");
+	seq_printf(p, " IPI rescheduling interrupts\n");
+	seq_printf(p, "%*s:", prec, "CAL");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).irq_call_count, 10);
-	seq_printf(p, "     IPI function call interrupts\n");
+	seq_printf(p, " IPI function call interrupts\n");
 #endif
-	seq_printf(p, "NMI:");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).counter, 10);
-	seq_printf(p, "     Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 	return 0;
 }
 
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index c5466a9fd560..3f55c69d5f3b 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -303,10 +303,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	int j;
 
-	seq_printf(p, "NMI:");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).__nmi_count, 10);
-	seq_printf(p, "     Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 	return 0;
 }
 
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 5929d498b65f..ddfd6e9bd8c7 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -716,12 +716,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "%*s: ", prec, "RES");
 	for_each_online_cpu(cpu)
 		seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
-	seq_puts(p, "  Rescheduling interrupts\n");
+	seq_puts(p, " Rescheduling interrupts\n");
 
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(cpu)
 		seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
-	seq_puts(p, "  Function call interrupts\n");
+	seq_puts(p, " Function call interrupts\n");
 #endif
 
 	return 0;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index de1c35fa5e75..f399b993af50 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -77,54 +77,54 @@ struct irq_stat_info {
 	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
 
 static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
-	ISS(NMI,			"NMI", "  Non-maskable interrupts\n"),
+	ISS(NMI,			"NMI",	"  Non-maskable interrupts\n"),
 #ifdef CONFIG_X86_LOCAL_APIC
-	ISS(APIC_TIMER,			"LOC", "  Local timer interrupts\n"),
-	IDS(SPURIOUS,			"SPU", "  Spurious interrupts\n"),
-	ISS(APIC_PERF,			"PMI", "  Performance monitoring interrupts\n"),
-	ISS(IRQ_WORK,			"IWI", "  IRQ work interrupts\n"),
-	IDS(ICR_READ_RETRY,		"RTR", "  APIC ICR read retries\n"),
-	ISS(X86_PLATFORM_IPI,		"PLT", "  Platform interrupts\n"),
+	ISS(APIC_TIMER,			"LOC",	"  Local timer interrupts\n"),
+	IDS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
+	ISS(APIC_PERF,			"PMI",	"  Performance monitoring interrupts\n"),
+	ISS(IRQ_WORK,			"IWI",	"  IRQ work interrupts\n"),
+	IDS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
+	ISS(X86_PLATFORM_IPI,		"PLT",	"  Platform interrupts\n"),
 #endif
 #ifdef CONFIG_SMP
-	ISS(RESCHEDULE,			"RES", "  Rescheduling interrupts\n"),
-	ISS(CALL_FUNCTION,		"CAL", "  Function call interrupts\n"),
+	ISS(RESCHEDULE,			"RES",	"  Rescheduling interrupts\n"),
+	ISS(CALL_FUNCTION,		"CAL",	"  Function call interrupts\n"),
 #endif
-	ISS(TLB,			"TLB", "  TLB shootdowns\n"),
+	ISS(TLB,			"TLB",	"  TLB shootdowns\n"),
 #ifdef CONFIG_X86_THERMAL_VECTOR
-	ISS(THERMAL_APIC,		"TRM", "  Thermal event interrupt\n"),
+	ISS(THERMAL_APIC,		"TRM",	"  Thermal event interrupt\n"),
 #endif
 #ifdef CONFIG_X86_MCE_THRESHOLD
-	ISS(THRESHOLD_APIC,		"THR", "  Threshold APIC interrupts\n"),
+	ISS(THRESHOLD_APIC,		"THR",	"  Threshold APIC interrupts\n"),
 #endif
 #ifdef CONFIG_X86_MCE_AMD
-	ISS(DEFERRED_ERROR,		"DFR", "  Deferred Error APIC interrupts\n"),
+	ISS(DEFERRED_ERROR,		"DFR",	"  Deferred Error APIC interrupts\n"),
 #endif
 #ifdef CONFIG_X86_MCE
-	ISS(MCE_EXCEPTION,		"MCE", "  Machine check exceptions\n"),
-	ISS(MCE_POLL,			"MCP", "  Machine check polls\n"),
+	ISS(MCE_EXCEPTION,		"MCE",	"  Machine check exceptions\n"),
+	ISS(MCE_POLL,			"MCP",	"  Machine check polls\n"),
 #endif
 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
-	ITS(HYPERVISOR_CALLBACK,	"HYP", "  Hypervisor callback interrupts\n"),
+	ITS(HYPERVISOR_CALLBACK,	"HYP",	"  Hypervisor callback interrupts\n"),
 #endif
 #if IS_ENABLED(CONFIG_HYPERV)
-	ITS(HYPERV_REENLIGHTENMENT,	"HRE", "  Hyper-V reenlightenment interrupts\n"),
-	ITS(HYPERV_STIMER0,		"HVS", "  Hyper-V stimer0 interrupts\n"),
+	ITS(HYPERV_REENLIGHTENMENT,	"HRE",	"  Hyper-V reenlightenment interrupts\n"),
+	ITS(HYPERV_STIMER0,		"HVS",	"  Hyper-V stimer0 interrupts\n"),
 #endif
 #if IS_ENABLED(CONFIG_KVM)
-	ITS(POSTED_INTR,		"PIN", "  Posted-interrupt notification event\n"),
-	ITS(POSTED_INTR_NESTED,		"NPI", "  Nested posted-interrupt event\n"),
-	ITS(POSTED_INTR_WAKEUP,		"PIW", "  Posted-interrupt wakeup event\n"),
+	ITS(POSTED_INTR,		"PIN",	"  Posted-interrupt notification event\n"),
+	ITS(POSTED_INTR_NESTED,		"NPI",	"  Nested posted-interrupt event\n"),
+	ITS(POSTED_INTR_WAKEUP,		"PIW",	"  Posted-interrupt wakeup event\n"),
 #endif
 #ifdef CONFIG_GUEST_PERF_EVENTS
-	ISS(PERF_GUEST_MEDIATED_PMI,	"VPMI", " Perf Guest Mediated PMI\n"),
+	ISS(PERF_GUEST_MEDIATED_PMI,	"VPMI",	"  Perf Guest Mediated PMI\n"),
 #endif
 #ifdef CONFIG_X86_POSTED_MSI
-	ISS(POSTED_MSI_NOTIFICATION,	"PMN", "  Posted MSI notification event\n"),
+	ISS(POSTED_MSI_NOTIFICATION,	"PMN",	"  Posted MSI notification event\n"),
 #endif
-	IDS(PIC_APIC_ERROR,		"ERR", "  PIC/APIC error interrupts\n"),
+	IDS(PIC_APIC_ERROR,		"ERR",	"  PIC/APIC error interrupts\n"),
 #ifdef CONFIG_X86_IO_APIC
-	IDS(IOAPIC_MISROUTED,		"MIS", "  Misrouted IO/APIC interrupts\n"),
+	IDS(IOAPIC_MISROUTED,		"MIS",	"  Misrouted IO/APIC interrupts\n"),
 #endif
 };
 
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index b1e410f6b5ab..6f01f530868b 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -59,7 +59,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(cpu)
 		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
-	seq_puts(p, "   Non-maskable interrupts\n");
+	seq_puts(p, " Non-maskable interrupts\n");
 #endif
 	return 0;
 }
diff --git a/kernel/irq/debugfs.h b/kernel/irq/debugfs.h
new file mode 100644
index 000000000000..8a9360d5fefb
--- /dev/null
+++ b/kernel/irq/debugfs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_IRQ_DEBUGFS_H
+#define _KERNEL_IRQ_DEBUGFS_H
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+#include <linux/debugfs.h>
+
+struct irq_bit_descr {
+	unsigned int	mask;
+	char		*name;
+};
+
+#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
+
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+			 const struct irq_bit_descr *sd, int size);
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
+static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
+{
+	debugfs_remove(desc->debugfs_file);
+	kfree(desc->dev_name);
+}
+void irq_debugfs_copy_devname(int irq, struct device *dev);
+# ifdef CONFIG_IRQ_DOMAIN
+void irq_domain_debugfs_init(struct dentry *root);
+# else
+static inline void irq_domain_debugfs_init(struct dentry *root)
+{
+}
+# endif
+#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
+static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
+{
+}
+static inline void irq_remove_debugfs_entry(struct irq_desc *d)
+{
+}
+static inline void irq_debugfs_copy_devname(int irq, struct device *dev)
+{
+}
+#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
+
+#endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 7fbf003c6e93..f9c099d45a64 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -12,6 +12,7 @@
 #include <linux/rcuref.h>
 #include <linux/sched/clock.h>
 
+#include "debugfs.h"
 #include "proc.h"
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -394,42 +395,3 @@ static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd)
 	return NULL;
 #endif
 }
-
-#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-#include <linux/debugfs.h>
-
-struct irq_bit_descr {
-	unsigned int	mask;
-	char		*name;
-};
-
-#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
-
-void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
-			 const struct irq_bit_descr *sd, int size);
-
-void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
-static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
-{
-	debugfs_remove(desc->debugfs_file);
-	kfree(desc->dev_name);
-}
-void irq_debugfs_copy_devname(int irq, struct device *dev);
-# ifdef CONFIG_IRQ_DOMAIN
-void irq_domain_debugfs_init(struct dentry *root);
-# else
-static inline void irq_domain_debugfs_init(struct dentry *root)
-{
-}
-# endif
-#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
-static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
-{
-}
-static inline void irq_remove_debugfs_entry(struct irq_desc *d)
-{
-}
-static inline void irq_debugfs_copy_devname(int irq, struct device *dev)
-{
-}
-#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f524ed709b8..f15c9f1223bb 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -2084,7 +2084,7 @@ static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-#include "internals.h"
+#include "debugfs.h"
 
 static struct dentry *domain_dir;
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 3bd394aa7617..ca535472e657 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -465,7 +465,7 @@ static struct irq_proc_constraints {
 	unsigned int	num_prec;
 	unsigned int	chip_width;
 } irq_proc_constraints __read_mostly = {
-	.num_prec	= 3,
+	.num_prec	= 4,
 	.chip_width	= 8,
 };
 
@@ -477,7 +477,7 @@ void irq_proc_calc_prec(void)
 {
 	unsigned int prec, n;
 
-	for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec)
+	for (prec = 4, n = 10000; prec < 10 && n <= total_nr_irqs; ++prec)
 		n *= 10;
 
 	guard(raw_spinlock_irqsave)(&irq_proc_constraints_lock);
@@ -498,6 +498,7 @@ void irq_proc_update_chip(const struct irq_chip *chip)
 		WRITE_ONCE(irq_proc_constraints.chip_width, len);
 }
 
+/* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
 #define ZSTR1 "          0"
 #define ZSTR1_LEN	(sizeof(ZSTR1) - 1)
 #define ZSTR16		ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
diff --git a/kernel/irq/proc.h b/kernel/irq/proc.h
index ec9173d573f9..0631d57fbfb7 100644
--- a/kernel/irq/proc.h
+++ b/kernel/irq/proc.h
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_IRQ_PROC_H
+#define _KERNEL_IRQ_PROC_H
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
 void irq_proc_calc_prec(void);
@@ -7,3 +9,5 @@ void irq_proc_update_chip(const struct irq_chip *chip);
 static inline void irq_proc_calc_prec(void) { }
 static inline void irq_proc_update_chip(const struct irq_chip *chip) { }
 #endif
+
+#endif
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index cf0a02c8124d..a68ae91b4531 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -90,6 +90,13 @@ def show_irq_desc(prec, chip_width, irq):
 
     return text
 
+def show_irq_err_count(prec):
+    cnt = utils.gdb_eval_or_none("irq_err_count")
+    text = ""
+    if cnt is not None:
+        text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
+    return text
+
 def x86_show_irqstat(prec, pfx, idx, desc):
     irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
     text = "%*s: " % (prec, pfx)
@@ -124,34 +131,23 @@ def arm_common_show_interrupts(prec):
     if nr_ipi is None or ipi_desc is None or ipi_types is None:
         return text
 
-    if prec >= 4:
-        sep = " "
-    else:
-        sep = ""
-
     for ipi in range(nr_ipi):
-        text += "%*s%u:%s" % (prec - 1, "IPI", ipi, sep)
+        text += "%*s%u: " % (prec - 1, "IPI", ipi)
         desc = ipi_desc[ipi].cast(irq_desc_type.get_type().pointer())
         if desc == 0:
             continue
         for cpu in cpus.each_online_cpu():
-            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
-        text += "      %s" % (ipi_types[ipi].string())
+            text += "%10u " % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
+        text += "%s" % (ipi_types[ipi].string())
         text += "\n"
     return text
 
 def aarch64_show_interrupts(prec):
+    # Does not work for ARM64 as "ipi_desc" is not available there
     text = arm_common_show_interrupts(prec)
     text += "%*s: %10lu\n" % (prec, "ERR", gdb.parse_and_eval("irq_err_count"))
     return text
 
-def show_irq_err_count(prec):
-    cnt = utils.gdb_eval_or_none("irq_err_count")
-    text = ""
-    if cnt is not None:
-        text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
-    return text
-
 def arch_show_interrupts(prec):
     text = ""
     if utils.is_target_arch("x86"):
@@ -181,8 +177,8 @@ class LxInterruptList(gdb.Command):
             prec = int(constr['num_prec'])
             chip_width = int(constr['chip_width'])
         else:
-            prec = 3
-            j = 1000
+            prec = 4
+            j = 10000
             while prec < 10 and j <= nr_irqs:
                 prec += 1
                 j *= 10
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Shrikanth Hegde 5 days, 8 hours ago
Hi Thomas.

On 5/18/26 1:31 AM, Thomas Gleixner wrote:
> This is a follow up to v5 which can be found here:
> 
>    https://lore.kernel.org/20260401195625.213446764@kernel.org
> 
> The v1 cover letter contains a full analysis, explanation and numbers:
> 
>    https://lore.kernel.org/20260303150539.513068586@kernel.org
> 
> TLDR:
> 
>    - The performance of reading of /proc/interrupts has been improved
>      piecewise over the years, but most of the low hanging fruit has been
>      left on the table.
> 

Ran this on a PowerVM box with 240 CPUs.

Ran perf stat -r 1000 cat /proc/interrupts > tmp.txt
and observed minimal improvement with the series.

Base:
  Performance counter stats for 'cat /proc/interrupts' (1000 runs):

               0.32 msec task-clock:HG                    #    0.617 CPUs utilized               ( +-  0.17% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  136.122 K/sec                       ( +-  0.03% )
          1,313,263      cycles:HG                        #    4.063 GHz                         ( +-  0.17% )
          2,172,511      instructions:HG                  #    1.65  insn per cycle              ( +-  0.05% )
            371,171      branches:HG                      #    1.148 G/sec                       ( +-  0.05% )
              4,918      branch-misses:HG                 #    1.32% of all branches             ( +-  0.35% )

        0.000523661 +- 0.000000914 seconds time elapsed  ( +-  0.17% )

v6 series:

  Performance counter stats for 'cat /proc/interrupts' (1000 runs):

               0.30 msec task-clock:HG                    #    0.591 CPUs utilized               ( +-  0.25% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  145.802 K/sec                       ( +-  0.03% )
          1,224,666      cycles:HG                        #    4.058 GHz                         ( +-  0.25% )
          1,667,435      instructions:HG                  #    1.36  insn per cycle              ( +-  0.08% )
            277,534      branches:HG                      #  919.660 M/sec                       ( +-  0.09% )
              5,066      branch-misses:HG                 #    1.83% of all branches             ( +-  0.45% )

         0.00051099 +- 0.00000110 seconds time elapsed  ( +-  0.21% )       << 3-4% improvement

Looking at the powerpc arch_show_interrupts(), it could use a similar
set of optimizations:
- move to an array based implementation
- use irq_proc_emit_counts()
- some interrupts, such as machine check, are hardly ever set, so set
  skip_vector for them.


Copilot suggested the diff below to quickly try the irq_proc_emit_counts()
integration. It showed small additional gains compared to v6, so it may be
worth fixing that the right way (similar to the x86 work you have done).

  Performance counter stats for 'cat /proc/interrupts' (1000 runs):

               0.29 msec task-clock:HG                    #    0.586 CPUs utilized               ( +-  0.22% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  153.067 K/sec                       ( +-  0.03% )
          1,166,567      cycles:HG                        #    4.058 GHz                         ( +-  0.22% )
          1,475,365      instructions:HG                  #    1.26  insn per cycle              ( +-  0.09% )
            249,051      branches:HG                      #  866.397 M/sec                       ( +-  0.10% )
              5,104      branch-misses:HG                 #    2.05% of all branches             ( +-  0.33% )

        0.000490211 +- 0.000000992 seconds time elapsed  ( +-  0.20% )   <<< 3-4% improvements.


diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a0e8b998c9b5..19c9f28c39d3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -83,6 +83,18 @@ u32 tau_interrupts(unsigned long cpu);
  #endif
  #endif /* CONFIG_PPC32 */
  
+
+/*
+ * Return a percpu pointer to a given unsigned int member of irq_stat.
+ */
+static __always_inline unsigned int __percpu *ppc_irq_stat_member(size_t off)
+{
+    return (unsigned int __percpu *)((char __percpu *)&irq_stat + off);
+}
+
+#define PPC_IRQ_STAT_PCPU(member) \
+    ppc_irq_stat_member(offsetof(irq_cpustat_t, member))
+
  int arch_show_interrupts(struct seq_file *p, int prec)
  {
         int j;
@@ -97,33 +109,27 @@ int arch_show_interrupts(struct seq_file *p, int prec)
  #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
  
         seq_printf(p, "%*s:", prec, "LOC");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(timer_irqs_event));
          seq_printf(p, "  Local timer interrupts for timer event device\n");
  
         seq_printf(p, "%*s:", prec, "BCT");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(broadcast_irqs_event));
         seq_printf(p, "  Broadcast timer interrupts for timer event device\n");
  
         seq_printf(p, "%*s:", prec, "LOC");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(timer_irqs_others));
          seq_printf(p, "  Local timer interrupts for others\n");
  
         seq_printf(p, "%*s:", prec, "SPU");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(spurious_irqs));
         seq_printf(p, "  Spurious interrupts\n");
  
         seq_printf(p, "%*s:", prec, "PMI");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(pmu_irqs));
         seq_printf(p, "  Performance monitoring interrupts\n");
  
         seq_printf(p, "%*s:", prec, "MCE");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(mce_exceptions));
         seq_printf(p, "  Machine check exceptions\n");
  
  #ifdef CONFIG_PPC_BOOK3S_64
@@ -136,22 +142,19 @@ int arch_show_interrupts(struct seq_file *p, int prec)
  #endif
  
         seq_printf(p, "%*s:", prec, "NMI");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(sreset_irqs));
         seq_printf(p, "  System Reset interrupts\n");
  
  #ifdef CONFIG_PPC_WATCHDOG
         seq_printf(p, "%*s:", prec, "WDG");
-       for_each_online_cpu(j)
-               seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10);
+       irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(soft_nmi_irqs));
         seq_printf(p, "  Watchdog soft-NMI interrupts\n");
  #endif
  
  #ifdef CONFIG_PPC_DOORBELL
         if (cpu_has_feature(CPU_FTR_DBELL)) {
                 seq_printf(p, "%*s:", prec, "DBL");
-               for_each_online_cpu(j)
-                       seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10);
+               irq_proc_emit_counts(p, PPC_IRQ_STAT_PCPU(doorbell_irqs));
                 seq_printf(p, "  Doorbell interrupts\n");
         }
  #endif
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Thomas Gleixner 4 days, 14 hours ago
Shrikanth!

On Wed, May 20 2026 at 02:48, Shrikanth Hegde wrote:
> On 5/18/26 1:31 AM, Thomas Gleixner wrote:
>
> Ran perf stat -r 1000 cat /proc/interrupts > tmp.txt
> and Observed minimal improvement with series.

Can you redirect it to /dev/null instead to take the file operations out
of the picture?

> Base:
>           1,313,263      cycles:HG                        #    4.063 GHz                         ( +-  0.17% )
>           2,172,511      instructions:HG                  #    1.65  insn per cycle              ( +-  0.05% )

> v6 series:
>           1,224,666      cycles:HG                        #    4.058 GHz                         ( +-  0.25% )
>           1,667,435      instructions:HG                  #    1.36  insn per cycle              ( +-  0.08% )

Interesting. The number of instructions goes down by 20+%, but at the
same time IPC drops too.

> Looking at powerpc arch_show_interrupts,
> It could use the similar set of optimizations.
> - move to array based
> - use irq_proc_emit_counts
> - some interrupts such as machine check, is hardly set. set skip_vector.
>
>
> Copilot suggested below diff to quickly try irq_proc_emit_counts integration.
> It showed little gains compared to v6. So it maybe worth fixing that in the
> right way. (similar to x86 stuff you have done)
>
>   Performance counter stats for 'cat /proc/interrupts' (1000 runs):
>
>                0.29 msec task-clock:HG                    #    0.586 CPUs utilized               ( +-  0.22% )
>                   0      context-switches:HG              #    0.000 /sec
>                   0      cpu-migrations:HG                #    0.000 /sec
>                  44      page-faults:HG                   #  153.067 K/sec                       ( +-  0.03% )
>           1,166,567      cycles:HG                        #    4.058 GHz                         ( +-  0.22% )
>           1,475,365      instructions:HG                  #    1.26  insn per cycle              ( +-  0.09% )
>             249,051      branches:HG                      #  866.397 M/sec                       ( +-  0.10% )
>               5,104      branch-misses:HG                 #    2.05% of all branches             ( +-  0.33% )
>
>         0.000490211 +- 0.000000992 seconds time elapsed  ( +-  0.20% )   <<< 3-4% improvements.

Again IPC drops ....
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Shrikanth Hegde 4 days, 1 hour ago
Hi Thomas.

On 5/20/26 8:57 PM, Thomas Gleixner wrote:
> Shrikanth!
> 
> On Wed, May 20 2026 at 02:48, Shrikanth Hegde wrote:
>> On 5/18/26 1:31 AM, Thomas Gleixner wrote:
>>
>> Ran perf stat -r 1000 cat /proc/interrupts > tmp.txt
>> and Observed minimal improvement with series.
> 
> Can you redirect it to /dev/null instead to take the file operations out
> of the picture?
> 

Yes. Did "perf stat -r 1000 cat /proc/interrupts > /dev/null".
It shows a larger improvement with the series than when writing to a file.


>> Base:
>>            1,313,263      cycles:HG                        #    4.063 GHz                         ( +-  0.17% )
>>            2,172,511      instructions:HG                  #    1.65  insn per cycle              ( +-  0.05% )
> 
>> v6 series:
>>            1,224,666      cycles:HG                        #    4.058 GHz                         ( +-  0.25% )
>>            1,667,435      instructions:HG                  #    1.36  insn per cycle              ( +-  0.08% )
> 
> Interesting. The number of instructions goes down by 20+%, but at the
> same time IPC drops too.
> 

base:

  Performance counter stats for 'cat /proc/interrupts' (1000 runs):

               0.32 msec task-clock:HG                    #    0.615 CPUs utilized               ( +-  0.17% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  136.347 K/sec                       ( +-  0.03% )
          1,310,621      cycles:HG                        #    4.061 GHz                         ( +-  0.17% )
          2,182,042      instructions:HG                  #    1.66  insn per cycle              ( +-  0.04% )
            372,189      branches:HG                      #    1.153 G/sec                       ( +-  0.04% )
              4,710      branch-misses:HG                 #    1.27% of all branches             ( +-  0.33% )

        0.000525061 +- 0.000000889 seconds time elapsed  ( +-  0.17% )

v6:
  Performance counter stats for 'cat /proc/interrupts' (1000 runs):

               0.28 msec task-clock:HG                    #    0.577 CPUs utilized               ( +-  0.25% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  155.906 K/sec                       ( +-  0.03% )
          1,144,964      cycles:HG                        #    4.057 GHz                         ( +-  0.24% )
          1,628,375      instructions:HG                  #    1.42  insn per cycle              ( +-  0.07% )
            271,934      branches:HG                      #  963.546 M/sec                       ( +-  0.07% )
              4,683      branch-misses:HG                 #    1.72% of all branches             ( +-  0.49% )
                  
         0.00048895 +- 0.00000114 seconds time elapsed  ( +-  0.23% )   << 7-8% improvement.


v6+ ppc_hack

  Performance counter stats for 'cat /proc/interrupts' (1000 runs):
          
               0.27 msec task-clock:HG                    #    0.582 CPUs utilized               ( +-  0.15% )
                  0      context-switches:HG              #    0.000 /sec
                  0      cpu-migrations:HG                #    0.000 /sec
                 44      page-faults:HG                   #  160.232 K/sec                       ( +-  0.03% )
          1,113,983      cycles:HG                        #    4.057 GHz                         ( +-  0.15% )
          1,501,868      instructions:HG                  #    1.35  insn per cycle              ( +-  0.07% )
            251,432      branches:HG                      #  915.627 M/sec                       ( +-  0.06% )
              4,528      branch-misses:HG                 #    1.80% of all branches             ( +-  0.18% )
  
        0.000472057 +- 0.000000668 seconds time elapsed  ( +-  0.14% )  << only slightly better.


>> Looking at powerpc arch_show_interrupts,
>> It could use the similar set of optimizations.
>> - move to array based
>> - use irq_proc_emit_counts
>> - some interrupts such as machine check, is hardly set. set skip_vector.
>>
>>
>> Copilot suggested below diff to quickly try irq_proc_emit_counts integration.
>> It showed little gains compared to v6. So it maybe worth fixing that in the
>> right way. (similar to x86 stuff you have done)
>>
>>    Performance counter stats for 'cat /proc/interrupts' (1000 runs):
>>
>>                 0.29 msec task-clock:HG                    #    0.586 CPUs utilized               ( +-  0.22% )
>>                    0      context-switches:HG              #    0.000 /sec
>>                    0      cpu-migrations:HG                #    0.000 /sec
>>                   44      page-faults:HG                   #  153.067 K/sec                       ( +-  0.03% )
>>            1,166,567      cycles:HG                        #    4.058 GHz                         ( +-  0.22% )
>>            1,475,365      instructions:HG                  #    1.26  insn per cycle              ( +-  0.09% )
>>              249,051      branches:HG                      #  866.397 M/sec                       ( +-  0.10% )
>>                5,104      branch-misses:HG                 #    2.05% of all branches             ( +-  0.33% )
>>
>>          0.000490211 +- 0.000000992 seconds time elapsed  ( +-  0.20% )   <<< 3-4% improvements.
> 
> Again IPC drops ....

Yes. IPC dropping is consistent. I see the same trend in (PATCH 1/16) in the series.
Copying that snippet below.

Before:
  8,932,242      instructions      #    1.66  insn per cycle  ( +-  0.34% )
After:
  7,020,982      instructions      #    1.30  insn per cycle  ( +-  0.52% )


So it might be a common pattern across archs. Maybe the perf stat subsystem is slow
enough that it doesn't show the absolute benefit.

In addition, I ran "perf stat -a -r 1000 cat /proc/interrupts > /dev/null".
It is now 10x slower. IPC is the same with the series and the improvement vanishes.
So the heavier the infrastructure testing it, the smaller the gains get, I guess.
But I don't see any regression.

As you said in the cover letter, the micro loops you ran may be the best way to evaluate it.
If you have the code in shareable form, I can give it a try.

Other than that, code improvement looks good to me.
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Thomas Gleixner 3 days, 22 hours ago
>> Shrikanth!

On Thu, May 21 2026 at 10:04, Shrikanth Hegde wrote:
> On 5/20/26 8:57 PM, Thomas Gleixner wrote:
>> Can you redirect it to /dev/null instead to take the file operations out
>> of the picture?
>
> Yes. Did "perf stat -r 1000 cat /proc/interrupts > /dev/null".
> It shows better improvement with the series compared to file write.

Unsurprisingly :)

>>>          0.000490211 +- 0.000000992 seconds time elapsed  ( +-  0.20% )   <<< 3-4% improvements.
>> 
>> Again IPC drops ....
>
> Yes. IPC dropping is consistent. I see the same trend in (PATCH 1/16) in the series.
> Copying that snippet below.
>
> Before:
>   8,932,242      instructions      #    1.66  insn per cycle  ( +-  0.34% )
> After:
>   7,020,982      instructions      #    1.30  insn per cycle  ( +-  0.52% )
>
> So it might be common pattern across archs. Maybe perf stat subsystem is slow
> enough it doesn't shows the aboslute benefit.

The problem is that the overhead of starting and tearing down 'cat' is
accounted as well. That's constant, obviously.

But for the use cases like irqbalanced or similar things, there is no
startup/teardown cost involved. The process is up and running and they
care about the actual read performance.

This can be clearly observed by comparing the perf data with the read loop
timing data:

          Base line		v6
Perf	  3072.21 us		1564.40 us
Loop	  1310.36 us		 209.90 us

It doesn't add up completely, but the trend is there. And you can trick
perf into revealing the startup/teardown overhead by comparing:

  perf stat -r 1000 head -q -c -0 /proc/interrupts >/dev/null
  perf stat -r 1000 head -q -c 0 /proc/interrupts >/dev/null

> In addition, I ran "perf stat -a -r 1000 cat /proc/interrupts > /dev/null"
> It is now 10x slower. IPC is same with series And improvement vanishes.
> So heavier the infra testing it, gains are getting minimal i guess.

As often :)

> But i don't see any regression.
>
> As you said in the cover-letter, the micro loops you ran maybe the best way to evaluate it.
> If you have the code in shareable form, I can give it a try.

See below. I thought I would come around some day to actually use perf
directly in the test program, but that never happened due to
-ENOTIME.

Thanks,

        tglx
---
#include <fcntl.h>
#include <math.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static char buf[1024*1024];

#define NSECS_PER_SEC	(1000L * 1000L * 1000L)

#define LOOPS	1000

static float td[LOOPS];

/*
 * Read from @fd into the global scratch buffer until read() reports EOF.
 *
 * Returns the total number of bytes read, or -1 as soon as read() fails.
 * Stopping on a negative return avoids the endless loop which a plain
 * "while (r)" condition produces when read() keeps returning -1.
 */
static long read_to_eof(int fd)
{
	long total = 0;

	for (;;) {
		long r = read(fd, buf, sizeof(buf));

		if (r < 0)
			return -1;
		if (!r)
			return total;
		total += r;
	}
}

/* Report the failed operation @what via perror(), close @fd, return exit code 1 */
static int fail(int fd, const char *what)
{
	perror(what);
	close(fd);
	return 1;
}

/*
 * Micro benchmark for reading /proc/interrupts.
 *
 * Reads the file LOOPS times without measuring, then LOOPS times with each
 * full read (up to and including the read() which returns 0) timed via
 * CLOCK_MONOTONIC.
 *
 * Prints three values: the mean time per full read in nanoseconds, the mean
 * number of bytes per full read and the standard deviation of the read time
 * relative to the mean in percent.
 *
 * Returns 0 on success and 1 when opening, reading or rewinding the file
 * fails.
 */
int main(int argc, char *argv[])
{
	unsigned long tsum = 0, rs = 0;
	int fd;

	(void)argc;
	(void)argv;

	fd = open("/proc/interrupts", O_RDONLY);
	if (fd < 0) {
		perror("open /proc/interrupts");
		return 1;
	}

	/* Unmeasured passes before the timed loop */
	for (int i = 0; i < LOOPS; i++) {
		if (read_to_eof(fd) < 0)
			return fail(fd, "read /proc/interrupts");
		if (lseek(fd, 0, SEEK_SET) < 0)
			return fail(fd, "lseek /proc/interrupts");
	}

	for (int i = 0; i < LOOPS; i++) {
		struct timespec t0, t1;
		unsigned long delta;
		long bytes;

		/* Only the read loop itself is between the two timestamps */
		clock_gettime(CLOCK_MONOTONIC, &t0);
		bytes = read_to_eof(fd);
		clock_gettime(CLOCK_MONOTONIC, &t1);

		if (bytes < 0)
			return fail(fd, "read /proc/interrupts");
		rs += bytes;

		delta = t1.tv_nsec + t1.tv_sec * NSECS_PER_SEC;
		delta -= t0.tv_nsec + t0.tv_sec * NSECS_PER_SEC;
		tsum += delta;
		td[i] = delta * 1.0;

		if (lseek(fd, 0, SEEK_SET) < 0)
			return fail(fd, "lseek /proc/interrupts");
	}

	close(fd);

	/*
	 * Accumulate in double: the nanosecond sums and squared deviations
	 * exceed what a float mantissa represents exactly.
	 */
	double mean = (double)tsum / LOOPS;
	double var = 0.0;

	for (int i = 0; i < LOOPS; i++) {
		double dev = td[i] - mean;

		var += dev * dev;
	}

	var /= LOOPS;

	double std = sqrt(var);

	printf("%lu %lu %5.3f\n", tsum / LOOPS, rs / LOOPS, (std / mean) * 100.0);
	return 0;
}
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Shrikanth Hegde 3 days, 15 hours ago

On 5/21/26 1:23 PM, Thomas Gleixner wrote:
>>> Shrikanth!
> 
> On Thu, May 21 2026 at 10:04, Shrikanth Hegde wrote:
>> On 5/20/26 8:57 PM, Thomas Gleixner wrote:
>>> Can you redirect it to /dev/null instead to take the file operations out
>>> of the picture?
>>
>> Yes. Did "perf stat -r 1000 cat /proc/interrupts > /dev/null".
>> It shows better improvement with the series compared to file write.
> 
> Unsurprisingly :)
> 
>>>>           0.000490211 +- 0.000000992 seconds time elapsed  ( +-  0.20% )   <<< 3-4% improvements.
>>>
>>> Again IPC drops ....
>>
>> Yes. IPC dropping is consistent. I see the same trend in (PATCH 1/16) in the series.
>> Copying that snippet below.
>>
>> Before:
>>    8,932,242      instructions      #    1.66  insn per cycle  ( +-  0.34% )
>> After:
>>    7,020,982      instructions      #    1.30  insn per cycle  ( +-  0.52% )
>>
>> So it might be common pattern across archs. Maybe perf stat subsystem is slow
>> enough it doesn't shows the aboslute benefit.
> 
> The problem is that the overhead of starting and tearing down 'cat' is
> accounted as well. That's constant, obviously.
> 
> But for the use cases like irqbalanced or similar things, there is no
> startup/teardown cost involved. The process is up and running and they
> care about the actual read performance.
> 

true.

> It's clearly to observe by comparing the perf data with the read loop
> timing data:
> 
>            Base line		v6
> Perf	  3072.21 us		1564.40 us
> Loop	  1310.36 us		 209.90 us
> 
> It doesn't add up completely, but the trend is there. And you can trick
> perf to reveal the startup/teardown overhead it by comparing:
> 
>    perf stat -r 1000 head -q -c -0 /proc/interrupts >/dev/null
>    perf stat -r 1000 head -q -c 0 /proc/interrupts >/dev/null
> 

Tried it, but doesn't affect much.

>> In addition, I ran "perf stat -a -r 1000 cat /proc/interrupts > /dev/null"
>> It is now 10x slower. IPC is same with series And improvement vanishes.
>> So heavier the infra testing it, gains are getting minimal i guess.
> 
> As often :)
> 
>> But i don't see any regression.
>>
>> As you said in the cover-letter, the micro loops you ran maybe the best way to evaluate it.
>> If you have the code in shareable form, I can give it a try.
> 
> See below. I thought I would come around some day to actually use perf
> directly in the test program, but that never happened due to
> -ENOTIME.
> 
> Thanks,
> 
>          tglx
> ---
> #include <fcntl.h>
> #include <math.h>
> #include <stdio.h>
> #include <time.h>
> #include <unistd.h>
> 
> static char buf[1024*1024];
> 
> #define NSECS_PER_SEC	(1000L * 1000L * 1000L)
> 
> #define LOOPS	1000
> 
> static float td[LOOPS];
> 
> int main(int argc, char *argv[])
> {
> 	int fd = open("/proc/interrupts", O_RDONLY);
> 	long tsum = 0, rs = 0;
> 
> 	for (int i = 0; i < LOOPS; i++) {
> 		long r;
> 
> 		do {
> 			r = read(fd, buf, sizeof(buf));
> 		} while (r);
> 		lseek(fd, 0, 0);
> 	}
> 
> 	for (int i = 0; i < LOOPS; i++) {
> 		struct timespec t0, t1;
> 		unsigned long delta;
> 		long r;
> 
> 		clock_gettime(CLOCK_MONOTONIC, &t0);
> 		do {
> 			r = read(fd, buf, sizeof(buf));
> 			rs += r;
> 		} while (r);
> 		clock_gettime(CLOCK_MONOTONIC, &t1);
> 
> 		delta = t1.tv_nsec + t1.tv_sec * NSECS_PER_SEC;
> 		delta -= t0.tv_nsec + t0.tv_sec * NSECS_PER_SEC;
> 		tsum += delta;
> 		td[i] = delta * 1.0;
> 
> 		lseek(fd, 0, 0);
> 	}
> 
> 	float mean = tsum / LOOPS;
> 	float calc = 0;
> 
> 	for (int i = 0; i < LOOPS; i++) {
> 		float tmp = td[i] - mean;
> 
> 		calc += tmp * tmp;
> 	}
> 
> 	calc /= LOOPS;
> 
> 	float std = sqrt(calc * 1.0);
> 
> 	printf("%lu %lu %5.3f\n", tsum / LOOPS, rs / LOOPS, (std / mean) * 100.0);
> 	return 0;
> }

This shows real benefits indeed.

base		  v6		v6+ppc_hack
101us		65us                  57us

So doing a proper powerpc fix indeed would make sense.
I think it is going to be similar. Let me go and read your
series again.


For the genirq bits of the series, consider the tag if applicable

Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Re: [patch V6 00/16] Improve /proc/interrupts further
Posted by Thomas Gleixner 3 days, 8 hours ago
On Thu, May 21 2026 at 20:18, Shrikanth Hegde wrote:
> On 5/21/26 1:23 PM, Thomas Gleixner wrote:
>> It doesn't add up completely, but the trend is there. And you can trick
>> perf to reveal the startup/teardown overhead it by comparing:
>> 
>>    perf stat -r 1000 head -q -c -0 /proc/interrupts >/dev/null
>>    perf stat -r 1000 head -q -c 0 /proc/interrupts >/dev/null
>> 
> Tried it, but doesn't affect much.

Weird. On a v6 patched kernel:

$ perf stat -r 1000 head -q -c -0 /proc/interrupts >/dev/null

        0.00130456 +- 0.00000306 seconds time elapsed  ( +-  0.23% )

$ perf stat -r 1000 head -q -c 0 /proc/interrupts >/dev/null

        0.00108667 +- 0.00000367 seconds time elapsed  ( +-  0.34% )

The -c -0 one reads the full output while the -c 0 one just opens the
file and closes it again, which means the actual read takes ~0.3ms while
the startup/teardown takes 1ms.

>> 	printf("%lu %lu %5.3f\n", tsum / LOOPS, rs / LOOPS, (std / mean) * 100.0);
>> 	return 0;
>> }
>
> This shows real benefits indeed.
>
> base		  v6		v6+ppc_hack
> 101us		65us                  57us
>
> So doing a proper powerpc fix indeed would make sense.
> I think it is going to be similar. Let me go and read your
> series again.

Have fun!

> For the genirq bits of the series, consider the tag if applicable
>
> Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>

Thank you!

      tglx
RE: [patch V6 00/16] Improve /proc/interrupts further
Posted by mhklkml@zohomail.com 1 week ago
From: Thomas Gleixner <tglx@kernel.org> Sent: Sunday, May 17, 2026 1:01 PM
> 
> This is a follow up to v5 which can be found here:
> 
>   https://lore.kernel.org/20260401195625.213446764@kernel.org
> 
> The v1 cover letter contains a full analysis, explanation and numbers:
> 
>   https://lore.kernel.org/20260303150539.513068586@kernel.org
> 
> TLDR:
> 
>   - The performance of reading of /proc/interrupts has been improved
>     piecewise over the years, but most of the low hanging fruit has been
>     left on the table.

As I did previously with v2 and v5 of the patch series, I tested in
Hyper-V guests on x86/x64 and arm64. Did basic smoke tests of taking
a CPU offline, and removing a PCI device along with its IRQs, then adding
them back again. Everything looks good. The improved alignment looks
100% good. The alignment nits I called out against v5 are fixed, and I
did not find anything new to be picky about.

I did not do anything with the new binary interface or the gdb Python
script.

Tested-by: Michael Kelley <mhklinux@outlook.com>

> 
> Changes vs. V5:
> 
>   - Rebased against v7.1-rc2
> 
>   - Addressed some formatting/alignment details - Radu, Michael
> 
>   - Fixed some 0-day fallout vs. various Kconfig combinations
> 
>   - Picked up tags where appropriate
> 
> Delta patch against v5 is below.
> 
> The series applies on top of v7.1-rc2 and is also available via git:
> 
>     git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v6
> 
> Thanks,
> 
> 	tglx
[patch V6 01/16] x86/irq: Optimize interrupts decimals printing
Posted by Thomas Gleixner 1 week ago
From: Dmitry Ilvokhin <d@ilvokhin.com>

Monitoring tools periodically scan /proc/interrupts to export metrics as a
timeseries for future analysis and investigation.

In large fleets, /proc/interrupts is polled (often every few seconds) on
every machine. The cumulative overhead adds up quickly across thousands
of nodes, so reducing the cost of generating these stats does have a
measurable operational impact. With the ongoing trend toward higher core
counts per machine, this cost becomes even more noticeable over time,
since interrupt counters are per-CPU. In Meta's fleet, we have observed
this overhead at scale.

Although a binary /proc interface would be a better long-term solution
due to lower formatting (kernel side) and parsing (userspace side)
overhead, the text interface will remain in use for some time, even if
better solutions will be available. Optimizing the /proc/interrupts
printing code is therefore still beneficial.

Function seq_printf() supports a rich format string for decimals printing,
but that is not required for printing the /proc/interrupts per CPU counters.
seq_put_decimal_ull_width() can be used instead to print the per CPU
counters, because very limited formatting is required for this case.
A similar optimization idea is already used in show_interrupts().

As a side effect this aligns the x86 descriptions with the generic
interrupts event descriptions.

Performance counter stats (truncated) for 'sh -c cat /proc/interrupts':

Before:

      3.42 msec task-clock        #    0.802 CPUs utilized   ( +-  0.05% )
         1      context-switches  #  291.991 /sec            ( +-  0.74% )
         0      cpu-migrations    #    0.000 /sec
       343      page-faults       #  100.153 K/sec           ( +-  0.01% )
 8,932,242      instructions      #    1.66  insn per cycle  ( +-  0.34% )
 5,374,427      cycles            #    1.569 GHz             ( +-  0.04% )
 1,483,154      branches          #  433.068 M/sec           ( +-  0.22% )
    28,768      branch-misses     #    1.94% of all branches ( +-  0.31% )

0.00427182 +- 0.00000215 seconds time elapsed  ( +-  0.05% )

After:

      2.39 msec task-clock        #    0.796 CPUs utilized   ( +-  0.06% )
         1      context-switches  #  418.541 /sec            ( +-  0.70% )
         0      cpu-migrations    #    0.000 /sec
       343      page-faults       #  143.560 K/sec           ( +-  0.01% )
 7,020,982      instructions      #    1.30  insn per cycle  ( +-  0.52% )
 5,397,266      cycles            #    2.259 GHz             ( +-  0.06% )
 1,569,648      branches          #  656.962 M/sec           ( +-  0.08% )
    25,419      branch-misses     #    1.62% of all branches ( +-  0.72% )

0.00299996 +- 0.00000206 seconds time elapsed  ( +-  0.07% )

Relative speed up in time elapsed is around 29%.

[ tglx: Fixed it up so it applies to current mainline ]

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Radu Rendec <radu@rendec.net>
Link: https://patch.msgid.link/aQj5mGZ6_BBlAm3B@shell.ilvokhin.com

---
Changes v2:
- Expanded commit message: add more rationale for the proposed change.
- Renamed helper put_spaced_decimal() -> put_decimal() primarily to make
  checkpatch.pl --strict pass.

 arch/x86/kernel/irq.c |  112 ++++++++++++++++++++++++++------------------------
 1 file changed, 59 insertions(+), 53 deletions(-)
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -62,6 +62,18 @@ void ack_bad_irq(unsigned int irq)
 	apic_eoi();
 }
 
+/*
+ * A helper routine for putting space and decimal number without overhead
+ * from rich format of printf().
+ */
+static void put_decimal(struct seq_file *p, unsigned long long num)
+{
+	const char *delimiter = " ";
+	unsigned int width = 10;
+
+	seq_put_decimal_ull_width(p, delimiter, num, width);
+}
+
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
 /*
  * /proc/interrupts printing for arch specific interrupts
@@ -70,103 +82,101 @@ int arch_show_interrupts(struct seq_file
 {
 	int j;
 
-	seq_printf(p, "%*s: ", prec, "NMI");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+		put_decimal(p, irq_stats(j)->__nmi_count);
 	seq_puts(p, "  Non-maskable interrupts\n");
 #ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "%*s: ", prec, "LOC");
+	seq_printf(p, "%*s:", prec, "LOC");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+		put_decimal(p, irq_stats(j)->apic_timer_irqs);
 	seq_puts(p, "  Local timer interrupts\n");
 
-	seq_printf(p, "%*s: ", prec, "SPU");
+	seq_printf(p, "%*s:", prec, "SPU");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+		put_decimal(p, irq_stats(j)->irq_spurious_count);
 	seq_puts(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s: ", prec, "PMI");
+	seq_printf(p, "%*s:", prec, "PMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+		put_decimal(p, irq_stats(j)->apic_perf_irqs);
 	seq_puts(p, "  Performance monitoring interrupts\n");
-	seq_printf(p, "%*s: ", prec, "IWI");
+	seq_printf(p, "%*s:", prec, "IWI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+		put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
 	seq_puts(p, "  IRQ work interrupts\n");
-	seq_printf(p, "%*s: ", prec, "RTR");
+	seq_printf(p, "%*s:", prec, "RTR");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+		put_decimal(p, irq_stats(j)->icr_read_retry_count);
 	seq_puts(p, "  APIC ICR read retries\n");
 	if (x86_platform_ipi_callback) {
-		seq_printf(p, "%*s: ", prec, "PLT");
+		seq_printf(p, "%*s:", prec, "PLT");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
+			put_decimal(p, irq_stats(j)->x86_platform_ipis);
 		seq_puts(p, "  Platform interrupts\n");
 	}
 #endif
 #ifdef CONFIG_SMP
-	seq_printf(p, "%*s: ", prec, "RES");
+	seq_printf(p, "%*s:", prec, "RES");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+		put_decimal(p, irq_stats(j)->irq_resched_count);
 	seq_puts(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "%*s: ", prec, "CAL");
+	seq_printf(p, "%*s:", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+		put_decimal(p, irq_stats(j)->irq_call_count);
 	seq_puts(p, "  Function call interrupts\n");
-	seq_printf(p, "%*s: ", prec, "TLB");
+	seq_printf(p, "%*s:", prec, "TLB");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+		put_decimal(p, irq_stats(j)->irq_tlb_count);
 	seq_puts(p, "  TLB shootdowns\n");
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
-	seq_printf(p, "%*s: ", prec, "TRM");
+	seq_printf(p, "%*s:", prec, "TRM");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+		put_decimal(p, irq_stats(j)->irq_thermal_count);
 	seq_puts(p, "  Thermal event interrupts\n");
 #endif
 #ifdef CONFIG_X86_MCE_THRESHOLD
-	seq_printf(p, "%*s: ", prec, "THR");
+	seq_printf(p, "%*s:", prec, "THR");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+		put_decimal(p, irq_stats(j)->irq_threshold_count);
 	seq_puts(p, "  Threshold APIC interrupts\n");
 #endif
 #ifdef CONFIG_X86_MCE_AMD
-	seq_printf(p, "%*s: ", prec, "DFR");
+	seq_printf(p, "%*s:", prec, "DFR");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+		put_decimal(p, irq_stats(j)->irq_deferred_error_count);
 	seq_puts(p, "  Deferred Error APIC interrupts\n");
 #endif
 #ifdef CONFIG_X86_MCE
-	seq_printf(p, "%*s: ", prec, "MCE");
+	seq_printf(p, "%*s:", prec, "MCE");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+		put_decimal(p, per_cpu(mce_exception_count, j));
 	seq_puts(p, "  Machine check exceptions\n");
-	seq_printf(p, "%*s: ", prec, "MCP");
+	seq_printf(p, "%*s:", prec, "MCP");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+		put_decimal(p, per_cpu(mce_poll_count, j));
 	seq_puts(p, "  Machine check polls\n");
 #endif
 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
 	if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s: ", prec, "HYP");
+		seq_printf(p, "%*s:", prec, "HYP");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				   irq_stats(j)->irq_hv_callback_count);
+			put_decimal(p, irq_stats(j)->irq_hv_callback_count);
 		seq_puts(p, "  Hypervisor callback interrupts\n");
 	}
 #endif
 #if IS_ENABLED(CONFIG_HYPERV)
 	if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s: ", prec, "HRE");
+		seq_printf(p, "%*s:", prec, "HRE");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				   irq_stats(j)->irq_hv_reenlightenment_count);
+			put_decimal(p,
+				    irq_stats(j)->irq_hv_reenlightenment_count);
 		seq_puts(p, "  Hyper-V reenlightenment interrupts\n");
 	}
 	if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s: ", prec, "HVS");
+		seq_printf(p, "%*s:", prec, "HVS");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-				   irq_stats(j)->hyperv_stimer0_count);
+			put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
 		seq_puts(p, "  Hyper-V stimer0 interrupts\n");
 	}
 #endif
@@ -175,35 +185,31 @@ int arch_show_interrupts(struct seq_file
 	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
 #endif
 #if IS_ENABLED(CONFIG_KVM)
-	seq_printf(p, "%*s: ", prec, "PIN");
+	seq_printf(p, "%*s:", prec, "PIN");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
+		put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
 	seq_puts(p, "  Posted-interrupt notification event\n");
 
-	seq_printf(p, "%*s: ", prec, "NPI");
+	seq_printf(p, "%*s:", prec, "NPI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ",
-			   irq_stats(j)->kvm_posted_intr_nested_ipis);
+		put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
 	seq_puts(p, "  Nested posted-interrupt event\n");
 
-	seq_printf(p, "%*s: ", prec, "PIW");
+	seq_printf(p, "%*s:", prec, "PIW");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ",
-			   irq_stats(j)->kvm_posted_intr_wakeup_ipis);
+		put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
 	seq_puts(p, "  Posted-interrupt wakeup event\n");
 #endif
 #ifdef CONFIG_GUEST_PERF_EVENTS
-	seq_printf(p, "%*s: ", prec, "VPMI");
+	seq_printf(p, "%*s:", prec, "VPMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ",
-			   irq_stats(j)->perf_guest_mediated_pmis);
+		put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
 	seq_puts(p, " Perf Guest Mediated PMI\n");
 #endif
 #ifdef CONFIG_X86_POSTED_MSI
-	seq_printf(p, "%*s: ", prec, "PMN");
+	seq_printf(p, "%*s:", prec, "PMN");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ",
-			   irq_stats(j)->posted_msi_notification_count);
+		put_decimal(p, irq_stats(j)->posted_msi_notification_count);
 	seq_puts(p, "  Posted MSI notification event\n");
 #endif
 	return 0;
[patch V6 02/16] genirq/proc: Avoid formatting zero counts in /proc/interrupts
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

A large portion of interrupt count entries are zero. There is no point in
formatting the zero value as it is way cheaper to just emit a constant
string.

Collect the number of consecutive zero counts and emit them in one go
before a non-zero count and at the end of the line.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V2: Use sizeof() for ZSTR1_LEN - Dmitry
---
 include/linux/interrupt.h |    1 +
 kernel/irq/proc.c         |   43 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 39 insertions(+), 5 deletions(-)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -864,6 +864,7 @@ static inline void init_irq_proc(void)
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 int arch_show_interrupts(struct seq_file *p, int prec);
+void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts);
 
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -450,6 +450,43 @@ int __weak arch_show_interrupts(struct s
 # define ACTUAL_NR_IRQS irq_get_nr_irqs()
 #endif
 
+/* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
+#define ZSTR1 "          0"
+#define ZSTR1_LEN	(sizeof(ZSTR1) - 1)
+#define ZSTR16		ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
+			ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1
+#define ZSTR256		ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 \
+			ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16
+
+static inline void irq_proc_emit_zero_counts(struct seq_file *p, unsigned int zeros)
+{
+	if (!zeros)
+		return;
+
+	for (unsigned int n = min(zeros, 256); n; zeros -= n, n = min(zeros, 256))
+		seq_write(p, ZSTR256, n * ZSTR1_LEN);
+}
+
+static inline unsigned int irq_proc_emit_count(struct seq_file *p, unsigned int cnt,
+					       unsigned int zeros)
+{
+	if (!cnt)
+		return zeros + 1;
+
+	irq_proc_emit_zero_counts(p, zeros);
+	seq_put_decimal_ull_width(p, " ", cnt, 10);
+	return 0;
+}
+
+void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts)
+{
+	unsigned int cpu, zeros = 0;
+
+	for_each_online_cpu(cpu)
+		zeros = irq_proc_emit_count(p, per_cpu(*cnts, cpu), zeros);
+	irq_proc_emit_zero_counts(p, zeros);
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
 	const unsigned int nr_irqs = irq_get_nr_irqs();
@@ -485,11 +522,7 @@ int show_interrupts(struct seq_file *p,
 		return 0;
 
 	seq_printf(p, "%*d:", prec, i);
-	for_each_online_cpu(j) {
-		unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0;
-
-		seq_put_decimal_ull_width(p, " ", cnt, 10);
-	}
+	irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
 	seq_putc(p, ' ');
 
 	guard(raw_spinlock_irq)(&desc->lock);
Re: [patch V6 02/16] genirq/proc: Avoid formatting zero counts in /proc/interrupts
Posted by Shrikanth Hegde 5 days, 8 hours ago
Hi Thomas.

On 5/18/26 1:31 AM, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@kernel.org>
> 
> A large portion of interrupt count entries are zero. There is no point in
> formatting the zero value as it is way cheeper to just emit a constant
> string.

nit: s/cheeper/cheaper

> 
> Collect the number of consecutive zero counts and emit them in one go
> before a non-zero count and at the end of the line.
> 
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> Reviewed-by: Radu Rendec <radu@rendec.net>
> ---
> V2: Use sizeof() for ZSTR1_LEN - Dmitry
> ---
>   include/linux/interrupt.h |    1 +
>   kernel/irq/proc.c         |   43 ++++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 39 insertions(+), 5 deletions(-)
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -864,6 +864,7 @@ static inline void init_irq_proc(void)
>   struct seq_file;
>   int show_interrupts(struct seq_file *p, void *v);
>   int arch_show_interrupts(struct seq_file *p, int prec);
> +void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts);
>   
>   extern int early_irq_init(void);
>   extern int arch_probe_nr_irqs(void);
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -450,6 +450,43 @@ int __weak arch_show_interrupts(struct s
>   # define ACTUAL_NR_IRQS irq_get_nr_irqs()
>   #endif
>   
> +/* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */

nit: is this comment still valid to be here?

> +#define ZSTR1 "          0"
> +#define ZSTR1_LEN	(sizeof(ZSTR1) - 1)
> +#define ZSTR16		ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
> +			ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1
> +#define ZSTR256		ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 \
> +			ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16
> +
> +static inline void irq_proc_emit_zero_counts(struct seq_file *p, unsigned int zeros)
> +{
> +	if (!zeros)
> +		return;
> +
> +	for (unsigned int n = min(zeros, 256); n; zeros -= n, n = min(zeros, 256))
> +		seq_write(p, ZSTR256, n * ZSTR1_LEN);
> +}
> +
> +static inline unsigned int irq_proc_emit_count(struct seq_file *p, unsigned int cnt,
> +					       unsigned int zeros)
> +{
> +	if (!cnt)
> +		return zeros + 1;
> +
> +	irq_proc_emit_zero_counts(p, zeros);
> +	seq_put_decimal_ull_width(p, " ", cnt, 10);
> +	return 0;
> +}
> +
> +void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts)
> +{
> +	unsigned int cpu, zeros = 0;
> +
> +	for_each_online_cpu(cpu)
> +		zeros = irq_proc_emit_count(p, per_cpu(*cnts, cpu), zeros);
> +	irq_proc_emit_zero_counts(p, zeros);
> +}
> +
>   int show_interrupts(struct seq_file *p, void *v)
>   {
>   	const unsigned int nr_irqs = irq_get_nr_irqs();
> @@ -485,11 +522,7 @@ int show_interrupts(struct seq_file *p,
>   		return 0;
>   
>   	seq_printf(p, "%*d:", prec, i);
> -	for_each_online_cpu(j) {
> -		unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0;
> -
> -		seq_put_decimal_ull_width(p, " ", cnt, 10);
> -	}
> +	irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
>   	seq_putc(p, ' ');
>   
>   	guard(raw_spinlock_irq)(&desc->lock);
> 

Other than nits, feel free to include

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
[patch V6 03/16] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Interrupts which are not marked per CPU increment not only the per CPU
statistics, but also the accumulation counter irq_desc::tot_count.

Change the counter to type unsigned long so it does not produce sporadic
zeros due to wrap arounds on 64-bit machines and do a quick check for non
per CPU interrupts. If the counter is zero, then simply emit a full set of
zero strings. That spares the evaluation of the per CPU counters completely
for interrupts with zero events.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
 include/linux/irqdesc.h |    6 +++---
 kernel/irq/proc.c       |   11 ++++++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -52,8 +52,8 @@ struct irq_redirect {
  * @depth:		disable-depth, for nested irq_disable() calls
  * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
  * @tot_count:		stats field for non-percpu irqs
- * @irq_count:		stats field to detect stalled irqs
  * @last_unhandled:	aging timer for unhandled count
+ * @irq_count:		stats field to detect stalled irqs
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @threads_handled:	stats field for deferred spurious detection of threaded handlers
  * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
@@ -87,9 +87,9 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
-	unsigned int		tot_count;
-	unsigned int		irq_count;	/* For detecting broken IRQs */
+	unsigned long		tot_count;
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
+	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned int		irqs_unhandled;
 	atomic_t		threads_handled;
 	int			threads_handled_last;
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -522,7 +522,16 @@ int show_interrupts(struct seq_file *p,
 		return 0;
 
 	seq_printf(p, "%*d:", prec, i);
-	irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
+
+	/*
+	 * Always output per CPU interrupts. Output device interrupts only when
+	 * desc::tot_count is not zero.
+	 */
+	if (irq_settings_is_per_cpu(desc) || irq_settings_is_per_cpu_devid(desc) ||
+	    data_race(desc->tot_count))
+		irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
+	else
+		irq_proc_emit_zero_counts(p, num_online_cpus());
 	seq_putc(p, ' ');
 
 	guard(raw_spinlock_irq)(&desc->lock);
[patch V6 04/16] x86/irq: Make irqstats array based
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Having the x86 specific interrupt statistics as a data structure with
individual members instead of an array is just stupid as it requires
endless copy and paste in arch_show_interrupts() and arch_irq_stat_cpu(),
where the latter does not even take the latest interrupt additions into
account. The resulting #ifdef orgy is just disgusting.

Convert it to an array of counters, which does not make a difference in the
actual interrupt hotpath increment as the array index is constant and
therefore not any different than the member based access.

But in arch_show_interrupts() and arch_irq_stat_cpu() this just turns into
a loop, which reduces the text size by ~2k (~12%):

   text	   data	    bss	    dec	    hex	filename
  19643	  15250	    904	  35797	   8bd5	../build/arch/x86/kernel/irq.o
  17355	  15250	    904	  33509	   82e5	../build/arch/x86/kernel/irq.o

Adding a new vector or software counter only requires updating the table
and everything just works. Using the core provided emit function, which
speeds up the output of zeros, makes it significantly faster.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V6: Fix the text alignment of VPMI - Michael
V5: Move the inc and enable function to the next patch
    Use COUNT and not VECTOR for the AMD deferred part
V4: Make irq_stats_init() a late initcall so that platform IPI and posted
    vector initialization has been done
    Suppress AMD/HYGON specific vectors when CPU is from a different vendor

V3: Add the missing #ifdeffery - 0-day
    Add the alignment back - Radu
    Address dyslexia for real - Michael

V2: Simplified and extended vector skip mechanism
    Fixup the typos - Michael, Dmitry
    Added the lost precision back for ERR/MIS - Dmitry
---
 arch/x86/events/amd/core.c          |    2 
 arch/x86/events/amd/ibs.c           |    2 
 arch/x86/events/core.c              |    2 
 arch/x86/events/intel/core.c        |    2 
 arch/x86/events/intel/knc.c         |    2 
 arch/x86/events/intel/p4.c          |    2 
 arch/x86/events/zhaoxin/core.c      |    2 
 arch/x86/hyperv/hv_init.c           |    2 
 arch/x86/include/asm/hardirq.h      |   77 ++++++----
 arch/x86/include/asm/mce.h          |    3 
 arch/x86/kernel/apic/apic.c         |    4 
 arch/x86/kernel/apic/ipi.c          |    2 
 arch/x86/kernel/cpu/acrn.c          |    2 
 arch/x86/kernel/cpu/mce/amd.c       |    2 
 arch/x86/kernel/cpu/mce/core.c      |    8 -
 arch/x86/kernel/cpu/mce/threshold.c |    2 
 arch/x86/kernel/cpu/mshyperv.c      |    4 
 arch/x86/kernel/irq.c               |  259 +++++++++++++-----------------------
 arch/x86/kernel/irq_work.c          |    2 
 arch/x86/kernel/kvm.c               |    2 
 arch/x86/kernel/nmi.c               |    4 
 arch/x86/kernel/smp.c               |    6 
 arch/x86/mm/tlb.c                   |    2 
 arch/x86/xen/enlighten_hvm.c        |    2 
 arch/x86/xen/enlighten_pv.c         |    2 
 arch/x86/xen/smp.c                  |    6 
 arch/x86/xen/smp_pv.c               |    2 
 27 files changed, 174 insertions(+), 233 deletions(-)
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1032,7 +1032,7 @@ static int amd_pmu_v2_handle_irq(struct
 	 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
 	 * PMI entry is not set by the local APIC when a PMC overflow occurs
 	 */
-	inc_irq_stat(apic_perf_irqs);
+	inc_perf_irq_stat();
 
 done:
 	cpuc->enabled = pmu_enabled;
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1600,7 +1600,7 @@ perf_ibs_nmi_handler(unsigned int cmd, s
 	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
 
 	if (handled)
-		inc_irq_stat(apic_perf_irqs);
+		inc_perf_irq_stat();
 
 	perf_sample_event_took(sched_clock() - stamp);
 
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1750,7 +1750,7 @@ int x86_pmu_handle_irq(struct pt_regs *r
 	}
 
 	if (handled)
-		inc_irq_stat(apic_perf_irqs);
+		inc_perf_irq_stat();
 
 	return handled;
 }
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3504,7 +3504,7 @@ static int handle_pmi_common(struct pt_r
 	int bit;
 	int handled = 0;
 
-	inc_irq_stat(apic_perf_irqs);
+	inc_perf_irq_stat();
 
 	/*
 	 * Ignore a range of extra bits in status that do not indicate
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -238,7 +238,7 @@ static int knc_pmu_handle_irq(struct pt_
 		goto done;
 	}
 
-	inc_irq_stat(apic_perf_irqs);
+	inc_perf_irq_stat();
 
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1077,7 +1077,7 @@ static int p4_pmu_handle_irq(struct pt_r
 	}
 
 	if (handled)
-		inc_irq_stat(apic_perf_irqs);
+		inc_perf_irq_stat();
 
 	/*
 	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -373,7 +373,7 @@ static int zhaoxin_pmu_handle_irq(struct
 	else
 		zhaoxin_pmu_ack_status(status);
 
-	inc_irq_stat(apic_perf_irqs);
+	inc_perf_irq_stat();
 
 	/*
 	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -219,7 +219,7 @@ static inline bool hv_reenlightenment_av
 DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment)
 {
 	apic_eoi();
-	inc_irq_stat(irq_hv_reenlightenment_count);
+	inc_irq_stat(HYPERV_REENLIGHTENMENT);
 	schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
 }
 
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -4,51 +4,60 @@
 
 #include <linux/threads.h>
 
-typedef struct {
-#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
-	u8	     kvm_cpu_l1tf_flush_l1d;
-#endif
-	unsigned int __nmi_count;	/* arch dependent */
+enum irq_stat_counts {
+	IRQ_COUNT_NMI,
 #ifdef CONFIG_X86_LOCAL_APIC
-	unsigned int apic_timer_irqs;	/* arch dependent */
-	unsigned int irq_spurious_count;
-	unsigned int icr_read_retry_count;
-#endif
-#if IS_ENABLED(CONFIG_KVM)
-	unsigned int kvm_posted_intr_ipis;
-	unsigned int kvm_posted_intr_wakeup_ipis;
-	unsigned int kvm_posted_intr_nested_ipis;
+	IRQ_COUNT_APIC_TIMER,
+	IRQ_COUNT_SPURIOUS,
+	IRQ_COUNT_APIC_PERF,
+	IRQ_COUNT_IRQ_WORK,
+	IRQ_COUNT_ICR_READ_RETRY,
+	IRQ_COUNT_X86_PLATFORM_IPI,
 #endif
-#ifdef CONFIG_GUEST_PERF_EVENTS
-	unsigned int perf_guest_mediated_pmis;
-#endif
-	unsigned int x86_platform_ipis;	/* arch dependent */
-	unsigned int apic_perf_irqs;
-	unsigned int apic_irq_work_irqs;
 #ifdef CONFIG_SMP
-	unsigned int irq_resched_count;
-	unsigned int irq_call_count;
+	IRQ_COUNT_RESCHEDULE,
+	IRQ_COUNT_CALL_FUNCTION,
 #endif
-	unsigned int irq_tlb_count;
+	IRQ_COUNT_TLB,
 #ifdef CONFIG_X86_THERMAL_VECTOR
-	unsigned int irq_thermal_count;
+	IRQ_COUNT_THERMAL_APIC,
 #endif
 #ifdef CONFIG_X86_MCE_THRESHOLD
-	unsigned int irq_threshold_count;
+	IRQ_COUNT_THRESHOLD_APIC,
 #endif
 #ifdef CONFIG_X86_MCE_AMD
-	unsigned int irq_deferred_error_count;
+	IRQ_COUNT_DEFERRED_ERROR,
+#endif
+#ifdef CONFIG_X86_MCE
+	IRQ_COUNT_MCE_EXCEPTION,
+	IRQ_COUNT_MCE_POLL,
 #endif
 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
-	unsigned int irq_hv_callback_count;
+	IRQ_COUNT_HYPERVISOR_CALLBACK,
 #endif
 #if IS_ENABLED(CONFIG_HYPERV)
-	unsigned int irq_hv_reenlightenment_count;
-	unsigned int hyperv_stimer0_count;
+	IRQ_COUNT_HYPERV_REENLIGHTENMENT,
+	IRQ_COUNT_HYPERV_STIMER0,
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+	IRQ_COUNT_POSTED_INTR,
+	IRQ_COUNT_POSTED_INTR_NESTED,
+	IRQ_COUNT_POSTED_INTR_WAKEUP,
+#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+	IRQ_COUNT_PERF_GUEST_MEDIATED_PMI,
 #endif
 #ifdef CONFIG_X86_POSTED_MSI
-	unsigned int posted_msi_notification_count;
+	IRQ_COUNT_POSTED_MSI_NOTIFICATION,
 #endif
+	IRQ_COUNT_MAX,
+};
+
+typedef struct {
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
+	u8	     kvm_cpu_l1tf_flush_l1d;
+#endif
+	unsigned int counts[IRQ_COUNT_MAX];
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
@@ -58,15 +67,23 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc,
 #endif
 #define __ARCH_IRQ_STAT
 
-#define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
+#define inc_irq_stat(index)	this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define inc_perf_irq_stat()	inc_irq_stat(APIC_PERF)
+#else
+#define inc_perf_irq_stat()	do { } while (0)
+#endif
 
 extern void ack_bad_irq(unsigned int irq);
 
+#ifdef CONFIG_PROC_FS
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
 #define arch_irq_stat_cpu	arch_irq_stat_cpu
 
 extern u64 arch_irq_stat(void);
 #define arch_irq_stat		arch_irq_stat
+#endif
 
 DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
 #define local_softirq_pending_ref       __softirq_pending
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -291,9 +291,6 @@ bool mce_is_memory_error(struct mce *m);
 bool mce_is_correctable(struct mce *m);
 bool mce_usable_address(struct mce *m);
 
-DECLARE_PER_CPU(unsigned, mce_exception_count);
-DECLARE_PER_CPU(unsigned, mce_poll_count);
-
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1045,7 +1045,7 @@ static void local_apic_timer_interrupt(v
 	/*
 	 * the NMI deadlock-detector uses this.
 	 */
-	inc_irq_stat(apic_timer_irqs);
+	inc_irq_stat(APIC_TIMER);
 
 	evt->event_handler(evt);
 }
@@ -2114,7 +2114,7 @@ static noinline void handle_spurious_int
 
 	trace_spurious_apic_entry(vector);
 
-	inc_irq_stat(irq_spurious_count);
+	inc_irq_stat(SPURIOUS);
 
 	/*
 	 * If this is a spurious interrupt then do not acknowledge
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void)
 	for (cnt = 0; cnt < 1000; cnt++) {
 		if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
 			return 0;
-		inc_irq_stat(icr_read_retry_count);
+		inc_irq_stat(ICR_READ_RETRY);
 		udelay(100);
 	}
 	return APIC_ICR_BUSY;
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -52,7 +52,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_ca
 	 * HYPERVISOR_CALLBACK_VECTOR.
 	 */
 	apic_eoi();
-	inc_irq_stat(irq_hv_callback_count);
+	inc_irq_stat(HYPERVISOR_CALLBACK);
 
 	if (acrn_intr_handler)
 		acrn_intr_handler();
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -850,7 +850,7 @@ bool amd_mce_usable_address(struct mce *
 DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
 {
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
-	inc_irq_stat(irq_deferred_error_count);
+	inc_irq_stat(DEFERRED_ERROR);
 	deferred_error_int_vector();
 	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
 	apic_eoi();
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,8 +67,6 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
 
 #define SPINUNIT		100	/* 100ns */
 
-DEFINE_PER_CPU(unsigned, mce_exception_count);
-
 DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
 
 DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
@@ -716,8 +714,6 @@ static noinstr void mce_read_aux(struct
 	}
 }
 
-DEFINE_PER_CPU(unsigned, mce_poll_count);
-
 /*
  * We have three scenarios for checking for Deferred errors:
  *
@@ -820,7 +816,7 @@ void machine_check_poll(enum mcp_flags f
 	struct mce *m;
 	int i;
 
-	this_cpu_inc(mce_poll_count);
+	inc_irq_stat(MCE_POLL);
 
 	mce_gather_info(&err, NULL);
 	m = &err.m;
@@ -1595,7 +1591,7 @@ noinstr void do_machine_check(struct pt_
 	 */
 	lmce = 1;
 
-	this_cpu_inc(mce_exception_count);
+	inc_irq_stat(MCE_EXCEPTION);
 
 	mce_gather_info(&err, regs);
 	m = &err.m;
--- a/arch/x86/kernel/cpu/mce/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -37,7 +37,7 @@ void (*mce_threshold_vector)(void) = def
 DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
 {
 	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
-	inc_irq_stat(irq_threshold_count);
+	inc_irq_stat(THRESHOLD_APIC);
 	mce_threshold_vector();
 	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
 	apic_eoi();
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -154,7 +154,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_cal
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
-	inc_irq_stat(irq_hv_callback_count);
+	inc_irq_stat(HYPERVISOR_CALLBACK);
 	if (mshv_handler)
 		mshv_handler();
 
@@ -193,7 +193,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_sti
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
-	inc_irq_stat(hyperv_stimer0_count);
+	inc_irq_stat(HYPERV_STIMER0);
 	if (hv_stimer0_handler)
 		hv_stimer0_handler();
 	add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -62,196 +62,126 @@ void ack_bad_irq(unsigned int irq)
 	apic_eoi();
 }
 
-/*
- * A helper routine for putting space and decimal number without overhead
- * from rich format of printf().
- */
-static void put_decimal(struct seq_file *p, unsigned long long num)
-{
-	const char *delimiter = " ";
-	unsigned int width = 10;
+struct irq_stat_info {
+	unsigned int	skip_vector;
+	const char	*symbol;
+	const char	*text;
+};
 
-	seq_put_decimal_ull_width(p, delimiter, num, width);
-}
+#define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
 
-#define irq_stats(x)		(&per_cpu(irq_stat, x))
-/*
- * /proc/interrupts printing for arch specific interrupts
- */
-int arch_show_interrupts(struct seq_file *p, int prec)
-{
-	int j;
+#define ITS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
+	{ .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
 
-	seq_printf(p, "%*s:", prec, "NMI");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->__nmi_count);
-	seq_puts(p, "  Non-maskable interrupts\n");
+static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+	ISS(NMI,			"NMI",	"  Non-maskable interrupts\n"),
 #ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "%*s:", prec, "LOC");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->apic_timer_irqs);
-	seq_puts(p, "  Local timer interrupts\n");
-
-	seq_printf(p, "%*s:", prec, "SPU");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_spurious_count);
-	seq_puts(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s:", prec, "PMI");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->apic_perf_irqs);
-	seq_puts(p, "  Performance monitoring interrupts\n");
-	seq_printf(p, "%*s:", prec, "IWI");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
-	seq_puts(p, "  IRQ work interrupts\n");
-	seq_printf(p, "%*s:", prec, "RTR");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->icr_read_retry_count);
-	seq_puts(p, "  APIC ICR read retries\n");
-	if (x86_platform_ipi_callback) {
-		seq_printf(p, "%*s:", prec, "PLT");
-		for_each_online_cpu(j)
-			put_decimal(p, irq_stats(j)->x86_platform_ipis);
-		seq_puts(p, "  Platform interrupts\n");
-	}
+	ISS(APIC_TIMER,			"LOC",	"  Local timer interrupts\n"),
+	ISS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
+	ISS(APIC_PERF,			"PMI",	"  Performance monitoring interrupts\n"),
+	ISS(IRQ_WORK,			"IWI",	"  IRQ work interrupts\n"),
+	ISS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
+	ISS(X86_PLATFORM_IPI,		"PLT",	"  Platform interrupts\n"),
 #endif
 #ifdef CONFIG_SMP
-	seq_printf(p, "%*s:", prec, "RES");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_resched_count);
-	seq_puts(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "%*s:", prec, "CAL");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_call_count);
-	seq_puts(p, "  Function call interrupts\n");
-	seq_printf(p, "%*s:", prec, "TLB");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_tlb_count);
-	seq_puts(p, "  TLB shootdowns\n");
+	ISS(RESCHEDULE,			"RES",	"  Rescheduling interrupts\n"),
+	ISS(CALL_FUNCTION,		"CAL",	"  Function call interrupts\n"),
 #endif
+	ISS(TLB,			"TLB",	"  TLB shootdowns\n"),
 #ifdef CONFIG_X86_THERMAL_VECTOR
-	seq_printf(p, "%*s:", prec, "TRM");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_thermal_count);
-	seq_puts(p, "  Thermal event interrupts\n");
+	ISS(THERMAL_APIC,		"TRM",	"  Thermal event interrupt\n"),
 #endif
 #ifdef CONFIG_X86_MCE_THRESHOLD
-	seq_printf(p, "%*s:", prec, "THR");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_threshold_count);
-	seq_puts(p, "  Threshold APIC interrupts\n");
+	ISS(THRESHOLD_APIC,		"THR",	"  Threshold APIC interrupts\n"),
 #endif
 #ifdef CONFIG_X86_MCE_AMD
-	seq_printf(p, "%*s:", prec, "DFR");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->irq_deferred_error_count);
-	seq_puts(p, "  Deferred Error APIC interrupts\n");
+	ISS(DEFERRED_ERROR,		"DFR",	"  Deferred Error APIC interrupts\n"),
 #endif
 #ifdef CONFIG_X86_MCE
-	seq_printf(p, "%*s:", prec, "MCE");
-	for_each_online_cpu(j)
-		put_decimal(p, per_cpu(mce_exception_count, j));
-	seq_puts(p, "  Machine check exceptions\n");
-	seq_printf(p, "%*s:", prec, "MCP");
-	for_each_online_cpu(j)
-		put_decimal(p, per_cpu(mce_poll_count, j));
-	seq_puts(p, "  Machine check polls\n");
+	ISS(MCE_EXCEPTION,		"MCE",	"  Machine check exceptions\n"),
+	ISS(MCE_POLL,			"MCP",	"  Machine check polls\n"),
 #endif
 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
-	if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s:", prec, "HYP");
-		for_each_online_cpu(j)
-			put_decimal(p, irq_stats(j)->irq_hv_callback_count);
-		seq_puts(p, "  Hypervisor callback interrupts\n");
-	}
+	ITS(HYPERVISOR_CALLBACK,	"HYP",	"  Hypervisor callback interrupts\n"),
 #endif
 #if IS_ENABLED(CONFIG_HYPERV)
-	if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s:", prec, "HRE");
-		for_each_online_cpu(j)
-			put_decimal(p,
-				    irq_stats(j)->irq_hv_reenlightenment_count);
-		seq_puts(p, "  Hyper-V reenlightenment interrupts\n");
-	}
-	if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
-		seq_printf(p, "%*s:", prec, "HVS");
-		for_each_online_cpu(j)
-			put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
-		seq_puts(p, "  Hyper-V stimer0 interrupts\n");
-	}
-#endif
-	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
+	ITS(HYPERV_REENLIGHTENMENT,	"HRE",	"  Hyper-V reenlightenment interrupts\n"),
+	ITS(HYPERV_STIMER0,		"HVS",	"  Hyper-V stimer0 interrupts\n"),
 #endif
 #if IS_ENABLED(CONFIG_KVM)
-	seq_printf(p, "%*s:", prec, "PIN");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
-	seq_puts(p, "  Posted-interrupt notification event\n");
-
-	seq_printf(p, "%*s:", prec, "NPI");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
-	seq_puts(p, "  Nested posted-interrupt event\n");
-
-	seq_printf(p, "%*s:", prec, "PIW");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
-	seq_puts(p, "  Posted-interrupt wakeup event\n");
+	ITS(POSTED_INTR,		"PIN",	"  Posted-interrupt notification event\n"),
+	ITS(POSTED_INTR_NESTED,		"NPI",	"  Nested posted-interrupt event\n"),
+	ITS(POSTED_INTR_WAKEUP,		"PIW",	"  Posted-interrupt wakeup event\n"),
 #endif
 #ifdef CONFIG_GUEST_PERF_EVENTS
-	seq_printf(p, "%*s:", prec, "VPMI");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
-	seq_puts(p, " Perf Guest Mediated PMI\n");
+	ISS(PERF_GUEST_MEDIATED_PMI,	"VPMI",	"  Perf Guest Mediated PMI\n"),
 #endif
 #ifdef CONFIG_X86_POSTED_MSI
-	seq_printf(p, "%*s:", prec, "PMN");
-	for_each_online_cpu(j)
-		put_decimal(p, irq_stats(j)->posted_msi_notification_count);
-	seq_puts(p, "  Posted MSI notification event\n");
+	ISS(POSTED_MSI_NOTIFICATION,	"PMN",	"  Posted MSI notification event\n"),
+#endif
+};
+
+static int __init irq_init_stats(void)
+{
+	struct irq_stat_info *info = irq_stat_info;
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
+		if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
+			info->skip_vector = 0;
+	}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (!x86_platform_ipi_callback)
+		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+#endif
+
+#ifdef CONFIG_X86_POSTED_MSI
+	if (!posted_msi_enabled())
+		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		irq_stat_info[IRQ_COUNT_DEFERRED_ERROR].skip_vector = 1;
 #endif
 	return 0;
 }
+late_initcall(irq_init_stats);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	const struct irq_stat_info *info = irq_stat_info;
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
+		if (info->skip_vector)
+			continue;
+
+		seq_printf(p, "%*s:", prec, info->symbol);
+		irq_proc_emit_counts(p, &irq_stat.counts[i]);
+		seq_puts(p, info->text);
+	}
+
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+	if (IS_ENABLED(CONFIG_X86_IO_APIC))
+		seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
+	return 0;
+}
 
 /*
  * /proc/stat helpers
  */
 u64 arch_irq_stat_cpu(unsigned int cpu)
 {
-	u64 sum = irq_stats(cpu)->__nmi_count;
+	irq_cpustat_t *p = per_cpu_ptr(&irq_stat, cpu);
+	u64 sum = 0;
 
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += irq_stats(cpu)->apic_timer_irqs;
-	sum += irq_stats(cpu)->irq_spurious_count;
-	sum += irq_stats(cpu)->apic_perf_irqs;
-	sum += irq_stats(cpu)->apic_irq_work_irqs;
-	sum += irq_stats(cpu)->icr_read_retry_count;
-	if (x86_platform_ipi_callback)
-		sum += irq_stats(cpu)->x86_platform_ipis;
-#endif
-#ifdef CONFIG_SMP
-	sum += irq_stats(cpu)->irq_resched_count;
-	sum += irq_stats(cpu)->irq_call_count;
-#endif
-#ifdef CONFIG_X86_THERMAL_VECTOR
-	sum += irq_stats(cpu)->irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
-	sum += irq_stats(cpu)->irq_threshold_count;
-#endif
-#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
-	sum += irq_stats(cpu)->irq_hv_callback_count;
-#endif
-#if IS_ENABLED(CONFIG_HYPERV)
-	sum += irq_stats(cpu)->irq_hv_reenlightenment_count;
-	sum += irq_stats(cpu)->hyperv_stimer0_count;
-#endif
-#ifdef CONFIG_X86_MCE
-	sum += per_cpu(mce_exception_count, cpu);
-	sum += per_cpu(mce_poll_count, cpu);
-#endif
+	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++)
+		sum += p->counts[i];
 	return sum;
 }
 
@@ -260,6 +190,7 @@ u64 arch_irq_stat(void)
 	u64 sum = atomic_read(&irq_err_count);
 	return sum;
 }
+#endif /* CONFIG_PROC_FS */
 
 static __always_inline void handle_irq(struct irq_desc *desc,
 				       struct pt_regs *regs)
@@ -344,7 +275,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 /* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) __ro_after_init = NULL;
 /*
  * Handler for X86_PLATFORM_IPI_VECTOR.
  */
@@ -354,7 +285,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platfo
 
 	apic_eoi();
 	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
-	inc_irq_stat(x86_platform_ipis);
+	inc_irq_stat(X86_PLATFORM_IPI);
 	if (x86_platform_ipi_callback)
 		x86_platform_ipi_callback();
 	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
@@ -369,7 +300,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platfo
 DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler)
 {
 	 apic_eoi();
-	 inc_irq_stat(perf_guest_mediated_pmis);
+	 inc_irq_stat(PERF_GUEST_MEDIATED_PMI);
 	 perf_guest_handle_mediated_pmi();
 }
 #endif
@@ -395,7 +326,7 @@ EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_int
 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi)
 {
 	apic_eoi();
-	inc_irq_stat(kvm_posted_intr_ipis);
+	inc_irq_stat(POSTED_INTR);
 }
 
 /*
@@ -404,7 +335,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm
 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
 {
 	apic_eoi();
-	inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+	inc_irq_stat(POSTED_INTR_WAKEUP);
 	kvm_posted_intr_wakeup_handler();
 }
 
@@ -414,7 +345,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted
 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
 {
 	apic_eoi();
-	inc_irq_stat(kvm_posted_intr_nested_ipis);
+	inc_irq_stat(POSTED_INTR_NESTED);
 }
 #endif
 
@@ -488,7 +419,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
 
 	/* Mark the handler active for intel_ack_posted_msi_irq() */
 	__this_cpu_write(posted_msi_handler_active, true);
-	inc_irq_stat(posted_msi_notification_count);
+	inc_irq_stat(POSTED_MSI_NOTIFICATION);
 	irq_enter();
 
 	/*
@@ -583,7 +514,7 @@ static void smp_thermal_vector(void)
 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
 {
 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
-	inc_irq_stat(irq_thermal_count);
+	inc_irq_stat(THERMAL_APIC);
 	smp_thermal_vector();
 	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
 	apic_eoi();
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,7 +18,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
 {
 	apic_eoi();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
-	inc_irq_stat(apic_irq_work_irqs);
+	inc_irq_stat(IRQ_WORK);
 	irq_work_run();
 	trace_irq_work_exit(IRQ_WORK_VECTOR);
 }
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -304,7 +304,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncp
 
 	apic_eoi();
 
-	inc_irq_stat(irq_hv_callback_count);
+	inc_irq_stat(HYPERVISOR_CALLBACK);
 
 	if (__this_cpu_read(async_pf_enabled)) {
 		token = __this_cpu_read(apf_reason.token);
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -576,7 +576,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 
 	irq_state = irqentry_nmi_enter(regs);
 
-	inc_irq_stat(__nmi_count);
+	inc_irq_stat(NMI);
 
 	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
 		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
@@ -725,7 +725,7 @@ DEFINE_FREDENTRY_NMI(exc_nmi)
 
 	irq_state = irqentry_nmi_enter(regs);
 
-	inc_irq_stat(__nmi_count);
+	inc_irq_stat(NMI);
 	default_do_nmi(regs);
 
 	irqentry_nmi_exit(regs, irq_state);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -250,7 +250,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_res
 {
 	apic_eoi();
 	trace_reschedule_entry(RESCHEDULE_VECTOR);
-	inc_irq_stat(irq_resched_count);
+	inc_irq_stat(RESCHEDULE);
 	scheduler_ipi();
 	trace_reschedule_exit(RESCHEDULE_VECTOR);
 }
@@ -259,7 +259,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_funct
 {
 	apic_eoi();
 	trace_call_function_entry(CALL_FUNCTION_VECTOR);
-	inc_irq_stat(irq_call_count);
+	inc_irq_stat(CALL_FUNCTION);
 	generic_smp_call_function_interrupt();
 	trace_call_function_exit(CALL_FUNCTION_VECTOR);
 }
@@ -268,7 +268,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_funct
 {
 	apic_eoi();
 	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
-	inc_irq_stat(irq_call_count);
+	inc_irq_stat(CALL_FUNCTION);
 	generic_smp_call_function_single_interrupt();
 	trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
 }
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1123,7 +1123,7 @@ static void flush_tlb_func(void *info)
 	VM_WARN_ON(!irqs_disabled());
 
 	if (!local) {
-		inc_irq_stat(irq_tlb_count);
+		inc_irq_stat(TLB);
 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	}
 
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -125,7 +125,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_ca
 	if (xen_percpu_upcall)
 		apic_eoi();
 
-	inc_irq_stat(irq_hv_callback_count);
+	inc_irq_stat(HYPERVISOR_CALLBACK);
 
 	xen_evtchn_do_upcall();
 
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -728,7 +728,7 @@ static void __xen_pv_evtchn_do_upcall(st
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
-	inc_irq_stat(irq_hv_callback_count);
+	inc_irq_stat(HYPERVISOR_CALLBACK);
 
 	xen_evtchn_do_upcall();
 
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -23,7 +23,7 @@ static irqreturn_t xen_call_function_sin
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
-	inc_irq_stat(irq_resched_count);
+	inc_irq_stat(RESCHEDULE);
 	scheduler_ipi();
 
 	return IRQ_HANDLED;
@@ -254,7 +254,7 @@ void xen_send_IPI_allbutself(int vector)
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	generic_smp_call_function_interrupt();
-	inc_irq_stat(irq_call_count);
+	inc_irq_stat(CALL_FUNCTION);
 
 	return IRQ_HANDLED;
 }
@@ -262,7 +262,7 @@ static irqreturn_t xen_call_function_int
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
 	generic_smp_call_function_single_interrupt();
-	inc_irq_stat(irq_call_count);
+	inc_irq_stat(CALL_FUNCTION);
 
 	return IRQ_HANDLED;
 }
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -400,7 +400,7 @@ static void xen_pv_stop_other_cpus(int w
 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
 {
 	irq_work_run();
-	inc_irq_stat(apic_irq_work_irqs);
+	inc_irq_stat(IRQ_WORK);
 
 	return IRQ_HANDLED;
 }
[patch V6 05/16] x86/irq: Suppress unlikely interrupt stats by default
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Unlikely interrupt counters like the spurious vector and the synthetic APIC
ICR read retry show up in /proc/interrupts with all counts 0 most of the
time.

As these are events which should never happen, suppress them by default and
enable them for output when they actually happen.

This requires a separate bitmap as the description array is marked
__ro_after_init. With that bitmap in place the description array becomes
RO data.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V5: Move irq_stat_inc_and_enable() here
V4: Fix the bad idea of writing to __ro_after_init marked data
V3: New patch
---
 arch/x86/include/asm/hardirq.h |    1 +
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/apic/ipi.c     |    2 +-
 arch/x86/kernel/irq.c          |   38 ++++++++++++++++++++++++++++----------
 4 files changed, 31 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -68,6 +68,7 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc,
 #define __ARCH_IRQ_STAT
 
 #define inc_irq_stat(index)	this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
+void irq_stat_inc_and_enable(enum irq_stat_counts which);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #define inc_perf_irq_stat()	inc_irq_stat(APIC_PERF)
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2114,7 +2114,7 @@ static noinline void handle_spurious_int
 
 	trace_spurious_apic_entry(vector);
 
-	inc_irq_stat(SPURIOUS);
+	irq_stat_inc_and_enable(IRQ_COUNT_SPURIOUS);
 
 	/*
 	 * If this is a spurious interrupt then do not acknowledge
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void)
 	for (cnt = 0; cnt < 1000; cnt++) {
 		if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
 			return 0;
-		inc_irq_stat(ICR_READ_RETRY);
+		irq_stat_inc_and_enable(IRQ_COUNT_ICR_READ_RETRY);
 		udelay(100);
 	}
 	return APIC_ICR_BUSY;
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -68,19 +68,24 @@ struct irq_stat_info {
 	const char	*text;
 };
 
+#define DEFAULT_SUPPRESSED_VECTOR	UINT_MAX
+
 #define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
 
 #define ITS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
 	{ .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
 
-static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+#define IDS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
+	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
+
+static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
 	ISS(NMI,			"NMI",	"  Non-maskable interrupts\n"),
 #ifdef CONFIG_X86_LOCAL_APIC
 	ISS(APIC_TIMER,			"LOC",	"  Local timer interrupts\n"),
-	ISS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
+	IDS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
 	ISS(APIC_PERF,			"PMI",	"  Performance monitoring interrupts\n"),
 	ISS(IRQ_WORK,			"IWI",	"  IRQ work interrupts\n"),
-	ISS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
+	IDS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
 	ISS(X86_PLATFORM_IPI,		"PLT",	"  Platform interrupts\n"),
 #endif
 #ifdef CONFIG_SMP
@@ -121,34 +126,47 @@ static struct irq_stat_info irq_stat_inf
 #endif
 };
 
+static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
+
 static int __init irq_init_stats(void)
 {
-	struct irq_stat_info *info = irq_stat_info;
+	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
-			info->skip_vector = 0;
+		if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
+					   test_bit(info->skip_vector, system_vectors)))
+			set_bit(i, irq_stat_count_show);
 	}
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_platform_ipi_callback)
-		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+		clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
 #endif
 
 #ifdef CONFIG_X86_POSTED_MSI
 	if (!posted_msi_enabled())
-		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+		clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
-		irq_stat_info[IRQ_COUNT_DEFERRED_ERROR].skip_vector = 1;
+		clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
 #endif
 	return 0;
 }
 late_initcall(irq_init_stats);
 
+/*
+ * Used for default disabled counters to increment the stats and to enable the
+ * entry for /proc/interrupts output.
+ */
+void irq_stat_inc_and_enable(enum irq_stat_counts which)
+{
+	this_cpu_inc(irq_stat.counts[which]);
+	set_bit(which, irq_stat_count_show);
+}
+
 #ifdef CONFIG_PROC_FS
 /*
  * /proc/interrupts printing for arch specific interrupts
@@ -158,7 +176,7 @@ int arch_show_interrupts(struct seq_file
 	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (info->skip_vector)
+		if (!test_bit(i, irq_stat_count_show))
 			continue;
 
 		seq_printf(p, "%*s:", prec, info->symbol);
Re: [patch V6 05/16] x86/irq: Suppress unlikely interrupt stats by default
Posted by Shrikanth Hegde 1 day, 12 hours ago

On 5/18/26 1:31 AM, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@kernel.org>
> 
> Unlikely interrupt counters like the spurious vector and the synthetic APIC
> ICR read retry show up in /proc/interrupts with all counts 0 most of the
> time.
> 
> As these are events which should never happen, suppress them by default and
> enable them for output when they actually happen.
> 
> This requires a separate bitmap as the description array is marked
> __ro_after_init. With that bitmap in place it becomes RO data.
> 
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Reviewed-by: Radu Rendec <radu@rendec.net>
> ---
> V5: Move irq_stat_inc_and_enable() here
> V4: Fix the bad idea of writing to __ro_after_init marked data
> V3: New patch
> ---
>   arch/x86/include/asm/hardirq.h |    1 +
>   arch/x86/kernel/apic/apic.c    |    2 +-
>   arch/x86/kernel/apic/ipi.c     |    2 +-
>   arch/x86/kernel/irq.c          |   38 ++++++++++++++++++++++++++++----------
>   4 files changed, 31 insertions(+), 12 deletions(-)
> --- a/arch/x86/include/asm/hardirq.h
> +++ b/arch/x86/include/asm/hardirq.h
> @@ -68,6 +68,7 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc,
>   #define __ARCH_IRQ_STAT
>   
>   #define inc_irq_stat(index)	this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
> +void irq_stat_inc_and_enable(enum irq_stat_counts which);
>   
>   #ifdef CONFIG_X86_LOCAL_APIC
>   #define inc_perf_irq_stat()	inc_irq_stat(APIC_PERF)
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -2114,7 +2114,7 @@ static noinline void handle_spurious_int
>   
>   	trace_spurious_apic_entry(vector);
>   
> -	inc_irq_stat(SPURIOUS);
> +	irq_stat_inc_and_enable(IRQ_COUNT_SPURIOUS);
>   
>   	/*
>   	 * If this is a spurious interrupt then do not acknowledge
> --- a/arch/x86/kernel/apic/ipi.c
> +++ b/arch/x86/kernel/apic/ipi.c
> @@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void)
>   	for (cnt = 0; cnt < 1000; cnt++) {
>   		if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
>   			return 0;
> -		inc_irq_stat(ICR_READ_RETRY);
> +		irq_stat_inc_and_enable(IRQ_COUNT_ICR_READ_RETRY);
>   		udelay(100);
>   	}
>   	return APIC_ICR_BUSY;
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -68,19 +68,24 @@ struct irq_stat_info {
>   	const char	*text;
>   };
>   
> +#define DEFAULT_SUPPRESSED_VECTOR	UINT_MAX
> +
>   #define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
>   
>   #define ITS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
>   	{ .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
>   
> -static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
> +#define IDS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
> +	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
> +
> +static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
>   	ISS(NMI,			"NMI",	"  Non-maskable interrupts\n"),
>   #ifdef CONFIG_X86_LOCAL_APIC
>   	ISS(APIC_TIMER,			"LOC",	"  Local timer interrupts\n"),
> -	ISS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
> +	IDS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
>   	ISS(APIC_PERF,			"PMI",	"  Performance monitoring interrupts\n"),
>   	ISS(IRQ_WORK,			"IWI",	"  IRQ work interrupts\n"),
> -	ISS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
> +	IDS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
>   	ISS(X86_PLATFORM_IPI,		"PLT",	"  Platform interrupts\n"),
>   #endif
>   #ifdef CONFIG_SMP
> @@ -121,34 +126,47 @@ static struct irq_stat_info irq_stat_inf
>   #endif
>   };
>   
> +static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
> +
>   static int __init irq_init_stats(void)
>   {
> -	struct irq_stat_info *info = irq_stat_info;
> +	const struct irq_stat_info *info = irq_stat_info;
>   
>   	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
> -		if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
> -			info->skip_vector = 0;
> +		if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
> +					   test_bit(info->skip_vector, system_vectors)))
> +			set_bit(i, irq_stat_count_show);
>   	}
>   
>   #ifdef CONFIG_X86_LOCAL_APIC
>   	if (!x86_platform_ipi_callback)
> -		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
>   #endif
>   
>   #ifdef CONFIG_X86_POSTED_MSI
>   	if (!posted_msi_enabled())
> -		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
>   #endif
>   
>   #ifdef CONFIG_X86_MCE_AMD
>   	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
>   	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
> -		irq_stat_info[IRQ_COUNT_DEFERRED_ERROR].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
>   #endif
>   	return 0;
>   }
>   late_initcall(irq_init_stats);
>   
> +/*
> + * Used for default enabled counters to increment the stats and to enable the
> + * entry for /proc/interrupts output.
> + */

nit:

Noticed this while copying the code.
I think you meant "Used for default disabled counters"

> +void irq_stat_inc_and_enable(enum irq_stat_counts which)
> +{
> +	this_cpu_inc(irq_stat.counts[which]);
> +	set_bit(which, irq_stat_count_show);
> +}
> +
>   #ifdef CONFIG_PROC_FS
>   /*
>    * /proc/interrupts printing for arch specific interrupts
> @@ -158,7 +176,7 @@ int arch_show_interrupts(struct seq_file
>   	const struct irq_stat_info *info = irq_stat_info;
>   
>   	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
> -		if (info->skip_vector)
> +		if (!test_bit(i, irq_stat_count_show))
>   			continue;
>   
>   		seq_printf(p, "%*s:", prec, info->symbol);
>
Re: [patch V6 05/16] x86/irq: Suppress unlikely interrupt stats by default
Posted by Thomas Gleixner 17 hours ago
On Sat, May 23 2026 at 23:18, Shrikanth Hegde wrote:
> On 5/18/26 1:31 AM, Thomas Gleixner wrote:
>> +/*
>> + * Used for default enabled counters to increment the stats and to enable the
>> + * entry for /proc/interrupts output.
>> + */
>
> nit:
>
> Noticed this while copying the code.
> I think you meant "Used for default disabled counters"

Indeed.
Re: [patch V6 05/16] x86/irq: Suppress unlikely interrupt stats by default
Posted by Shrikanth Hegde 3 days, 14 hours ago

On 5/18/26 1:31 AM, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@kernel.org>
> 
> Unlikely interrupt counters like the spurious vector and the synthetic APIC
> ICR read retry show up in /proc/interrupts with all counts 0 most of the
> time.
> 
> As these are events which should never happen, suppress them by default and
> enable them for output when they actually happen.
> 
> This requires a separate bitmap as the description array is marked
> __ro_after_init. With that bitmap in place it becomes RO data.
> 
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Reviewed-by: Radu Rendec <radu@rendec.net>
> ---
> V5: Move irq_stat_inc_and_enable() here
> V4: Fix the bad idea of writing to __ro_after_init marked data
> V3: New patch
> ---
>   arch/x86/include/asm/hardirq.h |    1 +
>   arch/x86/kernel/apic/apic.c    |    2 +-
>   arch/x86/kernel/apic/ipi.c     |    2 +-
>   arch/x86/kernel/irq.c          |   38 ++++++++++++++++++++++++++++----------
>   4 files changed, 31 insertions(+), 12 deletions(-)
> --- a/arch/x86/include/asm/hardirq.h
> +++ b/arch/x86/include/asm/hardirq.h
> @@ -68,6 +68,7 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc,
>   #define __ARCH_IRQ_STAT
>   
>   #define inc_irq_stat(index)	this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
> +void irq_stat_inc_and_enable(enum irq_stat_counts which);
>   
>   #ifdef CONFIG_X86_LOCAL_APIC
>   #define inc_perf_irq_stat()	inc_irq_stat(APIC_PERF)
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -2114,7 +2114,7 @@ static noinline void handle_spurious_int
>   
>   	trace_spurious_apic_entry(vector);
>   
> -	inc_irq_stat(SPURIOUS);
> +	irq_stat_inc_and_enable(IRQ_COUNT_SPURIOUS);
>   
>   	/*
>   	 * If this is a spurious interrupt then do not acknowledge
> --- a/arch/x86/kernel/apic/ipi.c
> +++ b/arch/x86/kernel/apic/ipi.c
> @@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void)
>   	for (cnt = 0; cnt < 1000; cnt++) {
>   		if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
>   			return 0;
> -		inc_irq_stat(ICR_READ_RETRY);
> +		irq_stat_inc_and_enable(IRQ_COUNT_ICR_READ_RETRY);
>   		udelay(100);
>   	}
>   	return APIC_ICR_BUSY;
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -68,19 +68,24 @@ struct irq_stat_info {
>   	const char	*text;
>   };
>   
> +#define DEFAULT_SUPPRESSED_VECTOR	UINT_MAX
> +
>   #define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
>   
>   #define ITS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
>   	{ .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
>   
> -static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
> +#define IDS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
> +	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
> +
> +static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
>   	ISS(NMI,			"NMI",	"  Non-maskable interrupts\n"),
>   #ifdef CONFIG_X86_LOCAL_APIC
>   	ISS(APIC_TIMER,			"LOC",	"  Local timer interrupts\n"),
> -	ISS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
> +	IDS(SPURIOUS,			"SPU",	"  Spurious interrupts\n"),
>   	ISS(APIC_PERF,			"PMI",	"  Performance monitoring interrupts\n"),
>   	ISS(IRQ_WORK,			"IWI",	"  IRQ work interrupts\n"),
> -	ISS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
> +	IDS(ICR_READ_RETRY,		"RTR",	"  APIC ICR read retries\n"),
>   	ISS(X86_PLATFORM_IPI,		"PLT",	"  Platform interrupts\n"),
>   #endif
>   #ifdef CONFIG_SMP
> @@ -121,34 +126,47 @@ static struct irq_stat_info irq_stat_inf
>   #endif
>   };
>   
> +static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
> +
>   static int __init irq_init_stats(void)
>   {
> -	struct irq_stat_info *info = irq_stat_info;
> +	const struct irq_stat_info *info = irq_stat_info;
>   
>   	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
> -		if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
> -			info->skip_vector = 0;
> +		if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
> +					   test_bit(info->skip_vector, system_vectors)))
> +			set_bit(i, irq_stat_count_show);
>   	}
>   
>   #ifdef CONFIG_X86_LOCAL_APIC
>   	if (!x86_platform_ipi_callback)
> -		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
>   #endif
>   
>   #ifdef CONFIG_X86_POSTED_MSI
>   	if (!posted_msi_enabled())
> -		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
>   #endif
>   
>   #ifdef CONFIG_X86_MCE_AMD
>   	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
>   	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
> -		irq_stat_info[IRQ_COUNT_DEFERRED_ERROR].skip_vector = 1;
> +		clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
>   #endif
>   	return 0;
>   }
>   late_initcall(irq_init_stats);
>   
> +/*
> + * Used for default enabled counters to increment the stats and to enable the
> + * entry for /proc/interrupts output.
> + */
> +void irq_stat_inc_and_enable(enum irq_stat_counts which)
> +{
> +	this_cpu_inc(irq_stat.counts[which]);
> +	set_bit(which, irq_stat_count_show);
> +}
> +
>   #ifdef CONFIG_PROC_FS
>   /*
>    * /proc/interrupts printing for arch specific interrupts
> @@ -158,7 +176,7 @@ int arch_show_interrupts(struct seq_file
>   	const struct irq_stat_info *info = irq_stat_info;
>   
>   	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
> -		if (info->skip_vector)
> +		if (!test_bit(i, irq_stat_count_show))
>   			continue;

If this is done, then those lines will be absent in /proc/interrupts right?

If there was a tool expecting these entries, it will fail to see that entry.
Is that ok?

>   
>   		seq_printf(p, "%*s:", prec, info->symbol);
>
Re: [patch V6 05/16] x86/irq: Suppress unlikely interrupt stats by default
Posted by Thomas Gleixner 3 days, 9 hours ago
On Thu, May 21 2026 at 21:22, Shrikanth Hegde wrote:
> On 5/18/26 1:31 AM, Thomas Gleixner wrote:
>>   	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
>> -		if (info->skip_vector)
>> +		if (!test_bit(i, irq_stat_count_show))
>>   			continue;
>
> If this is done, then those lines will be absent in /proc/interrupts right?
>
> If there was a tool expecting these entries, it will fail to see that entry.
> Is that ok?

I checked the obvious tools and they don't care. They have no
expectations of what's there as that varies due to Kconfig settings,
architectures etc.

Thanks,

        tglx
[patch V6 06/16] x86/irq: Move IOAPIC misrouted and PIC/APIC error counts into irq_stats
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

The special treatment of these counts just adds extra code for no real
value. The irq_stats mechanism allows suppressing, by default, the output of
counters for events which should never happen and provides a mechanism to
enable them in the rare case that they occur.

Move the IOAPIC misrouted and the PIC/APIC error counts into irq_stats,
mark them suppressed by default and update the sites which increment them.

This changes the output format of 'ERR' and 'MIS' in case there are events
to the regular per CPU display format and otherwise suppresses them
completely.

As a side effect this removes the arch_irq_stat() mechanism from /proc/stat,
which was only there to account for the error interrupts on x86 and failed
to take the misrouted ones into account.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V3: New patch
---
 arch/x86/include/asm/hardirq.h |    7 ++++---
 arch/x86/include/asm/hw_irq.h  |    4 ----
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/apic/io_apic.c |    4 +---
 arch/x86/kernel/i8259.c        |    2 +-
 arch/x86/kernel/irq.c          |   16 ++++------------
 fs/proc/stat.c                 |    4 ----
 7 files changed, 11 insertions(+), 28 deletions(-)
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -50,6 +50,10 @@ enum irq_stat_counts {
 #ifdef CONFIG_X86_POSTED_MSI
 	IRQ_COUNT_POSTED_MSI_NOTIFICATION,
 #endif
+	IRQ_COUNT_PIC_APIC_ERROR,
+#ifdef CONFIG_X86_IO_APIC
+	IRQ_COUNT_IOAPIC_MISROUTED,
+#endif
 	IRQ_COUNT_MAX,
 };
 
@@ -81,9 +85,6 @@ extern void ack_bad_irq(unsigned int irq
 #ifdef CONFIG_PROC_FS
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
 #define arch_irq_stat_cpu	arch_irq_stat_cpu
-
-extern u64 arch_irq_stat(void);
-#define arch_irq_stat		arch_irq_stat
 #endif
 
 DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -110,10 +110,6 @@ static inline void lock_vector_lock(void
 static inline void unlock_vector_lock(void) {}
 #endif
 
-/* Statistics */
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
-
 extern void elcr_set_level_irq(unsigned int irq);
 
 extern char irq_entries_start[];
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2186,7 +2186,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_error_inte
 		apic_write(APIC_ESR, 0);
 	v = apic_read(APIC_ESR);
 	apic_eoi();
-	atomic_inc(&irq_err_count);
+	irq_stat_inc_and_enable(IRQ_COUNT_PIC_APIC_ERROR);
 
 	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
 
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1575,8 +1575,6 @@ static unsigned int startup_ioapic_irq(s
 	return was_pending;
 }
 
-atomic_t irq_mis_count;
-
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 static bool io_apic_level_ack_pending(struct mp_chip_data *data)
 {
@@ -1713,7 +1711,7 @@ static void ioapic_ack_level(struct irq_
 	 * at the cpu.
 	 */
 	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
+		irq_stat_inc_and_enable(IRQ_COUNT_IOAPIC_MISROUTED);
 		eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
 	}
 
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -214,7 +214,7 @@ static void mask_and_ack_8259A(struct ir
 			       "spurious 8259A interrupt: IRQ%d.\n", irq);
 			spurious_irq_mask |= irqmask;
 		}
-		atomic_inc(&irq_err_count);
+		irq_stat_inc_and_enable(IRQ_COUNT_PIC_APIC_ERROR);
 		/*
 		 * Theoretically we do not have to handle this IRQ,
 		 * but in Linux this does not cause problems and is
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -39,8 +39,6 @@ EXPORT_PER_CPU_SYMBOL(__softirq_pending)
 
 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
 
-atomic_t irq_err_count;
-
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -124,6 +122,10 @@ static const struct irq_stat_info irq_st
 #ifdef CONFIG_X86_POSTED_MSI
 	ISS(POSTED_MSI_NOTIFICATION,	"PMN",	"  Posted MSI notification event\n"),
 #endif
+	IDS(PIC_APIC_ERROR,		"ERR",	"  PIC/APIC error interrupts\n"),
+#ifdef CONFIG_X86_IO_APIC
+	IDS(IOAPIC_MISROUTED,		"MIS",	"  Misrouted IO/APIC interrupts\n"),
+#endif
 };
 
 static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
@@ -183,10 +185,6 @@ int arch_show_interrupts(struct seq_file
 		irq_proc_emit_counts(p, &irq_stat.counts[i]);
 		seq_puts(p, info->text);
 	}
-
-	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
-	if (IS_ENABLED(CONFIG_X86_IO_APIC))
-		seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
 	return 0;
 }
 
@@ -202,12 +200,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 		sum += p->counts[i];
 	return sum;
 }
-
-u64 arch_irq_stat(void)
-{
-	u64 sum = atomic_read(&irq_err_count);
-	return sum;
-}
 #endif /* CONFIG_PROC_FS */
 
 static __always_inline void handle_irq(struct irq_desc *desc,
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,9 +18,6 @@
 #ifndef arch_irq_stat_cpu
 #define arch_irq_stat_cpu(cpu) 0
 #endif
-#ifndef arch_irq_stat
-#define arch_irq_stat() 0
-#endif
 
 u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 {
@@ -122,7 +119,6 @@ static int show_stat(struct seq_file *p,
 			sum_softirq += softirq_stat;
 		}
 	}
-	sum += arch_irq_stat();
 
 	seq_put_decimal_ull(p, "cpu  ", nsec_to_clock_t(user));
 	seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
[patch V6 07/16] scripts/gdb: Update x86 interrupts to the array based storage
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

x86 changed the interrupt statistics from a struct with individual members
to a counter array. It also provides a corresponding info array with the
strings for prefix and description and an indicator to skip the entry.

Update the already out of sync GDB script to use the counter and the info
array, which keeps the GDB script in sync automatically.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
---
V6: Fixup alignment for interrupts - Radu
V5: Bring the show_err_irq() back as it is used on mips
    Fix the index calculation for the show bitmap so it works with more than one
    unsigned long.
V4: Adapted to the irq_stat_count_show bitmap and removed the duplicate ERR/MIS output
V3: New patch - Radu
---
 scripts/gdb/linux/interrupts.py |   71 ++++++++++------------------------------
 1 file changed, 18 insertions(+), 53 deletions(-)
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -48,7 +48,7 @@ irq_desc_type = utils.CachedType("struct
             count = cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt']
         else:
             count = 0
-        text += "%10u" % (count)
+        text += "%10u " % (count)
 
     name = "None"
     if desc['irq_data']['chip']:
@@ -58,7 +58,7 @@ irq_desc_type = utils.CachedType("struct
         else:
             name = "-"
 
-    text += "  %8s" % (name)
+    text += "  %-8s" % (name)
 
     if desc['irq_data']['domain']:
         text += "  %*lu" % (prec, desc['irq_data']['hwirq'])
@@ -97,64 +97,29 @@ irq_desc_type = utils.CachedType("struct
         text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
     return text
 
-def x86_show_irqstat(prec, pfx, field, desc):
-    irq_stat = gdb.parse_and_eval("&irq_stat")
+def x86_show_irqstat(prec, pfx, idx, desc):
+    irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
     text = "%*s: " % (prec, pfx)
     for cpu in cpus.each_online_cpu():
         stat = cpus.per_cpu(irq_stat, cpu)
-        text += "%10u " % (stat[field])
-    text += "  %s\n" % (desc)
-    return text
-
-def x86_show_mce(prec, var, pfx, desc):
-    pvar = gdb.parse_and_eval(var)
-    text = "%*s: " % (prec, pfx)
-    for cpu in cpus.each_online_cpu():
-        text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference())
-    text += "  %s\n" % (desc)
+        text += "%10u " % (stat.dereference())
+    text += desc
     return text
 
 def x86_show_interupts(prec):
-    text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
+    info_type = gdb.lookup_type('struct irq_stat_info')
+    info = gdb.parse_and_eval('irq_stat_info')
+    bitmap = gdb.parse_and_eval('irq_stat_count_show')
+    bitsperlong = 8 * int(bitmap.type.target().sizeof)
 
-    if constants.LX_CONFIG_X86_LOCAL_APIC:
-        text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
-        text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
-        text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
-        text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
-        text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
-        if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
-            text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
-
-    if constants.LX_CONFIG_SMP:
-        text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
-        text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
-        text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
-
-    if constants.LX_CONFIG_X86_THERMAL_VECTOR:
-        text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
-
-    if constants.LX_CONFIG_X86_MCE_THRESHOLD:
-        text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
-
-    if constants.LX_CONFIG_X86_MCE_AMD:
-        text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
-
-    if constants.LX_CONFIG_X86_MCE:
-        text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
-        text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
-
-    text += show_irq_err_count(prec)
-
-    if constants.LX_CONFIG_X86_IO_APIC:
-        cnt = utils.gdb_eval_or_none("irq_mis_count")
-        if cnt is not None:
-            text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
-
-    if constants.LX_CONFIG_KVM:
-        text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
-        text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
-        text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
+    text = ""
+    for idx in range(int(info.type.sizeof / info_type.sizeof)):
+        show = bitmap[int(idx / bitsperlong)]
+        if not show & 1 << int(idx % bitsperlong):
+            continue
+        pfx = info[idx]['symbol'].string()
+        desc = info[idx]['text'].string()
+        text += x86_show_irqstat(prec, pfx, idx, desc)
 
     return text
[patch V6 08/16] genirq: Expose nr_irqs in core code
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

... to avoid function calls in the core code to retrieve the maximum number
of interrupts.

Rename it to 'total_nr_irqs' as 'nr_irqs' is too generic and fix up the
'nr_irqs' reference in the related GDB script as well.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V3: Fix up the missing usage sites and gdb script - Radu
---
 kernel/irq/internals.h          |    1 +
 kernel/irq/irqdesc.c            |   28 ++++++++++++++--------------
 kernel/irq/proc.c               |    4 ++--
 scripts/gdb/linux/interrupts.py |    2 +-
 4 files changed, 18 insertions(+), 17 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -21,6 +21,7 @@
 
 extern bool noirqdebug;
 extern int irq_poll_cpu;
+extern unsigned int total_nr_irqs;
 
 extern struct irqaction chained_action;
 
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -140,14 +140,14 @@ static void desc_set_defaults(unsigned i
 	desc_smp_init(desc, node, affinity);
 }
 
-static unsigned int nr_irqs = NR_IRQS;
+unsigned int total_nr_irqs __read_mostly = NR_IRQS;
 
 /**
  * irq_get_nr_irqs() - Number of interrupts supported by the system.
  */
 unsigned int irq_get_nr_irqs(void)
 {
-	return nr_irqs;
+	return total_nr_irqs;
 }
 EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
 
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
  */
 unsigned int irq_set_nr_irqs(unsigned int nr)
 {
-	nr_irqs = nr;
+	total_nr_irqs = nr;
 
 	return nr;
 }
@@ -187,9 +187,9 @@ static unsigned int irq_find_at_or_after
 	struct irq_desc *desc;
 
 	guard(rcu)();
-	desc = mt_find(&sparse_irqs, &index, nr_irqs);
+	desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
 
-	return desc ? irq_desc_get_irq(desc) : nr_irqs;
+	return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
 }
 
 static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -543,7 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
 {
 	if (nr > MAX_SPARSE_IRQS)
 		return false;
-	nr_irqs = nr;
+	total_nr_irqs = nr;
 	return true;
 }
 
@@ -557,16 +557,16 @@ int __init early_irq_init(void)
 	/* Let arch update nr_irqs and return the nr of preallocated irqs */
 	initcnt = arch_probe_nr_irqs();
 	printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
-	       NR_IRQS, nr_irqs, initcnt);
+	       NR_IRQS, total_nr_irqs, initcnt);
 
-	if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS))
-		nr_irqs = MAX_SPARSE_IRQS;
+	if (WARN_ON(total_nr_irqs > MAX_SPARSE_IRQS))
+		total_nr_irqs = MAX_SPARSE_IRQS;
 
 	if (WARN_ON(initcnt > MAX_SPARSE_IRQS))
 		initcnt = MAX_SPARSE_IRQS;
 
-	if (initcnt > nr_irqs)
-		nr_irqs = initcnt;
+	if (initcnt > total_nr_irqs)
+		total_nr_irqs = initcnt;
 
 	for (i = 0; i < initcnt; i++) {
 		desc = alloc_desc(i, node, 0, NULL, NULL);
@@ -862,7 +862,7 @@ void irq_free_descs(unsigned int from, u
 {
 	int i;
 
-	if (from >= nr_irqs || (from + cnt) > nr_irqs)
+	if (from >= total_nr_irqs || (from + cnt) > total_nr_irqs)
 		return;
 
 	guard(mutex)(&sparse_irq_lock);
@@ -911,7 +911,7 @@ int __ref __irq_alloc_descs(int irq, uns
 	if (irq >=0 && start != irq)
 		return -EEXIST;
 
-	if (start + cnt > nr_irqs) {
+	if (start + cnt > total_nr_irqs) {
 		if (!irq_expand_nr_irqs(start + cnt))
 			return -ENOMEM;
 	}
@@ -923,7 +923,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
  * irq_get_next_irq - get next allocated irq number
  * @offset:	where to start the search
  *
- * Returns next irq number after offset or nr_irqs if none is found.
+ * Returns next irq number after offset or total_nr_irqs if none is found.
  */
 unsigned int irq_get_next_irq(unsigned int offset)
 {
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -447,7 +447,7 @@ int __weak arch_show_interrupts(struct s
 }
 
 #ifndef ACTUAL_NR_IRQS
-# define ACTUAL_NR_IRQS irq_get_nr_irqs()
+# define ACTUAL_NR_IRQS total_nr_irqs
 #endif
 
 /* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
@@ -489,7 +489,7 @@ void irq_proc_emit_counts(struct seq_fil
 
 int show_interrupts(struct seq_file *p, void *v)
 {
-	const unsigned int nr_irqs = irq_get_nr_irqs();
+	const unsigned int nr_irqs = total_nr_irqs;
 	static int prec;
 
 	int i = *(loff_t *) v, j;
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -174,7 +174,7 @@ irq_desc_type = utils.CachedType("struct
         super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA)
 
     def invoke(self, arg, from_tty):
-        nr_irqs = gdb.parse_and_eval("nr_irqs")
+        nr_irqs = gdb.parse_and_eval("total_nr_irqs")
         prec = 3
         j = 1000
         while prec < 10 and j <= nr_irqs:
Re: [patch V6 08/16] genirq: Expose nr_irqs in core code
Posted by Shrikanth Hegde 5 days, 8 hours ago

On 5/18/26 1:32 AM, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@kernel.org>
> 
> ... to avoid function calls in the core code to retrieve the maximum number
> of interrupts.
> 
> Rename it to 'total_nr_irqs' as 'nr_irqs' is too generic and fix up the
> 'nr_irqs' reference in the related GDB script as well.
> 
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> Reviewed-by: Radu Rendec <radu@rendec.net>
> ---
> V3: Fix up the missing usage sites and gdb script - Radu
> ---
>   kernel/irq/internals.h          |    1 +
>   kernel/irq/irqdesc.c            |   28 ++++++++++++++--------------
>   kernel/irq/proc.c               |    4 ++--
>   scripts/gdb/linux/interrupts.py |    2 +-
>   4 files changed, 18 insertions(+), 17 deletions(-)
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -21,6 +21,7 @@
>   
>   extern bool noirqdebug;
>   extern int irq_poll_cpu;
> +extern unsigned int total_nr_irqs;
>   
>   extern struct irqaction chained_action;
>   
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -140,14 +140,14 @@ static void desc_set_defaults(unsigned i
>   	desc_smp_init(desc, node, affinity);
>   }
>   
> -static unsigned int nr_irqs = NR_IRQS;
> +unsigned int total_nr_irqs __read_mostly = NR_IRQS;
>   
>   /**
>    * irq_get_nr_irqs() - Number of interrupts supported by the system.
>    */
>   unsigned int irq_get_nr_irqs(void)
>   {
> -	return nr_irqs;
> +	return total_nr_irqs;
>   }
>   EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
>   
> @@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
>    */
>   unsigned int irq_set_nr_irqs(unsigned int nr)
>   {
> -	nr_irqs = nr;
> +	total_nr_irqs = nr;
>   
>   	return nr;
>   }
> @@ -187,9 +187,9 @@ static unsigned int irq_find_at_or_after
>   	struct irq_desc *desc;
>   
>   	guard(rcu)();
> -	desc = mt_find(&sparse_irqs, &index, nr_irqs);
> +	desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
>   
> -	return desc ? irq_desc_get_irq(desc) : nr_irqs;
> +	return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
>   }
>   
>   static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
> @@ -543,7 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
>   {
>   	if (nr > MAX_SPARSE_IRQS)
>   		return false;
> -	nr_irqs = nr;
> +	total_nr_irqs = nr;
>   	return true;
>   }
>   
> @@ -557,16 +557,16 @@ int __init early_irq_init(void)
>   	/* Let arch update nr_irqs and return the nr of preallocated irqs */
>   	initcnt = arch_probe_nr_irqs();
>   	printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
> -	       NR_IRQS, nr_irqs, initcnt);
> +	       NR_IRQS, total_nr_irqs, initcnt);
>   
> -	if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS))
> -		nr_irqs = MAX_SPARSE_IRQS;
> +	if (WARN_ON(total_nr_irqs > MAX_SPARSE_IRQS))
> +		total_nr_irqs = MAX_SPARSE_IRQS;
>   
>   	if (WARN_ON(initcnt > MAX_SPARSE_IRQS))
>   		initcnt = MAX_SPARSE_IRQS;
>   
> -	if (initcnt > nr_irqs)
> -		nr_irqs = initcnt;
> +	if (initcnt > total_nr_irqs)
> +		total_nr_irqs = initcnt;
>   
>   	for (i = 0; i < initcnt; i++) {
>   		desc = alloc_desc(i, node, 0, NULL, NULL);
> @@ -862,7 +862,7 @@ void irq_free_descs(unsigned int from, u
>   {
>   	int i;
>   
> -	if (from >= nr_irqs || (from + cnt) > nr_irqs)
> +	if (from >= total_nr_irqs || (from + cnt) > total_nr_irqs)
>   		return;
>   
>   	guard(mutex)(&sparse_irq_lock);
> @@ -911,7 +911,7 @@ int __ref __irq_alloc_descs(int irq, uns
>   	if (irq >=0 && start != irq)
>   		return -EEXIST;
>   
> -	if (start + cnt > nr_irqs) {
> +	if (start + cnt > total_nr_irqs) {
>   		if (!irq_expand_nr_irqs(start + cnt))
>   			return -ENOMEM;
>   	}
> @@ -923,7 +923,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
>    * irq_get_next_irq - get next allocated irq number
>    * @offset:	where to start the search
>    *
> - * Returns next irq number after offset or nr_irqs if none is found.
> + * Returns next irq number after offset or total_nr_irqs if none is found.
>    */
>   unsigned int irq_get_next_irq(unsigned int offset)
>   {
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -447,7 +447,7 @@ int __weak arch_show_interrupts(struct s
>   }
>   
>   #ifndef ACTUAL_NR_IRQS
> -# define ACTUAL_NR_IRQS irq_get_nr_irqs()
> +# define ACTUAL_NR_IRQS total_nr_irqs
>   #endif
>   
>   /* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
> @@ -489,7 +489,7 @@ void irq_proc_emit_counts(struct seq_fil
>   
>   int show_interrupts(struct seq_file *p, void *v)
>   {
> -	const unsigned int nr_irqs = irq_get_nr_irqs();
> +	const unsigned int nr_irqs = total_nr_irqs;
>   	static int prec;
>   
>   	int i = *(loff_t *) v, j;
> --- a/scripts/gdb/linux/interrupts.py
> +++ b/scripts/gdb/linux/interrupts.py
> @@ -174,7 +174,7 @@ irq_desc_type = utils.CachedType("struct
>           super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA)
>   
>       def invoke(self, arg, from_tty):
> -        nr_irqs = gdb.parse_and_eval("nr_irqs")
> +        nr_irqs = gdb.parse_and_eval("total_nr_irqs")
>           prec = 3
>           j = 1000
>           while prec < 10 and j <= nr_irqs:
> 

nr_irqs was used as arg in some of those irq_chip methods.
This can help avoid any such confusion.

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
[patch V6 09/16] genirq/manage: Make NMI cleanup RT safe
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Potentially blocking functions cannot be invoked with interrupts disabled
and a raw spin lock held. Restructure the code so that they are invoked
outside of the descriptor lock held region.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
---
V4: New patch. Found when adding the validation update to it
---
 kernel/irq/manage.c |   37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2026,24 +2026,30 @@ const void *free_irq(unsigned int irq, v
 }
 EXPORT_SYMBOL(free_irq);
 
-/* This function must be called with desc->lock held */
 static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
 {
+	struct irqaction *action = NULL;
 	const char *devname = NULL;
 
-	desc->istate &= ~IRQS_NMI;
+	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+		irq_nmi_teardown(desc);
 
-	if (!WARN_ON(desc->action == NULL)) {
-		irq_pm_remove_action(desc, desc->action);
-		devname = desc->action->name;
-		unregister_handler_proc(irq, desc->action);
+		desc->istate &= ~IRQS_NMI;
 
-		kfree(desc->action);
+		if (!WARN_ON(desc->action == NULL)) {
+			action = desc->action;
+			irq_pm_remove_action(desc, action);
+			devname = action->name;
+		}
 		desc->action = NULL;
+
+		irq_settings_clr_disable_unlazy(desc);
+		irq_shutdown_and_deactivate(desc);
 	}
 
-	irq_settings_clr_disable_unlazy(desc);
-	irq_shutdown_and_deactivate(desc);
+	if (action)
+		unregister_handler_proc(irq, action);
+	kfree(action);
 
 	irq_release_resources(desc);
 
@@ -2067,8 +2073,6 @@ const void *free_nmi(unsigned int irq, v
 	if (WARN_ON(desc->depth == 0))
 		disable_nmi_nosync(irq);
 
-	guard(raw_spinlock_irqsave)(&desc->lock);
-	irq_nmi_teardown(desc);
 	return __cleanup_nmi(irq, desc);
 }
 
@@ -2318,13 +2322,14 @@ int request_nmi(unsigned int irq, irq_ha
 		/* Setup NMI state */
 		desc->istate |= IRQS_NMI;
 		retval = irq_nmi_setup(desc);
-		if (retval) {
-			__cleanup_nmi(irq, desc);
-			return -EINVAL;
-		}
-		return 0;
 	}
 
+	if (retval) {
+		__cleanup_nmi(irq, desc);
+		return -EINVAL;
+	}
+	return 0;
+
 err_irq_setup:
 	irq_chip_pm_put(&desc->irq_data);
 err_out:
[patch V6 10/16] genirq: Cache the condition for /proc/interrupts exposure
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

show_interrupts() evaluates a boatload of conditions to establish whether
it should expose an interrupt in /proc/interrupts or not.

That can be simplified by caching the condition in an internal status flag,
which is updated when one of the relevant inputs changes.

The irq_desc::kstat_irq check is dropped because visible interrupt
descriptors always have a valid pointer.

As a result the number of instructions and branches for reading
/proc/interrupts is reduced significantly.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
V4: Cover freeing of per CPU and NMI type interrupts
V3: Remove the historical kstat_irq check - Radu
V2: s/IRQF_/IRQ/ and fixup the enum treatment - Dmitry
---
 include/linux/irq.h    |    1 +
 kernel/irq/chip.c      |    2 ++
 kernel/irq/internals.h |    2 ++
 kernel/irq/manage.c    |    8 +++++++-
 kernel/irq/proc.c      |   15 +++++++++++----
 kernel/irq/settings.h  |   13 +++++++++++++
 6 files changed, 36 insertions(+), 5 deletions(-)
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -103,6 +103,7 @@ enum {
 	IRQ_DISABLE_UNLAZY	= (1 << 19),
 	IRQ_HIDDEN		= (1 << 20),
 	IRQ_NO_DEBUG		= (1 << 21),
+	IRQ_RESERVED		= (1 << 22),
 };
 
 #define IRQF_MODIFY_MASK	\
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1007,6 +1007,7 @@ static void
 		WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
 		irq_activate_and_startup(desc, IRQ_RESEND);
 	}
+	irq_proc_update_valid(desc);
 }
 
 void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
@@ -1067,6 +1068,7 @@ void irq_modify_status(unsigned int irq,
 			trigger = tmp;
 
 		irqd_set(&desc->irq_data, trigger);
+		irq_proc_update_valid(desc);
 	}
 }
 EXPORT_SYMBOL_GPL(irq_modify_status);
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -123,6 +123,7 @@ extern void register_irq_proc(unsigned i
 extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
+void irq_proc_update_valid(struct irq_desc *desc);
 #else
 static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { }
@@ -130,6 +131,7 @@ static inline void register_handler_proc
 					 struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
 					   struct irqaction *action) { }
+static inline void irq_proc_update_valid(struct irq_desc *desc) { }
 #endif
 
 extern bool irq_can_set_affinity_usr(unsigned int irq);
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1802,6 +1802,7 @@ static int
 		__enable_irq(desc);
 	}
 
+	irq_proc_update_valid(desc);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	chip_bus_sync_unlock(desc);
 	mutex_unlock(&desc->request_mutex);
@@ -1906,6 +1907,7 @@ static struct irqaction *__free_irq(stru
 		desc->affinity_hint = NULL;
 #endif
 
+	irq_proc_update_valid(desc);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	/*
 	 * Drop bus_lock here so the changes which were done in the chip
@@ -2047,6 +2049,8 @@ static const void *__cleanup_nmi(unsigne
 		irq_shutdown_and_deactivate(desc);
 	}
 
+	irq_proc_update_valid(desc);
+
 	if (action)
 		unregister_handler_proc(irq, action);
 	kfree(action);
@@ -2433,8 +2437,10 @@ static struct irqaction *__free_percpu_i
 		*action_ptr = action->next;
 
 		/* Demote from NMI if we killed the last action */
-		if (!desc->action)
+		if (!desc->action) {
 			desc->istate &= ~IRQS_NMI;
+			irq_proc_update_valid(desc);
+		}
 	}
 
 	unregister_handler_proc(irq, action);
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -439,6 +439,16 @@ void init_irq_proc(void)
 		register_irq_proc(irq, desc);
 }
 
+void irq_proc_update_valid(struct irq_desc *desc)
+{
+	u32 set = _IRQ_PROC_VALID;
+
+	if (irq_settings_is_hidden(desc) || irq_desc_is_chained(desc) || !desc->action)
+		set = 0;
+
+	irq_settings_update_proc_valid(desc, set);
+}
+
 #ifdef CONFIG_GENERIC_IRQ_SHOW
 
 int __weak arch_show_interrupts(struct seq_file *p, int prec)
@@ -515,10 +525,7 @@ int show_interrupts(struct seq_file *p,
 
 	guard(rcu)();
 	desc = irq_to_desc(i);
-	if (!desc || irq_settings_is_hidden(desc))
-		return 0;
-
-	if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
+	if (!desc || !irq_settings_proc_valid(desc))
 		return 0;
 
 	seq_printf(p, "%*d:", prec, i);
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -18,6 +18,7 @@ enum {
 	_IRQ_DISABLE_UNLAZY	= IRQ_DISABLE_UNLAZY,
 	_IRQ_HIDDEN		= IRQ_HIDDEN,
 	_IRQ_NO_DEBUG		= IRQ_NO_DEBUG,
+	_IRQ_PROC_VALID		= IRQ_RESERVED,
 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
 };
 
@@ -34,6 +35,7 @@ enum {
 #define IRQ_DISABLE_UNLAZY	GOT_YOU_MORON
 #define IRQ_HIDDEN		GOT_YOU_MORON
 #define IRQ_NO_DEBUG		GOT_YOU_MORON
+#define IRQ_RESERVED		GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
 #define IRQF_MODIFY_MASK	GOT_YOU_MORON
 
@@ -180,3 +182,14 @@ static inline bool irq_settings_no_debug
 {
 	return desc->status_use_accessors & _IRQ_NO_DEBUG;
 }
+
+static inline bool irq_settings_proc_valid(struct irq_desc *desc)
+{
+	return desc->status_use_accessors & _IRQ_PROC_VALID;
+}
+
+static inline void irq_settings_update_proc_valid(struct irq_desc *desc, u32 set)
+{
+	desc->status_use_accessors &= ~_IRQ_PROC_VALID;
+	desc->status_use_accessors |= (set & _IRQ_PROC_VALID);
+}
[patch V6 11/16] genirq: Calculate precision only when required
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Calculating the precision of the interrupt number column on every initial
show_interrupts() invocation is a pointless exercise as the underlying
maximum number of interrupts rarely changes.

Calculate it only when that number is modified and let show_interrupts()
use the cached value.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
Reviewed-by: Radu Rendec <radu@rendec.net>
---
 kernel/irq/internals.h |    6 ++++++
 kernel/irq/irqdesc.c   |   10 ++++++----
 kernel/irq/proc.c      |   33 +++++++++++++++++++++++----------
 3 files changed, 35 insertions(+), 14 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -134,6 +134,12 @@ static inline void unregister_handler_pr
 static inline void irq_proc_update_valid(struct irq_desc *desc) { }
 #endif
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
+void irq_proc_calc_prec(void);
+#else
+static inline void irq_proc_calc_prec(void) { }
+#endif
+
 extern bool irq_can_set_affinity_usr(unsigned int irq);
 
 extern int irq_do_set_affinity(struct irq_data *data,
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -157,13 +157,12 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
  *
  * Return: @nr.
  */
-unsigned int irq_set_nr_irqs(unsigned int nr)
+unsigned int __init irq_set_nr_irqs(unsigned int nr)
 {
 	total_nr_irqs = nr;
-
+	irq_proc_calc_prec();
 	return nr;
 }
-EXPORT_SYMBOL_GPL(irq_set_nr_irqs);
 
 static DEFINE_MUTEX(sparse_irq_lock);
 static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs,
@@ -544,6 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
 	if (nr > MAX_SPARSE_IRQS)
 		return false;
 	total_nr_irqs = nr;
+	irq_proc_calc_prec();
 	return true;
 }
 
@@ -572,6 +572,7 @@ int __init early_irq_init(void)
 		desc = alloc_desc(i, node, 0, NULL, NULL);
 		irq_insert_desc(i, desc);
 	}
+	irq_proc_calc_prec();
 	return arch_early_irq_init();
 }
 
@@ -592,7 +593,7 @@ int __init early_irq_init(void)
 
 	init_irq_default_affinity();
 
-	printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
+	pr_info("NR_IRQS: %d\n", NR_IRQS);
 
 	count = ARRAY_SIZE(irq_desc);
 
@@ -602,6 +603,7 @@ int __init early_irq_init(void)
 			goto __free_desc_res;
 	}
 
+	irq_proc_calc_prec();
 	return arch_early_irq_init();
 
 __free_desc_res:
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -456,10 +456,25 @@ int __weak arch_show_interrupts(struct s
 	return 0;
 }
 
+static struct irq_proc_constraints {
+	unsigned int	num_prec;
+} irq_proc_constraints __read_mostly = {
+	.num_prec	= 3,
+};
+
 #ifndef ACTUAL_NR_IRQS
 # define ACTUAL_NR_IRQS total_nr_irqs
 #endif
 
+void irq_proc_calc_prec(void)
+{
+	unsigned int prec, n;
+
+	for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec)
+		n *= 10;
+	WRITE_ONCE(irq_proc_constraints.num_prec, prec);
+}
+
 /* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
 #define ZSTR1 "          0"
 #define ZSTR1_LEN	(sizeof(ZSTR1) - 1)
@@ -499,9 +514,7 @@ void irq_proc_emit_counts(struct seq_fil
 
 int show_interrupts(struct seq_file *p, void *v)
 {
-	const unsigned int nr_irqs = total_nr_irqs;
-	static int prec;
-
+	unsigned int prec = READ_ONCE(irq_proc_constraints.num_prec);
 	int i = *(loff_t *) v, j;
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -514,9 +527,6 @@ int show_interrupts(struct seq_file *p,
 
 	/* print header and calculate the width of the first column */
 	if (i == 0) {
-		for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
-			j *= 10;
-
 		seq_printf(p, "%*s", prec + 8, "");
 		for_each_online_cpu(j)
 			seq_printf(p, "CPU%-8d", j);
@@ -552,13 +562,16 @@ int show_interrupts(struct seq_file *p,
 	} else {
 		seq_printf(p, "%8s", "None");
 	}
+
+	seq_putc(p, ' ');
 	if (desc->irq_data.domain)
-		seq_printf(p, " %*lu", prec, desc->irq_data.hwirq);
+		seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, prec);
 	else
 		seq_printf(p, " %*s", prec, "");
-#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
-	seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
-#endif
+
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_SHOW_LEVEL))
+		seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
+
 	if (desc->name)
 		seq_printf(p, "-%-8s", desc->name);
[patch V6 12/16] genirq/proc: Increase default interrupt number precision to four
Posted by Thomas Gleixner 1 week ago
Quite some architectures have four character wide acronyms for architecture
specific interrupts like IPI, NMI, etc.

The default precision of printing the Linux device interrupt numbers is
three, which causes quite some code to play games with adding or omitting
space after the acronym and the colon in order to keep the per CPU numbers
properly aligned.

Increase the default number precision to four in the core code and get rid
of the space games all over the place. At the same time align all
architecture specific descriptor texts left so that they show up in the
same column as the interrupt chip names, which makes the output more
uniform across architectures. Fix up the GDB script to this new scheme as
well.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
V6: New patch after Michael mentioned the various alignment issues.
---
 arch/alpha/kernel/irq.c         |    8 ++++----
 arch/arm/kernel/smp.c           |    3 +--
 arch/arm64/kernel/smp.c         |    5 ++---
 arch/loongarch/kernel/smp.c     |    2 +-
 arch/riscv/kernel/smp.c         |    3 +--
 arch/sh/kernel/irq.c            |    2 +-
 arch/sparc/kernel/irq_32.c      |   12 ++++++------
 arch/sparc/kernel/irq_64.c      |    4 ++--
 arch/um/kernel/irq.c            |    4 ++--
 arch/xtensa/kernel/irq.c        |    2 +-
 kernel/irq/proc.c               |    4 ++--
 scripts/gdb/linux/interrupts.py |   16 ++++++----------
 12 files changed, 29 insertions(+), 36 deletions(-)

--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -72,16 +72,16 @@ int arch_show_interrupts(struct seq_file
 	int j;
 
 #ifdef CONFIG_SMP
-	seq_puts(p, "IPI: ");
+	seq_puts(p, " IPI: ");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10lu ", cpu_data[j].ipi_count);
 	seq_putc(p, '\n');
 #endif
-	seq_puts(p, "PMI: ");
+	seq_puts(p, " PMI: ");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10lu ", per_cpu(irq_pmi_count, j));
-	seq_puts(p, "          Performance Monitoring\n");
-	seq_printf(p, "ERR: %10lu\n", irq_err_count);
+	seq_puts(p, " Performance Monitoring\n");
+	seq_printf(p, " ERR: %10lu\n", irq_err_count);
 	return 0;
 }
 
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -551,8 +551,7 @@ void show_ipi_list(struct seq_file *p, i
 		if (!ipi_desc[i])
 			continue;
 
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -833,11 +833,10 @@ int arch_show_interrupts(struct seq_file
 	unsigned int cpu, i;
 
 	for (i = 0; i < MAX_IPI; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
-		seq_printf(p, "      %s\n", ipi_types[i]);
+		seq_printf(p, " %s\n", ipi_types[i]);
 	}
 
 	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -88,7 +88,7 @@ void show_ipi_list(struct seq_file *p, i
 	unsigned int cpu, i;
 
 	for (i = 0; i < NR_IPI; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10);
 		seq_printf(p, " LoongArch  %d  %s\n", i + 1, ipi_types[i]);
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -226,8 +226,7 @@ void show_ipi_stats(struct seq_file *p,
 	unsigned int cpu, i;
 
 	for (i = 0; i < IPI_MAX; i++) {
-		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
-			   prec >= 4 ? " " : "");
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
 		seq_printf(p, " %s\n", ipi_names[i]);
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -46,7 +46,7 @@ int arch_show_interrupts(struct seq_file
 	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat.__nmi_count, j), 10);
-	seq_printf(p, "  Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -199,19 +199,19 @@ int arch_show_interrupts(struct seq_file
 	int j;
 
 #ifdef CONFIG_SMP
-	seq_printf(p, "RES:");
+	seq_printf(p, "%*s:", prec, "RES");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).irq_resched_count, 10);
-	seq_printf(p, "     IPI rescheduling interrupts\n");
-	seq_printf(p, "CAL:");
+	seq_printf(p, " IPI rescheduling interrupts\n");
+	seq_printf(p, "%*s:", prec, "CAL");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).irq_call_count, 10);
-	seq_printf(p, "     IPI function call interrupts\n");
+	seq_printf(p, " IPI function call interrupts\n");
 #endif
-	seq_printf(p, "NMI:");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).counter, 10);
-	seq_printf(p, "     Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 	return 0;
 }
 
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -303,10 +303,10 @@ int arch_show_interrupts(struct seq_file
 {
 	int j;
 
-	seq_printf(p, "NMI:");
+	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_put_decimal_ull_width(p, " ", cpu_data(j).__nmi_count, 10);
-	seq_printf(p, "     Non-maskable interrupts\n");
+	seq_printf(p, " Non-maskable interrupts\n");
 	return 0;
 }
 
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -716,12 +716,12 @@ int arch_show_interrupts(struct seq_file
 	seq_printf(p, "%*s: ", prec, "RES");
 	for_each_online_cpu(cpu)
 		seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
-	seq_puts(p, "  Rescheduling interrupts\n");
+	seq_puts(p, " Rescheduling interrupts\n");
 
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(cpu)
 		seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
-	seq_puts(p, "  Function call interrupts\n");
+	seq_puts(p, " Function call interrupts\n");
 #endif
 
 	return 0;
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -59,7 +59,7 @@ int arch_show_interrupts(struct seq_file
 	seq_printf(p, "%*s:", prec, "NMI");
 	for_each_online_cpu(cpu)
 		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
-	seq_puts(p, "   Non-maskable interrupts\n");
+	seq_puts(p, " Non-maskable interrupts\n");
 #endif
 	return 0;
 }
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -459,7 +459,7 @@ int __weak arch_show_interrupts(struct s
 static struct irq_proc_constraints {
 	unsigned int	num_prec;
 } irq_proc_constraints __read_mostly = {
-	.num_prec	= 3,
+	.num_prec	= 4,
 };
 
 #ifndef ACTUAL_NR_IRQS
@@ -470,7 +470,7 @@ void irq_proc_calc_prec(void)
 {
 	unsigned int prec, n;
 
-	for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec)
+	for (prec = 4, n = 10000; prec < 10 && n <= total_nr_irqs; ++prec)
 		n *= 10;
 	WRITE_ONCE(irq_proc_constraints.num_prec, prec);
 }
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -131,23 +131,19 @@ irq_desc_type = utils.CachedType("struct
     if nr_ipi is None or ipi_desc is None or ipi_types is None:
         return text
 
-    if prec >= 4:
-        sep = " "
-    else:
-        sep = ""
-
     for ipi in range(nr_ipi):
-        text += "%*s%u:%s" % (prec - 1, "IPI", ipi, sep)
+        text += "%*s%u: " % (prec - 1, "IPI", ipi)
         desc = ipi_desc[ipi].cast(irq_desc_type.get_type().pointer())
         if desc == 0:
             continue
         for cpu in cpus.each_online_cpu():
-            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
-        text += "      %s" % (ipi_types[ipi].string())
+            text += "%10u " % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
+        text += "%s" % (ipi_types[ipi].string())
         text += "\n"
     return text
 
 def aarch64_show_interrupts(prec):
+    # Does not work for ARM64 as "ipi_desc" is not available there
     text = arm_common_show_interrupts(prec)
     text += "%*s: %10lu\n" % (prec, "ERR", gdb.parse_and_eval("irq_err_count"))
     return text
@@ -175,8 +171,8 @@ irq_desc_type = utils.CachedType("struct
 
     def invoke(self, arg, from_tty):
         nr_irqs = gdb.parse_and_eval("total_nr_irqs")
-        prec = 3
-        j = 1000
+        prec = 4
+        j = 10000
         while prec < 10 and j <= nr_irqs:
             prec += 1
             j *= 10
[patch V6 13/16] genirq: Add rcuref count to struct irq_desc
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Prepare for a smarter iterator for /proc/interrupts so that the next
interrupt descriptor can be cached after lookup.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 include/linux/irqdesc.h |    2 ++
 kernel/irq/internals.h  |   17 ++++++++++++++++-
 kernel/irq/irqdesc.c    |   21 +++++++++++++--------
 3 files changed, 31 insertions(+), 9 deletions(-)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -70,6 +70,7 @@ struct irq_redirect {
  *			IRQF_NO_SUSPEND set
  * @force_resume_depth:	number of irqactions on a irq descriptor with
  *			IRQF_FORCE_RESUME set
+ * @refcnt:		Reference count mainly for /proc/interrupts
  * @rcu:		rcu head for delayed free
  * @kobj:		kobject used to represent this struct in sysfs
  * @request_mutex:	mutex to protect request/free before locking desc->lock
@@ -119,6 +120,7 @@ struct irq_desc {
 	struct dentry		*debugfs_file;
 	const char		*dev_name;
 #endif
+	rcuref_t		refcnt;
 #ifdef CONFIG_SPARSE_IRQ
 	struct rcu_head		rcu;
 	struct kobject		kobj;
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -9,6 +9,7 @@
 #include <linux/irqdesc.h>
 #include <linux/kernel_stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/rcuref.h>
 #include <linux/sched/clock.h>
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -101,9 +102,23 @@ extern void unmask_irq(struct irq_desc *
 extern void unmask_threaded_irq(struct irq_desc *desc);
 
 #ifdef CONFIG_SPARSE_IRQ
-static inline void irq_mark_irq(unsigned int irq) { }
+static __always_inline void irq_mark_irq(unsigned int irq) { }
+void irq_desc_free_rcu(struct irq_desc *desc);
+
+static __always_inline bool irq_desc_get_ref(struct irq_desc *desc)
+{
+	return rcuref_get(&desc->refcnt);
+}
+
+static __always_inline void irq_desc_put_ref(struct irq_desc *desc)
+{
+	if (rcuref_put(&desc->refcnt))
+		irq_desc_free_rcu(desc);
+}
 #else
 extern void irq_mark_irq(unsigned int irq);
+static __always_inline bool irq_desc_get_ref(struct irq_desc *desc) { return true; }
+static __always_inline void irq_desc_put_ref(struct irq_desc *desc) { }
 #endif
 
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -137,6 +137,7 @@ static void desc_set_defaults(unsigned i
 	desc->tot_count = 0;
 	desc->name = NULL;
 	desc->owner = owner;
+	rcuref_init(&desc->refcnt, 1);
 	desc_smp_init(desc, node, affinity);
 }
 
@@ -465,6 +466,17 @@ static void delayed_free_desc(struct rcu
 	kobject_put(&desc->kobj);
 }
 
+void irq_desc_free_rcu(struct irq_desc *desc)
+{
+	/*
+	 * We free the descriptor, masks and stat fields via RCU. That
+	 * allows demultiplex interrupts to do rcu based management of
+	 * the child interrupts.
+	 * This also allows us to use rcu in kstat_irqs_usr().
+	 */
+	call_rcu(&desc->rcu, delayed_free_desc);
+}
+
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -483,14 +495,7 @@ static void free_desc(unsigned int irq)
 	 */
 	irq_sysfs_del(desc);
 	delete_irq_desc(irq);
-
-	/*
-	 * We free the descriptor, masks and stat fields via RCU. That
-	 * allows demultiplex interrupts to do rcu based management of
-	 * the child interrupts.
-	 * This also allows us to use rcu in kstat_irqs_usr().
-	 */
-	call_rcu(&desc->rcu, delayed_free_desc);
+	irq_desc_put_ref(desc);
 }
 
 static int alloc_descs(unsigned int start, unsigned int cnt, int node,
[patch V6 14/16] genirq: Expose irq_find_desc_at_or_after() in core code
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

... in preparation for a smarter iterator for /proc/interrupts.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
V3: Add missing RCU guard and a lockdep assert - Dmitry
---
 kernel/irq/internals.h |    2 ++
 kernel/irq/irqdesc.c   |   15 ++++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -155,6 +155,8 @@ void irq_proc_calc_prec(void);
 static inline void irq_proc_calc_prec(void) { }
 #endif
 
+struct irq_desc *irq_find_desc_at_or_after(unsigned int offset);
+
 extern bool irq_can_set_affinity_usr(unsigned int irq);
 
 extern int irq_do_set_affinity(struct irq_data *data,
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -181,15 +181,12 @@ static int irq_find_free_area(unsigned i
 	return mas.index;
 }
 
-static unsigned int irq_find_at_or_after(unsigned int offset)
+struct irq_desc *irq_find_desc_at_or_after(unsigned int offset)
 {
 	unsigned long index = offset;
-	struct irq_desc *desc;
-
-	guard(rcu)();
-	desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
 
-	return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
+	lockdep_assert_in_rcu_read_lock();
+	return mt_find(&sparse_irqs, &index, total_nr_irqs);
 }
 
 static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -934,7 +931,11 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
  */
 unsigned int irq_get_next_irq(unsigned int offset)
 {
-	return irq_find_at_or_after(offset);
+	struct irq_desc *desc;
+
+	guard(rcu)();
+	desc = irq_find_desc_at_or_after(offset);
+	return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
 }
 
 struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
[patch V6 15/16] genirq/proc: Runtime size the chip name
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

The chip name column in the /proc/interrupts output is 8 characters and
right aligned, which causes visual clutter due to the fixed length and the
alignment. Many interrupt chips, e.g. PCI/MSI[X] have way longer names.

Update the length when a chip is assigned to an interrupt and utilize this
information for the output. Align it left so all chip names start at the
beginning of the column.

Update the GDB script as well and disentangle the header maze so it
actually works with all .config combinations.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
V6: Fix recursive header inclusion - 0day
V3: New patch
---
 kernel/irq/chip.c               |    6 +++--
 kernel/irq/debugfs.h            |   44 ++++++++++++++++++++++++++++++++++++
 kernel/irq/internals.h          |   48 ++--------------------------------------
 kernel/irq/irqdomain.c          |    5 +++-
 kernel/irq/proc.c               |   33 +++++++++++++++++++++++----
 kernel/irq/proc.h               |   13 ++++++++++
 scripts/gdb/linux/interrupts.py |   23 ++++++++++++-------
 7 files changed, 111 insertions(+), 61 deletions(-)
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -47,9 +47,11 @@ int irq_set_chip(unsigned int irq, const
 		scoped_irqdesc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
 		ret = 0;
 	}
-	/* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. */
-	if (!ret)
+	if (!ret) {
+		/* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. */
 		irq_mark_irq(irq);
+		irq_proc_update_chip(chip);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(irq_set_chip);
--- /dev/null
+++ b/kernel/irq/debugfs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_IRQ_DEBUGFS_H
+#define _KERNEL_IRQ_DEBUGFS_H
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+#include <linux/debugfs.h>
+
+struct irq_bit_descr {
+	unsigned int	mask;
+	char		*name;
+};
+
+#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
+
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+			 const struct irq_bit_descr *sd, int size);
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
+static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
+{
+	debugfs_remove(desc->debugfs_file);
+	kfree(desc->dev_name);
+}
+void irq_debugfs_copy_devname(int irq, struct device *dev);
+# ifdef CONFIG_IRQ_DOMAIN
+void irq_domain_debugfs_init(struct dentry *root);
+# else
+static inline void irq_domain_debugfs_init(struct dentry *root)
+{
+}
+# endif
+#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
+static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
+{
+}
+static inline void irq_remove_debugfs_entry(struct irq_desc *d)
+{
+}
+static inline void irq_debugfs_copy_devname(int irq, struct device *dev)
+{
+}
+#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
+
+#endif
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -12,6 +12,9 @@
 #include <linux/rcuref.h>
 #include <linux/sched/clock.h>
 
+#include "debugfs.h"
+#include "proc.h"
+
 #ifdef CONFIG_SPARSE_IRQ
 # define MAX_SPARSE_IRQS	INT_MAX
 #else
@@ -149,12 +152,6 @@ static inline void unregister_handler_pr
 static inline void irq_proc_update_valid(struct irq_desc *desc) { }
 #endif
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
-void irq_proc_calc_prec(void);
-#else
-static inline void irq_proc_calc_prec(void) { }
-#endif
-
 struct irq_desc *irq_find_desc_at_or_after(unsigned int offset);
 
 extern bool irq_can_set_affinity_usr(unsigned int irq);
@@ -398,42 +395,3 @@ static inline struct irq_data *irqd_get_
 	return NULL;
 #endif
 }
-
-#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-#include <linux/debugfs.h>
-
-struct irq_bit_descr {
-	unsigned int	mask;
-	char		*name;
-};
-
-#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
-
-void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
-			 const struct irq_bit_descr *sd, int size);
-
-void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
-static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
-{
-	debugfs_remove(desc->debugfs_file);
-	kfree(desc->dev_name);
-}
-void irq_debugfs_copy_devname(int irq, struct device *dev);
-# ifdef CONFIG_IRQ_DOMAIN
-void irq_domain_debugfs_init(struct dentry *root);
-# else
-static inline void irq_domain_debugfs_init(struct dentry *root)
-{
-}
-# endif
-#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
-static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
-{
-}
-static inline void irq_remove_debugfs_entry(struct irq_desc *d)
-{
-}
-static inline void irq_debugfs_copy_devname(int irq, struct device *dev)
-{
-}
-#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -20,6 +20,8 @@
 #include <linux/smp.h>
 #include <linux/fs.h>
 
+#include "proc.h"
+
 static LIST_HEAD(irq_domain_list);
 static DEFINE_MUTEX(irq_domain_mutex);
 
@@ -1532,6 +1534,7 @@ int irq_domain_set_hwirq_and_chip(struct
 	irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip);
 	irq_data->chip_data = chip_data;
 
+	irq_proc_update_chip(chip);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
@@ -2081,7 +2084,7 @@ static void irq_domain_free_one_irq(stru
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-#include "internals.h"
+#include "debugfs.h"
 
 static struct dentry *domain_dir;
 
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -456,10 +456,14 @@ int __weak arch_show_interrupts(struct s
 	return 0;
 }
 
+static DEFINE_RAW_SPINLOCK(irq_proc_constraints_lock);
+
 static struct irq_proc_constraints {
 	unsigned int	num_prec;
+	unsigned int	chip_width;
 } irq_proc_constraints __read_mostly = {
 	.num_prec	= 4,
+	.chip_width	= 8,
 };
 
 #ifndef ACTUAL_NR_IRQS
@@ -472,7 +476,23 @@ void irq_proc_calc_prec(void)
 
 	for (prec = 4, n = 10000; prec < 10 && n <= total_nr_irqs; ++prec)
 		n *= 10;
-	WRITE_ONCE(irq_proc_constraints.num_prec, prec);
+
+	guard(raw_spinlock_irqsave)(&irq_proc_constraints_lock);
+	if (prec > irq_proc_constraints.num_prec)
+		WRITE_ONCE(irq_proc_constraints.num_prec, prec);
+}
+
+void irq_proc_update_chip(const struct irq_chip *chip)
+{
+	unsigned int len = chip && chip->name ? strlen(chip->name) : 0;
+
+	if (!len || len <= READ_ONCE(irq_proc_constraints.chip_width))
+		return;
+
+	/* Can be invoked from interrupt disabled contexts */
+	guard(raw_spinlock_irqsave)(&irq_proc_constraints_lock);
+	if (len > irq_proc_constraints.chip_width)
+		WRITE_ONCE(irq_proc_constraints.chip_width, len);
 }
 
 /* Same as seq_put_decimal_ull_width(p, " ", cnt, 10) */
@@ -514,6 +534,7 @@ void irq_proc_emit_counts(struct seq_fil
 
 int show_interrupts(struct seq_file *p, void *v)
 {
+	unsigned int chip_width = READ_ONCE(irq_proc_constraints.chip_width);
 	unsigned int prec = READ_ONCE(irq_proc_constraints.num_prec);
 	int i = *(loff_t *) v, j;
 	struct irqaction *action;
@@ -549,18 +570,20 @@ int show_interrupts(struct seq_file *p,
 		irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
 	else
 		irq_proc_emit_zero_counts(p, num_online_cpus());
-	seq_putc(p, ' ');
+
+	/* Enforce a visual gap */
+	seq_write(p, "  ", 2);
 
 	guard(raw_spinlock_irq)(&desc->lock);
 	if (desc->irq_data.chip) {
 		if (desc->irq_data.chip->irq_print_chip)
 			desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
 		else if (desc->irq_data.chip->name)
-			seq_printf(p, "%8s", desc->irq_data.chip->name);
+			seq_printf(p, "%-*s", chip_width, desc->irq_data.chip->name);
 		else
-			seq_printf(p, "%8s", "-");
+			seq_printf(p, "%-*s", chip_width, "-");
 	} else {
-		seq_printf(p, "%8s", "None");
+		seq_printf(p, "%-*s", chip_width, "None");
 	}
 
 	seq_putc(p, ' ');
--- /dev/null
+++ b/kernel/irq/proc.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_IRQ_PROC_H
+#define _KERNEL_IRQ_PROC_H
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
+void irq_proc_calc_prec(void);
+void irq_proc_update_chip(const struct irq_chip *chip);
+#else
+static inline void irq_proc_calc_prec(void) { }
+static inline void irq_proc_update_chip(const struct irq_chip *chip) { }
+#endif
+
+#endif
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -20,7 +20,7 @@ irq_desc_type = utils.CachedType("struct
 def irqd_is_level(desc):
     return desc['irq_data']['common']['state_use_accessors'] & constants.LX_IRQD_LEVEL
 
-def show_irq_desc(prec, irq):
+def show_irq_desc(prec, chip_width, irq):
     text = ""
 
     desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq)
@@ -58,7 +58,7 @@ irq_desc_type = utils.CachedType("struct
         else:
             name = "-"
 
-    text += "  %-8s" % (name)
+    text += "  %-*s" % (chip_width, name)
 
     if desc['irq_data']['domain']:
         text += "  %*lu" % (prec, desc['irq_data']['hwirq'])
@@ -171,11 +171,18 @@ irq_desc_type = utils.CachedType("struct
 
     def invoke(self, arg, from_tty):
         nr_irqs = gdb.parse_and_eval("total_nr_irqs")
-        prec = 4
-        j = 10000
-        while prec < 10 and j <= nr_irqs:
-            prec += 1
-            j *= 10
+        constr = utils.gdb_eval_or_none('irq_proc_constraints')
+
+        if constr:
+            prec = int(constr['num_prec'])
+            chip_width = int(constr['chip_width'])
+        else:
+            prec = 4
+            j = 10000
+            while prec < 10 and j <= nr_irqs:
+                prec += 1
+                j *= 10
+            chip_width = 8
 
         gdb.write("%*s" % (prec + 8, ""))
         for cpu in cpus.each_online_cpu():
@@ -186,7 +193,7 @@ irq_desc_type = utils.CachedType("struct
             raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?")
 
         for irq in range(nr_irqs):
-            gdb.write(show_irq_desc(prec, irq))
+            gdb.write(show_irq_desc(prec, chip_width, irq))
         gdb.write(arch_show_interrupts(prec))
[patch V6 16/16] genirq/proc: Speed up /proc/interrupts iteration
Posted by Thomas Gleixner 1 week ago
From: Thomas Gleixner <tglx@kernel.org>

Reading /proc/interrupts iterates over the interrupt number space one by
one and looks up the descriptors one by one. That's just a waste of time.

When CONFIG_GENERIC_IRQ_SHOW is enabled this can utilize the maple tree and
cache the descriptor pointer efficiently for the sequence file operations.

Implement a CONFIG_GENERIC_IRQ_SHOW specific version in the core code and
leave the fs/proc/ variant for the legacy architectures which ignore generic
code.

This reduces the time wasted for looking up the next record significantly.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
V5: Reorder the header print - Dmitry
---
 fs/proc/Makefile  |    4 +
 kernel/irq/proc.c |  118 ++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 95 insertions(+), 27 deletions(-)
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -16,7 +16,9 @@ proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
 proc-y	+= devices.o
-proc-y	+= interrupts.o
+ifneq ($(CONFIG_GENERIC_IRQ_SHOW),y)
+proc-y += interrupts.o
+endif
 proc-y	+= loadavg.o
 proc-y	+= meminfo.o
 proc-y	+= stat.o
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -451,6 +451,8 @@ void irq_proc_update_valid(struct irq_de
 
 #ifdef CONFIG_GENERIC_IRQ_SHOW
 
+#define ARCH_PROC_IRQDESC ((void *)0x00001111)
+
 int __weak arch_show_interrupts(struct seq_file *p, int prec)
 {
 	return 0;
@@ -459,6 +461,7 @@ int __weak arch_show_interrupts(struct s
 static DEFINE_RAW_SPINLOCK(irq_proc_constraints_lock);
 
 static struct irq_proc_constraints {
+	bool		print_header;
 	unsigned int	num_prec;
 	unsigned int	chip_width;
 } irq_proc_constraints __read_mostly = {
@@ -532,34 +535,28 @@ void irq_proc_emit_counts(struct seq_fil
 	irq_proc_emit_zero_counts(p, zeros);
 }
 
-int show_interrupts(struct seq_file *p, void *v)
+static int irq_seq_show(struct seq_file *p, void *v)
 {
-	unsigned int chip_width = READ_ONCE(irq_proc_constraints.chip_width);
-	unsigned int prec = READ_ONCE(irq_proc_constraints.num_prec);
-	int i = *(loff_t *) v, j;
+	struct irq_proc_constraints *constr = p->private;
+	struct irq_desc *desc = v;
 	struct irqaction *action;
-	struct irq_desc *desc;
-
-	if (i > ACTUAL_NR_IRQS)
-		return 0;
 
-	if (i == ACTUAL_NR_IRQS)
-		return arch_show_interrupts(p, prec);
-
-	/* print header and calculate the width of the first column */
-	if (i == 0) {
-		seq_printf(p, "%*s", prec + 8, "");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%-8d", j);
+	/* Print header for the first interrupt? */
+	if (constr->print_header) {
+		unsigned int cpu;
+
+		seq_printf(p, "%*s", constr->num_prec + 8, "");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%-8d", cpu);
 		seq_putc(p, '\n');
+		constr->print_header = false;
 	}
 
-	guard(rcu)();
-	desc = irq_to_desc(i);
-	if (!desc || !irq_settings_proc_valid(desc))
-		return 0;
+	if (desc == ARCH_PROC_IRQDESC)
+		return arch_show_interrupts(p, constr->num_prec);
 
-	seq_printf(p, "%*d:", prec, i);
+	seq_put_decimal_ull_width(p, "", irq_desc_get_irq(desc), constr->num_prec);
+	seq_putc(p, ':');
 
 	/*
 	 * Always output per CPU interrupts. Output device interrupts only when
@@ -579,18 +576,18 @@ int show_interrupts(struct seq_file *p,
 		if (desc->irq_data.chip->irq_print_chip)
 			desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
 		else if (desc->irq_data.chip->name)
-			seq_printf(p, "%-*s", chip_width, desc->irq_data.chip->name);
+			seq_printf(p, "%-*s", constr->chip_width, desc->irq_data.chip->name);
 		else
-			seq_printf(p, "%-*s", chip_width, "-");
+			seq_printf(p, "%-*s", constr->chip_width, "-");
 	} else {
-		seq_printf(p, "%-*s", chip_width, "None");
+		seq_printf(p, "%-*s", constr->chip_width, "None");
 	}
 
 	seq_putc(p, ' ');
 	if (desc->irq_data.domain)
-		seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, prec);
+		seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, constr->num_prec);
 	else
-		seq_printf(p, " %*s", prec, "");
+		seq_printf(p, " %*s", constr->num_prec, "");
 
 	if (IS_ENABLED(CONFIG_GENERIC_IRQ_SHOW_LEVEL))
 		seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
@@ -608,4 +605,73 @@ int show_interrupts(struct seq_file *p,
 	seq_putc(p, '\n');
 	return 0;
 }
+
+static void *irq_seq_next_desc(loff_t *pos)
+{
+	if (*pos > total_nr_irqs)
+		return NULL;
+
+	guard(rcu)();
+	for (;;) {
+		struct irq_desc *desc = irq_find_desc_at_or_after((unsigned int) *pos);
+
+		if (desc) {
+			*pos = irq_desc_get_irq(desc);
+			/*
+			 * If valid for output then try to acquire a reference
+			 * count on the descriptor so that it can't be freed
+			 * after dropping RCU read lock on return.
+			 */
+			if (irq_settings_proc_valid(desc) && irq_desc_get_ref(desc))
+				return desc;
+			(*pos)++;
+		} else {
+			*pos = total_nr_irqs;
+			return ARCH_PROC_IRQDESC;
+		}
+	}
+}
+
+static void *irq_seq_start(struct seq_file *f, loff_t *pos)
+{
+	if (!*pos) {
+		struct irq_proc_constraints *constr = f->private;
+
+		constr->num_prec = READ_ONCE(irq_proc_constraints.num_prec);
+		constr->chip_width = READ_ONCE(irq_proc_constraints.chip_width);
+		constr->print_header = true;
+	}
+	return irq_seq_next_desc(pos);
+}
+
+static void *irq_seq_next(struct seq_file *f, void *v, loff_t *pos)
+{
+	if (v && v != ARCH_PROC_IRQDESC)
+		irq_desc_put_ref(v);
+
+	(*pos)++;
+	return irq_seq_next_desc(pos);
+}
+
+static void irq_seq_stop(struct seq_file *f, void *v)
+{
+	if (v && v != ARCH_PROC_IRQDESC)
+		irq_desc_put_ref(v);
+}
+
+static const struct seq_operations irq_seq_ops = {
+	.start = irq_seq_start,
+	.next  = irq_seq_next,
+	.stop  = irq_seq_stop,
+	.show  = irq_seq_show,
+};
+
+static int __init irq_proc_init(void)
+{
+	proc_create_seq_private("interrupts", 0, NULL, &irq_seq_ops,
+				sizeof(irq_proc_constraints), NULL);
+	return 0;
+}
+fs_initcall(irq_proc_init);
+
 #endif