Currently, on arm64 platforms, handle_arch_irq is a pointer which is
set during boot, and every IRQ has to load it, so it sits in a hot
code path. We can use the runtime constant mechanism, which was
introduced by Linus, to speed up access to it.
Tested on a quad-core CA55 platform, the perf sched benchmark is
improved by ~6.5%.
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
arch/arm64/kernel/entry-common.c | 4 +++-
arch/arm64/kernel/irq.c | 9 ++++++---
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3625797e9ee8..46a4c012e15f 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -25,6 +25,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/processor.h>
+#include <asm/runtime-const.h>
#include <asm/sdei.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -139,7 +140,8 @@ static void do_interrupt_handler(struct pt_regs *regs,
set_irq_regs(old_regs);
}
-extern void (*handle_arch_irq)(struct pt_regs *);
+extern void (*_handle_arch_irq)(struct pt_regs *);
+#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
extern void (*handle_arch_fiq)(struct pt_regs *);
static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 15dedb385b9e..30629c183606 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -23,6 +23,7 @@
#include <asm/daifflags.h>
#include <asm/exception.h>
#include <asm/numa.h>
+#include <asm/runtime-const.h>
#include <asm/softirq_stack.h>
#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>
@@ -84,15 +85,17 @@ static void default_handle_fiq(struct pt_regs *regs)
panic("FIQ taken without a root FIQ handler\n");
}
-void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
+void (*_handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
+#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
- if (handle_arch_irq != default_handle_irq)
+ if (_handle_arch_irq != default_handle_irq)
return -EBUSY;
- handle_arch_irq = handle_irq;
+ _handle_arch_irq = handle_irq;
+ runtime_const_init(ptr, _handle_arch_irq);
pr_info("Root IRQ handler: %ps\n", handle_irq);
return 0;
}
--
2.51.0
On Fri, Feb 20, 2026 at 05:09:22PM +0800, Jisheng Zhang wrote:
> Currently, on arm64 platforms, the handle_arch_irq is a pointer which
> is set during booting, and every irq processing needs to access it,
> so it sits in hot code path. We can use the runtime constant mechanism
> which was introduced by Linus to speed up its accessing.
>
> Tested on Quad CA55 platform, the perf sched benchmark is improved
> by ~6.5%
That is a surprisingly large impact. :/
Does this actually have a meaningful effect on any real workload?
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
> arch/arm64/kernel/entry-common.c | 4 +++-
> arch/arm64/kernel/irq.c | 9 ++++++---
> 2 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index 3625797e9ee8..46a4c012e15f 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -25,6 +25,7 @@
> #include <asm/kprobes.h>
> #include <asm/mmu.h>
> #include <asm/processor.h>
> +#include <asm/runtime-const.h>
> #include <asm/sdei.h>
> #include <asm/stacktrace.h>
> #include <asm/sysreg.h>
> @@ -139,7 +140,8 @@ static void do_interrupt_handler(struct pt_regs *regs,
> set_irq_regs(old_regs);
> }
>
> -extern void (*handle_arch_irq)(struct pt_regs *);
> +extern void (*_handle_arch_irq)(struct pt_regs *);
> +#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
> extern void (*handle_arch_fiq)(struct pt_regs *);
We should treat handle_arch_irq and handle_arch_fiq the same way. Either
both get this, or neither do.
> static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index 15dedb385b9e..30629c183606 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -23,6 +23,7 @@
> #include <asm/daifflags.h>
> #include <asm/exception.h>
> #include <asm/numa.h>
> +#include <asm/runtime-const.h>
> #include <asm/softirq_stack.h>
> #include <asm/stacktrace.h>
> #include <asm/vmap_stack.h>
> @@ -84,15 +85,17 @@ static void default_handle_fiq(struct pt_regs *regs)
> panic("FIQ taken without a root FIQ handler\n");
> }
>
> -void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
> +void (*_handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
> +#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
This breaks the default case, since handle_arch_irq is initialized to a
bunch of garbage hex bytes (0x0123456789abcdef).
That means that if set_handle_irq() isn't called, an IRQ will result in
a call to that bogus address rather than default_handle_irq(), which'll
be more difficult to debug.
Mark.
> void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
>
> int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
> {
> - if (handle_arch_irq != default_handle_irq)
> + if (_handle_arch_irq != default_handle_irq)
> return -EBUSY;
>
> - handle_arch_irq = handle_irq;
> + _handle_arch_irq = handle_irq;
> + runtime_const_init(ptr, _handle_arch_irq);
> pr_info("Root IRQ handler: %ps\n", handle_irq);
> return 0;
> }
> --
> 2.51.0
>
>
On Mon, Feb 23, 2026 at 12:56:27PM +0000, Mark Rutland wrote:
> On Fri, Feb 20, 2026 at 05:09:22PM +0800, Jisheng Zhang wrote:
> > Currently, on arm64 platforms, the handle_arch_irq is a pointer which
> > is set during booting, and every irq processing needs to access it,
> > so it sits in hot code path. We can use the runtime constant mechanism
> > which was introduced by Linus to speed up its accessing.
> >
> > Tested on Quad CA55 platform, the perf sched benchmark is improved
> > by ~6.5%
>
> That is a surprisingly large impact. :/
>
> Does this meaningfully actually affect any real workload?
All IRQ processing is improved to this extent. The perf sched
bench (an existing and good benchmark for measuring IPIs) is used to
show how large the improvement is.
>
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > ---
> > arch/arm64/kernel/entry-common.c | 4 +++-
> > arch/arm64/kernel/irq.c | 9 ++++++---
> > 2 files changed, 9 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> > index 3625797e9ee8..46a4c012e15f 100644
> > --- a/arch/arm64/kernel/entry-common.c
> > +++ b/arch/arm64/kernel/entry-common.c
> > @@ -25,6 +25,7 @@
> > #include <asm/kprobes.h>
> > #include <asm/mmu.h>
> > #include <asm/processor.h>
> > +#include <asm/runtime-const.h>
> > #include <asm/sdei.h>
> > #include <asm/stacktrace.h>
> > #include <asm/sysreg.h>
> > @@ -139,7 +140,8 @@ static void do_interrupt_handler(struct pt_regs *regs,
> > set_irq_regs(old_regs);
> > }
> >
> > -extern void (*handle_arch_irq)(struct pt_regs *);
> > +extern void (*_handle_arch_irq)(struct pt_regs *);
> > +#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
> > extern void (*handle_arch_fiq)(struct pt_regs *);
>
> We should treat handle_arch_irq and handle_arch_fiq the same way. Either
> both get this, or neither do.
>
> > static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
> > diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> > index 15dedb385b9e..30629c183606 100644
> > --- a/arch/arm64/kernel/irq.c
> > +++ b/arch/arm64/kernel/irq.c
> > @@ -23,6 +23,7 @@
> > #include <asm/daifflags.h>
> > #include <asm/exception.h>
> > #include <asm/numa.h>
> > +#include <asm/runtime-const.h>
> > #include <asm/softirq_stack.h>
> > #include <asm/stacktrace.h>
> > #include <asm/vmap_stack.h>
> > @@ -84,15 +85,17 @@ static void default_handle_fiq(struct pt_regs *regs)
> > panic("FIQ taken without a root FIQ handler\n");
> > }
> >
> > -void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
> > +void (*_handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
> > +#define handle_arch_irq runtime_const_ptr(_handle_arch_irq)
>
> This breaks the default case, since handle_arch_irq is initialized to a
> bunch of garbage hex bytes (0x0123456789abcdef).
>
> That means that if set_handle_irq() isn't called, an IRQ will result in
> a call to that bogus address rather than default_handle_irq(), which'll
> be more difficult to debug.
Oops, you're right. If runtime constants are chosen, I will address this
comment. However, Thomas suggested using static calls instead. I found
that you raised concerns about arm64's static call implementation 5 years
ago; I mentioned this in another email a few seconds ago. Could you
please comment on that thread?
>
> Mark.
>
> > void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
> >
> > int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
> > {
> > - if (handle_arch_irq != default_handle_irq)
> > + if (_handle_arch_irq != default_handle_irq)
> > return -EBUSY;
> >
> > - handle_arch_irq = handle_irq;
> > + _handle_arch_irq = handle_irq;
> > + runtime_const_init(ptr, _handle_arch_irq);
> > pr_info("Root IRQ handler: %ps\n", handle_irq);
> > return 0;
> > }
> > --
> > 2.51.0
> >
> >
Hi Jisheng, On Fri, Feb 20, 2026 at 05:09:22PM +0800, Jisheng Zhang wrote: > Currently, on arm64 platforms, the handle_arch_irq is a pointer which > is set during booting, and every irq processing needs to access it, > so it sits in hot code path. We can use the runtime constant mechanism > which was introduced by Linus to speed up its accessing. > > Tested on Quad CA55 platform, the perf sched benchmark is improved > by ~6.5% 6.5% is a quite high margin, especially for only one pointer's change. Maybe it is good to share more info for which compiler you are using, how you tested and the detailed results. I played a bit on my juno board on CA73 cores with the command: perf bench sched all Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) and results in seconds. Less is better. +---------------------+--------+--------+--------+--------+ |Without change | run1 | run2 | run3 | avg | +---------------------+--------+--------+--------+--------+ |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | +---------------------+--------+--------+--------+--------+ +---------------------+--------+--------+--------+--------+--------+ |With change | run1 | run2 | run3 | avg | diff | +---------------------+--------+--------+--------+--------+--------+ |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | |seccomp-notify (sec) | 46.848 | 46.938 | 46.973 | 46.920 | +2.98% | +---------------------+--------+--------+--------+--------+--------+ With this patch, the messaging test shows a minor improvement (0.52%). The pipe test performs worse (-4.15%) after applying the patch. However, one positive signal is that the minimum latency is 23.159, which is lower than without the change (24.017). For seccomp, the results indicate a benefit (2.98%) from the change. 
I hope this helps the maintainers judge the change. I'll leave it to the maintainers to review the code. Thanks, Leo
On Fri, Feb 20, 2026 at 12:34:14PM +0000, Leo Yan wrote: > Hi Jisheng, Hi Leo, > > On Fri, Feb 20, 2026 at 05:09:22PM +0800, Jisheng Zhang wrote: > > Currently, on arm64 platforms, the handle_arch_irq is a pointer which > > is set during booting, and every irq processing needs to access it, > > so it sits in hot code path. We can use the runtime constant mechanism > > which was introduced by Linus to speed up its accessing. > > > > Tested on Quad CA55 platform, the perf sched benchmark is improved > > by ~6.5% > > 6.5% is a quite high margin, especially for only one pointer's change. > Maybe it is good to share more info for which compiler you are using, > how you tested and the detailed results. Sure. aarch64-linux-gnu-gcc version 15.2.0 my kernel defconfig is a minimal arm64 version which disables most drivers, only keep timer, gic, pll/clk, uart, regulator and i2c controller The reason is to avoid OS noise as much as possible. It's also put at the end of the email for reference. testing cmd: perf bench sched pipe testing steps: booting into a buildroot minimal initramfs force cpufreq governor as performance run above cmd testing resuls: before the patch: 9.471988 usecs/op 105574 ops/sec after the patch: 8.896280 usecs/op 112406 ops/sec (112406 - 105574) * 100 / 105574 = ~6.5 % Let me know if you need more details > > I played a bit on my juno board on CA73 cores with the command: IIRC, Juno is powered by a big.little SoC, then we need to ensure the benchmark is always running on big or little cores when testing, to ensure apple to apple comparison. I also have a CA73 platform, but can't access it now. I'll test the patch next week. > > perf bench sched all > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > and results in seconds. Less is better. 
> > +---------------------+--------+--------+--------+--------+ > |Without change | run1 | run2 | run3 | avg | > +---------------------+--------+--------+--------+--------+ > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > +---------------------+--------+--------+--------+--------+ > > +---------------------+--------+--------+--------+--------+--------+ > |With change | run1 | run2 | run3 | avg | diff | > +---------------------+--------+--------+--------+--------+--------+ > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | If you check the result, this result variance is abnormal, it means your OS is noiser. > |seccomp-notify (sec) | 46.848 | 46.938 | 46.973 | 46.920 | +2.98% | > +---------------------+--------+--------+--------+--------+--------+ > > With this patch, the messaging test shows a minor improvement (0.52%). > > The pipe test performs worse (-4.15%) after applying the patch. However, > one positive signal is that the minimum latency is 23.159, which is > lower than without the change (24.017). The perf bench is sensitive to OS noise, so I disable most devices, only uart, timer, irq chip, i2c and regulator is kept, and stop all daemons only keep a usespace process bash shell. 
defconfig for reference: # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT=y # CONFIG_CPU_ISOLATION is not set CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set # CONFIG_RD_LZ4 is not set # CONFIG_INITRAMFS_PRESERVE_MTIME is not set CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_BERLIN=y # CONFIG_ARM64_ERRATUM_826319 is not set # CONFIG_ARM64_ERRATUM_827319 is not set # CONFIG_ARM64_ERRATUM_824069 is not set # CONFIG_ARM64_ERRATUM_819472 is not set # CONFIG_ARM64_ERRATUM_832075 is not set # CONFIG_ARM64_ERRATUM_845719 is not set # CONFIG_ARM64_ERRATUM_843419 is not set # CONFIG_ARM64_ERRATUM_1418040 is not set # CONFIG_ARM64_ERRATUM_1165522 is not set # CONFIG_ARM64_ERRATUM_1463225 is not set # CONFIG_CAVIUM_ERRATUM_22375 is not set # CONFIG_CAVIUM_ERRATUM_23154 is not set # CONFIG_CAVIUM_ERRATUM_27456 is not set # CONFIG_CAVIUM_ERRATUM_30115 is not set # CONFIG_CAVIUM_TX2_ERRATUM_219 is not set # CONFIG_FUJITSU_ERRATUM_010001 is not set # CONFIG_HISILICON_ERRATUM_161600802 is not set # CONFIG_QCOM_FALKOR_ERRATUM_1003 is not set # CONFIG_QCOM_FALKOR_ERRATUM_1009 is not set # CONFIG_QCOM_QDF2400_ERRATUM_0065 is not set # CONFIG_QCOM_FALKOR_ERRATUM_E1041 is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_HZ_100=y # CONFIG_UNMAP_KERNEL_AT_EL0 is not set CONFIG_COMPAT=y CONFIG_ARMV8_DEPRECATED=y CONFIG_SWP_EMULATION=y CONFIG_CP15_BARRIER_EMULATION=y CONFIG_SETEND_EMULATION=y # CONFIG_ARM64_HW_AFDBM is not set # CONFIG_ARM64_PTR_AUTH is not set # CONFIG_ARM64_AMU_EXTN is not set # CONFIG_ARM64_SVE is not set # CONFIG_EFI is not set # CONFIG_SUSPEND is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_TEO=y CONFIG_ARM_PSCI_CPUIDLE=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_USERSPACE=y 
CONFIG_CPUFREQ_DT=y CONFIG_JUMP_LABEL=y # CONFIG_VMAP_STACK is not set # CONFIG_BLOCK is not set # CONFIG_COMPAT_BRK is not set # CONFIG_COMPACTION is not set CONFIG_CMA=y # CONFIG_ZONE_DMA32 is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SYNA_DOLPHIN_PLL=y CONFIG_SYNA_CLK_BASE=y CONFIG_SYNA_MYNA2_CLK=y CONFIG_PINCTRL_MYNA2=y CONFIG_USB_DWC3_SYNA=y # CONFIG_SYNAPTICS_PCIE is not set CONFIG_REGULATOR_HL7593=y CONFIG_PHY_SYNA_USB=y # CONFIG_SYNAPTICS_I2C is not set CONFIG_BERLIN_CHIPID=y CONFIG_BERLIN_AXI_METER=y # CONFIG_SYNAPTICS_NET is not set # CONFIG_SYNAPTICS_VIDEO is not set # CONFIG_SYNAPTICS_SOUND is not set # CONFIG_SYNAPTICS_DEVFREQ is not set # CONFIG_SYNAPTICS_DMABUF is not set # CONFIG_SYNAPTICS_IRQCHIP is not set # CONFIG_SYNAPTICS_INPUT is not set # CONFIG_SYNAPTICS_BLUETOOTH is not set # CONFIG_SYNAPTICS_CLOCKSOURCE is not set # CONFIG_SYNAPTICS_DMA is not set # CONFIG_SYNAPTICS_TTY is not set # CONFIG_SYNAPTICS_LEDS is not set # CONFIG_SYNAPTICS_MFD is not set # CONFIG_SYNAPTICS_MISC is not set # CONFIG_SYNAPTICS_MTD is not set # CONFIG_SYNAPTICS_PWM is not set # CONFIG_SYNAPTICS_RTC is not set # CONFIG_SYNAPTICS_SPI is not set # CONFIG_SYNAPTICS_STAGING is not set # CONFIG_SYNAPTICS_WATCHDOG is not set # CONFIG_SYNAPTICS_MAILBOX is not set # CONFIG_SYNAPTICS_RPMSG is not set # CONFIG_SYNAPTICS_CRYPTO is not set CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_LEGACY_TIOCSTI is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 CONFIG_SERIAL_8250_DW=y # CONFIG_HW_RANDOM is not set # CONFIG_DEVPORT is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_DESIGNWARE_CORE=y CONFIG_GPIO_SYSFS=y 
CONFIG_GPIO_DWAPB=y CONFIG_GPIO_FXL6408=y # CONFIG_HWMON is not set CONFIG_MFD_SYSCON=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_TPS6286X=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_SYSFS_STATS=y CONFIG_DMABUF_HEAPS_CMA=y # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set # CONFIG_SURFACE_PLATFORMS is not set # CONFIG_ARM64_PLATFORM_DEVICES is not set # CONFIG_ARM_ARCH_TIMER_EVTSTREAM is not set # CONFIG_FSL_ERRATUM_A008585 is not set # CONFIG_HISILICON_ERRATUM_161010101 is not set # CONFIG_ARM64_ERRATUM_858921 is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_RESET_CONTROLLER=y # CONFIG_DNOTIFY is not set # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_DMA_CMA=y CONFIG_PRINTK_TIME=y # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_DEBUG_FS=y # CONFIG_SLUB_DEBUG is not set CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_STACKTRACE=y # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set # CONFIG_STRICT_DEVMEM is not set # CONFIG_RUNTIME_TESTING_MENU is not set > > For seccomp, the results indicate a benefit (2.98%) from the change. > > Hope this is helpful for maintainers to judge the change. > > I'd leave maintainers to review the code. > > Thanks, > Leo
On Fri, Feb 20, 2026 at 09:16:24PM +0800, Jisheng Zhang wrote: > On Fri, Feb 20, 2026 at 12:34:14PM +0000, Leo Yan wrote: > > Hi Jisheng, > > Hi Leo, > > > > > On Fri, Feb 20, 2026 at 05:09:22PM +0800, Jisheng Zhang wrote: > > > Currently, on arm64 platforms, the handle_arch_irq is a pointer which > > > is set during booting, and every irq processing needs to access it, > > > so it sits in hot code path. We can use the runtime constant mechanism > > > which was introduced by Linus to speed up its accessing. > > > > > > Tested on Quad CA55 platform, the perf sched benchmark is improved > > > by ~6.5% > > > > 6.5% is a quite high margin, especially for only one pointer's change. > > Maybe it is good to share more info for which compiler you are using, > > how you tested and the detailed results. > > Sure. > aarch64-linux-gnu-gcc version 15.2.0 > > my kernel defconfig is a minimal arm64 version which disables most > drivers, only keep timer, gic, pll/clk, uart, regulator and i2c controller > The reason is to avoid OS noise as much as possible. > It's also put at the end of the email for reference. > > testing cmd: > perf bench sched pipe > > testing steps: > booting into a buildroot minimal initramfs > force cpufreq governor as performance > run above cmd > > testing resuls: > before the patch: > 9.471988 usecs/op > 105574 ops/sec > > after the patch: > 8.896280 usecs/op > 112406 ops/sec > > (112406 - 105574) * 100 / 105574 = ~6.5 % > > Let me know if you need more details > > > > > > I played a bit on my juno board on CA73 cores with the command: > > IIRC, Juno is powered by a big.little SoC, then we need to ensure > the benchmark is always running on big or little cores when testing, to > ensure apple to apple comparison. > > I also have a CA73 platform, but can't access it now. I'll test the > patch next week. > > > > perf bench sched all > > > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > > and results in seconds. Less is better. 
> > > > +---------------------+--------+--------+--------+--------+ > > |Without change | run1 | run2 | run3 | avg | > > +---------------------+--------+--------+--------+--------+ > > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > > +---------------------+--------+--------+--------+--------+ > > > > +---------------------+--------+--------+--------+--------+--------+ > > |With change | run1 | run2 | run3 | avg | diff | > > +---------------------+--------+--------+--------+--------+--------+ > > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | > > If you check the result, this result variance is abnormal, it means > your OS is noiser. BTW: if you remove the abnormal run3 result, you'll find that the benchmark is improved by ~3.5% on CA73: (23.159 + 23.702) / 2 = 23.43 (24.258 + 24.224) / 2 = 24.24 (24.24 - 23.43)*100 / 23.43 = ~3.5 > > > |seccomp-notify (sec) | 46.848 | 46.938 | 46.973 | 46.920 | +2.98% | > > +---------------------+--------+--------+--------+--------+--------+ > > > > With this patch, the messaging test shows a minor improvement (0.52%). > > > > The pipe test performs worse (-4.15%) after applying the patch. However, > > one positive signal is that the minimum latency is 23.159, which is > > lower than without the change (24.017). > > The perf bench is sensitive to OS noise, so I disable most devices, only > uart, timer, irq chip, i2c and regulator is kept, and stop all daemons > only keep a usespace process bash shell. 
> > defconfig for reference: > > # CONFIG_LOCALVERSION_AUTO is not set > CONFIG_SYSVIPC=y > CONFIG_NO_HZ_IDLE=y > CONFIG_HIGH_RES_TIMERS=y > CONFIG_PREEMPT=y > # CONFIG_CPU_ISOLATION is not set > CONFIG_BLK_DEV_INITRD=y > # CONFIG_RD_GZIP is not set > # CONFIG_RD_BZIP2 is not set > # CONFIG_RD_LZMA is not set > # CONFIG_RD_XZ is not set > # CONFIG_RD_LZO is not set > # CONFIG_RD_LZ4 is not set > # CONFIG_INITRAMFS_PRESERVE_MTIME is not set > CONFIG_EXPERT=y > # CONFIG_UID16 is not set > # CONFIG_SYSFS_SYSCALL is not set > CONFIG_PROFILING=y > CONFIG_ARCH_BERLIN=y > # CONFIG_ARM64_ERRATUM_826319 is not set > # CONFIG_ARM64_ERRATUM_827319 is not set > # CONFIG_ARM64_ERRATUM_824069 is not set > # CONFIG_ARM64_ERRATUM_819472 is not set > # CONFIG_ARM64_ERRATUM_832075 is not set > # CONFIG_ARM64_ERRATUM_845719 is not set > # CONFIG_ARM64_ERRATUM_843419 is not set > # CONFIG_ARM64_ERRATUM_1418040 is not set > # CONFIG_ARM64_ERRATUM_1165522 is not set > # CONFIG_ARM64_ERRATUM_1463225 is not set > # CONFIG_CAVIUM_ERRATUM_22375 is not set > # CONFIG_CAVIUM_ERRATUM_23154 is not set > # CONFIG_CAVIUM_ERRATUM_27456 is not set > # CONFIG_CAVIUM_ERRATUM_30115 is not set > # CONFIG_CAVIUM_TX2_ERRATUM_219 is not set > # CONFIG_FUJITSU_ERRATUM_010001 is not set > # CONFIG_HISILICON_ERRATUM_161600802 is not set > # CONFIG_QCOM_FALKOR_ERRATUM_1003 is not set > # CONFIG_QCOM_FALKOR_ERRATUM_1009 is not set > # CONFIG_QCOM_QDF2400_ERRATUM_0065 is not set > # CONFIG_QCOM_FALKOR_ERRATUM_E1041 is not set > # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set > CONFIG_NR_CPUS=4 > CONFIG_HOTPLUG_CPU=y > CONFIG_HZ_100=y > # CONFIG_UNMAP_KERNEL_AT_EL0 is not set > CONFIG_COMPAT=y > CONFIG_ARMV8_DEPRECATED=y > CONFIG_SWP_EMULATION=y > CONFIG_CP15_BARRIER_EMULATION=y > CONFIG_SETEND_EMULATION=y > # CONFIG_ARM64_HW_AFDBM is not set > # CONFIG_ARM64_PTR_AUTH is not set > # CONFIG_ARM64_AMU_EXTN is not set > # CONFIG_ARM64_SVE is not set > # CONFIG_EFI is not set > # CONFIG_SUSPEND is not set > 
CONFIG_CPU_IDLE=y > CONFIG_CPU_IDLE_GOV_TEO=y > CONFIG_ARM_PSCI_CPUIDLE=y > CONFIG_CPU_FREQ=y > CONFIG_CPU_FREQ_GOV_USERSPACE=y > CONFIG_CPUFREQ_DT=y > CONFIG_JUMP_LABEL=y > # CONFIG_VMAP_STACK is not set > # CONFIG_BLOCK is not set > # CONFIG_COMPAT_BRK is not set > # CONFIG_COMPACTION is not set > CONFIG_CMA=y > # CONFIG_ZONE_DMA32 is not set > # CONFIG_VM_EVENT_COUNTERS is not set > CONFIG_SYNA_DOLPHIN_PLL=y > CONFIG_SYNA_CLK_BASE=y > CONFIG_SYNA_MYNA2_CLK=y > CONFIG_PINCTRL_MYNA2=y > CONFIG_USB_DWC3_SYNA=y > # CONFIG_SYNAPTICS_PCIE is not set > CONFIG_REGULATOR_HL7593=y > CONFIG_PHY_SYNA_USB=y > # CONFIG_SYNAPTICS_I2C is not set > CONFIG_BERLIN_CHIPID=y > CONFIG_BERLIN_AXI_METER=y > # CONFIG_SYNAPTICS_NET is not set > # CONFIG_SYNAPTICS_VIDEO is not set > # CONFIG_SYNAPTICS_SOUND is not set > # CONFIG_SYNAPTICS_DEVFREQ is not set > # CONFIG_SYNAPTICS_DMABUF is not set > # CONFIG_SYNAPTICS_IRQCHIP is not set > # CONFIG_SYNAPTICS_INPUT is not set > # CONFIG_SYNAPTICS_BLUETOOTH is not set > # CONFIG_SYNAPTICS_CLOCKSOURCE is not set > # CONFIG_SYNAPTICS_DMA is not set > # CONFIG_SYNAPTICS_TTY is not set > # CONFIG_SYNAPTICS_LEDS is not set > # CONFIG_SYNAPTICS_MFD is not set > # CONFIG_SYNAPTICS_MISC is not set > # CONFIG_SYNAPTICS_MTD is not set > # CONFIG_SYNAPTICS_PWM is not set > # CONFIG_SYNAPTICS_RTC is not set > # CONFIG_SYNAPTICS_SPI is not set > # CONFIG_SYNAPTICS_STAGING is not set > # CONFIG_SYNAPTICS_WATCHDOG is not set > # CONFIG_SYNAPTICS_MAILBOX is not set > # CONFIG_SYNAPTICS_RPMSG is not set > # CONFIG_SYNAPTICS_CRYPTO is not set > CONFIG_UEVENT_HELPER=y > CONFIG_DEVTMPFS=y > CONFIG_DEVTMPFS_MOUNT=y > # CONFIG_ALLOW_DEV_COREDUMP is not set > CONFIG_INPUT_EVDEV=y > # CONFIG_INPUT_KEYBOARD is not set > # CONFIG_INPUT_MOUSE is not set > # CONFIG_SERIO is not set > # CONFIG_VT is not set > # CONFIG_LEGACY_PTYS is not set > # CONFIG_LEGACY_TIOCSTI is not set > CONFIG_SERIAL_8250=y > # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set > 
CONFIG_SERIAL_8250_CONSOLE=y > CONFIG_SERIAL_8250_NR_UARTS=2 > CONFIG_SERIAL_8250_RUNTIME_UARTS=2 > CONFIG_SERIAL_8250_DW=y > # CONFIG_HW_RANDOM is not set > # CONFIG_DEVPORT is not set > CONFIG_I2C=y > CONFIG_I2C_CHARDEV=y > CONFIG_I2C_DESIGNWARE_CORE=y > CONFIG_GPIO_SYSFS=y > CONFIG_GPIO_DWAPB=y > CONFIG_GPIO_FXL6408=y > # CONFIG_HWMON is not set > CONFIG_MFD_SYSCON=y > CONFIG_REGULATOR=y > CONFIG_REGULATOR_FIXED_VOLTAGE=y > CONFIG_REGULATOR_GPIO=y > CONFIG_REGULATOR_TPS6286X=y > # CONFIG_HID_SUPPORT is not set > # CONFIG_USB_SUPPORT is not set > CONFIG_DMABUF_HEAPS=y > CONFIG_DMABUF_SYSFS_STATS=y > CONFIG_DMABUF_HEAPS_CMA=y > # CONFIG_VIRTIO_MENU is not set > # CONFIG_VHOST_MENU is not set > # CONFIG_SURFACE_PLATFORMS is not set > # CONFIG_ARM64_PLATFORM_DEVICES is not set > # CONFIG_ARM_ARCH_TIMER_EVTSTREAM is not set > # CONFIG_FSL_ERRATUM_A008585 is not set > # CONFIG_HISILICON_ERRATUM_161010101 is not set > # CONFIG_ARM64_ERRATUM_858921 is not set > # CONFIG_IOMMU_SUPPORT is not set > CONFIG_RESET_CONTROLLER=y > # CONFIG_DNOTIFY is not set > # CONFIG_PROC_PAGE_MONITOR is not set > CONFIG_TMPFS=y > CONFIG_CONFIGFS_FS=y > # CONFIG_MISC_FILESYSTEMS is not set > CONFIG_DMA_CMA=y > CONFIG_PRINTK_TIME=y > # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set > CONFIG_DEBUG_FS=y > # CONFIG_SLUB_DEBUG is not set > CONFIG_SOFTLOCKUP_DETECTOR=y > CONFIG_DEBUG_ATOMIC_SLEEP=y > CONFIG_STACKTRACE=y > # CONFIG_RCU_TRACE is not set > # CONFIG_FTRACE is not set > # CONFIG_STRICT_DEVMEM is not set > # CONFIG_RUNTIME_TESTING_MENU is not set > > > > For seccomp, the results indicate a benefit (2.98%) from the change. > > > > Hope this is helpful for maintainers to judge the change. > > > > I'd leave maintainers to review the code. > > > > Thanks, > > Leo >
On Fri, Feb 20, 2026 at 09:34:14PM +0800, Jisheng Zhang wrote: [...] > > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > > > and results in seconds. Less is better. > > > > > > +---------------------+--------+--------+--------+--------+ > > > |Without change | run1 | run2 | run3 | avg | > > > +---------------------+--------+--------+--------+--------+ > > > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > > > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > > > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > > > +---------------------+--------+--------+--------+--------+ > > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > |With change | run1 | run2 | run3 | avg | diff | > > > +---------------------+--------+--------+--------+--------+--------+ > > > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > > > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | > > > > If you check the result, this result variance is abnormal, it means > > your OS is noiser. > > BTW: if you remove the abnormal run3 result, you'll find that the > benchmark is improved by ~3.5% on CA73: > (23.159 + 23.702) / 2 = 23.43 > (24.258 + 24.224) / 2 = 24.24 > (24.24 - 23.43)*100 / 23.43 = ~3.5 TBH, I don't think we should subjectively select data. But I agree a clean test env is important to avoid noise, and I also agree that the current results already show positive signals. Thanks, Leo
On Fri, Feb 20, 2026 at 04:47:38PM +0000, Leo Yan wrote: > On Fri, Feb 20, 2026 at 09:34:14PM +0800, Jisheng Zhang wrote: > > [...] > > > > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > > > > and results in seconds. Less is better. > > > > > > > > +---------------------+--------+--------+--------+--------+ > > > > |Without change | run1 | run2 | run3 | avg | > > > > +---------------------+--------+--------+--------+--------+ > > > > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > > > > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > > > > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > > > > +---------------------+--------+--------+--------+--------+ > > > > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > |With change | run1 | run2 | run3 | avg | diff | > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > > > > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | > > > > > > If you check the result, this result variance is abnormal, it means > > > your OS is noiser. > > > > BTW: if you remove the abnormal run3 result, you'll find that the > > benchmark is improved by ~3.5% on CA73: > > (23.159 + 23.702) / 2 = 23.43 > > (24.258 + 24.224) / 2 = 24.24 > > (24.24 - 23.43)*100 / 23.43 = ~3.5 > > TBH, I don't think we should subjectively select data. But I agree a The precondition of this is testing the benchmark properly. And I just tried perf bench sched in noisy OS, I didn't get the similar abnormal variance as you got, so I think your run3 result was CA53's result. This isn't an apple-to-apple comparison. If possible, could you plz test after forcing CA53 offline or test on non big.little platform. Anyway, I will test CA73 next week too. > clean test env is important to avoid noise, and I also agree that the > current results already show positive signals. > > Thanks, > Leo
On Sat, Feb 21, 2026 at 08:14:17AM +0800, Jisheng Zhang wrote: [...] > On Fri, Feb 20, 2026 at 04:47:38PM +0000, Leo Yan wrote: > > On Fri, Feb 20, 2026 at 09:34:14PM +0800, Jisheng Zhang wrote: > > > > [...] > > > > > > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > > > > > and results in seconds. Less is better. > > > > > > > > > > +---------------------+--------+--------+--------+--------+ > > > > > |Without change | run1 | run2 | run3 | avg | > > > > > +---------------------+--------+--------+--------+--------+ > > > > > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > > > > > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > > > > > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > > > > > +---------------------+--------+--------+--------+--------+ > > > > > > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > > |With change | run1 | run2 | run3 | avg | diff | > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > > > > > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | > > > > > > > > If you check the result, this result variance is abnormal, it means > > > > your OS is noiser. > > > > > > BTW: if you remove the abnormal run3 result, you'll find that the > > > benchmark is improved by ~3.5% on CA73: > > > (23.159 + 23.702) / 2 = 23.43 > > > (24.258 + 24.224) / 2 = 24.24 > > > (24.24 - 23.43)*100 / 23.43 = ~3.5 > > > > TBH, I don't think we should subjectively select data. But I agree a > > The precondition of this is testing the benchmark properly. And I just > tried perf bench sched in noisy OS, I didn't get the similar abnormal > variance as you got, so I think your run3 result was CA53's result. > This isn't an apple-to-apple comparison. Not true. As said, I tested on CA73. I should say explicitly that I have hotplugged off CA53 CPUs and run test only on CA73 CPUs. 
> If possible, could you plz test after forcing CA53 offline or test on > non big.little platform. Anyway, I will test CA73 next week too. > > > clean test env is important to avoid noise, and I also agree that the > > current results already show positive signals. > > > > Thanks, > > Leo
On Mon, Feb 23, 2026 at 09:15:47AM +0000, Leo Yan wrote: > On Sat, Feb 21, 2026 at 08:14:17AM +0800, Jisheng Zhang wrote: > > [...] > > > On Fri, Feb 20, 2026 at 04:47:38PM +0000, Leo Yan wrote: > > > On Fri, Feb 20, 2026 at 09:34:14PM +0800, Jisheng Zhang wrote: > > > > > > [...] > > > > > > > > > Run 3 iterations, and measures three metrics (messaging/pipe/seccomp) > > > > > > and results in seconds. Less is better. > > > > > > > > > > > > +---------------------+--------+--------+--------+--------+ > > > > > > |Without change | run1 | run2 | run3 | avg | > > > > > > +---------------------+--------+--------+--------+--------+ > > > > > > |messaging (sec) | 4.546 | 4.508 | 4.591 | 4.548 | > > > > > > |pipe (sec) | 24.258 | 24.224 | 24.017 | 24.166 | > > > > > > |seccomp-notify (sec) | 48.393 | 48.457 | 48.232 | 48.361 | > > > > > > +---------------------+--------+--------+--------+--------+ > > > > > > > > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > > > |With change | run1 | run2 | run3 | avg | diff | > > > > > > +---------------------+--------+--------+--------+--------+--------+ > > > > > > |messaging (sec) | 4.493 | 4.523 | 4.556 | 4.524 | +0.52% | > > > > > > |pipe (sec) | 23.159 | 23.702 | 28.649 | 25.170 | -4.15% | > > > > > > > > > > If you check the result, this result variance is abnormal, it means > > > > > your OS is noiser. > > > > > > > > BTW: if you remove the abnormal run3 result, you'll find that the > > > > benchmark is improved by ~3.5% on CA73: > > > > (23.159 + 23.702) / 2 = 23.43 > > > > (24.258 + 24.224) / 2 = 24.24 > > > > (24.24 - 23.43)*100 / 23.43 = ~3.5 > > > > > > TBH, I don't think we should subjectively select data. But I agree a > > > > The precondition of this is testing the benchmark properly. And I just > > tried perf bench sched in noisy OS, I didn't get the similar abnormal > > variance as you got, so I think your run3 result was CA53's result. > > This isn't an apple-to-apple comparison. 
> > Not true. As said, I tested on CA73. I should say explicitly that I > have hotplugged off CA53 CPUs and run test only on CA73 CPUs. I tested on a quad CA73 platform, and I can reproduce the abnormal variance you got. This means the series may not always improve performance as I expected for *all* CPUs. So I'd like to drop it now. > > > If possible, could you plz test after forcing CA53 offline or test on > > non big.little platform. Anyway, I will test CA73 next week too. > > > > > clean test env is important to avoid noise, and I also agree that the > > > current results already show positive signals. > > > > > > Thanks, > > > Leo
© 2016 - 2026 Red Hat, Inc.