arch/arm64/Kconfig | 1 + arch/arm64/include/asm/preempt.h | 18 +++++ arch/openrisc/include/asm/smp.h | 2 + arch/s390/include/asm/preempt.h | 10 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/preempt.h | 61 +++++++++++---- arch/x86/kernel/cpu/common.c | 2 +- include/asm-generic/preempt.h | 14 ++++ include/linux/hardirq.h | 41 ++++++++-- include/linux/interrupt_rc.h | 63 ++++++++++++++++ include/linux/kernel.h | 4 +- include/linux/preempt.h | 35 ++++++--- include/linux/spinlock.h | 51 +++++++++---- include/linux/spinlock_api_smp.h | 27 +++++++ include/linux/spinlock_api_up.h | 9 +++ include/linux/spinlock_rt.h | 15 ++++ kernel/Kconfig.preempt | 4 + kernel/irq/Makefile | 1 + kernel/irq/refcount_interrupt_test.c | 109 +++++++++++++++++++++++++++ kernel/locking/spinlock.c | 29 +++++++ kernel/sched/core.c | 18 +++-- kernel/softirq.c | 11 +++ lib/locking-selftest.c | 2 +- 23 files changed, 476 insertions(+), 52 deletions(-) create mode 100644 include/linux/interrupt_rc.h create mode 100644 kernel/irq/refcount_interrupt_test.c
Hi Peter,
This is a follow-up to Lyude's work [1]. Per your feedback at [2], I
did some digging, and it turned out that ARM64 already kind of does this.
The basic idea is based on:
1) preempt_count() previously masked out the NEED_RESCHED bit, so the
effective width is 31 bits
2) with a 64-bit preempt count implementation (as in your PREEMPT_LONG
proposal), the effective bits that record "whether we CAN preempt or
not" still fit in 32 bits (i.e. an int)
As a result, I don't think we need to change the existing
preempt_count() API; we can instead keep "32-bit vs 64-bit" as an
implementation detail. This saves us from having to change the printk
code for preempt_count().
For people who have reviewed the previous version, patches 8-11 are new;
please take a look.
The patchset passed build and boot tests, and also a "perf record"
test on x86 for the NMI code path.
I would like to target these changes for 7.2 if possible.
[1]: https://lore.kernel.org/all/20260121223933.1568682-1-lyude@redhat.com/
[2]: https://lore.kernel.org/all/20260204111234.GA3031506@noisy.programming.kicks-ass.net/
Regards,
Boqun
Boqun Feng (8):
preempt: Introduce HARDIRQ_DISABLE_BITS
preempt: Introduce __preempt_count_{sub, add}_return()
irq & spin_lock: Add counted interrupt disabling/enabling
locking: Switch to _irq_{disable,enable}() variants in cleanup guards
sched: Remove the unused preempt_offset parameter of __cant_sleep()
sched: Avoid signed comparison of preempt_count() in __cant_migrate()
preempt: Introduce PREEMPT_COUNT_64BIT
arm64: sched/preempt: Enable PREEMPT_COUNT_64BIT
Joel Fernandes (1):
preempt: Track NMI nesting to separate per-CPU counter
Lyude Paul (2):
openrisc: Include <linux/cpumask.h> in smp.h
irq: Add KUnit test for refcounted interrupt enable/disable
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/preempt.h | 18 +++++
arch/openrisc/include/asm/smp.h | 2 +
arch/s390/include/asm/preempt.h | 10 +++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/preempt.h | 61 +++++++++++----
arch/x86/kernel/cpu/common.c | 2 +-
include/asm-generic/preempt.h | 14 ++++
include/linux/hardirq.h | 41 ++++++++--
include/linux/interrupt_rc.h | 63 ++++++++++++++++
include/linux/kernel.h | 4 +-
include/linux/preempt.h | 35 ++++++---
include/linux/spinlock.h | 51 +++++++++----
include/linux/spinlock_api_smp.h | 27 +++++++
include/linux/spinlock_api_up.h | 9 +++
include/linux/spinlock_rt.h | 15 ++++
kernel/Kconfig.preempt | 4 +
kernel/irq/Makefile | 1 +
kernel/irq/refcount_interrupt_test.c | 109 +++++++++++++++++++++++++++
kernel/locking/spinlock.c | 29 +++++++
kernel/sched/core.c | 18 +++--
kernel/softirq.c | 11 +++
lib/locking-selftest.c | 2 +-
23 files changed, 476 insertions(+), 52 deletions(-)
create mode 100644 include/linux/interrupt_rc.h
create mode 100644 kernel/irq/refcount_interrupt_test.c
--
2.50.1 (Apple Git-155)
On Thu, May 07, 2026 at 09:21:00PM -0700, Boqun Feng wrote:
> Hi Peter,
>
> This is a follow-up for Lyude's work [1]. Per your feedback at [2], I
> did some digging and turned out that ARM64 already kinda did this. The
> basic idea is based on:
>
> 1) preempt_count() previously mask our NEED_RESCHED bit, so the
> effective bits is 31bits
> 2) with a 64bit preempt count implementation (as in your PREEMPT_LONG
> proposal), the effective bits that record "whether we CAN preempt or
> not" still fit in 32bit (i.e. an int)
>
> as a result, I don't think we need to change the existing
> preempt_count() API, but rather keep "32bit vs 64bit" as an
> implementation detail. This saves us the need to change the printk code
> for preempt_count().
>
> For people who have reviewed the previous version, patch 8-11 are new,
> please take a look.
>
> The patchset passed the build and booting tests and also a "perf record"
> test on x86 for NMI code path.
>
> I would like to target this changes for 7.2 if possible.
>
> [1]: https://lore.kernel.org/all/20260121223933.1568682-1-lyude@redhat.com/
> [2]: https://lore.kernel.org/all/20260204111234.GA3031506@noisy.programming.kicks-ass.net/
>
> Regards,
> Boqun
>
> Boqun Feng (8):
> preempt: Introduce HARDIRQ_DISABLE_BITS
> preempt: Introduce __preempt_count_{sub, add}_return()
> irq & spin_lock: Add counted interrupt disabling/enabling
> locking: Switch to _irq_{disable,enable}() variants in cleanup guards
> sched: Remove the unused preempt_offset parameter of __cant_sleep()
> sched: Avoid signed comparison of preempt_count() in __cant_migrate()
> preempt: Introduce PREEMPT_COUNT_64BIT
> arm64: sched/preempt: Enable PREEMPT_COUNT_64BIT
Below is the s390 conversion to PREEMPT_COUNT_64BIT (or whatever the
future name might be). I'd appreciate it if you would add it to your series.
From 827629e68ad67919f8c825d118863664badd227a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Sat, 9 May 2026 19:23:08 +0200
Subject: [PATCH] s390/preempt: Enable PREEMPT_COUNT_64BIT
Convert s390's preempt_count to 64 bit, and change
the preempt primitives accordingly.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
arch/s390/Kconfig | 1 +
arch/s390/include/asm/lowcore.h | 13 +++++++----
arch/s390/include/asm/preempt.h | 41 +++++++++++++++------------------
3 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ecbcbb781e40..efa52667b5d4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -276,6 +276,7 @@ config S390
select PCI_MSI if PCI
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select PCI_QUIRKS if PCI
+ select PREEMPT_COUNT_64BIT
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 50ffe75adeb4..0974ab278169 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -160,10 +160,15 @@ struct lowcore {
/* SMP info area */
__u32 cpu_nr; /* 0x03a0 */
__u32 softirq_pending; /* 0x03a4 */
- __s32 preempt_count; /* 0x03a8 */
- __u32 spinlock_lockval; /* 0x03ac */
- __u32 spinlock_index; /* 0x03b0 */
- __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
+ union {
+ struct {
+ __u32 need_resched; /* 0x03a8 */
+ __u32 count; /* 0x03ac */
+ } preempt;
+ __u64 preempt_count; /* 0x03a8 */
+ };
+ __u32 spinlock_lockval; /* 0x03b0 */
+ __u32 spinlock_index; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
__u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 0a25d4648b4c..1d5e4d7e9e1b 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -8,11 +8,8 @@
#include <asm/cmpxchg.h>
#include <asm/march.h>
-/*
- * Use MSB so it is possible to read preempt_count with LLGT which
- * reads the least significant 31 bits with a single instruction.
- */
-#define PREEMPT_NEED_RESCHED 0x80000000
+/* Use MSB for PREEMPT_NEED_RESCHED mostly because it is available. */
+#define PREEMPT_NEED_RESCHED 0x8000000000000000UL
/*
* We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
@@ -26,25 +23,25 @@
*/
static __always_inline int preempt_count(void)
{
- unsigned long lc_preempt, count;
+ unsigned long lc_preempt;
+ int count;
- BUILD_BUG_ON(sizeof_field(struct lowcore, preempt_count) != sizeof(int));
- lc_preempt = offsetof(struct lowcore, preempt_count);
+ lc_preempt = offsetof(struct lowcore, preempt.count);
/* READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED */
asm_inline(
- ALTERNATIVE("llgt %[count],%[offzero](%%r0)\n",
- "llgt %[count],%[offalt](%%r0)\n",
+ ALTERNATIVE("ly %[count],%[offzero](%%r0)\n",
+ "ly %[count],%[offalt](%%r0)\n",
ALT_FEATURE(MFEATURE_LOWCORE))
: [count] "=d" (count)
: [offzero] "i" (lc_preempt),
[offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS),
- "m" (((struct lowcore *)0)->preempt_count));
+ "m" (((struct lowcore *)0)->preempt.count));
return count;
}
-static __always_inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(unsigned long pc)
{
- int old, new;
+ unsigned long old, new;
old = READ_ONCE(get_lowcore()->preempt_count);
do {
@@ -63,12 +60,12 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
- __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
+ __atomic64_and(~PREEMPT_NEED_RESCHED, (long *)&get_lowcore()->preempt_count);
}
static __always_inline void clear_preempt_need_resched(void)
{
- __atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
+ __atomic64_or(PREEMPT_NEED_RESCHED, (long *)&get_lowcore()->preempt_count);
}
static __always_inline bool test_preempt_need_resched(void)
@@ -88,8 +85,8 @@ static __always_inline void __preempt_count_add(int val)
lc_preempt = offsetof(struct lowcore, preempt_count);
asm_inline(
- ALTERNATIVE("asi %[offzero](%%r0),%[val]\n",
- "asi %[offalt](%%r0),%[val]\n",
+ ALTERNATIVE("agsi %[offzero](%%r0),%[val]\n",
+ "agsi %[offalt](%%r0),%[val]\n",
ALT_FEATURE(MFEATURE_LOWCORE))
: "+m" (((struct lowcore *)0)->preempt_count)
: [offzero] "i" (lc_preempt), [val] "i" (val),
@@ -98,7 +95,7 @@ static __always_inline void __preempt_count_add(int val)
return;
}
}
- __atomic_add(val, &get_lowcore()->preempt_count);
+ __atomic64_add(val, (long *)&get_lowcore()->preempt_count);
}
static __always_inline void __preempt_count_sub(int val)
@@ -119,15 +116,15 @@ static __always_inline bool __preempt_count_dec_and_test(void)
lc_preempt = offsetof(struct lowcore, preempt_count);
asm_inline(
- ALTERNATIVE("alsi %[offzero](%%r0),%[val]\n",
- "alsi %[offalt](%%r0),%[val]\n",
+ ALTERNATIVE("algsi %[offzero](%%r0),%[val]\n",
+ "algsi %[offalt](%%r0),%[val]\n",
ALT_FEATURE(MFEATURE_LOWCORE))
: "=@cc" (cc), "+m" (((struct lowcore *)0)->preempt_count)
: [offzero] "i" (lc_preempt), [val] "i" (-1),
[offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS));
return (cc == 0) || (cc == 2);
#else
- return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
+ return __atomic64_add_const_and_test(-1, (long *)&get_lowcore()->preempt_count);
#endif
}
@@ -141,7 +138,7 @@ static __always_inline bool should_resched(int preempt_offset)
static __always_inline int __preempt_count_add_return(int val)
{
- return val + __atomic_add(val, &get_lowcore()->preempt_count);
+ return val + __atomic64_add(val, (long *)&get_lowcore()->preempt_count);
}
static __always_inline int __preempt_count_sub_return(int val)
--
2.51.0
On Sat, May 09, 2026 at 08:12:49PM +0200, Heiko Carstens wrote:
> On Thu, May 07, 2026 at 09:21:00PM -0700, Boqun Feng wrote:
> > Hi Peter,
> >
> > This is a follow-up for Lyude's work [1]. Per your feedback at [2], I
> > did some digging and turned out that ARM64 already kinda did this. The
> > basic idea is based on:
> >
> > 1) preempt_count() previously mask our NEED_RESCHED bit, so the
> > effective bits is 31bits
> > 2) with a 64bit preempt count implementation (as in your PREEMPT_LONG
> > proposal), the effective bits that record "whether we CAN preempt or
> > not" still fit in 32bit (i.e. an int)
> >
> > as a result, I don't think we need to change the existing
> > preempt_count() API, but rather keep "32bit vs 64bit" as an
> > implementation detail. This saves us the need to change the printk code
> > for preempt_count().
> >
> > For people who have reviewed the previous version, patch 8-11 are new,
> > please take a look.
> >
> > The patchset passed the build and booting tests and also a "perf record"
> > test on x86 for NMI code path.
> >
> > I would like to target this changes for 7.2 if possible.
> >
> > [1]: https://lore.kernel.org/all/20260121223933.1568682-1-lyude@redhat.com/
> > [2]: https://lore.kernel.org/all/20260204111234.GA3031506@noisy.programming.kicks-ass.net/
> >
> > Regards,
> > Boqun
> >
> > Boqun Feng (8):
> > preempt: Introduce HARDIRQ_DISABLE_BITS
> > preempt: Introduce __preempt_count_{sub, add}_return()
> > irq & spin_lock: Add counted interrupt disabling/enabling
> > locking: Switch to _irq_{disable,enable}() variants in cleanup guards
> > sched: Remove the unused preempt_offset parameter of __cant_sleep()
> > sched: Avoid signed comparison of preempt_count() in __cant_migrate()
> > preempt: Introduce PREEMPT_COUNT_64BIT
> > arm64: sched/preempt: Enable PREEMPT_COUNT_64BIT
>
> The below is the s390 conversion to PREEMPT_COUNT_64BIT (or whatever the
> future name might be). I'd appreciate if you would add that to your series.
>
Thanks a lot! Yeah, I will include it in the next version.
Regards,
Boqun
> From 827629e68ad67919f8c825d118863664badd227a Mon Sep 17 00:00:00 2001
> From: Heiko Carstens <hca@linux.ibm.com>
> Date: Sat, 9 May 2026 19:23:08 +0200
> Subject: [PATCH] s390/preempt: Enable PREEMPT_COUNT_64BIT
>
> Convert s390's preempt_count to 64 bit, and change
> the preempt primitives accordingly.
>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
> arch/s390/Kconfig | 1 +
> arch/s390/include/asm/lowcore.h | 13 +++++++----
> arch/s390/include/asm/preempt.h | 41 +++++++++++++++------------------
> 3 files changed, 29 insertions(+), 26 deletions(-)
>
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index ecbcbb781e40..efa52667b5d4 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -276,6 +276,7 @@ config S390
> select PCI_MSI if PCI
> select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
> select PCI_QUIRKS if PCI
> + select PREEMPT_COUNT_64BIT
> select SPARSE_IRQ
> select SWIOTLB
> select SYSCTL_EXCEPTION_TRACE
> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
> index 50ffe75adeb4..0974ab278169 100644
> --- a/arch/s390/include/asm/lowcore.h
> +++ b/arch/s390/include/asm/lowcore.h
> @@ -160,10 +160,15 @@ struct lowcore {
> /* SMP info area */
> __u32 cpu_nr; /* 0x03a0 */
> __u32 softirq_pending; /* 0x03a4 */
> - __s32 preempt_count; /* 0x03a8 */
> - __u32 spinlock_lockval; /* 0x03ac */
> - __u32 spinlock_index; /* 0x03b0 */
> - __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
> + union {
> + struct {
> + __u32 need_resched; /* 0x03a8 */
> + __u32 count; /* 0x03ac */
> + } preempt;
> + __u64 preempt_count; /* 0x03a8 */
> + };
> + __u32 spinlock_lockval; /* 0x03b0 */
> + __u32 spinlock_index; /* 0x03b4 */
> __u64 percpu_offset; /* 0x03b8 */
> __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
>
> diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
> index 0a25d4648b4c..1d5e4d7e9e1b 100644
> --- a/arch/s390/include/asm/preempt.h
> +++ b/arch/s390/include/asm/preempt.h
> @@ -8,11 +8,8 @@
> #include <asm/cmpxchg.h>
> #include <asm/march.h>
>
> -/*
> - * Use MSB so it is possible to read preempt_count with LLGT which
> - * reads the least significant 31 bits with a single instruction.
> - */
> -#define PREEMPT_NEED_RESCHED 0x80000000
> +/* Use MSB for PREEMPT_NEED_RESCHED mostly because it is available. */
> +#define PREEMPT_NEED_RESCHED 0x8000000000000000UL
>
> /*
> * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
> @@ -26,25 +23,25 @@
> */
> static __always_inline int preempt_count(void)
> {
> - unsigned long lc_preempt, count;
> + unsigned long lc_preempt;
> + int count;
>
> - BUILD_BUG_ON(sizeof_field(struct lowcore, preempt_count) != sizeof(int));
> - lc_preempt = offsetof(struct lowcore, preempt_count);
> + lc_preempt = offsetof(struct lowcore, preempt.count);
> /* READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED */
> asm_inline(
> - ALTERNATIVE("llgt %[count],%[offzero](%%r0)\n",
> - "llgt %[count],%[offalt](%%r0)\n",
> + ALTERNATIVE("ly %[count],%[offzero](%%r0)\n",
> + "ly %[count],%[offalt](%%r0)\n",
> ALT_FEATURE(MFEATURE_LOWCORE))
> : [count] "=d" (count)
> : [offzero] "i" (lc_preempt),
> [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS),
> - "m" (((struct lowcore *)0)->preempt_count));
> + "m" (((struct lowcore *)0)->preempt.count));
> return count;
> }
>
> -static __always_inline void preempt_count_set(int pc)
> +static __always_inline void preempt_count_set(unsigned long pc)
> {
> - int old, new;
> + unsigned long old, new;
>
> old = READ_ONCE(get_lowcore()->preempt_count);
> do {
> @@ -63,12 +60,12 @@ static __always_inline void preempt_count_set(int pc)
>
> static __always_inline void set_preempt_need_resched(void)
> {
> - __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
> + __atomic64_and(~PREEMPT_NEED_RESCHED, (long *)&get_lowcore()->preempt_count);
> }
>
> static __always_inline void clear_preempt_need_resched(void)
> {
> - __atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
> + __atomic64_or(PREEMPT_NEED_RESCHED, (long *)&get_lowcore()->preempt_count);
> }
>
> static __always_inline bool test_preempt_need_resched(void)
> @@ -88,8 +85,8 @@ static __always_inline void __preempt_count_add(int val)
>
> lc_preempt = offsetof(struct lowcore, preempt_count);
> asm_inline(
> - ALTERNATIVE("asi %[offzero](%%r0),%[val]\n",
> - "asi %[offalt](%%r0),%[val]\n",
> + ALTERNATIVE("agsi %[offzero](%%r0),%[val]\n",
> + "agsi %[offalt](%%r0),%[val]\n",
> ALT_FEATURE(MFEATURE_LOWCORE))
> : "+m" (((struct lowcore *)0)->preempt_count)
> : [offzero] "i" (lc_preempt), [val] "i" (val),
> @@ -98,7 +95,7 @@ static __always_inline void __preempt_count_add(int val)
> return;
> }
> }
> - __atomic_add(val, &get_lowcore()->preempt_count);
> + __atomic64_add(val, (long *)&get_lowcore()->preempt_count);
> }
>
> static __always_inline void __preempt_count_sub(int val)
> @@ -119,15 +116,15 @@ static __always_inline bool __preempt_count_dec_and_test(void)
>
> lc_preempt = offsetof(struct lowcore, preempt_count);
> asm_inline(
> - ALTERNATIVE("alsi %[offzero](%%r0),%[val]\n",
> - "alsi %[offalt](%%r0),%[val]\n",
> + ALTERNATIVE("algsi %[offzero](%%r0),%[val]\n",
> + "algsi %[offalt](%%r0),%[val]\n",
> ALT_FEATURE(MFEATURE_LOWCORE))
> : "=@cc" (cc), "+m" (((struct lowcore *)0)->preempt_count)
> : [offzero] "i" (lc_preempt), [val] "i" (-1),
> [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS));
> return (cc == 0) || (cc == 2);
> #else
> - return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
> + return __atomic64_add_const_and_test(-1, (long *)&get_lowcore()->preempt_count);
> #endif
> }
>
> @@ -141,7 +138,7 @@ static __always_inline bool should_resched(int preempt_offset)
>
> static __always_inline int __preempt_count_add_return(int val)
> {
> - return val + __atomic_add(val, &get_lowcore()->preempt_count);
> + return val + __atomic64_add(val, (long *)&get_lowcore()->preempt_count);
> }
>
> static __always_inline int __preempt_count_sub_return(int val)
> --
> 2.51.0
>
© 2016 - 2026 Red Hat, Inc.