From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Aug 2025 16:34:43 +0200
rseq_need_restart() reads and clears task::rseq_event_mask with preemption
disabled to guard against the scheduler.
But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
from the IPI, which leaves that RMW operation unprotected.
Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: stable@vger.kernel.org
---
 include/linux/rseq.h |   11 ++++++++---
 kernel/rseq.c        |   10 +++++-----
 2 files changed, 13 insertions(+), 8 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
 #include <linux/preempt.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD	irq
+#else
+# define RSEQ_EVENT_GUARD	preempt
+#endif
+
 /*
  * Map the event mask on the user-space ABI enum rseq_cs_flags
  * for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
 static inline void rseq_signal_deliver(struct ksignal *ksig,
 				       struct pt_regs *regs)
 {
-	preempt_disable();
-	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
-	preempt_enable();
+	scoped_guard(RSEQ_EVENT_GUARD)
+		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
 	rseq_handle_notify_resume(ksig, regs);
 }
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task
 
 	/*
 	 * Load and clear event mask atomically with respect to
-	 * scheduler preemption.
+	 * scheduler preemption and membarrier IPIs.
 	 */
-	preempt_disable();
-	event_mask = t->rseq_event_mask;
-	t->rseq_event_mask = 0;
-	preempt_enable();
+	scoped_guard(RSEQ_EVENT_GUARD) {
+		event_mask = t->rseq_event_mask;
+		t->rseq_event_mask = 0;
+	}
 
 	return !!event_mask;
 }
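[To see why preempt-off alone is insufficient here, the following is a minimal userspace sketch of the lost-update race, with a POSIX signal standing in for the membarrier IPI. All names (event_mask, ipi_handler, both helpers) are illustrative stand-ins, not kernel code; blocking the signal around the read-modify-write plays the role of RSEQ_EVENT_GUARD == irq.]

/* Build: gcc -O2 rseq-race-sketch.c -o rseq-race-sketch */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile unsigned long event_mask;

/* Stand-in for the membarrier IPI setting the PREEMPT bit. */
static void ipi_handler(int sig)
{
	event_mask |= 1UL;
}

/*
 * The pre-fix pattern: only "preemption" is held off, so the handler
 * can fire between the load and the clearing store, and the bit it
 * sets is silently wiped out.
 */
static unsigned long read_and_clear_racy(void)
{
	unsigned long m = event_mask;
	/* <- "IPI" landing here sets a bit ... */
	event_mask = 0;		/* ... which this store discards */
	return m;
}

/*
 * Analogue of RSEQ_EVENT_GUARD == irq: keep the "IPI" out of the
 * read-modify-write window entirely.
 */
static unsigned long read_and_clear_guarded(void)
{
	sigset_t all, old;
	unsigned long m;

	sigfillset(&all);
	sigprocmask(SIG_BLOCK, &all, &old);	/* "local_irq_disable()" */
	m = event_mask;
	event_mask = 0;
	sigprocmask(SIG_SETMASK, &old, NULL);	/* "local_irq_enable()" */
	return m;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = ipi_handler };

	sigaction(SIGALRM, &sa, NULL);
	alarm(1);	/* deliver one "IPI" while we poll */

	for (;;) {
		/* read_and_clear_racy() is shown above only for contrast */
		if (read_and_clear_guarded()) {
			puts("event consumed without loss");
			return 0;
		}
	}
}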
On 2025-08-13 11:02, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Tue, 12 Aug 2025 16:34:43 +0200
>
> rseq_need_restart() reads and clears task::rseq_event_mask with preemption
> disabled to guard against the scheduler.
>
> But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
> from the IPI, which leaves that RMW operation unprotected.
>
> Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
>
> Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Cc: stable@vger.kernel.org
> ---
>  include/linux/rseq.h |   11 ++++++++---
>  kernel/rseq.c        |   10 +++++-----
>  2 files changed, 13 insertions(+), 8 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -7,6 +7,12 @@
>  #include <linux/preempt.h>
>  #include <linux/sched.h>
>
> +#ifdef CONFIG_MEMBARRIER
> +# define RSEQ_EVENT_GUARD	irq
> +#else
> +# define RSEQ_EVENT_GUARD	preempt
> +#endif

We should also update this comment in include/linux/sched.h:

	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long rseq_event_mask;

to e.g.:

	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption and membarrier IPIs.
	 */

> +
>  /*
>   * Map the event mask on the user-space ABI enum rseq_cs_flags
>   * for direct mask checks.
> @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
>  static inline void rseq_signal_deliver(struct ksignal *ksig,
>  				       struct pt_regs *regs)
>  {
> -	preempt_disable();
> -	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD)
> +		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);

Then we have more to worry about interaction of the following
rseq events with membarrier IPI:

- rseq_preempt, rseq_migrate, rseq_signal_deliver.

Both rseq_preempt and rseq_migrate are documented as only being required
to be called with preempt off, not irq off.

I don't see the point in sharing the same rseq_event_mask across all of
those rseq event sources. Can we just move the event sources requiring
preempt-off to their own word, and use a separate word for membarrier
IPI instead? This would allow us to partition the problem into two
distinct states each protected by their respective mechanism.

>  	rseq_handle_notify_resume(ksig, regs);
>  }
>
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -342,12 +342,12 @@ static int rseq_need_restart(struct task
>
>  	/*
>  	 * Load and clear event mask atomically with respect to
> -	 * scheduler preemption.
> +	 * scheduler preemption and membarrier IPIs.
>  	 */
> -	preempt_disable();
> -	event_mask = t->rseq_event_mask;
> -	t->rseq_event_mask = 0;
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD) {
> +		event_mask = t->rseq_event_mask;
> +		t->rseq_event_mask = 0;
> +	}

Instead we could sample both the preempt-off and the irq-off words here.

Thanks,

Mathieu

>
>  	return !!event_mask;
>  }

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
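[A rough sketch of the two-word partition Mathieu proposes above. The struct, field, and helper names are hypothetical; this is not what was merged, only an illustration of splitting the preempt-off-protected events from the IPI-written ones.]

#include <linux/atomic.h>
#include <linux/preempt.h>

/* Hypothetical split of rseq_event_mask into two words. */
struct rseq_events {
	unsigned long	sched_events;	/* preempt/migrate: written from the
					 * scheduler, serialized by preempt-off */
	unsigned long	ipi_events;	/* membarrier IPI: written with atomic
					 * bitops from hard interrupt context */
};

static inline bool rseq_events_read_and_clear(struct rseq_events *ev)
{
	unsigned long sched, ipi;

	/* Preempt-off suffices for the scheduler-side word ... */
	preempt_disable();
	sched = ev->sched_events;
	ev->sched_events = 0;
	preempt_enable();

	/* ... while the IPI-side word is sampled atomically. */
	ipi = xchg(&ev->ipi_events, 0);

	return (sched | ipi) != 0;
}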
On Wed, Aug 20 2025 at 09:23, Mathieu Desnoyers wrote:
> On 2025-08-13 11:02, Thomas Gleixner wrote:
> We should also update this comment in include/linux/sched.h:
>
> 	/*
> 	 * RmW on rseq_event_mask must be performed atomically
> 	 * with respect to preemption.
> 	 */
> 	unsigned long rseq_event_mask;
>
> to e.g.:
>
> 	/*
> 	 * RmW on rseq_event_mask must be performed atomically
> 	 * with respect to preemption and membarrier IPIs.
> 	 */

True.

>> +
>>  /*
>>   * Map the event mask on the user-space ABI enum rseq_cs_flags
>>   * for direct mask checks.
>> @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
>>  static inline void rseq_signal_deliver(struct ksignal *ksig,
>>  				       struct pt_regs *regs)
>>  {
>> -	preempt_disable();
>> -	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>> -	preempt_enable();
>> +	scoped_guard(RSEQ_EVENT_GUARD)
>> +		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>
> Then we have more to worry about interaction of the following
> rseq events with membarrier IPI:
>
> - rseq_preempt, rseq_migrate, rseq_signal_deliver.
>
> Both rseq_preempt and rseq_migrate are documented as only being required
> to be called with preempt off, not irq off.

They are always invoked from the middle of the scheduler with interrupts
disabled, so just the documentation is wrong.

> Can we just move the event sources requiring preempt-off to their own
> word, and use a separate word for membarrier IPI instead? This would
> allow us to partition the problem into two distinct states each
> protected by their respective mechanism.

signal delivery can just use set_bit() which is atomic vs. the IPI, no?

But as I pointed out in the other series, we don't need that whole zoo
of event bits at all. There is absolutely no point.

signal delivery does not need to set an event in the first place. It can
just unconditionally invoke this stuff, no?

Thanks,

tglx
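[The set_bit() variant Thomas suggests above would look roughly like this for signal delivery. A sketch only; the helper name is hypothetical, and it assumes the reader side also clears the mask atomically.]

#include <linux/bitops.h>
#include <linux/sched.h>

static inline void rseq_signal_deliver_event(void)
{
	/*
	 * Unlike __set_bit(), set_bit() is an atomic RMW (e.g. a
	 * LOCK-prefixed BTS on x86), so it needs neither the preempt
	 * nor the irq guard to be safe against the membarrier IPI.
	 */
	set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
}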
On 2025-08-23 08:31, Thomas Gleixner wrote:
> On Wed, Aug 20 2025 at 09:23, Mathieu Desnoyers wrote:
>> On 2025-08-13 11:02, Thomas Gleixner wrote:
>> We should also update this comment in include/linux/sched.h:
>>
>> 	/*
>> 	 * RmW on rseq_event_mask must be performed atomically
>> 	 * with respect to preemption.
>> 	 */
>> 	unsigned long rseq_event_mask;
>>
>> to e.g.:
>>
>> 	/*
>> 	 * RmW on rseq_event_mask must be performed atomically
>> 	 * with respect to preemption and membarrier IPIs.
>> 	 */
>
> True.
>
>>> +
>>>  /*
>>>   * Map the event mask on the user-space ABI enum rseq_cs_flags
>>>   * for direct mask checks.
>>> @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
>>>  static inline void rseq_signal_deliver(struct ksignal *ksig,
>>>  				       struct pt_regs *regs)
>>>  {
>>> -	preempt_disable();
>>> -	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>>> -	preempt_enable();
>>> +	scoped_guard(RSEQ_EVENT_GUARD)
>>> +		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>>
>> Then we have more to worry about interaction of the following
>> rseq events with membarrier IPI:
>>
>> - rseq_preempt, rseq_migrate, rseq_signal_deliver.
>>
>> Both rseq_preempt and rseq_migrate are documented as only being required
>> to be called with preempt off, not irq off.
>
> They are always invoked from the middle of the scheduler with interrupts
> disabled, so just the documentation is wrong.

OK

>> Can we just move the event sources requiring preempt-off to their own
>> word, and use a separate word for membarrier IPI instead? This would
>> allow us to partition the problem into two distinct states each
>> protected by their respective mechanism.
>
> signal delivery can just use set_bit() which is atomic vs. the IPI, no?

Good point!

> But as I pointed out in the other series, we don't need that whole zoo
> of event bits at all. There is absolutely no point.
>
> signal delivery does not need to set an event in the first place. It can
> just unconditionally invoke this stuff, no?

Yes, as long as it can take a page fault from there.

Thanks,

Mathieu

> Thanks,
>
> tglx

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On 2025-08-13 11:02, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Tue, 12 Aug 2025 16:34:43 +0200
>
> rseq_need_restart() reads and clears task::rseq_event_mask with preemption
> disabled to guard against the scheduler.
>
> But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
> from the IPI, which leaves that RMW operation unprotected.
>
> Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

Thanks!

Mathieu

> Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Cc: stable@vger.kernel.org
> ---
>  include/linux/rseq.h |   11 ++++++++---
>  kernel/rseq.c        |   10 +++++-----
>  2 files changed, 13 insertions(+), 8 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -7,6 +7,12 @@
>  #include <linux/preempt.h>
>  #include <linux/sched.h>
>
> +#ifdef CONFIG_MEMBARRIER
> +# define RSEQ_EVENT_GUARD	irq
> +#else
> +# define RSEQ_EVENT_GUARD	preempt
> +#endif
> +
>  /*
>   * Map the event mask on the user-space ABI enum rseq_cs_flags
>   * for direct mask checks.
> @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
>  static inline void rseq_signal_deliver(struct ksignal *ksig,
>  				       struct pt_regs *regs)
>  {
> -	preempt_disable();
> -	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD)
> +		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>  	rseq_handle_notify_resume(ksig, regs);
>  }
>
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -342,12 +342,12 @@ static int rseq_need_restart(struct task
>
>  	/*
>  	 * Load and clear event mask atomically with respect to
> -	 * scheduler preemption.
> +	 * scheduler preemption and membarrier IPIs.
>  	 */
> -	preempt_disable();
> -	event_mask = t->rseq_event_mask;
> -	t->rseq_event_mask = 0;
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD) {
> +		event_mask = t->rseq_event_mask;
> +		t->rseq_event_mask = 0;
> +	}
>
>  	return !!event_mask;
>  }

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Wed, Aug 13, 2025 at 05:02:30PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Tue, 12 Aug 2025 16:34:43 +0200
>
> rseq_need_restart() reads and clears task::rseq_event_mask with preemption
> disabled to guard against the scheduler.
>
> But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
> from the IPI, which leaves that RMW operation unprotected.
>
> Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
>
> Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Cc: stable@vger.kernel.org

Seems reasonable to me.

Reviewed-by: Boqun Feng <boqun.feng@gmail.com>

Regards,
Boqun

> ---
>  include/linux/rseq.h |   11 ++++++++---
>  kernel/rseq.c        |   10 +++++-----
>  2 files changed, 13 insertions(+), 8 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -7,6 +7,12 @@
>  #include <linux/preempt.h>
>  #include <linux/sched.h>
>
> +#ifdef CONFIG_MEMBARRIER
> +# define RSEQ_EVENT_GUARD	irq
> +#else
> +# define RSEQ_EVENT_GUARD	preempt
> +#endif
> +
>  /*
>   * Map the event mask on the user-space ABI enum rseq_cs_flags
>   * for direct mask checks.
> @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_re
>  static inline void rseq_signal_deliver(struct ksignal *ksig,
>  				       struct pt_regs *regs)
>  {
> -	preempt_disable();
> -	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD)
> +		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
>  	rseq_handle_notify_resume(ksig, regs);
>  }
>
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -342,12 +342,12 @@ static int rseq_need_restart(struct task
>
>  	/*
>  	 * Load and clear event mask atomically with respect to
> -	 * scheduler preemption.
> +	 * scheduler preemption and membarrier IPIs.
>  	 */
> -	preempt_disable();
> -	event_mask = t->rseq_event_mask;
> -	t->rseq_event_mask = 0;
> -	preempt_enable();
> +	scoped_guard(RSEQ_EVENT_GUARD) {
> +		event_mask = t->rseq_event_mask;
> +		t->rseq_event_mask = 0;
> +	}
>
>  	return !!event_mask;
>  }
The following commit has been merged into the core/rseq branch of tip:
Commit-ID: 6eb350a2233100a283f882c023e5ad426d0ed63b
Gitweb: https://git.kernel.org/tip/6eb350a2233100a283f882c023e5ad426d0ed63b
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 13 Aug 2025 17:02:30 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Sat, 13 Sep 2025 19:51:59 +02:00
rseq: Protect event mask against membarrier IPI
rseq_need_restart() reads and clears task::rseq_event_mask with preemption
disabled to guard against the scheduler.
But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
from the IPI, which leaves that RMW operation unprotected.
Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: stable@vger.kernel.org
---
 include/linux/rseq.h |   11 ++++++++---
 kernel/rseq.c        |   10 +++++-----
 2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bc8af3e..1fbeb61 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
 #include <linux/preempt.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD	irq
+#else
+# define RSEQ_EVENT_GUARD	preempt
+#endif
+
 /*
  * Map the event mask on the user-space ABI enum rseq_cs_flags
  * for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
 static inline void rseq_signal_deliver(struct ksignal *ksig,
 				       struct pt_regs *regs)
 {
-	preempt_disable();
-	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
-	preempt_enable();
+	scoped_guard(RSEQ_EVENT_GUARD)
+		__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
 	rseq_handle_notify_resume(ksig, regs);
 }
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b7a1ec3..2452b73 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
 
 	/*
 	 * Load and clear event mask atomically with respect to
-	 * scheduler preemption.
+	 * scheduler preemption and membarrier IPIs.
 	 */
-	preempt_disable();
-	event_mask = t->rseq_event_mask;
-	t->rseq_event_mask = 0;
-	preempt_enable();
+	scoped_guard(RSEQ_EVENT_GUARD) {
+		event_mask = t->rseq_event_mask;
+		t->rseq_event_mask = 0;
+	}
 
 	return !!event_mask;
 }
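[For reference: scoped_guard() comes from <linux/cleanup.h>, and the preempt and irq guards used by RSEQ_EVENT_GUARD are defined in the kernel headers roughly as shown below (simplified; consult <linux/preempt.h> and <linux/irqflags.h> in the actual tree). The expanded function name is hypothetical, illustrating what the merged hunk compiles down to with CONFIG_MEMBARRIER=y.]

/* Simplified sketch of the guard definitions, not the literal headers: */
DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable())

/* With CONFIG_MEMBARRIER=y, the merged hunk is therefore equivalent to: */
static int rseq_need_restart_expanded(struct task_struct *t)
{
	unsigned long event_mask;

	local_irq_disable();
	event_mask = t->rseq_event_mask;
	t->rseq_event_mask = 0;
	local_irq_enable();

	return !!event_mask;
}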