[patch V2 15/37] rseq: Record interrupt from user space

Thomas Gleixner posted 37 patches 1 month, 1 week ago
There is a newer version of this series
[patch V2 15/37] rseq: Record interrupt from user space
Posted by Thomas Gleixner 1 month, 1 week ago
For RSEQ the only relevant reason to inspect and possibly fix up (abort)
a user space critical section is when user space was interrupted and the
task was scheduled out.

If the user to kernel entry was from a syscall, no fixup is required. If
user space invokes a syscall from a critical section, it can keep the
pieces as documented.

This is only supported on architectures which utilize the generic entry
code. If your architecture does not use it, bad luck.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq-entry-common.h |    3 ++-
 include/linux/rseq.h             |   16 +++++++++++-----
 include/linux/rseq_entry.h       |   18 ++++++++++++++++++
 include/linux/rseq_types.h       |    2 ++
 4 files changed, 33 insertions(+), 6 deletions(-)

--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -4,7 +4,7 @@
 
 #include <linux/context_tracking.h>
 #include <linux/kmsan.h>
-#include <linux/rseq.h>
+#include <linux/rseq_entry.h>
 #include <linux/static_call_types.h>
 #include <linux/syscalls.h>
 #include <linux/tick.h>
@@ -281,6 +281,7 @@ static __always_inline void exit_to_user
 static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
+	rseq_note_user_irq_entry();
 }
 
 /**
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -31,11 +31,17 @@ static inline void rseq_sched_switch_eve
 
 static __always_inline void rseq_exit_to_user_mode(void)
 {
-	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
-		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
-				 current->rseq_event.events))
-			current->rseq_event.events = 0;
-	}
+	struct rseq_event *ev = &current->rseq_event;
+
+	if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
+		WARN_ON_ONCE(ev->sched_switch);
+
+	/*
+	 * Ensure that event (especially user_irq) is cleared when the
+	 * interrupt did not result in a schedule and therefore the
+	 * rseq processing did not clear it.
+	 */
+	ev->events = 0;
 }
 
 /*
--- /dev/null
+++ b/include/linux/rseq_entry.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_ENTRY_H
+#define _LINUX_RSEQ_ENTRY_H
+
+#ifdef CONFIG_RSEQ
+#include <linux/rseq.h>
+
+static __always_inline void rseq_note_user_irq_entry(void)
+{
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
+		current->rseq_event.user_irq = true;
+}
+
+#else /* CONFIG_RSEQ */
+static inline void rseq_note_user_irq_entry(void) { }
+#endif /* !CONFIG_RSEQ */
+
+#endif /* _LINUX_RSEQ_ENTRY_H */
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -9,6 +9,7 @@
  * @all:		Compound to initialize and clear the data efficiently
  * @events:		Compund to access events with a single load/store
  * @sched_switch:	True if the task was scheduled out
+ * @user_irq:		True on interrupt entry from user mode
  * @has_rseq:		True if the task has a rseq pointer installed
  */
 struct rseq_event {
@@ -19,6 +20,7 @@ struct rseq_event {
 				u16		events;
 				struct {
 					u8	sched_switch;
+					u8	user_irq;
 				};
 			};
Re: [patch V2 15/37] rseq: Record interrupt from user space
Posted by Mathieu Desnoyers 1 month, 1 week ago
On 2025-08-23 12:39, Thomas Gleixner wrote:
> For RSEQ the only relevant reason to inspect and eventually fixup (abort)
> user space critical sections is when user space was interrupted and the
> task was scheduled out.
> 
> If the user to kernel entry was from a syscall no fixup is required. If
> user space invokes a syscall from a critical section it can keep the
> pieces as documented.
> 
> This is only supported on architectures, which utilize the generic entry

no comma between "architectures" and "which".

> code. If your architecture does not use it, bad luck.
> 

Should we eventually add a "depends on GENERIC_IRQ_ENTRY" to RSEQ then?

> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   include/linux/irq-entry-common.h |    3 ++-
>   include/linux/rseq.h             |   16 +++++++++++-----
>   include/linux/rseq_entry.h       |   18 ++++++++++++++++++
>   include/linux/rseq_types.h       |    2 ++
>   4 files changed, 33 insertions(+), 6 deletions(-)
> 
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -4,7 +4,7 @@
>   
>   #include <linux/context_tracking.h>
>   #include <linux/kmsan.h>
> -#include <linux/rseq.h>
> +#include <linux/rseq_entry.h>
>   #include <linux/static_call_types.h>
>   #include <linux/syscalls.h>
>   #include <linux/tick.h>
> @@ -281,6 +281,7 @@ static __always_inline void exit_to_user
>   static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
>   {
>   	enter_from_user_mode(regs);
> +	rseq_note_user_irq_entry();

As long as this also covers the following scenarios I'm ok with this:

- trap/exception from an rseq critical section,
- NMI over an rseq critical section.

Thanks,

Mathieu

>   }
>   
>   /**
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -31,11 +31,17 @@ static inline void rseq_sched_switch_eve
>   
>   static __always_inline void rseq_exit_to_user_mode(void)
>   {
> -	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
> -				 current->rseq_event.events))
> -			current->rseq_event.events = 0;
> -	}
> +	struct rseq_event *ev = &current->rseq_event;
> +
> +	if (IS_ENABLED(CONFIG_DEBUG_RSEQ))
> +		WARN_ON_ONCE(ev->sched_switch);
> +
> +	/*
> +	 * Ensure that event (especially user_irq) is cleared when the
> +	 * interrupt did not result in a schedule and therefore the
> +	 * rseq processing did not clear it.
> +	 */
> +	ev->events = 0;
>   }
>   
>   /*
> --- /dev/null
> +++ b/include/linux/rseq_entry.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_ENTRY_H
> +#define _LINUX_RSEQ_ENTRY_H
> +
> +#ifdef CONFIG_RSEQ
> +#include <linux/rseq.h>
> +
> +static __always_inline void rseq_note_user_irq_entry(void)
> +{
> +	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
> +		current->rseq_event.user_irq = true;
> +}
> +
> +#else /* CONFIG_RSEQ */
> +static inline void rseq_note_user_irq_entry(void) { }
> +#endif /* !CONFIG_RSEQ */
> +
> +#endif /* _LINUX_RSEQ_ENTRY_H */
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -9,6 +9,7 @@
>    * @all:		Compound to initialize and clear the data efficiently
>    * @events:		Compund to access events with a single load/store
>    * @sched_switch:	True if the task was scheduled out
> + * @user_irq:		True on interrupt entry from user mode
>    * @has_rseq:		True if the task has a rseq pointer installed
>    */
>   struct rseq_event {
> @@ -19,6 +20,7 @@ struct rseq_event {
>   				u16		events;
>   				struct {
>   					u8	sched_switch;
> +					u8	user_irq;
>   				};
>   			};
>   
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Re: [patch V2 15/37] rseq: Record interrupt from user space
Posted by Thomas Gleixner 1 month ago
On Mon, Aug 25 2025 at 14:29, Mathieu Desnoyers wrote:
> On 2025-08-23 12:39, Thomas Gleixner wrote:
>> code. If your architecture does not use it, bad luck.
>> 
>
> Should we eventually add a "depends on GENERIC_IRQ_ENTRY" to RSEQ then ?

I wish we could, but that'd break MIPS, POWER and ARM*...

>> @@ -281,6 +281,7 @@ static __always_inline void exit_to_user
>>   static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
>>   {
>>   	enter_from_user_mode(regs);
>> +	rseq_note_user_irq_entry();
>
> As long as this also covers the following scenarios I'm ok with this:
>
> - trap/exception from an rseq critical section,

It does. Traps and exceptions go through that entry path.

> - NMI over an rseq critical section.

That's irrelevant, as NMIs do not go through the regular exit to user
path and therefore can't reschedule. If they trigger something which
requires a reschedule, they raise IRQ work, which then goes through the
regular irqentry/exit path.

Thanks,

        tglx