[patch V2 30/37] rseq: Split up rseq_exit_to_user_mode()

Thomas Gleixner posted 37 patches 1 month, 1 week ago
There is a newer version of this series
[patch V2 30/37] rseq: Split up rseq_exit_to_user_mode()
Posted by Thomas Gleixner 1 month, 1 week ago
Separate the interrupt and syscall exit handling. Syscall exit does not
need to clear the user_irq bit as it can't be set. On interrupt exit it
can be set when the interrupt did not result in a scheduling event and
therefore the return path did not invoke the TIF work handling, which would
have cleared it.

The debug check for the event state is also not really required even when
debug mode is enabled via the static key. Debug mode largely aids user
space by enabling a larger number of validation checks, which cause a
segfault when a malformed critical section is detected. In production mode
the critical section handling takes the content mostly as is and lets user
space keep the pieces when it screwed up.

On kernel changes in that area the state check is useful, but that can be
done when lockdep is enabled, which is anyway a required test scenario for
fundamental changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq-entry-common.h |    4 ++--
 include/linux/rseq_entry.h       |   21 +++++++++++++++++----
 2 files changed, 19 insertions(+), 6 deletions(-)

--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -247,7 +247,7 @@ static __always_inline void __exit_to_us
 static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
 	__exit_to_user_mode_prepare(regs);
-	rseq_exit_to_user_mode();
+	rseq_syscall_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
 
@@ -261,7 +261,7 @@ static __always_inline void syscall_exit
 static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
 	__exit_to_user_mode_prepare(regs);
-	rseq_exit_to_user_mode();
+	rseq_irqentry_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
 
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -519,19 +519,31 @@ rseq_exit_to_user_mode_work(struct pt_re
 
 #endif /* !CONFIG_GENERIC_ENTRY */
 
-static __always_inline void rseq_exit_to_user_mode(void)
+static __always_inline void rseq_syscall_exit_to_user_mode(void)
 {
 	struct rseq_event *ev = &current->rseq_event;
 
 	rseq_stat_inc(rseq_stats.exit);
 
-	if (static_branch_unlikely(&rseq_debug_enabled))
+	/* Needed to remove the store for the !lockdep case */
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
 		WARN_ON_ONCE(ev->sched_switch);
+		ev->events = 0;
+	}
+}
+
+static __always_inline void rseq_irqentry_exit_to_user_mode(void)
+{
+	struct rseq_event *ev = &current->rseq_event;
+
+	rseq_stat_inc(rseq_stats.exit);
+
+	lockdep_assert_once(!ev->sched_switch);
 
 	/*
 	 * Ensure that event (especially user_irq) is cleared when the
 	 * interrupt did not result in a schedule and therefore the
-	 * rseq processing did not clear it.
+	 * rseq processing could not clear it.
 	 */
 	ev->events = 0;
 }
@@ -551,7 +563,8 @@ static inline unsigned long rseq_exit_to
 	return ti_work;
 }
 static inline void rseq_note_user_irq_entry(void) { }
-static inline void rseq_exit_to_user_mode(void) { }
+static inline void rseq_syscall_exit_to_user_mode(void) { }
+static inline void rseq_irqentry_exit_to_user_mode(void) { }
 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
 #endif /* !CONFIG_RSEQ */
Re: [patch V2 30/37] rseq: Split up rseq_exit_to_user_mode()
Posted by Mathieu Desnoyers 1 month, 1 week ago
On 2025-08-23 12:40, Thomas Gleixner wrote:
> Seperate the interrupt and syscall exit handling. Syscall exit does not

Separate

> require to clear the user_irq bit as it can't be set. On interrupt exit it
> can be set when the interrupt did not result in a scheduling event and
> therefore the return path did not invoke the TIF work handling, which would
> have cleared it.
> 
> The debug check for the event state is also not really required even when
> debug mode is enabled via the static key. Debug mode is largely aiding user
> space by enabling a larger amount of validation checks, which cause a
> segfault when a malformed critical section is detected. In production mode
> the critical section handling takes the content mostly as is and lets user
> space keep the pieces when it screwed up.
> 
> On kernel changes in that area the state check is useful, but that can be
> done when lockdep is enabled, which is anyway a required test scenario for
> fundamental changes.

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   include/linux/irq-entry-common.h |    4 ++--
>   include/linux/rseq_entry.h       |   21 +++++++++++++++++----
>   2 files changed, 19 insertions(+), 6 deletions(-)
> 
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -247,7 +247,7 @@ static __always_inline void __exit_to_us
>   static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
>   {
>   	__exit_to_user_mode_prepare(regs);
> -	rseq_exit_to_user_mode();
> +	rseq_syscall_exit_to_user_mode();
>   	__exit_to_user_mode_validate();
>   }
>   
> @@ -261,7 +261,7 @@ static __always_inline void syscall_exit
>   static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
>   {
>   	__exit_to_user_mode_prepare(regs);
> -	rseq_exit_to_user_mode();
> +	rseq_irqentry_exit_to_user_mode();
>   	__exit_to_user_mode_validate();
>   }
>   
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -519,19 +519,31 @@ rseq_exit_to_user_mode_work(struct pt_re
>   
>   #endif /* !CONFIG_GENERIC_ENTRY */
>   
> -static __always_inline void rseq_exit_to_user_mode(void)
> +static __always_inline void rseq_syscall_exit_to_user_mode(void)
>   {
>   	struct rseq_event *ev = &current->rseq_event;
>   
>   	rseq_stat_inc(rseq_stats.exit);
>   
> -	if (static_branch_unlikely(&rseq_debug_enabled))
> +	/* Needed to remove the store for the !lockdep case */
> +	if (IS_ENABLED(CONFIG_LOCKDEP)) {
>   		WARN_ON_ONCE(ev->sched_switch);
> +		ev->events = 0;
> +	}
> +}
> +
> +static __always_inline void rseq_irqentry_exit_to_user_mode(void)
> +{
> +	struct rseq_event *ev = &current->rseq_event;
> +
> +	rseq_stat_inc(rseq_stats.exit);
> +
> +	lockdep_assert_once(!ev->sched_switch);
>   
>   	/*
>   	 * Ensure that event (especially user_irq) is cleared when the
>   	 * interrupt did not result in a schedule and therefore the
> -	 * rseq processing did not clear it.
> +	 * rseq processing could not clear it.
>   	 */
>   	ev->events = 0;
>   }
> @@ -551,7 +563,8 @@ static inline unsigned long rseq_exit_to
>   	return ti_work;
>   }
>   static inline void rseq_note_user_irq_entry(void) { }
> -static inline void rseq_exit_to_user_mode(void) { }
> +static inline void rseq_syscall_exit_to_user_mode(void) { }
> +static inline void rseq_irqentry_exit_to_user_mode(void) { }
>   static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
>   #endif /* !CONFIG_RSEQ */
>   
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com