[patch V2 09/37] rseq: Introduce struct rseq_event

Posted by Thomas Gleixner 1 month, 1 week ago
In preparation for a major rewrite of this code, provide a data structure
for event management.

Put the sched_switch event and an indicator for RSEQ on a task into it as a
start. The struct uses a union, which allows masking and clearing the whole
lot efficiently.

The indicators are explicitly not a bit field. Bit fields generate abysmal
code.

The boolean members are defined as u8, which actually guarantees that they
fit into a single byte. There seem to be strange architecture ABIs which
need more than 8 bits for a boolean.

The has_rseq member is redundant vs. task::rseq, but it turns out that
boolean operations and quick checks on the union generate better code than
fiddling with separate entities and data types.

This struct will be extended over time to carry more information.
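
For illustration, a minimal userspace sketch which mirrors the layout added
below (it is not the kernel code itself) shows how the nesting is meant to
be used:

	#include <assert.h>
	#include <stdint.h>

	/* Mirrors the layout of struct rseq_event added below */
	struct rseq_event {
		union {
			uint32_t		all;
			struct {
				union {
					uint16_t	events;
					struct {
						uint8_t	sched_switch;
					};
				};
				uint8_t			has_rseq;
			};
		};
	};

	int main(void)
	{
		struct rseq_event ev = { .all = 0 };

		/* Registration and a context switch are plain byte stores */
		ev.has_rseq = 1;
		ev.sched_switch = 1;

		/* All pending events are testable with a single 16-bit load */
		assert(ev.events);
		/* ... and clearable with a single store, keeping has_rseq */
		ev.events = 0;
		assert(ev.has_rseq && !ev.sched_switch);

		/* Unregistration wipes the whole lot with one 32-bit store */
		ev.all = 0;
		assert(!ev.has_rseq);
		return 0;
	}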

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rseq.h       |   23 ++++++++++++-----------
 include/linux/rseq_types.h |   30 ++++++++++++++++++++++++++++++
 include/linux/sched.h      |    7 ++-----
 kernel/rseq.c              |    6 ++++--
 4 files changed, 48 insertions(+), 18 deletions(-)

--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
 
 static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 {
-	if (current->rseq)
+	if (current->rseq_event.has_rseq)
 		__rseq_handle_notify_resume(NULL, regs);
 }
 
 static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
 {
-	if (current->rseq) {
-		current->rseq_event_pending = true;
+	if (current->rseq_event.has_rseq) {
+		current->rseq_event.sched_switch = true;
 		__rseq_handle_notify_resume(ksig, regs);
 	}
 }
 
 static inline void rseq_sched_switch_event(struct task_struct *t)
 {
-	if (t->rseq) {
-		t->rseq_event_pending = true;
+	if (t->rseq_event.has_rseq) {
+		t->rseq_event.sched_switch = true;
 		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 	}
 }
@@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
 static __always_inline void rseq_exit_to_user_mode(void)
 {
 	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
-		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
-			current->rseq_event_pending = false;
+		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
+				 current->rseq_event.events))
+			current->rseq_event.events = 0;
 	}
 }
 
@@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
  */
 static inline void rseq_virt_userspace_exit(void)
 {
-	if (current->rseq_event_pending)
+	if (current->rseq_event.sched_switch)
 		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
 }
 
@@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
 		t->rseq = NULL;
 		t->rseq_len = 0;
 		t->rseq_sig = 0;
-		t->rseq_event_pending = false;
+		t->rseq_event.all = 0;
 	} else {
 		t->rseq = current->rseq;
 		t->rseq_len = current->rseq_len;
 		t->rseq_sig = current->rseq_sig;
-		t->rseq_event_pending = current->rseq_event_pending;
+		t->rseq_event = current->rseq_event;
 	}
 }
 
@@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
 	t->rseq = NULL;
 	t->rseq_len = 0;
 	t->rseq_sig = 0;
-	t->rseq_event_pending = false;
+	t->rseq_event.all = 0;
 }
 
 #else /* CONFIG_RSEQ */
--- /dev/null
+++ b/include/linux/rseq_types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_TYPES_H
+#define _LINUX_RSEQ_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * struct rseq_event - Storage for rseq related event management
+ * @all:		Compound to initialize and clear the data efficiently
+ * @events:		Compound to access events with a single load/store
+ * @sched_switch:	True if the task was scheduled out
+ * @has_rseq:		True if the task has a rseq pointer installed
+ */
+struct rseq_event {
+	union {
+		u32				all;
+		struct {
+			union {
+				u16		events;
+				struct {
+					u8	sched_switch;
+				};
+			};
+
+			u8			has_rseq;
+		};
+	};
+};
+
+#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,6 +41,7 @@
 #include <linux/task_io_accounting.h>
 #include <linux/posix-timers_types.h>
 #include <linux/restart_block.h>
+#include <linux/rseq_types.h>
 #include <uapi/linux/rseq.h>
 #include <linux/seqlock_types.h>
 #include <linux/kcsan.h>
@@ -1404,11 +1405,7 @@ struct task_struct {
 	struct rseq __user		*rseq;
 	u32				rseq_len;
 	u32				rseq_sig;
-	/*
-	 * RmW on rseq_event_pending must be performed atomically
-	 * with respect to preemption.
-	 */
-	bool				rseq_event_pending;
+	struct rseq_event		rseq_event;
 # ifdef CONFIG_DEBUG_RSEQ
 	/*
 	 * This is a place holder to save a copy of the rseq fields for
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
 	 * inconsistencies.
 	 */
 	scoped_guard(RSEQ_EVENT_GUARD) {
-		event = t->rseq_event_pending;
-		t->rseq_event_pending = false;
+		event = t->rseq_event.sched_switch;
+		t->rseq_event.sched_switch = false;
 	}
 
 	if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
@@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
 		current->rseq = NULL;
 		current->rseq_sig = 0;
 		current->rseq_len = 0;
+		current->rseq_event.all = 0;
 		return 0;
 	}
 
@@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
 	 * registered, ensure the cpu_id_start and cpu_id fields
 	 * are updated before returning to user-space.
 	 */
+	current->rseq_event.has_rseq = true;
 	rseq_sched_switch_event(current);
 
 	return 0;
Re: [patch V2 09/37] rseq: Introduce struct rseq_event
Posted by Mathieu Desnoyers 1 month, 1 week ago
On 2025-08-23 12:39, Thomas Gleixner wrote:
> In preparation for a major rewrite of this code, provide a data structure
> for event management.
> 
> Put the sched_switch event and a indicator for RSEQ on a task into it as a
> start. That uses a union, which allows to mask and clear the whole lot
> efficiently.
> 
> The indicators are explicitely not a bit field. Bit fields generate abysmal

explicitly

> code.
> 
> The boolean members are defined as u8 as that actually guarantees that it
> fits. There seem to be strange architecture ABIs which need more than 8bits
> for a boolean.
> 
> The has_rseq member is redudandant vs. task::rseq, but it turns out that

redundant

> boolean operations and quick checks on the union generate better code than
> fiddling with seperate entities and data types.

separate

> 
> This struct will be extended over time to carry more information.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   include/linux/rseq.h       |   23 ++++++++++++-----------
>   include/linux/rseq_types.h |   30 ++++++++++++++++++++++++++++++
>   include/linux/sched.h      |    7 ++-----
>   kernel/rseq.c              |    6 ++++--
>   4 files changed, 48 insertions(+), 18 deletions(-)
> 
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
>   
>   static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>   {
> -	if (current->rseq)
> +	if (current->rseq_event.has_rseq)
>   		__rseq_handle_notify_resume(NULL, regs);
>   }
>   
>   static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
>   {
> -	if (current->rseq) {
> -		current->rseq_event_pending = true;
> +	if (current->rseq_event.has_rseq) {
> +		current->rseq_event.sched_switch = true;
>   		__rseq_handle_notify_resume(ksig, regs);
>   	}
>   }
>   
>   static inline void rseq_sched_switch_event(struct task_struct *t)
>   {
> -	if (t->rseq) {
> -		t->rseq_event_pending = true;
> +	if (t->rseq_event.has_rseq) {
> +		t->rseq_event.sched_switch = true;
>   		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
>   	}
>   }
> @@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
>   static __always_inline void rseq_exit_to_user_mode(void)
>   {
>   	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
> -			current->rseq_event_pending = false;
> +		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
> +				 current->rseq_event.events))
> +			current->rseq_event.events = 0;
>   	}
>   }
>   
> @@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
>    */
>   static inline void rseq_virt_userspace_exit(void)
>   {
> -	if (current->rseq_event_pending)
> +	if (current->rseq_event.sched_switch)
>   		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
>   }
>   
> @@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
>   		t->rseq = NULL;
>   		t->rseq_len = 0;
>   		t->rseq_sig = 0;
> -		t->rseq_event_pending = false;
> +		t->rseq_event.all = 0;
>   	} else {
>   		t->rseq = current->rseq;
>   		t->rseq_len = current->rseq_len;
>   		t->rseq_sig = current->rseq_sig;
> -		t->rseq_event_pending = current->rseq_event_pending;
> +		t->rseq_event = current->rseq_event;
>   	}
>   }
>   
> @@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
>   	t->rseq = NULL;
>   	t->rseq_len = 0;
>   	t->rseq_sig = 0;
> -	t->rseq_event_pending = false;
> +	t->rseq_event.all = 0;
>   }
>   
>   #else /* CONFIG_RSEQ */
> --- /dev/null
> +++ b/include/linux/rseq_types.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_TYPES_H
> +#define _LINUX_RSEQ_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * struct rseq_event - Storage for rseq related event management
> + * @all:		Compound to initialize and clear the data efficiently
> + * @events:		Compund to access events with a single load/store

Compound

> + * @sched_switch:	True if the task was scheduled out
> + * @has_rseq:		True if the task has a rseq pointer installed
> + */
> +struct rseq_event {
> +	union {
> +		u32				all;
> +		struct {
> +			union {
> +				u16		events;
> +				struct {
> +					u8	sched_switch;
> +				};

Is alpha still supported, or can we assume bytewise loads/stores ?

Are those events meant to each consume 1 byte (which limits us to 2
events for a 2-byte "events"/4-byte "all"), or is the plan to update
them with bitwise or/~ and ?

Thanks,

Mathieu

> +			};
> +
> +			u8			has_rseq;
> +		};
> +	};
> +};
> +
> +#endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -41,6 +41,7 @@
>   #include <linux/task_io_accounting.h>
>   #include <linux/posix-timers_types.h>
>   #include <linux/restart_block.h>
> +#include <linux/rseq_types.h>
>   #include <uapi/linux/rseq.h>
>   #include <linux/seqlock_types.h>
>   #include <linux/kcsan.h>
> @@ -1404,11 +1405,7 @@ struct task_struct {
>   	struct rseq __user		*rseq;
>   	u32				rseq_len;
>   	u32				rseq_sig;
> -	/*
> -	 * RmW on rseq_event_pending must be performed atomically
> -	 * with respect to preemption.
> -	 */
> -	bool				rseq_event_pending;
> +	struct rseq_event		rseq_event;
>   # ifdef CONFIG_DEBUG_RSEQ
>   	/*
>   	 * This is a place holder to save a copy of the rseq fields for
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
>   	 * inconsistencies.
>   	 */
>   	scoped_guard(RSEQ_EVENT_GUARD) {
> -		event = t->rseq_event_pending;
> -		t->rseq_event_pending = false;
> +		event = t->rseq_event.sched_switch;
> +		t->rseq_event.sched_switch = false;
>   	}
>   
>   	if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   		current->rseq = NULL;
>   		current->rseq_sig = 0;
>   		current->rseq_len = 0;
> +		current->rseq_event.all = 0;
>   		return 0;
>   	}
>   
> @@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   	 * registered, ensure the cpu_id_start and cpu_id fields
>   	 * are updated before returning to user-space.
>   	 */
> +	current->rseq_event.has_rseq = true;
>   	rseq_sched_switch_event(current);
>   
>   	return 0;
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Re: [patch V2 09/37] rseq: Introduce struct rseq_event
Posted by Thomas Gleixner 1 month ago
On Mon, Aug 25 2025 at 14:11, Mathieu Desnoyers wrote:
>> + * @sched_switch:	True if the task was scheduled out
>> + * @has_rseq:		True if the task has a rseq pointer installed
>> + */
>> +struct rseq_event {
>> +	union {
>> +		u32				all;
>> +		struct {
>> +			union {
>> +				u16		events;
>> +				struct {
>> +					u8	sched_switch;
>> +				};
>
> Is alpha still supported, or can we assume bytewise loads/stores ?

Alpha is on life support, but that does not mean we have to cater for it
in new features.

> Are those events meant to each consume 1 byte (which limits us to 2
> events for a 2-byte "events"/4-byte "all"), or is the plan to update
> them with bitwise or/~ and ?

No. Bitwise or/and creates horrible ASM code and needs serialization in
the worst case.

There is no need for tons of events. See the changes further down the
series.
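
For illustration only (the struct and flag names below are made up for the
example, this is not the actual kernel code): with one event per byte the
fast path is a plain store, while a shared bitmask forces a read-modify-write
sequence which must not be interleaved with another update, e.g. due to
preemption:

	#include <stdint.h>

	/* Byte-per-event, as in struct rseq_event: setting an event is a
	 * single byte store, no read-modify-write required. */
	struct event_bytes {
		uint8_t	sched_switch;
		uint8_t	has_rseq;
	};

	static inline void set_event_byte(struct event_bytes *ev)
	{
		ev->sched_switch = 1;			/* one plain store */
	}

	/* Hypothetical bitmask variant: the compiler has to emit a
	 * load/or/store sequence, which is only safe when it cannot be
	 * interleaved with a concurrent update of the same word. */
	#define HYP_EVENT_SCHED_SWITCH	(1U << 0)

	static inline void set_event_bit(uint32_t *flags)
	{
		*flags |= HYP_EVENT_SCHED_SWITCH;	/* load, or, store */
	}

	int main(void)
	{
		struct event_bytes ev = { 0 };
		uint32_t flags = 0;

		set_event_byte(&ev);
		set_event_bit(&flags);
		return ev.sched_switch && flags ? 0 : 1;
	}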

Thanks,

        tglx