[patch 05/12] rseq: Add prctl() to enable time slice extensions

Thomas Gleixner posted 12 patches 5 months ago
There is a newer version of this series
[patch 05/12] rseq: Add prctl() to enable time slice extensions
Posted by Thomas Gleixner 5 months ago
Implement a prctl() so that tasks can enable the time slice extension
mechanism. The prctl() fails when time slice extensions are disabled at
compile time or on the kernel command line, or when no rseq pointer is
registered in the kernel.

This allows a single trivial check in the exit to user mode hotpath to
decide whether the whole mechanism needs to be invoked.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 include/linux/rseq.h       |    9 +++++++
 include/uapi/linux/prctl.h |   10 ++++++++
 kernel/rseq.c              |   52 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/sys.c               |    6 +++++
 4 files changed, 77 insertions(+)

--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -190,4 +190,13 @@ void rseq_syscall(struct pt_regs *regs);
 static inline void rseq_syscall(struct pt_regs *regs) { }
 #endif /* !CONFIG_DEBUG_RSEQ */
 
+#ifdef CONFIG_RSEQ_SLICE_EXTENSION
+int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3);
+#else /* CONFIG_RSEQ_SLICE_EXTENSION */
+static inline int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
+{
+	return -EINVAL;	/* Compiled out: same result as an unknown prctl() option */
+}
+#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
+
 #endif /* _LINUX_RSEQ_H */
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -376,4 +376,14 @@ struct prctl_mm_map {
 # define PR_FUTEX_HASH_SET_SLOTS	1
 # define PR_FUTEX_HASH_GET_SLOTS	2
 
+/* RSEQ time slice extensions: arg2 selects the GET or SET operation */
+#define PR_RSEQ_SLICE_EXTENSION			79
+# define PR_RSEQ_SLICE_EXTENSION_GET		1
+# define PR_RSEQ_SLICE_EXTENSION_SET		2
+/*
+ * Bits returned by PR_RSEQ_SLICE_EXTENSION_GET and accepted in arg3 of _SET
+ * PR_RSEQ_SLICE_EXT_ENABLE:	Time slice extensions are enabled for the task
+ */
+# define PR_RSEQ_SLICE_EXT_ENABLE		0x01
+
 #endif /* _LINUX_PRCTL_H */
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -71,6 +71,7 @@
 #define RSEQ_BUILD_SLOW_PATH
 
 #include <linux/debugfs.h>
+#include <linux/prctl.h>
 #include <linux/ratelimit.h>
 #include <linux/rseq_entry.h>
 #include <linux/sched.h>
@@ -490,6 +491,57 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
 #ifdef CONFIG_RSEQ_SLICE_EXTENSION
 DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
 
+int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
+{
+	switch (arg2) {
+	case PR_RSEQ_SLICE_EXTENSION_GET:
+		if (arg3)
+			return -EINVAL;
+		return current->rseq.slice.state.enabled ? PR_RSEQ_SLICE_EXT_ENABLE : 0;
+
+	case PR_RSEQ_SLICE_EXTENSION_SET: {
+		u32 rflags, valid = RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
+		bool enable = !!(arg3 & PR_RSEQ_SLICE_EXT_ENABLE);
+
+		if (arg3 & ~PR_RSEQ_SLICE_EXT_ENABLE)
+			return -EINVAL;
+		if (!rseq_slice_extension_enabled())
+			return -EOPNOTSUPP;	/* ENOTSUPP must not leak to user space */
+		if (!current->rseq.usrptr)
+			return -ENXIO;
+
+		/* Nothing to do when the requested state is already in effect */
+		if (enable == !!current->rseq.slice.state.enabled)
+			return 0;
+
+		if (get_user(rflags, &current->rseq.usrptr->flags))
+			goto die;
+
+		/* Flags must still have AVAILABLE set, plus ENABLED if currently enabled */
+		if (current->rseq.slice.state.enabled)
+			valid |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
+		if ((rflags & valid) != valid)
+			goto die;
+
+		rflags &= ~RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
+		rflags |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
+		if (enable)
+			rflags |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
+
+		if (put_user(rflags, &current->rseq.usrptr->flags))
+			goto die;
+
+		current->rseq.slice.state.enabled = enable;
+		return 0;
+	}
+	default:
+		return -EINVAL;
+	}
+die:
+	force_sig(SIGSEGV);
+	return -EFAULT;
+}
+
 static int __init rseq_slice_cmdline(char *str)
 {
 	bool on;
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -53,6 +53,7 @@
 #include <linux/time_namespace.h>
 #include <linux/binfmts.h>
 #include <linux/futex.h>
+#include <linux/rseq.h>
 
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
@@ -2805,6 +2806,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
 	case PR_FUTEX_HASH:
 		error = futex_hash_prctl(arg2, arg3, arg4);
 		break;
+	case PR_RSEQ_SLICE_EXTENSION:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = rseq_slice_extension_prctl(arg2, arg3);
+		break;
 	default:
 		trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
 		error = -EINVAL;
Re: [patch 05/12] rseq: Add prctl() to enable time slice extensions
Posted by Mathieu Desnoyers 5 months ago
On 2025-09-08 18:59, Thomas Gleixner wrote:
> Implement a prctl() so that tasks can enable the time slice extension
> mechanism. This fails, when time slice extensions are disabled at compile
> time or on the kernel command line and when no rseq pointer is registered
> in the kernel.
> 
> That allows to implement a single trivial check in the exit to user mode
> hotpath, to decide whether the whole mechanism needs to be invoked.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> ---
>   include/linux/rseq.h       |    9 +++++++
>   include/uapi/linux/prctl.h |   10 ++++++++
>   kernel/rseq.c              |   52 +++++++++++++++++++++++++++++++++++++++++++++
>   kernel/sys.c               |    6 +++++
>   4 files changed, 77 insertions(+)
> 
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -190,4 +190,13 @@ void rseq_syscall(struct pt_regs *regs);
>   static inline void rseq_syscall(struct pt_regs *regs) { }
>   #endif /* !CONFIG_DEBUG_RSEQ */
>   
> +#ifdef CONFIG_RSEQ_SLICE_EXTENSION
> +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3);
> +#else /* CONFIG_RSEQ_SLICE_EXTENSION */
> +static inline int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
> +{
> +	return -EINVAL;
> +}
> +#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
> +
>   #endif /* _LINUX_RSEQ_H */
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -376,4 +376,14 @@ struct prctl_mm_map {
>   # define PR_FUTEX_HASH_SET_SLOTS	1
>   # define PR_FUTEX_HASH_GET_SLOTS	2
>   
> +/* RSEQ time slice extensions */
> +#define PR_RSEQ_SLICE_EXTENSION			79
> +# define PR_RSEQ_SLICE_EXTENSION_GET		1
> +# define PR_RSEQ_SLICE_EXTENSION_SET		2
> +/*
> + * Bits for RSEQ_SLICE_EXTENSION_GET/SET
> + * PR_RSEQ_SLICE_EXT_ENABLE:	Enable
> + */
> +# define PR_RSEQ_SLICE_EXT_ENABLE		0x01
> +
>   #endif /* _LINUX_PRCTL_H */
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -71,6 +71,7 @@
>   #define RSEQ_BUILD_SLOW_PATH
>   
>   #include <linux/debugfs.h>
> +#include <linux/prctl.h>
>   #include <linux/ratelimit.h>
>   #include <linux/rseq_entry.h>
>   #include <linux/sched.h>
> @@ -490,6 +491,57 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   #ifdef CONFIG_RSEQ_SLICE_EXTENSION
>   DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
>   
> +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
> +{
> +	switch (arg2) {
> +	case PR_RSEQ_SLICE_EXTENSION_GET:
> +		if (arg3)
> +			return -EINVAL;
> +		return current->rseq.slice.state.enabled ? PR_RSEQ_SLICE_EXT_ENABLE : 0;
> +
> +	case PR_RSEQ_SLICE_EXTENSION_SET: {
> +		u32 rflags, valid = RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
> +		bool enable = !!(arg3 & PR_RSEQ_SLICE_EXT_ENABLE);
> +
> +		if (arg3 & ~PR_RSEQ_SLICE_EXT_ENABLE)
> +			return -EINVAL;
> +		if (!rseq_slice_extension_enabled())
> +			return -ENOTSUPP;
> +		if (!current->rseq.usrptr)
> +			return -ENXIO;
> +
> +		/* No change? */
> +		if (enable == !!current->rseq.slice.state.enabled)
> +			return 0;
> +
> +		if (get_user(rflags, &current->rseq.usrptr->flags))
> +			goto die;
> +
> +		if (current->rseq.slice.state.enabled)
> +			valid |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
> +
> +		if ((rflags & valid) != valid)
> +			goto die;
> +
> +		rflags &= ~RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
> +		rflags |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
> +		if (enable)
> +			rflags |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
> +
> +		if (put_user(rflags, &current->rseq.usrptr->flags))
> +			goto die;
> +
> +		current->rseq.slice.state.enabled = enable;

What should happen to this enabled state if rseq is unregistered
after this prctl ?

Thanks,

Mathieu

> +		return 0;
> +	}
> +	default:
> +		return -EINVAL;
> +	}
> +die:
> +	force_sig(SIGSEGV);
> +	return -EFAULT;
> +}
> +
>   static int __init rseq_slice_cmdline(char *str)
>   {
>   	bool on;
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -53,6 +53,7 @@
>   #include <linux/time_namespace.h>
>   #include <linux/binfmts.h>
>   #include <linux/futex.h>
> +#include <linux/rseq.h>
>   
>   #include <linux/sched.h>
>   #include <linux/sched/autogroup.h>
> @@ -2805,6 +2806,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
>   	case PR_FUTEX_HASH:
>   		error = futex_hash_prctl(arg2, arg3, arg4);
>   		break;
> +	case PR_RSEQ_SLICE_EXTENSION:
> +		if (arg4 || arg5)
> +			return -EINVAL;
> +		error = rseq_slice_extension_prctl(arg2, arg3);
> +		break;
>   	default:
>   		trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
>   		error = -EINVAL;
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Re: [patch 05/12] rseq: Add prctl() to enable time slice extensions
Posted by K Prateek Nayak 5 months ago
Hello Mathieu,

On 9/11/2025 9:20 PM, Mathieu Desnoyers wrote:
>>   +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
>> +{
>> +    switch (arg2) {
>> +    case PR_RSEQ_SLICE_EXTENSION_GET:
>> +        if (arg3)
>> +            return -EINVAL;
>> +        return current->rseq.slice.state.enabled ? PR_RSEQ_SLICE_EXT_ENABLE : 0;
>> +
>> +    case PR_RSEQ_SLICE_EXTENSION_SET: {
>> +        u32 rflags, valid = RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
>> +        bool enable = !!(arg3 & PR_RSEQ_SLICE_EXT_ENABLE);
>> +
>> +        if (arg3 & ~PR_RSEQ_SLICE_EXT_ENABLE)
>> +            return -EINVAL;
>> +        if (!rseq_slice_extension_enabled())
>> +            return -ENOTSUPP;
>> +        if (!current->rseq.usrptr)
>> +            return -ENXIO;
>> +
>> +        /* No change? */
>> +        if (enable == !!current->rseq.slice.state.enabled)
>> +            return 0;
>> +
>> +        if (get_user(rflags, &current->rseq.usrptr->flags))
>> +            goto die;
>> +
>> +        if (current->rseq.slice.state.enabled)
>> +            valid |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>> +
>> +        if ((rflags & valid) != valid)
>> +            goto die;
>> +
>> +        rflags &= ~RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>> +        rflags |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
>> +        if (enable)
>> +            rflags |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>> +
>> +        if (put_user(rflags, &current->rseq.usrptr->flags))
>> +            goto die;
>> +
>> +        current->rseq.slice.state.enabled = enable;
> 
> What should happen to this enabled state if rseq is unregistered
> after this prctl ?

Wouldn't rseq_reset() deal with it since it does a:

    memset(&t->rseq, 0, sizeof(t->rseq));

-- 
Thanks and Regards,
Prateek

Re: [patch 05/12] rseq: Add prctl() to enable time slice extensions
Posted by Mathieu Desnoyers 5 months ago
On 2025-09-11 12:52, K Prateek Nayak wrote:
> Hello Mathieu,
> 
> On 9/11/2025 9:20 PM, Mathieu Desnoyers wrote:
>>>    +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3)
>>> +{
>>> +    switch (arg2) {
>>> +    case PR_RSEQ_SLICE_EXTENSION_GET:
>>> +        if (arg3)
>>> +            return -EINVAL;
>>> +        return current->rseq.slice.state.enabled ? PR_RSEQ_SLICE_EXT_ENABLE : 0;
>>> +
>>> +    case PR_RSEQ_SLICE_EXTENSION_SET: {
>>> +        u32 rflags, valid = RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
>>> +        bool enable = !!(arg3 & PR_RSEQ_SLICE_EXT_ENABLE);
>>> +
>>> +        if (arg3 & ~PR_RSEQ_SLICE_EXT_ENABLE)
>>> +            return -EINVAL;
>>> +        if (!rseq_slice_extension_enabled())
>>> +            return -ENOTSUPP;
>>> +        if (!current->rseq.usrptr)
>>> +            return -ENXIO;
>>> +
>>> +        /* No change? */
>>> +        if (enable == !!current->rseq.slice.state.enabled)
>>> +            return 0;
>>> +
>>> +        if (get_user(rflags, &current->rseq.usrptr->flags))
>>> +            goto die;
>>> +
>>> +        if (current->rseq.slice.state.enabled)
>>> +            valid |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>>> +
>>> +        if ((rflags & valid) != valid)
>>> +            goto die;
>>> +
>>> +        rflags &= ~RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>>> +        rflags |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE;
>>> +        if (enable)
>>> +            rflags |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED;
>>> +
>>> +        if (put_user(rflags, &current->rseq.usrptr->flags))
>>> +            goto die;
>>> +
>>> +        current->rseq.slice.state.enabled = enable;
>>
>> What should happen to this enabled state if rseq is unregistered
>> after this prctl ?
> 
> Wouldn't rseq_reset() deal with it since it does a:
> 
>      memset(&t->rseq, 0, sizeof(t->rseq));
> 

Good point, thanks!

Mathieu


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com