Move changing the slice ext duration to debugfs, a slightly less permanent
interface.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
Documentation/admin-guide/sysctl/kernel.rst | 11 ----
Documentation/userspace-api/rseq.rst | 4 +
kernel/rseq.c | 69 +++++++++++++++++++---------
3 files changed, 51 insertions(+), 33 deletions(-)
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1248,17 +1248,6 @@ reboot-cmd (SPARC only)
ROM/Flash boot loader. Maybe to tell it what to do after
rebooting. ???
-rseq_slice_extension_nsec
-=========================
-
-A task can request to delay its scheduling if it is in a critical section
-via the prctl(PR_RSEQ_SLICE_EXTENSION_SET) mechanism. This sets the maximum
-allowed extension in nanoseconds before scheduling of the task is enforced.
-Default value is 10000ns (10us). The possible range is 10000ns (10us) to
-50000ns (50us).
-
-This value has a direct correlation to the worst case scheduling latency;
-increment at your own risk.
sched_energy_aware
==================
--- a/Documentation/userspace-api/rseq.rst
+++ b/Documentation/userspace-api/rseq.rst
@@ -79,7 +79,9 @@ slice extension by setting rseq::slice_c
interrupted and the interrupt results in a reschedule request in the
kernel, then the kernel can grant a time slice extension and return to
userspace instead of scheduling out. The length of the extension is
-determined by the ``rseq_slice_extension_nsec`` sysctl.
+determined by debugfs:rseq/slice_ext_nsec. The default value is 10 usec, which
+is also the minimum value. It can be increased up to 50 usecs; however, doing so
+can/will affect the minimum scheduling latency.
The kernel indicates the grant by clearing rseq::slice_ctrl::request and
setting rseq::slice_ctrl::granted to 1. If there is a reschedule of the
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -222,12 +222,16 @@ static const struct file_operations debu
.release = single_release,
};
+extern void rseq_slice_ext_init(struct dentry *root_dir);
+
static int __init rseq_debugfs_init(void)
{
struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
rseq_stats_init(root_dir);
+ if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSIO))
+ rseq_slice_ext_init(root_dir);
return 0;
}
__initcall(rseq_debugfs_init);
@@ -515,7 +519,9 @@ struct slice_timer {
void *cookie;
};
-unsigned int rseq_slice_ext_nsecs __read_mostly = 10 * NSEC_PER_USEC;
+static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
+static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
+unsigned int rseq_slice_ext_nsecs __read_mostly = rseq_slice_ext_nsecs_min;
static DEFINE_PER_CPU(struct slice_timer, slice_timer);
DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
@@ -761,30 +767,52 @@ SYSCALL_DEFINE0(rseq_slice_yield)
return yielded;
}
-#ifdef CONFIG_SYSCTL
-static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
-static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
+#ifdef CONFIG_DEBUG_FS
+static int rseq_slice_ext_show(struct seq_file *m, void *p)
+{
+ seq_printf(m, "%d\n", rseq_slice_ext_nsecs);
+ return 0;
+}
+
+static ssize_t rseq_slice_ext_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ unsigned int nsecs;
+
+ if (kstrtouint(ubuf, count, &nsecs))
+ return -EINVAL;
+
+ if (nsecs < rseq_slice_ext_nsecs_min)
+ return -ERANGE;
+
+ if (nsecs > rseq_slice_ext_nsecs_max)
+ return -ERANGE;
+
+ rseq_slice_ext_nsecs = nsecs;
+
+ return count;
+}
-static const struct ctl_table rseq_slice_ext_sysctl[] = {
- {
- .procname = "rseq_slice_extension_nsec",
- .data = &rseq_slice_ext_nsecs,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- .extra1 = (unsigned int *)&rseq_slice_ext_nsecs_min,
- .extra2 = (unsigned int *)&rseq_slice_ext_nsecs_max,
- },
+static int rseq_slice_ext_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_slice_ext_show, inode->i_private);
+}
+
+static const struct file_operations slice_ext_ops = {
+ .open = rseq_slice_ext_open,
+ .read = seq_read,
+ .write = rseq_slice_ext_write,
+ .llseek = seq_lseek,
+ .release = single_release,
};
-static void rseq_slice_sysctl_init(void)
+static void rseq_slice_ext_init(struct dentry *root_dir)
{
- if (rseq_slice_extension_enabled())
- register_sysctl_init("kernel", rseq_slice_ext_sysctl);
+ debugfs_create_file("slice_ext_nsec", 0644, root_dir, NULL, &slice_ext_ops);
}
-#else /* CONFIG_SYSCTL */
-static inline void rseq_slice_sysctl_init(void) { }
-#endif /* !CONFIG_SYSCTL */
+#else
+static void rseq_slice_ext_init(struct dentry *root_dir) { }
+#endif
static int __init rseq_slice_cmdline(char *str)
{
@@ -807,7 +835,6 @@ static int __init rseq_slice_init(void)
hrtimer_setup(per_cpu_ptr(&slice_timer.timer, cpu), rseq_slice_expired,
CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_HARD);
}
- rseq_slice_sysctl_init();
return 0;
}
device_initcall(rseq_slice_init);
On Wed, Jan 21, 2026 at 03:24:59PM +0100, Peter Zijlstra wrote:
> Move changing the slice ext duration to debugfs, a slightly less permanent
> interface.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
(...)
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -222,12 +222,16 @@ static const struct file_operations debu
> .release = single_release,
> };
>
> +extern void rseq_slice_ext_init(struct dentry *root_dir);
This is actually a static function.
> +
> static int __init rseq_debugfs_init(void)
> {
> struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
>
> debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
> rseq_stats_init(root_dir);
> + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSIO))
Missing 'N' at the end.
> + rseq_slice_ext_init(root_dir);
> return 0;
> }
> __initcall(rseq_debugfs_init);
> @@ -515,7 +519,9 @@ struct slice_timer {
> void *cookie;
> };
(...)
> +#ifdef CONFIG_DEBUG_FS
(...)
> +#else
> +static void rseq_slice_ext_init(struct dentry *root_dir) { }
> +#endif
It might be possible to just remove the CONFIG_DEBUG_FS ifdeffery and let the
compiler optimize away all of the debugfs-related code automatically.
(...)
On Wed, Jan 21, 2026 at 03:50:55PM +0100, Thomas Weißschuh wrote:
> > +#else
> > +static void rseq_slice_ext_init(struct dentry *root_dir) { }
> > +#endif
>
> It might be possible to just remove the CONFIG_DEBUG_FS ifdeffery and let the
> compiler optimize away all of the debugfs-related code automatically.
Something like the below seems to actually build for SLICE_EXT=y,
DEBUG_FS=n (got there through allnoconfig).
The final image doesn't seem to have the various _ops, _show, etc. symbols
either.
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -123,7 +123,6 @@ void __rseq_trace_ip_fixup(unsigned long
}
#endif /* CONFIG_TRACEPOINTS */
-#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_RSEQ_STATS
DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
@@ -222,7 +221,7 @@ static const struct file_operations debu
.release = single_release,
};
-extern void rseq_slice_ext_init(struct dentry *root_dir);
+static void rseq_slice_ext_init(struct dentry *root_dir);
static int __init rseq_debugfs_init(void)
{
@@ -235,7 +234,6 @@ static int __init rseq_debugfs_init(void
return 0;
}
__initcall(rseq_debugfs_init);
-#endif /* CONFIG_DEBUG_FS */
static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id)
{
@@ -767,7 +765,6 @@ SYSCALL_DEFINE0(rseq_slice_yield)
return yielded;
}
-#ifdef CONFIG_DEBUG_FS
static int rseq_slice_ext_show(struct seq_file *m, void *p)
{
seq_printf(m, "%d\n", rseq_slice_ext_nsecs);
@@ -810,9 +807,6 @@ static void rseq_slice_ext_init(struct d
{
debugfs_create_file("slice_ext_nsec", 0644, root_dir, NULL, &slice_ext_ops);
}
-#else
-static void rseq_slice_ext_init(struct dentry *root_dir) { }
-#endif
static int __init rseq_slice_cmdline(char *str)
{
@@ -838,4 +832,6 @@ static int __init rseq_slice_init(void)
return 0;
}
device_initcall(rseq_slice_init);
+#else
+static void rseq_slice_ext_init(struct dentry *root_dir) { }
#endif /* CONFIG_RSEQ_SLICE_EXTENSION */
On Wed, Jan 21, 2026 at 03:50:55PM +0100, Thomas Weißschuh wrote:
> On Wed, Jan 21, 2026 at 03:24:59PM +0100, Peter Zijlstra wrote:
> > Move changing the slice ext duration to debugfs, a slightly less permanent
> > interface.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
>
> (...)
>
> > --- a/kernel/rseq.c
> > +++ b/kernel/rseq.c
> > @@ -222,12 +222,16 @@ static const struct file_operations debu
> > .release = single_release,
> > };
> >
> > +extern void rseq_slice_ext_init(struct dentry *root_dir);
>
> This is actually a static function.
Yes, but it is not always defined.
> > +
> > static int __init rseq_debugfs_init(void)
> > {
> > struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
> >
> > debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
> > rseq_stats_init(root_dir);
> > + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSIO))
>
> Missing 'N' at the end.
Argh. Some day my editor will tell me this :/
> > + rseq_slice_ext_init(root_dir);
> > return 0;
> > }
> > __initcall(rseq_debugfs_init);
> > @@ -515,7 +519,9 @@ struct slice_timer {
> > void *cookie;
> > };
>
> (...)
>
> > +#ifdef CONFIG_DEBUG_FS
>
> (...)
>
> > +#else
> > +static void rseq_slice_ext_init(struct dentry *root_dir) { }
> > +#endif
>
> It might be possible to just remove the CONFIG_DEBUG_FS ifdeffery and let the
> compiler optimize away all of the debugfs-related code automatically.
I'll check.
The following commit has been merged into the sched/core branch of tip:
Commit-ID: e1d7f54900f1e1d3003a85b78cd7105a64203ff7
Gitweb: https://git.kernel.org/tip/e1d7f54900f1e1d3003a85b78cd7105a64203ff7
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 21 Jan 2026 14:21:51 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 22 Jan 2026 11:11:20 +01:00
rseq: Move slice_ext_nsec to debugfs
Move changing the slice ext duration to debugfs, a slightly less permanent
interface.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260121143207.923520192@infradead.org
---
Documentation/admin-guide/sysctl/kernel.rst | 11 +---
Documentation/userspace-api/rseq.rst | 4 +-
kernel/rseq.c | 69 +++++++++++++-------
3 files changed, 49 insertions(+), 35 deletions(-)
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index b09d18e..239da22 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1248,17 +1248,6 @@ reboot-cmd (SPARC only)
ROM/Flash boot loader. Maybe to tell it what to do after
rebooting. ???
-rseq_slice_extension_nsec
-=========================
-
-A task can request to delay its scheduling if it is in a critical section
-via the prctl(PR_RSEQ_SLICE_EXTENSION_SET) mechanism. This sets the maximum
-allowed extension in nanoseconds before scheduling of the task is enforced.
-Default value is 10000ns (10us). The possible range is 10000ns (10us) to
-50000ns (50us).
-
-This value has a direct correlation to the worst case scheduling latency;
-increment at your own risk.
sched_energy_aware
==================
diff --git a/Documentation/userspace-api/rseq.rst b/Documentation/userspace-api/rseq.rst
index e1fdb0d..29af6c3 100644
--- a/Documentation/userspace-api/rseq.rst
+++ b/Documentation/userspace-api/rseq.rst
@@ -79,7 +79,9 @@ slice extension by setting rseq::slice_ctrl::request to 1. If the thread is
interrupted and the interrupt results in a reschedule request in the
kernel, then the kernel can grant a time slice extension and return to
userspace instead of scheduling out. The length of the extension is
-determined by the ``rseq_slice_extension_nsec`` sysctl.
+determined by debugfs:rseq/slice_ext_nsec. The default value is 10 usec, which
+is also the minimum value. It can be increased up to 50 usecs; however, doing so
+can/will affect the minimum scheduling latency.
The kernel indicates the grant by clearing rseq::slice_ctrl::request and
setting rseq::slice_ctrl::granted to 1. If there is a reschedule of the
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 1c5490a..e423a9b 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -123,7 +123,6 @@ void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
}
#endif /* CONFIG_TRACEPOINTS */
-#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_RSEQ_STATS
DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
@@ -222,16 +221,19 @@ static const struct file_operations debug_ops = {
.release = single_release,
};
+static void rseq_slice_ext_init(struct dentry *root_dir);
+
static int __init rseq_debugfs_init(void)
{
struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
rseq_stats_init(root_dir);
+ if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION))
+ rseq_slice_ext_init(root_dir);
return 0;
}
__initcall(rseq_debugfs_init);
-#endif /* CONFIG_DEBUG_FS */
static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id)
{
@@ -515,7 +517,9 @@ struct slice_timer {
void *cookie;
};
-unsigned int rseq_slice_ext_nsecs __read_mostly = 10 * NSEC_PER_USEC;
+static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
+static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
+unsigned int rseq_slice_ext_nsecs __read_mostly = rseq_slice_ext_nsecs_min;
static DEFINE_PER_CPU(struct slice_timer, slice_timer);
DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
@@ -761,30 +765,48 @@ SYSCALL_DEFINE0(rseq_slice_yield)
return yielded;
}
-#ifdef CONFIG_SYSCTL
-static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
-static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
+static int rseq_slice_ext_show(struct seq_file *m, void *p)
+{
+ seq_printf(m, "%d\n", rseq_slice_ext_nsecs);
+ return 0;
+}
+
+static ssize_t rseq_slice_ext_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ unsigned int nsecs;
+
+ if (kstrtouint_from_user(ubuf, count, 10, &nsecs))
+ return -EINVAL;
+
+ if (nsecs < rseq_slice_ext_nsecs_min)
+ return -ERANGE;
-static const struct ctl_table rseq_slice_ext_sysctl[] = {
- {
- .procname = "rseq_slice_extension_nsec",
- .data = &rseq_slice_ext_nsecs,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- .extra1 = (unsigned int *)&rseq_slice_ext_nsecs_min,
- .extra2 = (unsigned int *)&rseq_slice_ext_nsecs_max,
- },
+ if (nsecs > rseq_slice_ext_nsecs_max)
+ return -ERANGE;
+
+ rseq_slice_ext_nsecs = nsecs;
+
+ return count;
+}
+
+static int rseq_slice_ext_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_slice_ext_show, inode->i_private);
+}
+
+static const struct file_operations slice_ext_ops = {
+ .open = rseq_slice_ext_open,
+ .read = seq_read,
+ .write = rseq_slice_ext_write,
+ .llseek = seq_lseek,
+ .release = single_release,
};
-static void rseq_slice_sysctl_init(void)
+static void rseq_slice_ext_init(struct dentry *root_dir)
{
- if (rseq_slice_extension_enabled())
- register_sysctl_init("kernel", rseq_slice_ext_sysctl);
+ debugfs_create_file("slice_ext_nsec", 0644, root_dir, NULL, &slice_ext_ops);
}
-#else /* CONFIG_SYSCTL */
-static inline void rseq_slice_sysctl_init(void) { }
-#endif /* !CONFIG_SYSCTL */
static int __init rseq_slice_cmdline(char *str)
{
@@ -807,8 +829,9 @@ static int __init rseq_slice_init(void)
hrtimer_setup(per_cpu_ptr(&slice_timer.timer, cpu), rseq_slice_expired,
CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_HARD);
}
- rseq_slice_sysctl_init();
return 0;
}
device_initcall(rseq_slice_init);
+#else
+static void rseq_slice_ext_init(struct dentry *root_dir) { }
#endif /* CONFIG_RSEQ_SLICE_EXTENSION */
© 2016 - 2026 Red Hat, Inc.