Add a tunable to specify the duration of the scheduler time slice extension.
The default is 30us and the maximum value that can be specified is 100us.
Setting it to 0 disables the scheduler time slice extension.
Signed-off-by: Prakash Sangappa <prakash.sangappa@oracle.com>
---
include/linux/sched.h | 3 +++
include/uapi/linux/rseq.h | 5 +++--
kernel/rseq.c | 7 +++++--
kernel/sched/core.c | 32 ++++++++++++++++++++++++++++++++
4 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 71e6c8221c1e..c279232ca6a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -407,6 +407,9 @@ static inline void sched_domains_mutex_lock(void) { }
static inline void sched_domains_mutex_unlock(void) { }
#endif
+/* Scheduler time slice extension duration in microseconds; 0 disables it */
+extern unsigned int sysctl_sched_preempt_delay_us;
+
struct sched_param {
int sched_priority;
};
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index f4813d931387..015534f064af 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -137,8 +137,9 @@ struct rseq {
* this thread.
* - RSEQ_CS_FLAG_DELAY_RESCHED
* Request by user thread to delay preemption. With use
- * of a timer, kernel grants extra cpu time upto 30us for this
- * thread before being rescheduled.
+ * of a timer, kernel grants extra cpu time up to the tunable
+ * 'sched_preempt_delay_us' value for this thread before it gets
+ * rescheduled.
* - RSEQ_CS_FLAG_RESCHEDULED
* Set by kernel if the thread was rescheduled in the extra time
* granted due to request RSEQ_CS_DELAY_RESCHED. This bit is
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 9355654e9b38..44d0f3ae0cd3 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -456,6 +456,8 @@ bool rseq_delay_resched(void)
if (!IS_ENABLED(CONFIG_SCHED_HRTICK))
return false;
+ if (!sysctl_sched_preempt_delay_us)
+ return false;
if (!t->rseq)
return false;
@@ -489,8 +491,9 @@ void rseq_delay_resched_fini(void)
* If your critical section is longer than 30 us you get to keep the
* pieces.
*/
- if (t->sched_time_delay)
- hrtick_local_start(30 * NSEC_PER_USEC);
+ if (sysctl_sched_preempt_delay_us && t->sched_time_delay)
+ hrtick_local_start(sysctl_sched_preempt_delay_us *
+ NSEC_PER_USEC);
#endif
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 86583fb72914..31928cbcd907 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -148,6 +148,15 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
*/
__read_mostly unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
+/*
+ * Scheduler time slice extension duration, in microseconds.
+ * The maximum allowed value is 100us; the default is 30us.
+ * Setting it to 0 disables the scheduler time slice extension.
+ */
+#define SCHED_PREEMPT_DELAY_DEFAULT_US 30
+__read_mostly unsigned int sysctl_sched_preempt_delay_us =
+ SCHED_PREEMPT_DELAY_DEFAULT_US;
+
__read_mostly int scheduler_running;
#ifdef CONFIG_SCHED_CORE
@@ -4664,6 +4673,20 @@ static int sysctl_schedstats(const struct ctl_table *table, int write, void *buf
#endif /* CONFIG_PROC_SYSCTL */
#endif /* CONFIG_SCHEDSTATS */
#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for the 'sched_preempt_delay_us' sysctl.
+ *
+ * The handler is only referenced from sched_core_sysctls[], so it lives
+ * inside the CONFIG_SYSCTL section; defining it outside triggers
+ * -Wunused-function on configs without CONFIG_SYSCTL.
+ *
+ * The tunable is an unsigned int, hence the unsigned min/max helper. The
+ * accepted range is taken from the table entry's extra1/extra2 bounds.
+ *
+ * Returns 0 on success or a negative errno from the proc helper.
+ */
+static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
+				      void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	err = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+
+	/* Warn only when the value was just written, not on every read. */
+	if (write && sysctl_sched_preempt_delay_us > SCHED_PREEMPT_DELAY_DEFAULT_US)
+		pr_warn("Sched preemption delay time set higher than default value %d us\n",
+			SCHED_PREEMPT_DELAY_DEFAULT_US);
+	return err;
+}
+
static const struct ctl_table sched_core_sysctls[] = {
#ifdef CONFIG_SCHEDSTATS
@@ -4711,6 +4734,15 @@ static const struct ctl_table sched_core_sysctls[] = {
.extra2 = SYSCTL_FOUR,
},
#endif /* CONFIG_NUMA_BALANCING */
+ {
+ .procname = "sched_preempt_delay_us",
+ .data = &sysctl_sched_preempt_delay_us,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_preempt_delay,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
};
static int __init sched_core_sysctl_init(void)
{
--
2.43.5
Hi Prakash,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/sched/core]
[also build test WARNING on peterz-queue/sched/core linus/master v6.15-rc6]
[cannot apply to tip/core/entry next-20250513]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Prakash-Sangappa/Sched-Scheduler-time-slice-extension/20250514-054844
base: tip/sched/core
patch link: https://lore.kernel.org/r/20250513214554.4160454-4-prakash.sangappa%40oracle.com
patch subject: [PATCH V4 3/6] Sched: Tunable to specify duration of time slice extension
config: arm-randconfig-003-20250514 (https://download.01.org/0day-ci/archive/20250514/202505142126.0irJYJgE-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250514/202505142126.0irJYJgE-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505142126.0irJYJgE-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/sched/core.c:4682:12: warning: 'sysctl_sched_preempt_delay' defined but not used [-Wunused-function]
static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
^~~~~~~~~~~~~~~~~~~~~~~~~~
vim +/sysctl_sched_preempt_delay +4682 kernel/sched/core.c
4681
> 4682 static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
4683 void *buffer, size_t *lenp, loff_t *ppos)
4684 {
4685 int err;
4686
4687 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
4688 if (err < 0)
4689 return err;
4690 if (sysctl_sched_preempt_delay_us > SCHED_PREEMPT_DELAY_DEFAULT_US)
4691 pr_warn("Sched preemption delay time set higher then default value %d us\n",
4692 SCHED_PREEMPT_DELAY_DEFAULT_US);
4693 return err;
4694 }
4695
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.