Threads marked with TIF_RESCHED_ALLOW are preemptible, but do
not have explicit preemption points.
Handle them as we do preempt_model_full(), by rescheduling in the
irqentry_exit path by calling irqentry_exit_cond_resched().
Co-developed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
include/linux/entry-common.h | 13 +++++++++++++
kernel/entry/common.c | 13 ++++++++++++-
kernel/sched/core.c | 32 +++++++++++++++++---------------
3 files changed, 42 insertions(+), 16 deletions(-)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d95ab85f96ba..3716c223a703 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -415,10 +415,23 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
* Conditional reschedule with additional sanity checks.
*/
void raw_irqentry_exit_cond_resched(void);
+
+/**
+ * irqentry_exit_allow_resched - Conditionally reschedule on return from interrupt
+ * for tasks that are explicitly marked TIF_RESCHED_ALLOW.
+ *
+ * Enabled for both preempt_model_none() and preempt_model_voluntary().
+ */
+void irqentry_exit_allow_resched(void);
+
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
+#ifdef TIF_RESCHED_ALLOW
+#define irqentry_exit_cond_resched_dynamic_disabled irqentry_exit_allow_resched
+#else
#define irqentry_exit_cond_resched_dynamic_disabled NULL
+#endif
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index d7ee4bc3f2ba..b4cee897d6f6 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -389,6 +389,13 @@ void raw_irqentry_exit_cond_resched(void)
preempt_schedule_irq();
}
}
+
+void irqentry_exit_allow_resched(void)
+{
+ if (resched_allowed())
+ raw_irqentry_exit_cond_resched();
+}
+
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
@@ -396,8 +403,10 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
- if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+ if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) {
+ irqentry_exit_allow_resched();
return;
+ }
raw_irqentry_exit_cond_resched();
}
#endif
@@ -430,6 +439,8 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
instrumentation_begin();
if (IS_ENABLED(CONFIG_PREEMPTION))
irqentry_exit_cond_resched();
+ else
+ irqentry_exit_allow_resched();
/* Covers both tracing and lockdep */
trace_hardirqs_on();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2299a5cfbfb9..3c1b1b0cd575 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6570,6 +6570,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* - explicit schedule() call
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
+ * - return from interrupt-handler for a task marked
+ * with allow_resched()
*
* WARNING: must be called with preemption disabled!
*/
@@ -8692,25 +8694,25 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
*
*
* NONE:
- * cond_resched <- __cond_resched
- * might_resched <- RET0
- * preempt_schedule <- NOP
- * preempt_schedule_notrace <- NOP
- * irqentry_exit_cond_resched <- NOP
+ * cond_resched <- __cond_resched
+ * might_resched <- RET0
+ * preempt_schedule <- NOP
+ * preempt_schedule_notrace <- NOP
+ * irqentry_exit_cond_resched <- irqentry_exit_allow_resched
*
* VOLUNTARY:
- * cond_resched <- __cond_resched
- * might_resched <- __cond_resched
- * preempt_schedule <- NOP
- * preempt_schedule_notrace <- NOP
- * irqentry_exit_cond_resched <- NOP
+ * cond_resched <- __cond_resched
+ * might_resched <- __cond_resched
+ * preempt_schedule <- NOP
+ * preempt_schedule_notrace <- NOP
+ * irqentry_exit_cond_resched <- irqentry_exit_allow_resched
*
* FULL:
- * cond_resched <- RET0
- * might_resched <- RET0
- * preempt_schedule <- preempt_schedule
- * preempt_schedule_notrace <- preempt_schedule_notrace
- * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ * cond_resched <- RET0
+ * might_resched <- RET0
+ * preempt_schedule <- preempt_schedule
+ * preempt_schedule_notrace <- preempt_schedule_notrace
+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
*/
enum {
--
2.31.1
On Wed, Aug 30, 2023 at 11:49:57AM -0700, Ankur Arora wrote:

> +#ifdef TIF_RESCHED_ALLOW
> +#define irqentry_exit_cond_resched_dynamic_disabled irqentry_exit_allow_resched
> +#else
> #define irqentry_exit_cond_resched_dynamic_disabled NULL
> +#endif

per ^, the below comments are not entirely accurate, since not every
architecture has TIF_RESCHED_ALLOW, perhaps make it:

> @@ -8692,25 +8694,25 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
> *
> *
> * NONE:
> - * cond_resched <- __cond_resched
> - * might_resched <- RET0
> - * preempt_schedule <- NOP
> - * preempt_schedule_notrace <- NOP
> - * irqentry_exit_cond_resched <- NOP
> + * cond_resched <- __cond_resched
> + * might_resched <- RET0
> + * preempt_schedule <- NOP
> + * preempt_schedule_notrace <- NOP
> + * irqentry_exit_cond_resched <- irqentry_exit_allow_resched

+ * irqentry_exit_cond_resched <- NOP / irqentry_exit_allow_resched

Or something.

Also, why did you add that extra whitespace all over? Makes it a bit
harder to see what actually changed.
Peter Zijlstra <peterz@infradead.org> writes:

> On Wed, Aug 30, 2023 at 11:49:57AM -0700, Ankur Arora wrote:
>
>> +#ifdef TIF_RESCHED_ALLOW
>> +#define irqentry_exit_cond_resched_dynamic_disabled irqentry_exit_allow_resched
>> +#else
>> #define irqentry_exit_cond_resched_dynamic_disabled NULL
>> +#endif
>
> per ^, the below comments are not entirely accurate, since not every
> architecture has TIF_RESCHED_ALLOW, perhaps make it:
>
>> @@ -8692,25 +8694,25 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
>> *
>> *
>> * NONE:
>> - * cond_resched <- __cond_resched
>> - * might_resched <- RET0
>> - * preempt_schedule <- NOP
>> - * preempt_schedule_notrace <- NOP
>> - * irqentry_exit_cond_resched <- NOP
>> + * cond_resched <- __cond_resched
>> + * might_resched <- RET0
>> + * preempt_schedule <- NOP
>> + * preempt_schedule_notrace <- NOP
>> + * irqentry_exit_cond_resched <- irqentry_exit_allow_resched
>
> + * irqentry_exit_cond_resched <- NOP / irqentry_exit_allow_resched
>
> Or something.
>
> Also, why did you add that extra whitespace all over? Makes it a bit
> harder to see what actually changed.

Yeah, not sure why I didn't catch that. Will fix.

--
ankur
© 2016 - 2025 Red Hat, Inc.