kernel/sched/fair.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-)
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 102a28344a60e637934ffca62d50ff8319b11165
Gitweb: https://git.kernel.org/tip/102a28344a60e637934ffca62d50ff8319b11165
Author: K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Tue, 02 Jun 2026 05:25:30
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 02 Jun 2026 12:26:12 +02:00
sched/fair: Move the throttled tasks to a local list in tg_unthrottle_up()
Starting with a subsequent commit, an update_curr() during the enqueue of
a throttled task can throttle the hierarchy. This can lead to
tg_throttle_down() seeing a non-empty throttled_limbo_list for a cfs_rq
that is still attaching tasks from its throttled_limbo_list one by one. For example:
R
|
A
/ \
*B C
|
rq->curr
*B is throttled with tasks on the limbo list. When the tasks are
unthrottled via tg_unthrottle_up() and entity of group B is placed onto
A, update_curr() is called to catch up the vruntime and it may throttle
group A causing the subsequent tg_throttle_down() to see the pending
tasks on B's limbo list.
tg_unthrottle_up()
/* --cfs_rq->throttle_count == 0 */
list_for_each_entry_safe(p, cfs_rq->throttled_limbo_list)
enqueue_task_fair()
enqueue_entity(se /* B->se */)
update_curr(cfs_rq /* A->gcfs_rq */)
account_cfs_rq_runtime(cfs_rq)
throttle_cfs_rq(cfs_rq /* A->gcfs_rq */ )
tg_throttle_down()
/* Reaches B->cfs_rq with throttle_count == 0 */
!!! !list_empty(&cfs_rq->throttled_limbo_list) !!!
Move the tasks from throttled_limbo_list onto a local list before
starting the unthrottle to prevent the splat described above. If the
hierarchy is throttled again in the middle of an unthrottle, put the pending
tasks back onto the limbo list to prevent running them unnecessarily.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Benjamin Segall <bsegall@google.com>
Tested-by: Aaron Lu <ziqianlu@bytedance.com>
Link: https://patch.msgid.link/20260602052531.11450-2-kprateek.nayak@amd.com
---
kernel/sched/fair.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f91d85c..3f3f09a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6739,6 +6739,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
struct rq *rq = data;
struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
struct task_struct *p, *tmp;
+ LIST_HEAD(throttled_tasks);
/*
* If cfs_rq->curr is set, the cfs_rq might not have caught up
@@ -6769,13 +6770,31 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
cfs_rq->throttled_clock_self_time += delta;
}
+ /*
+ * Move the tasks to a local list since an update_curr() during
+ * enqueue_task_fair() can throttle a higher cfs_rq, and it can
+ * see the "throttled_limbo_list" being non-empty in
+ * tg_throttle_down() if throttle_count turned 0 above.
+ */
+ list_splice_init(&cfs_rq->throttled_limbo_list, &throttled_tasks);
+
/* Re-enqueue the tasks that have been throttled at this level. */
- list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) {
+ list_for_each_entry_safe(p, tmp, &throttled_tasks, throttle_node) {
+ /*
+ * Back to being throttled! Break out and put the remaining
+ * tasks back onto the limbo_list to prevent running them
+ * unnecessarily.
+ */
+ if (cfs_rq->throttle_count)
+ break;
+
list_del_init(&p->throttle_node);
p->throttled = false;
- enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP);
+ enqueue_task_fair(rq, p, ENQUEUE_WAKEUP);
}
+ list_splice(&throttled_tasks, &cfs_rq->throttled_limbo_list);
+
/* Add cfs_rq with load or one or more already running entities to the list */
if (!cfs_rq_is_decayed(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
© 2016 - 2026 Red Hat, Inc.