kernel/sched/core.c | 5 ++++- kernel/sched/fair.c | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-)
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 28ad5427682bccf06074366f347a6083d6730c1e
Gitweb: https://git.kernel.org/tip/28ad5427682bccf06074366f347a6083d6730c1e
Author: K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Tue, 02 Jun 2026 05:25:29
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 02 Jun 2026 12:26:12 +02:00
sched/fair: Call update_curr() before unthrottling the hierarchy
Subsequent commits will allow update_curr() to throttle the hierarchy
when the runtime accounting exceeds the allocated quota. Call
update_curr() before the unthrottle event, and also in tg_unthrottle_up(),
to catch up on any runtime accrued since the last clock update and to
stabilize "runtime_remaining" and "throttle_count" for that cfs_rq.
Doing an update_curr() early ensures the cfs_rq is not throttled right
back again while the unthrottle is in progress.
Since all callers of unthrottle_cfs_rq(), except two, already update the
rq_clock and call rq_clock_start_loop_update(), move the
update_rq_clock() from unthrottle_cfs_rq() to the callers that don't
update the rq_clock.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Benjamin Segall <bsegall@google.com>
Tested-by: Aaron Lu <ziqianlu@bytedance.com>
Link: https://patch.msgid.link/20260602052531.11450-1-kprateek.nayak@amd.com
---
kernel/sched/core.c | 5 ++++-
kernel/sched/fair.c | 21 +++++++++++++++++++--
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd03141..e745c58 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9859,11 +9859,14 @@ static int tg_set_cfs_bandwidth(struct task_group *tg,
struct rq *rq = cfs_rq->rq;
guard(rq_lock_irq)(rq);
+
cfs_rq->runtime_enabled = runtime_enabled;
cfs_rq->runtime_remaining = 1;
- if (cfs_rq->throttled)
+ if (cfs_rq->throttled) {
+ update_rq_clock(rq);
unthrottle_cfs_rq(cfs_rq);
+ }
}
if (runtime_was_enabled && !runtime_enabled)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26a8bbb..f91d85c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6740,6 +6740,15 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
struct task_struct *p, *tmp;
+ /*
+ * If cfs_rq->curr is set, the cfs_rq might not have caught up
+ * since the last clock update. Do it now before we begin
+ * queueing tasks onto it, to avoid needlessly unthrottling
+ * the hierarchy only for this cfs_rq to be throttled right
+ * back again.
+ */
+ update_curr(cfs_rq);
+
if (--cfs_rq->throttle_count)
return 0;
@@ -6882,14 +6891,16 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
* We can't unthrottle this cfs_rq without any runtime remaining because
* any enqueue in tg_unthrottle_up() will immediately trigger a throttle,
* which is not supposed to happen on unthrottle path.
+ *
+ * Catch up on the remaining runtime since last clock update before
+ * checking runtime remaining.
*/
+ update_curr(cfs_rq);
if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0)
return;
cfs_rq->throttled = 0;
- update_rq_clock(rq);
-
scoped_guard(raw_spinlock, &cfs_b->lock) {
list_del_rcu(&cfs_rq->throttled_list);
@@ -6964,6 +6975,7 @@ static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
bool first;
if (rq == this_rq()) {
+ update_rq_clock(rq);
unthrottle_cfs_rq(cfs_rq);
return;
}
@@ -7017,6 +7029,11 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
if (!list_empty(&cfs_rq->throttled_csd_list))
continue;
+ if (cfs_rq->curr) {
+ update_rq_clock(rq);
+ update_curr(cfs_rq);
+ }
+
/* By the above checks, this should never be true */
WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);
© 2016 - 2026 Red Hat, Inc.