From nobody Thu Apr 2 20:25:33 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4E75E2AD2C for ; Thu, 19 Feb 2026 08:10:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771488660; cv=none; b=Lb87uaW6SUOM6LaqOW5jLhNza8jS8WI8dqbSqJ7YXm6/q6UKO4Y3AcqfTWU3TZrFDI9aaApjt4GG9czU0eqyzP/OoUGSbeQeUhrKLD25OWe1lSMByr6PSr7XlXzS/D8l0u/2A+Sc6NXhxSZWcR8jbnC22fKzUDolB1hkKFYcjks= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771488660; c=relaxed/simple; bh=HBamDKNPVCgrh3uVbm087xGUtgzG3LqrV2gUldF0l4A=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=RBTXDdCRcLJZ8WyWUD0vX64q63bM/VqJ7hIdSl9j62NHy2Vwva9eiXq+RCUlkMrnIo4v5k8n9NnWQU0H8ZWAuoVJjgIRWzb5lOLCf3NUMwwyw+XcK3ARxxU0s34lL3w3HtHy0pZ5lnQGKN2yaDbmq0T83K6tYFUQrTBjlq78JjE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=SK0RYfkX; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="SK0RYfkX" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-ID:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; 
bh=kFQXiMn5/u84aWz0pDpIIDNXMNeep9eenqtlm+znF5U=; b=SK0RYfkXgVgxygC8Lxi+sUBAzm P+tuM1eUYCC5YRRwVupPXXZKpsI8rkWv6EXTRz+xGj8fjtmrSMWarCbNwh53Xoj1b37DiZlht0gZl cVaviRYfirX43hyW6xmhMiGgv1xWtqctLFsfg2T7hDxSDDvtX5rUFKqkmXtQoIEj+pW/3aq2HWdTN IRgv22RhHwyp5VHVUlNX9UkwuZO8uJGPYEGL26l0yoH80pTdfG7cPG/4PEZsmtS5EEFz66P0m0rW+ PYrQvTExIFuQIuOwUCe3mNlXXQx2K+qRT+7OjHkXPKLRXR4b2HSqPgAKJalcx64pkn+y/iy6k8zgZ nmLt4OVA==; Received: from 77-249-17-252.cable.dynamic.v4.ziggo.nl ([77.249.17.252] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.98.2 #2 (Red Hat Linux)) id 1vsz7Z-00000000xzZ-27W0; Thu, 19 Feb 2026 08:10:49 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id C7F0630334D; Thu, 19 Feb 2026 09:10:48 +0100 (CET) Message-ID: <20260219080624.830623197@infradead.org> User-Agent: quilt/0.68 Date: Thu, 19 Feb 2026 08:58:44 +0100 From: Peter Zijlstra To: mingo@kernel.org Cc: peterz@infradead.org, juri.lelli@redhat.com, vincent.guittot@linaro.org, dietmar.eggemann@arm.com, rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de, vschneid@redhat.com, linux-kernel@vger.kernel.org, wangtao554@huawei.com, quzicheng@huawei.com, kprateek.nayak@amd.com, dsmythies@telus.net, shubhang@os.amperecomputing.com Subject: [PATCH v2 4/7] sched/fair: Fix lag clamp References: <20260219075840.162631716@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Vincent reported that he was seeing undue lag clamping in a mixed slice workload. Implement the max_slice tracking as per the todo comment. 
Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy= ") Reported-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Vincent Guittot Tested-by: K Prateek Nayak Tested-by: Shubhang Kaushik Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kic= ks-ass.net Reviewed-by: Vincent Guittot --- include/linux/sched.h | 1 + kernel/sched/fair.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -574,6 +574,7 @@ struct sched_entity { u64 deadline; u64 min_vruntime; u64 min_slice; + u64 max_slice; =20 struct list_head group_node; unsigned char on_rq; --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -748,6 +748,8 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) return cfs_rq->zero_vruntime; } =20 +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq); + /* * lag_i =3D S - s_i =3D w_i * (V - v_i) * @@ -761,17 +763,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * EEVDF gives the following limit for a steady state system: * * -r_max < lag < max(r_max, q) - * - * XXX could add max_slice to the augmented data to track this. 
*/ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *= se) { + u64 max_slice =3D cfs_rq_max_slice(cfs_rq) + TICK_NSEC; s64 vlag, limit; =20 WARN_ON_ONCE(!se->on_rq); =20 vlag =3D avg_vruntime(cfs_rq) - se->vruntime; - limit =3D calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + limit =3D calc_delta_fair(max_slice, se); =20 se->vlag =3D clamp(vlag, -limit, limit); } @@ -829,6 +830,21 @@ static inline u64 cfs_rq_min_slice(struc return min_slice; } =20 +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq) +{ + struct sched_entity *root =3D __pick_root_entity(cfs_rq); + struct sched_entity *curr =3D cfs_rq->curr; + u64 max_slice =3D 0ULL; + + if (curr && curr->on_rq) + max_slice =3D curr->slice; + + if (root) + max_slice =3D max(max_slice, root->max_slice); + + return max_slice; +} + static inline bool __entity_less(struct rb_node *a, const struct rb_node *= b) { return entity_before(__node_2_se(a), __node_2_se(b)); @@ -853,6 +869,15 @@ static inline void __min_slice_update(st } } =20 +static inline void __max_slice_update(struct sched_entity *se, struct rb_n= ode *node) +{ + if (node) { + struct sched_entity *rse =3D __node_2_se(node); + if (rse->max_slice > se->max_slice) + se->max_slice =3D rse->max_slice; + } +} + /* * se->min_vruntime =3D min(se->vruntime, {left,right}->min_vruntime) */ @@ -860,6 +885,7 @@ static inline bool min_vruntime_update(s { u64 old_min_vruntime =3D se->min_vruntime; u64 old_min_slice =3D se->min_slice; + u64 old_max_slice =3D se->max_slice; struct rb_node *node =3D &se->run_node; =20 se->min_vruntime =3D se->vruntime; @@ -870,8 +896,13 @@ static inline bool min_vruntime_update(s __min_slice_update(se, node->rb_right); __min_slice_update(se, node->rb_left); =20 + se->max_slice =3D se->slice; + __max_slice_update(se, node->rb_right); + __max_slice_update(se, node->rb_left); + return se->min_vruntime =3D=3D old_min_vruntime && - se->min_slice =3D=3D old_min_slice; + se->min_slice =3D=3D old_min_slice && + 
se->max_slice =3D=3D old_max_slice; } =20 RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,