[PATCH 5/7 v3] sched/fair: Add push task mechanism for EAS

Posted by Vincent Guittot 11 months, 2 weeks ago
EAS relies on wakeup events to efficiently place tasks on the system, but
there are cases where a task no longer has wakeup events, or has them at
far too low a pace. For such situations, we can take advantage of the task
being put back in the enqueued list to check whether it should be pushed
to another CPU. When the task is alone on the CPU, it is never put back in
the enqueued list; in this special case, we use the tick to run the check.

Wakeup events remain the main way to migrate tasks, but we now also detect
situations where a task is stuck on a CPU by checking that its utilization
is larger than the maximum compute capacity available to it (the max CPU
capacity or the uclamp max setting).

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c  | 220 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |   2 +
 2 files changed, 222 insertions(+)
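
A note for readers: the "stuck task" criterion from the changelog can be
illustrated with a small userspace sketch. The helper below only mirrors
task_stuck_on_cpu() from the patch; the function name and the example
numbers are illustrative assumptions (the 1024-based scale matches the
kernel's SCHED_CAPACITY_SCALE), not part of the kernel code.

#include <stdbool.h>
#include <stdio.h>

/*
 * Mirrors task_stuck_on_cpu(): a task is a push candidate when its
 * utilization exceeds the best compute capacity this CPU can offer it.
 */
static bool task_stuck(unsigned long cpu_capacity, unsigned long uclamp_max,
		       unsigned long util_est, unsigned long runnable)
{
	unsigned long max_capa = cpu_capacity < uclamp_max ? cpu_capacity : uclamp_max;
	unsigned long util = util_est > runnable ? util_est : runnable;

	return util > max_capa;
}

int main(void)
{
	/* util 350 on a little CPU of capacity 160: stuck, worth pushing. */
	printf("%d\n", task_stuck(160, 1024, 350, 300));
	/* Same task on a big CPU of capacity 1024: it fits. */
	printf("%d\n", task_stuck(1024, 1024, 350, 300));
	return 0;
}

Tasks that still wake up regularly are left to feec() on the wakeup path;
this check only catches the ones that never, or too rarely, go through it.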

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a9b97bbc085f..5b2f88dec70e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7051,6 +7051,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
+static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
 static void set_next_buddy(struct sched_entity *se);
 
 /*
@@ -7081,6 +7082,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 		h_nr_idle = task_has_idle_policy(p);
 		if (task_sleep || task_delayed || !se->sched_delayed)
 			h_nr_runnable = 1;
+
+		fair_remove_pushable_task(rq, p);
 	} else {
 		cfs_rq = group_cfs_rq(se);
 		slice = cfs_rq_min_slice(cfs_rq);
@@ -8589,6 +8592,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	return target;
 }
 
+static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
+{
+	unsigned long max_capa, util;
+
+	max_capa = min(get_actual_cpu_capacity(cpu),
+		       uclamp_eff_value(p, UCLAMP_MAX));
+	util = max(task_util_est(p), task_runnable(p));
+
+	/*
+	 * Return true only if the task might not sleep/wakeup because of a low
+	 * compute capacity. Tasks that wake up regularly will be handled by
+	 * feec().
+	 */
+	return (util > max_capa);
+}
+
+static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
+{
+	if (p->nr_cpus_allowed == 1)
+		return false;
+
+	if (is_rd_overutilized(rq->rd))
+		return false;
+
+	if (task_stuck_on_cpu(p, cpu_of(rq)))
+		return true;
+
+	return false;
+}
+
+static int active_load_balance_cpu_stop(void *data);
+
+static inline void check_pushable_task(struct task_struct *p, struct rq *rq)
+{
+	int new_cpu, cpu = cpu_of(rq);
+
+	if (!sched_energy_enabled())
+		return;
+
+	if (WARN_ON(!p))
+		return;
+
+	if (WARN_ON(!task_current(rq, p)))
+		return;
+
+	if (is_migration_disabled(p))
+		return;
+
+	/* If there are several tasks, wait for the task to be put back */
+	if (rq->nr_running > 1)
+		return;
+
+	if (!sched_energy_push_task(p, rq))
+		return;
+
+	new_cpu = find_energy_efficient_cpu(p, cpu);
+
+	if (new_cpu == cpu)
+		return;
+
+	/*
+	 * ->active_balance synchronizes accesses to
+	 * ->active_balance_work.  Once set, it's cleared
+	 * only after active load balance is finished.
+	 */
+	if (!rq->active_balance) {
+		rq->active_balance = 1;
+		rq->push_cpu = new_cpu;
+	} else
+		return;
+
+	raw_spin_rq_unlock(rq);
+	stop_one_cpu_nowait(cpu,
+		active_load_balance_cpu_stop, rq,
+		&rq->active_balance_work);
+	raw_spin_rq_lock(rq);
+}
+
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->cfs.pushable_tasks);
+}
+
+static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_tasks(rq))
+		return NULL;
+
+	p = plist_first_entry(&rq->cfs.pushable_tasks,
+			      struct task_struct, pushable_tasks);
+
+	WARN_ON_ONCE(rq->cpu != task_cpu(p));
+	WARN_ON_ONCE(task_current(rq, p));
+	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+	WARN_ON_ONCE(!task_on_rq_queued(p));
+
+	/*
+	 * Remove the task from the pushable list, as we only try to push it
+	 * once after it has been put back in the enqueued list.
+	 */
+	plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+
+	return p;
+}
+
+/*
+ * See if the non-running fair tasks on this rq can be sent to other CPUs
+ * that fit their profile better.
+ */
+static bool push_fair_task(struct rq *rq)
+{
+	struct task_struct *next_task;
+	int prev_cpu, new_cpu;
+	struct rq *new_rq;
+
+	next_task = pick_next_pushable_fair_task(rq);
+	if (!next_task)
+		return false;
+
+	if (is_migration_disabled(next_task))
+		return true;
+
+	/* We might release rq lock */
+	get_task_struct(next_task);
+
+	prev_cpu = rq->cpu;
+
+	new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
+
+	if (new_cpu == prev_cpu)
+		goto out;
+
+	new_rq = cpu_rq(new_cpu);
+
+	if (double_lock_balance(rq, new_rq)) {
+		/* The task has already migrated in between */
+		if (task_cpu(next_task) != rq->cpu) {
+			double_unlock_balance(rq, new_rq);
+			goto out;
+		}
+
+		deactivate_task(rq, next_task, 0);
+		set_task_cpu(next_task, new_cpu);
+		activate_task(new_rq, next_task, 0);
+
+		resched_curr(new_rq);
+
+		double_unlock_balance(rq, new_rq);
+	}
+
+out:
+	put_task_struct(next_task);
+
+	return true;
+}
+
+static void push_fair_tasks(struct rq *rq)
+{
+	/* push_fair_task() returns true as long as it found a pushable task to try */
+	while (push_fair_task(rq))
+		;
+}
+
+static DEFINE_PER_CPU(struct balance_callback, fair_push_head);
+
+static inline void fair_queue_pushable_tasks(struct rq *rq)
+{
+	if (!sched_energy_enabled() || !has_pushable_tasks(rq))
+		return;
+
+	queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks);
+}
+static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p)
+{
+	if (sched_energy_enabled())
+		plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+}
+
+static void fair_add_pushable_task(struct rq *rq, struct task_struct *p)
+{
+	if (sched_energy_enabled() && task_on_rq_queued(p) && !p->se.sched_delayed) {
+		if (sched_energy_push_task(p, rq)) {
+			plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+			plist_node_init(&p->pushable_tasks, p->prio);
+			plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+		}
+	}
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -8758,6 +8952,10 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	return sched_balance_newidle(rq, rf) != 0;
 }
 #else
+static inline void check_pushable_task(struct task_struct *p, struct rq *rq) {}
+static inline void fair_queue_pushable_tasks(struct rq *rq) {}
+static void fair_remove_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
+static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
 #endif /* CONFIG_SMP */
 
@@ -8947,6 +9145,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 		put_prev_entity(cfs_rq, pse);
 		set_next_entity(cfs_rq, se);
 
+		/*
+		 * The previous task might be eligible for being pushed on
+		 * another cpu if it is still active.
+		 */
+		fair_add_pushable_task(rq, prev);
+
 		__set_next_task_fair(rq, p, true);
 	}
 
@@ -9019,6 +9223,13 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct t
 		cfs_rq = cfs_rq_of(se);
 		put_prev_entity(cfs_rq, se);
 	}
+
+	/*
+	 * The previous task might be eligible for being pushed on another cpu
+	 * if it is still active.
+	 */
+	fair_add_pushable_task(rq, prev);
+
 }
 
 /*
@@ -13151,6 +13362,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
 
+	check_pushable_task(curr, rq);
 	update_misfit_status(curr, rq);
 	check_update_overutilized_status(task_rq(curr));
 
@@ -13303,6 +13515,8 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
 {
 	struct sched_entity *se = &p->se;
 
+	fair_remove_pushable_task(rq, p);
+
 #ifdef CONFIG_SMP
 	if (task_on_rq_queued(p)) {
 		/*
@@ -13320,6 +13534,11 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
 	if (hrtick_enabled_fair(rq))
 		hrtick_start_fair(rq, p);
 
+	/*
+	 * Try to push the prev task before checking misfit for the next task,
+	 * as the migration of prev can make next fit the CPU.
+	 */
+	fair_queue_pushable_tasks(rq);
 	update_misfit_status(p, rq);
 	sched_fair_update_stop_tick(rq, p);
 }
@@ -13350,6 +13569,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifdef CONFIG_SMP
+	plist_head_init(&cfs_rq->pushable_tasks);
 	raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ab16d3d0e51c..2db198dccf21 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -722,6 +722,8 @@ struct cfs_rq {
 	struct list_head	leaf_cfs_rq_list;
 	struct task_group	*tg;	/* group that "owns" this runqueue */
 
+	struct plist_head	pushable_tasks;
+
 	/* Locally cached copy of our task_group's idle value */
 	int			idle;
 
-- 
2.43.0
Re: [PATCH 5/7 v3] sched/fair: Add push task mechanism for EAS
Posted by kernel test robot 11 months, 2 weeks ago
Hi Vincent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on peterz-queue/sched/core linus/master v6.14-rc4 next-20250228]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vincent-Guittot/sched-fair-Filter-false-overloaded_group-case-for-EAS/20250228-214408
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20250228134000.1226665-6-vincent.guittot%40linaro.org
patch subject: [PATCH 5/7 v3] sched/fair: Add push task mechanism for EAS
config: i386-buildonly-randconfig-001-20250301 (https://download.01.org/0day-ci/archive/20250301/202503012344.WKL9UWX1-lkp@intel.com/config)
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250301/202503012344.WKL9UWX1-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503012344.WKL9UWX1-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sched/fair.c:8957:13: error: conflicting types for 'fair_remove_pushable_task'
    8957 | static void fair_remove_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |             ^
   kernel/sched/fair.c:7054:13: note: previous declaration is here
    7054 | static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
         |             ^
>> kernel/sched/fair.c:9152:26: error: incompatible pointer types passing 'struct rq *' to parameter of type 'struct cfs_rq *' [-Werror,-Wincompatible-pointer-types]
    9152 |                 fair_add_pushable_task(rq, prev);
         |                                        ^~
   kernel/sched/fair.c:8958:58: note: passing argument to parameter 'cfs_rq' here
    8958 | static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |                                                          ^
   kernel/sched/fair.c:9231:25: error: incompatible pointer types passing 'struct rq *' to parameter of type 'struct cfs_rq *' [-Werror,-Wincompatible-pointer-types]
    9231 |         fair_add_pushable_task(rq, prev);
         |                                ^~
   kernel/sched/fair.c:8958:58: note: passing argument to parameter 'cfs_rq' here
    8958 | static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |                                                          ^
   3 errors generated.
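
[Note: all three diagnostics point at the !CONFIG_SMP stubs taking a
struct cfs_rq * where the forward declaration and the callers use a
struct rq *. A likely fix, inferred from the diagnostics above rather
than taken from a posted revision, is to make the stub prototypes match
the SMP versions:

static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) {}
static inline void fair_add_pushable_task(struct rq *rq, struct task_struct *p) {}
]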


vim +/fair_remove_pushable_task +8957 kernel/sched/fair.c

  8945	
  8946	static int
  8947	balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  8948	{
  8949		if (sched_fair_runnable(rq))
  8950			return 1;
  8951	
  8952		return sched_balance_newidle(rq, rf) != 0;
  8953	}
  8954	#else
  8955	static inline void check_pushable_task(struct task_struct *p, struct rq *rq) {}
  8956	static inline void fair_queue_pushable_tasks(struct rq *rq) {}
> 8957	static void fair_remove_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
  8958	static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
  8959	static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
  8960	#endif /* CONFIG_SMP */
  8961	
  8962	static void set_next_buddy(struct sched_entity *se)
  8963	{
  8964		for_each_sched_entity(se) {
  8965			if (SCHED_WARN_ON(!se->on_rq))
  8966				return;
  8967			if (se_is_idle(se))
  8968				return;
  8969			cfs_rq_of(se)->next = se;
  8970		}
  8971	}
  8972	
  8973	/*
  8974	 * Preempt the current task with a newly woken task if needed:
  8975	 */
  8976	static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
  8977	{
  8978		struct task_struct *donor = rq->donor;
  8979		struct sched_entity *se = &donor->se, *pse = &p->se;
  8980		struct cfs_rq *cfs_rq = task_cfs_rq(donor);
  8981		int cse_is_idle, pse_is_idle;
  8982	
  8983		if (unlikely(se == pse))
  8984			return;
  8985	
  8986		/*
  8987		 * This is possible from callers such as attach_tasks(), in which we
  8988		 * unconditionally wakeup_preempt() after an enqueue (which may have
  8989		 * lead to a throttle).  This both saves work and prevents false
  8990		 * next-buddy nomination below.
  8991		 */
  8992		if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
  8993			return;
  8994	
  8995		if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
  8996			set_next_buddy(pse);
  8997		}
  8998	
  8999		/*
  9000		 * We can come here with TIF_NEED_RESCHED already set from new task
  9001		 * wake up path.
  9002		 *
  9003		 * Note: this also catches the edge-case of curr being in a throttled
  9004		 * group (e.g. via set_curr_task), since update_curr() (in the
  9005		 * enqueue of curr) will have resulted in resched being set.  This
  9006		 * prevents us from potentially nominating it as a false LAST_BUDDY
  9007		 * below.
  9008		 */
  9009		if (test_tsk_need_resched(rq->curr))
  9010			return;
  9011	
  9012		if (!sched_feat(WAKEUP_PREEMPTION))
  9013			return;
  9014	
  9015		find_matching_se(&se, &pse);
  9016		WARN_ON_ONCE(!pse);
  9017	
  9018		cse_is_idle = se_is_idle(se);
  9019		pse_is_idle = se_is_idle(pse);
  9020	
  9021		/*
  9022		 * Preempt an idle entity in favor of a non-idle entity (and don't preempt
  9023		 * in the inverse case).
  9024		 */
  9025		if (cse_is_idle && !pse_is_idle) {
  9026			/*
  9027			 * When non-idle entity preempt an idle entity,
  9028			 * don't give idle entity slice protection.
  9029			 */
  9030			cancel_protect_slice(se);
  9031			goto preempt;
  9032		}
  9033	
  9034		if (cse_is_idle != pse_is_idle)
  9035			return;
  9036	
  9037		/*
  9038		 * BATCH and IDLE tasks do not preempt others.
  9039		 */
  9040		if (unlikely(!normal_policy(p->policy)))
  9041			return;
  9042	
  9043		cfs_rq = cfs_rq_of(se);
  9044		update_curr(cfs_rq);
  9045		/*
  9046		 * If @p has a shorter slice than current and @p is eligible, override
  9047		 * current's slice protection in order to allow preemption.
  9048		 *
  9049		 * Note that even if @p does not turn out to be the most eligible
  9050		 * task at this moment, current's slice protection will be lost.
  9051		 */
  9052		if (do_preempt_short(cfs_rq, pse, se))
  9053			cancel_protect_slice(se);
  9054	
  9055		/*
  9056		 * If @p has become the most eligible task, force preemption.
  9057		 */
  9058		if (pick_eevdf(cfs_rq) == pse)
  9059			goto preempt;
  9060	
  9061		return;
  9062	
  9063	preempt:
  9064		resched_curr_lazy(rq);
  9065	}
  9066	
  9067	static struct task_struct *pick_task_fair(struct rq *rq)
  9068	{
  9069		struct sched_entity *se;
  9070		struct cfs_rq *cfs_rq;
  9071	
  9072	again:
  9073		cfs_rq = &rq->cfs;
  9074		if (!cfs_rq->nr_queued)
  9075			return NULL;
  9076	
  9077		do {
  9078			/* Might not have done put_prev_entity() */
  9079			if (cfs_rq->curr && cfs_rq->curr->on_rq)
  9080				update_curr(cfs_rq);
  9081	
  9082			if (unlikely(check_cfs_rq_runtime(cfs_rq)))
  9083				goto again;
  9084	
  9085			se = pick_next_entity(rq, cfs_rq);
  9086			if (!se)
  9087				goto again;
  9088			cfs_rq = group_cfs_rq(se);
  9089		} while (cfs_rq);
  9090	
  9091		return task_of(se);
  9092	}
  9093	
  9094	static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool first);
  9095	static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first);
  9096	
  9097	struct task_struct *
  9098	pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  9099	{
  9100		struct sched_entity *se;
  9101		struct task_struct *p;
  9102		int new_tasks;
  9103	
  9104	again:
  9105		p = pick_task_fair(rq);
  9106		if (!p)
  9107			goto idle;
  9108		se = &p->se;
  9109	
  9110	#ifdef CONFIG_FAIR_GROUP_SCHED
  9111		if (prev->sched_class != &fair_sched_class)
  9112			goto simple;
  9113	
  9114		__put_prev_set_next_dl_server(rq, prev, p);
  9115	
  9116		/*
  9117		 * Because of the set_next_buddy() in dequeue_task_fair() it is rather
  9118		 * likely that a next task is from the same cgroup as the current.
  9119		 *
  9120		 * Therefore attempt to avoid putting and setting the entire cgroup
  9121		 * hierarchy, only change the part that actually changes.
  9122		 *
  9123		 * Since we haven't yet done put_prev_entity and if the selected task
  9124		 * is a different task than we started out with, try and touch the
  9125		 * least amount of cfs_rqs.
  9126		 */
  9127		if (prev != p) {
  9128			struct sched_entity *pse = &prev->se;
  9129			struct cfs_rq *cfs_rq;
  9130	
  9131			while (!(cfs_rq = is_same_group(se, pse))) {
  9132				int se_depth = se->depth;
  9133				int pse_depth = pse->depth;
  9134	
  9135				if (se_depth <= pse_depth) {
  9136					put_prev_entity(cfs_rq_of(pse), pse);
  9137					pse = parent_entity(pse);
  9138				}
  9139				if (se_depth >= pse_depth) {
  9140					set_next_entity(cfs_rq_of(se), se);
  9141					se = parent_entity(se);
  9142				}
  9143			}
  9144	
  9145			put_prev_entity(cfs_rq, pse);
  9146			set_next_entity(cfs_rq, se);
  9147	
  9148			/*
  9149			 * The previous task might be eligible for being pushed on
  9150			 * another cpu if it is still active.
  9151			 */
> 9152			fair_add_pushable_task(rq, prev);
  9153	
  9154			__set_next_task_fair(rq, p, true);
  9155		}
  9156	
  9157		return p;
  9158	
  9159	simple:
  9160	#endif
  9161		put_prev_set_next_task(rq, prev, p);
  9162		return p;
  9163	
  9164	idle:
  9165		if (!rf)
  9166			return NULL;
  9167	
  9168		new_tasks = sched_balance_newidle(rq, rf);
  9169	
  9170		/*
  9171		 * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is
  9172		 * possible for any higher priority task to appear. In that case we
  9173		 * must re-start the pick_next_entity() loop.
  9174		 */
  9175		if (new_tasks < 0)
  9176			return RETRY_TASK;
  9177	
  9178		if (new_tasks > 0)
  9179			goto again;
  9180	
  9181		/*
  9182		 * rq is about to be idle, check if we need to update the
  9183		 * lost_idle_time of clock_pelt
  9184		 */
  9185		update_idle_rq_clock_pelt(rq);
  9186	
  9187		return NULL;
  9188	}
  9189	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 5/7 v3] sched/fair: Add push task mechanism for EAS
Posted by kernel test robot 11 months, 2 weeks ago
Hi Vincent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on peterz-queue/sched/core linus/master v6.14-rc4 next-20250228]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vincent-Guittot/sched-fair-Filter-false-overloaded_group-case-for-EAS/20250228-214408
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20250228134000.1226665-6-vincent.guittot%40linaro.org
patch subject: [PATCH 5/7 v3] sched/fair: Add push task mechanism for EAS
config: arc-randconfig-002-20250301 (https://download.01.org/0day-ci/archive/20250301/202503012314.oQzjTBLS-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250301/202503012314.oQzjTBLS-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503012314.oQzjTBLS-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

>> kernel/sched/fair.c:8957:13: error: conflicting types for 'fair_remove_pushable_task'; have 'void(struct cfs_rq *, struct task_struct *)'
    8957 | static void fair_remove_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/sched/fair.c:7054:13: note: previous declaration of 'fair_remove_pushable_task' with type 'void(struct rq *, struct task_struct *)'
    7054 | static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/sched/fair.c: In function 'pick_next_task_fair':
>> kernel/sched/fair.c:9152:40: error: passing argument 1 of 'fair_add_pushable_task' from incompatible pointer type [-Werror=incompatible-pointer-types]
    9152 |                 fair_add_pushable_task(rq, prev);
         |                                        ^~
         |                                        |
         |                                        struct rq *
   kernel/sched/fair.c:8958:58: note: expected 'struct cfs_rq *' but argument is of type 'struct rq *'
    8958 | static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |                                           ~~~~~~~~~~~~~~~^~~~~~
   kernel/sched/fair.c: In function 'put_prev_task_fair':
   kernel/sched/fair.c:9231:32: error: passing argument 1 of 'fair_add_pushable_task' from incompatible pointer type [-Werror=incompatible-pointer-types]
    9231 |         fair_add_pushable_task(rq, prev);
         |                                ^~
         |                                |
         |                                struct rq *
   kernel/sched/fair.c:8958:58: note: expected 'struct cfs_rq *' but argument is of type 'struct rq *'
    8958 | static inline void fair_add_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |                                           ~~~~~~~~~~~~~~~^~~~~~
   kernel/sched/fair.c: In function '__set_next_task_fair':
>> kernel/sched/fair.c:13518:35: error: passing argument 1 of 'fair_remove_pushable_task' from incompatible pointer type [-Werror=incompatible-pointer-types]
   13518 |         fair_remove_pushable_task(rq, p);
         |                                   ^~
         |                                   |
         |                                   struct rq *
   kernel/sched/fair.c:8957:54: note: expected 'struct cfs_rq *' but argument is of type 'struct rq *'
    8957 | static void fair_remove_pushable_task(struct cfs_rq *cfs_rq, struct task_struct *p) {}
         |                                       ~~~~~~~~~~~~~~~^~~~~~
   kernel/sched/fair.c: At top level:
>> kernel/sched/fair.c:7054:13: warning: 'fair_remove_pushable_task' used but never defined
    7054 | static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki