Since update_load_avg() now supports the DO_ATTACH and DO_DETACH flags to
attach or detach an entity's sched_avg to/from the cfs_rq, we can use them
to refactor the detach/attach_entity_cfs_rq() functions.
Note that we can attach a task with last_update_time != 0 from
switched_to_fair(), since we want its sched_avg to decay while it runs in a
!fair class. So this patch moves the last_update_time condition check to
enqueue_entity() for tasks which migrate between CPUs or change cgroups.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
kernel/sched/fair.c | 68 ++++++++++++++++++---------------------------
1 file changed, 27 insertions(+), 41 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ad20a939227d..b8cb826bd755 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4019,21 +4019,10 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
decayed = update_cfs_rq_load_avg(now, cfs_rq);
decayed |= propagate_entity_load_avg(se);
- if (!se->avg.last_update_time && (flags & DO_ATTACH)) {
- /*
- * DO_ATTACH means we're here from enqueue_entity().
- * !last_update_time means we've passed through
- * migrate_task_rq_fair() indicating we migrated.
- *
- * IOW we're enqueueing a task on a new CPU.
- */
+ if (flags & DO_ATTACH) {
attach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq);
} else if (flags & DO_DETACH) {
- /*
- * DO_DETACH means we're here from dequeue_entity()
- * and we are migrating task out of the CPU.
- */
detach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq);
} else if (decayed) {
@@ -4082,44 +4071,31 @@ static void remove_entity_load_avg(struct sched_entity *se)
raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
}
-#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* Propagate the changes of the sched_entity across the tg tree to make it
* visible to the root
*/
-static void propagate_entity_cfs_rq(struct sched_entity *se)
+static void propagate_entity_cfs_rq(struct sched_entity *se, int flags)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
- if (cfs_rq_throttled(cfs_rq))
- return;
-
- if (!throttled_hierarchy(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
-
- /* Start to propagate at parent */
- se = se->parent;
+ struct cfs_rq *cfs_rq;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_load_avg(cfs_rq, se, flags);
if (cfs_rq_throttled(cfs_rq))
break;
if (!throttled_hierarchy(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
+
+ flags = UPDATE_TG;
}
}
-#else
-static void propagate_entity_cfs_rq(struct sched_entity *se) { }
-#endif
static void detach_entity_cfs_rq(struct sched_entity *se)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
/*
* In case the task sched_avg hasn't been attached:
* - A forked task which hasn't been woken up by wake_up_new_task().
@@ -4130,21 +4106,18 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
return;
/* Catch up with the cfs_rq and remove our load when we leave */
- update_load_avg(cfs_rq, se, 0);
- detach_entity_load_avg(cfs_rq, se);
- update_tg_load_avg(cfs_rq);
- propagate_entity_cfs_rq(se);
+ propagate_entity_cfs_rq(se, DO_DETACH | UPDATE_TG);
}
static void attach_entity_cfs_rq(struct sched_entity *se)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ int flags = DO_ATTACH | UPDATE_TG;
+
+ if (!sched_feat(ATTACH_AGE_LOAD))
+ flags |= SKIP_AGE_LOAD;
- /* Synchronize entity with its cfs_rq */
- update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
- attach_entity_load_avg(cfs_rq, se);
- update_tg_load_avg(cfs_rq);
- propagate_entity_cfs_rq(se);
+ /* Synchronize entity with its cfs_rq and attach our load */
+ propagate_entity_cfs_rq(se, flags);
}
static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
@@ -4479,6 +4452,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
bool curr = cfs_rq->curr == se;
+ int action = UPDATE_TG;
+
+ /*
+ * !last_update_time means we've passed through migrate_task_rq_fair()
+ * or task_change_group_fair() indicating we migrated cfs_rq. IOW we're
+ * enqueueing a task on a new CPU or moving task to a new cgroup.
+ */
+ if (!se->avg.last_update_time)
+ action |= DO_ATTACH;
/*
* If we're the current task, we must renormalise before calling
@@ -4507,7 +4489,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* its group cfs_rq
* - Add its new weight to cfs_rq->load.weight
*/
- update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
+ update_load_avg(cfs_rq, se, action);
se_update_runnable(se);
update_cfs_group(se);
account_entity_enqueue(cfs_rq, se);
@@ -4581,6 +4563,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
int action = UPDATE_TG;
+ /*
+ * When we are migrating task out of the CPU, we should
+ * detach entity sched_avg from the cfs_rq.
+ */
if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
action |= DO_DETACH;
--
2.36.1
Hi Chengming,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master next-20220728]
[cannot apply to v5.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 8da3d9b8590bc178752d4b72938745e9a6c4c416
config: hexagon-randconfig-r012-20220731 (https://download.01.org/0day-ci/archive/20220802/202208020758.Ff3SOjvD-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 52cd00cabf479aa7eb6dbb063b7ba41ea57bce9e)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/336247ff1d2b402a18689fd891d79e99d8b444fc
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
git checkout 336247ff1d2b402a18689fd891d79e99d8b444fc
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash kernel/sched/
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> kernel/sched/fair.c:4462:11: error: no member named 'avg' in 'struct sched_entity'
if (!se->avg.last_update_time)
~~ ^
1 error generated.
vim +4462 kernel/sched/fair.c
4419
4420 /*
4421 * MIGRATION
4422 *
4423 * dequeue
4424 * update_curr()
4425 * update_min_vruntime()
4426 * vruntime -= min_vruntime
4427 *
4428 * enqueue
4429 * update_curr()
4430 * update_min_vruntime()
4431 * vruntime += min_vruntime
4432 *
4433 * this way the vruntime transition between RQs is done when both
4434 * min_vruntime are up-to-date.
4435 *
4436 * WAKEUP (remote)
4437 *
4438 * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
4439 * vruntime -= min_vruntime
4440 *
4441 * enqueue
4442 * update_curr()
4443 * update_min_vruntime()
4444 * vruntime += min_vruntime
4445 *
4446 * this way we don't have the most up-to-date min_vruntime on the originating
4447 * CPU and an up-to-date min_vruntime on the destination CPU.
4448 */
4449
4450 static void
4451 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
4452 {
4453 bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
4454 bool curr = cfs_rq->curr == se;
4455 int action = UPDATE_TG;
4456
4457 /*
4458 * !last_update_time means we've passed through migrate_task_rq_fair()
4459 * or task_change_group_fair() indicating we migrated cfs_rq. IOW we're
4460 * enqueueing a task on a new CPU or moving task to a new cgroup.
4461 */
> 4462 if (!se->avg.last_update_time)
4463 action |= DO_ATTACH;
4464
4465 /*
4466 * If we're the current task, we must renormalise before calling
4467 * update_curr().
4468 */
4469 if (renorm && curr)
4470 se->vruntime += cfs_rq->min_vruntime;
4471
4472 update_curr(cfs_rq);
4473
4474 /*
4475 * Otherwise, renormalise after, such that we're placed at the current
4476 * moment in time, instead of some random moment in the past. Being
4477 * placed in the past could significantly boost this task to the
4478 * fairness detriment of existing tasks.
4479 */
4480 if (renorm && !curr)
4481 se->vruntime += cfs_rq->min_vruntime;
4482
4483 /*
4484 * When enqueuing a sched_entity, we must:
4485 * - Update loads to have both entity and cfs_rq synced with now.
4486 * - For group_entity, update its runnable_weight to reflect the new
4487 * h_nr_running of its group cfs_rq.
4488 * - For group_entity, update its weight to reflect the new share of
4489 * its group cfs_rq
4490 * - Add its new weight to cfs_rq->load.weight
4491 */
4492 update_load_avg(cfs_rq, se, action);
4493 se_update_runnable(se);
4494 update_cfs_group(se);
4495 account_entity_enqueue(cfs_rq, se);
4496
4497 if (flags & ENQUEUE_WAKEUP)
4498 place_entity(cfs_rq, se, 0);
4499
4500 check_schedstat_required();
4501 update_stats_enqueue_fair(cfs_rq, se, flags);
4502 check_spread(cfs_rq, se);
4503 if (!curr)
4504 __enqueue_entity(cfs_rq, se);
4505 se->on_rq = 1;
4506
4507 if (cfs_rq->nr_running == 1) {
4508 check_enqueue_throttle(cfs_rq);
4509 if (!throttled_hierarchy(cfs_rq))
4510 list_add_leaf_cfs_rq(cfs_rq);
4511 }
4512 }
4513
--
0-DAY CI Kernel Test Service
https://01.org/lkp
Hi Chengming,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on next-20220728]
[cannot apply to linus/master v5.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 8da3d9b8590bc178752d4b72938745e9a6c4c416
config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20220801/202208011647.2KU7IF9Y-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/336247ff1d2b402a18689fd891d79e99d8b444fc
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
git checkout 336247ff1d2b402a18689fd891d79e99d8b444fc
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
kernel/sched/fair.c:672:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes]
672 | int sched_update_scaling(void)
| ^~~~~~~~~~~~~~~~~~~~
kernel/sched/fair.c: In function 'enqueue_entity':
>> kernel/sched/fair.c:4462:16: error: 'struct sched_entity' has no member named 'avg'
4462 | if (!se->avg.last_update_time)
| ^~
vim +4462 kernel/sched/fair.c
4419
4420 /*
4421 * MIGRATION
4422 *
4423 * dequeue
4424 * update_curr()
4425 * update_min_vruntime()
4426 * vruntime -= min_vruntime
4427 *
4428 * enqueue
4429 * update_curr()
4430 * update_min_vruntime()
4431 * vruntime += min_vruntime
4432 *
4433 * this way the vruntime transition between RQs is done when both
4434 * min_vruntime are up-to-date.
4435 *
4436 * WAKEUP (remote)
4437 *
4438 * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
4439 * vruntime -= min_vruntime
4440 *
4441 * enqueue
4442 * update_curr()
4443 * update_min_vruntime()
4444 * vruntime += min_vruntime
4445 *
4446 * this way we don't have the most up-to-date min_vruntime on the originating
4447 * CPU and an up-to-date min_vruntime on the destination CPU.
4448 */
4449
4450 static void
4451 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
4452 {
4453 bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
4454 bool curr = cfs_rq->curr == se;
4455 int action = UPDATE_TG;
4456
4457 /*
4458 * !last_update_time means we've passed through migrate_task_rq_fair()
4459 * or task_change_group_fair() indicating we migrated cfs_rq. IOW we're
4460 * enqueueing a task on a new CPU or moving task to a new cgroup.
4461 */
> 4462 if (!se->avg.last_update_time)
4463 action |= DO_ATTACH;
4464
4465 /*
4466 * If we're the current task, we must renormalise before calling
4467 * update_curr().
4468 */
4469 if (renorm && curr)
4470 se->vruntime += cfs_rq->min_vruntime;
4471
4472 update_curr(cfs_rq);
4473
4474 /*
4475 * Otherwise, renormalise after, such that we're placed at the current
4476 * moment in time, instead of some random moment in the past. Being
4477 * placed in the past could significantly boost this task to the
4478 * fairness detriment of existing tasks.
4479 */
4480 if (renorm && !curr)
4481 se->vruntime += cfs_rq->min_vruntime;
4482
4483 /*
4484 * When enqueuing a sched_entity, we must:
4485 * - Update loads to have both entity and cfs_rq synced with now.
4486 * - For group_entity, update its runnable_weight to reflect the new
4487 * h_nr_running of its group cfs_rq.
4488 * - For group_entity, update its weight to reflect the new share of
4489 * its group cfs_rq
4490 * - Add its new weight to cfs_rq->load.weight
4491 */
4492 update_load_avg(cfs_rq, se, action);
4493 se_update_runnable(se);
4494 update_cfs_group(se);
4495 account_entity_enqueue(cfs_rq, se);
4496
4497 if (flags & ENQUEUE_WAKEUP)
4498 place_entity(cfs_rq, se, 0);
4499
4500 check_schedstat_required();
4501 update_stats_enqueue_fair(cfs_rq, se, flags);
4502 check_spread(cfs_rq, se);
4503 if (!curr)
4504 __enqueue_entity(cfs_rq, se);
4505 se->on_rq = 1;
4506
4507 if (cfs_rq->nr_running == 1) {
4508 check_enqueue_throttle(cfs_rq);
4509 if (!throttled_hierarchy(cfs_rq))
4510 list_add_leaf_cfs_rq(cfs_rq);
4511 }
4512 }
4513
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On 2022/8/1 16:07, kernel test robot wrote:
> Hi Chengming,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on tip/sched/core]
> [also build test ERROR on next-20220728]
> [cannot apply to linus/master v5.19]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
> base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 8da3d9b8590bc178752d4b72938745e9a6c4c416
> config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20220801/202208011647.2KU7IF9Y-lkp@intel.com/config)
> compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
> reproduce (this is a W=1 build):
> # https://github.com/intel-lab-lkp/linux/commit/336247ff1d2b402a18689fd891d79e99d8b444fc
> git remote add linux-review https://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Chengming-Zhou/sched-fair-task-load-tracking-optimization-and-cleanup/20220801-122957
> git checkout 336247ff1d2b402a18689fd891d79e99d8b444fc
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
> kernel/sched/fair.c:672:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes]
> 672 | int sched_update_scaling(void)
> | ^~~~~~~~~~~~~~~~~~~~
> kernel/sched/fair.c: In function 'enqueue_entity':
>>> kernel/sched/fair.c:4462:16: error: 'struct sched_entity' has no member named 'avg'
> 4462 | if (!se->avg.last_update_time)
> | ^~
>
Thanks for the test report!
It seems to be because struct sched_entity has no member 'avg' on !CONFIG_SMP.
I think we'd better drop this patch for now, since it's just a code
refactor, not a real improvement.
>
> vim +4462 kernel/sched/fair.c
>
> 4419
> 4420 /*
> 4421 * MIGRATION
> 4422 *
> 4423 * dequeue
> 4424 * update_curr()
> 4425 * update_min_vruntime()
> 4426 * vruntime -= min_vruntime
> 4427 *
> 4428 * enqueue
> 4429 * update_curr()
> 4430 * update_min_vruntime()
> 4431 * vruntime += min_vruntime
> 4432 *
> 4433 * this way the vruntime transition between RQs is done when both
> 4434 * min_vruntime are up-to-date.
> 4435 *
> 4436 * WAKEUP (remote)
> 4437 *
> 4438 * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
> 4439 * vruntime -= min_vruntime
> 4440 *
> 4441 * enqueue
> 4442 * update_curr()
> 4443 * update_min_vruntime()
> 4444 * vruntime += min_vruntime
> 4445 *
> 4446 * this way we don't have the most up-to-date min_vruntime on the originating
> 4447 * CPU and an up-to-date min_vruntime on the destination CPU.
> 4448 */
> 4449
> 4450 static void
> 4451 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> 4452 {
> 4453 bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
> 4454 bool curr = cfs_rq->curr == se;
> 4455 int action = UPDATE_TG;
> 4456
> 4457 /*
> 4458 * !last_update_time means we've passed through migrate_task_rq_fair()
> 4459 * or task_change_group_fair() indicating we migrated cfs_rq. IOW we're
> 4460 * enqueueing a task on a new CPU or moving task to a new cgroup.
> 4461 */
>> 4462 if (!se->avg.last_update_time)
> 4463 action |= DO_ATTACH;
> 4464
> 4465 /*
> 4466 * If we're the current task, we must renormalise before calling
> 4467 * update_curr().
> 4468 */
> 4469 if (renorm && curr)
> 4470 se->vruntime += cfs_rq->min_vruntime;
> 4471
> 4472 update_curr(cfs_rq);
> 4473
> 4474 /*
> 4475 * Otherwise, renormalise after, such that we're placed at the current
> 4476 * moment in time, instead of some random moment in the past. Being
> 4477 * placed in the past could significantly boost this task to the
> 4478 * fairness detriment of existing tasks.
> 4479 */
> 4480 if (renorm && !curr)
> 4481 se->vruntime += cfs_rq->min_vruntime;
> 4482
> 4483 /*
> 4484 * When enqueuing a sched_entity, we must:
> 4485 * - Update loads to have both entity and cfs_rq synced with now.
> 4486 * - For group_entity, update its runnable_weight to reflect the new
> 4487 * h_nr_running of its group cfs_rq.
> 4488 * - For group_entity, update its weight to reflect the new share of
> 4489 * its group cfs_rq
> 4490 * - Add its new weight to cfs_rq->load.weight
> 4491 */
> 4492 update_load_avg(cfs_rq, se, action);
> 4493 se_update_runnable(se);
> 4494 update_cfs_group(se);
> 4495 account_entity_enqueue(cfs_rq, se);
> 4496
> 4497 if (flags & ENQUEUE_WAKEUP)
> 4498 place_entity(cfs_rq, se, 0);
> 4499
> 4500 check_schedstat_required();
> 4501 update_stats_enqueue_fair(cfs_rq, se, flags);
> 4502 check_spread(cfs_rq, se);
> 4503 if (!curr)
> 4504 __enqueue_entity(cfs_rq, se);
> 4505 se->on_rq = 1;
> 4506
> 4507 if (cfs_rq->nr_running == 1) {
> 4508 check_enqueue_throttle(cfs_rq);
> 4509 if (!throttled_hierarchy(cfs_rq))
> 4510 list_add_leaf_cfs_rq(cfs_rq);
> 4511 }
> 4512 }
> 4513
>
© 2016 - 2026 Red Hat, Inc.