[PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case

Posted by Chengming Zhou 3 years, 8 months ago
commit 7dc603c9028e ("sched/fair: Fix PELT integrity for new tasks")
fixed two load tracking problems for new tasks, including the detach on
unattached new task problem.

There is still another detach on unattached task problem left: it
affects a task which has been woken up by try_to_wake_up() and is
waiting to actually be woken up by sched_ttwu_pending().

try_to_wake_up(p)
  cpu = select_task_rq(p)
  if (task_cpu(p) != cpu)
    set_task_cpu(p, cpu)
      migrate_task_rq_fair()
        remove_entity_load_avg()       --> unattached
        se->avg.last_update_time = 0;
      __set_task_cpu()
  ttwu_queue(p, cpu)
    ttwu_queue_wakelist()
      __ttwu_queue_wakelist()

task_change_group_fair()
  detach_task_cfs_rq()
    detach_entity_cfs_rq()
      detach_entity_load_avg()   --> detach on unattached task
  set_task_rq()
  attach_task_cfs_rq()
    attach_entity_cfs_rq()
      attach_entity_load_avg()

The cause of this problem is similar: the task's sched_avg has already
been removed from its old cfs_rq, with se->avg.last_update_time cleared
to 0, so detaching it again would subtract load that was never attached.
detach_entity_cfs_rq() should check that se->avg.last_update_time != 0
before doing detach_entity_load_avg().
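
To make the invariant behind this check explicit, here is a minimal
sketch (not from the patch; entity_is_attached() is a hypothetical
helper, while the patch open-codes the test in detach_entity_cfs_rq()):

	/*
	 * se->avg.last_update_time == 0 means the task's sched_avg is
	 * not attached to any cfs_rq: either a freshly forked task that
	 * wake_up_new_task() has not attached yet, or a migrating task
	 * still sitting on the remote wake list. Only an attached
	 * entity may be detached.
	 */
	static inline bool entity_is_attached(struct sched_entity *se)
	{
		return se->avg.last_update_time != 0;
	}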

This patch also moves the detach/attach_entity_cfs_rq() functions up,
together with the other load tracking functions, to avoid adding
another #ifdef CONFIG_SMP.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 kernel/sched/fair.c | 132 +++++++++++++++++++++++---------------------
 1 file changed, 68 insertions(+), 64 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f52e7dc7f22d..4bc76d95a99d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -874,9 +874,6 @@ void init_entity_runnable_average(struct sched_entity *se)
 void post_init_entity_util_avg(struct task_struct *p)
 {
 }
-static void update_tg_load_avg(struct cfs_rq *cfs_rq)
-{
-}
 #endif /* CONFIG_SMP */
 
 /*
@@ -3176,6 +3173,7 @@ void reweight_task(struct task_struct *p, int prio)
 	load->inv_weight = sched_prio_to_wmult[prio];
 }
 
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4086,6 +4084,71 @@ static void remove_entity_load_avg(struct sched_entity *se)
 	raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Propagate the changes of the sched_entity across the tg tree to make it
+ * visible to the root
+ */
+static void propagate_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (cfs_rq_throttled(cfs_rq))
+		return;
+
+	if (!throttled_hierarchy(cfs_rq))
+		list_add_leaf_cfs_rq(cfs_rq);
+
+	/* Start to propagate at parent */
+	se = se->parent;
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		update_load_avg(cfs_rq, se, UPDATE_TG);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		if (!throttled_hierarchy(cfs_rq))
+			list_add_leaf_cfs_rq(cfs_rq);
+	}
+}
+#else
+static void propagate_entity_cfs_rq(struct sched_entity *se) { }
+#endif
+
+static void detach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/*
+	 * In case the task sched_avg hasn't been attached:
+	 * - A forked task which hasn't been woken up by wake_up_new_task().
+	 * - A task which has been woken up by try_to_wake_up() but is
+	 *   waiting for actually being woken up by sched_ttwu_pending().
+	 */
+	if (!se->avg.last_update_time)
+		return;
+
+	/* Catch up with the cfs_rq and remove our load when we leave */
+	update_load_avg(cfs_rq, se, 0);
+	detach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq);
+	propagate_entity_cfs_rq(se);
+}
+
+static void attach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/* Synchronize entity with its cfs_rq */
+	update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
+	attach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq);
+	propagate_entity_cfs_rq(se);
+}
+
 static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
 {
 	return cfs_rq->avg.runnable_avg;
@@ -4308,11 +4371,8 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 }
 
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
-
-static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
-static inline void
-detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void detach_entity_cfs_rq(struct sched_entity *se) {}
+static inline void attach_entity_cfs_rq(struct sched_entity *se) {}
 
 static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 {
@@ -11519,62 +11579,6 @@ static inline bool vruntime_normalized(struct task_struct *p)
 	return false;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * Propagate the changes of the sched_entity across the tg tree to make it
- * visible to the root
- */
-static void propagate_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	if (cfs_rq_throttled(cfs_rq))
-		return;
-
-	if (!throttled_hierarchy(cfs_rq))
-		list_add_leaf_cfs_rq(cfs_rq);
-
-	/* Start to propagate at parent */
-	se = se->parent;
-
-	for_each_sched_entity(se) {
-		cfs_rq = cfs_rq_of(se);
-
-		update_load_avg(cfs_rq, se, UPDATE_TG);
-
-		if (cfs_rq_throttled(cfs_rq))
-			break;
-
-		if (!throttled_hierarchy(cfs_rq))
-			list_add_leaf_cfs_rq(cfs_rq);
-	}
-}
-#else
-static void propagate_entity_cfs_rq(struct sched_entity *se) { }
-#endif
-
-static void detach_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	/* Catch up with the cfs_rq and remove our load when we leave */
-	update_load_avg(cfs_rq, se, 0);
-	detach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq);
-	propagate_entity_cfs_rq(se);
-}
-
-static void attach_entity_cfs_rq(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	/* Synchronize entity with its cfs_rq */
-	update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
-	attach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq);
-	propagate_entity_cfs_rq(se);
-}
-
 static void detach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
-- 
2.36.1
Re: [PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case
Posted by Vincent Guittot 3 years, 7 months ago
On Mon, 8 Aug 2022 at 14:58, Chengming Zhou <zhouchengming@bytedance.com> wrote:
>
> [...]
>
> +static void detach_entity_cfs_rq(struct sched_entity *se)
> +{
> +       struct cfs_rq *cfs_rq = cfs_rq_of(se);
> +
> +       /*
> +        * In case the task sched_avg hasn't been attached:
> +        * - A forked task which hasn't been woken up by wake_up_new_task().
> +        * - A task which has been woken up by try_to_wake_up() but is
> +        *   waiting for actually being woken up by sched_ttwu_pending().
> +        */
> +       if (!se->avg.last_update_time)
> +               return;

The 2 lines above and the associated comment are the only relevant
part of the patch, aren't they?
Is everything else just code moving from one place to another
without change?

Re: [PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case
Posted by Chengming Zhou 3 years, 7 months ago
On 2022/8/17 23:01, Vincent Guittot wrote:
> On Mon, 8 Aug 2022 at 14:58, Chengming Zhou <zhouchengming@bytedance.com> wrote:
>>
>> [...]
>>
>> +static void detach_entity_cfs_rq(struct sched_entity *se)
>> +{
>> +       struct cfs_rq *cfs_rq = cfs_rq_of(se);
>> +
>> +       /*
>> +        * In case the task sched_avg hasn't been attached:
>> +        * - A forked task which hasn't been woken up by wake_up_new_task().
>> +        * - A task which has been woken up by try_to_wake_up() but is
>> +        *   waiting for actually being woken up by sched_ttwu_pending().
>> +        */
>> +       if (!se->avg.last_update_time)
>> +               return;
> 
> The 2 lines above and the associated comment are the only relevant
> part of the patch, aren't they?
> Is everything else just code moving from one place to another
> without change?

Yes, everything else is just code movement.

Thanks!

Re: [PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case
Posted by Vincent Guittot 3 years, 7 months ago
On Wed, 17 Aug 2022 at 17:04, Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> On 2022/8/17 23:01, Vincent Guittot wrote:
> > On Mon, 8 Aug 2022 at 14:58, Chengming Zhou <zhouchengming@bytedance.com> wrote:
> >>
> >> [...]
> >>
> >> +static void detach_entity_cfs_rq(struct sched_entity *se)
> >> +{
> >> +       struct cfs_rq *cfs_rq = cfs_rq_of(se);
> >> +
> >> +       /*
> >> +        * In case the task sched_avg hasn't been attached:
> >> +        * - A forked task which hasn't been woken up by wake_up_new_task().
> >> +        * - A task which has been woken up by try_to_wake_up() but is
> >> +        *   waiting for actually being woken up by sched_ttwu_pending().
> >> +        */
> >> +       if (!se->avg.last_update_time)
> >> +               return;
> >
> > The 2 lines above and the associated comment are the only relevant
> > part of the patch, aren't they?
> > Is everything else just code moving from one place to another
> > without change?
>
> Yes, everything else is just code movement.

Could you remove the code movement? It doesn't add any value to the
patch, does it? But it makes the patch quite difficult to review, and I
wasted a lot of time looking for what really changed in the code.

Thanks

Re: [PATCH v4 6/9] sched/fair: fix another detach on unattached task corner case
Posted by Chengming Zhou 3 years, 7 months ago
On 2022/8/17 23:08, Vincent Guittot wrote:
> On Wed, 17 Aug 2022 at 17:04, Chengming Zhou
> <zhouchengming@bytedance.com> wrote:
>>
>> On 2022/8/17 23:01, Vincent Guittot wrote:
>>> On Mon, 8 Aug 2022 at 14:58, Chengming Zhou <zhouchengming@bytedance.com> wrote:
>>>>
>>>> [...]
>>>>
>>>> +static void detach_entity_cfs_rq(struct sched_entity *se)
>>>> +{
>>>> +       struct cfs_rq *cfs_rq = cfs_rq_of(se);
>>>> +
>>>> +       /*
>>>> +        * In case the task sched_avg hasn't been attached:
>>>> +        * - A forked task which hasn't been woken up by wake_up_new_task().
>>>> +        * - A task which has been woken up by try_to_wake_up() but is
>>>> +        *   waiting for actually being woken up by sched_ttwu_pending().
>>>> +        */
>>>> +       if (!se->avg.last_update_time)
>>>> +               return;
>>>
>>> The 2 lines above and the associated comment are the only relevant
>>> part of the patch, aren't they?
>>> Is everything else just code moving from one place to another
>>> without change?
>>
>> Yes, everything else is just code movement.
> 
> Could you remove the code movement? It doesn't add any value to the
> patch, does it? But it makes the patch quite difficult to review, and
> I wasted a lot of time looking for what really changed in the code.
> 

Sorry about that. No problem, I will remove it.

Thanks!