[PATCH v3] sched/deadline: fix the hang in dl_task_of

Huang Shijie posted 1 patch 1 year, 3 months ago
kernel/sched/deadline.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
[PATCH v3] sched/deadline: fix the hang in dl_task_of
Posted by Huang Shijie 1 year, 3 months ago
When we enable the schedstats, we will meet an OS hang like this:
  --------------------------------------------------------
	[  134.104253] kernel BUG at kernel/sched/deadline.c:63!
	[  134.132013] ------------[ cut here ]------------
	[  134.133441]  x27: 0000000000000001
	[  134.138048] kernel BUG at kernel/sched/deadline.c:63!
	[  134.146478] x26: 0000000000000001 x25: 0000000000000000 x24: 0000000000000001
	[  134.153607] x23: 0000000000000001 x22: 0000000000000000 x21: 0000000000000001
	[  134.160734] x20: ffff007dbf1b6d00 x19: ffff007dbf1b7610 x18: 0000000000000014
	[  134.162027] ------------[ cut here ]------------
	[  134.167861] x17: 000000009deab6cd x16: 00000000527c9a1c x15: 00000000000000dc
	[  134.172473] kernel BUG at kernel/sched/deadline.c:63!
	[  134.179595] x14: 0000000001200011 x13: 0000000040001000 x12: 0000ffffb6df05bc
	[  134.191760] x11: ffff007dbf1b6d00 x10: ffff0001062dd2e8 x9 : ffff8000801215ac
	[  134.192036] ------------[ cut here ]------------
	[  134.198888] x8 : 0000000000000000 x7 : 0000000000000021 x6 : ffff0001764ed280
	[  134.203498] kernel BUG at kernel/sched/deadline.c:63!
	[  134.210622] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffff807d3dd24000
	[  134.222787] x2 : 000000028b77a140 x1 : 0000003400000000 x0 : ffff007dbf1b6c80
	[  134.229915] Call trace:
	[  134.232353]  dl_task_of.part.0+0x0/0x10
	[  134.236182]  dl_server_start+0x54/0x158
	[  134.240013]  enqueue_task_fair+0x138/0x420
	[  134.244100]  enqueue_task+0x44/0xb0
	[  134.247584]  wake_up_new_task+0x1c0/0x3a0
	[  134.251584]  kernel_clone+0xe8/0x3e8
	[  134.252022] ------------[ cut here ]------------
	[  134.255156]  __do_sys_clone+0x70/0xa8
	[  134.259764] kernel BUG at kernel/sched/deadline.c:63!
	[  134.263412]  __arm64_sys_clone+0x28/0x40
	[  134.272360]  invoke_syscall+0x50/0x120
	[  134.276101]  el0_svc_common+0x44/0xf8
	[  134.279753]  do_el0_svc+0x28/0x40
	[  134.283058]  el0_svc+0x40/0x150
	[  134.286195]  el0t_64_sync_handler+0x100/0x130
	[  134.290546]  el0t_64_sync+0x1a4/0x1a8
	[  134.294200] Code: 35ffffa2 17ffffe3 d4210000 17ffffb4 (d4210000)
	[  134.300283] ---[ end trace 0000000000000000 ]---
	[  134.304890] Kernel panic - not syncing: Oops - BUG: Fatal exception
	[  134.311147] SMP: stopping secondary CPUs
	[  135.365096] SMP: failed to stop secondary CPUs 8-9,16,30,43,86,88,121,149
	[  135.371884] Kernel Offset: disabled
	[  135.375361] CPU features: 0x00,00100003,80153d29,d75ffea7
	[  135.380749] Memory Limit: none
	[  135.383793] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]
  --------------------------------------------------------

In dl_server_start(), we set the dl_se->dl_server with 1. When schedstats
is enabled, in the following:
   dl_server_start() --> enqueue_dl_entity() --> update_stats_enqueue_dl()
	__schedstats_from_dl_se() -->dl_task_of()

we will meet the BUG_ON.

Since the fair task has already had its own schedstats, there is no need
to track anything for the associated dl_server.

So add check in:
            update_stats_wait_start_dl()
	    update_stats_wait_end_dl()
	    update_stats_enqueue_dl()
	    update_stats_dequeue_dl()

return early for a dl_server dl_se.

Tested this patch with memcached in Altra.

Fixes: 5f6bd380c7bd ("sched/rt: Remove default bandwidth control")
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
v2 --> v3:
   Return early in:
            update_stats_wait_start_dl()
	    update_stats_wait_end_dl()
	    update_stats_enqueue_dl()
	    update_stats_dequeue_dl()
   
   The v2 link:
    https://lore.kernel.org/lkml/770da6f7-89f6-49b7-b8db-a7318abbd828@arm.com/T/

v1 --> v2:
   Skip the update_stats_enqueue_dl() for a dl_server dl_se
   in enqueue_dl_entity().

   Btw, the update_stats_{wait_start,wait_end}_dl has already had
   the dl_server() check.

   The v1 link: https://lore.kernel.org/lkml/20240826021115.9284-1-shijie@os.amperecomputing.com/T/

---
 kernel/sched/deadline.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0f2df67f710b..b1ccc1b6e7b9 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1910,6 +1910,9 @@ update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 	if (!schedstat_enabled())
 		return;
 
+	if (dl_server(dl_se))
+		return;
+
 	stats = __schedstats_from_dl_se(dl_se);
 	__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
@@ -1922,6 +1925,9 @@ update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 	if (!schedstat_enabled())
 		return;
 
+	if (dl_server(dl_se))
+		return;
+
 	stats = __schedstats_from_dl_se(dl_se);
 	__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
@@ -1945,6 +1951,9 @@ update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
 	if (!schedstat_enabled())
 		return;
 
+	if (dl_server(dl_se))
+		return;
+
 	if (flags & ENQUEUE_WAKEUP)
 		update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
 }
@@ -1958,6 +1967,9 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
 	if (!schedstat_enabled())
 		return;
 
+	if (dl_server(dl_se))
+		return;
+
 	if ((flags & DEQUEUE_SLEEP)) {
 		unsigned int state;
 
-- 
2.40.1
Re: [PATCH v3] sched/deadline: fix the hang in dl_task_of
Posted by Juri Lelli 1 year, 3 months ago
Hi,

On 29/08/24 11:11, Huang Shijie wrote:
> When we enable the schedstats, we will meet an OS hang like this:
>   --------------------------------------------------------
> 	[  134.104253] kernel BUG at kernel/sched/deadline.c:63!
> 	[  134.132013] ------------[ cut here ]------------
> 	[  134.133441]  x27: 0000000000000001
> 	[  134.138048] kernel BUG at kernel/sched/deadline.c:63!
> 	[  134.146478] x26: 0000000000000001 x25: 0000000000000000 x24: 0000000000000001
> 	[  134.153607] x23: 0000000000000001 x22: 0000000000000000 x21: 0000000000000001
> 	[  134.160734] x20: ffff007dbf1b6d00 x19: ffff007dbf1b7610 x18: 0000000000000014
> 	[  134.162027] ------------[ cut here ]------------
> 	[  134.167861] x17: 000000009deab6cd x16: 00000000527c9a1c x15: 00000000000000dc
> 	[  134.172473] kernel BUG at kernel/sched/deadline.c:63!
> 	[  134.179595] x14: 0000000001200011 x13: 0000000040001000 x12: 0000ffffb6df05bc
> 	[  134.191760] x11: ffff007dbf1b6d00 x10: ffff0001062dd2e8 x9 : ffff8000801215ac
> 	[  134.192036] ------------[ cut here ]------------
> 	[  134.198888] x8 : 0000000000000000 x7 : 0000000000000021 x6 : ffff0001764ed280
> 	[  134.203498] kernel BUG at kernel/sched/deadline.c:63!
> 	[  134.210622] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffff807d3dd24000
> 	[  134.222787] x2 : 000000028b77a140 x1 : 0000003400000000 x0 : ffff007dbf1b6c80
> 	[  134.229915] Call trace:
> 	[  134.232353]  dl_task_of.part.0+0x0/0x10
> 	[  134.236182]  dl_server_start+0x54/0x158
> 	[  134.240013]  enqueue_task_fair+0x138/0x420
> 	[  134.244100]  enqueue_task+0x44/0xb0
> 	[  134.247584]  wake_up_new_task+0x1c0/0x3a0
> 	[  134.251584]  kernel_clone+0xe8/0x3e8
> 	[  134.252022] ------------[ cut here ]------------
> 	[  134.255156]  __do_sys_clone+0x70/0xa8
> 	[  134.259764] kernel BUG at kernel/sched/deadline.c:63!
> 	[  134.263412]  __arm64_sys_clone+0x28/0x40
> 	[  134.272360]  invoke_syscall+0x50/0x120
> 	[  134.276101]  el0_svc_common+0x44/0xf8
> 	[  134.279753]  do_el0_svc+0x28/0x40
> 	[  134.283058]  el0_svc+0x40/0x150
> 	[  134.286195]  el0t_64_sync_handler+0x100/0x130
> 	[  134.290546]  el0t_64_sync+0x1a4/0x1a8
> 	[  134.294200] Code: 35ffffa2 17ffffe3 d4210000 17ffffb4 (d4210000)
> 	[  134.300283] ---[ end trace 0000000000000000 ]---
> 	[  134.304890] Kernel panic - not syncing: Oops - BUG: Fatal exception
> 	[  134.311147] SMP: stopping secondary CPUs
> 	[  135.365096] SMP: failed to stop secondary CPUs 8-9,16,30,43,86,88,121,149
> 	[  135.371884] Kernel Offset: disabled
> 	[  135.375361] CPU features: 0x00,00100003,80153d29,d75ffea7
> 	[  135.380749] Memory Limit: none
> 	[  135.383793] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]
>   --------------------------------------------------------
> 
> In dl_server_start(), we set the dl_se->dl_server with 1. When schedstats
> is enabled, in the following:
>    dl_server_start() --> enqueue_dl_entity() --> update_stats_enqueue_dl()
> 	__schedstats_from_dl_se() -->dl_task_of()
> 
> we will meet the BUG_ON.
> 
> Since the fair task has already had its own schedstats, there is no need
> to track anything for the associated dl_server.
> 
> So add check in:
>             update_stats_wait_start_dl()
> 	    update_stats_wait_end_dl()
> 	    update_stats_enqueue_dl()
> 	    update_stats_dequeue_dl()
> 
> return early for a dl_server dl_se.
> 
> Tested this patch with memcached in Altra.
> 
> Fixes: 5f6bd380c7bd ("sched/rt: Remove default bandwidth control")
> Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
> ---
> v2 --> v3:
>    Return early in:
>             update_stats_wait_start_dl()
> 	    update_stats_wait_end_dl()
> 	    update_stats_enqueue_dl()
> 	    update_stats_dequeue_dl()

This looks better, thanks.

Peter, what do you think?

Best,
Juri
Re: [PATCH v3] sched/deadline: fix the hang in dl_task_of
Posted by Peter Zijlstra 1 year, 3 months ago
On Thu, Aug 29, 2024 at 10:14:02AM +0200, Juri Lelli wrote:
> Hi,
> 
> On 29/08/24 11:11, Huang Shijie wrote:
> > When we enable the schedstats, we will meet an OS hang like this:
> >   --------------------------------------------------------
> > 	[  134.104253] kernel BUG at kernel/sched/deadline.c:63!
> > 	[  134.132013] ------------[ cut here ]------------
> > 	[  134.133441]  x27: 0000000000000001
> > 	[  134.138048] kernel BUG at kernel/sched/deadline.c:63!
> > 	[  134.146478] x26: 0000000000000001 x25: 0000000000000000 x24: 0000000000000001
> > 	[  134.153607] x23: 0000000000000001 x22: 0000000000000000 x21: 0000000000000001
> > 	[  134.160734] x20: ffff007dbf1b6d00 x19: ffff007dbf1b7610 x18: 0000000000000014
> > 	[  134.162027] ------------[ cut here ]------------
> > 	[  134.167861] x17: 000000009deab6cd x16: 00000000527c9a1c x15: 00000000000000dc
> > 	[  134.172473] kernel BUG at kernel/sched/deadline.c:63!
> > 	[  134.179595] x14: 0000000001200011 x13: 0000000040001000 x12: 0000ffffb6df05bc
> > 	[  134.191760] x11: ffff007dbf1b6d00 x10: ffff0001062dd2e8 x9 : ffff8000801215ac
> > 	[  134.192036] ------------[ cut here ]------------
> > 	[  134.198888] x8 : 0000000000000000 x7 : 0000000000000021 x6 : ffff0001764ed280
> > 	[  134.203498] kernel BUG at kernel/sched/deadline.c:63!
> > 	[  134.210622] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffff807d3dd24000
> > 	[  134.222787] x2 : 000000028b77a140 x1 : 0000003400000000 x0 : ffff007dbf1b6c80
> > 	[  134.229915] Call trace:
> > 	[  134.232353]  dl_task_of.part.0+0x0/0x10
> > 	[  134.236182]  dl_server_start+0x54/0x158
> > 	[  134.240013]  enqueue_task_fair+0x138/0x420
> > 	[  134.244100]  enqueue_task+0x44/0xb0
> > 	[  134.247584]  wake_up_new_task+0x1c0/0x3a0
> > 	[  134.251584]  kernel_clone+0xe8/0x3e8
> > 	[  134.252022] ------------[ cut here ]------------
> > 	[  134.255156]  __do_sys_clone+0x70/0xa8
> > 	[  134.259764] kernel BUG at kernel/sched/deadline.c:63!
> > 	[  134.263412]  __arm64_sys_clone+0x28/0x40
> > 	[  134.272360]  invoke_syscall+0x50/0x120
> > 	[  134.276101]  el0_svc_common+0x44/0xf8
> > 	[  134.279753]  do_el0_svc+0x28/0x40
> > 	[  134.283058]  el0_svc+0x40/0x150
> > 	[  134.286195]  el0t_64_sync_handler+0x100/0x130
> > 	[  134.290546]  el0t_64_sync+0x1a4/0x1a8
> > 	[  134.294200] Code: 35ffffa2 17ffffe3 d4210000 17ffffb4 (d4210000)
> > 	[  134.300283] ---[ end trace 0000000000000000 ]---
> > 	[  134.304890] Kernel panic - not syncing: Oops - BUG: Fatal exception
> > 	[  134.311147] SMP: stopping secondary CPUs
> > 	[  135.365096] SMP: failed to stop secondary CPUs 8-9,16,30,43,86,88,121,149
> > 	[  135.371884] Kernel Offset: disabled
> > 	[  135.375361] CPU features: 0x00,00100003,80153d29,d75ffea7
> > 	[  135.380749] Memory Limit: none
> > 	[  135.383793] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]
> >   --------------------------------------------------------
> > 
> > In dl_server_start(), we set the dl_se->dl_server with 1. When schedstats
> > is enabled, in the following:
> >    dl_server_start() --> enqueue_dl_entity() --> update_stats_enqueue_dl()
> > 	__schedstats_from_dl_se() -->dl_task_of()
> > 
> > we will meet the BUG_ON.
> > 
> > Since the fair task has already had its own schedstats, there is no need
> > to track anything for the associated dl_server.
> > 
> > So add check in:
> >             update_stats_wait_start_dl()
> > 	    update_stats_wait_end_dl()
> > 	    update_stats_enqueue_dl()
> > 	    update_stats_dequeue_dl()
> > 
> > return early for a dl_server dl_se.
> > 
> > Tested this patch with memcached in Altra.
> > 
> > Fixes: 5f6bd380c7bd ("sched/rt: Remove default bandwidth control")
> > Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
> > ---
> > v2 --> v3:
> >    Return early in:
> >             update_stats_wait_start_dl()
> > 	    update_stats_wait_end_dl()
> > 	    update_stats_enqueue_dl()
> > 	    update_stats_dequeue_dl()
> 
> This looks better, thanks.
> 
> Peter, what do you think?

Peter thinks that the Changelog is overly verbose, the Fixes tag is
wrong and he doesn't much like the repeated conditions. But he is very
glad this issue is found and fixed.

As such, he's rewritten things like so, does this work for people?

---
Subject: sched/deadline: Fix schedstats vs deadline servers
From: Huang Shijie <shijie@os.amperecomputing.com>
Date: Thu, 29 Aug 2024 11:11:11 +0800

From: Huang Shijie <shijie@os.amperecomputing.com>

In dl_server_start(), when schedstats is enabled, the following
happens:

  dl_server_start()
    dl_se->dl_server = 1;
    enqueue_dl_entity()
      update_stats_enqueue_dl()
        __schedstats_from_dl_se()
          dl_task_of()
            BUG_ON(dl_server(dl_se));

Since only tasks have schedstats and internal entries do not, avoid
trying to update stats in this case.

Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20240829031111.12142-1-shijie@os.amperecomputing.com
---
 kernel/sched/deadline.c |   38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1896,46 +1896,40 @@ static inline bool __dl_less(struct rb_n
 	return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
 }
 
-static inline struct sched_statistics *
+static __always_inline struct sched_statistics *
 __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
 {
+	if (!schedstat_enabled())
+		return NULL;
+
+	if (dl_server(dl_se))
+		return NULL;
+
 	return &dl_task_of(dl_se)->stats;
 }
 
 static inline void
 update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void
 update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void
 update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void
Re: [PATCH v3] sched/deadline: fix the hang in dl_task_of
Posted by Shijie Huang 1 year, 3 months ago
On 2024/9/2 19:14, Peter Zijlstra wrote:
> Subject: sched/deadline: Fix schedstats vs deadline servers
> From: Huang Shijie<shijie@os.amperecomputing.com>
> Date: Thu, 29 Aug 2024 11:11:11 +0800
>
> From: Huang Shijie<shijie@os.amperecomputing.com>
>
> In dl_server_start(), when schedstats is enabled, the following
> happens:
>
>    dl_server_start()
>      dl_se->dl_server = 1;
>      enqueue_dl_entity()
>        update_stats_enqueue_dl()
>          __schedstats_from_dl_se()
>            dl_task_of()
>              BUG_ON(dl_server(dl_se));
>
> Since only tasks have schedstats and internal entries do not, avoid
> trying to update stats in this case.
>
> Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
> Signed-off-by: Huang Shijie<shijie@os.amperecomputing.com>
> Signed-off-by: Peter Zijlstra (Intel)<peterz@infradead.org>
> Link:https://lkml.kernel.org/r/20240829031111.12142-1-shijie@os.amperecomputing.com
> ---
>   kernel/sched/deadline.c |   38 ++++++++++++++++----------------------
>   1 file changed, 16 insertions(+), 22 deletions(-)
>
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1896,46 +1896,40 @@ static inline bool __dl_less(struct rb_n
>   	return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
>   }
>   
> -static inline struct sched_statistics *
> +static __always_inline struct sched_statistics *
>   __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
>   {
> +	if (!schedstat_enabled())
> +		return NULL;
> +
> +	if (dl_server(dl_se))
> +		return NULL;
> +
>   	return &dl_task_of(dl_se)->stats;
>   }
>   
>   static inline void
>   update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
>   {
> -	struct sched_statistics *stats;
> -
> -	if (!schedstat_enabled())
> -		return;
> -
> -	stats = __schedstats_from_dl_se(dl_se);
> -	__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
> +	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
> +	if (stats)
> +		__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
>   }
>   
>   static inline void
>   update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
>   {
> -	struct sched_statistics *stats;
> -
> -	if (!schedstat_enabled())
> -		return;
> -
> -	stats = __schedstats_from_dl_se(dl_se);
> -	__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
> +	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
> +	if (stats)
> +		__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
>   }
>   
>   static inline void
>   update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
>   {
> -	struct sched_statistics *stats;
> -
> -	if (!schedstat_enabled())
> -		return;
> -
> -	stats = __schedstats_from_dl_se(dl_se);
> -	__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
> +	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
> +	if (stats)
> +		__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
>   }
>   
>   static inline void

Thanks Peter,

I am okay with this one.


Thanks

Huang Shijie
Re: [PATCH v3] sched/deadline: fix the hang in dl_task_of
Posted by Juri Lelli 1 year, 3 months ago
On 02/09/24 13:14, Peter Zijlstra wrote:
> On Thu, Aug 29, 2024 at 10:14:02AM +0200, Juri Lelli wrote:
> > Hi,
> > 
> > On 29/08/24 11:11, Huang Shijie wrote:
> > > When we enable the schedstats, we will meet an OS hang like this:
> > >   --------------------------------------------------------
> > > 	[  134.104253] kernel BUG at kernel/sched/deadline.c:63!
> > > 	[  134.132013] ------------[ cut here ]------------
> > > 	[  134.133441]  x27: 0000000000000001
> > > 	[  134.138048] kernel BUG at kernel/sched/deadline.c:63!
> > > 	[  134.146478] x26: 0000000000000001 x25: 0000000000000000 x24: 0000000000000001
> > > 	[  134.153607] x23: 0000000000000001 x22: 0000000000000000 x21: 0000000000000001
> > > 	[  134.160734] x20: ffff007dbf1b6d00 x19: ffff007dbf1b7610 x18: 0000000000000014
> > > 	[  134.162027] ------------[ cut here ]------------
> > > 	[  134.167861] x17: 000000009deab6cd x16: 00000000527c9a1c x15: 00000000000000dc
> > > 	[  134.172473] kernel BUG at kernel/sched/deadline.c:63!
> > > 	[  134.179595] x14: 0000000001200011 x13: 0000000040001000 x12: 0000ffffb6df05bc
> > > 	[  134.191760] x11: ffff007dbf1b6d00 x10: ffff0001062dd2e8 x9 : ffff8000801215ac
> > > 	[  134.192036] ------------[ cut here ]------------
> > > 	[  134.198888] x8 : 0000000000000000 x7 : 0000000000000021 x6 : ffff0001764ed280
> > > 	[  134.203498] kernel BUG at kernel/sched/deadline.c:63!
> > > 	[  134.210622] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffff807d3dd24000
> > > 	[  134.222787] x2 : 000000028b77a140 x1 : 0000003400000000 x0 : ffff007dbf1b6c80
> > > 	[  134.229915] Call trace:
> > > 	[  134.232353]  dl_task_of.part.0+0x0/0x10
> > > 	[  134.236182]  dl_server_start+0x54/0x158
> > > 	[  134.240013]  enqueue_task_fair+0x138/0x420
> > > 	[  134.244100]  enqueue_task+0x44/0xb0
> > > 	[  134.247584]  wake_up_new_task+0x1c0/0x3a0
> > > 	[  134.251584]  kernel_clone+0xe8/0x3e8
> > > 	[  134.252022] ------------[ cut here ]------------
> > > 	[  134.255156]  __do_sys_clone+0x70/0xa8
> > > 	[  134.259764] kernel BUG at kernel/sched/deadline.c:63!
> > > 	[  134.263412]  __arm64_sys_clone+0x28/0x40
> > > 	[  134.272360]  invoke_syscall+0x50/0x120
> > > 	[  134.276101]  el0_svc_common+0x44/0xf8
> > > 	[  134.279753]  do_el0_svc+0x28/0x40
> > > 	[  134.283058]  el0_svc+0x40/0x150
> > > 	[  134.286195]  el0t_64_sync_handler+0x100/0x130
> > > 	[  134.290546]  el0t_64_sync+0x1a4/0x1a8
> > > 	[  134.294200] Code: 35ffffa2 17ffffe3 d4210000 17ffffb4 (d4210000)
> > > 	[  134.300283] ---[ end trace 0000000000000000 ]---
> > > 	[  134.304890] Kernel panic - not syncing: Oops - BUG: Fatal exception
> > > 	[  134.311147] SMP: stopping secondary CPUs
> > > 	[  135.365096] SMP: failed to stop secondary CPUs 8-9,16,30,43,86,88,121,149
> > > 	[  135.371884] Kernel Offset: disabled
> > > 	[  135.375361] CPU features: 0x00,00100003,80153d29,d75ffea7
> > > 	[  135.380749] Memory Limit: none
> > > 	[  135.383793] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]
> > >   --------------------------------------------------------
> > > 
> > > In dl_server_start(), we set the dl_se->dl_server with 1. When schedstats
> > > is enabled, in the following:
> > >    dl_server_start() --> enqueue_dl_entity() --> update_stats_enqueue_dl()
> > > 	__schedstats_from_dl_se() -->dl_task_of()
> > > 
> > > we will meet the BUG_ON.
> > > 
> > > Since the fair task has already had its own schedstats, there is no need
> > > to track anything for the associated dl_server.
> > > 
> > > So add check in:
> > >             update_stats_wait_start_dl()
> > > 	    update_stats_wait_end_dl()
> > > 	    update_stats_enqueue_dl()
> > > 	    update_stats_dequeue_dl()
> > > 
> > > return early for a dl_server dl_se.
> > > 
> > > Tested this patch with memcached in Altra.
> > > 
> > > Fixes: 5f6bd380c7bd ("sched/rt: Remove default bandwidth control")
> > > Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
> > > ---
> > > v2 --> v3:
> > >    Return early in:
> > >             update_stats_wait_start_dl()
> > > 	    update_stats_wait_end_dl()
> > > 	    update_stats_enqueue_dl()
> > > 	    update_stats_dequeue_dl()
> > 
> > This looks better, thanks.
> > 
> > Peter, what do you think?
> 
> Peter thinks that the Changelog is overly verbose, the Fixes tag is
> wrong and he doesn't much like the repeated conditions. But he is very
> glad this issue is found and fixed.

:)

> As such, he's rewritten things like so, does this work for people?

Works for me!

> ---
> Subject: sched/deadline: Fix schedstats vs deadline servers
> From: Huang Shijie <shijie@os.amperecomputing.com>
> Date: Thu, 29 Aug 2024 11:11:11 +0800
> 
> From: Huang Shijie <shijie@os.amperecomputing.com>
> 
> In dl_server_start(), when schedstats is enabled, the following
> happens:
> 
>   dl_server_start()
>     dl_se->dl_server = 1;
>     enqueue_dl_entity()
>       update_stats_enqueue_dl()
>         __schedstats_from_dl_se()
>           dl_task_of()
>             BUG_ON(dl_server(dl_se));
> 
> Since only tasks have schedstats and internal entries do not, avoid
> trying to update stats in this case.
> 
> Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
> Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: https://lkml.kernel.org/r/20240829031111.12142-1-shijie@os.amperecomputing.com

Acked-by: Juri Lelli <juri.lelli@redhat.com>

Thanks,
Juri
[tip: sched/core] sched/deadline: Fix schedstats vs deadline servers
Posted by tip-bot2 for Huang Shijie 1 year, 3 months ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     9c602adb799e72ee537c0c7ca7e828c3fe2acad6
Gitweb:        https://git.kernel.org/tip/9c602adb799e72ee537c0c7ca7e828c3fe2acad6
Author:        Huang Shijie <shijie@os.amperecomputing.com>
AuthorDate:    Thu, 29 Aug 2024 11:11:11 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 03 Sep 2024 15:26:30 +02:00

sched/deadline: Fix schedstats vs deadline servers

In dl_server_start(), when schedstats is enabled, the following
happens:

  dl_server_start()
    dl_se->dl_server = 1;
    enqueue_dl_entity()
      update_stats_enqueue_dl()
        __schedstats_from_dl_se()
          dl_task_of()
            BUG_ON(dl_server(dl_se));

Since only tasks have schedstats and internal entries do not, avoid
trying to update stats in this case.

Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lkml.kernel.org/r/20240829031111.12142-1-shijie@os.amperecomputing.com
---
 kernel/sched/deadline.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0f2df67..2e84037 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1896,46 +1896,40 @@ static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
 	return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
 }
 
-static inline struct sched_statistics *
+static __always_inline struct sched_statistics *
 __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
 {
+	if (!schedstat_enabled())
+		return NULL;
+
+	if (dl_server(dl_se))
+		return NULL;
+
 	return &dl_task_of(dl_se)->stats;
 }
 
 static inline void
 update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void
 update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void
 update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
 {
-	struct sched_statistics *stats;
-
-	if (!schedstat_enabled())
-		return;
-
-	stats = __schedstats_from_dl_se(dl_se);
-	__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
+	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
+	if (stats)
+		__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
 }
 
 static inline void