[PATCH v3] lockdep: Fix wait context check on softirq for PREEMPT_RT

Ryo Takakura posted 1 patch 1 year ago
There is a newer version of this series
include/linux/bottom_half.h |  8 ++++++++
kernel/softirq.c            | 12 ++++++++++++
2 files changed, 20 insertions(+)
[PATCH v3] lockdep: Fix wait context check on softirq for PREEMPT_RT
Posted by Ryo Takakura 1 year ago
Since commit 0c1d7a2c2d32 ("lockdep: Remove softirq accounting on
PREEMPT_RT."), the wait context test for mutex usage within
"in softirq context" fails as it references @softirq_context.

[    0.184549]   | wait context tests |
[    0.184549]   --------------------------------------------------------------------------
[    0.184549]                                  | rcu  | raw  | spin |mutex |
[    0.184549]   --------------------------------------------------------------------------
[    0.184550]                in hardirq context:  ok  |  ok  |  ok  |  ok  |
[    0.185083] in hardirq context (not threaded):  ok  |  ok  |  ok  |  ok  |
[    0.185606]                in softirq context:  ok  |  ok  |  ok  |FAILED|

As a fix, add lockdep map for BH disabled section. This fixes the
issue by letting us catch cases when local_bh_disable() gets called
with preemption disabled where local_lock doesn't get acquired.
In the case of "in softirq context" selftest, local_bh_disable() was
being called with preemption disable as it's early in the boot.

Signed-off-by: Ryo Takakura <ryotkkr98@gmail.com>
---

Hi!

This version uses CONFIG_DEBUG_LOCK_ALLOC instead of CONFIG_LOCKDEP.
Thanks Waiman for the advise!

v1:
https://lore.kernel.org/lkml/20241202012017.14910-1-ryotkkr98@gmail.com/

v2:
https://lore.kernel.org/lkml/20241227040824.83626-1-ryotkkr98@gmail.com/

Boqun, let me know in case if I should add you as <suggested-by> tag.
(Please forgive me and ignore if not what the tag intends for... 
I thought its appropriate reading the document[0].)

Sincerely,
Ryo Takakura

[0] https://www.kernel.org/doc/html/v6.12/process/submitting-patches.html#using-reported-by-tested-by-reviewed-by-suggested-by-and-fixes

---
 include/linux/bottom_half.h |  8 ++++++++
 kernel/softirq.c            | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index fc53e0ad5..0640a147b 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -4,6 +4,7 @@
 
 #include <linux/instruction_pointer.h>
 #include <linux/preempt.h>
+#include <linux/lockdep.h>
 
 #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
@@ -15,8 +16,13 @@ static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int
 }
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map bh_lock_map;
+#endif
+
 static inline void local_bh_disable(void)
 {
+	lock_map_acquire_read(&bh_lock_map);
 	__local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
 }
 
@@ -25,11 +31,13 @@ extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt);
 
 static inline void local_bh_enable_ip(unsigned long ip)
 {
+	lock_map_release(&bh_lock_map);
 	__local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET);
 }
 
 static inline void local_bh_enable(void)
 {
+	lock_map_release(&bh_lock_map);
 	__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8c4524ce6..f0edb0abc 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -1012,3 +1012,15 @@ unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
 {
 	return from;
 }
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key bh_lock_key;
+struct lockdep_map bh_lock_map = {
+	.name = "local_bh",
+	.key = &bh_lock_key,
+	.wait_type_outer = LD_WAIT_FREE,
+	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
+	.lock_type = LD_LOCK_PERCPU,
+};
+EXPORT_SYMBOL_GPL(bh_lock_map);
+#endif
-- 
2.34.1
Re: [PATCH v3] lockdep: Fix wait context check on softirq for PREEMPT_RT
Posted by Boqun Feng 12 months ago
On Sat, Jan 18, 2025 at 02:49:00PM +0900, Ryo Takakura wrote:
> Since commit 0c1d7a2c2d32 ("lockdep: Remove softirq accounting on
> PREEMPT_RT."), the wait context test for mutex usage within
> "in softirq context" fails as it references @softirq_context.
> 
> [    0.184549]   | wait context tests |
> [    0.184549]   --------------------------------------------------------------------------
> [    0.184549]                                  | rcu  | raw  | spin |mutex |
> [    0.184549]   --------------------------------------------------------------------------
> [    0.184550]                in hardirq context:  ok  |  ok  |  ok  |  ok  |
> [    0.185083] in hardirq context (not threaded):  ok  |  ok  |  ok  |  ok  |
> [    0.185606]                in softirq context:  ok  |  ok  |  ok  |FAILED|
> 
> As a fix, add lockdep map for BH disabled section. This fixes the
> issue by letting us catch cases when local_bh_disable() gets called
> with preemption disabled where local_lock doesn't get acquired.
> In the case of "in softirq context" selftest, local_bh_disable() was
> being called with preemption disable as it's early in the boot.
> 
> Signed-off-by: Ryo Takakura <ryotkkr98@gmail.com>

Queued for future tests and reviews, thanks!

Regards,
Boqun

> ---
> 
> Hi!
> 
> This version uses CONFIG_DEBUG_LOCK_ALLOC instead of CONFIG_LOCKDEP.
> Thanks Waiman for the advise!
> 
> v1:
> https://lore.kernel.org/lkml/20241202012017.14910-1-ryotkkr98@gmail.com/
> 
> v2:
> https://lore.kernel.org/lkml/20241227040824.83626-1-ryotkkr98@gmail.com/
> 
> Boqun, let me know in case if I should add you as <suggested-by> tag.
> (Please forgive me and ignore if not what the tag intends for... 
> I thought its appropriate reading the document[0].)
> 
> Sincerely,
> Ryo Takakura
> 
> [0] https://www.kernel.org/doc/html/v6.12/process/submitting-patches.html#using-reported-by-tested-by-reviewed-by-suggested-by-and-fixes
> 
> ---
>  include/linux/bottom_half.h |  8 ++++++++
>  kernel/softirq.c            | 12 ++++++++++++
>  2 files changed, 20 insertions(+)
> 
> diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
> index fc53e0ad5..0640a147b 100644
> --- a/include/linux/bottom_half.h
> +++ b/include/linux/bottom_half.h
> @@ -4,6 +4,7 @@
>  
>  #include <linux/instruction_pointer.h>
>  #include <linux/preempt.h>
> +#include <linux/lockdep.h>
>  
>  #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
>  extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
> @@ -15,8 +16,13 @@ static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int
>  }
>  #endif
>  
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +extern struct lockdep_map bh_lock_map;
> +#endif
> +
>  static inline void local_bh_disable(void)
>  {
> +	lock_map_acquire_read(&bh_lock_map);
>  	__local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
>  }
>  
> @@ -25,11 +31,13 @@ extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt);
>  
>  static inline void local_bh_enable_ip(unsigned long ip)
>  {
> +	lock_map_release(&bh_lock_map);
>  	__local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET);
>  }
>  
>  static inline void local_bh_enable(void)
>  {
> +	lock_map_release(&bh_lock_map);
>  	__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
>  }
>  
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 8c4524ce6..f0edb0abc 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -1012,3 +1012,15 @@ unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
>  {
>  	return from;
>  }
> +
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +static struct lock_class_key bh_lock_key;
> +struct lockdep_map bh_lock_map = {
> +	.name = "local_bh",
> +	.key = &bh_lock_key,
> +	.wait_type_outer = LD_WAIT_FREE,
> +	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
> +	.lock_type = LD_LOCK_PERCPU,
> +};
> +EXPORT_SYMBOL_GPL(bh_lock_map);
> +#endif
> -- 
> 2.34.1
>
[tip: locking/core] lockdep: Fix wait context check on softirq for PREEMPT_RT
Posted by tip-bot2 for Ryo Takakura 11 months, 1 week ago
The following commit has been merged into the locking/core branch of tip:

Commit-ID:     8a9d677a395703ef9075c91dd04066be8a553405
Gitweb:        https://git.kernel.org/tip/8a9d677a395703ef9075c91dd04066be8a553405
Author:        Ryo Takakura <ryotkkr98@gmail.com>
AuthorDate:    Sat, 18 Jan 2025 14:49:00 +09:00
Committer:     Boqun Feng <boqun.feng@gmail.com>
CommitterDate: Sun, 23 Feb 2025 18:24:46 -08:00

lockdep: Fix wait context check on softirq for PREEMPT_RT

Since commit 0c1d7a2c2d32 ("lockdep: Remove softirq accounting on
PREEMPT_RT."), the wait context test for mutex usage within
"in softirq context" fails as it references @softirq_context.

[    0.184549]   | wait context tests |
[    0.184549]   --------------------------------------------------------------------------
[    0.184549]                                  | rcu  | raw  | spin |mutex |
[    0.184549]   --------------------------------------------------------------------------
[    0.184550]                in hardirq context:  ok  |  ok  |  ok  |  ok  |
[    0.185083] in hardirq context (not threaded):  ok  |  ok  |  ok  |  ok  |
[    0.185606]                in softirq context:  ok  |  ok  |  ok  |FAILED|

As a fix, add lockdep map for BH disabled section. This fixes the
issue by letting us catch cases when local_bh_disable() gets called
with preemption disabled where local_lock doesn't get acquired.
In the case of "in softirq context" selftest, local_bh_disable() was
being called with preemption disable as it's early in the boot.

Signed-off-by: Ryo Takakura <ryotkkr98@gmail.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20250118054900.18639-1-ryotkkr98@gmail.com
---
 include/linux/bottom_half.h |  8 ++++++++
 kernel/softirq.c            | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index fc53e0a..0640a14 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -4,6 +4,7 @@
 
 #include <linux/instruction_pointer.h>
 #include <linux/preempt.h>
+#include <linux/lockdep.h>
 
 #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
@@ -15,8 +16,13 @@ static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int
 }
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map bh_lock_map;
+#endif
+
 static inline void local_bh_disable(void)
 {
+	lock_map_acquire_read(&bh_lock_map);
 	__local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
 }
 
@@ -25,11 +31,13 @@ extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt);
 
 static inline void local_bh_enable_ip(unsigned long ip)
 {
+	lock_map_release(&bh_lock_map);
 	__local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET);
 }
 
 static inline void local_bh_enable(void)
 {
+	lock_map_release(&bh_lock_map);
 	__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4dae6ac..e864f9c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -1073,3 +1073,15 @@ unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
 {
 	return from;
 }
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key bh_lock_key;
+struct lockdep_map bh_lock_map = {
+	.name = "local_bh",
+	.key = &bh_lock_key,
+	.wait_type_outer = LD_WAIT_FREE,
+	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
+	.lock_type = LD_LOCK_PERCPU,
+};
+EXPORT_SYMBOL_GPL(bh_lock_map);
+#endif
Re: [tip: locking/core] lockdep: Fix wait context check on softirq for PREEMPT_RT
Posted by Peter Zijlstra 11 months, 1 week ago
On Mon, Mar 03, 2025 at 06:51:59PM -0000, tip-bot2 for Ryo Takakura wrote:
> The following commit has been merged into the locking/core branch of tip:
> 
> Commit-ID:     8a9d677a395703ef9075c91dd04066be8a553405
> Gitweb:        https://git.kernel.org/tip/8a9d677a395703ef9075c91dd04066be8a553405
> Author:        Ryo Takakura <ryotkkr98@gmail.com>
> AuthorDate:    Sat, 18 Jan 2025 14:49:00 +09:00
> Committer:     Boqun Feng <boqun.feng@gmail.com>
> CommitterDate: Sun, 23 Feb 2025 18:24:46 -08:00
> 
> lockdep: Fix wait context check on softirq for PREEMPT_RT
> 
> Since commit 0c1d7a2c2d32 ("lockdep: Remove softirq accounting on
> PREEMPT_RT."), the wait context test for mutex usage within
> "in softirq context" fails as it references @softirq_context.
> 
> [    0.184549]   | wait context tests |
> [    0.184549]   --------------------------------------------------------------------------
> [    0.184549]                                  | rcu  | raw  | spin |mutex |
> [    0.184549]   --------------------------------------------------------------------------
> [    0.184550]                in hardirq context:  ok  |  ok  |  ok  |  ok  |
> [    0.185083] in hardirq context (not threaded):  ok  |  ok  |  ok  |  ok  |
> [    0.185606]                in softirq context:  ok  |  ok  |  ok  |FAILED|
> 
> As a fix, add lockdep map for BH disabled section. This fixes the
> issue by letting us catch cases when local_bh_disable() gets called
> with preemption disabled where local_lock doesn't get acquired.
> In the case of "in softirq context" selftest, local_bh_disable() was
> being called with preemption disable as it's early in the boot.
> 
> Signed-off-by: Ryo Takakura <ryotkkr98@gmail.com>
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> Link: https://lore.kernel.org/r/20250118054900.18639-1-ryotkkr98@gmail.com

This commit is causing:

[    7.184373] NET: Registered PF_INET6 protocol family
[    7.196129] =============================
[    7.196129] [ BUG: Invalid wait context ]
[    7.196129] 6.14.0-rc5-00547-g67de62470d82-dirty #629 Not tainted
[    7.196129] -----------------------------
[    7.196129] swapper/0/1 is trying to lock:
[    7.196129] ffffffff83312108 (pcpu_alloc_mutex){+.+.}-{4:4}, at: pcpu_alloc_noprof+0x818/0xc20
[    7.196129] other info that might help us debug this:
[    7.196129] context-{5:5}
[    7.238009] ata7.00: ATA-8: ST91000640NS, SN03, max UDMA/133
[    7.196129] 3 locks held by swapper/0/1:
[    7.196129]  #0: ffffffff834414d0 (pernet_ops_rwsem){+.+.}-{4:4}, at: register_netdevice_notifier+0x1a/0x120
[    7.196129]  #1: ffffffff83442988 (rtnl_mutex){+.+.}-{4:4}, at: register_netdevice_notifier+0x1f/0x120
[    7.196129]  #2: ffffffff8324c740 (local_bh){.+.+}-{1:3}, at: dev_mc_add+0x39/0xb0
[    7.196129] stack backtrace:
[    7.196129] CPU: 31 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc5-00547-g67de62470d82-dirty #629
[    7.196129] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
[    7.196129] Call Trace:
[    7.196129]  <TASK>
[    7.196129]  dump_stack_lvl+0x57/0x80
[    7.196129]  __lock_acquire+0xd72/0x17d0
[    7.196129]  ? ret_from_fork_asm+0x1a/0x30
[    7.196129]  lock_acquire+0xcd/0x2f0
[    7.196129]  ? pcpu_alloc_noprof+0x818/0xc20
[    7.196129]  __mutex_lock+0xa4/0x820
[    7.196129]  ? pcpu_alloc_noprof+0x818/0xc20
[    7.196129]  ? pcpu_alloc_noprof+0x818/0xc20
[    7.196129]  ? lock_acquire+0xcd/0x2f0
[    7.239989]  ? pcpu_alloc_noprof+0x818/0xc20
[    7.239990]  pcpu_alloc_noprof+0x818/0xc20
[    7.239993]  ? lockdep_hardirqs_on+0x74/0x110
[    7.239996]  ? neigh_parms_alloc+0xed/0x160
[    7.240001]  ? neigh_parms_alloc+0xed/0x160
[    7.240005]  ipv6_add_dev+0x154/0x520
[    7.240005]  addrconf_notify+0x2de/0x8b0
[    7.240005]  ? register_netdevice_notifier+0x1f/0x120
[    7.240005]  ? lock_acquire+0xdd/0x2f0
[    7.240005]  call_netdevice_register_net_notifiers+0x5b/0x100
[    7.240005]  register_netdevice_notifier+0x87/0x120
[    7.240005]  addrconf_init+0xa5/0x150
[    7.240005]  inet6_init+0x1f3/0x3b0
[    7.240005]  ? __pfx_inet6_init+0x10/0x10
[    7.240005]  do_one_initcall+0x53/0x2b0
[    7.240005]  ? rcu_is_watching+0xd/0x40
[    7.240005]  kernel_init_freeable+0x23f/0x280
[    7.240005]  ? __pfx_kernel_init+0x10/0x10
[    7.240005]  kernel_init+0x16/0x130
[    7.240005]  ret_from_fork+0x2d/0x50
[    7.240005]  ? __pfx_kernel_init+0x10/0x10
[    7.240005]  ret_from_fork_asm+0x1a/0x30
[    7.240005]  </TASK>

And some other weirdness for bpetkov:

  https://lkml.kernel.org/r/20250306122413.GBZ8mT7Z61Tmgnh5Y9@fat_crate.local


I suspect you missed a release somewhere.