There is a deadlock scenario between lockdep and RCU when the
rcu nocb feature is enabled, as shown by the following call stacks:
rcuop/x
-000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?)
-001|queued_spin_lock(inline) // try to hold nocb_gp_lock
-001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80)
-002|__raw_spin_lock_irqsave(inline)
-002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80)
-003|wake_nocb_gp_defer(inline)
-003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680)
-004|__call_rcu_common(inline)
-004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?)
-005|call_rcu_zapped(inline)
-005|free_zapped_rcu(ch = ?)// hold graph lock
-006|rcu_do_batch(rdp = 0xFFFFFF817F245680)
-007|nocb_cb_wait(inline)
-007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680)
-008|kthread(_create = 0xFFFFFF80803122C0)
-009|ret_from_fork(asm)
rcuop/y
-000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
-001|queued_spin_lock()
-001|lockdep_lock()
-001|graph_lock() // try to hold graph lock
-002|lookup_chain_cache_add()
-002|validate_chain()
-003|lock_acquire
-004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
-005|lock_timer_base(inline)
-006|mod_timer(inline)
-006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
-006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
-007|__call_rcu_common(inline)
-007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
-008|call_rcu_hurry(inline)
-008|rcu_sync_call(inline)
-008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
-009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
-010|nocb_cb_wait(inline)
-010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
-011|kthread(_create = 0xFFFFFF8080363740)
-012|ret_from_fork(asm)
rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread.
This patch releases the graph lock before lockdep calls call_rcu().
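The resulting flow follows the common "decide under the lock, act after
dropping it" shape. As a rough illustration only, here is a minimal
user-space sketch of that pattern (pthread-based, with made-up names;
it is not the kernel code itself):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t graph_lock = PTHREAD_MUTEX_INITIALIZER;
static bool callback_scheduled;  /* analogue of delayed_free.scheduled */
static int pending_work;         /* analogue of the pf->zapped list */

/* Must be called with graph_lock held; returns true if the caller must
 * schedule the deferred callback after unlocking. */
static bool prepare_deferred_free(void)
{
        if (pending_work == 0 || callback_scheduled)
                return false;
        callback_scheduled = true;
        return true;
}

/* Stand-in for call_rcu(): it may take other locks internally, so it
 * must not be invoked with graph_lock held. */
static void schedule_deferred_free(void)
{
        printf("callback scheduled for %d item(s)\n", pending_work);
}

static void zap_and_maybe_schedule(void)
{
        bool need_callback;

        pthread_mutex_lock(&graph_lock);
        pending_work++;                    /* queue some work */
        need_callback = prepare_deferred_free();
        pthread_mutex_unlock(&graph_lock); /* drop the lock first ... */

        if (need_callback)
                schedule_deferred_free();  /* ... then schedule the callback */
}

int main(void)
{
        zap_and_maybe_schedule();
        zap_and_maybe_schedule(); /* second call: callback already pending */
        return 0;
}

The diff below applies the same split to call_rcu_zapped(): the check and
the flag update stay under the graph lock, while call_rcu() itself is only
issued after the lock has been released.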
Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use")
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
---
changes of v2: update patch according to Boqun's suggestions.
---
---
kernel/locking/lockdep.c | 47 +++++++++++++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 16 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3d..ddcaa69 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6184,25 +6184,27 @@ static struct pending_free *get_pending_free(void)
static void free_zapped_rcu(struct rcu_head *cb);
/*
- * Schedule an RCU callback if no RCU callback is pending. Must be called with
- * the graph lock held.
- */
-static void call_rcu_zapped(struct pending_free *pf)
+* See if we need to queue an RCU callback, must called with
+* the lockdep lock held, returns false if either we don't have
+* any pending free or the callback is already scheduled.
+* Otherwise, a call_rcu() must follow this function call.
+*/
+static bool prepare_call_rcu_zapped(struct pending_free *pf)
{
WARN_ON_ONCE(inside_selftest());
if (list_empty(&pf->zapped))
- return;
+ return false;
if (delayed_free.scheduled)
- return;
+ return false;
delayed_free.scheduled = true;
WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
delayed_free.index ^= 1;
- call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+ return true;
}
/* The caller must hold the graph lock. May be called from RCU context. */
@@ -6228,6 +6230,7 @@ static void free_zapped_rcu(struct rcu_head *ch)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
return;
@@ -6239,14 +6242,17 @@ static void free_zapped_rcu(struct rcu_head *ch)
pf = delayed_free.pf + (delayed_free.index ^ 1);
__free_zapped_classes(pf);
delayed_free.scheduled = false;
+ need_callback =
+ prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ lockdep_unlock();
+ raw_local_irq_restore(flags);
/*
- * If there's anything on the open list, close and start a new callback.
- */
- call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ * If there's anything on the open list, close and start a new callback.
+ */
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
- lockdep_unlock();
- raw_local_irq_restore(flags);
}
/*
@@ -6286,6 +6292,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
init_data_structures_once();
@@ -6293,10 +6300,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
lockdep_lock();
pf = get_pending_free();
__lockdep_free_key_range(pf, start, size);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
lockdep_unlock();
raw_local_irq_restore(flags);
-
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
/*
* Wait for any possible iterators from look_up_lock_class() to pass
* before continuing to free the memory they refer to.
@@ -6390,6 +6398,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
struct pending_free *pf;
unsigned long flags;
int locked;
+ bool need_callback = false;
raw_local_irq_save(flags);
locked = graph_lock();
@@ -6398,11 +6407,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
pf = get_pending_free();
__lockdep_reset_lock(pf, lock);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
graph_unlock();
out_irq:
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
}
/*
@@ -6446,6 +6457,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
struct pending_free *pf;
unsigned long flags;
bool found = false;
+ bool need_callback = false;
might_sleep();
@@ -6466,11 +6478,14 @@ void lockdep_unregister_key(struct lock_class_key *key)
if (found) {
pf = get_pending_free();
__lockdep_free_key_range(pf, key, 1);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
}
lockdep_unlock();
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
synchronize_rcu();
}
--
1.9.1
On 1/16/24 23:48, Zhiguo Niu wrote:
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
> -001|queued_spin_lock()
> -001|lockdep_lock()
> -001|graph_lock() // try to hold graph lock
> -002|lookup_chain_cache_add()
> -002|validate_chain()
> -003|lock_acquire
> -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
> -005|lock_timer_base(inline)
> -006|mod_timer(inline)
> -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
> -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
> -007|__call_rcu_common(inline)
> -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
> -008|call_rcu_hurry(inline)
> -008|rcu_sync_call(inline)
> -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
> -009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
> -010|nocb_cb_wait(inline)
> -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
> -011|kthread(_create = 0xFFFFFF8080363740)
> -012|ret_from_fork(asm)

The above call trace shows a graph_lock() call from inside lock_timer_base().
How can that happen? In lock_timer_base() I see a raw_spin_lock_irqsave()
call. Did I perhaps overlook something?

Thanks,

Bart.
On Thu, Feb 1, 2024 at 11:28 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 1/16/24 23:48, Zhiguo Niu wrote:
> > -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
> > -001|queued_spin_lock()
> > -001|lockdep_lock()
> > -001|graph_lock() // try to hold graph lock
> > -002|lookup_chain_cache_add()
> > -002|validate_chain()
> > -003|lock_acquire
> > -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
> > -005|lock_timer_base(inline)
> > -006|mod_timer(inline)
> > -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
> > -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
> > -007|__call_rcu_common(inline)
> > -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
> > -008|call_rcu_hurry(inline)
> > -008|rcu_sync_call(inline)
> > -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
> > -009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
> > -010|nocb_cb_wait(inline)
> > -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
> > -011|kthread(_create = 0xFFFFFF8080363740)
> > -012|ret_from_fork(asm)
>
> The above call trace shows a graph_lock() call from inside lock_timer_base().
> How can that happen? In lock_timer_base() I see a raw_spin_lock_irqsave() call.
> Did I perhaps overlook something?
>
(replying from gmail web, please forgive the format)
raw_spin_lock_irqsave():
lock_acquire():
__lock_acquire():
validate_chain():
lookup_chain_cache_add():
graph_lock();
Basically, every lock acquisition may lock the lockdep graph because
of dependency checking.
Regards,
Boqun
> Thanks,
>
> Bart.
>
On 2/1/24 11:48, Boqun Feng wrote:
> raw_spin_lock_irqsave():
> lock_acquire():
> __lock_acquire():
> validate_chain():
> lookup_chain_cache_add():
> graph_lock();
>
> Basically, every lock acquisition may lock the lockdep graph because
> of dependency checking.

Wouldn't it be simpler to make __lock_acquire() return early if
this_cpu_read(lockdep_recursion) indicates that the graph lock is held?

Thanks,

Bart.
On Thu, Feb 01, 2024 at 01:24:48PM -0800, Bart Van Assche wrote:
> On 2/1/24 11:48, Boqun Feng wrote:
> > raw_spin_lock_irqsave():
> > lock_acquire():
> > __lock_acquire():
> > validate_chain():
> > lookup_chain_cache_add():
> > graph_lock();
> >
> > Basically, every lock acquisition may lock the lockdep graph because
> > of dependency checking.
>
> Wouldn't it be simpler to make __lock_acquire() return early if
> this_cpu_read(lockdep_recursion) indicates that the graph lock is held?
>

Note that lockdep_recursion doesn't indicate that the graph lock is held;
it indicates that we have entered lockdep internals. If any lock
acquisition happens while it is set, __lock_acquire() skips the checking,
so we don't go into lockdep internals again and therefore avoid infinite
recursion.

Regards,
Boqun

> Thanks,
>
> Bart.
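The recursion guard described above can be pictured with a tiny user-space
sketch (invented names and a thread-local flag; this is not the actual
lockdep code): while the flag is set we are inside the checker, and any
lock acquisition taken from that window skips the checking instead of
recursing.

#include <stdio.h>

static _Thread_local int checker_recursion; /* analogue of lockdep_recursion */

static void run_dependency_check(const char *lock_name);

static void instrumented_lock_acquire(const char *lock_name)
{
        if (checker_recursion) {
                /* Already inside the checker: acquire without checking. */
                printf("acquire %s (check skipped)\n", lock_name);
                return;
        }
        checker_recursion++;
        run_dependency_check(lock_name);
        checker_recursion--;
        printf("acquire %s (checked)\n", lock_name);
}

static void run_dependency_check(const char *lock_name)
{
        (void)lock_name;
        /* The checker takes its own internal lock; without the guard this
         * would recurse back into instrumented_lock_acquire() forever. */
        instrumented_lock_acquire("internal graph lock");
}

int main(void)
{
        instrumented_lock_acquire("timer base lock");
        return 0;
}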
On 1/16/24 23:48, Zhiguo Niu wrote:
> There is a deadlock scenario between lockdep and rcu when
> rcu nocb feature is enabled, just as following call stack:

Is it necessary to support lockdep for this kernel configuration or should we
rather forbid this combination by changing lib/Kconfig.debug?

> /*
> - * Schedule an RCU callback if no RCU callback is pending. Must be called with
> - * the graph lock held.
> - */
> -static void call_rcu_zapped(struct pending_free *pf)
> +* See if we need to queue an RCU callback, must called with
> +* the lockdep lock held, returns false if either we don't have
> +* any pending free or the callback is already scheduled.
> +* Otherwise, a call_rcu() must follow this function call.
> +*/

Why has the name been changed from "graph lock" into "lockdep lock"? I think
elsewhere in this source file it is called the "graph lock".

> /*
> - * If there's anything on the open list, close and start a new callback.
> - */
> - call_rcu_zapped(delayed_free.pf + delayed_free.index);
> + * If there's anything on the open list, close and start a new callback.
> + */
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);

The comment above the if-statement refers to the call_rcu_zapped() function
while call_rcu_zapped() has been changed into call_rcu(). So the comment is
now incorrect.

Additionally, what guarantees that the above code won't be triggered
concurrently from two different threads? As you may know calling call_rcu()
twice before the callback has been started is not allowed. I think that can
happen with the above code.

Bart.
On Thu, Feb 01, 2024 at 09:22:20AM -0800, Bart Van Assche wrote:
> On 1/16/24 23:48, Zhiguo Niu wrote:
> > There is a deadlock scenario between lockdep and rcu when
> > rcu nocb feature is enabled, just as following call stack:
>
> Is it necessary to support lockdep for this kernel configuration or should we
> rather forbid this combination by changing lib/Kconfig.debug?
>

RCU nocb is a quite common configuration for RCU, so I think lockdep
should support this.

> > /*
> > - * Schedule an RCU callback if no RCU callback is pending. Must be called with
> > - * the graph lock held.
> > - */
> > -static void call_rcu_zapped(struct pending_free *pf)
> > +* See if we need to queue an RCU callback, must called with
> > +* the lockdep lock held, returns false if either we don't have
> > +* any pending free or the callback is already scheduled.
> > +* Otherwise, a call_rcu() must follow this function call.
> > +*/
>
> Why has the name been changed from "graph lock" into "lockdep lock"? I think
> elsewhere in this source file it is called the "graph lock".
>
> > /*
> > - * If there's anything on the open list, close and start a new callback.
> > - */
> > - call_rcu_zapped(delayed_free.pf + delayed_free.index);
> > + * If there's anything on the open list, close and start a new callback.
> > + */
> > + if (need_callback)
> > + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
>
> The comment above the if-statement refers to the call_rcu_zapped() function
> while call_rcu_zapped() has been changed into call_rcu(). So the comment is
> now incorrect.
>
> Additionally, what guarantees that the above code won't be triggered
> concurrently from two different threads? As you may know calling call_rcu()
> twice before the callback has been started is not allowed. I think that can
> happen with the above code.
>

No, it's synchronized by the delayed_free.scheduled. Only one thread/CPU
can schedule at a time. Or am I missing something subtle?

Regards,
Boqun

> Bart.
On 2/1/24 11:58, Boqun Feng wrote:
> On Thu, Feb 01, 2024 at 09:22:20AM -0800, Bart Van Assche wrote:
>> On 1/16/24 23:48, Zhiguo Niu wrote:
>>> /*
>>> - * If there's anything on the open list, close and start a new callback.
>>> - */
>>> - call_rcu_zapped(delayed_free.pf + delayed_free.index);
>>> + * If there's anything on the open list, close and start a new callback.
>>> + */
>>> + if (need_callback)
>>> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
>>
>> The comment above the if-statement refers to the call_rcu_zapped() function
>> while call_rcu_zapped() has been changed into call_rcu(). So the comment is
>> now incorrect.
>>
>> Additionally, what guarantees that the above code won't be triggered
>> concurrently from two different threads? As you may know calling call_rcu()
>> twice before the callback has been started is not allowed. I think that can
>> happen with the above code.
>
> No, it's synchronized by the delayed_free.scheduled. Only one thread/CPU
> can schedule at a time. Or am I missing something subtle?

Only call_rcu_zapped() reads and modifies delayed_free.scheduled. Direct
call_rcu() calls neither read nor modify delayed_free.scheduled.

Bart.
On Thu, Feb 01, 2024 at 12:56:36PM -0800, Bart Van Assche wrote:
> On 2/1/24 11:58, Boqun Feng wrote:
> > On Thu, Feb 01, 2024 at 09:22:20AM -0800, Bart Van Assche wrote:
> > > On 1/16/24 23:48, Zhiguo Niu wrote:
> > > > /*
> > > > - * If there's anything on the open list, close and start a new callback.
> > > > - */
> > > > - call_rcu_zapped(delayed_free.pf + delayed_free.index);
> > > > + * If there's anything on the open list, close and start a new callback.
> > > > + */
> > > > + if (need_callback)
> > > > + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> > >
> > > The comment above the if-statement refers to the call_rcu_zapped() function
> > > while call_rcu_zapped() has been changed into call_rcu(). So the comment is
> > > now incorrect.
> > >
> > > Additionally, what guarantees that the above code won't be triggered
> > > concurrently from two different threads? As you may know calling call_rcu()
> > > twice before the callback has been started is not allowed. I think that can
> > > happen with the above code.
> >
> > No, it's synchronized by the delayed_free.scheduled. Only one thread/CPU
> > can schedule at a time. Or am I missing something subtle?
>
> Only call_rcu_zapped() reads and modifies delayed_free.scheduled. Direct
> call_rcu() calls neither read nor modify delayed_free.scheduled.

Have you checked the change in the patch? Now call_rcu_zapped() has been
split into two parts: preparing the callback and calling call_rcu(),
the preparing part checks and sets the delayed_free.scheduled under
graph_lock(), so only one CPU/thread will win and do the actual
call_rcu(). And the RCU callback free_zapped_rcu() will unset
delayed_free.scheduled, again under graph_lock().

If you think it's still possible, could you provide a case where the
race may happen?

Regards,
Boqun

>
> Bart.
>
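To make the "only one thread wins" point concrete, here is a small
two-thread user-space demo (hypothetical names; a plain mutex stands in
for the graph lock and a counter for call_rcu()): because the scheduled
flag is tested and set under one lock, at most one of the racing threads
ends up issuing the callback.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool scheduled; /* analogue of delayed_free.scheduled */
static int winners;

static void *zapper(void *arg)
{
        bool need_callback;

        pthread_mutex_lock(&lock);
        need_callback = !scheduled; /* test ... */
        scheduled = true;           /* ... and set under the same lock */
        pthread_mutex_unlock(&lock);

        if (need_callback)
                winners++; /* only one thread can reach this line */
        return arg;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, zapper, NULL);
        pthread_create(&b, NULL, zapper, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("threads that issued the callback: %d\n", winners); /* always 1 */
        return 0;
}

In the patch, the flag is cleared again in free_zapped_rcu() under the
graph lock, so the next prepare_call_rcu_zapped() caller can win once the
previous callback has run.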
On 2/1/24 13:53, Boqun Feng wrote:
> Have you checked the change in the patch? Now call_rcu_zapped() has been
> split into two parts: preparing the callback and calling call_rcu(),
> the preparing part checks and sets the delayed_free.scheduled under
> graph_lock(), so only one CPU/thread will win and do the actual
> call_rcu(). And the RCU callback free_zapped_rcu() will unset
> delayed_free.scheduled, again under graph_lock().
>
> If you think it's still possible, could you provide a case where the
> race may happen?

Yes, I noticed that call_rcu_zapped() has been split but the first time
I took a look at this patch I wasn't sure that the new code is correct.
After having taken a second look, the new mechanism for deciding whether
or not to invoke call_rcu() seems fine to me.

Bart.
Dear All,

Thanks for your discussion and suggestions. I have updated and sent patch
version 3 according to your comments.

Thanks!

On Fri, Feb 2, 2024 at 10:13 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 2/1/24 13:53, Boqun Feng wrote:
> > Have you checked the change in the patch? Now call_rcu_zapped() has been
> > split into two parts: preparing the callback and calling call_rcu(),
> > the preparing part checks and sets the delayed_free.scheduled under
> > graph_lock(), so only one CPU/thread will win and do the actual
> > call_rcu(). And the RCU callback free_zapped_rcu() will unset
> > delayed_free.scheduled, again under graph_lock().
> >
> > If you think it's still possible, could you provide a case where the
> > race may happen?
>
> Yes, I noticed that call_rcu_zapped() has been split but the first time
> I took a look at this patch I wasn't sure that the new code is correct.
> After having taken a second look, the new mechanism for deciding whether
> or not to invoke call_rcu() seems fine to me.
>
> Bart.
On Wed, Jan 17, 2024 at 03:48:34PM +0800, Zhiguo Niu wrote:
> There is a deadlock scenario between lockdep and rcu when
> rcu nocb feature is enabled, just as following call stack:
>
> rcuop/x
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?)
> -001|queued_spin_lock(inline) // try to hold nocb_gp_lock
> -001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80)
> -002|__raw_spin_lock_irqsave(inline)
> -002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80)
> -003|wake_nocb_gp_defer(inline)
> -003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680)
> -004|__call_rcu_common(inline)
> -004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?)
> -005|call_rcu_zapped(inline)
> -005|free_zapped_rcu(ch = ?)// hold graph lock
> -006|rcu_do_batch(rdp = 0xFFFFFF817F245680)
> -007|nocb_cb_wait(inline)
> -007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680)
> -008|kthread(_create = 0xFFFFFF80803122C0)
> -009|ret_from_fork(asm)
>
> rcuop/y
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
> -001|queued_spin_lock()
> -001|lockdep_lock()
> -001|graph_lock() // try to hold graph lock
> -002|lookup_chain_cache_add()
> -002|validate_chain()
> -003|lock_acquire
> -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
> -005|lock_timer_base(inline)
> -006|mod_timer(inline)
> -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
> -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
> -007|__call_rcu_common(inline)
> -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
> -008|call_rcu_hurry(inline)
> -008|rcu_sync_call(inline)
> -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
> -009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
> -010|nocb_cb_wait(inline)
> -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
> -011|kthread(_create = 0xFFFFFF8080363740)
> -012|ret_from_fork(asm)
>
> rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread.
> This patch release the graph lock before lockdep call_rcu.
>
> Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use")
This has a "Fixes" tag but I don't see Cc: stable. Does this need to be
picked for stable branches? Or does tip branch does something special?
Also, Cc: Bart Van Assche <bvanassche@acm.org> (blamed_fixes) just FYI.
--
Carlos Llamas
On Wed, Jan 17, 2024 at 03:48:34PM +0800, Zhiguo Niu wrote:
> There is a deadlock scenario between lockdep and rcu when
> rcu nocb feature is enabled, just as following call stack:
>
> rcuop/x
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?)
> -001|queued_spin_lock(inline) // try to hold nocb_gp_lock
> -001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80)
> -002|__raw_spin_lock_irqsave(inline)
> -002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80)
> -003|wake_nocb_gp_defer(inline)
> -003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680)
> -004|__call_rcu_common(inline)
> -004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?)
> -005|call_rcu_zapped(inline)
> -005|free_zapped_rcu(ch = ?)// hold graph lock
> -006|rcu_do_batch(rdp = 0xFFFFFF817F245680)
> -007|nocb_cb_wait(inline)
> -007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680)
> -008|kthread(_create = 0xFFFFFF80803122C0)
> -009|ret_from_fork(asm)
>
> rcuop/y
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
> -001|queued_spin_lock()
> -001|lockdep_lock()
> -001|graph_lock() // try to hold graph lock
> -002|lookup_chain_cache_add()
> -002|validate_chain()
> -003|lock_acquire
> -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
> -005|lock_timer_base(inline)
> -006|mod_timer(inline)
> -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
> -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
> -007|__call_rcu_common(inline)
> -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
> -008|call_rcu_hurry(inline)
> -008|rcu_sync_call(inline)
> -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
> -009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
> -010|nocb_cb_wait(inline)
> -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
> -011|kthread(_create = 0xFFFFFF8080363740)
> -012|ret_from_fork(asm)
>
> rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread.
> This patch release the graph lock before lockdep call_rcu.
>
> Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use")
> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Regards,
Boqun
> ---
> changes of v2: update patch according to Boqun's suggestions.
> ---
> ---
> kernel/locking/lockdep.c | 47 +++++++++++++++++++++++++++++++----------------
> 1 file changed, 31 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 151bd3d..ddcaa69 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -6184,25 +6184,27 @@ static struct pending_free *get_pending_free(void)
> static void free_zapped_rcu(struct rcu_head *cb);
>
> /*
> - * Schedule an RCU callback if no RCU callback is pending. Must be called with
> - * the graph lock held.
> - */
> -static void call_rcu_zapped(struct pending_free *pf)
> +* See if we need to queue an RCU callback, must called with
> +* the lockdep lock held, returns false if either we don't have
> +* any pending free or the callback is already scheduled.
> +* Otherwise, a call_rcu() must follow this function call.
> +*/
> +static bool prepare_call_rcu_zapped(struct pending_free *pf)
> {
> WARN_ON_ONCE(inside_selftest());
>
> if (list_empty(&pf->zapped))
> - return;
> + return false;
>
> if (delayed_free.scheduled)
> - return;
> + return false;
>
> delayed_free.scheduled = true;
>
> WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
> delayed_free.index ^= 1;
>
> - call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> + return true;
> }
>
> /* The caller must hold the graph lock. May be called from RCU context. */
> @@ -6228,6 +6230,7 @@ static void free_zapped_rcu(struct rcu_head *ch)
> {
> struct pending_free *pf;
> unsigned long flags;
> + bool need_callback;
>
> if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
> return;
> @@ -6239,14 +6242,17 @@ static void free_zapped_rcu(struct rcu_head *ch)
> pf = delayed_free.pf + (delayed_free.index ^ 1);
> __free_zapped_classes(pf);
> delayed_free.scheduled = false;
> + need_callback =
> + prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
> + lockdep_unlock();
> + raw_local_irq_restore(flags);
>
> /*
> - * If there's anything on the open list, close and start a new callback.
> - */
> - call_rcu_zapped(delayed_free.pf + delayed_free.index);
> + * If there's anything on the open list, close and start a new callback.
> + */
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
>
> - lockdep_unlock();
> - raw_local_irq_restore(flags);
> }
>
> /*
> @@ -6286,6 +6292,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
> {
> struct pending_free *pf;
> unsigned long flags;
> + bool need_callback;
>
> init_data_structures_once();
>
> @@ -6293,10 +6300,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
> lockdep_lock();
> pf = get_pending_free();
> __lockdep_free_key_range(pf, start, size);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
> lockdep_unlock();
> raw_local_irq_restore(flags);
> -
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> /*
> * Wait for any possible iterators from look_up_lock_class() to pass
> * before continuing to free the memory they refer to.
> @@ -6390,6 +6398,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
> struct pending_free *pf;
> unsigned long flags;
> int locked;
> + bool need_callback = false;
>
> raw_local_irq_save(flags);
> locked = graph_lock();
> @@ -6398,11 +6407,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
>
> pf = get_pending_free();
> __lockdep_reset_lock(pf, lock);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
>
> graph_unlock();
> out_irq:
> raw_local_irq_restore(flags);
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> }
>
> /*
> @@ -6446,6 +6457,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
> struct pending_free *pf;
> unsigned long flags;
> bool found = false;
> + bool need_callback = false;
>
> might_sleep();
>
> @@ -6466,11 +6478,14 @@ void lockdep_unregister_key(struct lock_class_key *key)
> if (found) {
> pf = get_pending_free();
> __lockdep_free_key_range(pf, key, 1);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
> }
> lockdep_unlock();
> raw_local_irq_restore(flags);
>
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> +
> /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
> synchronize_rcu();
> }
> --
> 1.9.1
>
On 1/17/24 02:48, Zhiguo Niu wrote:
> There is a deadlock scenario between lockdep and rcu when
> rcu nocb feature is enabled, just as following call stack:
>
> rcuop/x
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFF817F2A8A80, val = ?)
> -001|queued_spin_lock(inline) // try to hold nocb_gp_lock
> -001|do_raw_spin_lock(lock = 0xFFFFFF817F2A8A80)
> -002|__raw_spin_lock_irqsave(inline)
> -002|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F2A8A80)
> -003|wake_nocb_gp_defer(inline)
> -003|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F30B680)
> -004|__call_rcu_common(inline)
> -004|call_rcu(head = 0xFFFFFFC082EECC28, func = ?)
> -005|call_rcu_zapped(inline)
> -005|free_zapped_rcu(ch = ?)// hold graph lock
> -006|rcu_do_batch(rdp = 0xFFFFFF817F245680)
> -007|nocb_cb_wait(inline)
> -007|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F245680)
> -008|kthread(_create = 0xFFFFFF80803122C0)
> -009|ret_from_fork(asm)
>
> rcuop/y
> -000|queued_spin_lock_slowpath(lock = 0xFFFFFFC08291BBC8, val = 0)
> -001|queued_spin_lock()
> -001|lockdep_lock()
> -001|graph_lock() // try to hold graph lock
> -002|lookup_chain_cache_add()
> -002|validate_chain()
> -003|lock_acquire
> -004|_raw_spin_lock_irqsave(lock = 0xFFFFFF817F211D80)
> -005|lock_timer_base(inline)
> -006|mod_timer(inline)
> -006|wake_nocb_gp_defer(inline)// hold nocb_gp_lock
> -006|__call_rcu_nocb_wake(rdp = 0xFFFFFF817F2A8680)
> -007|__call_rcu_common(inline)
> -007|call_rcu(head = 0xFFFFFFC0822E0B58, func = ?)
> -008|call_rcu_hurry(inline)
> -008|rcu_sync_call(inline)
> -008|rcu_sync_func(rhp = 0xFFFFFFC0822E0B58)
> -009|rcu_do_batch(rdp = 0xFFFFFF817F266680)
> -010|nocb_cb_wait(inline)
> -010|rcu_nocb_cb_kthread(arg = 0xFFFFFF817F266680)
> -011|kthread(_create = 0xFFFFFF8080363740)
> -012|ret_from_fork(asm)
>
> rcuop/x and rcuop/y are rcu nocb threads with the same nocb gp thread.
> This patch release the graph lock before lockdep call_rcu.
>
> Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use")
> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> changes of v2: update patch according to Boqun's suggestions.
> ---
> ---
> kernel/locking/lockdep.c | 47 +++++++++++++++++++++++++++++++----------------
> 1 file changed, 31 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 151bd3d..ddcaa69 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -6184,25 +6184,27 @@ static struct pending_free *get_pending_free(void)
> static void free_zapped_rcu(struct rcu_head *cb);
>
> /*
> - * Schedule an RCU callback if no RCU callback is pending. Must be called with
> - * the graph lock held.
> - */
> -static void call_rcu_zapped(struct pending_free *pf)
> +* See if we need to queue an RCU callback, must called with
> +* the lockdep lock held, returns false if either we don't have
> +* any pending free or the callback is already scheduled.
> +* Otherwise, a call_rcu() must follow this function call.
> +*/
> +static bool prepare_call_rcu_zapped(struct pending_free *pf)
> {
> WARN_ON_ONCE(inside_selftest());
>
> if (list_empty(&pf->zapped))
> - return;
> + return false;
>
> if (delayed_free.scheduled)
> - return;
> + return false;
>
> delayed_free.scheduled = true;
>
> WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
> delayed_free.index ^= 1;
>
> - call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> + return true;
> }
>
> /* The caller must hold the graph lock. May be called from RCU context. */
> @@ -6228,6 +6230,7 @@ static void free_zapped_rcu(struct rcu_head *ch)
> {
> struct pending_free *pf;
> unsigned long flags;
> + bool need_callback;
>
> if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
> return;
> @@ -6239,14 +6242,17 @@ static void free_zapped_rcu(struct rcu_head *ch)
> pf = delayed_free.pf + (delayed_free.index ^ 1);
> __free_zapped_classes(pf);
> delayed_free.scheduled = false;
> + need_callback =
> + prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
> + lockdep_unlock();
> + raw_local_irq_restore(flags);
>
> /*
> - * If there's anything on the open list, close and start a new callback.
> - */
> - call_rcu_zapped(delayed_free.pf + delayed_free.index);
> + * If there's anything on the open list, close and start a new callback.
> + */
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
>
> - lockdep_unlock();
> - raw_local_irq_restore(flags);
> }
>
> /*
> @@ -6286,6 +6292,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
> {
> struct pending_free *pf;
> unsigned long flags;
> + bool need_callback;
>
> init_data_structures_once();
>
> @@ -6293,10 +6300,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
> lockdep_lock();
> pf = get_pending_free();
> __lockdep_free_key_range(pf, start, size);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
> lockdep_unlock();
> raw_local_irq_restore(flags);
> -
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> /*
> * Wait for any possible iterators from look_up_lock_class() to pass
> * before continuing to free the memory they refer to.
> @@ -6390,6 +6398,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
> struct pending_free *pf;
> unsigned long flags;
> int locked;
> + bool need_callback = false;
>
> raw_local_irq_save(flags);
> locked = graph_lock();
> @@ -6398,11 +6407,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
>
> pf = get_pending_free();
> __lockdep_reset_lock(pf, lock);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
>
> graph_unlock();
> out_irq:
> raw_local_irq_restore(flags);
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> }
>
> /*
> @@ -6446,6 +6457,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
> struct pending_free *pf;
> unsigned long flags;
> bool found = false;
> + bool need_callback = false;
>
> might_sleep();
>
> @@ -6466,11 +6478,14 @@ void lockdep_unregister_key(struct lock_class_key *key)
> if (found) {
> pf = get_pending_free();
> __lockdep_free_key_range(pf, key, 1);
> - call_rcu_zapped(pf);
> + need_callback = prepare_call_rcu_zapped(pf);
> }
> lockdep_unlock();
> raw_local_irq_restore(flags);
>
> + if (need_callback)
> + call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
> +
> /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
> synchronize_rcu();
> }
LGTM.
Reviewed-by: Waiman Long <longman@redhat.com>