[v1] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

[PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 12 months ago

Lockdep is blind to the dev->mutex field of struct device, owing to
the fact that these mutexes are assigned to lockdep's "novalidate"
class.  Commit 1704f47b50b5 ("lockdep: Add novalidate class for
dev->mutex conversion") did this because the hierarchical nature of
the device tree makes it impossible in practice to determine whether
acquiring one of these mutexes is safe or might lead to a deadlock.

Unfortunately, this means that lockdep is unable to help diagnose real
deadlocks involving these mutexes when they occur in testing [1] [2]
or in actual use, or to detect bad locking patterns that might lead to
a deadlock.  We would like to obtain as much of lockdep's benefits as
possible without generating a flood of false positives -- which is
what happens if one naively removes these mutexes from the
"novalidate" class.

Accordingly, as a middle ground the mutex in each non-static struct
device will be placed in its own unique locking class.  This approach
gives up some of lockdep's advantages (for example, all devices having
a particular bus_type or device_type might reasonably be put into the
same locking class), but it should at least allow us to gain the
benefit of some of lockdep's capabilities.

Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
Link: https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/
Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Ingo Molnar <mingo@redhat.com>
CC: Boqun Feng <boqun.feng@gmail.com>

---

I decided to take your suggestion about introducing a new
lockdep_static_obj() function, to reduce the size of the patch.  It
can always be combined with the original static_obj() function later
on, if that's what the lockdep developers want.

If Hillf Danton contributed any of the code for this patch, I haven't
seen it in any messages sent to me or in the mailing list archives.
That's why I didn't include a Co-developed-by: tag for him.

 drivers/base/core.c      |    8 +++++++-
 include/linux/device.h   |    1 +
 include/linux/lockdep.h  |    6 ++++++
 kernel/locking/lockdep.c |    5 +++++
 4 files changed, 19 insertions(+), 1 deletion(-)

Index: usb-devel/drivers/base/core.c
===================================================================
--- usb-devel.orig/drivers/base/core.c
+++ usb-devel/drivers/base/core.c
@@ -2322,6 +2322,9 @@ static void device_release(struct kobjec
 	devres_release_all(dev);
 
 	kfree(dev->dma_range_map);
+	mutex_destroy(&dev->mutex);
+	if (!lockdep_static_obj(dev))
+		lockdep_unregister_key(&dev->mutex_key);
 
 	if (dev->release)
 		dev->release(dev);
@@ -2941,7 +2944,10 @@ void device_initialize(struct device *de
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	mutex_init(&dev->mutex);
-	lockdep_set_novalidate_class(&dev->mutex);
+	if (!lockdep_static_obj(dev)) {
+		lockdep_register_key(&dev->mutex_key);
+		lockdep_set_class(&dev->mutex, &dev->mutex_key);
+	}
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
 	device_pm_init(dev);
Index: usb-devel/include/linux/device.h
===================================================================
--- usb-devel.orig/include/linux/device.h
+++ usb-devel/include/linux/device.h
@@ -570,6 +570,7 @@ struct device {
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
+	struct lock_class_key	mutex_key;	/* Unique key for each device */
 
 	struct dev_links_info	links;
 	struct dev_pm_info	power;
Index: usb-devel/include/linux/lockdep.h
===================================================================
--- usb-devel.orig/include/linux/lockdep.h
+++ usb-devel/include/linux/lockdep.h
@@ -172,6 +172,7 @@ do {							\
 	current->lockdep_recursion -= LOCKDEP_OFF;	\
 } while (0)
 
+extern int lockdep_static_obj(const void *obj);
 extern void lockdep_register_key(struct lock_class_key *key);
 extern void lockdep_unregister_key(struct lock_class_key *key);
 
@@ -391,6 +392,11 @@ static inline void lockdep_set_selftest_
 # define lockdep_free_key_range(start, size)	do { } while (0)
 # define lockdep_sys_exit() 			do { } while (0)
 
+static inline int lockdep_static_obj(const void *obj)
+{
+	return 0;
+}
+
 static inline void lockdep_register_key(struct lock_class_key *key)
 {
 }
Index: usb-devel/kernel/locking/lockdep.c
===================================================================
--- usb-devel.orig/kernel/locking/lockdep.c
+++ usb-devel/kernel/locking/lockdep.c
@@ -857,6 +857,11 @@ static int static_obj(const void *obj)
 	 */
 	return is_module_address(addr) || is_module_percpu_address(addr);
 }
+
+int lockdep_static_obj(const void *obj)
+{
+	return static_obj(obj);
+}
 #endif
 
 /*

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Sat, Feb 11, 2023 at 04:41:11PM -0500, Alan Stern wrote:
> Lockdep is blind to the dev->mutex field of struct device, owing to
> the fact that these mutexes are assigned to lockdep's "novalidate"
> class.  Commit 1704f47b50b5 ("lockdep: Add novalidate class for
> dev->mutex conversion") did this because the hierarchical nature of
> the device tree makes it impossible in practice to determine whether
> acquiring one of these mutexes is safe or might lead to a deadlock.
> 
> Unfortunately, this means that lockdep is unable to help diagnose real
> deadlocks involving these mutexes when they occur in testing [1] [2]
> or in actual use, or to detect bad locking patterns that might lead to
> a deadlock.  We would like to obtain as much of lockdep's benefits as
> possible without generating a flood of false positives -- which is
> what happens if one naively removes these mutexes from the
> "novalidate" class.
> 
> Accordingly, as a middle ground the mutex in each non-static struct
> device will be placed in its own unique locking class.  This approach
> gives up some of lockdep's advantages (for example, all devices having
> a particular bus_type or device_type might reasonably be put into the
> same locking class), but it should at least allow us to gain the
> benefit of some of lockdep's capabilities.
> 
> Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
> Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
> Link: https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/
> Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: Boqun Feng <boqun.feng@gmail.com>

My main worry when adding a ton of classes like this is the
combinatorics (dynamic classes make this more trivial, but it's entirely
possible with just static classes too).

As an example, we used to have a static class per cpu runqueue, now,
given migration takes two runqueue locks (per locking rules in cpu
order -- source and dest runqueue etc..) we got O(n^2) combinations of
class orderings, which lead to a giant graph, which led to both
performance and memory usage issues.

From this was born the subclass, which reduced the whole thing to a
static ordering of runqueue-1 goes inside runqueue-0.

Similar combinatoric issues have cropped up in other places from time to
time, typically solved by using a different annotation.

Now, given the whole device thing is more or less a tree, hierarchical
locking should limit that, the only worry is that sibling locking while
holding parent thing. If siblings are of different classes, things will
both complain and create combinatorics again.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:49:50AM +0100, Peter Zijlstra wrote:
> My main worry when adding a ton of classes like this is the
> combinatorics (dynamic classes make this more trivial, but it's entirely
> possible with just static classes too).
> 
> As an example, we used to have a static class per cpu runqueue, now,
> given migration takes two runqueue locks (per locking rules in cpu
> order -- source and dest runqueue etc..) we got O(n^2) combinations of
> class orderings, which lead to a giant graph, which led to both
> performance and memory usage issues.

Having a new class for each device would add a lot of classes.  Just how 
badly this would affect lockdep's performance is hard to predict.  
Testing seems like the best way to find out.

> From this was born the subclass, which reduced the whole thing to a
> static ordering of runqueue-1 goes inside runqueue-0.
> 
> Similar combinatoric issues have cropped up in other places from time to
> time, typically solved by using a different annotation.
> 
> Now, given the whole device thing is more or less a tree, hierarchical
> locking should limit that, the only worry is that sibling locking while
> holding parent thing. If siblings are of different classes, things will
> both complain and create combinatorics again.

Actually, I expect the sibling ordering thing won't come up very often.  
If it does occur somewhere, there's an excellent chance it can be fixed 
by hand (always locking the children in the same order).

I'm worried more about other things.  Suppose do we manage somehow to 
tell lockdep about locks belonging to a logical tree structure.  How 
can this be incorporated into the checking algorithm?

Here's an example.  Let A be a parent device and B its child, and let X 
be some other type of lock entirely (not a device lock).  Suppose at 
some point in the distant past, a first thread locked B and then X -- 
both locks long since released.  Now suppose a second thread locks A and 
then X (presumably valid, right?).  What should happen if this thread 
tries to lock B?

This ought to give rise to a violation: B->X and X->B.  But would this 
not be checked, under the rule that holding A's lock makes it okay to 
take B's lock?

For that matter, what if the second thread had locked X and then A.  
Should that be allowed?  Even though it doesn't directly contradict 
B->X?

Here's a more complicated example, taken from the USB subsystem.  Each 
usb_device structure representing a hub has some number of children 
usb_device structures (for the USB devices plugged into the hub), as 
well as a bunch of child usb_port device structures (representing the 
hub's own downstream ports, which the other USB devices are plugged 
into).

In theory the child usb_device should really be a direct child of the 
usb_port it's plugged into, but for historical reasons the two are 
siblings instead.

So now we have a rule that you cannot lock a usb_device if you're 
holding the lock of the usb_port that it's plugged into.  (Yes, this is 
logically backwards.)  How could we get lockdep to check this?

The only approach I can think of is something I suggested earlier in 
this discussion: Create lockdep classes for bus_types or device_types 
rather than for individual devices.  (usb_device and usb_port have 
different device_types.)  But I don't know how to combine this with the 
tree-structured approach.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Greg Kroah-Hartman 2 years, 11 months ago

On Mon, Feb 13, 2023 at 11:18:50AM -0500, Alan Stern wrote:
> On Mon, Feb 13, 2023 at 10:49:50AM +0100, Peter Zijlstra wrote:
> > My main worry when adding a ton of classes like this is the
> > combinatorics (dynamic classes make this more trivial, but it's entirely
> > possible with just static classes too).
> > 
> > As an example, we used to have a static class per cpu runqueue, now,
> > given migration takes two runqueue locks (per locking rules in cpu
> > order -- source and dest runqueue etc..) we got O(n^2) combinations of
> > class orderings, which lead to a giant graph, which led to both
> > performance and memory usage issues.
> 
> Having a new class for each device would add a lot of classes.  Just how 
> badly this would affect lockdep's performance is hard to predict.  
> Testing seems like the best way to find out.

We support systems with 50000+ devices today, so one class per device
might be messy.

But back to the original issue here, why any of this?  What's wrong with
what we have now?  I haven't seen real locking issues reported yet (only
odd syzbot reports that didn't make any sense.)  Is this effort even
worth it?

thanks,

greg k-h

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Mon, Feb 13, 2023 at 06:51:57PM +0100, Greg Kroah-Hartman wrote:
> But back to the original issue here, why any of this?  What's wrong with
> what we have now?  I haven't seen real locking issues reported yet (only
> odd syzbot reports that didn't make any sense.)  Is this effort even
> worth it?

A large part of the reason those syzbot reports didn't make any sense 
was because they didn't include any lockdep information.  Making lockdep 
aware of device locking would make those reports a lot easier to 
understand and would help with fixing the bugs.  And it might even help 
with catching similar problems before they get merged into the kernel.

Will it be worthwhile in the end?  I have no idea.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Linus Torvalds 2 years, 12 months ago

On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote:
>
> @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de
>         kobject_init(&dev->kobj, &device_ktype);
>         INIT_LIST_HEAD(&dev->dma_pools);
>         mutex_init(&dev->mutex);
> -       lockdep_set_novalidate_class(&dev->mutex);
> +       if (!lockdep_static_obj(dev)) {
> +               lockdep_register_key(&dev->mutex_key);
> +               lockdep_set_class(&dev->mutex, &dev->mutex_key);
> +       }
>         spin_lock_init(&dev->devres_lock);
>         INIT_LIST_HEAD(&dev->devres_head);
>         device_pm_init(dev);

So I think this is the right thing to do, but I note that while that
lockdep_set_novalidate_class() was "documented" to only be for
'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also
used by md/bcache/btree.c for the mca_bucket_alloc().

Can we *please* get rid of it there too (it was added by the initial
code, and never had any explicit excuse for it), possibly by using the
same model.

And then we could get rid of lockdep_set_novalidate_class() entirely.
That would be a good thing.

Coly/Kent? See

    https://lore.kernel.org/lkml/Y+gLd78vChQERZ6A@rowland.harvard.edu/

for more context, and

    https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/

for some history.

                 Linus

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 12 months ago

On 2/11/23 16:51, Linus Torvalds wrote:
> On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote:
>>
>> @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de
>>          kobject_init(&dev->kobj, &device_ktype);
>>          INIT_LIST_HEAD(&dev->dma_pools);
>>          mutex_init(&dev->mutex);
>> -       lockdep_set_novalidate_class(&dev->mutex);
>> +       if (!lockdep_static_obj(dev)) {
>> +               lockdep_register_key(&dev->mutex_key);
>> +               lockdep_set_class(&dev->mutex, &dev->mutex_key);
>> +       }
>>          spin_lock_init(&dev->devres_lock);
>>          INIT_LIST_HEAD(&dev->devres_head);
>>          device_pm_init(dev);
> 
> So I think this is the right thing to do, but I note that while that
> lockdep_set_novalidate_class() was "documented" to only be for
> 'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also
> used by md/bcache/btree.c for the mca_bucket_alloc().
> 
> Can we *please* get rid of it there too (it was added by the initial
> code, and never had any explicit excuse for it), possibly by using the
> same model.
> 
> And then we could get rid of lockdep_set_novalidate_class() entirely.
> That would be a good thing.

Yeah, what bcache really needs (and presumably dev->mutex as well) is 
just to disable lockdep checking for self-deadlock of that lock type, 
since it's got its own deadlock avoidance and the subclass thing isn't 
good enough.

I've got a patch that should do what we want, replying from my other 
account with it.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 12 months ago

On Sat, Feb 11, 2023 at 06:06:28PM -0500, Kent Overstreet wrote:
> On 2/11/23 16:51, Linus Torvalds wrote:
> > On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote:
> > > 
> > > @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de
> > >          kobject_init(&dev->kobj, &device_ktype);
> > >          INIT_LIST_HEAD(&dev->dma_pools);
> > >          mutex_init(&dev->mutex);
> > > -       lockdep_set_novalidate_class(&dev->mutex);
> > > +       if (!lockdep_static_obj(dev)) {
> > > +               lockdep_register_key(&dev->mutex_key);
> > > +               lockdep_set_class(&dev->mutex, &dev->mutex_key);
> > > +       }
> > >          spin_lock_init(&dev->devres_lock);
> > >          INIT_LIST_HEAD(&dev->devres_head);
> > >          device_pm_init(dev);
> > 
> > So I think this is the right thing to do, but I note that while that
> > lockdep_set_novalidate_class() was "documented" to only be for
> > 'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also
> > used by md/bcache/btree.c for the mca_bucket_alloc().
> > 
> > Can we *please* get rid of it there too (it was added by the initial
> > code, and never had any explicit excuse for it), possibly by using the
> > same model.
> > 
> > And then we could get rid of lockdep_set_novalidate_class() entirely.
> > That would be a good thing.
> 
> Yeah, what bcache really needs (and presumably dev->mutex as well) is just
> to disable lockdep checking for self-deadlock of that lock type, since it's
> got its own deadlock avoidance and the subclass thing isn't good enough.
> 
> I've got a patch that should do what we want, replying from my other account
> with it.

After scanning the rest of the thread: I don't think you want to create
separate lockdep classes for each bus and device type, that's defeating
how lockdep works. Maybe if it was only a small, _static_ number of new
classes, but the basic premesis of lockdep is that there are static
human understandable lock ordering rules, so lockdep figures out what
they are and checks them: if you create a bunch of dynamic classes, the
classes are going to be different for everyone in practice and won't
have any real bearing on the structure of the code - that is, given a
lockdep splat, you won't be able to do anything with it.

If static lock ordering rules aren't working (say, because the lock
ordering rules are determined by hardware relationships or what
userspace is doing), then you have to do something more sophisticated.

Wait/wound mutexes would be the next thing to look at, and DRM ended up
needing them for similar reasons as what you're running up against so I
think they bear serious consideration.

ww mutexes are the simple version of dynamic deadlock avoidance -
instead of doing full cycle detection they just compare transaction
start times, so they come at the cost of more frequent aborts. If this
is an issue for you, here's what full cycle detection looks like:

https://evilpiepirate.org/git/bcachefs.git/tree/fs/bcachefs/btree_locking.c#n53

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 12 months ago

On Sat, Feb 11, 2023 at 06:24:42PM -0500, Kent Overstreet wrote:
> After scanning the rest of the thread: I don't think you want to create
> separate lockdep classes for each bus and device type, that's defeating
> how lockdep works.

Not at all.  In fact, exactly the opposite: lockdep works by creating a 
class for each lock-inside-a-data-structure-type combination.  A struct 
device-bus_type/device_type combination is pretty much the same kind of 
thing.

>  Maybe if it was only a small, _static_ number of new
> classes,

The collection of bus_types and device_types _is_ static, in the sense 
that each one is a structure defined in a driver source file.  Whether 
the number is "small" depends on your tolerance for large numbers; the 
kernel has a lot of source files.  :-)

Mind you, I'm not saying that having lockdep classes for each bus_type 
or device_type is always the right thing to do.  There definitely are 
cases where it wouldn't do what we want.  But perhaps in some cases it 
would work.

> but the basic premesis of lockdep is that there are static
> human understandable lock ordering rules, so lockdep figures out what
> they are and checks them: if you create a bunch of dynamic classes, the
> classes are going to be different for everyone in practice and won't
> have any real bearing on the structure of the code

As a rule, bus_type's and device_type's aren't dynamic.  Maybe Greg KH 
once published an example of such a thing; IIRC it was more like a 
proof-of-principle rather than a serious recommendation on how to write 
drivers.  (Or else I'm misremembering and it was actually an example of 
creating dynamic sysfs attributes.)

Or maybe you're referring to what this patch does?  It does indeed 
create a bunch of dynamic classes -- one for each struct device.  The 
ordering rules derived by lockdep will be somewhat arbitrary, as you 
say.  But some of them certainly will be related to the structure of the 
source code.

For instance, there's a rule that you must not acquire a device's lock 
if you're already holding the lock of one of its descendants, and this 
is related to how device discovery works (the driver for a device is 
responsible for discovering the device's children).  But that rule alone 
isn't enough to prevent deadlocks.

>  that is, given a
> lockdep splat, you won't be able to do anything with it.

Nonsense.  Even if you don't know what the locking rules are, given a 
splat you can see what the cycle is and try to figure out which of the 
links should be invalid.  Without the splat you'd be a lot worse off.

> If static lock ordering rules aren't working (say, because the lock
> ordering rules are determined by hardware relationships or what
> userspace is doing), then you have to do something more sophisticated.
> 
> Wait/wound mutexes would be the next thing to look at, and DRM ended up
> needing them for similar reasons as what you're running up against so I
> think they bear serious consideration.
> 
> ww mutexes are the simple version of dynamic deadlock avoidance -
> instead of doing full cycle detection they just compare transaction
> start times, so they come at the cost of more frequent aborts. If this
> is an issue for you, here's what full cycle detection looks like:
> 
> https://evilpiepirate.org/git/bcachefs.git/tree/fs/bcachefs/btree_locking.c#n53

I'm not at all sure that w/w mutexes are the answer to device locking.  
Not to mention that converting over to use them would require a huge 
effort.

A typical kind of issue that seems to pop up a lot is a task trying to 
flush a work queue while holding a lock that's needed by one of the 
items on the queue.  (This isn't particularly limited to dev->mutex 
locks, of course.  It can crop up anywhere, but it seems to happen with 
some regularity in this setting.  Perhaps the fact that lockdep is 
unable to warn about these things is a contributing factor.  Can w/w 
mutexes can handle this sort of thing?  I'm not sufficiently familiar 
with them to know.)  Apparently people write this sort of code because 
they aren't aware of or don't pay attention to the context in which 
their functions run -- that is, the locks that have automatically been 
acquired by the callers.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 12 months ago

On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote:
> On Sat, Feb 11, 2023 at 06:24:42PM -0500, Kent Overstreet wrote:
> > After scanning the rest of the thread: I don't think you want to create
> > separate lockdep classes for each bus and device type, that's defeating
> > how lockdep works.
> 
> Not at all.  In fact, exactly the opposite: lockdep works by creating a 
> class for each lock-inside-a-data-structure-type combination.  A struct 
> device-bus_type/device_type combination is pretty much the same kind of 
> thing.
> 
> >  Maybe if it was only a small, _static_ number of new
> > classes,
> 
> The collection of bus_types and device_types _is_ static, in the sense 
> that each one is a structure defined in a driver source file.  Whether 
> the number is "small" depends on your tolerance for large numbers; the 
> kernel has a lot of source files.  :-)
> 
> Mind you, I'm not saying that having lockdep classes for each bus_type 
> or device_type is always the right thing to do.  There definitely are 
> cases where it wouldn't do what we want.  But perhaps in some cases it 
> would work.
> 
> > but the basic premesis of lockdep is that there are static
> > human understandable lock ordering rules, so lockdep figures out what
> > they are and checks them: if you create a bunch of dynamic classes, the
> > classes are going to be different for everyone in practice and won't
> > have any real bearing on the structure of the code
> 
> As a rule, bus_type's and device_type's aren't dynamic.  Maybe Greg KH 
> once published an example of such a thing; IIRC it was more like a 
> proof-of-principle rather than a serious recommendation on how to write 
> drivers.  (Or else I'm misremembering and it was actually an example of 
> creating dynamic sysfs attributes.)
> 
> Or maybe you're referring to what this patch does?  It does indeed 
> create a bunch of dynamic classes -- one for each struct device.  The 
> ordering rules derived by lockdep will be somewhat arbitrary, as you 
> say.  But some of them certainly will be related to the structure of the 
> source code.

I could be :) I haven't been able to find the patch in question - have a
link?

If you're talking about making lock_class_key dynamic, I think I stand
by what I said though - OTOH, if all you're doing is lifting that to the
caller of the device object init function, so it'll still be a static
object in the driver, that would be totally fine.

I probably should've found the patch before commenting :)

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 12 months ago

On Sat, Feb 11, 2023 at 09:46:42PM -0500, Kent Overstreet wrote:
> On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote:
> > Or maybe you're referring to what this patch does?  It does indeed 
> > create a bunch of dynamic classes -- one for each struct device.  The 
> > ordering rules derived by lockdep will be somewhat arbitrary, as you 
> > say.  But some of them certainly will be related to the structure of the 
> > source code.
> 
> I could be :) I haven't been able to find the patch in question - have a
> link?

It was earlier in this email thread.  Here's a link:

https://lore.kernel.org/r/Y+gLd78vChQERZ6A@rowland.harvard.edu/

> If you're talking about making lock_class_key dynamic, I think I stand
> by what I said though - OTOH, if all you're doing is lifting that to the
> caller of the device object init function, so it'll still be a static
> object in the driver, that would be totally fine.

The patch does the first, not the second.  Feel free to object some 
more...  :-)

Alan Stern

> I probably should've found the patch before commenting :)

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 12 months ago

On Sat, Feb 11, 2023 at 10:03:11PM -0500, Alan Stern wrote:
> On Sat, Feb 11, 2023 at 09:46:42PM -0500, Kent Overstreet wrote:
> > On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote:
> > > Or maybe you're referring to what this patch does?  It does indeed 
> > > create a bunch of dynamic classes -- one for each struct device.  The 
> > > ordering rules derived by lockdep will be somewhat arbitrary, as you 
> > > say.  But some of them certainly will be related to the structure of the 
> > > source code.
> > 
> > I could be :) I haven't been able to find the patch in question - have a
> > link?
> 
> It was earlier in this email thread.  Here's a link:
> 
> https://lore.kernel.org/r/Y+gLd78vChQERZ6A@rowland.harvard.edu/
> 
> > If you're talking about making lock_class_key dynamic, I think I stand
> > by what I said though - OTOH, if all you're doing is lifting that to the
> > caller of the device object init function, so it'll still be a static
> > object in the driver, that would be totally fine.
> 
> The patch does the first, not the second.  Feel free to object some 
> more...  :-)

So IMO the more correct way to do this would be to change
device_initialize() to __device_initialize(), have it take a
lock_class_key as a parameter, and then use __mutex_init() instead of
mutex_init().

But let's think about this more. Will there ever be situations where
lock ordering is dependent on what hardware is plugged into what, or
what hardware is plugged in first?

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote:
> So IMO the more correct way to do this would be to change
> device_initialize() to __device_initialize(), have it take a
> lock_class_key as a parameter, and then use __mutex_init() instead of
> mutex_init().

Yes, maybe.  The increase in code size might outweigh the space saved.

> But let's think about this more. Will there ever be situations where
> lock ordering is dependent on what hardware is plugged into what, or
> what hardware is plugged in first?

Device lock ordering is always dependent on what hardware is plugged 
into what.  However, I'm not aware of any situations where, given two 
different kinds of hardware, either one could be plugged into the other.
Such things may exist but I can't think of any examples.

On the other hand, there are obvious cases where two pieces of the 
_same_ kind of hardware can be plugged together in either order.  USB 
hubs are a good example.

Consider the possibility that a driver might want to lock all of a 
device's children at once.  (I don't know if this ever happens, but it 
might.)  Provided it acquires the parent device's lock first, this is 
utterly safe no matter what order the children are locked in.  Try 
telling that to lockdep!  In the end, we'd probably have to enforce a 
single fixed ordering.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> Provided it acquires the parent device's lock first, this is 
> utterly safe no matter what order the children are locked in.  Try 
> telling that to lockdep! 

mutex_lock_next_lock(child->lock, parent->lock) is there to express this
exact pattern, it allows taking multiple child->lock class locks (in any
order) provided parent->lock is held.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > Provided it acquires the parent device's lock first, this is 
> > utterly safe no matter what order the children are locked in.  Try 
> > telling that to lockdep! 
> 
> mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> exact pattern, it allows taking multiple child->lock class locks (in any
> order) provided parent->lock is held.

Perhaps I'm stupid, but I've never understood how subclasses - or this -
are supposed to work.

Locks don't get a fixed subclass, so what's to prevent some code from
going

/* thread 1: */
mutex_lock(&a->lock);
mutex_lock_nested(&b->lock, 1);

/* thread 2: */
mutex_lock(&b->lock);
mutex_lock_nested(&a->lock, 1);

I don't see how they can be used to check that we're obeying a lock
ordering?

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Mon, Feb 13, 2023 at 01:46:11PM -0500, Kent Overstreet wrote:
> On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > Provided it acquires the parent device's lock first, this is 
> > > utterly safe no matter what order the children are locked in.  Try 
> > > telling that to lockdep! 
> > 
> > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > exact pattern, it allows taking multiple child->lock class locks (in any
> > order) provided parent->lock is held.
> 
> Perhaps I'm stupid, but I've never understood how subclasses - or this -
> are supposed to work.
> 
> Locks don't get a fixed subclass, so what's to prevent some code from
> going

So there's two annotations here, the nest_lock thing and subclasses,
they're distinct things.

Every class gets a fixed 8 subclasses (0-7) given by the unique byte
addresses inside the actual key object.

Subclasses will let you create nesting order of the same class that are
acceptable. Typically lock/1 nests inside lock/0, but that's not
hard-coded, simply convention.

The way it is used is given an external lock order, say the CPU number
for the runqueue locks, you do things like:

void double_rq_lock(struct rq *rq1, struct rq *r2)
{
	lockdep_assert_irqs_disabled();

	if (rq_order_less(r2, rq1))
		swap(rq1, rq2);

	raw_spin_rq_lock(rq1);
	if (__rq_lockp(rq1) != __rq_lock(rq2))
		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);

	...
}

(which is more complicated than it needs to be due to the whole
core-scheduling mess, but should still be readable I suppose).

Basically we make sure rq1 and rq2 are in the correct order and acquire
them with subclass 0 (the default) and subcless 1 (SINGLE_DEPTH_NESTING)
resp. dictating the subclass order.

This is lock order per decree, if you get the order function wrong
lockdep will not see the inversion but you *will* deadlock.

Then there's that nesting lock, that requires two classes and at least 3
locks to make sense:

  P, C1, C2

Where we posit that any multi-lock of Cn is fully serialized by P and it
is used like:

	mutex_lock(P);
	mutex_lock_nest_lock(C1, P);
	mutex_lock_nest_lock(C2, P);

Where any order of Cn is acceptable, because fully ordered by P.

If you were to combine this with subclass on Cn to allow multi-lock
instances not order by P, you get to keep the pieces :-)

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 11 months ago

On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote:
> This is lock order per decree, if you get the order function wrong
> lockdep will not see the inversion but you *will* deadlock.

Yeah, that's what I mean. Given that a subclass isn't a fixed thing you
assign to a lock, just something you magic up as needed - I just don't
see what this gets us?

Why not just tell lockdep what the order function is directly?

(I know, I've been saying I'd write a patch for this, I'll get around to
it, I swear :)

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote:
> Every class gets a fixed 8 subclasses (0-7) given by the unique byte
> addresses inside the actual key object.
> 
> Subclasses will let you create nesting order of the same class that are
> acceptable. Typically lock/1 nests inside lock/0, but that's not
> hard-coded, simply convention.

Can you explain in more detail how this works in the lockdep checking 
algorithm?  (For simplicity, let's leave out issues of interrupt status 
and other environmental things.)

I've been assuming that lockdep builds up a set of links between the 
classes -- namely, a link is created from A to B whenever a thread holds 
a lock of class A while acquiring a lock of class B.  The checking part 
would then amount to just making sure that these links don't form any 
cycles.

So then how do subclasses fit into the picture?  Is it just that now the 
links are between subclasses rather than classes, so it's not 
automatically wrong to hold a lock while acquiring another lock of the 
same class as long as the two acquisitions are in different subclasses?  
But you can still provoke a violation if there's a cycle among the 
subclasses?

> Then there's that nesting lock, that requires two classes and at least 3
> locks to make sense:
> 
>   P, C1, C2
> 
> Where we posit that any multi-lock of Cn is fully serialized by P

Assuming the speculations above are correct, how does the algorithm take 
lockdep nesting into account?  Does it simply avoid creating a link from 
subclass C to itself if both C1 and C2 were acquired while holding a 
lock of the parent subclass and both acquisitions were annotated with 
mutex_lock_next_lock()?  Or is there more to it than that?  (And should 
I have written class rather than subclass?)

And Kent, how does your proposed lockdep_set_no_check_recursion() work?  
Does it just prevent lockdep from creating a link between any two 
subclasses of the specified class?  Does it do anything more?

Alan

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Tue, Feb 14, 2023 at 03:05:42PM -0500, Alan Stern wrote:
> On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote:
> > Every class gets a fixed 8 subclasses (0-7) given by the unique byte
> > addresses inside the actual key object.
> > 
> > Subclasses will let you create nesting order of the same class that are
> > acceptable. Typically lock/1 nests inside lock/0, but that's not
> > hard-coded, simply convention.
> 
> Can you explain in more detail how this works in the lockdep checking 
> algorithm?  (For simplicity, let's leave out issues of interrupt status 
> and other environmental things.)
> 
> I've been assuming that lockdep builds up a set of links between the 
> classes -- namely, a link is created from A to B whenever a thread holds 
> a lock of class A while acquiring a lock of class B.  The checking part 
> would then amount to just making sure that these links don't form any 
> cycles.
> 
> So then how do subclasses fit into the picture?  Is it just that now the 
> links are between subclasses rather than classes, so it's not 
> automatically wrong to hold a lock while acquiring another lock of the 
> same class as long as the two acquisitions are in different subclasses?  
> But you can still provoke a violation if there's a cycle among the 
> subclasses?

For all intents and purposes the subclasses are fully distinct classes
from the validation pov.

	mutex_lock(L);
	mutex_lock_nested(L, 0);

are equivalent (ignoring lockdep_set_subclass()), and

	mutex_lock_nested(L, 1);

is a distinct class, validation wise. So if you write:

	mutex_lock(L1);
	mutex_lock_nested(L2, 1);

you explicitly create a lock order between the distinct validation
classes: L/0,  L/1

> > Then there's that nesting lock, that requires two classes and at least 3
> > locks to make sense:
> > 
> >   P, C1, C2
> > 
> > Where we posit that any multi-lock of Cn is fully serialized by P
> 
> Assuming the speculations above are correct, how does the algorithm take 
> lockdep nesting into account?  Does it simply avoid creating a link from 
> subclass C to itself if both C1 and C2 were acquired while holding a 
> lock of the parent subclass and both acquisitions were annotated with 
> mutex_lock_next_lock()?

Basically this; it will explicitly ignore the nesting.

Given:

	mutex_lock(P);
	mutex_lock_nest_lock(C1, P);
	mutex_lock_nest_lock(C2, P);

mutex_lock_nest_lock() basically does:

 - validate that the instance of P is actually held.
   (as such, mutex_lock_nest_lock(C1, P1); mutex_lock_nest_lock(C2, P2);
   will cause objections).

 - either:

   * establish P->C in the held-lock stack
     and update the graph if so required

   * find the existing C in the held-lock stack
     and instead of complaining about class recursion, increment a
     refcount, and leave the held-stack and thus the graph unmodified.

subsequent mutex_unlock() will decrement the refcount and only when 0
'pop' the actual entry from the held stack.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > Provided it acquires the parent device's lock first, this is 
> > utterly safe no matter what order the children are locked in.  Try 
> > telling that to lockdep! 
> 
> mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> exact pattern, it allows taking multiple child->lock class locks (in any
> order) provided parent->lock is held.

Ah, this is news to me.  Is this sort of thing documented somewhere?

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote:
> On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > Provided it acquires the parent device's lock first, this is 
> > > utterly safe no matter what order the children are locked in.  Try 
> > > telling that to lockdep! 
> > 
> > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > exact pattern, it allows taking multiple child->lock class locks (in any
> > order) provided parent->lock is held.
> 
> Ah, this is news to me.  Is this sort of thing documented somewhere?

Probably not :/

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Boqun Feng 2 years, 11 months ago

On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote:
> > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > > Provided it acquires the parent device's lock first, this is 
> > > > utterly safe no matter what order the children are locked in.  Try 
> > > > telling that to lockdep! 
> > > 
> > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > > exact pattern, it allows taking multiple child->lock class locks (in any
> > > order) provided parent->lock is held.
> > 
> > Ah, this is news to me.  Is this sort of thing documented somewhere?

Basically if you have two lock instances A and B with the same class,
and you know that locking ordering is always A -> B, then you can do

	mutex_lock(A);
	mutex_lock_nest_lock(B, A); // lock B.

to tell the lockdep this is not deadlock, plus lockdep will treat the
acquisition of A and the precondition of acquisition B, so the following
is not a deadlock as well:

T1:
	mutex_lock(A);
	mutex_lock(C);
	mutex_lock_nest_lock(B, A);

T2:
	mutex_lock(A);
	mutex_lock_nest_lock(B, A);
	mutex_lock(C);

Regards,
Boqun

> 
> Probably not :/

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote:
> On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote:
> > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > > > Provided it acquires the parent device's lock first, this is 
> > > > > utterly safe no matter what order the children are locked in.  Try 
> > > > > telling that to lockdep! 
> > > > 
> > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > > > exact pattern, it allows taking multiple child->lock class locks (in any
> > > > order) provided parent->lock is held.
> > > 
> > > Ah, this is news to me.  Is this sort of thing documented somewhere?
> 
> Basically if you have two lock instances A and B with the same class,
> and you know that locking ordering is always A -> B, then you can do
> 
> 	mutex_lock(A);
> 	mutex_lock_nest_lock(B, A); // lock B.
> 

No, this isn't quite right, You need at least 3 locks and 2 classes.

P, C1, C2

Then:

	mutex_lock(P)
	mutex_lock_next_lock(C1, P)
	mutex_lock_next_lock(C2, P)

And it will accept any order of Cn -- since it assumes that any
multi-lock of Cn will always hold P, therefore the ordering fully given
by P.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Boqun Feng 2 years, 11 months ago

On Tue, Feb 14, 2023 at 11:48:07AM +0100, Peter Zijlstra wrote:
> On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote:
> > On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote:
> > > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote:
> > > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > > > > Provided it acquires the parent device's lock first, this is 
> > > > > > utterly safe no matter what order the children are locked in.  Try 
> > > > > > telling that to lockdep! 
> > > > > 
> > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > > > > exact pattern, it allows taking multiple child->lock class locks (in any
> > > > > order) provided parent->lock is held.
> > > > 
> > > > Ah, this is news to me.  Is this sort of thing documented somewhere?
> > 
> > Basically if you have two lock instances A and B with the same class,
> > and you know that locking ordering is always A -> B, then you can do
> > 
> > 	mutex_lock(A);
> > 	mutex_lock_nest_lock(B, A); // lock B.
> > 
> 
> No, this isn't quite right, You need at least 3 locks and 2 classes.
> 
> P, C1, C2
> 
> Then:
> 
> 	mutex_lock(P)
> 	mutex_lock_next_lock(C1, P)
> 	mutex_lock_next_lock(C2, P)
> 
> And it will accept any order of Cn -- since it assumes that any
> multi-lock of Cn will always hold P, therefore the ordering fully given
> by P.

Ah, right, I was missing the fact that it works with 2 classes...

But I think with only one class, the nest_lock() still works, right?
In other words, if P and Cn are the same lock class in your example.

Also seems I gave a wrong answer to Alan, just to clarify, the following
is not a deadlock to lockdep:

T1:
	mutex_lock(P)
	mutex_lock_next_lock(C1, P)
	mutex_lock_next_lock(C2, P)
	mutex_lock(B)

T2:
	mutex_lock(P)
	mutex_lock(B)
	mutex_lock_next_lock(C1, P)
	mutex_lock_next_lock(C2, P)

Because of any pair of

	mutex_lock(L);
	... // other locks maybe
	mutex_lock_nest_lock(M, L);

lockdep will not add M into the dependency graph, since it's nested and
should be serialized by L.

Regards,
Boqun

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Tue, Feb 14, 2023 at 08:22:28AM -0800, Boqun Feng wrote:

> Ah, right, I was missing the fact that it works with 2 classes...
> 
> But I think with only one class, the nest_lock() still works, right?
> In other words, if P and Cn are the same lock class in your example.

I don't think so, but I don't think I've carefully considered that case.

> Also seems I gave a wrong answer to Alan, just to clarify, the following
> is not a deadlock to lockdep:
> 
> T1:
> 	mutex_lock(P)
> 	mutex_lock_next_lock(C1, P)
> 	mutex_lock_next_lock(C2, P)
> 	mutex_lock(B)
> 
> T2:
> 	mutex_lock(P)
> 	mutex_lock(B)
> 	mutex_lock_next_lock(C1, P)
> 	mutex_lock_next_lock(C2, P)
> 

This should in fact complain about a CB-BC deadlock, (but I've not
tested it, just going on memories of how I implemented it).

> Because of any pair of
> 
> 	mutex_lock(L);
> 	... // other locks maybe
> 	mutex_lock_nest_lock(M, L);
> 
> lockdep will not add M into the dependency graph, since it's nested and
> should be serialized by L.

We do enter M into the dependency graph, but instead ignore M-M
recursion. Specifically so that we might catch the above deadlock vs B.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Boqun Feng 2 years, 11 months ago

On Wed, Feb 15, 2023 at 11:45:03AM +0100, Peter Zijlstra wrote:
> On Tue, Feb 14, 2023 at 08:22:28AM -0800, Boqun Feng wrote:
> 
> > Ah, right, I was missing the fact that it works with 2 classes...
> > 
> > But I think with only one class, the nest_lock() still works, right?
> > In other words, if P and Cn are the same lock class in your example.

After playing with some self test cases, I found I was wrong again ;-(

> 
> I don't think so, but I don't think I've carefully considered that case.
> 

You are right, the same class case will trigger a DEBUG_LOCKS_WARN_ON()
in the match_held_lock() when releasing the locks.

> > Also seems I gave a wrong answer to Alan, just to clarify, the following
> > is not a deadlock to lockdep:
> > 
> > T1:
> > 	mutex_lock(P)
> > 	mutex_lock_next_lock(C1, P)
> > 	mutex_lock_next_lock(C2, P)
> > 	mutex_lock(B)
> > 
> > T2:
> > 	mutex_lock(P)
> > 	mutex_lock(B)
> > 	mutex_lock_next_lock(C1, P)
> > 	mutex_lock_next_lock(C2, P)
> > 
> 
> This should in fact complain about a CB-BC deadlock, (but I've not
> tested it, just going on memories of how I implemented it).
> 

Yes, confirmed by a selftest.

> > Because of any pair of
> > 
> > 	mutex_lock(L);
> > 	... // other locks maybe
> > 	mutex_lock_nest_lock(M, L);
> > 
> > lockdep will not add M into the dependency graph, since it's nested and
> > should be serialized by L.
> 
> We do enter M into the dependency graph, but instead ignore M-M
> recursion. Specifically so that we might catch the above deadlock vs B.

Right, I mis-read the code, which suggests I should improve it to help
the future me ;-)

FWIW, the selftests I used are as follow:

Regards,
Boqun

------------------------------->8
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 8d24279fad05..6aadebad68c1 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -60,6 +60,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
 #define LOCKTYPE_RTMUTEX 0x20
 #define LOCKTYPE_LL	0x40
 #define LOCKTYPE_SPECIAL 0x80
+#define LOCKTYPE_NEST	0x100
 
 static struct ww_acquire_ctx t, t2;
 static struct ww_mutex o, o2, o3;
@@ -2091,14 +2092,14 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
 	ww_mutex_lock_slow(&o3, &t);
 }
 
-static void ww_test_spin_nest_unlocked(void)
+static void nest_test_spin_nest_unlocked(void)
 {
 	spin_lock_nest_lock(&lock_A, &o.base);
 	U(A);
 }
 
 /* This is not a deadlock, because we have X1 to serialize Y1 and Y2 */
-static void ww_test_spin_nest_lock(void)
+static void nest_test_spin_nest_lock(void)
 {
 	spin_lock(&lock_X1);
 	spin_lock_nest_lock(&lock_Y1, &lock_X1);
@@ -2110,6 +2111,33 @@ static void ww_test_spin_nest_lock(void)
 	spin_unlock(&lock_X1);
 }
 
+static void nest_test_spin_nest_lock_deadlock(void)
+{
+	nest_test_spin_nest_lock();
+
+	/*
+	 * Although above is not a deadlokc, but with the following code, Y1 and
+	 * A create a ABBA deadlock.
+	 */
+	spin_lock(&lock_X1);
+	spin_lock(&lock_A);
+	spin_lock_nest_lock(&lock_Y1, &lock_X1);
+	spin_lock_nest_lock(&lock_Y2, &lock_X1);
+	spin_unlock(&lock_A);
+	spin_unlock(&lock_Y2);
+	spin_unlock(&lock_Y1);
+	spin_unlock(&lock_X1);
+}
+
+/* Not the supported usage */
+static void nest_test_spin_nest_lock_same_class(void)
+{
+	spin_lock(&lock_X1);
+	spin_lock_nest_lock(&lock_X2, &lock_X1);
+	spin_unlock(&lock_X2);
+	spin_unlock(&lock_X1);
+}
+
 static void ww_test_unneeded_slow(void)
 {
 	WWAI(&t);
@@ -2323,14 +2351,6 @@ static void ww_tests(void)
 	dotest(ww_test_edeadlk_acquire_wrong_slow, FAILURE, LOCKTYPE_WW);
 	pr_cont("\n");
 
-	print_testname("spinlock nest unlocked");
-	dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW);
-	pr_cont("\n");
-
-	print_testname("spinlock nest test");
-	dotest(ww_test_spin_nest_lock, SUCCESS, LOCKTYPE_WW);
-	pr_cont("\n");
-
 	printk("  -----------------------------------------------------\n");
 	printk("                                 |block | try  |context|\n");
 	printk("  -----------------------------------------------------\n");
@@ -2360,6 +2380,27 @@ static void ww_tests(void)
 	pr_cont("\n");
 }
 
+static void nest_tests(void)
+{
+	printk("  --------------------------------------------------------------------------\n");
+	printk("  | nest lock tests |\n");
+	printk("  -------------------\n");
+	print_testname("spinlock nest unlocked");
+	dotest(nest_test_spin_nest_unlocked, FAILURE, LOCKTYPE_NEST);
+	pr_cont("\n");
+
+	print_testname("spinlock nest test");
+	dotest(nest_test_spin_nest_lock, SUCCESS, LOCKTYPE_NEST);
+	pr_cont("\n");
+	print_testname("spinlock nest test dead lock");
+	dotest(nest_test_spin_nest_lock_deadlock, FAILURE, LOCKTYPE_NEST);
+	pr_cont("\n");
+	print_testname("spinlock nest test dead lock");
+	dotest(nest_test_spin_nest_lock_same_class, FAILURE, LOCKTYPE_NEST);
+	pr_cont("\n");
+
+}
+
 
 /*
  * <in hardirq handler>
@@ -2966,6 +3007,8 @@ void locking_selftest(void)
 
 	ww_tests();
 
+	nest_tests();
+
 	force_read_lock_recursive = 0;
 	/*
 	 * queued_read_lock() specific test cases can be put here

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote:
> Basically if you have two lock instances A and B with the same class,
> and you know that locking ordering is always A -> B, then you can do
> 
> 	mutex_lock(A);
> 	mutex_lock_nest_lock(B, A); // lock B.
> 
> to tell the lockdep this is not deadlock, plus lockdep will treat the
> acquisition of A and the precondition of acquisition B, so the following
> is not a deadlock as well:
> 
> T1:
> 	mutex_lock(A);
> 	mutex_lock(C);
> 	mutex_lock_nest_lock(B, A);
> 
> T2:
> 	mutex_lock(A);
> 	mutex_lock_nest_lock(B, A);
> 	mutex_lock(C);

Why isn't this treated as a deadlock?  It sure looks like a deadlock to 
me.  Is this an example where lockdep just doesn't get the right answer?

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Boqun Feng 2 years, 11 months ago

On Mon, Feb 13, 2023 at 09:03:14PM -0500, Alan Stern wrote:
> On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote:
> > Basically if you have two lock instances A and B with the same class,
> > and you know that locking ordering is always A -> B, then you can do
> > 
> > 	mutex_lock(A);
> > 	mutex_lock_nest_lock(B, A); // lock B.
> > 
> > to tell the lockdep this is not deadlock, plus lockdep will treat the
> > acquisition of A and the precondition of acquisition B, so the following
> > is not a deadlock as well:
> > 
> > T1:
> > 	mutex_lock(A);
> > 	mutex_lock(C);
> > 	mutex_lock_nest_lock(B, A);
> > 
> > T2:
> > 	mutex_lock(A);
> > 	mutex_lock_nest_lock(B, A);
> > 	mutex_lock(C);
> 
> Why isn't this treated as a deadlock?  It sure looks like a deadlock to 
> me.  Is this an example where lockdep just doesn't get the right answer?
> 

Because A serializes B and C, so that particular piece of code doesn't
cause deadlock. Note that you can still use you normal mutex_lock() for
B, so if there is more code:

T3:
	mutex_lock(C);
	mutex_lock(B);

lockdep will report deadlock.

Regards,
Boqun

> Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Boqun Feng 2 years, 11 months ago

On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote:
> On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote:
> > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote:
> > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > > > > Provided it acquires the parent device's lock first, this is 
> > > > > utterly safe no matter what order the children are locked in.  Try 
> > > > > telling that to lockdep! 
> > > > 
> > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this
> > > > exact pattern, it allows taking multiple child->lock class locks (in any
> > > > order) provided parent->lock is held.
> > > 
> > > Ah, this is news to me.  Is this sort of thing documented somewhere?
> 
> Basically if you have two lock instances A and B with the same class,
> and you know that locking ordering is always A -> B, then you can do
> 
> 	mutex_lock(A);
> 	mutex_lock_nest_lock(B, A); // lock B.
> 
> to tell the lockdep this is not deadlock, plus lockdep will treat the
> acquisition of A and the precondition of acquisition B, so the following

^^^
acquisition of A *as* the precondition of acquisition B

Regards,
Boqun

> is not a deadlock as well:
> 
> T1:
> 	mutex_lock(A);
> 	mutex_lock(C);
> 	mutex_lock_nest_lock(B, A);
> 
> T2:
> 	mutex_lock(A);
> 	mutex_lock_nest_lock(B, A);
> 	mutex_lock(C);
> 
> Regards,
> Boqun
> 
> > 
> > Probably not :/
>

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 11 months ago

On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote:
> > So IMO the more correct way to do this would be to change
> > device_initialize() to __device_initialize(), have it take a
> > lock_class_key as a parameter, and then use __mutex_init() instead of
> > mutex_init().
> 
> Yes, maybe.  The increase in code size might outweigh the space saved.

Where would the increase in code size come from? And whatever effect
would only be when lockdep is enabled, so not a concern.

But consider that the name of a lock as registered with lockdep really
should correspond to a source code location - i.e. it should be
something you can grep for. (We should probably consider adding file and
line number to that string, since current names are not unambiguous).

Whereas in your pass, you're calling lockdep_set_class(), which
generates a class name via stringifcation - with the same name every
time.

Definitely _don't_ do that. With your patch, the generated lockdep
traces will be useless.

> > But let's think about this more. Will there ever be situations where
> > lock ordering is dependent on what hardware is plugged into what, or
> > what hardware is plugged in first?
> 
> Device lock ordering is always dependent on what hardware is plugged 
> into what.  However, I'm not aware of any situations where, given two 
> different kinds of hardware, either one could be plugged into the other.
> Such things may exist but I can't think of any examples.

Different brands of hubs?

Lots of things have hubs embedded into them these days. 15 years ago I
had an Apple keyboard with an embedded hub.

> On the other hand, there are obvious cases where two pieces of the 
> _same_ kind of hardware can be plugged together in either order.  USB 
> hubs are a good example.
> 
> Consider the possibility that a driver might want to lock all of a 
> device's children at once.  (I don't know if this ever happens, but it 
> might.)  Provided it acquires the parent device's lock first, this is 
> utterly safe no matter what order the children are locked in.  Try 
> telling that to lockdep!  In the end, we'd probably have to enforce a 
> single fixed ordering.

The way you speak of hypotheticals and possibilities makes me think that
the actual locking rules are not ironed out at all :)

The patch I posted would be an improvement over the current situation,
because it'd get you checking w.r.t. other lock types - but with that
you would still have to have your own deadlock avoidance strategy, and
you'd have to be _really_ clear on what it is and how you know that
you're getting it right - you're still opting out of checking.

I think you should really be investigating wait/wound mutexes here.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Sun, Feb 12, 2023 at 02:14:02PM -0500, Kent Overstreet wrote:
> On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote:
> > On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote:
> > > So IMO the more correct way to do this would be to change
> > > device_initialize() to __device_initialize(), have it take a
> > > lock_class_key as a parameter, and then use __mutex_init() instead of
> > > mutex_init().
> > 
> > Yes, maybe.  The increase in code size might outweigh the space saved.
> 
> Where would the increase in code size come from?

Maybe I didn't understand your suggestion.  Did you mean that all 
callers of device_initialize() (or whatever) should be able to specify 
their own lock_class_key?  Or were you just trying to avoid the single 
static allocation of a lock_class_key in device_initialize() done as a 
side-effect of the mutex_init() call?

> And whatever effect
> would only be when lockdep is enabled, so not a concern.

Not if a new function is created (i.e., __device_initialize()).

> But consider that the name of a lock as registered with lockdep really
> should correspond to a source code location - i.e. it should be
> something you can grep for. (We should probably consider adding file and
> line number to that string, since current names are not unambiguous).
> 
> Whereas in your pass, you're calling lockdep_set_class(), which
> generates a class name via stringifcation - with the same name every
> time.
> 
> Definitely _don't_ do that. With your patch, the generated lockdep
> traces will be useless.

I'll revise the patch to use the device's name for the class.  While it 
may not be globally unique, it should be sufficiently specific.

(Device names are often set after the device is initialized.  Does 
lockdep mind if a lock_class_key's name is changed after it has been 
registered?)

> > > But let's think about this more. Will there ever be situations where
> > > lock ordering is dependent on what hardware is plugged into what, or
> > > what hardware is plugged in first?
> > 
> > Device lock ordering is always dependent on what hardware is plugged 
> > into what.  However, I'm not aware of any situations where, given two 
> > different kinds of hardware, either one could be plugged into the other.
> > Such things may exist but I can't think of any examples.
> 
> Different brands of hubs?

As far as the kernel is concerned they wouldn't be _different kinds_ of 
hardware; they would both be hubs.

> Lots of things have hubs embedded into them these days. 15 years ago I
> had an Apple keyboard with an embedded hub.

Apple keyboards get treated as two logically separate pieces of 
hardware: a hub and a keyboard.  The fact that they are packaged as a 
single unit is irrelevant.

> > On the other hand, there are obvious cases where two pieces of the 
> > _same_ kind of hardware can be plugged together in either order.  USB 
> > hubs are a good example.
> > 
> > Consider the possibility that a driver might want to lock all of a 
> > device's children at once.  (I don't know if this ever happens, but it 
> > might.)  Provided it acquires the parent device's lock first, this is 
> > utterly safe no matter what order the children are locked in.  Try 
> > telling that to lockdep!  In the end, we'd probably have to enforce a 
> > single fixed ordering.
> 
> The way you speak of hypotheticals and possibilities makes me think that
> the actual locking rules are not ironed out at all :)

You're right.  There are no explicitly documented rules for device 
locking as far as I'm aware.  Each subsystem handles its own locking 
independently (but self-consistently, we hope).  That's one of the 
reasons that creating lockdep rules for devices is so difficult.

The business about not locking a parent if you already own the child's 
lock is perhaps the only universal -- and I don't know that it's written 
down anywhere.

> The patch I posted would be an improvement over the current situation,
> because it'd get you checking w.r.t. other lock types - but with that
> you would still have to have your own deadlock avoidance strategy, and
> you'd have to be _really_ clear on what it is and how you know that
> you're getting it right - you're still opting out of checking.

Same with the patch I posted, except that it opts back into checking.

> I think you should really be investigating wait/wound mutexes here.

At this stage, converting would be most impractical.  And I don't think 
it's really needed.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote:

> (Device names are often set after the device is initialized.  Does 
> lockdep mind if a lock_class_key's name is changed after it has been 
> registered?)

It does, althought I don't at the moment recall how hard it would be to
change that.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:27:18AM +0100, Peter Zijlstra wrote:
> On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote:
> 
> > (Device names are often set after the device is initialized.  Does 
> > lockdep mind if a lock_class_key's name is changed after it has been 
> > registered?)
> 
> It does, althought I don't at the moment recall how hard it would be to
> change that.

If the names are only used for printing purposes, or other similarly 
innocuous things, it ought to be enough to set the name with 
smp_store_release() and read the name with smp_load_acquire().

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Mon, Feb 13, 2023 at 10:28:08AM -0500, Alan Stern wrote:
> On Mon, Feb 13, 2023 at 10:27:18AM +0100, Peter Zijlstra wrote:
> > On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote:
> > 
> > > (Device names are often set after the device is initialized.  Does 
> > > lockdep mind if a lock_class_key's name is changed after it has been 
> > > registered?)
> > 
> > It does, althought I don't at the moment recall how hard it would be to
> > change that.
> 
> If the names are only used for printing purposes, or other similarly 
> innocuous things, it ought to be enough to set the name with 
> smp_store_release() and read the name with smp_load_acquire().

The name is copied from the key to the 'class' upon registration of the
first lock that uses a particular key. Then later, when looking up the
class for subsequent usages of the same key, the string is checked, and
WARNs if they somehow not match.

Granted, this is a silly sanity check that's easily disabled. But from a
cursory look that seems to be just about it.

The only 'problem' is that it's (typically) class name that's printed,
not the key name, so if you change the key name, without also changing
the class name, reports get really confusing.

Still, that all ought to be fixable.. just a matter of typing or so :-)

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 11 months ago

On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote:
> Maybe I didn't understand your suggestion.  Did you mean that all 
> callers of device_initialize() (or whatever) should be able to specify 
> their own lock_class_key?  Or were you just trying to avoid the single 
> static allocation of a lock_class_key in device_initialize() done as a 
> side-effect of the mutex_init() call?
> 
> > And whatever effect
> > would only be when lockdep is enabled, so not a concern.
> 
> Not if a new function is created (i.e., __device_initialize()).

Follow the same pattern as mutex_init() - have a look over that code.

> > But consider that the name of a lock as registered with lockdep really
> > should correspond to a source code location - i.e. it should be
> > something you can grep for. (We should probably consider adding file and
> > line number to that string, since current names are not unambiguous).
> > 
> > Whereas in your pass, you're calling lockdep_set_class(), which
> > generates a class name via stringifcation - with the same name every
> > time.
> > 
> > Definitely _don't_ do that. With your patch, the generated lockdep
> > traces will be useless.
> 
> I'll revise the patch to use the device's name for the class.  While it 
> may not be globally unique, it should be sufficiently specific.
> 
> (Device names are often set after the device is initialized.  Does 
> lockdep mind if a lock_class_key's name is changed after it has been 
> registered?)

The device name should _not_ be something dynamic, it should be
something easily tied back to a source code location - i.e. related to
the driver name, not the device name.

That's how people read and use lockdep reports!

Do it exactly the same way mutex_init() does it, just lift it up a level
to a wrapper around device_initialize() - stringify the pointer to the
mutex (embedded in struct device, embedded in what-have-you driver code)
and use that.

> You're right.  There are no explicitly documented rules for device 
> locking as far as I'm aware.  Each subsystem handles its own locking 
> independently (but self-consistently, we hope).  That's one of the 
> reasons that creating lockdep rules for devices is so difficult.
> 
> The business about not locking a parent if you already own the child's 
> lock is perhaps the only universal -- and I don't know that it's written 
> down anywhere.

Yeah that's sketchy; if the rules are too complicated to be written
down, they're too complicated.

One thing that could be contemplated is adding support for user-defined
comparison functions to lockdep, to define a lock ordering within a
class when subclass isn't sufficient.

That would work for bcache - for bcache the lock ordering is parent
nodes before children, and if multiple nodes are locked at the same
level they have to be locked in natural key order.

But, this would add a lot of complexity to lockdep, and this is the sort
of situation where if you have a bug in the comparison function (i.e. it
doesn't define a total ordering) it'll break things in terribly fun
ways.

> > The patch I posted would be an improvement over the current situation,
> > because it'd get you checking w.r.t. other lock types - but with that
> > you would still have to have your own deadlock avoidance strategy, and
> > you'd have to be _really_ clear on what it is and how you know that
> > you're getting it right - you're still opting out of checking.
> 
> Same with the patch I posted, except that it opts back into checking.
> 
> > I think you should really be investigating wait/wound mutexes here.
> 
> At this stage, converting would be most impractical.  And I don't think 
> it's really needed.

They do make you deal with lock restarts; unwinding typical stateful
kernel code is not necessarily super practical :)

Anyways, it sounds like the lockdep-class-per-driver approach will get
you more information, that's certainly a reasonable approach for now.

Cheers,
-Kent

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Peter Zijlstra 2 years, 11 months ago

On Sun, Feb 12, 2023 at 03:51:05PM -0500, Kent Overstreet wrote:
> But, this would add a lot of complexity to lockdep, and this is the sort
> of situation where if you have a bug in the comparison function (i.e. it
> doesn't define a total ordering) it'll break things in terribly fun
> ways.

FWIW, it is possible to annotate an actual deadlock away with many of
the lockdep annotations -- care is always needed with this stuff.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Sun, Feb 12, 2023 at 03:51:05PM -0500, Kent Overstreet wrote:
> On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote:
> > I'll revise the patch to use the device's name for the class.  While it 
> > may not be globally unique, it should be sufficiently specific.
> > 
> > (Device names are often set after the device is initialized.  Does 
> > lockdep mind if a lock_class_key's name is changed after it has been 
> > registered?)
> 
> The device name should _not_ be something dynamic, it should be
> something easily tied back to a source code location - i.e. related to
> the driver name, not the device name.
> 
> That's how people read and use lockdep reports!
> 
> Do it exactly the same way mutex_init() does it, just lift it up a level
> to a wrapper around device_initialize() - stringify the pointer to the
> mutex (embedded in struct device, embedded in what-have-you driver code)
> and use that.

I really don't think that's a good idea here.  When you've got a bus 
containing multiple devices, typically all those device structures are 
created by the same line of code.  So knowing the source code location 
won't tell you _which_ device structure is involved in the locking 
cycle or what driver it's using.  By contrast, knowing the device name 
would.

Furthermore, to the extent that the device's name identifies what kind 
of device it is, the name would tell you what where the structure was 
created and which driver it is using.

For example, knowing that a struct device was initialized in line 2104 
of drivers/usb/core/message.c tells you only that the device is a USB 
interface.  It doesn't tell you which USB interface.  But knowing that 
the device's name is 1-7:1.1 not only tells me that the device is a USB 
interface, it also allows me to do:

$ ls -l /sys/bus/usb/devices/1-7:1.1/driver
lrwxrwxrwx. 1 root root 0 Feb 12 19:56 /sys/bus/usb/devices/1-7:1.1/driver -> ../../../../../../bus/usb/drivers/usbhid/

telling me that the device is some sort of HID device.  Probably my 
laptop's touchpad (which could easily be verified).  Even without direct 
interaction with the system, searching through the kernel log would give 
me this information.

> > At this stage, converting would be most impractical.  And I don't think 
> > it's really needed.
> 
> They do make you deal with lock restarts; unwinding typical stateful
> kernel code is not necessarily super practical :)
> 
> Anyways, it sounds like the lockdep-class-per-driver approach will get
> you more information, that's certainly a reasonable approach for now.

Thanks for the review and suggestions.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 11 months ago

On Sun, Feb 12, 2023 at 08:23:34PM -0500, Alan Stern wrote:
> I really don't think that's a good idea here.  When you've got a bus 
> containing multiple devices, typically all those device structures are 
> created by the same line of code.  So knowing the source code location 
> won't tell you _which_ device structure is involved in the locking 
> cycle or what driver it's using.

Yeah, I was thinking about this more and realized it'd be insufficient.

> By contrast, knowing the device name 
> would.
> 
> Furthermore, to the extent that the device's name identifies what kind 
> of device it is, the name would tell you what where the structure was 
> created and which driver it is using.

OTOH, with the device name, it seems like you'll additionally need the
full device topology to be able to do anything with lockdep splats, no?

What if we just added a way to set a comparison function for a lockdep
class? I'm looking at the lockdep code now, and I think I could do that
for you.

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Alan Stern 2 years, 11 months ago

On Sun, Feb 12, 2023 at 09:21:14PM -0500, Kent Overstreet wrote:
> On Sun, Feb 12, 2023 at 08:23:34PM -0500, Alan Stern wrote:
> > I really don't think that's a good idea here.  When you've got a bus 
> > containing multiple devices, typically all those device structures are 
> > created by the same line of code.  So knowing the source code location 
> > won't tell you _which_ device structure is involved in the locking 
> > cycle or what driver it's using.
> 
> Yeah, I was thinking about this more and realized it'd be insufficient.
> 
> > By contrast, knowing the device name 
> > would.
> > 
> > Furthermore, to the extent that the device's name identifies what kind 
> > of device it is, the name would tell you what where the structure was 
> > created and which driver it is using.
> 
> OTOH, with the device name, it seems like you'll additionally need the
> full device topology to be able to do anything with lockdep splats, no?

Not necessarily.  Knowing the name already tells you something about 
where the device fits into the full tree.  And if necessary, you can 
probably glean the necessary information from the kernel log.

Besides, you often don't need the full device topology.  For instance, 
if the problem is that a driver is flushing a work queue while holding a 
lock needed by an item on the queue, mostly you just need to know what 
driver and where the flush occurs -- and that information is already 
provided by lockdep.

> What if we just added a way to set a comparison function for a lockdep
> class? I'm looking at the lockdep code now, and I think I could do that
> for you.

I don't know what a lockdep class comparison function does (or would 
do).  Nor how having one would help.

Alan Stern

Re: [PATCH RFC] drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Posted by Kent Overstreet 2 years, 12 months ago

On Sat, Feb 11, 2023 at 06:06:28PM -0500, Kent Overstreet wrote:
> Yeah, what bcache really needs (and presumably dev->mutex as well) is just
> to disable lockdep checking for self-deadlock of that lock type, since it's
> got its own deadlock avoidance and the subclass thing isn't good enough.
> 
> I've got a patch that should do what we want, replying from my other account
> with it.
-- >8 --
Subject: [PATCH] locking/lockdep: lockdep_set_no_check_recursion()

This adds a method to tell lockdep not to check lock ordering within a
lock class - but to still check lock ordering w.r.t. other lock types.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/lockdep.h       |  6 ++++++
 include/linux/lockdep_types.h |  2 +-
 kernel/locking/lockdep.c      | 26 ++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index e027c504b7..66f28553c6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -666,4 +666,10 @@ lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 }
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_set_no_check_recursion(struct lockdep_map *map);
+#else
+static inline void lockdep_set_no_check_recursion(struct lockdep_map *map) {}
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b..506e769b4a 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -128,7 +128,7 @@ struct lock_class {
 	u8				wait_type_inner;
 	u8				wait_type_outer;
 	u8				lock_type;
-	/* u8				hole; */
+	u8				no_check_recursion;
 
 #ifdef CONFIG_LOCK_STAT
 	unsigned long			contention_point[LOCKSTAT_POINTS];
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index cae7d5f0ad..47ffb8df11 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3023,6 +3023,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next)
 		if ((next->read == 2) && prev->read)
 			continue;
 
+		if (hlock_class(next)->no_check_recursion)
+			continue;
+
 		/*
 		 * We're holding the nest_lock, which serializes this lock's
 		 * nesting behaviour.
@@ -3084,6 +3087,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		return 2;
 	}
 
+	if (hlock_class(prev) == hlock_class(next) &&
+	    hlock_class(prev)->no_check_recursion)
+		return 2;
+
 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
 	 * create a circular dependency in the graph. (We do this by
@@ -6617,3 +6624,22 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	dump_stack();
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_set_no_check_recursion(struct lockdep_map *lock)
+{
+	struct lock_class *class = lock->class_cache[0];
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	lockdep_recursion_inc();
+
+	if (!class)
+		class = register_lock_class(lock, 0, 0);
+	if (class)
+		class->no_check_recursion = true;
+	lockdep_recursion_finish();
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lockdep_set_no_check_recursion);
+#endif
-- 
2.39.1