drivers/base/core.c | 8 +++++++- include/linux/device.h | 1 + include/linux/lockdep.h | 6 ++++++ kernel/locking/lockdep.c | 5 +++++ 4 files changed, 19 insertions(+), 1 deletion(-)
Lockdep is blind to the dev->mutex field of struct device, owing to
the fact that these mutexes are assigned to lockdep's "novalidate"
class. Commit 1704f47b50b5 ("lockdep: Add novalidate class for
dev->mutex conversion") did this because the hierarchical nature of
the device tree makes it impossible in practice to determine whether
acquiring one of these mutexes is safe or might lead to a deadlock.
Unfortunately, this means that lockdep is unable to help diagnose real
deadlocks involving these mutexes when they occur in testing [1] [2]
or in actual use, or to detect bad locking patterns that might lead to
a deadlock. We would like to obtain as much of lockdep's benefits as
possible without generating a flood of false positives -- which is
what happens if one naively removes these mutexes from the
"novalidate" class.
Accordingly, as a middle ground the mutex in each non-static struct
device will be placed in its own unique locking class. This approach
gives up some of lockdep's advantages (for example, all devices having
a particular bus_type or device_type might reasonably be put into the
same locking class), but it should at least allow us to gain the
benefit of some of lockdep's capabilities.
Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
Link: https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/
Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Ingo Molnar <mingo@redhat.com>
CC: Boqun Feng <boqun.feng@gmail.com>
---
I decided to take your suggestion about introducing a new
lockdep_static_obj() function, to reduce the size of the patch. It
can always be combined with the original static_obj() function later
on, if that's what the lockdep developers want.
If Hillf Danton contributed any of the code for this patch, I haven't
seen it in any messages sent to me or in the mailing list archives.
That's why I didn't include a Co-developed-by: tag for him.
drivers/base/core.c | 8 +++++++-
include/linux/device.h | 1 +
include/linux/lockdep.h | 6 ++++++
kernel/locking/lockdep.c | 5 +++++
4 files changed, 19 insertions(+), 1 deletion(-)
Index: usb-devel/drivers/base/core.c
===================================================================
--- usb-devel.orig/drivers/base/core.c
+++ usb-devel/drivers/base/core.c
@@ -2322,6 +2322,9 @@ static void device_release(struct kobjec
devres_release_all(dev);
kfree(dev->dma_range_map);
+ mutex_destroy(&dev->mutex);
+ if (!lockdep_static_obj(dev))
+ lockdep_unregister_key(&dev->mutex_key);
if (dev->release)
dev->release(dev);
@@ -2941,7 +2944,10 @@ void device_initialize(struct device *de
kobject_init(&dev->kobj, &device_ktype);
INIT_LIST_HEAD(&dev->dma_pools);
mutex_init(&dev->mutex);
- lockdep_set_novalidate_class(&dev->mutex);
+ if (!lockdep_static_obj(dev)) {
+ lockdep_register_key(&dev->mutex_key);
+ lockdep_set_class(&dev->mutex, &dev->mutex_key);
+ }
spin_lock_init(&dev->devres_lock);
INIT_LIST_HEAD(&dev->devres_head);
device_pm_init(dev);
Index: usb-devel/include/linux/device.h
===================================================================
--- usb-devel.orig/include/linux/device.h
+++ usb-devel/include/linux/device.h
@@ -570,6 +570,7 @@ struct device {
struct mutex mutex; /* mutex to synchronize calls to
* its driver.
*/
+ struct lock_class_key mutex_key; /* Unique key for each device */
struct dev_links_info links;
struct dev_pm_info power;
Index: usb-devel/include/linux/lockdep.h
===================================================================
--- usb-devel.orig/include/linux/lockdep.h
+++ usb-devel/include/linux/lockdep.h
@@ -172,6 +172,7 @@ do { \
current->lockdep_recursion -= LOCKDEP_OFF; \
} while (0)
+extern int lockdep_static_obj(const void *obj);
extern void lockdep_register_key(struct lock_class_key *key);
extern void lockdep_unregister_key(struct lock_class_key *key);
@@ -391,6 +392,11 @@ static inline void lockdep_set_selftest_
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
+static inline int lockdep_static_obj(const void *obj)
+{
+ return 0;
+}
+
static inline void lockdep_register_key(struct lock_class_key *key)
{
}
Index: usb-devel/kernel/locking/lockdep.c
===================================================================
--- usb-devel.orig/kernel/locking/lockdep.c
+++ usb-devel/kernel/locking/lockdep.c
@@ -857,6 +857,11 @@ static int static_obj(const void *obj)
*/
return is_module_address(addr) || is_module_percpu_address(addr);
}
+
+int lockdep_static_obj(const void *obj)
+{
+ return static_obj(obj);
+}
#endif
/*
On Sat, Feb 11, 2023 at 04:41:11PM -0500, Alan Stern wrote: > Lockdep is blind to the dev->mutex field of struct device, owing to > the fact that these mutexes are assigned to lockdep's "novalidate" > class. Commit 1704f47b50b5 ("lockdep: Add novalidate class for > dev->mutex conversion") did this because the hierarchical nature of > the device tree makes it impossible in practice to determine whether > acquiring one of these mutexes is safe or might lead to a deadlock. > > Unfortunately, this means that lockdep is unable to help diagnose real > deadlocks involving these mutexes when they occur in testing [1] [2] > or in actual use, or to detect bad locking patterns that might lead to > a deadlock. We would like to obtain as much of lockdep's benefits as > possible without generating a flood of false positives -- which is > what happens if one naively removes these mutexes from the > "novalidate" class. > > Accordingly, as a middle ground the mutex in each non-static struct > device will be placed in its own unique locking class. This approach > gives up some of lockdep's advantages (for example, all devices having > a particular bus_type or device_type might reasonably be put into the > same locking class), but it should at least allow us to gain the > benefit of some of lockdep's capabilities. > > Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1] > Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2] > Link: https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/ > Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Signed-off-by: Alan Stern <stern@rowland.harvard.edu> > CC: Peter Zijlstra <peterz@infradead.org> > CC: Ingo Molnar <mingo@redhat.com> > CC: Boqun Feng <boqun.feng@gmail.com> My main worry when adding a ton of classes like this is the combinatorics (dynamic classes make this more trivial, but it's entirely possible with just static classes too). As an example, we used to have a static class per cpu runqueue, now, given migration takes two runqueue locks (per locking rules in cpu order -- source and dest runqueue etc..) we got O(n^2) combinations of class orderings, which lead to a giant graph, which led to both performance and memory usage issues. From this was born the subclass, which reduced the whole thing to a static ordering of runqueue-1 goes inside runqueue-0. Similar combinatoric issues have cropped up in other places from time to time, typically solved by using a different annotation. Now, given the whole device thing is more or less a tree, hierarchical locking should limit that, the only worry is that sibling locking while holding parent thing. If siblings are of different classes, things will both complain and create combinatorics again.
On Mon, Feb 13, 2023 at 10:49:50AM +0100, Peter Zijlstra wrote: > My main worry when adding a ton of classes like this is the > combinatorics (dynamic classes make this more trivial, but it's entirely > possible with just static classes too). > > As an example, we used to have a static class per cpu runqueue, now, > given migration takes two runqueue locks (per locking rules in cpu > order -- source and dest runqueue etc..) we got O(n^2) combinations of > class orderings, which lead to a giant graph, which led to both > performance and memory usage issues. Having a new class for each device would add a lot of classes. Just how badly this would affect lockdep's performance is hard to predict. Testing seems like the best way to find out. > From this was born the subclass, which reduced the whole thing to a > static ordering of runqueue-1 goes inside runqueue-0. > > Similar combinatoric issues have cropped up in other places from time to > time, typically solved by using a different annotation. > > Now, given the whole device thing is more or less a tree, hierarchical > locking should limit that, the only worry is that sibling locking while > holding parent thing. If siblings are of different classes, things will > both complain and create combinatorics again. Actually, I expect the sibling ordering thing won't come up very often. If it does occur somewhere, there's an excellent chance it can be fixed by hand (always locking the children in the same order). I'm worried more about other things. Suppose do we manage somehow to tell lockdep about locks belonging to a logical tree structure. How can this be incorporated into the checking algorithm? Here's an example. Let A be a parent device and B its child, and let X be some other type of lock entirely (not a device lock). Suppose at some point in the distant past, a first thread locked B and then X -- both locks long since released. Now suppose a second thread locks A and then X (presumably valid, right?). What should happen if this thread tries to lock B? This ought to give rise to a violation: B->X and X->B. But would this not be checked, under the rule that holding A's lock makes it okay to take B's lock? For that matter, what if the second thread had locked X and then A. Should that be allowed? Even though it doesn't directly contradict B->X? Here's a more complicated example, taken from the USB subsystem. Each usb_device structure representing a hub has some number of children usb_device structures (for the USB devices plugged into the hub), as well as a bunch of child usb_port device structures (representing the hub's own downstream ports, which the other USB devices are plugged into). In theory the child usb_device should really be a direct child of the usb_port it's plugged into, but for historical reasons the two are siblings instead. So now we have a rule that you cannot lock a usb_device if you're holding the lock of the usb_port that it's plugged into. (Yes, this is logically backwards.) How could we get lockdep to check this? The only approach I can think of is something I suggested earlier in this discussion: Create lockdep classes for bus_types or device_types rather than for individual devices. (usb_device and usb_port have different device_types.) But I don't know how to combine this with the tree-structured approach. Alan Stern
On Mon, Feb 13, 2023 at 11:18:50AM -0500, Alan Stern wrote: > On Mon, Feb 13, 2023 at 10:49:50AM +0100, Peter Zijlstra wrote: > > My main worry when adding a ton of classes like this is the > > combinatorics (dynamic classes make this more trivial, but it's entirely > > possible with just static classes too). > > > > As an example, we used to have a static class per cpu runqueue, now, > > given migration takes two runqueue locks (per locking rules in cpu > > order -- source and dest runqueue etc..) we got O(n^2) combinations of > > class orderings, which lead to a giant graph, which led to both > > performance and memory usage issues. > > Having a new class for each device would add a lot of classes. Just how > badly this would affect lockdep's performance is hard to predict. > Testing seems like the best way to find out. We support systems with 50000+ devices today, so one class per device might be messy. But back to the original issue here, why any of this? What's wrong with what we have now? I haven't seen real locking issues reported yet (only odd syzbot reports that didn't make any sense.) Is this effort even worth it? thanks, greg k-h
On Mon, Feb 13, 2023 at 06:51:57PM +0100, Greg Kroah-Hartman wrote: > But back to the original issue here, why any of this? What's wrong with > what we have now? I haven't seen real locking issues reported yet (only > odd syzbot reports that didn't make any sense.) Is this effort even > worth it? A large part of the reason those syzbot reports didn't make any sense was because they didn't include any lockdep information. Making lockdep aware of device locking would make those reports a lot easier to understand and would help with fixing the bugs. And it might even help with catching similar problems before they get merged into the kernel. Will it be worthwhile in the end? I have no idea. Alan Stern
On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote: > > @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de > kobject_init(&dev->kobj, &device_ktype); > INIT_LIST_HEAD(&dev->dma_pools); > mutex_init(&dev->mutex); > - lockdep_set_novalidate_class(&dev->mutex); > + if (!lockdep_static_obj(dev)) { > + lockdep_register_key(&dev->mutex_key); > + lockdep_set_class(&dev->mutex, &dev->mutex_key); > + } > spin_lock_init(&dev->devres_lock); > INIT_LIST_HEAD(&dev->devres_head); > device_pm_init(dev); So I think this is the right thing to do, but I note that while that lockdep_set_novalidate_class() was "documented" to only be for 'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also used by md/bcache/btree.c for the mca_bucket_alloc(). Can we *please* get rid of it there too (it was added by the initial code, and never had any explicit excuse for it), possibly by using the same model. And then we could get rid of lockdep_set_novalidate_class() entirely. That would be a good thing. Coly/Kent? See https://lore.kernel.org/lkml/Y+gLd78vChQERZ6A@rowland.harvard.edu/ for more context, and https://lore.kernel.org/all/28a82f50-39d5-a45f-7c7a-57a66cec0741@I-love.SAKURA.ne.jp/ for some history. Linus
On 2/11/23 16:51, Linus Torvalds wrote: > On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote: >> >> @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de >> kobject_init(&dev->kobj, &device_ktype); >> INIT_LIST_HEAD(&dev->dma_pools); >> mutex_init(&dev->mutex); >> - lockdep_set_novalidate_class(&dev->mutex); >> + if (!lockdep_static_obj(dev)) { >> + lockdep_register_key(&dev->mutex_key); >> + lockdep_set_class(&dev->mutex, &dev->mutex_key); >> + } >> spin_lock_init(&dev->devres_lock); >> INIT_LIST_HEAD(&dev->devres_head); >> device_pm_init(dev); > > So I think this is the right thing to do, but I note that while that > lockdep_set_novalidate_class() was "documented" to only be for > 'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also > used by md/bcache/btree.c for the mca_bucket_alloc(). > > Can we *please* get rid of it there too (it was added by the initial > code, and never had any explicit excuse for it), possibly by using the > same model. > > And then we could get rid of lockdep_set_novalidate_class() entirely. > That would be a good thing. Yeah, what bcache really needs (and presumably dev->mutex as well) is just to disable lockdep checking for self-deadlock of that lock type, since it's got its own deadlock avoidance and the subclass thing isn't good enough. I've got a patch that should do what we want, replying from my other account with it.
On Sat, Feb 11, 2023 at 06:06:28PM -0500, Kent Overstreet wrote: > On 2/11/23 16:51, Linus Torvalds wrote: > > On Sat, Feb 11, 2023 at 1:41 PM Alan Stern <stern@rowland.harvard.edu> wrote: > > > > > > @@ -2941,7 +2944,10 @@ void device_initialize(struct device *de > > > kobject_init(&dev->kobj, &device_ktype); > > > INIT_LIST_HEAD(&dev->dma_pools); > > > mutex_init(&dev->mutex); > > > - lockdep_set_novalidate_class(&dev->mutex); > > > + if (!lockdep_static_obj(dev)) { > > > + lockdep_register_key(&dev->mutex_key); > > > + lockdep_set_class(&dev->mutex, &dev->mutex_key); > > > + } > > > spin_lock_init(&dev->devres_lock); > > > INIT_LIST_HEAD(&dev->devres_head); > > > device_pm_init(dev); > > > > So I think this is the right thing to do, but I note that while that > > lockdep_set_novalidate_class() was "documented" to only be for > > 'dev->mutex' by scripts/checkpatch.pl, that horrific thing is also > > used by md/bcache/btree.c for the mca_bucket_alloc(). > > > > Can we *please* get rid of it there too (it was added by the initial > > code, and never had any explicit excuse for it), possibly by using the > > same model. > > > > And then we could get rid of lockdep_set_novalidate_class() entirely. > > That would be a good thing. > > Yeah, what bcache really needs (and presumably dev->mutex as well) is just > to disable lockdep checking for self-deadlock of that lock type, since it's > got its own deadlock avoidance and the subclass thing isn't good enough. > > I've got a patch that should do what we want, replying from my other account > with it. After scanning the rest of the thread: I don't think you want to create separate lockdep classes for each bus and device type, that's defeating how lockdep works. Maybe if it was only a small, _static_ number of new classes, but the basic premesis of lockdep is that there are static human understandable lock ordering rules, so lockdep figures out what they are and checks them: if you create a bunch of dynamic classes, the classes are going to be different for everyone in practice and won't have any real bearing on the structure of the code - that is, given a lockdep splat, you won't be able to do anything with it. If static lock ordering rules aren't working (say, because the lock ordering rules are determined by hardware relationships or what userspace is doing), then you have to do something more sophisticated. Wait/wound mutexes would be the next thing to look at, and DRM ended up needing them for similar reasons as what you're running up against so I think they bear serious consideration. ww mutexes are the simple version of dynamic deadlock avoidance - instead of doing full cycle detection they just compare transaction start times, so they come at the cost of more frequent aborts. If this is an issue for you, here's what full cycle detection looks like: https://evilpiepirate.org/git/bcachefs.git/tree/fs/bcachefs/btree_locking.c#n53
On Sat, Feb 11, 2023 at 06:24:42PM -0500, Kent Overstreet wrote: > After scanning the rest of the thread: I don't think you want to create > separate lockdep classes for each bus and device type, that's defeating > how lockdep works. Not at all. In fact, exactly the opposite: lockdep works by creating a class for each lock-inside-a-data-structure-type combination. A struct device-bus_type/device_type combination is pretty much the same kind of thing. > Maybe if it was only a small, _static_ number of new > classes, The collection of bus_types and device_types _is_ static, in the sense that each one is a structure defined in a driver source file. Whether the number is "small" depends on your tolerance for large numbers; the kernel has a lot of source files. :-) Mind you, I'm not saying that having lockdep classes for each bus_type or device_type is always the right thing to do. There definitely are cases where it wouldn't do what we want. But perhaps in some cases it would work. > but the basic premesis of lockdep is that there are static > human understandable lock ordering rules, so lockdep figures out what > they are and checks them: if you create a bunch of dynamic classes, the > classes are going to be different for everyone in practice and won't > have any real bearing on the structure of the code As a rule, bus_type's and device_type's aren't dynamic. Maybe Greg KH once published an example of such a thing; IIRC it was more like a proof-of-principle rather than a serious recommendation on how to write drivers. (Or else I'm misremembering and it was actually an example of creating dynamic sysfs attributes.) Or maybe you're referring to what this patch does? It does indeed create a bunch of dynamic classes -- one for each struct device. The ordering rules derived by lockdep will be somewhat arbitrary, as you say. But some of them certainly will be related to the structure of the source code. For instance, there's a rule that you must not acquire a device's lock if you're already holding the lock of one of its descendants, and this is related to how device discovery works (the driver for a device is responsible for discovering the device's children). But that rule alone isn't enough to prevent deadlocks. > that is, given a > lockdep splat, you won't be able to do anything with it. Nonsense. Even if you don't know what the locking rules are, given a splat you can see what the cycle is and try to figure out which of the links should be invalid. Without the splat you'd be a lot worse off. > If static lock ordering rules aren't working (say, because the lock > ordering rules are determined by hardware relationships or what > userspace is doing), then you have to do something more sophisticated. > > Wait/wound mutexes would be the next thing to look at, and DRM ended up > needing them for similar reasons as what you're running up against so I > think they bear serious consideration. > > ww mutexes are the simple version of dynamic deadlock avoidance - > instead of doing full cycle detection they just compare transaction > start times, so they come at the cost of more frequent aborts. If this > is an issue for you, here's what full cycle detection looks like: > > https://evilpiepirate.org/git/bcachefs.git/tree/fs/bcachefs/btree_locking.c#n53 I'm not at all sure that w/w mutexes are the answer to device locking. Not to mention that converting over to use them would require a huge effort. A typical kind of issue that seems to pop up a lot is a task trying to flush a work queue while holding a lock that's needed by one of the items on the queue. (This isn't particularly limited to dev->mutex locks, of course. It can crop up anywhere, but it seems to happen with some regularity in this setting. Perhaps the fact that lockdep is unable to warn about these things is a contributing factor. Can w/w mutexes can handle this sort of thing? I'm not sufficiently familiar with them to know.) Apparently people write this sort of code because they aren't aware of or don't pay attention to the context in which their functions run -- that is, the locks that have automatically been acquired by the callers. Alan Stern
On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote: > On Sat, Feb 11, 2023 at 06:24:42PM -0500, Kent Overstreet wrote: > > After scanning the rest of the thread: I don't think you want to create > > separate lockdep classes for each bus and device type, that's defeating > > how lockdep works. > > Not at all. In fact, exactly the opposite: lockdep works by creating a > class for each lock-inside-a-data-structure-type combination. A struct > device-bus_type/device_type combination is pretty much the same kind of > thing. > > > Maybe if it was only a small, _static_ number of new > > classes, > > The collection of bus_types and device_types _is_ static, in the sense > that each one is a structure defined in a driver source file. Whether > the number is "small" depends on your tolerance for large numbers; the > kernel has a lot of source files. :-) > > Mind you, I'm not saying that having lockdep classes for each bus_type > or device_type is always the right thing to do. There definitely are > cases where it wouldn't do what we want. But perhaps in some cases it > would work. > > > but the basic premesis of lockdep is that there are static > > human understandable lock ordering rules, so lockdep figures out what > > they are and checks them: if you create a bunch of dynamic classes, the > > classes are going to be different for everyone in practice and won't > > have any real bearing on the structure of the code > > As a rule, bus_type's and device_type's aren't dynamic. Maybe Greg KH > once published an example of such a thing; IIRC it was more like a > proof-of-principle rather than a serious recommendation on how to write > drivers. (Or else I'm misremembering and it was actually an example of > creating dynamic sysfs attributes.) > > Or maybe you're referring to what this patch does? It does indeed > create a bunch of dynamic classes -- one for each struct device. The > ordering rules derived by lockdep will be somewhat arbitrary, as you > say. But some of them certainly will be related to the structure of the > source code. I could be :) I haven't been able to find the patch in question - have a link? If you're talking about making lock_class_key dynamic, I think I stand by what I said though - OTOH, if all you're doing is lifting that to the caller of the device object init function, so it'll still be a static object in the driver, that would be totally fine. I probably should've found the patch before commenting :)
On Sat, Feb 11, 2023 at 09:46:42PM -0500, Kent Overstreet wrote: > On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote: > > Or maybe you're referring to what this patch does? It does indeed > > create a bunch of dynamic classes -- one for each struct device. The > > ordering rules derived by lockdep will be somewhat arbitrary, as you > > say. But some of them certainly will be related to the structure of the > > source code. > > I could be :) I haven't been able to find the patch in question - have a > link? It was earlier in this email thread. Here's a link: https://lore.kernel.org/r/Y+gLd78vChQERZ6A@rowland.harvard.edu/ > If you're talking about making lock_class_key dynamic, I think I stand > by what I said though - OTOH, if all you're doing is lifting that to the > caller of the device object init function, so it'll still be a static > object in the driver, that would be totally fine. The patch does the first, not the second. Feel free to object some more... :-) Alan Stern > I probably should've found the patch before commenting :)
On Sat, Feb 11, 2023 at 10:03:11PM -0500, Alan Stern wrote: > On Sat, Feb 11, 2023 at 09:46:42PM -0500, Kent Overstreet wrote: > > On Sat, Feb 11, 2023 at 09:40:58PM -0500, Alan Stern wrote: > > > Or maybe you're referring to what this patch does? It does indeed > > > create a bunch of dynamic classes -- one for each struct device. The > > > ordering rules derived by lockdep will be somewhat arbitrary, as you > > > say. But some of them certainly will be related to the structure of the > > > source code. > > > > I could be :) I haven't been able to find the patch in question - have a > > link? > > It was earlier in this email thread. Here's a link: > > https://lore.kernel.org/r/Y+gLd78vChQERZ6A@rowland.harvard.edu/ > > > If you're talking about making lock_class_key dynamic, I think I stand > > by what I said though - OTOH, if all you're doing is lifting that to the > > caller of the device object init function, so it'll still be a static > > object in the driver, that would be totally fine. > > The patch does the first, not the second. Feel free to object some > more... :-) So IMO the more correct way to do this would be to change device_initialize() to __device_initialize(), have it take a lock_class_key as a parameter, and then use __mutex_init() instead of mutex_init(). But let's think about this more. Will there ever be situations where lock ordering is dependent on what hardware is plugged into what, or what hardware is plugged in first?
On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote: > So IMO the more correct way to do this would be to change > device_initialize() to __device_initialize(), have it take a > lock_class_key as a parameter, and then use __mutex_init() instead of > mutex_init(). Yes, maybe. The increase in code size might outweigh the space saved. > But let's think about this more. Will there ever be situations where > lock ordering is dependent on what hardware is plugged into what, or > what hardware is plugged in first? Device lock ordering is always dependent on what hardware is plugged into what. However, I'm not aware of any situations where, given two different kinds of hardware, either one could be plugged into the other. Such things may exist but I can't think of any examples. On the other hand, there are obvious cases where two pieces of the _same_ kind of hardware can be plugged together in either order. USB hubs are a good example. Consider the possibility that a driver might want to lock all of a device's children at once. (I don't know if this ever happens, but it might.) Provided it acquires the parent device's lock first, this is utterly safe no matter what order the children are locked in. Try telling that to lockdep! In the end, we'd probably have to enforce a single fixed ordering. Alan Stern
On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > Provided it acquires the parent device's lock first, this is > utterly safe no matter what order the children are locked in. Try > telling that to lockdep! mutex_lock_next_lock(child->lock, parent->lock) is there to express this exact pattern, it allows taking multiple child->lock class locks (in any order) provided parent->lock is held.
On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > Provided it acquires the parent device's lock first, this is > > utterly safe no matter what order the children are locked in. Try > > telling that to lockdep! > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > exact pattern, it allows taking multiple child->lock class locks (in any > order) provided parent->lock is held. Perhaps I'm stupid, but I've never understood how subclasses - or this - are supposed to work. Locks don't get a fixed subclass, so what's to prevent some code from going /* thread 1: */ mutex_lock(&a->lock); mutex_lock_nested(&b->lock, 1); /* thread 2: */ mutex_lock(&b->lock); mutex_lock_nested(&a->lock, 1); I don't see how they can be used to check that we're obeying a lock ordering?
On Mon, Feb 13, 2023 at 01:46:11PM -0500, Kent Overstreet wrote: > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > Provided it acquires the parent device's lock first, this is > > > utterly safe no matter what order the children are locked in. Try > > > telling that to lockdep! > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > exact pattern, it allows taking multiple child->lock class locks (in any > > order) provided parent->lock is held. > > Perhaps I'm stupid, but I've never understood how subclasses - or this - > are supposed to work. > > Locks don't get a fixed subclass, so what's to prevent some code from > going So there's two annotations here, the nest_lock thing and subclasses, they're distinct things. Every class gets a fixed 8 subclasses (0-7) given by the unique byte addresses inside the actual key object. Subclasses will let you create nesting order of the same class that are acceptable. Typically lock/1 nests inside lock/0, but that's not hard-coded, simply convention. The way it is used is given an external lock order, say the CPU number for the runqueue locks, you do things like: void double_rq_lock(struct rq *rq1, struct rq *r2) { lockdep_assert_irqs_disabled(); if (rq_order_less(r2, rq1)) swap(rq1, rq2); raw_spin_rq_lock(rq1); if (__rq_lockp(rq1) != __rq_lock(rq2)) raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); ... } (which is more complicated than it needs to be due to the whole core-scheduling mess, but should still be readable I suppose). Basically we make sure rq1 and rq2 are in the correct order and acquire them with subclass 0 (the default) and subcless 1 (SINGLE_DEPTH_NESTING) resp. dictating the subclass order. This is lock order per decree, if you get the order function wrong lockdep will not see the inversion but you *will* deadlock. Then there's that nesting lock, that requires two classes and at least 3 locks to make sense: P, C1, C2 Where we posit that any multi-lock of Cn is fully serialized by P and it is used like: mutex_lock(P); mutex_lock_nest_lock(C1, P); mutex_lock_nest_lock(C2, P); Where any order of Cn is acceptable, because fully ordered by P. If you were to combine this with subclass on Cn to allow multi-lock instances not order by P, you get to keep the pieces :-)
On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote: > This is lock order per decree, if you get the order function wrong > lockdep will not see the inversion but you *will* deadlock. Yeah, that's what I mean. Given that a subclass isn't a fixed thing you assign to a lock, just something you magic up as needed - I just don't see what this gets us? Why not just tell lockdep what the order function is directly? (I know, I've been saying I'd write a patch for this, I'll get around to it, I swear :)
On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote: > Every class gets a fixed 8 subclasses (0-7) given by the unique byte > addresses inside the actual key object. > > Subclasses will let you create nesting order of the same class that are > acceptable. Typically lock/1 nests inside lock/0, but that's not > hard-coded, simply convention. Can you explain in more detail how this works in the lockdep checking algorithm? (For simplicity, let's leave out issues of interrupt status and other environmental things.) I've been assuming that lockdep builds up a set of links between the classes -- namely, a link is created from A to B whenever a thread holds a lock of class A while acquiring a lock of class B. The checking part would then amount to just making sure that these links don't form any cycles. So then how do subclasses fit into the picture? Is it just that now the links are between subclasses rather than classes, so it's not automatically wrong to hold a lock while acquiring another lock of the same class as long as the two acquisitions are in different subclasses? But you can still provoke a violation if there's a cycle among the subclasses? > Then there's that nesting lock, that requires two classes and at least 3 > locks to make sense: > > P, C1, C2 > > Where we posit that any multi-lock of Cn is fully serialized by P Assuming the speculations above are correct, how does the algorithm take lockdep nesting into account? Does it simply avoid creating a link from subclass C to itself if both C1 and C2 were acquired while holding a lock of the parent subclass and both acquisitions were annotated with mutex_lock_next_lock()? Or is there more to it than that? (And should I have written class rather than subclass?) And Kent, how does your proposed lockdep_set_no_check_recursion() work? Does it just prevent lockdep from creating a link between any two subclasses of the specified class? Does it do anything more? Alan
On Tue, Feb 14, 2023 at 03:05:42PM -0500, Alan Stern wrote: > On Tue, Feb 14, 2023 at 12:05:27PM +0100, Peter Zijlstra wrote: > > Every class gets a fixed 8 subclasses (0-7) given by the unique byte > > addresses inside the actual key object. > > > > Subclasses will let you create nesting order of the same class that are > > acceptable. Typically lock/1 nests inside lock/0, but that's not > > hard-coded, simply convention. > > Can you explain in more detail how this works in the lockdep checking > algorithm? (For simplicity, let's leave out issues of interrupt status > and other environmental things.) > > I've been assuming that lockdep builds up a set of links between the > classes -- namely, a link is created from A to B whenever a thread holds > a lock of class A while acquiring a lock of class B. The checking part > would then amount to just making sure that these links don't form any > cycles. > > So then how do subclasses fit into the picture? Is it just that now the > links are between subclasses rather than classes, so it's not > automatically wrong to hold a lock while acquiring another lock of the > same class as long as the two acquisitions are in different subclasses? > But you can still provoke a violation if there's a cycle among the > subclasses? For all intents and purposes the subclasses are fully distinct classes from the validation pov. mutex_lock(L); mutex_lock_nested(L, 0); are equivalent (ignoring lockdep_set_subclass()), and mutex_lock_nested(L, 1); is a distinct class, validation wise. So if you write: mutex_lock(L1); mutex_lock_nested(L2, 1); you explicitly create a lock order between the distinct validation classes: L/0, L/1 > > Then there's that nesting lock, that requires two classes and at least 3 > > locks to make sense: > > > > P, C1, C2 > > > > Where we posit that any multi-lock of Cn is fully serialized by P > > Assuming the speculations above are correct, how does the algorithm take > lockdep nesting into account? Does it simply avoid creating a link from > subclass C to itself if both C1 and C2 were acquired while holding a > lock of the parent subclass and both acquisitions were annotated with > mutex_lock_next_lock()? Basically this; it will explicitly ignore the nesting. Given: mutex_lock(P); mutex_lock_nest_lock(C1, P); mutex_lock_nest_lock(C2, P); mutex_lock_nest_lock() basically does: - validate that the instance of P is actually held. (as such, mutex_lock_nest_lock(C1, P1); mutex_lock_nest_lock(C2, P2); will cause objections). - either: * establish P->C in the held-lock stack and update the graph if so required * find the existing C in the held-lock stack and instead of complaining about class recursion, increment a refcount, and leave the held-stack and thus the graph unmodified. subsequent mutex_unlock() will decrement the refcount and only when 0 'pop' the actual entry from the held stack.
On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > Provided it acquires the parent device's lock first, this is > > utterly safe no matter what order the children are locked in. Try > > telling that to lockdep! > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > exact pattern, it allows taking multiple child->lock class locks (in any > order) provided parent->lock is held. Ah, this is news to me. Is this sort of thing documented somewhere? Alan Stern
On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote: > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > Provided it acquires the parent device's lock first, this is > > > utterly safe no matter what order the children are locked in. Try > > > telling that to lockdep! > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > exact pattern, it allows taking multiple child->lock class locks (in any > > order) provided parent->lock is held. > > Ah, this is news to me. Is this sort of thing documented somewhere? Probably not :/
On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote: > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote: > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > > Provided it acquires the parent device's lock first, this is > > > > utterly safe no matter what order the children are locked in. Try > > > > telling that to lockdep! > > > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > > exact pattern, it allows taking multiple child->lock class locks (in any > > > order) provided parent->lock is held. > > > > Ah, this is news to me. Is this sort of thing documented somewhere? Basically if you have two lock instances A and B with the same class, and you know that locking ordering is always A -> B, then you can do mutex_lock(A); mutex_lock_nest_lock(B, A); // lock B. to tell the lockdep this is not deadlock, plus lockdep will treat the acquisition of A and the precondition of acquisition B, so the following is not a deadlock as well: T1: mutex_lock(A); mutex_lock(C); mutex_lock_nest_lock(B, A); T2: mutex_lock(A); mutex_lock_nest_lock(B, A); mutex_lock(C); Regards, Boqun > > Probably not :/
On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote: > On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote: > > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote: > > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > > > Provided it acquires the parent device's lock first, this is > > > > > utterly safe no matter what order the children are locked in. Try > > > > > telling that to lockdep! > > > > > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > > > exact pattern, it allows taking multiple child->lock class locks (in any > > > > order) provided parent->lock is held. > > > > > > Ah, this is news to me. Is this sort of thing documented somewhere? > > Basically if you have two lock instances A and B with the same class, > and you know that locking ordering is always A -> B, then you can do > > mutex_lock(A); > mutex_lock_nest_lock(B, A); // lock B. > No, this isn't quite right, You need at least 3 locks and 2 classes. P, C1, C2 Then: mutex_lock(P) mutex_lock_next_lock(C1, P) mutex_lock_next_lock(C2, P) And it will accept any order of Cn -- since it assumes that any multi-lock of Cn will always hold P, therefore the ordering fully given by P.
On Tue, Feb 14, 2023 at 11:48:07AM +0100, Peter Zijlstra wrote: > On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote: > > On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote: > > > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote: > > > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > > > > Provided it acquires the parent device's lock first, this is > > > > > > utterly safe no matter what order the children are locked in. Try > > > > > > telling that to lockdep! > > > > > > > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > > > > exact pattern, it allows taking multiple child->lock class locks (in any > > > > > order) provided parent->lock is held. > > > > > > > > Ah, this is news to me. Is this sort of thing documented somewhere? > > > > Basically if you have two lock instances A and B with the same class, > > and you know that locking ordering is always A -> B, then you can do > > > > mutex_lock(A); > > mutex_lock_nest_lock(B, A); // lock B. > > > > No, this isn't quite right, You need at least 3 locks and 2 classes. > > P, C1, C2 > > Then: > > mutex_lock(P) > mutex_lock_next_lock(C1, P) > mutex_lock_next_lock(C2, P) > > And it will accept any order of Cn -- since it assumes that any > multi-lock of Cn will always hold P, therefore the ordering fully given > by P. Ah, right, I was missing the fact that it works with 2 classes... But I think with only one class, the nest_lock() still works, right? In other words, if P and Cn are the same lock class in your example. Also seems I gave a wrong answer to Alan, just to clarify, the following is not a deadlock to lockdep: T1: mutex_lock(P) mutex_lock_next_lock(C1, P) mutex_lock_next_lock(C2, P) mutex_lock(B) T2: mutex_lock(P) mutex_lock(B) mutex_lock_next_lock(C1, P) mutex_lock_next_lock(C2, P) Because of any pair of mutex_lock(L); ... // other locks maybe mutex_lock_nest_lock(M, L); lockdep will not add M into the dependency graph, since it's nested and should be serialized by L. Regards, Boqun
On Tue, Feb 14, 2023 at 08:22:28AM -0800, Boqun Feng wrote: > Ah, right, I was missing the fact that it works with 2 classes... > > But I think with only one class, the nest_lock() still works, right? > In other words, if P and Cn are the same lock class in your example. I don't think so, but I don't think I've carefully considered that case. > Also seems I gave a wrong answer to Alan, just to clarify, the following > is not a deadlock to lockdep: > > T1: > mutex_lock(P) > mutex_lock_next_lock(C1, P) > mutex_lock_next_lock(C2, P) > mutex_lock(B) > > T2: > mutex_lock(P) > mutex_lock(B) > mutex_lock_next_lock(C1, P) > mutex_lock_next_lock(C2, P) > This should in fact complain about a CB-BC deadlock, (but I've not tested it, just going on memories of how I implemented it). > Because of any pair of > > mutex_lock(L); > ... // other locks maybe > mutex_lock_nest_lock(M, L); > > lockdep will not add M into the dependency graph, since it's nested and > should be serialized by L. We do enter M into the dependency graph, but instead ignore M-M recursion. Specifically so that we might catch the above deadlock vs B.
On Wed, Feb 15, 2023 at 11:45:03AM +0100, Peter Zijlstra wrote: > On Tue, Feb 14, 2023 at 08:22:28AM -0800, Boqun Feng wrote: > > > Ah, right, I was missing the fact that it works with 2 classes... > > > > But I think with only one class, the nest_lock() still works, right? > > In other words, if P and Cn are the same lock class in your example. After playing with some self test cases, I found I was wrong again ;-( > > I don't think so, but I don't think I've carefully considered that case. > You are right, the same class case will trigger a DEBUG_LOCKS_WARN_ON() in the match_held_lock() when releasing the locks. > > Also seems I gave a wrong answer to Alan, just to clarify, the following > > is not a deadlock to lockdep: > > > > T1: > > mutex_lock(P) > > mutex_lock_next_lock(C1, P) > > mutex_lock_next_lock(C2, P) > > mutex_lock(B) > > > > T2: > > mutex_lock(P) > > mutex_lock(B) > > mutex_lock_next_lock(C1, P) > > mutex_lock_next_lock(C2, P) > > > > This should in fact complain about a CB-BC deadlock, (but I've not > tested it, just going on memories of how I implemented it). > Yes, confirmed by a selftest. > > Because of any pair of > > > > mutex_lock(L); > > ... // other locks maybe > > mutex_lock_nest_lock(M, L); > > > > lockdep will not add M into the dependency graph, since it's nested and > > should be serialized by L. > > We do enter M into the dependency graph, but instead ignore M-M > recursion. Specifically so that we might catch the above deadlock vs B. Right, I mis-read the code, which suggests I should improve it to help the future me ;-) FWIW, the selftests I used are as follow: Regards, Boqun ------------------------------->8 diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 8d24279fad05..6aadebad68c1 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -60,6 +60,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); #define LOCKTYPE_RTMUTEX 0x20 #define LOCKTYPE_LL 0x40 #define LOCKTYPE_SPECIAL 0x80 +#define LOCKTYPE_NEST 0x100 static struct ww_acquire_ctx t, t2; static struct ww_mutex o, o2, o3; @@ -2091,14 +2092,14 @@ static void ww_test_edeadlk_acquire_wrong_slow(void) ww_mutex_lock_slow(&o3, &t); } -static void ww_test_spin_nest_unlocked(void) +static void nest_test_spin_nest_unlocked(void) { spin_lock_nest_lock(&lock_A, &o.base); U(A); } /* This is not a deadlock, because we have X1 to serialize Y1 and Y2 */ -static void ww_test_spin_nest_lock(void) +static void nest_test_spin_nest_lock(void) { spin_lock(&lock_X1); spin_lock_nest_lock(&lock_Y1, &lock_X1); @@ -2110,6 +2111,33 @@ static void ww_test_spin_nest_lock(void) spin_unlock(&lock_X1); } +static void nest_test_spin_nest_lock_deadlock(void) +{ + nest_test_spin_nest_lock(); + + /* + * Although above is not a deadlokc, but with the following code, Y1 and + * A create a ABBA deadlock. + */ + spin_lock(&lock_X1); + spin_lock(&lock_A); + spin_lock_nest_lock(&lock_Y1, &lock_X1); + spin_lock_nest_lock(&lock_Y2, &lock_X1); + spin_unlock(&lock_A); + spin_unlock(&lock_Y2); + spin_unlock(&lock_Y1); + spin_unlock(&lock_X1); +} + +/* Not the supported usage */ +static void nest_test_spin_nest_lock_same_class(void) +{ + spin_lock(&lock_X1); + spin_lock_nest_lock(&lock_X2, &lock_X1); + spin_unlock(&lock_X2); + spin_unlock(&lock_X1); +} + static void ww_test_unneeded_slow(void) { WWAI(&t); @@ -2323,14 +2351,6 @@ static void ww_tests(void) dotest(ww_test_edeadlk_acquire_wrong_slow, FAILURE, LOCKTYPE_WW); pr_cont("\n"); - print_testname("spinlock nest unlocked"); - dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW); - pr_cont("\n"); - - print_testname("spinlock nest test"); - dotest(ww_test_spin_nest_lock, SUCCESS, LOCKTYPE_WW); - pr_cont("\n"); - printk(" -----------------------------------------------------\n"); printk(" |block | try |context|\n"); printk(" -----------------------------------------------------\n"); @@ -2360,6 +2380,27 @@ static void ww_tests(void) pr_cont("\n"); } +static void nest_tests(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | nest lock tests |\n"); + printk(" -------------------\n"); + print_testname("spinlock nest unlocked"); + dotest(nest_test_spin_nest_unlocked, FAILURE, LOCKTYPE_NEST); + pr_cont("\n"); + + print_testname("spinlock nest test"); + dotest(nest_test_spin_nest_lock, SUCCESS, LOCKTYPE_NEST); + pr_cont("\n"); + print_testname("spinlock nest test dead lock"); + dotest(nest_test_spin_nest_lock_deadlock, FAILURE, LOCKTYPE_NEST); + pr_cont("\n"); + print_testname("spinlock nest test dead lock"); + dotest(nest_test_spin_nest_lock_same_class, FAILURE, LOCKTYPE_NEST); + pr_cont("\n"); + +} + /* * <in hardirq handler> @@ -2966,6 +3007,8 @@ void locking_selftest(void) ww_tests(); + nest_tests(); + force_read_lock_recursive = 0; /* * queued_read_lock() specific test cases can be put here
On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote: > Basically if you have two lock instances A and B with the same class, > and you know that locking ordering is always A -> B, then you can do > > mutex_lock(A); > mutex_lock_nest_lock(B, A); // lock B. > > to tell the lockdep this is not deadlock, plus lockdep will treat the > acquisition of A and the precondition of acquisition B, so the following > is not a deadlock as well: > > T1: > mutex_lock(A); > mutex_lock(C); > mutex_lock_nest_lock(B, A); > > T2: > mutex_lock(A); > mutex_lock_nest_lock(B, A); > mutex_lock(C); Why isn't this treated as a deadlock? It sure looks like a deadlock to me. Is this an example where lockdep just doesn't get the right answer? Alan Stern
On Mon, Feb 13, 2023 at 09:03:14PM -0500, Alan Stern wrote: > On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote: > > Basically if you have two lock instances A and B with the same class, > > and you know that locking ordering is always A -> B, then you can do > > > > mutex_lock(A); > > mutex_lock_nest_lock(B, A); // lock B. > > > > to tell the lockdep this is not deadlock, plus lockdep will treat the > > acquisition of A and the precondition of acquisition B, so the following > > is not a deadlock as well: > > > > T1: > > mutex_lock(A); > > mutex_lock(C); > > mutex_lock_nest_lock(B, A); > > > > T2: > > mutex_lock(A); > > mutex_lock_nest_lock(B, A); > > mutex_lock(C); > > Why isn't this treated as a deadlock? It sure looks like a deadlock to > me. Is this an example where lockdep just doesn't get the right answer? > Because A serializes B and C, so that particular piece of code doesn't cause deadlock. Note that you can still use you normal mutex_lock() for B, so if there is more code: T3: mutex_lock(C); mutex_lock(B); lockdep will report deadlock. Regards, Boqun > Alan Stern
On Mon, Feb 13, 2023 at 05:51:11PM -0800, Boqun Feng wrote: > On Mon, Feb 13, 2023 at 05:29:49PM +0100, Peter Zijlstra wrote: > > On Mon, Feb 13, 2023 at 10:25:59AM -0500, Alan Stern wrote: > > > On Mon, Feb 13, 2023 at 10:24:13AM +0100, Peter Zijlstra wrote: > > > > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > > > > Provided it acquires the parent device's lock first, this is > > > > > utterly safe no matter what order the children are locked in. Try > > > > > telling that to lockdep! > > > > > > > > mutex_lock_next_lock(child->lock, parent->lock) is there to express this > > > > exact pattern, it allows taking multiple child->lock class locks (in any > > > > order) provided parent->lock is held. > > > > > > Ah, this is news to me. Is this sort of thing documented somewhere? > > Basically if you have two lock instances A and B with the same class, > and you know that locking ordering is always A -> B, then you can do > > mutex_lock(A); > mutex_lock_nest_lock(B, A); // lock B. > > to tell the lockdep this is not deadlock, plus lockdep will treat the > acquisition of A and the precondition of acquisition B, so the following ^^^ acquisition of A *as* the precondition of acquisition B Regards, Boqun > is not a deadlock as well: > > T1: > mutex_lock(A); > mutex_lock(C); > mutex_lock_nest_lock(B, A); > > T2: > mutex_lock(A); > mutex_lock_nest_lock(B, A); > mutex_lock(C); > > Regards, > Boqun > > > > > Probably not :/ >
On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote: > > So IMO the more correct way to do this would be to change > > device_initialize() to __device_initialize(), have it take a > > lock_class_key as a parameter, and then use __mutex_init() instead of > > mutex_init(). > > Yes, maybe. The increase in code size might outweigh the space saved. Where would the increase in code size come from? And whatever effect would only be when lockdep is enabled, so not a concern. But consider that the name of a lock as registered with lockdep really should correspond to a source code location - i.e. it should be something you can grep for. (We should probably consider adding file and line number to that string, since current names are not unambiguous). Whereas in your pass, you're calling lockdep_set_class(), which generates a class name via stringifcation - with the same name every time. Definitely _don't_ do that. With your patch, the generated lockdep traces will be useless. > > But let's think about this more. Will there ever be situations where > > lock ordering is dependent on what hardware is plugged into what, or > > what hardware is plugged in first? > > Device lock ordering is always dependent on what hardware is plugged > into what. However, I'm not aware of any situations where, given two > different kinds of hardware, either one could be plugged into the other. > Such things may exist but I can't think of any examples. Different brands of hubs? Lots of things have hubs embedded into them these days. 15 years ago I had an Apple keyboard with an embedded hub. > On the other hand, there are obvious cases where two pieces of the > _same_ kind of hardware can be plugged together in either order. USB > hubs are a good example. > > Consider the possibility that a driver might want to lock all of a > device's children at once. (I don't know if this ever happens, but it > might.) Provided it acquires the parent device's lock first, this is > utterly safe no matter what order the children are locked in. Try > telling that to lockdep! In the end, we'd probably have to enforce a > single fixed ordering. The way you speak of hypotheticals and possibilities makes me think that the actual locking rules are not ironed out at all :) The patch I posted would be an improvement over the current situation, because it'd get you checking w.r.t. other lock types - but with that you would still have to have your own deadlock avoidance strategy, and you'd have to be _really_ clear on what it is and how you know that you're getting it right - you're still opting out of checking. I think you should really be investigating wait/wound mutexes here.
On Sun, Feb 12, 2023 at 02:14:02PM -0500, Kent Overstreet wrote: > On Sun, Feb 12, 2023 at 10:23:44AM -0500, Alan Stern wrote: > > On Sat, Feb 11, 2023 at 10:10:23PM -0500, Kent Overstreet wrote: > > > So IMO the more correct way to do this would be to change > > > device_initialize() to __device_initialize(), have it take a > > > lock_class_key as a parameter, and then use __mutex_init() instead of > > > mutex_init(). > > > > Yes, maybe. The increase in code size might outweigh the space saved. > > Where would the increase in code size come from? Maybe I didn't understand your suggestion. Did you mean that all callers of device_initialize() (or whatever) should be able to specify their own lock_class_key? Or were you just trying to avoid the single static allocation of a lock_class_key in device_initialize() done as a side-effect of the mutex_init() call? > And whatever effect > would only be when lockdep is enabled, so not a concern. Not if a new function is created (i.e., __device_initialize()). > But consider that the name of a lock as registered with lockdep really > should correspond to a source code location - i.e. it should be > something you can grep for. (We should probably consider adding file and > line number to that string, since current names are not unambiguous). > > Whereas in your pass, you're calling lockdep_set_class(), which > generates a class name via stringifcation - with the same name every > time. > > Definitely _don't_ do that. With your patch, the generated lockdep > traces will be useless. I'll revise the patch to use the device's name for the class. While it may not be globally unique, it should be sufficiently specific. (Device names are often set after the device is initialized. Does lockdep mind if a lock_class_key's name is changed after it has been registered?) > > > But let's think about this more. Will there ever be situations where > > > lock ordering is dependent on what hardware is plugged into what, or > > > what hardware is plugged in first? > > > > Device lock ordering is always dependent on what hardware is plugged > > into what. However, I'm not aware of any situations where, given two > > different kinds of hardware, either one could be plugged into the other. > > Such things may exist but I can't think of any examples. > > Different brands of hubs? As far as the kernel is concerned they wouldn't be _different kinds_ of hardware; they would both be hubs. > Lots of things have hubs embedded into them these days. 15 years ago I > had an Apple keyboard with an embedded hub. Apple keyboards get treated as two logically separate pieces of hardware: a hub and a keyboard. The fact that they are packaged as a single unit is irrelevant. > > On the other hand, there are obvious cases where two pieces of the > > _same_ kind of hardware can be plugged together in either order. USB > > hubs are a good example. > > > > Consider the possibility that a driver might want to lock all of a > > device's children at once. (I don't know if this ever happens, but it > > might.) Provided it acquires the parent device's lock first, this is > > utterly safe no matter what order the children are locked in. Try > > telling that to lockdep! In the end, we'd probably have to enforce a > > single fixed ordering. > > The way you speak of hypotheticals and possibilities makes me think that > the actual locking rules are not ironed out at all :) You're right. There are no explicitly documented rules for device locking as far as I'm aware. Each subsystem handles its own locking independently (but self-consistently, we hope). That's one of the reasons that creating lockdep rules for devices is so difficult. The business about not locking a parent if you already own the child's lock is perhaps the only universal -- and I don't know that it's written down anywhere. > The patch I posted would be an improvement over the current situation, > because it'd get you checking w.r.t. other lock types - but with that > you would still have to have your own deadlock avoidance strategy, and > you'd have to be _really_ clear on what it is and how you know that > you're getting it right - you're still opting out of checking. Same with the patch I posted, except that it opts back into checking. > I think you should really be investigating wait/wound mutexes here. At this stage, converting would be most impractical. And I don't think it's really needed. Alan Stern
On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote: > (Device names are often set after the device is initialized. Does > lockdep mind if a lock_class_key's name is changed after it has been > registered?) It does, althought I don't at the moment recall how hard it would be to change that.
On Mon, Feb 13, 2023 at 10:27:18AM +0100, Peter Zijlstra wrote: > On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote: > > > (Device names are often set after the device is initialized. Does > > lockdep mind if a lock_class_key's name is changed after it has been > > registered?) > > It does, althought I don't at the moment recall how hard it would be to > change that. If the names are only used for printing purposes, or other similarly innocuous things, it ought to be enough to set the name with smp_store_release() and read the name with smp_load_acquire(). Alan Stern
On Mon, Feb 13, 2023 at 10:28:08AM -0500, Alan Stern wrote: > On Mon, Feb 13, 2023 at 10:27:18AM +0100, Peter Zijlstra wrote: > > On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote: > > > > > (Device names are often set after the device is initialized. Does > > > lockdep mind if a lock_class_key's name is changed after it has been > > > registered?) > > > > It does, althought I don't at the moment recall how hard it would be to > > change that. > > If the names are only used for printing purposes, or other similarly > innocuous things, it ought to be enough to set the name with > smp_store_release() and read the name with smp_load_acquire(). The name is copied from the key to the 'class' upon registration of the first lock that uses a particular key. Then later, when looking up the class for subsequent usages of the same key, the string is checked, and WARNs if they somehow not match. Granted, this is a silly sanity check that's easily disabled. But from a cursory look that seems to be just about it. The only 'problem' is that it's (typically) class name that's printed, not the key name, so if you change the key name, without also changing the class name, reports get really confusing. Still, that all ought to be fixable.. just a matter of typing or so :-)
On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote: > Maybe I didn't understand your suggestion. Did you mean that all > callers of device_initialize() (or whatever) should be able to specify > their own lock_class_key? Or were you just trying to avoid the single > static allocation of a lock_class_key in device_initialize() done as a > side-effect of the mutex_init() call? > > > And whatever effect > > would only be when lockdep is enabled, so not a concern. > > Not if a new function is created (i.e., __device_initialize()). Follow the same pattern as mutex_init() - have a look over that code. > > But consider that the name of a lock as registered with lockdep really > > should correspond to a source code location - i.e. it should be > > something you can grep for. (We should probably consider adding file and > > line number to that string, since current names are not unambiguous). > > > > Whereas in your pass, you're calling lockdep_set_class(), which > > generates a class name via stringifcation - with the same name every > > time. > > > > Definitely _don't_ do that. With your patch, the generated lockdep > > traces will be useless. > > I'll revise the patch to use the device's name for the class. While it > may not be globally unique, it should be sufficiently specific. > > (Device names are often set after the device is initialized. Does > lockdep mind if a lock_class_key's name is changed after it has been > registered?) The device name should _not_ be something dynamic, it should be something easily tied back to a source code location - i.e. related to the driver name, not the device name. That's how people read and use lockdep reports! Do it exactly the same way mutex_init() does it, just lift it up a level to a wrapper around device_initialize() - stringify the pointer to the mutex (embedded in struct device, embedded in what-have-you driver code) and use that. > You're right. There are no explicitly documented rules for device > locking as far as I'm aware. Each subsystem handles its own locking > independently (but self-consistently, we hope). That's one of the > reasons that creating lockdep rules for devices is so difficult. > > The business about not locking a parent if you already own the child's > lock is perhaps the only universal -- and I don't know that it's written > down anywhere. Yeah that's sketchy; if the rules are too complicated to be written down, they're too complicated. One thing that could be contemplated is adding support for user-defined comparison functions to lockdep, to define a lock ordering within a class when subclass isn't sufficient. That would work for bcache - for bcache the lock ordering is parent nodes before children, and if multiple nodes are locked at the same level they have to be locked in natural key order. But, this would add a lot of complexity to lockdep, and this is the sort of situation where if you have a bug in the comparison function (i.e. it doesn't define a total ordering) it'll break things in terribly fun ways. > > The patch I posted would be an improvement over the current situation, > > because it'd get you checking w.r.t. other lock types - but with that > > you would still have to have your own deadlock avoidance strategy, and > > you'd have to be _really_ clear on what it is and how you know that > > you're getting it right - you're still opting out of checking. > > Same with the patch I posted, except that it opts back into checking. > > > I think you should really be investigating wait/wound mutexes here. > > At this stage, converting would be most impractical. And I don't think > it's really needed. They do make you deal with lock restarts; unwinding typical stateful kernel code is not necessarily super practical :) Anyways, it sounds like the lockdep-class-per-driver approach will get you more information, that's certainly a reasonable approach for now. Cheers, -Kent
On Sun, Feb 12, 2023 at 03:51:05PM -0500, Kent Overstreet wrote: > But, this would add a lot of complexity to lockdep, and this is the sort > of situation where if you have a bug in the comparison function (i.e. it > doesn't define a total ordering) it'll break things in terribly fun > ways. FWIW, it is possible to annotate an actual deadlock away with many of the lockdep annotations -- care is always needed with this stuff.
On Sun, Feb 12, 2023 at 03:51:05PM -0500, Kent Overstreet wrote: > On Sun, Feb 12, 2023 at 03:19:16PM -0500, Alan Stern wrote: > > I'll revise the patch to use the device's name for the class. While it > > may not be globally unique, it should be sufficiently specific. > > > > (Device names are often set after the device is initialized. Does > > lockdep mind if a lock_class_key's name is changed after it has been > > registered?) > > The device name should _not_ be something dynamic, it should be > something easily tied back to a source code location - i.e. related to > the driver name, not the device name. > > That's how people read and use lockdep reports! > > Do it exactly the same way mutex_init() does it, just lift it up a level > to a wrapper around device_initialize() - stringify the pointer to the > mutex (embedded in struct device, embedded in what-have-you driver code) > and use that. I really don't think that's a good idea here. When you've got a bus containing multiple devices, typically all those device structures are created by the same line of code. So knowing the source code location won't tell you _which_ device structure is involved in the locking cycle or what driver it's using. By contrast, knowing the device name would. Furthermore, to the extent that the device's name identifies what kind of device it is, the name would tell you what where the structure was created and which driver it is using. For example, knowing that a struct device was initialized in line 2104 of drivers/usb/core/message.c tells you only that the device is a USB interface. It doesn't tell you which USB interface. But knowing that the device's name is 1-7:1.1 not only tells me that the device is a USB interface, it also allows me to do: $ ls -l /sys/bus/usb/devices/1-7:1.1/driver lrwxrwxrwx. 1 root root 0 Feb 12 19:56 /sys/bus/usb/devices/1-7:1.1/driver -> ../../../../../../bus/usb/drivers/usbhid/ telling me that the device is some sort of HID device. Probably my laptop's touchpad (which could easily be verified). Even without direct interaction with the system, searching through the kernel log would give me this information. > > At this stage, converting would be most impractical. And I don't think > > it's really needed. > > They do make you deal with lock restarts; unwinding typical stateful > kernel code is not necessarily super practical :) > > Anyways, it sounds like the lockdep-class-per-driver approach will get > you more information, that's certainly a reasonable approach for now. Thanks for the review and suggestions. Alan Stern
On Sun, Feb 12, 2023 at 08:23:34PM -0500, Alan Stern wrote: > I really don't think that's a good idea here. When you've got a bus > containing multiple devices, typically all those device structures are > created by the same line of code. So knowing the source code location > won't tell you _which_ device structure is involved in the locking > cycle or what driver it's using. Yeah, I was thinking about this more and realized it'd be insufficient. > By contrast, knowing the device name > would. > > Furthermore, to the extent that the device's name identifies what kind > of device it is, the name would tell you what where the structure was > created and which driver it is using. OTOH, with the device name, it seems like you'll additionally need the full device topology to be able to do anything with lockdep splats, no? What if we just added a way to set a comparison function for a lockdep class? I'm looking at the lockdep code now, and I think I could do that for you.
On Sun, Feb 12, 2023 at 09:21:14PM -0500, Kent Overstreet wrote: > On Sun, Feb 12, 2023 at 08:23:34PM -0500, Alan Stern wrote: > > I really don't think that's a good idea here. When you've got a bus > > containing multiple devices, typically all those device structures are > > created by the same line of code. So knowing the source code location > > won't tell you _which_ device structure is involved in the locking > > cycle or what driver it's using. > > Yeah, I was thinking about this more and realized it'd be insufficient. > > > By contrast, knowing the device name > > would. > > > > Furthermore, to the extent that the device's name identifies what kind > > of device it is, the name would tell you what where the structure was > > created and which driver it is using. > > OTOH, with the device name, it seems like you'll additionally need the > full device topology to be able to do anything with lockdep splats, no? Not necessarily. Knowing the name already tells you something about where the device fits into the full tree. And if necessary, you can probably glean the necessary information from the kernel log. Besides, you often don't need the full device topology. For instance, if the problem is that a driver is flushing a work queue while holding a lock needed by an item on the queue, mostly you just need to know what driver and where the flush occurs -- and that information is already provided by lockdep. > What if we just added a way to set a comparison function for a lockdep > class? I'm looking at the lockdep code now, and I think I could do that > for you. I don't know what a lockdep class comparison function does (or would do). Nor how having one would help. Alan Stern
On Sat, Feb 11, 2023 at 06:06:28PM -0500, Kent Overstreet wrote: > Yeah, what bcache really needs (and presumably dev->mutex as well) is just > to disable lockdep checking for self-deadlock of that lock type, since it's > got its own deadlock avoidance and the subclass thing isn't good enough. > > I've got a patch that should do what we want, replying from my other account > with it. -- >8 -- Subject: [PATCH] locking/lockdep: lockdep_set_no_check_recursion() This adds a method to tell lockdep not to check lock ordering within a lock class - but to still check lock ordering w.r.t. other lock types. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- include/linux/lockdep.h | 6 ++++++ include/linux/lockdep_types.h | 2 +- kernel/locking/lockdep.c | 26 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index e027c504b7..66f28553c6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -666,4 +666,10 @@ lockdep_rcu_suspicious(const char *file, const int line, const char *s) } #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void lockdep_set_no_check_recursion(struct lockdep_map *map); +#else +static inline void lockdep_set_no_check_recursion(struct lockdep_map *map) {} +#endif + #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index d22430840b..506e769b4a 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -128,7 +128,7 @@ struct lock_class { u8 wait_type_inner; u8 wait_type_outer; u8 lock_type; - /* u8 hole; */ + u8 no_check_recursion; #ifdef CONFIG_LOCK_STAT unsigned long contention_point[LOCKSTAT_POINTS]; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cae7d5f0ad..47ffb8df11 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3023,6 +3023,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) if ((next->read == 2) && prev->read) continue; + if (hlock_class(next)->no_check_recursion) + continue; + /* * We're holding the nest_lock, which serializes this lock's * nesting behaviour. @@ -3084,6 +3087,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return 2; } + if (hlock_class(prev) == hlock_class(next) && + hlock_class(prev)->no_check_recursion) + return 2; + /* * Prove that the new <prev> -> <next> dependency would not * create a circular dependency in the graph. (We do this by @@ -6617,3 +6624,22 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) dump_stack(); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void lockdep_set_no_check_recursion(struct lockdep_map *lock) +{ + struct lock_class *class = lock->class_cache[0]; + unsigned long flags; + + raw_local_irq_save(flags); + lockdep_recursion_inc(); + + if (!class) + class = register_lock_class(lock, 0, 0); + if (class) + class->no_check_recursion = true; + lockdep_recursion_finish(); + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lockdep_set_no_check_recursion); +#endif -- 2.39.1
© 2016 - 2025 Red Hat, Inc.