The lock scenario print has always been a weak spot of lockdep splats.
Improvements can be made if we rework the dependency search and the
error printing.
However, without touching the graph search, we can already improve the
circular deadlock case a little, since we have the to-be-added lock
dependency at hand and know whether each of its two locks is
read/write/sync.
In order to know whether a held_lock is sync or not, a bit is "stolen"
from ->references, which reduces our limit for same-lock-class nesting
from 2^12 to 2^11 (4096 to 2048), and that should still be good enough.
Besides, since we now have a bit in held_lock for sync, we no longer
need the "hardirqs_off being 1" trick, and we can also avoid the
__lock_release() by jumping out of __lock_acquire() before the held_lock
is stored.
With these changes, a deadlock case involving a read lock and sync gets
a better print-out, from:
[...]  Possible unsafe locking scenario:
[...]
[...]        CPU0                    CPU1
[...]        ----                    ----
[...]   lock(srcuA);
[...]                                lock(srcuB);
[...]                                lock(srcuA);
[...]   lock(srcuB);
to
[...]  Possible unsafe locking scenario:
[...]
[...]        CPU0                    CPU1
[...]        ----                    ----
[...]   rlock(srcuA);
[...]                                lock(srcuB);
[...]                                lock(srcuA);
[...]   sync(srcuB);
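
For reference, the splat above corresponds to usage along these lines
(a sketch only: srcuA and srcuB stand for two srcu_structs, and the
call sites are illustrative rather than taken from a real report):

	/* CPU0 */
	idx = srcu_read_lock(&srcuA);	/* rlock(srcuA) */
	synchronize_srcu(&srcuB);	/* sync(srcuB): waits for srcuB readers */
	srcu_read_unlock(&srcuA, idx);

	/* CPU1 */
	idx = srcu_read_lock(&srcuB);
	synchronize_srcu(&srcuA);	/* waits for srcuA readers */
	srcu_read_unlock(&srcuB, idx);

Note that only the two ends of the to-be-added dependency (source and
target) are known without a graph search, which is why CPU1's entries
are still printed as plain lock().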
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
 include/linux/lockdep.h  |  3 ++-
 kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++--------------
 2 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ba09df6a0872..febd7ecc225c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -134,7 +134,8 @@ struct held_lock {
 	unsigned int read:2;        /* see lock_acquire() comment */
 	unsigned int check:1;       /* see lock_acquire() comment */
 	unsigned int hardirqs_off:1;
-	unsigned int references:12;					/* 32 bits */
+	unsigned int sync:1;
+	unsigned int references:11;					/* 32 bits */
 	unsigned int pin_count;
 };
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index cffa026a765f..4031d87f6829 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1880,6 +1880,8 @@ print_circular_lock_scenario(struct held_lock *src,
 	struct lock_class *source = hlock_class(src);
 	struct lock_class *target = hlock_class(tgt);
 	struct lock_class *parent = prt->class;
+	int src_read = src->read;
+	int tgt_read = tgt->read;
 
 	/*
 	 * A direct locking problem where unsafe_class lock is taken
@@ -1907,7 +1909,10 @@ print_circular_lock_scenario(struct held_lock *src,
 	printk(" Possible unsafe locking scenario:\n\n");
 	printk("       CPU0                    CPU1\n");
 	printk("       ----                    ----\n");
-	printk("  lock(");
+	if (tgt_read != 0)
+		printk("  rlock(");
+	else
+		printk("  lock(");
 	__print_lock_name(target);
 	printk(KERN_CONT ");\n");
 	printk("                               lock(");
@@ -1916,7 +1921,12 @@ print_circular_lock_scenario(struct held_lock *src,
 	printk("                               lock(");
 	__print_lock_name(target);
 	printk(KERN_CONT ");\n");
-	printk("  lock(");
+	if (src_read != 0)
+		printk("  rlock(");
+	else if (src->sync)
+		printk("  sync(");
+	else
+		printk("  lock(");
 	__print_lock_name(source);
 	printk(KERN_CONT ");\n");
 	printk("\n *** DEADLOCK ***\n\n");
@@ -4530,7 +4540,13 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
 					return 0;
 		}
 	}
-	if (!hlock->hardirqs_off) {
+
+	/*
+	 * For lock_sync(), don't mark the ENABLED usage, since lock_sync()
+	 * creates no critical section and no extra dependency can be introduced
+	 * by interrupts
+	 */
+	if (!hlock->hardirqs_off && !hlock->sync) {
 		if (hlock->read) {
 			if (!mark_lock(curr, hlock,
 					LOCK_ENABLED_HARDIRQ_READ))
@@ -4909,7 +4925,7 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 			  int trylock, int read, int check, int hardirqs_off,
 			  struct lockdep_map *nest_lock, unsigned long ip,
-			  int references, int pin_count)
+			  int references, int pin_count, int sync)
 {
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
@@ -4960,7 +4976,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 
 	class_idx = class - lock_classes;
 
-	if (depth) { /* we're holding locks */
+	if (depth && !sync) {
+		/* we're holding locks and the new held lock is not a sync */
 		hlock = curr->held_locks + depth - 1;
 		if (hlock->class_idx == class_idx && nest_lock) {
 			if (!references)
@@ -4994,6 +5011,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->trylock = trylock;
 	hlock->read = read;
 	hlock->check = check;
+	hlock->sync = !!sync;
 	hlock->hardirqs_off = !!hardirqs_off;
 	hlock->references = references;
 #ifdef CONFIG_LOCK_STAT
@@ -5055,6 +5073,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (!validate_chain(curr, hlock, chain_head, chain_key))
 		return 0;
 
+	/* For lock_sync(), we are done here since no actual critical section */
+	if (hlock->sync)
+		return 1;
+
 	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
@@ -5196,7 +5218,7 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
 				    hlock->read, hlock->check,
 				    hlock->hardirqs_off,
 				    hlock->nest_lock, hlock->acquire_ip,
-				    hlock->references, hlock->pin_count)) {
+				    hlock->references, hlock->pin_count, 0)) {
 		case 0:
 			return 1;
 		case 1:
@@ -5666,7 +5688,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 
 	lockdep_recursion_inc();
 	__lock_acquire(lock, subclass, trylock, read, check,
-		       irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
+		       irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 0);
 	lockdep_recursion_finish();
 	raw_local_irq_restore(flags);
 }
@@ -5699,11 +5721,6 @@ EXPORT_SYMBOL_GPL(lock_release);
  * APIs are used to wait for one or multiple critical sections (on other CPUs
  * or threads), and it means that calling these APIs inside these critical
  * sections is potential deadlock.
- *
- * This annotation acts as an acqurie+release anontation pair with hardirqoff
- * being 1. Since there's no critical section, no interrupt can create extra
- * dependencies "inside" the annotation, hardirqoff == 1 allows us to avoid
- * false positives.
  */
 void lock_sync(struct lockdep_map *lock, unsigned subclass, int read,
 	       int check, struct lockdep_map *nest_lock, unsigned long ip)
@@ -5717,10 +5734,9 @@ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read,
 	check_flags(flags);
 
 	lockdep_recursion_inc();
-	__lock_acquire(lock, subclass, 0, read, check, 1, nest_lock, ip, 0, 0);
-
-	if (__lock_release(lock, ip))
-		check_chain_key(current);
+	__lock_acquire(lock, subclass, 0, read, check,
+		       irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 1);
+	check_chain_key(current);
 	lockdep_recursion_finish();
 	raw_local_irq_restore(flags);
 }
-- 
2.38.1
On 1/13/23 18:57, Boqun Feng wrote:
> [...]
> @@ -1916,7 +1921,12 @@ print_circular_lock_scenario(struct held_lock *src,
>  	printk("                               lock(");
>  	__print_lock_name(target);
>  	printk(KERN_CONT ");\n");
> -	printk("  lock(");
> +	if (src_read != 0)
> +		printk("  rlock(");
> +	else if (src->sync)
> +		printk("  sync(");
> +	else
> +		printk("  lock(");
>  	__print_lock_name(source);
>  	printk(KERN_CONT ");\n");
>  	printk("\n *** DEADLOCK ***\n\n");

src can be sync() but not the target. Is there a reason why that is the
case?
> [...]
> @@ -5055,6 +5073,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
>  	if (!validate_chain(curr, hlock, chain_head, chain_key))
>  		return 0;
>
> +	/* For lock_sync(), we are done here since no actual critical section */
> +	if (hlock->sync)
> +		return 1;
> +
>  	curr->curr_chain_key = chain_key;
>  	curr->lockdep_depth++;
>  	check_chain_key(curr);

Even with sync, there is still a corresponding lock_acquire() and
lock_release(); you can't exit here without increasing lockdep_depth.
That can cause underflow.

Cheers,
Longman
On Mon, Jan 16, 2023 at 05:21:09PM -0500, Waiman Long wrote:
> [...]
> src can be sync() but not the target. Is there a reason why that is the
> case?

The functions annotated by sync() don't create real critical sections,
so no lock dependency can be created from a sync(), for example:

	synchronize_srcu(A);
	mutex_lock(B);

no dependency from A to B. In the scenario case, if we see a dependency
target -> source, the target cannot be a lock_sync(). I will add better
documentation later.
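
To spell that out a little more (a sketch only, with A and B as
placeholder locks rather than code from this patch): a sync() holds
nothing once it returns, while a read-side critical section does, so
only the latter can be the left end of a dependency:

	synchronize_srcu(&A);		/* sync(A): no critical section */
	mutex_lock(&B);			/* hence no A -> B dependency */
	mutex_unlock(&B);

	idx = srcu_read_lock(&A);	/* rlock(A): critical section starts */
	mutex_lock(&B);			/* A -> B is a real dependency */
	mutex_unlock(&B);
	srcu_read_unlock(&A, idx);

Only the second pair leaves A held while B is acquired, so only it can
add an A -> B edge to the dependency graph.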
> [...]
> Even with sync, there is still a corresponding lock_acquire() and
> lock_release(); you can't exit here without increasing lockdep_depth.
> That can cause underflow.

I actually removed the __lock_release() in lock_sync() in this patch, so
I think it's OK. But I must admit the whole submission is to give David
something to see whether the output is an improvement, so I probably
should separate the output changes and the lock_sync() internals into
two patches (and the latter can also be folded into the introduction
patch).

Regards,
Boqun
On 1/16/23 17:35, Boqun Feng wrote:
> [...]
> The functions annotated by sync() don't create real critical sections,
> so no lock dependency can be created from a sync() [...] In the
> scenario case, if we see a dependency target -> source, the target
> cannot be a lock_sync(). I will add better documentation later.

Right, the dependency won't happen since you reduce lock_sync() to
mostly doing validate_chain() without actually storing it in the lock
chain, which I did miss in my initial review. Without that, a dependency
may happen if an NMI happens between lock_acquire() and lock_release()
in lock_sync().

> [...]
> I actually removed the __lock_release() in lock_sync() in this patch,
> so I think it's OK. But I must admit the whole submission is to give
> David something to see whether the output is an improvement, so I
> probably should separate the output changes and the lock_sync()
> internals into two patches [...]

I saw that now. You may not need to separate it into two patches since
there is some dependency between the two, but you do have to document
the two different changes in your patch description.

Cheers,
Longman