The code is littered with inscrutable and duplicative lockdep incantations.
Replace these with defines which explain what is going on, and add
commentary to explain what we're doing.
If lockdep is disabled these become no-ops. We must use defines so _RET_IP_
remains meaningful.
These are self-documenting and aid readability of the code.
Additionally, instead of using the confusing rwsem_*() form for something
that is emphatically not an rwsem, we explicitly use the
lock_acquire_shared/exclusive(), lock_release() and lock_acquired() lockdep
invocations, since we are doing something rather custom here and these make
more sense to use.
No functional change intended.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
include/linux/mmap_lock.h | 37 ++++++++++++++++++++++++++++++++++---
mm/mmap_lock.c             | 10 +++++-----
2 files changed, 39 insertions(+), 8 deletions(-)
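
For reference, the rwsem_*() lockdep annotations being replaced are themselves
only thin wrappers around these calls; roughly (per include/linux/lockdep.h,
parameter names shortened here):

  #define rwsem_acquire(l, s, t, i)       lock_acquire_exclusive(l, s, t, NULL, i)
  #define rwsem_acquire_read(l, s, t, i)  lock_acquire_shared(l, s, t, NULL, i)
  #define rwsem_release(l, i)             lock_release(l, i)

So calling lock_acquire_shared/exclusive() and lock_release() directly is
equivalent - it simply drops the misleading rwsem naming.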
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 294fb282052d..1887ca55ead7 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -78,6 +78,37 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
#ifdef CONFIG_PER_VMA_LOCK
+/*
+ * VMA locks do not behave like most ordinary locks found in the kernel, so we
+ * cannot quite have full lockdep tracking in the way we would ideally prefer.
+ *
+ * Read locks act as shared locks which exclude an exclusive lock being
+ * taken. We therefore mark these accordingly on read lock acquire/release.
+ *
+ * Write locks are acquired exclusively per-VMA, but released in a shared
+ * fashion; that is, upon vma_end_write_all(), we update the mmap's seqcount
+ * such that the write lock is released.
+ *
+ * We therefore cannot track write locks per-VMA, nor do we try. Mitigating this
+ * is the fact that, of course, we do lockdep-track the mmap lock rwsem which
+ * must be held when taking a VMA write lock.
+ *
+ * We do, however, want to indicate that, during either acquisition of a VMA
+ * write lock or detachment of a VMA, we require the lock held to be exclusive,
+ * so we utilise lockdep to do so.
+ */
+#define __vma_lockdep_acquire_read(vma) \
+ lock_acquire_shared(&vma->vmlock_dep_map, 0, 1, NULL, _RET_IP_)
+#define __vma_lockdep_release_read(vma) \
+ lock_release(&vma->vmlock_dep_map, _RET_IP_)
+#define __vma_lockdep_acquire_exclusive(vma) \
+ lock_acquire_exclusive(&vma->vmlock_dep_map, 0, 0, NULL, _RET_IP_)
+#define __vma_lockdep_release_exclusive(vma) \
+ lock_release(&vma->vmlock_dep_map, _RET_IP_)
+/* Only meaningful if CONFIG_LOCK_STAT is defined. */
+#define __vma_lockdep_stat_mark_acquired(vma) \
+ lock_acquired(&vma->vmlock_dep_map, _RET_IP_)
+
static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
seqcount_init(&mm->mm_lock_seq);
@@ -176,9 +207,9 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
struct mm_struct *mm = vma->vm_mm;
int newcnt;
- rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
-
+ __vma_lockdep_release_read(vma);
newcnt = __vma_refcount_put_return(vma);
+
/*
* __vma_enter_locked() may be sleeping waiting for readers to drop
* their reference count, so wake it up if we were the last reader
@@ -207,7 +238,7 @@ static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int
VM_REFCNT_LIMIT)))
return false;
- rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+ __vma_lockdep_acquire_read(vma);
return true;
}
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index 6be1bbcde09e..85b2ae1d9720 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -72,7 +72,7 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma,
if (!refcount_add_not_zero(VM_REFCNT_EXCLUDE_READERS_FLAG, &vma->vm_refcnt))
return 0;
- rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_);
+ __vma_lockdep_acquire_exclusive(vma);
err = rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
refcount_read(&vma->vm_refcnt) == tgt_refcnt,
state);
@@ -85,10 +85,10 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma,
WARN_ON_ONCE(!detaching);
err = 0;
}
- rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+ __vma_lockdep_release_exclusive(vma);
return err;
}
- lock_acquired(&vma->vmlock_dep_map, _RET_IP_);
+ __vma_lockdep_stat_mark_acquired(vma);
return 1;
}
@@ -97,7 +97,7 @@ static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
{
*detached = refcount_sub_and_test(VM_REFCNT_EXCLUDE_READERS_FLAG,
&vma->vm_refcnt);
- rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+ __vma_lockdep_release_exclusive(vma);
}
int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
@@ -204,7 +204,7 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
goto err;
}
- rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+ __vma_lockdep_acquire_read(vma);
if (unlikely(vma->vm_mm != mm))
goto err_unstable;
--
2.52.0
On 2026-01-23 20:12:14 [+0000], Lorenzo Stoakes wrote:
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -78,6 +78,37 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
…
> +/* Only meaningful if CONFIG_LOCK_STAT is defined. */
> +#define __vma_lockdep_stat_mark_acquired(vma) \
> +	lock_acquired(&vma->vmlock_dep_map, _RET_IP_)
> +

After going through the remaining series, I don't think I found a
matching lock_contended(). So perf/tracing just give you a few
lock-acquired events. Wouldn't it make sense to also add some
lock_contended() events where the caller had to wait before it could
acquire the lock?

Sebastian
On Wed, Jan 28, 2026 at 12:37:49PM +0100, Sebastian Andrzej Siewior wrote:
> On 2026-01-23 20:12:14 [+0000], Lorenzo Stoakes wrote:
> > --- a/include/linux/mmap_lock.h
> > +++ b/include/linux/mmap_lock.h
> > @@ -78,6 +78,37 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
> …
> > +/* Only meaningful if CONFIG_LOCK_STAT is defined. */
> > +#define __vma_lockdep_stat_mark_acquired(vma) \
> > +	lock_acquired(&vma->vmlock_dep_map, _RET_IP_)
> > +
>
> After going through the remaining series, I don't think I found a
> matching lock_contended(). So perf/tracing just give you a few
> lock-acquired events. Wouldn't it make sense to also add some
> lock_contended() events where the caller had to wait before it could
> acquire the lock?

Yeah I did wonder about this actually. The series really just abstracts
this part, so I think doing something with that should be a follow-up.

Suren - what was your intent with this? I did wonder what we actually
really accomplished with this.

VMA locks are always try-locks.

Write locks can't be contended against one another since VMAs are always
a per-process entity and not obtained remotely, so either a VMA is
write-locked by us or not write-locked, never write-locked by anybody
else.

Read locks immediately give up if the VMA is write locked.

Would we want to record a lock_contended() event in that case I guess
then?

I don't think we'd want to do that if the VMA were detached, only if it
were write-locked?

>
> Sebastian

Cheers, Lorenzo
On Wed, Jan 28, 2026 at 3:48 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> On Wed, Jan 28, 2026 at 12:37:49PM +0100, Sebastian Andrzej Siewior wrote:
> > On 2026-01-23 20:12:14 [+0000], Lorenzo Stoakes wrote:
> > > --- a/include/linux/mmap_lock.h
> > > +++ b/include/linux/mmap_lock.h
> > > @@ -78,6 +78,37 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
> > …
> > > +/* Only meaningful if CONFIG_LOCK_STAT is defined. */
> > > +#define __vma_lockdep_stat_mark_acquired(vma) \
> > > +	lock_acquired(&vma->vmlock_dep_map, _RET_IP_)
> > > +
> >
> > After going through the remaining series, I don't think I found a
> > matching lock_contended(). So perf/tracing just give you a few
> > lock-acquired events. Wouldn't it make sense to also add some
> > lock_contended() events where the caller had to wait before it could
> > acquire the lock?
>
> Yeah I did wonder about this actually. The series really just abstracts
> this part, so I think doing something with that should be a follow-up.
>
> Suren - what was your intent with this? I did wonder what we actually
> really accomplished with this.
>
> VMA locks are always try-locks.
>
> Write locks can't be contended against one another since VMAs are always
> a per-process entity and not obtained remotely, so either a VMA is
> write-locked by us or not write-locked, never write-locked by anybody
> else.

Correct, all paths which call __vma_enter_locked() to either write-lock
a vma or to detach it have to already hold the mmap write lock on
vma->vm_mm. So, lock_contended() will never be triggered for vma write
locking paths.

>
> Read locks immediately give up if the VMA is write locked.
>
> Would we want to record a lock_contended() event in that case I guess
> then?

Yes, we could have it in vma_start_read_locked_nested() if refcount
increment fails and if the refcount != 0 (if it's 0 then the vma is
detached and the refcount can't change concurrently).

>
> I don't think we'd want to do that if the VMA were detached, only if it
> were write-locked?

Yes, that's why we would need an additional check for 0 in there. I can
add this logic after your patchset lands. I don't think we need to block
it for this.

Thanks,
Suren.

>
> >
> > Sebastian
>
> Cheers, Lorenzo
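
To illustrate the follow-up Suren describes above, a rough and untested sketch
of what such an annotation in vma_start_read_locked_nested() might look like -
the refcount helper name below is hypothetical and stands in for whatever the
series actually uses:

static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma,
                                                int subclass)
{
        mmap_assert_locked(vma->vm_mm);

        /* Hypothetical name for the series' bounded refcount increment. */
        if (unlikely(!__vma_refcount_try_get(vma))) {
                /*
                 * Only report contention if the VMA is write-locked; if it is
                 * detached (refcount == 0) the count cannot change under us
                 * and there is nothing to contend on.
                 */
                if (refcount_read(&vma->vm_refcnt) != 0)
                        lock_contended(&vma->vmlock_dep_map, _RET_IP_);
                return false;
        }

        __vma_lockdep_acquire_read(vma);
        return true;
}

Whether a failed try-acquire should count as contention at all is the open
question above; the check against zero keeps detached VMAs out of the
statistics.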
On 2026-01-23 20:12:14 [+0000], Lorenzo Stoakes wrote:
> The code is littered with inscrutable and duplicative lockdep incantations.
> Replace these with defines which explain what is going on, and add
> commentary to explain what we're doing.
>
> If lockdep is disabled these become no-ops. We must use defines so _RET_IP_
> remains meaningful.
>
> These are self-documenting and aid readability of the code.
>
> Additionally, instead of using the confusing rwsem_*() form for something
> that is emphatically not an rwsem, we explicitly use the
> lock_acquire_shared/exclusive(), lock_release() and lock_acquired() lockdep
> invocations, since we are doing something rather custom here and these make
> more sense to use.
>
> No functional change intended.

This is just "replace rwsem macro with our own macro" which is fine. The
subject confused me because I expected something new to see ;)

Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Sebastian
On Wed, Jan 28, 2026 at 12:18:32PM +0100, Sebastian Andrzej Siewior wrote:
> On 2026-01-23 20:12:14 [+0000], Lorenzo Stoakes wrote:
> > The code is littered with inscrutable and duplicative lockdep incantations.
> > Replace these with defines which explain what is going on, and add
> > commentary to explain what we're doing.
> >
> > If lockdep is disabled these become no-ops. We must use defines so _RET_IP_
> > remains meaningful.
> >
> > These are self-documenting and aid readability of the code.
> >
> > Additionally, instead of using the confusing rwsem_*() form for something
> > that is emphatically not an rwsem, we explicitly use the
> > lock_acquire_shared/exclusive(), lock_release() and lock_acquired() lockdep
> > invocations, since we are doing something rather custom here and these make
> > more sense to use.
> >
> > No functional change intended.
>
> This is just "replace rwsem macro with our own macro" which is fine. The
> subject confused me because I expected something new to see ;)

Haha well yeah, I mean it's been a bit of a journey actually, reminding
myself of how we actually use lockdep with the VMA locks and then, in real
time, realising how broken an 'is held' can be, so in general I think we
got to a good place!

>
> Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Thanks!

>
> Sebastian

Cheers, Lorenzo