From: Suren Baghdasaryan <surenb@google.com>
Add helper functions to speculatively perform operations without
read-locking mmap_lock, expecting that mmap_lock will not be
write-locked and mm is not modified from under us.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 6b3272686860..58dde2e35f7e 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
}
#ifdef CONFIG_PER_VMA_LOCK
+
static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
seqcount_init(&mm->mm_lock_seq);
@@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
do_raw_write_seqcount_end(&mm->mm_lock_seq);
}
-#else
+static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
+{
+ *seq = raw_read_seqcount(&mm->mm_lock_seq);
+ /* Allow speculation if mmap_lock is not write-locked */
+ return (*seq & 1) == 0;
+}
+
+static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
+{
+ return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
-#endif
+
+static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
+{
+ return false;
+}
+
+static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
+{
+ return false;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
static inline void mmap_init_lock(struct mm_struct *mm)
{
--
2.43.5
On Sun, Oct 27, 2024 at 06:08:16PM -0700, Andrii Nakryiko wrote:
> From: Suren Baghdasaryan <surenb@google.com>
>
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and mm is not modified from under us.
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
> include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
> 1 file changed, 27 insertions(+), 2 deletions(-)
>
> @@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
> do_raw_write_seqcount_end(&mm->mm_lock_seq);
> }
>
> -#else
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> + *seq = raw_read_seqcount(&mm->mm_lock_seq);
> + /* Allow speculation if mmap_lock is not write-locked */
> + return (*seq & 1) == 0;
> +}
At the very least this should have more comment; I don't think it
adequately explains the reason for being weird. Perhaps:
/*
* Since mmap_lock is a sleeping lock, and waiting for it to
* become unlocked is more or less equivalent with taking it
* ourselves, don't bother with the speculative path and take
* the slow path, which takes the lock.
*/
*seq = raw_read_seqcount(&mm->mm_lock_seq);
return !(*seq & 1);
But perhaps it makes even more sense to add this functionality to
seqcount itself. The same argument can be made for seqcount_mutex and
seqcount_rwlock users.
> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> + return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
> +}
This naming is somewhare weird, begin/end do not typically imply boolean
return values.
Perhaps something like? can_speculate, or speculate_try_begin, paired
with speculated_success or speculate_retry ?
On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:
> But perhaps it makes even more sense to add this functionality to
> seqcount itself. The same argument can be made for seqcount_mutex and
> seqcount_rwlock users.
Something like so I suppose.
---
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5298765d6ca4..102afdf8c7db 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
__seq; \
})
+/**
+ * raw_seqcount_try_begin() - begin a seqcount_t read critical section
+ * w/o lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Very like raw_seqcount_begin(), except it enables eliding the critical
+ * section entirely if odd, instead of doing the speculation knowing it will
+ * fail.
+ *
+ * Useful when counter stabilization is more or less equivalent to taking
+ * the lock and there is a slowpath that does that.
+ *
+ * If true, start will be set to the (even) sequence count read.
+ *
+ * Return: true when a read critical section is started.
+ */
+#define raw_seqcount_try_begin(s, start) \
+({ \
+ start = raw_read_seqcount(s); \
+ !(start & 1); \
+})
+
/**
* raw_seqcount_begin() - begin a seqcount_t read critical section w/o
* lockdep and w/o counter stabilization
On Thu, Nov 21, 2024 at 7:23 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:
>
> > But perhaps it makes even more sense to add this functionality to
> > seqcount itself. The same argument can be made for seqcount_mutex and
> > seqcount_rwlock users.
>
> Something like so I suppose.
Ok, let me put this all together. Thanks!
>
> ---
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index 5298765d6ca4..102afdf8c7db 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
> __seq; \
> })
>
> +/**
> + * raw_seqcount_try_begin() - begin a seqcount_t read critical section
> + * w/o lockdep and w/o counter stabilization
> + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
> + *
> + * Very like raw_seqcount_begin(), except it enables eliding the critical
> + * section entirely if odd, instead of doing the speculation knowing it will
> + * fail.
> + *
> + * Useful when counter stabilization is more or less equivalent to taking
> + * the lock and there is a slowpath that does that.
> + *
> + * If true, start will be set to the (even) sequence count read.
> + *
> + * Return: true when a read critical section is started.
> + */
> +#define raw_seqcount_try_begin(s, start) \
> +({ \
> + start = raw_read_seqcount(s); \
> + !(start & 1); \
> +})
> +
> /**
> * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
> * lockdep and w/o counter stabilization
On Thu, Nov 21, 2024 at 7:36 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Thu, Nov 21, 2024 at 7:23 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:
> >
> > > But perhaps it makes even more sense to add this functionality to
> > > seqcount itself. The same argument can be made for seqcount_mutex and
> > > seqcount_rwlock users.
> >
> > Something like so I suppose.
>
> Ok, let me put this all together. Thanks!
I posted the new version at
https://lore.kernel.org/all/20241121162826.987947-1-surenb@google.com/
The changes are minimal, only those requested by Peter, so hopefully
they can be accepted quickly.
>
> >
> > ---
> > diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> > index 5298765d6ca4..102afdf8c7db 100644
> > --- a/include/linux/seqlock.h
> > +++ b/include/linux/seqlock.h
> > @@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
> > __seq; \
> > })
> >
> > +/**
> > + * raw_seqcount_try_begin() - begin a seqcount_t read critical section
> > + * w/o lockdep and w/o counter stabilization
> > + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
> > + *
> > + * Very like raw_seqcount_begin(), except it enables eliding the critical
> > + * section entirely if odd, instead of doing the speculation knowing it will
> > + * fail.
> > + *
> > + * Useful when counter stabilization is more or less equivalent to taking
> > + * the lock and there is a slowpath that does that.
> > + *
> > + * If true, start will be set to the (even) sequence count read.
> > + *
> > + * Return: true when a read critical section is started.
> > + */
> > +#define raw_seqcount_try_begin(s, start) \
> > +({ \
> > + start = raw_read_seqcount(s); \
> > + !(start & 1); \
> > +})
> > +
> > /**
> > * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
> > * lockdep and w/o counter stabilization
On 10/28/24 02:08, Andrii Nakryiko wrote:
> From: Suren Baghdasaryan <surenb@google.com>
>
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and mm is not modified from under us.
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
> 1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 6b3272686860..58dde2e35f7e 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
> }
>
> #ifdef CONFIG_PER_VMA_LOCK
> +
> static inline void mm_lock_seqcount_init(struct mm_struct *mm)
> {
> seqcount_init(&mm->mm_lock_seq);
> @@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
> do_raw_write_seqcount_end(&mm->mm_lock_seq);
> }
>
> -#else
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> + *seq = raw_read_seqcount(&mm->mm_lock_seq);
> + /* Allow speculation if mmap_lock is not write-locked */
> + return (*seq & 1) == 0;
> +}
> +
> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> + return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
> +}
> +
> +#else /* CONFIG_PER_VMA_LOCK */
> +
> static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
> static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
> static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
> -#endif
> +
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> + return false;
> +}
> +
> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> + return false;
> +}
> +
> +#endif /* CONFIG_PER_VMA_LOCK */
>
> static inline void mmap_init_lock(struct mm_struct *mm)
> {
© 2016 - 2026 Red Hat, Inc.