[PATCH v9 07/17] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail

Suren Baghdasaryan posted 17 patches 1 year ago
There is a newer version of this series
[PATCH v9 07/17] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail
Posted by Suren Baghdasaryan 1 year ago
With the upcoming replacement of vm_lock with vm_refcnt, we need to handle the
possibility of vma_start_read_locked/vma_start_read_locked_nested failing
due to refcount overflow. Prepare for this possibility by changing these
APIs and adjusting their users.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Lokesh Gidra <lokeshgidra@google.com>
---
 include/linux/mm.h |  6 ++++--
 mm/userfaultfd.c   | 18 +++++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2f805f1a0176..cbb4e3dbbaed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,10 +747,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
  * not be used in such cases because it might fail due to mm_lock_seq overflow.
  * This functionality is used to obtain vma read lock and drop the mmap read lock.
  */
-static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
 {
 	mmap_assert_locked(vma->vm_mm);
 	down_read_nested(&vma->vm_lock.lock, subclass);
+	return true;
 }
 
 /*
@@ -759,10 +760,11 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int
  * not be used in such cases because it might fail due to mm_lock_seq overflow.
  * This functionality is used to obtain vma read lock and drop the mmap read lock.
  */
-static inline void vma_start_read_locked(struct vm_area_struct *vma)
+static inline bool vma_start_read_locked(struct vm_area_struct *vma)
 {
 	mmap_assert_locked(vma->vm_mm);
 	down_read(&vma->vm_lock.lock);
+	return true;
 }
 
 static inline void vma_end_read(struct vm_area_struct *vma)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 4527c385935b..411a663932c4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -85,7 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
 	mmap_read_lock(mm);
 	vma = find_vma_and_prepare_anon(mm, address);
 	if (!IS_ERR(vma))
-		vma_start_read_locked(vma);
+		if (!vma_start_read_locked(vma))
+			vma = ERR_PTR(-EAGAIN);
 
 	mmap_read_unlock(mm);
 	return vma;
@@ -1483,10 +1484,17 @@ static int uffd_move_lock(struct mm_struct *mm,
 	mmap_read_lock(mm);
 	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
 	if (!err) {
-		vma_start_read_locked(*dst_vmap);
-		if (*dst_vmap != *src_vmap)
-			vma_start_read_locked_nested(*src_vmap,
-						SINGLE_DEPTH_NESTING);
+		if (vma_start_read_locked(*dst_vmap)) {
+			if (*dst_vmap != *src_vmap) {
+				if (!vma_start_read_locked_nested(*src_vmap,
+							SINGLE_DEPTH_NESTING)) {
+					vma_end_read(*dst_vmap);
+					err = -EAGAIN;
+				}
+			}
+		} else {
+			err = -EAGAIN;
+		}
 	}
 	mmap_read_unlock(mm);
 	return err;
-- 
2.47.1.613.gc27f4b7a9f-goog
Re: [PATCH v9 07/17] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail
Posted by Lorenzo Stoakes 1 year ago
On Fri, Jan 10, 2025 at 08:25:54PM -0800, Suren Baghdasaryan wrote:
> With upcoming replacement of vm_lock with vm_refcnt, we need to handle a
> possibility of vma_start_read_locked/vma_start_read_locked_nested failing
> due to refcount overflow. Prepare for such possibility by changing these
> APIs and adjusting their users.
>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> Cc: Lokesh Gidra <lokeshgidra@google.com>
> ---
>  include/linux/mm.h |  6 ++++--
>  mm/userfaultfd.c   | 18 +++++++++++++-----
>  2 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 2f805f1a0176..cbb4e3dbbaed 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -747,10 +747,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
>   * not be used in such cases because it might fail due to mm_lock_seq overflow.
>   * This functionality is used to obtain vma read lock and drop the mmap read lock.
>   */
> -static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
> +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
>  {
>  	mmap_assert_locked(vma->vm_mm);
>  	down_read_nested(&vma->vm_lock.lock, subclass);
> +	return true;
>  }
>
>  /*
> @@ -759,10 +760,11 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int
>   * not be used in such cases because it might fail due to mm_lock_seq overflow.
>   * This functionality is used to obtain vma read lock and drop the mmap read lock.
>   */
> -static inline void vma_start_read_locked(struct vm_area_struct *vma)
> +static inline bool vma_start_read_locked(struct vm_area_struct *vma)
>  {
>  	mmap_assert_locked(vma->vm_mm);
>  	down_read(&vma->vm_lock.lock);
> +	return true;
>  }
>
>  static inline void vma_end_read(struct vm_area_struct *vma)
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 4527c385935b..411a663932c4 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -85,7 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
>  	mmap_read_lock(mm);
>  	vma = find_vma_and_prepare_anon(mm, address);
>  	if (!IS_ERR(vma))
> -		vma_start_read_locked(vma);
> +		if (!vma_start_read_locked(vma))
> +			vma = ERR_PTR(-EAGAIN);

Nit but this kind of reads a bit weirdly now:

	if (!IS_ERR(vma))
		if (!vma_start_read_locked(vma))
			vma = ERR_PTR(-EAGAIN);

Wouldn't this be nicer as:

	if (!IS_ERR(vma) && !vma_start_read_locked(vma))
		vma = ERR_PTR(-EAGAIN);

On the other hand, this embeds an action in an expression, but then it sort of
still looks weird.

	if (!IS_ERR(vma)) {
		bool ok = vma_start_read_locked(vma);

		if (!ok)
			vma = ERR_PTR(-EAGAIN);
	}

This makes me wonder, now yes, we are truly bikeshedding, sorry, but maybe we
could just have vma_start_read_locked return a VMA pointer that could be an
error?

Then this becomes:

	if (!IS_ERR(vma))
		vma = vma_start_read_locked(vma);

>
>  	mmap_read_unlock(mm);
>  	return vma;
> @@ -1483,10 +1484,17 @@ static int uffd_move_lock(struct mm_struct *mm,
>  	mmap_read_lock(mm);
>  	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
>  	if (!err) {
> -		vma_start_read_locked(*dst_vmap);
> -		if (*dst_vmap != *src_vmap)
> -			vma_start_read_locked_nested(*src_vmap,
> -						SINGLE_DEPTH_NESTING);
> +		if (vma_start_read_locked(*dst_vmap)) {
> +			if (*dst_vmap != *src_vmap) {
> +				if (!vma_start_read_locked_nested(*src_vmap,
> +							SINGLE_DEPTH_NESTING)) {
> +					vma_end_read(*dst_vmap);

Hmm, why do we end read if the lock failed here but not above?

> +					err = -EAGAIN;
> +				}
> +			}
> +		} else {
> +			err = -EAGAIN;
> +		}
>  	}

This whole block is really ugly now, this really needs refactoring.

How about (on the assumption that the vma_end_read() is correct):


	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
	if (err)
		goto out;

	if (!vma_start_read_locked(*dst_vmap)) {
		err = -EAGAIN;
		goto out;
	}

	/* Nothing further to do. */
	if (*dst_vmap == *src_vmap)
		goto out;

	if (!vma_start_read_locked_nested(*src_vmap,
				SINGLE_DEPTH_NESTING)) {
		vma_end_read(*dst_vmap);
		err = -EAGAIN;
	}

out:
	mmap_read_unlock(mm);
	return err;
}

>  	mmap_read_unlock(mm);
>  	return err;
> --
> 2.47.1.613.gc27f4b7a9f-goog
>
Re: [PATCH v9 07/17] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail
Posted by Suren Baghdasaryan 1 year ago
On Mon, Jan 13, 2025 at 7:25 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> On Fri, Jan 10, 2025 at 08:25:54PM -0800, Suren Baghdasaryan wrote:
> > With upcoming replacement of vm_lock with vm_refcnt, we need to handle a
> > possibility of vma_start_read_locked/vma_start_read_locked_nested failing
> > due to refcount overflow. Prepare for such possibility by changing these
> > APIs and adjusting their users.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > Acked-by: Vlastimil Babka <vbabka@suse.cz>
> > Cc: Lokesh Gidra <lokeshgidra@google.com>
> > ---
> >  include/linux/mm.h |  6 ++++--
> >  mm/userfaultfd.c   | 18 +++++++++++++-----
> >  2 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 2f805f1a0176..cbb4e3dbbaed 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -747,10 +747,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
> >   * not be used in such cases because it might fail due to mm_lock_seq overflow.
> >   * This functionality is used to obtain vma read lock and drop the mmap read lock.
> >   */
> > -static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
> > +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
> >  {
> >       mmap_assert_locked(vma->vm_mm);
> >       down_read_nested(&vma->vm_lock.lock, subclass);
> > +     return true;
> >  }
> >
> >  /*
> > @@ -759,10 +760,11 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int
> >   * not be used in such cases because it might fail due to mm_lock_seq overflow.
> >   * This functionality is used to obtain vma read lock and drop the mmap read lock.
> >   */
> > -static inline void vma_start_read_locked(struct vm_area_struct *vma)
> > +static inline bool vma_start_read_locked(struct vm_area_struct *vma)
> >  {
> >       mmap_assert_locked(vma->vm_mm);
> >       down_read(&vma->vm_lock.lock);
> > +     return true;
> >  }
> >
> >  static inline void vma_end_read(struct vm_area_struct *vma)
> > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > index 4527c385935b..411a663932c4 100644
> > --- a/mm/userfaultfd.c
> > +++ b/mm/userfaultfd.c
> > @@ -85,7 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
> >       mmap_read_lock(mm);
> >       vma = find_vma_and_prepare_anon(mm, address);
> >       if (!IS_ERR(vma))
> > -             vma_start_read_locked(vma);
> > +             if (!vma_start_read_locked(vma))
> > +                     vma = ERR_PTR(-EAGAIN);
>
> Nit but this kind of reads a bit weirdly now:
>
>         if (!IS_ERR(vma))
>                 if (!vma_start_read_locked(vma))
>                         vma = ERR_PTR(-EAGAIN);
>
> Wouldn't this be nicer as:
>
>         if (!IS_ERR(vma) && !vma_start_read_locked(vma))
>                 vma = ERR_PTR(-EAGAIN);
>
> On the other hand, this embeds an action in an expression, but then it sort of
> still looks weird.
>
>         if (!IS_ERR(vma)) {
>                 bool ok = vma_start_read_locked(vma);
>
>                 if (!ok)
>                         vma = ERR_PTR(-EAGAIN);
>         }
>
> This makes me wonder, now yes, we are truly bikeshedding, sorry, but maybe we
> could just have vma_start_read_locked return a VMA pointer that could be an
> error?
>
> Then this becomes:
>
>         if (!IS_ERR(vma))
>                 vma = vma_start_read_locked(vma);

No, I think it would be wrong for vma_start_read_locked() to always
return EAGAIN when it can't lock the vma. The error code here is
context-dependent, so while EAGAIN is the right thing here, it might
not work for other future users.

>
> >
> >       mmap_read_unlock(mm);
> >       return vma;
> > @@ -1483,10 +1484,17 @@ static int uffd_move_lock(struct mm_struct *mm,
> >       mmap_read_lock(mm);
> >       err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
> >       if (!err) {
> > -             vma_start_read_locked(*dst_vmap);
> > -             if (*dst_vmap != *src_vmap)
> > -                     vma_start_read_locked_nested(*src_vmap,
> > -                                             SINGLE_DEPTH_NESTING);
> > +             if (vma_start_read_locked(*dst_vmap)) {
> > +                     if (*dst_vmap != *src_vmap) {
> > +                             if (!vma_start_read_locked_nested(*src_vmap,
> > +                                                     SINGLE_DEPTH_NESTING)) {
> > +                                     vma_end_read(*dst_vmap);
>
> Hmm, why do we end read if the lock failed here but not above?

We have successfully done vma_start_read_locked(dst_vmap) (we locked
dest vma) but we failed to do vma_start_read_locked_nested(src_vmap)
(we could not lock src vma). So we should undo the dest vma locking.
Does that clarify the logic?

>
> > +                                     err = -EAGAIN;
> > +                             }
> > +                     }
> > +             } else {
> > +                     err = -EAGAIN;
> > +             }
> >       }
>
> This whole block is really ugly now, this really needs refactoring.
>
> How about (on assumption the vma_end_read() is correct):
>
>
>         err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
>         if (err)
>                 goto out;
>
>         if (!vma_start_read_locked(*dst_vmap)) {
>                 err = -EAGAIN;
>                 goto out;
>         }
>
>         /* Nothing further to do. */
>         if (*dst_vmap == *src_vmap)
>                 goto out;
>
>         if (!vma_start_read_locked_nested(*src_vmap,
>                                 SINGLE_DEPTH_NESTING)) {
>                 vma_end_read(*dst_vmap);
>                 err = -EAGAIN;
>         }
>
> out:
>         mmap_read_unlock(mm);
>         return err;
> }

Ok, that looks good to me. Will change this way.
Thanks!

>
> >       mmap_read_unlock(mm);
> >       return err;
> > --
> > 2.47.1.613.gc27f4b7a9f-goog
> >
Re: [PATCH v9 07/17] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail
Posted by Lorenzo Stoakes 1 year ago
On Mon, Jan 13, 2025 at 09:53:01AM -0800, Suren Baghdasaryan wrote:
> On Mon, Jan 13, 2025 at 7:25 AM Lorenzo Stoakes
> <lorenzo.stoakes@oracle.com> wrote:
> >
> > On Fri, Jan 10, 2025 at 08:25:54PM -0800, Suren Baghdasaryan wrote:
> > > With upcoming replacement of vm_lock with vm_refcnt, we need to handle a
> > > possibility of vma_start_read_locked/vma_start_read_locked_nested failing
> > > due to refcount overflow. Prepare for such possibility by changing these
> > > APIs and adjusting their users.
> > >
> > > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > > Acked-by: Vlastimil Babka <vbabka@suse.cz>
> > > Cc: Lokesh Gidra <lokeshgidra@google.com>
> > > ---
> > >  include/linux/mm.h |  6 ++++--
> > >  mm/userfaultfd.c   | 18 +++++++++++++-----
> > >  2 files changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index 2f805f1a0176..cbb4e3dbbaed 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -747,10 +747,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
> > >   * not be used in such cases because it might fail due to mm_lock_seq overflow.
> > >   * This functionality is used to obtain vma read lock and drop the mmap read lock.
> > >   */
> > > -static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
> > > +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
> > >  {
> > >       mmap_assert_locked(vma->vm_mm);
> > >       down_read_nested(&vma->vm_lock.lock, subclass);
> > > +     return true;
> > >  }
> > >
> > >  /*
> > > @@ -759,10 +760,11 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int
> > >   * not be used in such cases because it might fail due to mm_lock_seq overflow.
> > >   * This functionality is used to obtain vma read lock and drop the mmap read lock.
> > >   */
> > > -static inline void vma_start_read_locked(struct vm_area_struct *vma)
> > > +static inline bool vma_start_read_locked(struct vm_area_struct *vma)
> > >  {
> > >       mmap_assert_locked(vma->vm_mm);
> > >       down_read(&vma->vm_lock.lock);
> > > +     return true;
> > >  }
> > >
> > >  static inline void vma_end_read(struct vm_area_struct *vma)
> > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > index 4527c385935b..411a663932c4 100644
> > > --- a/mm/userfaultfd.c
> > > +++ b/mm/userfaultfd.c
> > > @@ -85,7 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
> > >       mmap_read_lock(mm);
> > >       vma = find_vma_and_prepare_anon(mm, address);
> > >       if (!IS_ERR(vma))
> > > -             vma_start_read_locked(vma);
> > > +             if (!vma_start_read_locked(vma))
> > > +                     vma = ERR_PTR(-EAGAIN);
> >
> > Nit but this kind of reads a bit weirdly now:
> >
> >         if (!IS_ERR(vma))
> >                 if (!vma_start_read_locked(vma))
> >                         vma = ERR_PTR(-EAGAIN);
> >
> > Wouldn't this be nicer as:
> >
> >         if (!IS_ERR(vma) && !vma_start_read_locked(vma))
> >                 vma = ERR_PTR(-EAGAIN);
> >
> > On the other hand, this embeds an action in an expression, but then it sort of
> > still looks weird.
> >
> >         if (!IS_ERR(vma)) {
> >                 bool ok = vma_start_read_locked(vma);
> >
> >                 if (!ok)
> >                         vma = ERR_PTR(-EAGAIN);
> >         }
> >
> > This makes me wonder, now yes, we are truly bikeshedding, sorry, but maybe we
> > could just have vma_start_read_locked return a VMA pointer that could be an
> > error?
> >
> > Then this becomes:
> >
> >         if (!IS_ERR(vma))
> >                 vma = vma_start_read_locked(vma);
>
> No, I think it would be wrong for vma_start_read_locked() to always
> return EAGAIN when it can't lock the vma. The error code here is
> context-dependent, so while EAGAIN is the right thing here, it might
> not work for other future users.

Ack, makes sense.

But it'd be nice to clean this up so it isn't this arrow-shaped-code
thing. I mean obviously this is subjective and sorry to bikeshed this late
in a series... but :)

Are you OK with:

	if (!IS_ERR(vma)) {
		bool ok = vma_start_read_locked(vma);

		if (!ok)
			vma = ERR_PTR(-EAGAIN);
	}

?

I think this reads better.

Sorry to be a pain! :)

>
> >
> > >
> > >       mmap_read_unlock(mm);
> > >       return vma;
> > > @@ -1483,10 +1484,17 @@ static int uffd_move_lock(struct mm_struct *mm,
> > >       mmap_read_lock(mm);
> > >       err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
> > >       if (!err) {
> > > -             vma_start_read_locked(*dst_vmap);
> > > -             if (*dst_vmap != *src_vmap)
> > > -                     vma_start_read_locked_nested(*src_vmap,
> > > -                                             SINGLE_DEPTH_NESTING);
> > > +             if (vma_start_read_locked(*dst_vmap)) {
> > > +                     if (*dst_vmap != *src_vmap) {
> > > +                             if (!vma_start_read_locked_nested(*src_vmap,
> > > +                                                     SINGLE_DEPTH_NESTING)) {
> > > +                                     vma_end_read(*dst_vmap);
> >
> > Hmm, why do we end read if the lock failed here but not above?
>
> We have successfully done vma_start_read_locked(dst_vmap) (we locked
> dest vma) but we failed to do vma_start_read_locked_nested(src_vmap)
> (we could not lock src vma). So we should undo the dest vma locking.
> Does that clarify the logic?

Ahh right makes sense. Maybe a quick cheeky comment to that effect here too?

>
> >
> > > +                                     err = -EAGAIN;
> > > +                             }
> > > +                     }
> > > +             } else {
> > > +                     err = -EAGAIN;
> > > +             }
> > >       }
> >
> > This whole block is really ugly now, this really needs refactoring.
> >
> > How about (on assumption the vma_end_read() is correct):
> >
> >
> >         err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
> >         if (err)
> >                 goto out;
> >
> >         if (!vma_start_read_locked(*dst_vmap)) {
> >                 err = -EAGAIN;
> >                 goto out;
> >         }
> >
> >         /* Nothing further to do. */
> >         if (*dst_vmap == *src_vmap)
> >                 goto out;
> >
> >         if (!vma_start_read_locked_nested(*src_vmap,
> >                                 SINGLE_DEPTH_NESTING)) {
> >                 vma_end_read(*dst_vmap);
> >                 err = -EAGAIN;
> >         }
> >
> > out:
> >         mmap_read_unlock(mm);
> >         return err;
> > }
>
> Ok, that looks good to me. Will change this way.
> Thanks!
>

Thanks!

> >
> > >       mmap_read_unlock(mm);
> > >       return err;
> > > --
> > > 2.47.1.613.gc27f4b7a9f-goog
> > >