[PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state

Lorenzo Stoakes posted 5 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Lorenzo Stoakes 3 months, 3 weeks ago
Now we have the madvise_behavior helper struct we no longer need to mess
around with void* pointers in order to propagate anon_vma_name, and this
means we can get rid of the confusing and inconsistent visitor pattern
implementation in madvise_vma_anon_name().

This means we now have a single state object that threads through most of
madvise()'s logic and a single code path which executes the majority of
madvise() behaviour (we maintain separate logic for failure injection and
memory population for the time being).

Note that users cannot inadvertently cause this behaviour to occur, as
madvise_behavior_valid() would reject it.

Doing this results in a can_modify_vma_madv() check for anonymous VMA name
changes, however this will cause no issues as this operation is not
prohibited.

We can also then reuse more code and drop the redundant
madvise_vma_anon_name() function altogether.

Additionally separate out behaviours that update VMAs from those that do
not.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/madvise.c | 167 +++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 77 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 070132f9842b..9dd935d64692 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -37,6 +37,9 @@
 #include "internal.h"
 #include "swap.h"
 
+#define __MADV_SET_ANON_VMA_NAME (-1)
+#define __MADV_CLEAR_ANON_VMA_NAME (-2)
+
 /*
  * Maximum number of attempts we make to install guard pages before we give up
  * and return -ERESTARTNOINTR to have userspace try again.
@@ -59,9 +62,13 @@ struct madvise_behavior {
 	int behavior;
 	struct mmu_gather *tlb;
 	enum madvise_lock_mode lock_mode;
+	struct anon_vma_name *anon_name;
 };
 
 #ifdef CONFIG_ANON_VMA_NAME
+static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
+		unsigned long end, struct madvise_behavior *madv_behavior);
+
 struct anon_vma_name *anon_vma_name_alloc(const char *name)
 {
 	struct anon_vma_name *anon_name;
@@ -112,6 +119,48 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
 
 	return 0;
 }
+
+int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+			  unsigned long len_in, struct anon_vma_name *anon_name)
+{
+	unsigned long end;
+	unsigned long len;
+	struct madvise_behavior madv_behavior = {
+		.lock_mode = MADVISE_MMAP_WRITE_LOCK,
+		.anon_name = anon_name,
+	};
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+	/* Check to see whether len was rounded up from small -ve to zero */
+	if (len_in && !len)
+		return -EINVAL;
+
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+
+	if (end == start)
+		return 0;
+
+	madv_behavior.behavior =
+		anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;
+
+	return madvise_walk_vmas(mm, start, end, &madv_behavior);
+}
+
+static bool is_anon_vma_name(int behavior)
+{
+	switch (behavior) {
+	case __MADV_SET_ANON_VMA_NAME:
+	case __MADV_CLEAR_ANON_VMA_NAME:
+		return true;
+	default:
+		return false;
+	}
+}
 #else /* CONFIG_ANON_VMA_NAME */
 static int replace_anon_vma_name(struct vm_area_struct *vma,
 				 struct anon_vma_name *anon_name)
@@ -121,6 +170,11 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
 
 	return 0;
 }
+
+static bool is_anon_vma_name(int behavior)
+{
+	return false;
+}
 #endif /* CONFIG_ANON_VMA_NAME */
 /*
  * Update the vm_flags on region of a vma, splitting it or merging it as
@@ -1252,13 +1306,12 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
 static int madvise_vma_behavior(struct vm_area_struct *vma,
 				struct vm_area_struct **prev,
 				unsigned long start, unsigned long end,
-				void *behavior_arg)
+				struct madvise_behavior *madv_behavior)
 {
-	struct madvise_behavior *arg = behavior_arg;
-	int behavior = arg->behavior;
-	int error;
-	struct anon_vma_name *anon_name;
+	int behavior = madv_behavior->behavior;
+	struct anon_vma_name *anon_name = madv_behavior->anon_name;
 	vm_flags_t new_flags = vma->vm_flags;
+	int error;
 
 	if (unlikely(!can_modify_vma_madv(vma, behavior)))
 		return -EPERM;
@@ -1275,7 +1328,17 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
 	case MADV_FREE:
 	case MADV_DONTNEED:
 	case MADV_DONTNEED_LOCKED:
-		return madvise_dontneed_free(vma, prev, start, end, arg);
+		return madvise_dontneed_free(vma, prev, start, end,
+					     madv_behavior);
+	case MADV_COLLAPSE:
+		return madvise_collapse(vma, prev, start, end);
+	case MADV_GUARD_INSTALL:
+		return madvise_guard_install(vma, prev, start, end);
+	case MADV_GUARD_REMOVE:
+		return madvise_guard_remove(vma, prev, start, end);
+
+	/* The below behaviours update VMAs via madvise_update_vma(). */
+
 	case MADV_NORMAL:
 		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
 		break;
@@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
 		if (error)
 			goto out;
 		break;
-	case MADV_COLLAPSE:
-		return madvise_collapse(vma, prev, start, end);
-	case MADV_GUARD_INSTALL:
-		return madvise_guard_install(vma, prev, start, end);
-	case MADV_GUARD_REMOVE:
-		return madvise_guard_remove(vma, prev, start, end);
+	case __MADV_SET_ANON_VMA_NAME:
+	case __MADV_CLEAR_ANON_VMA_NAME:
+		/* Only anonymous mappings can be named */
+		if (vma->vm_file && !vma_is_anon_shmem(vma))
+			return -EBADF;
+		break;
 	}
 
 	/* We cannot provide prev in this lock mode. */
-	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
-	anon_name = anon_vma_name(vma);
-	anon_vma_name_get(anon_name);
+	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
+
+	if (!is_anon_vma_name(behavior)) {
+		anon_name = anon_vma_name(vma);
+		anon_vma_name_get(anon_name);
+	}
 	error = madvise_update_vma(vma, prev, start, end, new_flags,
 				   anon_name);
-	anon_vma_name_put(anon_name);
+	if (!is_anon_vma_name(behavior))
+		anon_vma_name_put(anon_name);
 
 out:
 	/*
@@ -1532,11 +1599,7 @@ static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm,
  */
 static
 int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
-		      unsigned long end, struct madvise_behavior *madv_behavior,
-		      void *arg,
-		      int (*visit)(struct vm_area_struct *vma,
-				   struct vm_area_struct **prev, unsigned long start,
-				   unsigned long end, void *arg))
+		      unsigned long end, struct madvise_behavior *madv_behavior)
 {
 	struct vm_area_struct *vma;
 	struct vm_area_struct *prev;
@@ -1548,11 +1611,12 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
 	 * If VMA read lock is supported, apply madvise to a single VMA
 	 * tentatively, avoiding walking VMAs.
 	 */
-	if (madv_behavior && madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK) {
+	if (madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK) {
 		vma = try_vma_read_lock(mm, madv_behavior, start, end);
 		if (vma) {
 			prev = vma;
-			error = visit(vma, &prev, start, end, arg);
+			error = madvise_vma_behavior(vma, &prev, start, end,
+						     madv_behavior);
 			vma_end_read(vma);
 			return error;
 		}
@@ -1586,7 +1650,8 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
 			tmp = end;
 
 		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
-		error = visit(vma, &prev, start, tmp, arg);
+		error = madvise_vma_behavior(vma, &prev, start, tmp,
+					     madv_behavior);
 		if (error)
 			return error;
 		start = tmp;
@@ -1603,57 +1668,6 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
 	return unmapped_error;
 }
 
-#ifdef CONFIG_ANON_VMA_NAME
-static int madvise_vma_anon_name(struct vm_area_struct *vma,
-				 struct vm_area_struct **prev,
-				 unsigned long start, unsigned long end,
-				 void *anon_name)
-{
-	int error;
-
-	/* Only anonymous mappings can be named */
-	if (vma->vm_file && !vma_is_anon_shmem(vma))
-		return -EBADF;
-
-	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
-				   anon_name);
-
-	/*
-	 * madvise() returns EAGAIN if kernel resources, such as
-	 * slab, are temporarily unavailable.
-	 */
-	if (error == -ENOMEM)
-		error = -EAGAIN;
-	return error;
-}
-
-int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-			  unsigned long len_in, struct anon_vma_name *anon_name)
-{
-	unsigned long end;
-	unsigned long len;
-
-	if (start & ~PAGE_MASK)
-		return -EINVAL;
-	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
-
-	/* Check to see whether len was rounded up from small -ve to zero */
-	if (len_in && !len)
-		return -EINVAL;
-
-	end = start + len;
-	if (end < start)
-		return -EINVAL;
-
-	if (end == start)
-		return 0;
-
-	return madvise_walk_vmas(mm, start, end, NULL, anon_name,
-				 madvise_vma_anon_name);
-}
-#endif /* CONFIG_ANON_VMA_NAME */
-
-
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_lock for writing. Others, which simply traverse vmas, need
@@ -1845,8 +1859,7 @@ static int madvise_do_behavior(struct mm_struct *mm,
 	if (is_madvise_populate(behavior))
 		error = madvise_populate(mm, start, end, behavior);
 	else
-		error = madvise_walk_vmas(mm, start, end, madv_behavior,
-				madv_behavior, madvise_vma_behavior);
+		error = madvise_walk_vmas(mm, start, end, madv_behavior);
 	blk_finish_plug(&plug);
 	return error;
 }
-- 
2.49.0
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Vlastimil Babka 3 months, 2 weeks ago
On 6/19/25 22:26, Lorenzo Stoakes wrote:
> Now we have the madvise_behavior helper struct we no longer need to mess
> around with void* pointers in order to propagate anon_vma_name, and this
> means we can get rid of the confusing and inconsistent visitor pattern
> implementation in madvise_vma_anon_name().
> 
> This means we now have a single state object that threads through most of
> madvise()'s logic and a single code path which executes the majority of
> madvise() behaviour (we maintain separate logic for failure injection and
> memory population for the time being).
> 
> Note that users cannot inadvertently cause this behaviour to occur, as
> madvise_behavior_valid() would reject it.

This paragraph is a bit confusing. I've inferred from the code you're
talking about the new internal negative values, but the preceding paragraphs
don't mention them. Could you explain in more detail what the patch does?
I.e. adding the new struct madvise_behavior field and the new behavior value(s).

> Doing this results in a can_modify_vma_madv() check for anonymous VMA name
> changes, however this will cause no issues as this operation is not
> prohibited.
> 
> We can also then reuse more code and drop the redundant
> madvise_vma_anon_name() function altogether.
> 
> Additionally separate out behaviours that update VMAs from those that do
> not.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

> @@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>  		if (error)
>  			goto out;
>  		break;
> -	case MADV_COLLAPSE:
> -		return madvise_collapse(vma, prev, start, end);
> -	case MADV_GUARD_INSTALL:
> -		return madvise_guard_install(vma, prev, start, end);
> -	case MADV_GUARD_REMOVE:
> -		return madvise_guard_remove(vma, prev, start, end);
> +	case __MADV_SET_ANON_VMA_NAME:
> +	case __MADV_CLEAR_ANON_VMA_NAME:
> +		/* Only anonymous mappings can be named */
> +		if (vma->vm_file && !vma_is_anon_shmem(vma))
> +			return -EBADF;
> +		break;
>  	}
>  
>  	/* We cannot provide prev in this lock mode. */
> -	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
> -	anon_name = anon_vma_name(vma);
> -	anon_vma_name_get(anon_name);
> +	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
> +
> +	if (!is_anon_vma_name(behavior)) {
> +		anon_name = anon_vma_name(vma);
> +		anon_vma_name_get(anon_name);
> +	}
>  	error = madvise_update_vma(vma, prev, start, end, new_flags,
>  				   anon_name);
> -	anon_vma_name_put(anon_name);
> +	if (!is_anon_vma_name(behavior))
> +		anon_vma_name_put(anon_name);

This is not new, but the refactoring made it very visible that we're doing
get/put on anon_name exactly in cases where we're not messing with anon_name
so it might look buggy. Some explanatory comment would be thus nice,
otherwise people need to git blame for commit 942341dcc5748.

Otherwise LGTM, will wait with tag for v2 as you replied elsewhere there
will be changes. Thanks!
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Lorenzo Stoakes 3 months, 2 weeks ago
On Fri, Jun 20, 2025 at 03:05:02PM +0200, Vlastimil Babka wrote:
> On 6/19/25 22:26, Lorenzo Stoakes wrote:
> > Now we have the madvise_behavior helper struct we no longer need to mess
> > around with void* pointers in order to propagate anon_vma_name, and this
> > means we can get rid of the confusing and inconsistent visitor pattern
> > implementation in madvise_vma_anon_name().
> >
> > This means we now have a single state object that threads through most of
> > madvise()'s logic and a single code path which executes the majority of
> > madvise() behaviour (we maintain separate logic for failure injection and
> > memory population for the time being).
> >
> > Note that users cannot inadvertently cause this behaviour to occur, as
> > madvise_behavior_valid() would reject it.
>
> This paragraph is a bit confusing. I've inferred from the code you're
> talking about the new internal negative values, but the preceding paragraphs
> don't mention them. Could you explain in more detail what the patch does?
> I.e. adding the new struct madvise_behavior field and the new behavior value(s).

Sure will update on respin.

>
> > Doing this results in a can_modify_vma_madv() check for anonymous VMA name
> > changes, however this will cause no issues as this operation is not
> > prohibited.
> >
> > We can also then reuse more code and drop the redundant
> > madvise_vma_anon_name() function altogether.
> >
> > Additionally separate out behaviours that update VMAs from those that do
> > not.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> > @@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
> >  		if (error)
> >  			goto out;
> >  		break;
> > -	case MADV_COLLAPSE:
> > -		return madvise_collapse(vma, prev, start, end);
> > -	case MADV_GUARD_INSTALL:
> > -		return madvise_guard_install(vma, prev, start, end);
> > -	case MADV_GUARD_REMOVE:
> > -		return madvise_guard_remove(vma, prev, start, end);
> > +	case __MADV_SET_ANON_VMA_NAME:
> > +	case __MADV_CLEAR_ANON_VMA_NAME:
> > +		/* Only anonymous mappings can be named */
> > +		if (vma->vm_file && !vma_is_anon_shmem(vma))
> > +			return -EBADF;
> > +		break;
> >  	}
> >
> >  	/* We cannot provide prev in this lock mode. */
> > -	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
> > -	anon_name = anon_vma_name(vma);
> > -	anon_vma_name_get(anon_name);
> > +	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
> > +
> > +	if (!is_anon_vma_name(behavior)) {
> > +		anon_name = anon_vma_name(vma);
> > +		anon_vma_name_get(anon_name);
> > +	}
> >  	error = madvise_update_vma(vma, prev, start, end, new_flags,
> >  				   anon_name);
> > -	anon_vma_name_put(anon_name);
> > +	if (!is_anon_vma_name(behavior))
> > +		anon_vma_name_put(anon_name);
>
> This is not new, but the refactoring made it very visible that we're doing
> get/put on anon_name exactly in cases where we're not messing with anon_name
> so it might look buggy. Some explanatory comment would be thus nice,
> otherwise people need to git blame for commit 942341dcc5748.

Yeah I was confused myself until you mentioned that commit and - of course -
it's because of merge :P which maybe I should have figured out right away but
there we are :>)

So for my own sake as well as others I will add on respin.

>
> Otherwise LGTM, will wait with tag for v2 as you replied elsewhere there
> will be changes. Thanks!
>

Thanks!
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Zi Yan 3 months, 3 weeks ago
On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote:

> Now we have the madvise_behavior helper struct we no longer need to mess
> around with void* pointers in order to propagate anon_vma_name, and this
> means we can get rid of the confusing and inconsistent visitor pattern
> implementation in madvise_vma_anon_name().
>
> This means we now have a single state object that threads through most of
> madvise()'s logic and a single code path which executes the majority of
> madvise() behaviour (we maintain separate logic for failure injection and
> memory population for the time being).
>
> Note that users cannot inadvertently cause this behaviour to occur, as
> madvise_behavior_valid() would reject it.
>
> Doing this results in a can_modify_vma_madv() check for anonymous VMA name
> changes, however this will cause no issues as this operation is not
> prohibited.
>
> We can also then reuse more code and drop the redundant
> madvise_vma_anon_name() function altogether.
>
> Additionally separate out behaviours that update VMAs from those that do
> not.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
>  mm/madvise.c | 167 +++++++++++++++++++++++++++------------------------
>  1 file changed, 90 insertions(+), 77 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 070132f9842b..9dd935d64692 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -37,6 +37,9 @@
>  #include "internal.h"
>  #include "swap.h"
>
> +#define __MADV_SET_ANON_VMA_NAME (-1)
> +#define __MADV_CLEAR_ANON_VMA_NAME (-2)
> +

These are stored in madvise_behavior.behavior field and used
internally. At least you could add a comment in mman-common.h
about this use, in case someone uses these values for MADV_*.
Yes, these are large values that are very unlikely to be used,
but who knows whether one will use them. :)

>  /*
>   * Maximum number of attempts we make to install guard pages before we give up
>   * and return -ERESTARTNOINTR to have userspace try again.
> @@ -59,9 +62,13 @@ struct madvise_behavior {
>  	int behavior;
>  	struct mmu_gather *tlb;
>  	enum madvise_lock_mode lock_mode;
> +	struct anon_vma_name *anon_name;
>  };
>
>  #ifdef CONFIG_ANON_VMA_NAME
> +static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
> +		unsigned long end, struct madvise_behavior *madv_behavior);
> +
>  struct anon_vma_name *anon_vma_name_alloc(const char *name)
>  {
>  	struct anon_vma_name *anon_name;
> @@ -112,6 +119,48 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
>
>  	return 0;
>  }
> +
> +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> +			  unsigned long len_in, struct anon_vma_name *anon_name)
> +{
> +	unsigned long end;
> +	unsigned long len;
> +	struct madvise_behavior madv_behavior = {
> +		.lock_mode = MADVISE_MMAP_WRITE_LOCK,
> +		.anon_name = anon_name,
> +	};
> +
> +	if (start & ~PAGE_MASK)
> +		return -EINVAL;
> +	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
> +
> +	/* Check to see whether len was rounded up from small -ve to zero */
> +	if (len_in && !len)
> +		return -EINVAL;
> +
> +	end = start + len;
> +	if (end < start)
> +		return -EINVAL;
> +
> +	if (end == start)
> +		return 0;
> +
> +	madv_behavior.behavior =
> +		anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;

How are __MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME used?
It seems to me that madvise_vma_behavior() treats them the same.
Why does anon_name is NULL or not make a difference?

> +
> +	return madvise_walk_vmas(mm, start, end, &madv_behavior);
> +}
> +
> +static bool is_anon_vma_name(int behavior)

Maybe update_anon_vma_name()? Otherwise the function reads like
the behavior can be anon_vma_name.

> +{
> +	switch (behavior) {
> +	case __MADV_SET_ANON_VMA_NAME:
> +	case __MADV_CLEAR_ANON_VMA_NAME:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
>  #else /* CONFIG_ANON_VMA_NAME */
>  static int replace_anon_vma_name(struct vm_area_struct *vma,
>  				 struct anon_vma_name *anon_name)
> @@ -121,6 +170,11 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
>
>  	return 0;
>  }
> +
> +static bool is_anon_vma_name(int behavior)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_ANON_VMA_NAME */
>  /*
>   * Update the vm_flags on region of a vma, splitting it or merging it as
> @@ -1252,13 +1306,12 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
>  static int madvise_vma_behavior(struct vm_area_struct *vma,
>  				struct vm_area_struct **prev,
>  				unsigned long start, unsigned long end,
> -				void *behavior_arg)
> +				struct madvise_behavior *madv_behavior)
>  {
> -	struct madvise_behavior *arg = behavior_arg;
> -	int behavior = arg->behavior;
> -	int error;
> -	struct anon_vma_name *anon_name;
> +	int behavior = madv_behavior->behavior;
> +	struct anon_vma_name *anon_name = madv_behavior->anon_name;
>  	vm_flags_t new_flags = vma->vm_flags;
> +	int error;
>
>  	if (unlikely(!can_modify_vma_madv(vma, behavior)))
>  		return -EPERM;
> @@ -1275,7 +1328,17 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>  	case MADV_FREE:
>  	case MADV_DONTNEED:
>  	case MADV_DONTNEED_LOCKED:
> -		return madvise_dontneed_free(vma, prev, start, end, arg);
> +		return madvise_dontneed_free(vma, prev, start, end,
> +					     madv_behavior);
> +	case MADV_COLLAPSE:
> +		return madvise_collapse(vma, prev, start, end);
> +	case MADV_GUARD_INSTALL:
> +		return madvise_guard_install(vma, prev, start, end);
> +	case MADV_GUARD_REMOVE:
> +		return madvise_guard_remove(vma, prev, start, end);
> +
> +	/* The below behaviours update VMAs via madvise_update_vma(). */
> +

Great comment and code move!

>  	case MADV_NORMAL:
>  		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
>  		break;
> @@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>  		if (error)
>  			goto out;
>  		break;
> -	case MADV_COLLAPSE:
> -		return madvise_collapse(vma, prev, start, end);
> -	case MADV_GUARD_INSTALL:
> -		return madvise_guard_install(vma, prev, start, end);
> -	case MADV_GUARD_REMOVE:
> -		return madvise_guard_remove(vma, prev, start, end);
> +	case __MADV_SET_ANON_VMA_NAME:
> +	case __MADV_CLEAR_ANON_VMA_NAME:
> +		/* Only anonymous mappings can be named */
> +		if (vma->vm_file && !vma_is_anon_shmem(vma))
> +			return -EBADF;
> +		break;
>  	}

__MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME are
used here the code below. I do not see the functional difference
of them. I understand a NULL anon_name means clear the name,
but it is also just set the name to NULL.

>
>  	/* We cannot provide prev in this lock mode. */
> -	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
> -	anon_name = anon_vma_name(vma);
> -	anon_vma_name_get(anon_name);
> +	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
> +
> +	if (!is_anon_vma_name(behavior)) {
> +		anon_name = anon_vma_name(vma);
> +		anon_vma_name_get(anon_name);
> +	}
>  	error = madvise_update_vma(vma, prev, start, end, new_flags,
>  				   anon_name);
> -	anon_vma_name_put(anon_name);
> +	if (!is_anon_vma_name(behavior))
> +		anon_vma_name_put(anon_name);

Otherwise, the rest looks good to me.

--
Best Regards,
Yan, Zi
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Lorenzo Stoakes 3 months, 2 weeks ago
On Thu, Jun 19, 2025 at 09:32:43PM -0400, Zi Yan wrote:
> On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote:
>
> > Now we have the madvise_behavior helper struct we no longer need to mess
> > around with void* pointers in order to propagate anon_vma_name, and this
> > means we can get rid of the confusing and inconsistent visitor pattern
> > implementation in madvise_vma_anon_name().
> >
> > This means we now have a single state object that threads through most of
> > madvise()'s logic and a single code path which executes the majority of
> > madvise() behaviour (we maintain separate logic for failure injection and
> > memory population for the time being).
> >
> > Note that users cannot inadvertently cause this behaviour to occur, as
> > madvise_behavior_valid() would reject it.
> >
> > Doing this results in a can_modify_vma_madv() check for anonymous VMA name
> > changes, however this will cause no issues as this operation is not
> > prohibited.
> >
> > We can also then reuse more code and drop the redundant
> > madvise_vma_anon_name() function altogether.
> >
> > Additionally separate out behaviours that update VMAs from those that do
> > not.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
> >  mm/madvise.c | 167 +++++++++++++++++++++++++++------------------------
> >  1 file changed, 90 insertions(+), 77 deletions(-)
> >
> > diff --git a/mm/madvise.c b/mm/madvise.c
> > index 070132f9842b..9dd935d64692 100644
> > --- a/mm/madvise.c
> > +++ b/mm/madvise.c
> > @@ -37,6 +37,9 @@
> >  #include "internal.h"
> >  #include "swap.h"
> >
> > +#define __MADV_SET_ANON_VMA_NAME (-1)
> > +#define __MADV_CLEAR_ANON_VMA_NAME (-2)
> > +
>
> These are stored in madvise_behavior.behavior field and used
> internally. At least you could add a comment in mman-common.h
> about this use, in case someone uses these values for MADV_*.
> Yes, these are large values that are very unlikely to be used,
> but who knows whether one will use them. :)

These are signed integers, so somebody would have to overflow >2.14bn, I don't
think this is really worth commenting there without adding confusion, and at any
rate, even if somebody did do that, madvise_behavior_valid() would catch it :)

>
> >  /*
> >   * Maximum number of attempts we make to install guard pages before we give up
> >   * and return -ERESTARTNOINTR to have userspace try again.
> > @@ -59,9 +62,13 @@ struct madvise_behavior {
> >  	int behavior;
> >  	struct mmu_gather *tlb;
> >  	enum madvise_lock_mode lock_mode;
> > +	struct anon_vma_name *anon_name;
> >  };
> >
> >  #ifdef CONFIG_ANON_VMA_NAME
> > +static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
> > +		unsigned long end, struct madvise_behavior *madv_behavior);
> > +
> >  struct anon_vma_name *anon_vma_name_alloc(const char *name)
> >  {
> >  	struct anon_vma_name *anon_name;
> > @@ -112,6 +119,48 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
> >
> >  	return 0;
> >  }
> > +
> > +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> > +			  unsigned long len_in, struct anon_vma_name *anon_name)
> > +{
> > +	unsigned long end;
> > +	unsigned long len;
> > +	struct madvise_behavior madv_behavior = {
> > +		.lock_mode = MADVISE_MMAP_WRITE_LOCK,
> > +		.anon_name = anon_name,
> > +	};
> > +
> > +	if (start & ~PAGE_MASK)
> > +		return -EINVAL;
> > +	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
> > +
> > +	/* Check to see whether len was rounded up from small -ve to zero */
> > +	if (len_in && !len)
> > +		return -EINVAL;
> > +
> > +	end = start + len;
> > +	if (end < start)
> > +		return -EINVAL;
> > +
> > +	if (end == start)
> > +		return 0;
> > +
> > +	madv_behavior.behavior =
> > +		anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;
>
> How are __MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME used?
> It seems to me that madvise_vma_behavior() treats them the same.
> Why does anon_name is NULL or not make a difference?

Yeah you're right, I separated out because at one point I thought I needed to to
correctly handle the update vs. other updates but that's not actually the case,
will change.

>
> > +
> > +	return madvise_walk_vmas(mm, start, end, &madv_behavior);
> > +}
> > +
> > +static bool is_anon_vma_name(int behavior)
>
> Maybe update_anon_vma_name()? Otherwise the function reads like
> the behavior can be anon_vma_name.

Ack will change to is_set_anon_vma_name() or something like this to make
clearer.

>
> > +{
> > +	switch (behavior) {
> > +	case __MADV_SET_ANON_VMA_NAME:
> > +	case __MADV_CLEAR_ANON_VMA_NAME:
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}
> >  #else /* CONFIG_ANON_VMA_NAME */
> >  static int replace_anon_vma_name(struct vm_area_struct *vma,
> >  				 struct anon_vma_name *anon_name)
> > @@ -121,6 +170,11 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
> >
> >  	return 0;
> >  }
> > +
> > +static bool is_anon_vma_name(int behavior)
> > +{
> > +	return false;
> > +}
> >  #endif /* CONFIG_ANON_VMA_NAME */
> >  /*
> >   * Update the vm_flags on region of a vma, splitting it or merging it as
> > @@ -1252,13 +1306,12 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
> >  static int madvise_vma_behavior(struct vm_area_struct *vma,
> >  				struct vm_area_struct **prev,
> >  				unsigned long start, unsigned long end,
> > -				void *behavior_arg)
> > +				struct madvise_behavior *madv_behavior)
> >  {
> > -	struct madvise_behavior *arg = behavior_arg;
> > -	int behavior = arg->behavior;
> > -	int error;
> > -	struct anon_vma_name *anon_name;
> > +	int behavior = madv_behavior->behavior;
> > +	struct anon_vma_name *anon_name = madv_behavior->anon_name;
> >  	vm_flags_t new_flags = vma->vm_flags;
> > +	int error;
> >
> >  	if (unlikely(!can_modify_vma_madv(vma, behavior)))
> >  		return -EPERM;
> > @@ -1275,7 +1328,17 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
> >  	case MADV_FREE:
> >  	case MADV_DONTNEED:
> >  	case MADV_DONTNEED_LOCKED:
> > -		return madvise_dontneed_free(vma, prev, start, end, arg);
> > +		return madvise_dontneed_free(vma, prev, start, end,
> > +					     madv_behavior);
> > +	case MADV_COLLAPSE:
> > +		return madvise_collapse(vma, prev, start, end);
> > +	case MADV_GUARD_INSTALL:
> > +		return madvise_guard_install(vma, prev, start, end);
> > +	case MADV_GUARD_REMOVE:
> > +		return madvise_guard_remove(vma, prev, start, end);
> > +
> > +	/* The below behaviours update VMAs via madvise_update_vma(). */
> > +
>
> Great comment and code move!

Thanks!

>
> >  	case MADV_NORMAL:
> >  		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
> >  		break;
> > @@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
> >  		if (error)
> >  			goto out;
> >  		break;
> > -	case MADV_COLLAPSE:
> > -		return madvise_collapse(vma, prev, start, end);
> > -	case MADV_GUARD_INSTALL:
> > -		return madvise_guard_install(vma, prev, start, end);
> > -	case MADV_GUARD_REMOVE:
> > -		return madvise_guard_remove(vma, prev, start, end);
> > +	case __MADV_SET_ANON_VMA_NAME:
> > +	case __MADV_CLEAR_ANON_VMA_NAME:
> > +		/* Only anonymous mappings can be named */
> > +		if (vma->vm_file && !vma_is_anon_shmem(vma))
> > +			return -EBADF;
> > +		break;
> >  	}
>
> __MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME are
> used here the code below. I do not see the functional difference
> of them. I understand a NULL anon_name means clear the name,
> but it is also just set the name to NULL.

Yeah you're right this was my mistake :) I had previously felt I should
differentiate as previously the code below was

if (madv_behavior->anon_name) {  ...

Which would have become:

if (behavior != __MADV_CLEAR_ANON_VMA_NAME && madv_behavior->anon_name) {  ...

But then realised we can just check the behavior flag and anyway avoid the
unnecessary get/put by doing this.

Will fixup.

>
> >
> >  	/* We cannot provide prev in this lock mode. */
> > -	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
> > -	anon_name = anon_vma_name(vma);
> > -	anon_vma_name_get(anon_name);
> > +	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
> > +
> > +	if (!is_anon_vma_name(behavior)) {
> > +		anon_name = anon_vma_name(vma);
> > +		anon_vma_name_get(anon_name);
> > +	}
> >  	error = madvise_update_vma(vma, prev, start, end, new_flags,
> >  				   anon_name);
> > -	anon_vma_name_put(anon_name);
> > +	if (!is_anon_vma_name(behavior))
> > +		anon_vma_name_put(anon_name);
>
> Otherwise, the rest looks good to me.

Thanks!

>
> --
> Best Regards,
> Yan, Zi
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Lance Yang 3 months, 3 weeks ago

On 2025/6/20 09:32, Zi Yan wrote:
> On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote:
> 
>> Now we have the madvise_behavior helper struct we no longer need to mess
>> around with void* pointers in order to propagate anon_vma_name, and this
>> means we can get rid of the confusing and inconsistent visitor pattern
>> implementation in madvise_vma_anon_name().
>>
>> This means we now have a single state object that threads through most of
>> madvise()'s logic and a single code path which executes the majority of
>> madvise() behaviour (we maintain separate logic for failure injection and
>> memory population for the time being).
>>
>> Note that users cannot inadvertently cause this behaviour to occur, as
>> madvise_behavior_valid() would reject it.
>>
>> Doing this results in a can_modify_vma_madv() check for anonymous VMA name
>> changes, however this will cause no issues as this operation is not
>> prohibited.
>>
>> We can also then reuse more code and drop the redundant
>> madvise_vma_anon_name() function altogether.
>>
>> Additionally separate out behaviours that update VMAs from those that do
>> not.
>>
>> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> ---
>>   mm/madvise.c | 167 +++++++++++++++++++++++++++------------------------
>>   1 file changed, 90 insertions(+), 77 deletions(-)
>>
>> diff --git a/mm/madvise.c b/mm/madvise.c
>> index 070132f9842b..9dd935d64692 100644
>> --- a/mm/madvise.c
>> +++ b/mm/madvise.c
>> @@ -37,6 +37,9 @@
>>   #include "internal.h"
>>   #include "swap.h"
>>
>> +#define __MADV_SET_ANON_VMA_NAME (-1)
>> +#define __MADV_CLEAR_ANON_VMA_NAME (-2)
>> +
> 
> These are stored in madvise_behavior.behavior field and used
> internally. At least you could add a comment in mman-common.h
> about this use, in case someone uses these values for MADV_*.
> Yes, these are large values that are very unlikely to be used,
> but who knows whether one will use them. :)
> 
>>   /*
>>    * Maximum number of attempts we make to install guard pages before we give up
>>    * and return -ERESTARTNOINTR to have userspace try again.
>> @@ -59,9 +62,13 @@ struct madvise_behavior {
>>   	int behavior;
>>   	struct mmu_gather *tlb;
>>   	enum madvise_lock_mode lock_mode;
>> +	struct anon_vma_name *anon_name;
>>   };
>>
>>   #ifdef CONFIG_ANON_VMA_NAME
>> +static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
>> +		unsigned long end, struct madvise_behavior *madv_behavior);
>> +
>>   struct anon_vma_name *anon_vma_name_alloc(const char *name)
>>   {
>>   	struct anon_vma_name *anon_name;
>> @@ -112,6 +119,48 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
>>
>>   	return 0;
>>   }
>> +
>> +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
>> +			  unsigned long len_in, struct anon_vma_name *anon_name)
>> +{
>> +	unsigned long end;
>> +	unsigned long len;
>> +	struct madvise_behavior madv_behavior = {
>> +		.lock_mode = MADVISE_MMAP_WRITE_LOCK,
>> +		.anon_name = anon_name,
>> +	};
>> +
>> +	if (start & ~PAGE_MASK)
>> +		return -EINVAL;
>> +	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
>> +
>> +	/* Check to see whether len was rounded up from small -ve to zero */
>> +	if (len_in && !len)
>> +		return -EINVAL;
>> +
>> +	end = start + len;
>> +	if (end < start)
>> +		return -EINVAL;
>> +
>> +	if (end == start)
>> +		return 0;
>> +
>> +	madv_behavior.behavior =
>> +		anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;
> 
> How are __MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME used?
> It seems to me that madvise_vma_behavior() treats them the same.
> Why does anon_name is NULL or not make a difference?
> 
>> +
>> +	return madvise_walk_vmas(mm, start, end, &madv_behavior);
>> +}
>> +
>> +static bool is_anon_vma_name(int behavior)
> 
> Maybe update_anon_vma_name()? Otherwise the function reads like
> the behavior can be anon_vma_name.
> 
>> +{
>> +	switch (behavior) {
>> +	case __MADV_SET_ANON_VMA_NAME:
>> +	case __MADV_CLEAR_ANON_VMA_NAME:
>> +		return true;
>> +	default:
>> +		return false;
>> +	}
>> +}
>>   #else /* CONFIG_ANON_VMA_NAME */
>>   static int replace_anon_vma_name(struct vm_area_struct *vma,
>>   				 struct anon_vma_name *anon_name)
>> @@ -121,6 +170,11 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
>>
>>   	return 0;
>>   }
>> +
>> +static bool is_anon_vma_name(int behavior)
>> +{
>> +	return false;
>> +}
>>   #endif /* CONFIG_ANON_VMA_NAME */
>>   /*
>>    * Update the vm_flags on region of a vma, splitting it or merging it as
>> @@ -1252,13 +1306,12 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
>>   static int madvise_vma_behavior(struct vm_area_struct *vma,
>>   				struct vm_area_struct **prev,
>>   				unsigned long start, unsigned long end,
>> -				void *behavior_arg)
>> +				struct madvise_behavior *madv_behavior)
>>   {
>> -	struct madvise_behavior *arg = behavior_arg;
>> -	int behavior = arg->behavior;
>> -	int error;
>> -	struct anon_vma_name *anon_name;
>> +	int behavior = madv_behavior->behavior;
>> +	struct anon_vma_name *anon_name = madv_behavior->anon_name;
>>   	vm_flags_t new_flags = vma->vm_flags;
>> +	int error;
>>
>>   	if (unlikely(!can_modify_vma_madv(vma, behavior)))
>>   		return -EPERM;
>> @@ -1275,7 +1328,17 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>>   	case MADV_FREE:
>>   	case MADV_DONTNEED:
>>   	case MADV_DONTNEED_LOCKED:
>> -		return madvise_dontneed_free(vma, prev, start, end, arg);
>> +		return madvise_dontneed_free(vma, prev, start, end,
>> +					     madv_behavior);
>> +	case MADV_COLLAPSE:
>> +		return madvise_collapse(vma, prev, start, end);
>> +	case MADV_GUARD_INSTALL:
>> +		return madvise_guard_install(vma, prev, start, end);
>> +	case MADV_GUARD_REMOVE:
>> +		return madvise_guard_remove(vma, prev, start, end);
>> +
>> +	/* The below behaviours update VMAs via madvise_update_vma(). */
>> +
> 
> Great comment and code move!
> 
>>   	case MADV_NORMAL:
>>   		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
>>   		break;
>> @@ -1325,21 +1388,25 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
>>   		if (error)
>>   			goto out;
>>   		break;
>> -	case MADV_COLLAPSE:
>> -		return madvise_collapse(vma, prev, start, end);
>> -	case MADV_GUARD_INSTALL:
>> -		return madvise_guard_install(vma, prev, start, end);
>> -	case MADV_GUARD_REMOVE:
>> -		return madvise_guard_remove(vma, prev, start, end);
>> +	case __MADV_SET_ANON_VMA_NAME:
>> +	case __MADV_CLEAR_ANON_VMA_NAME:


Would this be clearer and more closely describe the code's logic?

		/* Reject naming for regular file-backed mappings. */


>> +		/* Only anonymous mappings can be named */
>> +		if (vma->vm_file && !vma_is_anon_shmem(vma))
>> +			return -EBADF;
>> +		break;
>>   	}


Thanks,
Lance

> 
> __MADV_SET_ANON_VMA_NAME and __MADV_CLEAR_ANON_VMA_NAME are
> used here the code below. I do not see the functional difference
> of them. I understand a NULL anon_name means clear the name,
> but it is also just set the name to NULL.
> 
>>
>>   	/* We cannot provide prev in this lock mode. */
>> -	VM_WARN_ON_ONCE(arg->lock_mode == MADVISE_VMA_READ_LOCK);
>> -	anon_name = anon_vma_name(vma);
>> -	anon_vma_name_get(anon_name);
>> +	VM_WARN_ON_ONCE(madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK);
>> +
>> +	if (!is_anon_vma_name(behavior)) {
>> +		anon_name = anon_vma_name(vma);
>> +		anon_vma_name_get(anon_name);
>> +	}
>>   	error = madvise_update_vma(vma, prev, start, end, new_flags,
>>   				   anon_name);
>> -	anon_vma_name_put(anon_name);
>> +	if (!is_anon_vma_name(behavior))
>> +		anon_vma_name_put(anon_name);
> 
> Otherwise, the rest looks good to me.
> 
> --
> Best Regards,
> Yan, Zi
Re: [PATCH 1/5] mm/madvise: remove the visitor pattern and thread anon_vma state
Posted by Lorenzo Stoakes 3 months, 2 weeks ago
On Fri, Jun 20, 2025 at 10:43:30AM +0800, Lance Yang wrote:
> Would this be clearer and more closely describe the code's logic?
>
> 		/* Reject naming for regular file-backed mappings. */

Yeah here's the fun of 'anonymous' being super unclear :) it's like the 'no
true Scotsman' fallacy in mm form... ;)

'Anonymous, no really really anonymous, not even file-backed' :P

it's a good point but I'd rather keep this as close to the original as
possible so it's just a code _move_. Perhaps we can update later but I
think the context makes clear what is meant here at least.

>
>
> > > +		/* Only anonymous mappings can be named */
> > > +		if (vma->vm_file && !vma_is_anon_shmem(vma))
> > > +			return -EBADF;
> > > +		break;
> > >   	}
>
>
> Thanks,
> Lance
>