Rather than trying to merge again when ostensibly allocating a new VMA,
instead defer until the VMA is added and attempt to merge the existing
range.
This way we have no complicated unwinding logic midway through the process
of mapping the VMA.
In addition this removes limitations on the VMA not being able to be the
first in the virtual memory address space which was previously implicitly
required.
In theory, for this very same reason, we should unconditionally attempt
merge here, however this is likely to have a performance impact so it is
better to avoid this given the unlikely outcome of a merge.
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
mm/vma.c | 55 ++++++++++++++-----------------------------------------
1 file changed, 14 insertions(+), 41 deletions(-)
diff --git a/mm/vma.c b/mm/vma.c
index 7c690be67910..7194f9449c60 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -19,6 +19,7 @@ struct mmap_state {
struct file *file;
unsigned long charged;
+ bool retry_merge;
struct vm_area_struct *prev;
struct vm_area_struct *next;
@@ -2278,8 +2279,9 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf)
return 0;
}
+
static int __mmap_new_file_vma(struct mmap_state *map,
- struct vm_area_struct **vmap, bool *mergedp)
+ struct vm_area_struct **vmap)
{
struct vma_iterator *vmi = map->vmi;
struct vm_area_struct *vma = *vmap;
@@ -2308,37 +2310,10 @@ static int __mmap_new_file_vma(struct mmap_state *map,
!(map->flags & VM_MAYWRITE) &&
(vma->vm_flags & VM_MAYWRITE));
- /* mmap_file() might have changed VMA flags. */
+ /* If the flags change (and are mergeable), let's retry later. */
+ map->retry_merge = vma->vm_flags != map->flags && !(vma->vm_flags & VM_SPECIAL);
map->flags = vma->vm_flags;
- vma_iter_config(vmi, map->addr, map->end);
- /*
- * If flags changed after mmap_file(), we should try merge
- * vma again as we may succeed this time.
- */
- if (unlikely(map->flags != vma->vm_flags && map->prev)) {
- struct vm_area_struct *merge;
- VMG_MMAP_STATE(vmg, map, /* vma = */ NULL);
-
- merge = vma_merge_new_range(&vmg);
- if (merge) {
- /*
- * ->mmap() can change vma->vm_file and fput
- * the original file. So fput the vma->vm_file
- * here or we would add an extra fput for file
- * and cause general protection fault
- * ultimately.
- */
- fput(vma->vm_file);
- vm_area_free(vma);
- vma = merge;
- *mergedp = true;
- } else {
- vma_iter_config(vmi, map->addr, map->end);
- }
- }
-
- *vmap = vma;
return 0;
}
@@ -2346,10 +2321,6 @@ static int __mmap_new_file_vma(struct mmap_state *map,
* __mmap_new_vma() - Allocate a new VMA for the region, as merging was not
* possible.
*
- * An exception to this is if the mapping is file-backed, and the underlying
- * driver changes the VMA flags, permitting a subsequent merge of the VMA, in
- * which case the returned VMA is one that was merged on a second attempt.
- *
* @map: Mapping state.
* @vmap: Output pointer for the new VMA.
*
@@ -2359,7 +2330,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
{
struct vma_iterator *vmi = map->vmi;
int error = 0;
- bool merged = false;
struct vm_area_struct *vma;
/*
@@ -2382,7 +2352,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
}
if (map->file)
- error = __mmap_new_file_vma(map, &vma, &merged);
+ error = __mmap_new_file_vma(map, &vma);
else if (map->flags & VM_SHARED)
error = shmem_zero_setup(vma);
else
@@ -2391,9 +2361,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
if (error)
goto free_iter_vma;
- if (merged)
- goto file_expanded;
-
#ifdef CONFIG_SPARC64
/* TODO: Fix SPARC ADI! */
WARN_ON_ONCE(!arch_validate_flags(map->flags));
@@ -2410,8 +2377,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
* call covers the non-merge case.
*/
khugepaged_enter_vma(vma, map->flags);
-
-file_expanded:
ksm_add_vma(vma);
*vmap = vma;
return 0;
@@ -2493,6 +2458,14 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
goto unacct_error;
}
+ /* If flags changed, we might be able to merge, so try again. */
+ if (map.retry_merge) {
+ VMG_MMAP_STATE(vmg, &map, vma);
+
+ vma_iter_config(map.vmi, map.addr, map.end);
+ vma_merge_existing_range(&vmg);
+ }
+
__mmap_complete(&map, vma);
return addr;
--
2.47.0
On Fri, Oct 25, 2024 at 01:26:27PM +0100, Lorenzo Stoakes wrote: > Rather than trying to merge again when ostensibly allocating a new VMA, > instead defer until the VMA is added and attempt to merge the existing > range. > > This way we have no complicated unwinding logic midway through the process > of mapping the VMA. > > In addition this removes limitations on the VMA not being able to be the > first in the virtual memory address space which was previously implicitly > required. > > In theory, for this very same reason, we should unconditionally attempt > merge here, however this is likely to have a performance impact so it is > better to avoid this given the unlikely outcome of a merge. > > Reviewed-by: Vlastimil Babka <vbabka@suse.cz> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Hi Andrew, as per Vlastimil's review could you squash this fix-patch into this commit please? Thanks! ----8<---- From 51efd2702933919a605eb9a198adbdcf03da9c2f Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Date: Sat, 26 Oct 2024 11:33:01 +0100 Subject: [PATCH] mm: remove unnecessary indirection Now we removed the merge logic from __mmap_new_file_vma() we can simply pass in the vma directly rather than a pointer to a VMA, as pointed out by Vlastimil. 
--- mm/vma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index f22ffb772fe1..68138e8c153e 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2278,10 +2278,9 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf) static int __mmap_new_file_vma(struct mmap_state *map, - struct vm_area_struct **vmap) + struct vm_area_struct *vma) { struct vma_iterator *vmi = map->vmi; - struct vm_area_struct *vma = *vmap; int error; vma->vm_file = get_file(map->file); @@ -2349,7 +2348,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap) } if (map->file) - error = __mmap_new_file_vma(map, &vma); + error = __mmap_new_file_vma(map, vma); else if (map->flags & VM_SHARED) error = shmem_zero_setup(vma); else -- 2.47.0
On 10/25/24 14:26, Lorenzo Stoakes wrote: > Rather than trying to merge again when ostensibly allocating a new VMA, > instead defer until the VMA is added and attempt to merge the existing > range. > > This way we have no complicated unwinding logic midway through the process > of mapping the VMA. > > In addition this removes limitations on the VMA not being able to be the > first in the virtual memory address space which was previously implicitly > required. > > In theory, for this very same reason, we should unconditionally attempt > merge here, however this is likely to have a performance impact so it is > better to avoid this given the unlikely outcome of a merge. > > Reviewed-by: Vlastimil Babka <vbabka@suse.cz> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > mm/vma.c | 55 ++++++++++++++----------------------------------------- > 1 file changed, 14 insertions(+), 41 deletions(-) > > diff --git a/mm/vma.c b/mm/vma.c > index 7c690be67910..7194f9449c60 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -19,6 +19,7 @@ struct mmap_state { > struct file *file; > > unsigned long charged; > + bool retry_merge; > > struct vm_area_struct *prev; > struct vm_area_struct *next; > @@ -2278,8 +2279,9 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf) > return 0; > } > > + > static int __mmap_new_file_vma(struct mmap_state *map, > - struct vm_area_struct **vmap, bool *mergedp) > + struct vm_area_struct **vmap) AFAICS **vmap could become *vma now > { > struct vma_iterator *vmi = map->vmi; > struct vm_area_struct *vma = *vmap;
On Fri, Oct 25, 2024 at 07:40:43PM +0200, Vlastimil Babka wrote: > On 10/25/24 14:26, Lorenzo Stoakes wrote: > > Rather than trying to merge again when ostensibly allocating a new VMA, > > instead defer until the VMA is added and attempt to merge the existing > > range. > > > > This way we have no complicated unwinding logic midway through the process > > of mapping the VMA. > > > > In addition this removes limitations on the VMA not being able to be the > > first in the virtual memory address space which was previously implicitly > > required. > > > > In theory, for this very same reason, we should unconditionally attempt > > merge here, however this is likely to have a performance impact so it is > > better to avoid this given the unlikely outcome of a merge. > > > > Reviewed-by: Vlastimil Babka <vbabka@suse.cz> > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > --- > > mm/vma.c | 55 ++++++++++++++----------------------------------------- > > 1 file changed, 14 insertions(+), 41 deletions(-) > > > > diff --git a/mm/vma.c b/mm/vma.c > > index 7c690be67910..7194f9449c60 100644 > > --- a/mm/vma.c > > +++ b/mm/vma.c > > @@ -19,6 +19,7 @@ struct mmap_state { > > struct file *file; > > > > unsigned long charged; > > + bool retry_merge; > > > > struct vm_area_struct *prev; > > struct vm_area_struct *next; > > @@ -2278,8 +2279,9 @@ static int __mmap_prepare(struct mmap_state *map, struct list_head *uf) > > return 0; > > } > > > > + > > static int __mmap_new_file_vma(struct mmap_state *map, > > - struct vm_area_struct **vmap, bool *mergedp) > > + struct vm_area_struct **vmap) > > AFAICS **vmap could become *vma now Ah yeah good spot you're right, will chase up on respin or do a fix-patch next week! > > > { > > struct vma_iterator *vmi = map->vmi; > > struct vm_area_struct *vma = *vmap;
© 2016 - 2024 Red Hat, Inc.