[PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()

Liam R. Howlett posted 9 patches 3 weeks, 2 days ago
[PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
Posted by Liam R. Howlett 3 weeks, 2 days ago
vms_clear_ptes() is slightly different than other callers to
unmap_region() and so had the unmapping open-coded.  Using the new
structure it is now possible to special-case the struct setup instead of
having the open-coded function.

exit_mmap() also calls unmap_vmas() with many arguments.  Using the
unmap_all_init() function to set the unmap descriptor for all vmas makes
this a bit easier to read.

Update to the vma test code is necessary to ensure testing continues to
function.

No functional changes intended.

Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
 include/linux/mm.h               |  3 ---
 mm/internal.h                    |  3 +++
 mm/memory.c                      | 24 ++++++++------------
 mm/mmap.c                        |  5 +++-
 mm/vma.c                         | 39 ++++++++++++++++++--------------
 mm/vma.h                         | 14 ++++++++++++
 tools/testing/vma/vma_internal.h | 14 ++++--------
 7 files changed, 56 insertions(+), 46 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 892fe5dbf9de0..23eb59d543390 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2450,9 +2450,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
 	zap_page_range_single(vma, vma->vm_start,
 			      vma->vm_end - vma->vm_start, NULL);
 }
-void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
-		struct vm_area_struct *start_vma, unsigned long start,
-		unsigned long end, unsigned long tree_end, bool mm_wr_locked);
 
 struct mmu_notifier_range;
 
diff --git a/mm/internal.h b/mm/internal.h
index d295252407fee..1239944f2830a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma)
 	}
 }
 
+/* unmap_vmas is in mm/memory.c */
+void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
+
 #ifdef CONFIG_MMU
 
 /* Flags for folio_pte_batch(). */
diff --git a/mm/memory.c b/mm/memory.c
index 829cd94950182..8d4d976311037 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2084,12 +2084,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlb: address of the caller's struct mmu_gather
- * @mas: the maple state
- * @vma: the starting vma
- * @start_addr: virtual address at which to start unmapping
- * @end_addr: virtual address at which to end unmapping
- * @tree_end: The maximum index to check
- * @mm_wr_locked: lock flag
+ * @unmap: The unmap_desc
  *
  * Unmap all pages in the vma list.
  *
@@ -2102,11 +2097,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
-		struct vm_area_struct *vma, unsigned long start_addr,
-		unsigned long end_addr, unsigned long tree_end,
-		bool mm_wr_locked)
+void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
 {
+	struct vm_area_struct *vma;
 	struct mmu_notifier_range range;
 	struct zap_details details = {
 		.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
@@ -2114,17 +2107,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
 		.even_cows = true,
 	};
 
+	vma = unmap->first;
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
-				start_addr, end_addr);
+				unmap->vma_min, unmap->vma_max);
 	mmu_notifier_invalidate_range_start(&range);
 	do {
-		unsigned long start = start_addr;
-		unsigned long end = end_addr;
+		unsigned long start = unmap->vma_min;
+		unsigned long end = unmap->vma_max;
 		hugetlb_zap_begin(vma, &start, &end);
 		unmap_single_vma(tlb, vma, start, end, &details,
-				 mm_wr_locked);
+				 unmap->mm_wr_locked);
 		hugetlb_zap_end(vma, &details);
-		vma = mas_find(mas, tree_end - 1);
+		vma = mas_find(unmap->mas, unmap->tree_max - 1);
 	} while (vma);
 	mmu_notifier_invalidate_range_end(&range);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 5c9bd3f20e53f..6011f62b0a294 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1280,10 +1280,12 @@ void exit_mmap(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	VMA_ITERATOR(vmi, mm, 0);
+	struct unmap_desc unmap;
 
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
 
+	unmap.mm_wr_locked = false;
 	mmap_read_lock(mm);
 	arch_exit_mmap(mm);
 
@@ -1295,11 +1297,12 @@ void exit_mmap(struct mm_struct *mm)
 		goto destroy;
 	}
 
+	unmap_all_init(&unmap, &vmi, vma);
 	flush_cache_mm(mm);
 	tlb_gather_mmu_fullmm(&tlb, mm);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
-	unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
+	unmap_vmas(&tlb, &unmap);
 	mmap_read_unlock(mm);
 
 	/*
diff --git a/mm/vma.c b/mm/vma.c
index c92384975cbb2..ad64cd9795ef3 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -481,8 +481,7 @@ void unmap_region(struct unmap_desc *desc)
 
 	tlb_gather_mmu(&tlb, mm);
 	update_hiwater_rss(mm);
-	unmap_vmas(&tlb, mas, desc->first, desc->vma_min, desc->vma_max,
-		   desc->vma_max, desc->mm_wr_locked);
+	unmap_vmas(&tlb, desc);
 	mas_set(mas, desc->tree_reset);
 	free_pgtables(&tlb, mas, desc->first, desc->first_pgaddr,
 		      desc->last_pgaddr, desc->tree_max,
@@ -1213,26 +1212,32 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
 static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
 		    struct ma_state *mas_detach, bool mm_wr_locked)
 {
-	struct mmu_gather tlb;
+	struct unmap_desc unmap = {
+		.mas = mas_detach,
+		.first = vms->vma,
+		/* start and end may be different if there is no prev or next vma. */
+		.first_pgaddr = vms->unmap_start,
+		.last_pgaddr = vms->unmap_end,
+		.vma_min = vms->start,
+		.vma_max = vms->end,
+		/*
+		 * The tree limits and reset differ from the normal case since it's a
+		 * side-tree
+		 */
+		.tree_reset = 1,
+		.tree_max = vms->vma_count,
+		/*
+		 * We can free page tables without write-locking mmap_lock because VMAs
+		 * were isolated before we downgraded mmap_lock.
+		 */
+		.mm_wr_locked = mm_wr_locked,
+	};
 
 	if (!vms->clear_ptes) /* Nothing to do */
 		return;
 
-	/*
-	 * We can free page tables without write-locking mmap_lock because VMAs
-	 * were isolated before we downgraded mmap_lock.
-	 */
 	mas_set(mas_detach, 1);
-	tlb_gather_mmu(&tlb, vms->vma->vm_mm);
-	update_hiwater_rss(vms->vma->vm_mm);
-	unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
-		   vms->vma_count, mm_wr_locked);
-
-	mas_set(mas_detach, 1);
-	/* start and end may be different if there is no prev or next vma. */
-	free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
-		      vms->unmap_end, vms->unmap_end, mm_wr_locked);
-	tlb_finish_mmu(&tlb);
+	unmap_region(&unmap);
 	vms->clear_ptes = false;
 }
 
diff --git a/mm/vma.h b/mm/vma.h
index 4edd5d26ffcfc..8b55a0c73d097 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -164,6 +164,20 @@ struct unmap_desc {
 	bool mm_wr_locked;            /* If the mmap write lock is held */
 };
 
+static inline void unmap_all_init(struct unmap_desc *desc,
+		struct vma_iterator *vmi, struct vm_area_struct *vma)
+{
+	desc->mas = &vmi->mas;
+	desc->first = vma;
+	desc->first_pgaddr = FIRST_USER_ADDRESS;
+	desc->last_pgaddr = USER_PGTABLES_CEILING;
+	desc->vma_min = 0;
+	desc->vma_max = ULONG_MAX;
+	desc->tree_max = ULONG_MAX;
+	desc->tree_reset = vma->vm_end;
+	desc->mm_wr_locked = false;
+}
+
 #define UNMAP_REGION(name, _vmi, _vma, _vma_min, _vma_max, _prev, _next)      \
 	struct unmap_desc name = {                                          \
 		.mas = &(_vmi)->mas,                                          \
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 823d379e1fac2..d73ad4747d40a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -884,18 +884,12 @@ static inline void update_hiwater_vm(struct mm_struct *)
 {
 }
 
-static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
-		      struct vm_area_struct *vma, unsigned long start_addr,
-		      unsigned long end_addr, unsigned long tree_end,
-		      bool mm_wr_locked)
+struct unmap_desc;
+
+static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
 {
 	(void)tlb;
-	(void)mas;
-	(void)vma;
-	(void)start_addr;
-	(void)end_addr;
-	(void)tree_end;
-	(void)mm_wr_locked;
+	(void)unmap;
 }
 
 static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
-- 
2.47.2
Re: [PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
Posted by kernel test robot 2 weeks, 6 days ago
Hi Liam,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[cannot apply to linus/master v6.17-rc5 next-20250911]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Liam-R-Howlett/mm-mmap-Move-exit_mmap-trace-point/20250910-031555
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250909190945.1030905-9-Liam.Howlett%40oracle.com
patch subject: [PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
config: arm-allnoconfig (https://download.01.org/0day-ci/archive/20250912/202509121212.sx5Tfe4e-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 21857ae337e0892a5522b6e7337899caa61de2a6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250912/202509121212.sx5Tfe4e-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509121212.sx5Tfe4e-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from mm/filemap.c:53:
   In file included from mm/internal.h:23:
>> mm/vma.h:173:22: error: use of undeclared identifier 'USER_PGTABLES_CEILING'
     173 |         desc->last_pgaddr = USER_PGTABLES_CEILING;
         |                             ^~~~~~~~~~~~~~~~~~~~~
   In file included from mm/filemap.c:65:
   mm/swap.h:461:10: error: call to undeclared function 'swp_offset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     461 |                 return swp_offset(folio->swap);
         |                        ^
   2 errors generated.
--
   In file included from mm/oom_kill.c:50:
   In file included from mm/internal.h:23:
>> mm/vma.h:173:22: error: use of undeclared identifier 'USER_PGTABLES_CEILING'
     173 |         desc->last_pgaddr = USER_PGTABLES_CEILING;
         |                             ^~~~~~~~~~~~~~~~~~~~~
   1 error generated.


vim +/USER_PGTABLES_CEILING +173 mm/vma.h

   166	
   167	static inline void unmap_all_init(struct unmap_desc *desc,
   168			struct vma_iterator *vmi, struct vm_area_struct *vma)
   169	{
   170		desc->mas = &vmi->mas;
   171		desc->first = vma;
   172		desc->first_pgaddr = FIRST_USER_ADDRESS;
 > 173		desc->last_pgaddr = USER_PGTABLES_CEILING;
   174		desc->vma_min = 0;
   175		desc->vma_max = ULONG_MAX;
   176		desc->tree_max = ULONG_MAX;
   177		desc->tree_reset = vma->vm_end;
   178		desc->mm_wr_locked = false;
   179	}
   180	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
Posted by Lorenzo Stoakes 3 weeks ago
On Tue, Sep 09, 2025 at 03:09:44PM -0400, Liam R. Howlett wrote:
> vms_clear_ptes() is slightly different than other callers to
> unmap_region() and so had the unmapping open-coded.  Using the new
> structure it is now possible to special-case the struct setup instead of
> having the open-coded function.
>
> exit_mmap() also calls unmap_vmas() with many arguemnts.  Using the
> unmap_all_init() function to set the unmap descriptor for all vmas makes
> this a bit easier to read.
>
> Update to the vma test code is necessary to ensure testing continues to
> function.
>
> No functional changes intended.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
>  include/linux/mm.h               |  3 ---
>  mm/internal.h                    |  3 +++
>  mm/memory.c                      | 24 ++++++++------------
>  mm/mmap.c                        |  5 +++-
>  mm/vma.c                         | 39 ++++++++++++++++++--------------
>  mm/vma.h                         | 14 ++++++++++++
>  tools/testing/vma/vma_internal.h | 14 ++++--------
>  7 files changed, 56 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 892fe5dbf9de0..23eb59d543390 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2450,9 +2450,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
>  	zap_page_range_single(vma, vma->vm_start,
>  			      vma->vm_end - vma->vm_start, NULL);
>  }
> -void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -		struct vm_area_struct *start_vma, unsigned long start,
> -		unsigned long end, unsigned long tree_end, bool mm_wr_locked);
>
>  struct mmu_notifier_range;
>
> diff --git a/mm/internal.h b/mm/internal.h
> index d295252407fee..1239944f2830a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma)
>  	}
>  }
>
> +/* unmap_vmas is in mm/memory.c */
> +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
> +
>  #ifdef CONFIG_MMU
>
>  /* Flags for folio_pte_batch(). */
> diff --git a/mm/memory.c b/mm/memory.c
> index 829cd94950182..8d4d976311037 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2084,12 +2084,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>  /**
>   * unmap_vmas - unmap a range of memory covered by a list of vma's
>   * @tlb: address of the caller's struct mmu_gather
> - * @mas: the maple state
> - * @vma: the starting vma
> - * @start_addr: virtual address at which to start unmapping
> - * @end_addr: virtual address at which to end unmapping
> - * @tree_end: The maximum index to check
> - * @mm_wr_locked: lock flag
> + * @unmap: The unmap_desc
>   *
>   * Unmap all pages in the vma list.
>   *
> @@ -2102,11 +2097,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
>   * drops the lock and schedules.
>   */
> -void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -		struct vm_area_struct *vma, unsigned long start_addr,
> -		unsigned long end_addr, unsigned long tree_end,
> -		bool mm_wr_locked)
> +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
>  {
> +	struct vm_area_struct *vma;
>  	struct mmu_notifier_range range;
>  	struct zap_details details = {
>  		.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
> @@ -2114,17 +2107,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
>  		.even_cows = true,
>  	};
>
> +	vma = unmap->first;
>  	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
> -				start_addr, end_addr);
> +				unmap->vma_min, unmap->vma_max);
>  	mmu_notifier_invalidate_range_start(&range);
>  	do {
> -		unsigned long start = start_addr;
> -		unsigned long end = end_addr;
> +		unsigned long start = unmap->vma_min;
> +		unsigned long end = unmap->vma_max;
>  		hugetlb_zap_begin(vma, &start, &end);
>  		unmap_single_vma(tlb, vma, start, end, &details,
> -				 mm_wr_locked);
> +				 unmap->mm_wr_locked);
>  		hugetlb_zap_end(vma, &details);
> -		vma = mas_find(mas, tree_end - 1);
> +		vma = mas_find(unmap->mas, unmap->tree_max - 1);

An aside, but do kinda see David's point about min, max & start, end & floor
ceiling, perhaps the prefix_ is enough to differentiate these and we could be
as consistent as we can be?

>  	} while (vma);
>  	mmu_notifier_invalidate_range_end(&range);
>  }
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5c9bd3f20e53f..6011f62b0a294 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1280,10 +1280,12 @@ void exit_mmap(struct mm_struct *mm)
>  	struct vm_area_struct *vma;
>  	unsigned long nr_accounted = 0;
>  	VMA_ITERATOR(vmi, mm, 0);
> +	struct unmap_desc unmap;

Perhaps :

	struct unmap_desc unmap = {
		.mm_wr_locked = false,
	};

To be clear? I mean it does mean we're initialising some fields twice
though... but at least avoids uninitialised state if we add new fields.

However I see you're already setting mm_wr_locked to false in
unmap_all_init() anyway?

Ideally we'd have something like UNMAP_STATE_VMA() here, but ofc you don't
have the VMA yet.

So probably something like what you've done here is the way to go, but I'd
rather we initialise this var in case of adding fields later.

Then again... you would have to make sure the _init() function set all
fields anyway so maybe not necessary. Hmm.

>
>  	/* mm's last user has gone, and its about to be pulled down */
>  	mmu_notifier_release(mm);
>
> +	unmap.mm_wr_locked = false;
>  	mmap_read_lock(mm);
>  	arch_exit_mmap(mm);
>
> @@ -1295,11 +1297,12 @@ void exit_mmap(struct mm_struct *mm)
>  		goto destroy;
>  	}
>
> +	unmap_all_init(&unmap, &vmi, vma);
>  	flush_cache_mm(mm);
>  	tlb_gather_mmu_fullmm(&tlb, mm);
>  	/* update_hiwater_rss(mm) here? but nobody should be looking */
>  	/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
> -	unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
> +	unmap_vmas(&tlb, &unmap);
>  	mmap_read_unlock(mm);
>
>  	/*
> diff --git a/mm/vma.c b/mm/vma.c
> index c92384975cbb2..ad64cd9795ef3 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -481,8 +481,7 @@ void unmap_region(struct unmap_desc *desc)
>
>  	tlb_gather_mmu(&tlb, mm);
>  	update_hiwater_rss(mm);
> -	unmap_vmas(&tlb, mas, desc->first, desc->vma_min, desc->vma_max,
> -		   desc->vma_max, desc->mm_wr_locked);
> +	unmap_vmas(&tlb, desc);

Nit, perhaps didn't notice on previous commit actually, but you're being
inconsistent between naming this 'desc' and 'unmap'. Let's choose one and
stick with it.

>  	mas_set(mas, desc->tree_reset);
>  	free_pgtables(&tlb, mas, desc->first, desc->first_pgaddr,
>  		      desc->last_pgaddr, desc->tree_max,
> @@ -1213,26 +1212,32 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
>  		    struct ma_state *mas_detach, bool mm_wr_locked)
>  {
> -	struct mmu_gather tlb;
> +	struct unmap_desc unmap = {
> +		.mas = mas_detach,
> +		.first = vms->vma,
> +		/* start and end may be different if there is no prev or next vma. */
> +		.first_pgaddr = vms->unmap_start,
> +		.last_pgaddr = vms->unmap_end,
> +		.vma_min = vms->start,
> +		.vma_max = vms->end,
> +		/*
> +		 * The tree limits and reset differ from the normal case since it's a
> +		 * side-tree
> +		 */
> +		.tree_reset = 1,
> +		.tree_max = vms->vma_count,
> +		/*
> +		 * We can free page tables without write-locking mmap_lock because VMAs
> +		 * were isolated before we downgraded mmap_lock.
> +		 */
> +		.mm_wr_locked = mm_wr_locked,

These comments are great thanks! :)

> +	};
>
>  	if (!vms->clear_ptes) /* Nothing to do */
>  		return;
>
> -	/*
> -	 * We can free page tables without write-locking mmap_lock because VMAs
> -	 * were isolated before we downgraded mmap_lock.
> -	 */
>  	mas_set(mas_detach, 1);
> -	tlb_gather_mmu(&tlb, vms->vma->vm_mm);
> -	update_hiwater_rss(vms->vma->vm_mm);
> -	unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
> -		   vms->vma_count, mm_wr_locked);
> -
> -	mas_set(mas_detach, 1);
> -	/* start and end may be different if there is no prev or next vma. */
> -	free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
> -		      vms->unmap_end, vms->unmap_end, mm_wr_locked);
> -	tlb_finish_mmu(&tlb);
> +	unmap_region(&unmap);

HMmm why are we removing all these? I'm guessing this is a separate
refactoring, could we perhaps break this out, as it doesn't seem to quite
belong in this patch?

I mean this is really nice :) just I think belongs in another patch, unless
you feel it's really tied to this one/necessary here?

Sorry to be a pain... :)

>  	vms->clear_ptes = false;
>  }
>
> diff --git a/mm/vma.h b/mm/vma.h
> index 4edd5d26ffcfc..8b55a0c73d097 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -164,6 +164,20 @@ struct unmap_desc {
>  	bool mm_wr_locked;            /* If the mmap write lock is held */
>  };
>
> +static inline void unmap_all_init(struct unmap_desc *desc,
> +		struct vma_iterator *vmi, struct vm_area_struct *vma)
> +{
> +	desc->mas = &vmi->mas;
> +	desc->first = vma;
> +	desc->first_pgaddr = FIRST_USER_ADDRESS;
> +	desc->last_pgaddr = USER_PGTABLES_CEILING;
> +	desc->vma_min = 0;
> +	desc->vma_max = ULONG_MAX;
> +	desc->tree_max = ULONG_MAX;
> +	desc->tree_reset = vma->vm_end;
> +	desc->mm_wr_locked = false;
> +}
> +
>  #define UNMAP_REGION(name, _vmi, _vma, _vma_min, _vma_max, _prev, _next)      \
>  	struct unmap_desc name = {                                          \
>  		.mas = &(_vmi)->mas,                                          \
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 823d379e1fac2..d73ad4747d40a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -884,18 +884,12 @@ static inline void update_hiwater_vm(struct mm_struct *)
>  {
>  }
>
> -static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -		      struct vm_area_struct *vma, unsigned long start_addr,
> -		      unsigned long end_addr, unsigned long tree_end,
> -		      bool mm_wr_locked)
> +struct unmap_desc;
> +
> +static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
>  {
>  	(void)tlb;
> -	(void)mas;
> -	(void)vma;
> -	(void)start_addr;
> -	(void)end_addr;
> -	(void)tree_end;
> -	(void)mm_wr_locked;
> +	(void)unmap;
>  }

Hmm why is this still here, I thought I'd removed all these (void)'s... I think
actually that's what's in Vlasta's tree, so this will conflict unfortunately.

Anyway, please just remove all these (void)'s, they're unnecessary.

(Also I'm sorry for making updating this file a thing, but I'm not sure there's
a better way)

>
>  static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
> --
> 2.47.2
>
Re: [PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
Posted by Suren Baghdasaryan 3 weeks, 2 days ago
On Tue, Sep 9, 2025 at 12:10 PM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> vms_clear_ptes() is slightly different than other callers to
> unmap_region() and so had the unmapping open-coded.  Using the new
> structure it is now possible to special-case the struct setup instead of
> having the open-coded function.
>
> exit_mmap() also calls unmap_vmas() with many arguemnts.  Using the
> unmap_all_init() function to set the unmap descriptor for all vmas makes
> this a bit easier to read.
>
> Update to the vma test code is necessary to ensure testing continues to
> function.
>
> No functional changes intended.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
>  include/linux/mm.h               |  3 ---
>  mm/internal.h                    |  3 +++
>  mm/memory.c                      | 24 ++++++++------------
>  mm/mmap.c                        |  5 +++-
>  mm/vma.c                         | 39 ++++++++++++++++++--------------
>  mm/vma.h                         | 14 ++++++++++++
>  tools/testing/vma/vma_internal.h | 14 ++++--------
>  7 files changed, 56 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 892fe5dbf9de0..23eb59d543390 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2450,9 +2450,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
>         zap_page_range_single(vma, vma->vm_start,
>                               vma->vm_end - vma->vm_start, NULL);
>  }
> -void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -               struct vm_area_struct *start_vma, unsigned long start,
> -               unsigned long end, unsigned long tree_end, bool mm_wr_locked);
>
>  struct mmu_notifier_range;
>
> diff --git a/mm/internal.h b/mm/internal.h
> index d295252407fee..1239944f2830a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma)
>         }
>  }
>
> +/* unmap_vmas is in mm/memory.c */
> +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
> +
>  #ifdef CONFIG_MMU
>
>  /* Flags for folio_pte_batch(). */
> diff --git a/mm/memory.c b/mm/memory.c
> index 829cd94950182..8d4d976311037 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2084,12 +2084,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>  /**
>   * unmap_vmas - unmap a range of memory covered by a list of vma's
>   * @tlb: address of the caller's struct mmu_gather
> - * @mas: the maple state
> - * @vma: the starting vma
> - * @start_addr: virtual address at which to start unmapping
> - * @end_addr: virtual address at which to end unmapping
> - * @tree_end: The maximum index to check
> - * @mm_wr_locked: lock flag
> + * @unmap: The unmap_desc
>   *
>   * Unmap all pages in the vma list.
>   *
> @@ -2102,11 +2097,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
>   * drops the lock and schedules.
>   */
> -void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -               struct vm_area_struct *vma, unsigned long start_addr,
> -               unsigned long end_addr, unsigned long tree_end,
> -               bool mm_wr_locked)
> +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
>  {
> +       struct vm_area_struct *vma;
>         struct mmu_notifier_range range;
>         struct zap_details details = {
>                 .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
> @@ -2114,17 +2107,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
>                 .even_cows = true,
>         };
>
> +       vma = unmap->first;
>         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
> -                               start_addr, end_addr);
> +                               unmap->vma_min, unmap->vma_max);
>         mmu_notifier_invalidate_range_start(&range);
>         do {
> -               unsigned long start = start_addr;
> -               unsigned long end = end_addr;
> +               unsigned long start = unmap->vma_min;
> +               unsigned long end = unmap->vma_max;
>                 hugetlb_zap_begin(vma, &start, &end);
>                 unmap_single_vma(tlb, vma, start, end, &details,
> -                                mm_wr_locked);
> +                                unmap->mm_wr_locked);
>                 hugetlb_zap_end(vma, &details);
> -               vma = mas_find(mas, tree_end - 1);
> +               vma = mas_find(unmap->mas, unmap->tree_max - 1);
>         } while (vma);
>         mmu_notifier_invalidate_range_end(&range);
>  }
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5c9bd3f20e53f..6011f62b0a294 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1280,10 +1280,12 @@ void exit_mmap(struct mm_struct *mm)
>         struct vm_area_struct *vma;
>         unsigned long nr_accounted = 0;
>         VMA_ITERATOR(vmi, mm, 0);
> +       struct unmap_desc unmap;
>
>         /* mm's last user has gone, and its about to be pulled down */
>         mmu_notifier_release(mm);
>
> +       unmap.mm_wr_locked = false;

This will be reset by unmap_all_init() anyway, right?

>         mmap_read_lock(mm);
>         arch_exit_mmap(mm);
>
> @@ -1295,11 +1297,12 @@ void exit_mmap(struct mm_struct *mm)
>                 goto destroy;
>         }
>
> +       unmap_all_init(&unmap, &vmi, vma);

Can we use a macro, something like DEFINE_UNMAP_ALL_REGIONS() instead
of unmap_all_init()?

>         flush_cache_mm(mm);
>         tlb_gather_mmu_fullmm(&tlb, mm);
>         /* update_hiwater_rss(mm) here? but nobody should be looking */
>         /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
> -       unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
> +       unmap_vmas(&tlb, &unmap);
>         mmap_read_unlock(mm);
>
>         /*
> diff --git a/mm/vma.c b/mm/vma.c
> index c92384975cbb2..ad64cd9795ef3 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -481,8 +481,7 @@ void unmap_region(struct unmap_desc *desc)
>
>         tlb_gather_mmu(&tlb, mm);
>         update_hiwater_rss(mm);
> -       unmap_vmas(&tlb, mas, desc->first, desc->vma_min, desc->vma_max,
> -                  desc->vma_max, desc->mm_wr_locked);
> +       unmap_vmas(&tlb, desc);
>         mas_set(mas, desc->tree_reset);
>         free_pgtables(&tlb, mas, desc->first, desc->first_pgaddr,
>                       desc->last_pgaddr, desc->tree_max,
> @@ -1213,26 +1212,32 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
>                     struct ma_state *mas_detach, bool mm_wr_locked)
>  {
> -       struct mmu_gather tlb;
> +       struct unmap_desc unmap = {
> +               .mas = mas_detach,
> +               .first = vms->vma,
> +               /* start and end may be different if there is no prev or next vma. */
> +               .first_pgaddr = vms->unmap_start,
> +               .last_pgaddr = vms->unmap_end,
> +               .vma_min = vms->start,
> +               .vma_max = vms->end,
> +               /*
> +                * The tree limits and reset differ from the normal case since it's a
> +                * side-tree
> +                */
> +               .tree_reset = 1,
> +               .tree_max = vms->vma_count,
> +               /*
> +                * We can free page tables without write-locking mmap_lock because VMAs
> +                * were isolated before we downgraded mmap_lock.
> +                */
> +               .mm_wr_locked = mm_wr_locked,
> +       };
>
>         if (!vms->clear_ptes) /* Nothing to do */
>                 return;
>
> -       /*
> -        * We can free page tables without write-locking mmap_lock because VMAs
> -        * were isolated before we downgraded mmap_lock.
> -        */
>         mas_set(mas_detach, 1);
> -       tlb_gather_mmu(&tlb, vms->vma->vm_mm);
> -       update_hiwater_rss(vms->vma->vm_mm);
> -       unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
> -                  vms->vma_count, mm_wr_locked);
> -
> -       mas_set(mas_detach, 1);
> -       /* start and end may be different if there is no prev or next vma. */
> -       free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
> -                     vms->unmap_end, vms->unmap_end, mm_wr_locked);
> -       tlb_finish_mmu(&tlb);
> +       unmap_region(&unmap);
>         vms->clear_ptes = false;
>  }
>
> diff --git a/mm/vma.h b/mm/vma.h
> index 4edd5d26ffcfc..8b55a0c73d097 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -164,6 +164,20 @@ struct unmap_desc {
>         bool mm_wr_locked;            /* If the mmap write lock is held */
>  };
>
> +static inline void unmap_all_init(struct unmap_desc *desc,
> +               struct vma_iterator *vmi, struct vm_area_struct *vma)
> +{
> +       desc->mas = &vmi->mas;
> +       desc->first = vma;
> +       desc->first_pgaddr = FIRST_USER_ADDRESS;
> +       desc->last_pgaddr = USER_PGTABLES_CEILING;
> +       desc->vma_min = 0;
> +       desc->vma_max = ULONG_MAX;
> +       desc->tree_max = ULONG_MAX;
> +       desc->tree_reset = vma->vm_end;
> +       desc->mm_wr_locked = false;
> +}
> +
>  #define UNMAP_REGION(name, _vmi, _vma, _vma_min, _vma_max, _prev, _next)      \
>         struct unmap_desc name = {                                          \
>                 .mas = &(_vmi)->mas,                                          \
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 823d379e1fac2..d73ad4747d40a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -884,18 +884,12 @@ static inline void update_hiwater_vm(struct mm_struct *)
>  {
>  }
>
> -static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
> -                     struct vm_area_struct *vma, unsigned long start_addr,
> -                     unsigned long end_addr, unsigned long tree_end,
> -                     bool mm_wr_locked)
> +struct unmap_desc;
> +
> +static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
>  {
>         (void)tlb;
> -       (void)mas;
> -       (void)vma;
> -       (void)start_addr;
> -       (void)end_addr;
> -       (void)tree_end;
> -       (void)mm_wr_locked;
> +       (void)unmap;
>  }
>
>  static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
> --
> 2.47.2
>
Re: [PATCH v1 8/9] mm/vma: Use unmap_desc in vms_clear_ptes() and exit_mmap()
Posted by Liam R. Howlett 3 weeks ago
* Suren Baghdasaryan <surenb@google.com> [250909 18:16]:
> On Tue, Sep 9, 2025 at 12:10 PM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > vms_clear_ptes() is slightly different than other callers to
> > unmap_region() and so had the unmapping open-coded.  Using the new
> > structure it is now possible to special-case the struct setup instead of
> > having the open-coded function.
> >
> > exit_mmap() also calls unmap_vmas() with many arguments.  Using the
> > unmap_all_init() function to set the unmap descriptor for all vmas makes
> > this a bit easier to read.
> >
> > Update to the vma test code is necessary to ensure testing continues to
> > function.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> > ---
> >  include/linux/mm.h               |  3 ---
> >  mm/internal.h                    |  3 +++
> >  mm/memory.c                      | 24 ++++++++------------
> >  mm/mmap.c                        |  5 +++-
> >  mm/vma.c                         | 39 ++++++++++++++++++--------------
> >  mm/vma.h                         | 14 ++++++++++++
> >  tools/testing/vma/vma_internal.h | 14 ++++--------
> >  7 files changed, 56 insertions(+), 46 deletions(-)

...

> >         struct vm_area_struct *vma;
> >         unsigned long nr_accounted = 0;
> >         VMA_ITERATOR(vmi, mm, 0);
> > +       struct unmap_desc unmap;
> >
> >         /* mm's last user has gone, and its about to be pulled down */
> >         mmu_notifier_release(mm);
> >
> > +       unmap.mm_wr_locked = false;
> 
> This will be reset by unmap_all_init() anyway, right?

Yes, I will drop that.  Thanks, I missed this when I rewrote to use a
different function.

> 
> >         mmap_read_lock(mm);
> >         arch_exit_mmap(mm);
> >
> > @@ -1295,11 +1297,12 @@ void exit_mmap(struct mm_struct *mm)
> >                 goto destroy;
> >         }
> >
> > +       unmap_all_init(&unmap, &vmi, vma);
> 
> Can we use a macro, something like DEFINE_UNMAP_ALL_REGIONS() instead
> of unmap_all_init()?

No, because the vma is unknown and we set up some of the unmap_desc from
the values in vma.

> 
> >         flush_cache_mm(mm);
> >         tlb_gather_mmu_fullmm(&tlb, mm);
> >         /* update_hiwater_rss(mm) here? but nobody should be looking */
> >         /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
> > -       unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
> > +       unmap_vmas(&tlb, &unmap);
> >         mmap_read_unlock(mm);

...

Thanks,
Liam