[PATCH v6 13/15] mm: memory: support clearing page ranges

Ankur Arora posted 15 patches 1 month ago
There is a newer version of this series
[PATCH v6 13/15] mm: memory: support clearing page ranges
Posted by Ankur Arora 1 month ago
Change folio_zero_user() to clear contiguous page ranges instead of
clearing one page at a time, as it does currently. Exposing the full
extent to the processor allows it to optimize the clearing based on
its knowledge of that extent.

However, clearing in large chunks can have two problems:

 - cache locality when clearing small folios (<= MAX_ORDER_NR_PAGES)
   (larger folios don't have any expectation of cache locality).

 - preemption latency when clearing large folios.

Handle the first by splitting the clearing into three parts: the
faulting page together with its immediate neighbourhood, and the
regions to its left and to its right. The local neighbourhood is
cleared last so that its cache lines stay hot.

The second problem is relevant when running under cooperative
preemption models. Limit the worst-case preemption latency by clearing
in units of the architecture-specified PAGE_CONTIG_NR.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 mm/memory.c | 82 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 0ba4f6b71847..0f5b1900b480 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7021,40 +7021,80 @@ static inline int process_huge_page(
 	return 0;
 }
 
-static void clear_gigantic_page(struct folio *folio, unsigned long addr_hint,
-				unsigned int nr_pages)
+/*
+ * Clear contiguous pages chunking them up when running under
+ * non-preemptible models.
+ */
+static void clear_contig_highpages(struct page *page, unsigned long addr,
+				   unsigned int npages)
 {
-	unsigned long addr = ALIGN_DOWN(addr_hint, folio_size(folio));
-	int i;
+	unsigned int i, count, unit;
 
-	might_sleep();
-	for (i = 0; i < nr_pages; i++) {
+	unit = preempt_model_preemptible() ? npages : PAGE_CONTIG_NR;
+
+	for (i = 0; i < npages; ) {
+		count = min(unit, npages - i);
+		clear_user_highpages(nth_page(page, i),
+				     addr + i * PAGE_SIZE, count);
+		i += count;
 		cond_resched();
-		clear_user_highpage(folio_page(folio, i), addr + i * PAGE_SIZE);
 	}
 }
 
-static int clear_subpage(unsigned long addr, int idx, void *arg)
-{
-	struct folio *folio = arg;
-
-	clear_user_highpage(folio_page(folio, idx), addr);
-	return 0;
-}
-
 /**
  * folio_zero_user - Zero a folio which will be mapped to userspace.
  * @folio: The folio to zero.
- * @addr_hint: The address will be accessed or the base address if uncelar.
+ * @addr_hint: The address accessed by the user or the base address.
+ *
+ * Uses architectural support for clear_pages() to zero page extents
+ * instead of clearing page-at-a-time.
+ *
+ * Clearing of small folios (<= MAX_ORDER_NR_PAGES) is split in three parts:
+ * pages in the immediate locality of the faulting page, and its left, right
+ * regions; the local neighbourhood cleared last in order to keep cache
+ * lines of the target region hot.
+ *
+ * For larger folios we assume that there is no expectation of cache locality
+ * and just do a straight zero.
  */
 void folio_zero_user(struct folio *folio, unsigned long addr_hint)
 {
-	unsigned int nr_pages = folio_nr_pages(folio);
+	unsigned long base_addr = ALIGN_DOWN(addr_hint, folio_size(folio));
+	const long fault_idx = (addr_hint - base_addr) / PAGE_SIZE;
+	const struct range pg = DEFINE_RANGE(0, folio_nr_pages(folio) - 1);
+	const int width = 2; /* number of pages cleared last on either side */
+	struct range r[3];
+	int i;
 
-	if (unlikely(nr_pages > MAX_ORDER_NR_PAGES))
-		clear_gigantic_page(folio, addr_hint, nr_pages);
-	else
-		process_huge_page(addr_hint, nr_pages, clear_subpage, folio);
+	if (folio_nr_pages(folio) > MAX_ORDER_NR_PAGES) {
+		clear_contig_highpages(folio_page(folio, 0),
+					base_addr, folio_nr_pages(folio));
+		return;
+	}
+
+	/*
+	 * Faulting page and its immediate neighbourhood. Cleared at the end to
+	 * ensure it sticks around in the cache.
+	 */
+	r[2] = DEFINE_RANGE(clamp_t(s64, fault_idx - width, pg.start, pg.end),
+			    clamp_t(s64, fault_idx + width, pg.start, pg.end));
+
+	/* Region to the left of the fault */
+	r[1] = DEFINE_RANGE(pg.start,
+			    clamp_t(s64, r[2].start-1, pg.start-1, r[2].start));
+
+	/* Region to the right of the fault: always valid for the common fault_idx=0 case. */
+	r[0] = DEFINE_RANGE(clamp_t(s64, r[2].end+1, r[2].end, pg.end+1),
+			    pg.end);
+
+	for (i = 0; i <= 2; i++) {
+		unsigned int npages = range_len(&r[i]);
+		struct page *page = folio_page(folio, r[i].start);
+		unsigned long addr = base_addr + folio_page_idx(folio, page) * PAGE_SIZE;
+
+		if (npages > 0)
+			clear_contig_highpages(page, addr, npages);
+	}
 }
 
 static int copy_user_gigantic_page(struct folio *dst, struct folio *src,
-- 
2.31.1
Re: [PATCH v6 13/15] mm: memory: support clearing page ranges
Posted by kernel test robot 1 month ago
Hi Ankur,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Ankur-Arora/perf-bench-mem-Remove-repetition-around-time-measurement/20250902-161417
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250902080816.3715913-14-ankur.a.arora%40oracle.com
patch subject: [PATCH v6 13/15] mm: memory: support clearing page ranges
config: i386-randconfig-014-20250903 (https://download.01.org/0day-ci/archive/20250903/202509030344.SZCI0AIf-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250903/202509030344.SZCI0AIf-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509030344.SZCI0AIf-lkp@intel.com/

All warnings (new ones prefixed by >>):

   mm/memory.c: In function 'clear_contig_highpages':
   mm/memory.c:7165:38: error: implicit declaration of function 'nth_page'; did you mean 'pte_page'? [-Werror=implicit-function-declaration]
    7165 |                 clear_user_highpages(nth_page(page, i),
         |                                      ^~~~~~~~
         |                                      pte_page
>> mm/memory.c:7165:38: warning: passing argument 1 of 'clear_user_highpages' makes pointer from integer without a cast [-Wint-conversion]
    7165 |                 clear_user_highpages(nth_page(page, i),
         |                                      ^~~~~~~~~~~~~~~~~
         |                                      |
         |                                      int
   In file included from include/linux/bvec.h:10,
                    from include/linux/blk_types.h:10,
                    from include/linux/writeback.h:13,
                    from include/linux/memcontrol.h:23,
                    from include/linux/swap.h:9,
                    from include/linux/mm_inline.h:8,
                    from mm/memory.c:44:
   include/linux/highmem.h:211:54: note: expected 'struct page *' but argument is of type 'int'
     211 | static inline void clear_user_highpages(struct page *page, unsigned long vaddr,
         |                                         ~~~~~~~~~~~~~^~~~
   cc1: some warnings being treated as errors


vim +/clear_user_highpages +7165 mm/memory.c

  7151	
  7152	/*
  7153	 * Clear contiguous pages chunking them up when running under
  7154	 * non-preemptible models.
  7155	 */
  7156	static void clear_contig_highpages(struct page *page, unsigned long addr,
  7157					   unsigned int npages)
  7158	{
  7159		unsigned int i, count, unit;
  7160	
  7161		unit = preempt_model_preemptible() ? npages : PAGE_CONTIG_NR;
  7162	
  7163		for (i = 0; i < npages; ) {
  7164			count = min(unit, npages - i);
> 7165			clear_user_highpages(nth_page(page, i),
  7166					     addr + i * PAGE_SIZE, count);
  7167			i += count;
  7168			cond_resched();
  7169		}
  7170	}
  7171	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki