[PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.

Zi Yan posted 4 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.
Posted by Zi Yan 1 month, 3 weeks ago
The helper gathers folio order statistics for the folios within a virtual
address range and checks them against a given order list. It aims to provide
a more precise folio order check instead of just checking the existence of
PMD folios.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 .../selftests/mm/split_huge_page_test.c       |   4 +-
 tools/testing/selftests/mm/vm_util.c          | 173 ++++++++++++++++++
 tools/testing/selftests/mm/vm_util.h          |   7 +
 3 files changed, 181 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 5d07b0b89226..63ac82f0b9e0 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -34,8 +34,6 @@ uint64_t pmd_pagesize;
 #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
 #define PATH_FMT "%s,0x%lx,0x%lx,%d"
 
-#define PFN_MASK     ((1UL<<55)-1)
-#define KPF_THP      (1UL<<22)
 #define GET_ORDER(nr_pages)    (31 - __builtin_clz(nr_pages))
 
 int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
@@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
 
 		if (kpageflags_file) {
 			pread(kpageflags_file, &page_flags, sizeof(page_flags),
-				(paddr & PFN_MASK) * sizeof(page_flags));
+				PAGEMAP_PFN(paddr) * sizeof(page_flags));
 
 			return !!(page_flags & KPF_THP);
 		}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 6a239aa413e2..4d952d1bc96d 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max)
 	return count;
 }
 
+static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
+{
+	size_t count;
+
+	count = pread(kpageflags_fd, flags, sizeof(*flags),
+		      pfn * sizeof(*flags));
+
+	if (count != sizeof(*flags))
+		return -1;
+
+	return 0;
+}
+
+static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd,
+			  uint64_t *flags)
+{
+	unsigned long pfn;
+
+	pfn = pagemap_get_pfn(pagemap_fd, vaddr);
+	/*
+	 * Treat non-present page as a page without any flag, so that
+	 * gather_folio_orders() just record the current folio order.
+	 */
+	if (pfn == -1UL) {
+		*flags = 0;
+		return 1;
+	}
+
+	if (get_pfn_flags(pfn, kpageflags_fd, flags))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * gather_folio_orders - record folio orders in [vaddr_start, vaddr_start + len)
+ * @vaddr_start: start vaddr
+ * @len: range length
+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
+ * @kpageflags_fd: file descriptor to /proc/kpageflags
+ * @orders: output folio order array
+ * @nr_orders: folio order array size
+ *
+ * gather_folio_orders() scans through [vaddr_start, vaddr_start + len), checks
+ * all folios within the range and records their orders. All order-0 pages are
+ * recorded. Non-present vaddrs are skipped.
+ *
+ *
+ * Return: 0 - no error, -1 - unhandled cases
+ */
+static int gather_folio_orders(char *vaddr_start, size_t len,
+			       int pagemap_fd, int kpageflags_fd,
+			       int orders[], int nr_orders)
+{
+	uint64_t page_flags = 0;
+	int cur_order = -1;
+	char *vaddr;
+
+	if (!pagemap_fd || !kpageflags_fd)
+		return -1;
+	if (nr_orders <= 0)
+		return -1;
+
+	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
+		char *next_folio_vaddr;
+		int status;
+
+		status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
+					&page_flags);
+		if (status < 0)
+			return -1;
+
+		/* skip non present vaddr */
+		if (status == 1) {
+			vaddr += psize();
+			continue;
+		}
+
+		/* all order-0 pages with possible false positives (non folio) */
+		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+			orders[0]++;
+			vaddr += psize();
+			continue;
+		}
+
+		/* skip non thp compound pages */
+		if (!(page_flags & KPF_THP)) {
+			vaddr += psize();
+			continue;
+		}
+
+		/* vpn points to part of a THP at this point */
+		if (page_flags & KPF_COMPOUND_HEAD)
+			cur_order = 1;
+		else {
+			/* not a head nor a tail in a THP? */
+			if (!(page_flags & KPF_COMPOUND_TAIL))
+				return -1;
+
+			vaddr += psize();
+			continue;
+		}
+
+		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+
+		if (next_folio_vaddr >= vaddr_start + len)
+			break;
+
+		while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
+						 kpageflags_fd,
+						 &page_flags)) >= 0) {
+			/*
+			 * non present vaddr, next compound head page, or
+			 * order-0 page
+			 */
+			if (status == 1 ||
+			    (page_flags & KPF_COMPOUND_HEAD) ||
+			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+				if (cur_order < nr_orders) {
+					orders[cur_order]++;
+					cur_order = -1;
+					vaddr = next_folio_vaddr;
+				}
+				break;
+			}
+
+			/* not a head nor a tail in a THP? */
+			if (!(page_flags & KPF_COMPOUND_TAIL))
+				return -1;
+
+			cur_order++;
+			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+		}
+
+		if (status < 0)
+			return status;
+	}
+	if (cur_order > 0 && cur_order < nr_orders)
+		orders[cur_order]++;
+	return 0;
+}
+
+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
+			int kpageflags_fd, int orders[], int nr_orders)
+{
+	int *vaddr_orders;
+	int status;
+	int i;
+
+	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
+
+	if (!vaddr_orders)
+		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
+
+	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
+	status = gather_folio_orders(vaddr_start, len, pagemap_fd,
+				     kpageflags_fd, vaddr_orders, nr_orders);
+	if (status)
+		goto out;
+
+	status = 0;
+	for (i = 0; i < nr_orders; i++)
+		if (vaddr_orders[i] != orders[i]) {
+			ksft_print_msg("order %d: expected: %d got %d\n", i,
+				       orders[i], vaddr_orders[i]);
+			status = -1;
+		}
+
+out:
+	free(vaddr_orders);
+	return status;
+}
+
 /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
 			      bool miss, bool wp, bool minor, uint64_t *ioctls)
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 1843ad48d32b..02e3f1e7065b 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -18,6 +18,11 @@
 #define PM_SWAP                       BIT_ULL(62)
 #define PM_PRESENT                    BIT_ULL(63)
 
+#define KPF_COMPOUND_HEAD             BIT_ULL(15)
+#define KPF_COMPOUND_TAIL             BIT_ULL(16)
+#define KPF_THP                       BIT_ULL(22)
+
+
 /*
  * Ignore the checkpatch warning, we must read from x but don't want to do
  * anything with it in order to trigger a read page fault. We therefore must use
@@ -85,6 +90,8 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
 int64_t allocate_transhuge(void *ptr, int pagemap_fd);
 unsigned long default_huge_page_size(void);
 int detect_hugetlb_page_sizes(size_t sizes[], int max);
+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_file,
+			int kpageflags_file, int orders[], int nr_orders);
 
 int uffd_register(int uffd, void *addr, uint64_t len,
 		  bool miss, bool wp, bool minor);
-- 
2.47.2
Re: [PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.
Posted by Wei Yang 1 month, 3 weeks ago
On Tue, Aug 12, 2025 at 11:55:10AM -0400, Zi Yan wrote:
[...]
>+/*
>+ * gather_folio_orders - scan through [vaddr_start, len) and record folio orders
>+ * @vaddr_start: start vaddr
>+ * @len: range length
>+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
>+ * @kpageflags_fd: file descriptor to /proc/kpageflags
>+ * @orders: output folio order array
>+ * @nr_orders: folio order array size
>+ *
>+ * gather_folio_orders() scan through [vaddr_start, len) and check all folios
>+ * within the range and record their orders. All order-0 pages will be recorded.

I am a little confused by the description here, especially regarding the
behavior when the range is not aligned to a folio boundary.

See following code at 1) and 2).

>+ * Non-present vaddr is skipped.
>+ *
>+ *
>+ * Return: 0 - no error, -1 - unhandled cases
>+ */
>+static int gather_folio_orders(char *vaddr_start, size_t len,
>+			       int pagemap_fd, int kpageflags_fd,
>+			       int orders[], int nr_orders)
>+{
>+	uint64_t page_flags = 0;
>+	int cur_order = -1;
>+	char *vaddr;
>+
>+	if (!pagemap_fd || !kpageflags_fd)
>+		return -1;

If my understanding is correct, we use open() to get a file descriptor.

On error it returns -1, whereas 0 is a valid file descriptor (although it is
usually taken by stdin). The check may work in most cases, but it is not right.

>+	if (nr_orders <= 0)
>+		return -1;
>+

Maybe we want to check orders[] here too?

>+	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
>+		char *next_folio_vaddr;
>+		int status;
>+
>+		status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
>+					&page_flags);
>+		if (status < 0)
>+			return -1;
>+
>+		/* skip non present vaddr */
>+		if (status == 1) {
>+			vaddr += psize();
>+			continue;
>+		}
>+
>+		/* all order-0 pages with possible false postive (non folio) */

Do we still have a false positive case? A non-present page returns 1, which is
handled above.

>+		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>+			orders[0]++;
>+			vaddr += psize();
>+			continue;
>+		}
>+
>+		/* skip non thp compound pages */
>+		if (!(page_flags & KPF_THP)) {
>+			vaddr += psize();
>+			continue;
>+		}
>+
>+		/* vpn points to part of a THP at this point */
>+		if (page_flags & KPF_COMPOUND_HEAD)
>+			cur_order = 1;
>+		else {
>+			/* not a head nor a tail in a THP? */
>+			if (!(page_flags & KPF_COMPOUND_TAIL))
>+				return -1;

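When we reach here, we know (page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))
is non-zero, so at least one of the two flags is set. In this else branch
KPF_COMPOUND_HEAD is clear, so KPF_COMPOUND_TAIL must be set.

So it looks impossible to hit this check?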

>+
>+			vaddr += psize();
>+			continue;

1)

In case vaddr points to the middle of a large folio, this will skip that folio
and start counting from the next one.

>+		}
>+
>+		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
>+
>+		if (next_folio_vaddr >= vaddr_start + len)
>+			break;
>+
>+		while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
>+						 kpageflags_fd,
>+						 &page_flags)) >= 0) {
>+			/*
>+			 * non present vaddr, next compound head page, or
>+			 * order-0 page
>+			 */
>+			if (status == 1 ||
>+			    (page_flags & KPF_COMPOUND_HEAD) ||
>+			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>+				if (cur_order < nr_orders) {
>+					orders[cur_order]++;
>+					cur_order = -1;
>+					vaddr = next_folio_vaddr;
>+				}
>+				break;
>+			}
>+
>+			/* not a head nor a tail in a THP? */
>+			if (!(page_flags & KPF_COMPOUND_TAIL))
>+				return -1;
>+
>+			cur_order++;
>+			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));

2)

If (vaddr_start + len) points to the middle of a large folio and the folio is
larger than order 1, we may continue the loop and still count this last folio,
because we don't check next_folio_vaddr against (vaddr_start + len).

A simple chart of these cases.

          vaddr_start                   +     len
               |                               |
               v                               v
     +---------------------+              +-----------------+
     |folio 1              |              |folio 2          |
     +---------------------+              +-----------------+

folio 1 is not counted, but folio 2 is counted.

So 1) and 2) handle the boundary differently. I am not sure this is the designed
behavior. If it is, I think it would be better to document it; otherwise the
behavior is not obvious to the user.

>+		}
>+
>+		if (status < 0)
>+			return status;
>+	}
>+	if (cur_order > 0 && cur_order < nr_orders)
>+		orders[cur_order]++;

Another boundary case here.

If we come here because (next_folio_vaddr >= vaddr_start + len) in the for
loop instead of the while loop, it means we found the folio head at vaddr,
but the remaining range (vaddr_start + len - vaddr) is less than or equal to
the size of an order-1 folio.

But we haven't detected the real end of this folio. If this folio is larger
than order 1, we still count it as an order-1 folio.

>+	return 0;
>+}
>+
>+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
>+			int kpageflags_fd, int orders[], int nr_orders)
>+{
>+	int *vaddr_orders;
>+	int status;
>+	int i;
>+
>+	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
>+
>+	if (!vaddr_orders)
>+		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
>+
>+	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
>+	status = gather_folio_orders(vaddr_start, len, pagemap_fd,
>+				     kpageflags_fd, vaddr_orders, nr_orders);
>+	if (status)
>+		goto out;
>+
>+	status = 0;
>+	for (i = 0; i < nr_orders; i++)
>+		if (vaddr_orders[i] != orders[i]) {
>+			ksft_print_msg("order %d: expected: %d got %d\n", i,
>+				       orders[i], vaddr_orders[i]);
>+			status = -1;
>+		}
>+
>+out:
>+	free(vaddr_orders);
>+	return status;
>+}

-- 
Wei Yang
Help you, Help me
Re: [PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.
Posted by Zi Yan 1 month, 3 weeks ago
On 13 Aug 2025, at 17:12, Wei Yang wrote:

> On Tue, Aug 12, 2025 at 11:55:10AM -0400, Zi Yan wrote:
> [...]
>> +/*
>> + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders
>> + * @vaddr_start: start vaddr
>> + * @len: range length
>> + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
>> + * @kpageflags_fd: file descriptor to /proc/kpageflags
>> + * @orders: output folio order array
>> + * @nr_orders: folio order array size
>> + *
>> + * gather_folio_orders() scan through [vaddr_start, len) and check all folios
>> + * within the range and record their orders. All order-0 pages will be recorded.
>
> I feel a little confused about the description here. Especially on the
> behavior when the range is not aligned on folio boundary.

I was too ambitious with this function. It is only intended to check folio
orders after a split. I will move the function to split_huge_page_test.c
and rename the helpers to gather_after_split_folio_orders() and
check_after_split_folio_orders().

>
> See following code at 1) and 2).
>
>> + * Non-present vaddr is skipped.
>> + *
>> + *
>> + * Return: 0 - no error, -1 - unhandled cases
>> + */
>> +static int gather_folio_orders(char *vaddr_start, size_t len,
>> +			       int pagemap_fd, int kpageflags_fd,
>> +			       int orders[], int nr_orders)
>> +{
>> +	uint64_t page_flags = 0;
>> +	int cur_order = -1;
>> +	char *vaddr;
>> +
>> +	if (!pagemap_fd || !kpageflags_fd)
>> +		return -1;
>
> If my understanding is correct, we use open() to get a file descriptor.
>
> On error it returns -1. And 0 is a possible valid value, but usually used by
> stdin. The code may work in most cases, but seems not right.

Will fix it to

if (pagemap_fd == -1 || kpageflags_fd == -1)

>
>> +	if (nr_orders <= 0)
>> +		return -1;
>> +
>
> Maybe we want to check orders[] here too?
>
>> +	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
>> +		char *next_folio_vaddr;
>> +		int status;
>> +
>> +		status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
>> +					&page_flags);
>> +		if (status < 0)
>> +			return -1;
>> +
>> +		/* skip non present vaddr */
>> +		if (status == 1) {
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* all order-0 pages with possible false postive (non folio) */
>
> Do we still false positive case? Non-present page returns 1, which is handled
> above.

Any order-0 non-folio page will be counted, such as GFP_KERNEL pages.

>
>> +		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>> +			orders[0]++;
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* skip non thp compound pages */
>> +		if (!(page_flags & KPF_THP)) {
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* vpn points to part of a THP at this point */
>> +		if (page_flags & KPF_COMPOUND_HEAD)
>> +			cur_order = 1;
>> +		else {
>> +			/* not a head nor a tail in a THP? */
>> +			if (!(page_flags & KPF_COMPOUND_TAIL))
>> +				return -1;
>
> When reaches here, we know (page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL)).
> So we have at least one of it set.
>
> Looks not possible to hit it?

Will remove it.
>
>> +
>> +			vaddr += psize();
>> +			continue;
>
> 1)
>
> In case vaddr points to the middle of a large folio, this will skip this folio
> and count from next one.
>
>> +		}
>> +
>> +		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
>> +
>> +		if (next_folio_vaddr >= vaddr_start + len)
>> +			break;
>> +
>> +		while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
>> +						 kpageflags_fd,
>> +						 &page_flags)) >= 0) {
>> +			/*
>> +			 * non present vaddr, next compound head page, or
>> +			 * order-0 page
>> +			 */
>> +			if (status == 1 ||
>> +			    (page_flags & KPF_COMPOUND_HEAD) ||
>> +			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>> +				if (cur_order < nr_orders) {
>> +					orders[cur_order]++;
>> +					cur_order = -1;
>> +					vaddr = next_folio_vaddr;
>> +				}
>> +				break;
>> +			}
>> +
>> +			/* not a head nor a tail in a THP? */
>> +			if (!(page_flags & KPF_COMPOUND_TAIL))
>> +				return -1;
>> +
>> +			cur_order++;
>> +			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
>
> 2)
>
> If (vaddr_start + len) points to the middle of a large folio and folio is more
> than order 1 size, we may continue the loop and still count this last folio.
> Because we don't check next_folio_vaddr and (vaddr_start + len).
>
> A simple chart of these case.
>
>           vaddr_start                   +     len
>                |                               |
>                v                               v
>      +---------------------+              +-----------------+
>      |folio 1              |              |folio 2          |
>      +---------------------+              +-----------------+
>
> folio 1 is not counted, but folio 2 is counted.
>
> So at 1) and 2) handles the boundary differently. Not sure this is designed
> behavior. If so I think it would be better to record in document, otherwise
> the behavior is not obvious to user.

Will document it.

>
>> +		}
>> +
>> +		if (status < 0)
>> +			return status;
>> +	}
>> +	if (cur_order > 0 && cur_order < nr_orders)
>> +		orders[cur_order]++;
>
> Another boundary case here.
>
> If we come here because (next_folio_vaddr >= vaddr_start + len) in the for
> loop instead of the while loop. This means we found the folio head at vaddr,
> but the left range (vaddr_start + len - vaddr) is less than or equal to order
> 1 page size.
>
> But we haven't detected the real end of this folio. If this folio is more than
> order 1 size, we still count it an order 1 folio.

Yes. Will document it.

Thanks for the review.

>
>> +	return 0;
>> +}
>> +
>> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
>> +			int kpageflags_fd, int orders[], int nr_orders)
>> +{
>> +	int *vaddr_orders;
>> +	int status;
>> +	int i;
>> +
>> +	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
>> +
>> +	if (!vaddr_orders)
>> +		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
>> +
>> +	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
>> +	status = gather_folio_orders(vaddr_start, len, pagemap_fd,
>> +				     kpageflags_fd, vaddr_orders, nr_orders);
>> +	if (status)
>> +		goto out;
>> +
>> +	status = 0;
>> +	for (i = 0; i < nr_orders; i++)
>> +		if (vaddr_orders[i] != orders[i]) {
>> +			ksft_print_msg("order %d: expected: %d got %d\n", i,
>> +				       orders[i], vaddr_orders[i]);
>> +			status = -1;
>> +		}
>> +
>> +out:
>> +	free(vaddr_orders);
>> +	return status;
>> +}
>
> -- 
> Wei Yang
> Help you, Help me


Best Regards,
Yan, Zi
Re: [PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.
Posted by wang lian 1 month, 3 weeks ago

> On Aug 12, 2025, at 23:55, Zi Yan <ziy@nvidia.com> wrote:
> 
> The helper gathers an folio order statistics of folios within a virtual
> address range and checks it against a given order list. It aims to provide
> a more precise folio order check instead of just checking the existence of
> PMD folios.
> 
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> .../selftests/mm/split_huge_page_test.c       |   4 +-
> tools/testing/selftests/mm/vm_util.c          | 173 ++++++++++++++++++
> tools/testing/selftests/mm/vm_util.h          |   7 +
> 3 files changed, 181 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
> index 5d07b0b89226..63ac82f0b9e0 100644
> --- a/tools/testing/selftests/mm/split_huge_page_test.c
> +++ b/tools/testing/selftests/mm/split_huge_page_test.c
> @@ -34,8 +34,6 @@ uint64_t pmd_pagesize;
> #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
> #define PATH_FMT "%s,0x%lx,0x%lx,%d"
> 
> -#define PFN_MASK     ((1UL<<55)-1)
> -#define KPF_THP      (1UL<<22)
> #define GET_ORDER(nr_pages)    (31 - __builtin_clz(nr_pages))
> 
> int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
> @@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
> 
> 		if (kpageflags_file) {
> 			pread(kpageflags_file, &page_flags, sizeof(page_flags),
> -				(paddr & PFN_MASK) * sizeof(page_flags));
> +				PAGEMAP_PFN(paddr) * sizeof(page_flags));
> 
> 			return !!(page_flags & KPF_THP);
> 		}
> diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
> index 6a239aa413e2..4d952d1bc96d 100644
> --- a/tools/testing/selftests/mm/vm_util.c
> +++ b/tools/testing/selftests/mm/vm_util.c
> @@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max)
> 	return count;
> }
> 
> +static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
> +{
> +	size_t count;
> +
> +	count = pread(kpageflags_fd, flags, sizeof(*flags),
> +		      pfn * sizeof(*flags));
> +
> +	if (count != sizeof(*flags))
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd,
> +			  uint64_t *flags)
> +{
> +	unsigned long pfn;
> +
> +	pfn = pagemap_get_pfn(pagemap_fd, vaddr);
> +	/*
> +	 * Treat non-present page as a page without any flag, so that
> +	 * gather_folio_orders() just record the current folio order.
> +	 */
> +	if (pfn == -1UL) {
> +		*flags = 0;
> +		return 1;
> +	}
> +
> +	if (get_pfn_flags(pfn, kpageflags_fd, flags))
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/*
> + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders
> + * @vaddr_start: start vaddr
> + * @len: range length
> + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
> + * @kpageflags_fd: file descriptor to /proc/kpageflags
> + * @orders: output folio order array
> + * @nr_orders: folio order array size
> + *
> + * gather_folio_orders() scan through [vaddr_start, len) and check all folios
> + * within the range and record their orders. All order-0 pages will be recorded.
> + * Non-present vaddr is skipped.
> + *
> + *
> + * Return: 0 - no error, -1 - unhandled cases
> + */
> +static int gather_folio_orders(char *vaddr_start, size_t len,
> +			       int pagemap_fd, int kpageflags_fd,
> +			       int orders[], int nr_orders)
> +{
> +	uint64_t page_flags = 0;
> +	int cur_order = -1;
> +	char *vaddr;
> +
> +	if (!pagemap_fd || !kpageflags_fd)
> +		return -1;
> +	if (nr_orders <= 0)
> +		return -1;
> +
> +	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
> +		char *next_folio_vaddr;
> +		int status;
> +
> +		status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
> +					&page_flags);
> +		if (status < 0)
> +			return -1;
> +
> +		/* skip non present vaddr */
> +		if (status == 1) {
> +			vaddr += psize();
> +			continue;
> +		}
> +
> +		/* all order-0 pages with possible false postive (non folio) */
> +		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
> +			orders[0]++;
> +			vaddr += psize();
> +			continue;
> +		}
> +
> +		/* skip non thp compound pages */
> +		if (!(page_flags & KPF_THP)) {
> +			vaddr += psize();
> +			continue;
> +		}
> +
> +		/* vpn points to part of a THP at this point */
> +		if (page_flags & KPF_COMPOUND_HEAD)
> +			cur_order = 1;
> +		else {
> +			/* not a head nor a tail in a THP? */
> +			if (!(page_flags & KPF_COMPOUND_TAIL))
> +				return -1;
> +
> +			vaddr += psize();
> +			continue;
> +		}
> +
> +		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
> +
> +		if (next_folio_vaddr >= vaddr_start + len)
> +			break;
> +
> +		while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
> +						 kpageflags_fd,
> +						 &page_flags)) >= 0) {
> +			/*
> +			 * non present vaddr, next compound head page, or
> +			 * order-0 page
> +			 */
> +			if (status == 1 ||
> +			    (page_flags & KPF_COMPOUND_HEAD) ||
> +			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
> +				if (cur_order < nr_orders) {
> +					orders[cur_order]++;
> +					cur_order = -1;
> +					vaddr = next_folio_vaddr;
> +				}
> +				break;
> +			}
> +
> +			/* not a head nor a tail in a THP? */
> +			if (!(page_flags & KPF_COMPOUND_TAIL))
> +				return -1;
> +
> +			cur_order++;
> +			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
> +		}
> +
> +		if (status < 0)
> +			return status;
> +	}
> +	if (cur_order > 0 && cur_order < nr_orders)
> +		orders[cur_order]++;
> +	return 0;
> +}
> +
> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
> +			int kpageflags_fd, int orders[], int nr_orders)
> +{
> +	int *vaddr_orders;
> +	int status;
> +	int i;
> +
> +	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
> +
> +	if (!vaddr_orders)
> +		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
> +
> +	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
> +	status = gather_folio_orders(vaddr_start, len, pagemap_fd,
> +				     kpageflags_fd, vaddr_orders, nr_orders);
> +	if (status)
> +		goto out;
> +
> +	status = 0;

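Nit.
The "status = 0" assignment seems redundant: status is already 0 here, since a
non-zero status jumps to "out" just above.
Would you consider removing it for a bit more conciseness?
This doesn't block my approval, of course.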
Reviewed-by: wang lian <lianux.mm@gmail.com>

> +	for (i = 0; i < nr_orders; i++)
> +		if (vaddr_orders[i] != orders[i]) {
> +			ksft_print_msg("order %d: expected: %d got %d\n", i,
> +				       orders[i], vaddr_orders[i]);
> +			status = -1;
> +		}
> +
> +out:
> +	free(vaddr_orders);
> +	return status;
> +}
> +
> /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
> int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
> 			      bool miss, bool wp, bool minor, uint64_t *ioctls)
> diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
> index 1843ad48d32b..02e3f1e7065b 100644
> --- a/tools/testing/selftests/mm/vm_util.h
> +++ b/tools/testing/selftests/mm/vm_util.h
> @@ -18,6 +18,11 @@
> #define PM_SWAP                       BIT_ULL(62)
> #define PM_PRESENT                    BIT_ULL(63)
> 
> +#define KPF_COMPOUND_HEAD             BIT_ULL(15)
> +#define KPF_COMPOUND_TAIL             BIT_ULL(16)
> +#define KPF_THP                       BIT_ULL(22)
> +
> +
> /*
>  * Ignore the checkpatch warning, we must read from x but don't want to do
>  * anything with it in order to trigger a read page fault. We therefore must use
> @@ -85,6 +90,8 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
> int64_t allocate_transhuge(void *ptr, int pagemap_fd);
> unsigned long default_huge_page_size(void);
> int detect_hugetlb_page_sizes(size_t sizes[], int max);
> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_file,
> +			int kpageflags_file, int orders[], int nr_orders);
> 
> int uffd_register(int uffd, void *addr, uint64_t len,
> 		  bool miss, bool wp, bool minor);
> -- 
> 2.47.2
> 

Best regards,
wang lian
Re: [PATCH v3 2/4] selftests/mm: add check_folio_orders() helper.
Posted by Zi Yan 1 month, 3 weeks ago
On 12 Aug 2025, at 23:38, wang lian wrote:

>> On Aug 12, 2025, at 23:55, Zi Yan <ziy@nvidia.com> wrote:
>>
>> The helper gathers an folio order statistics of folios within a virtual
>> address range and checks it against a given order list. It aims to provide
>> a more precise folio order check instead of just checking the existence of
>> PMD folios.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> .../selftests/mm/split_huge_page_test.c       |   4 +-
>> tools/testing/selftests/mm/vm_util.c          | 173 ++++++++++++++++++
>> tools/testing/selftests/mm/vm_util.h          |   7 +
>> 3 files changed, 181 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
>> index 5d07b0b89226..63ac82f0b9e0 100644
>> --- a/tools/testing/selftests/mm/split_huge_page_test.c
>> +++ b/tools/testing/selftests/mm/split_huge_page_test.c
>> @@ -34,8 +34,6 @@ uint64_t pmd_pagesize;
>> #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
>> #define PATH_FMT "%s,0x%lx,0x%lx,%d"
>>
>> -#define PFN_MASK     ((1UL<<55)-1)
>> -#define KPF_THP      (1UL<<22)
>> #define GET_ORDER(nr_pages)    (31 - __builtin_clz(nr_pages))
>>
>> int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
>> @@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
>>
>> 		if (kpageflags_file) {
>> 			pread(kpageflags_file, &page_flags, sizeof(page_flags),
>> -				(paddr & PFN_MASK) * sizeof(page_flags));
>> +				PAGEMAP_PFN(paddr) * sizeof(page_flags));
>>
>> 			return !!(page_flags & KPF_THP);
>> 		}
>> diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
>> index 6a239aa413e2..4d952d1bc96d 100644
>> --- a/tools/testing/selftests/mm/vm_util.c
>> +++ b/tools/testing/selftests/mm/vm_util.c
>> @@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max)
>> 	return count;
>> }
>>
>> +static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
>> +{
>> +	size_t count;
>> +
>> +	count = pread(kpageflags_fd, flags, sizeof(*flags),
>> +		      pfn * sizeof(*flags));
>> +
>> +	if (count != sizeof(*flags))
>> +		return -1;
>> +
>> +	return 0;
>> +}
>> +
>> +static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd,
>> +			  uint64_t *flags)
>> +{
>> +	unsigned long pfn;
>> +
>> +	pfn = pagemap_get_pfn(pagemap_fd, vaddr);
>> +	/*
>> +	 * Treat non-present page as a page without any flag, so that
>> +	 * gather_folio_orders() just record the current folio order.
>> +	 */
>> +	if (pfn == -1UL) {
>> +		*flags = 0;
>> +		return 1;
>> +	}
>> +
>> +	if (get_pfn_flags(pfn, kpageflags_fd, flags))
>> +		return -1;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders
>> + * @vaddr_start: start vaddr
>> + * @len: range length
>> + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
>> + * @kpageflags_fd: file descriptor to /proc/kpageflags
>> + * @orders: output folio order array
>> + * @nr_orders: folio order array size
>> + *
>> + * gather_folio_orders() scan through [vaddr_start, len) and check all folios
>> + * within the range and record their orders. All order-0 pages will be recorded.
>> + * Non-present vaddr is skipped.
>> + *
>> + *
>> + * Return: 0 - no error, -1 - unhandled cases
>> + */
>> +static int gather_folio_orders(char *vaddr_start, size_t len,
>> +			       int pagemap_fd, int kpageflags_fd,
>> +			       int orders[], int nr_orders)
>> +{
>> +	uint64_t page_flags = 0;
>> +	int cur_order = -1;
>> +	char *vaddr;
>> +
>> +	if (!pagemap_fd || !kpageflags_fd)
>> +		return -1;
>> +	if (nr_orders <= 0)
>> +		return -1;
>> +
>> +	for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
>> +		char *next_folio_vaddr;
>> +		int status;
>> +
>> +		status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
>> +					&page_flags);
>> +		if (status < 0)
>> +			return -1;
>> +
>> +		/* skip non present vaddr */
>> +		if (status == 1) {
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* all order-0 pages with possible false postive (non folio) */
>> +		if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>> +			orders[0]++;
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* skip non thp compound pages */
>> +		if (!(page_flags & KPF_THP)) {
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		/* vpn points to part of a THP at this point */
>> +		if (page_flags & KPF_COMPOUND_HEAD)
>> +			cur_order = 1;
>> +		else {
>> +			/* not a head nor a tail in a THP? */
>> +			if (!(page_flags & KPF_COMPOUND_TAIL))
>> +				return -1;
>> +
>> +			vaddr += psize();
>> +			continue;
>> +		}
>> +
>> +		next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
>> +
>> +		if (next_folio_vaddr >= vaddr_start + len)
>> +			break;
>> +
>> +		while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
>> +						 kpageflags_fd,
>> +						 &page_flags)) >= 0) {
>> +			/*
>> +			 * non present vaddr, next compound head page, or
>> +			 * order-0 page
>> +			 */
>> +			if (status == 1 ||
>> +			    (page_flags & KPF_COMPOUND_HEAD) ||
>> +			    !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
>> +				if (cur_order < nr_orders) {
>> +					orders[cur_order]++;
>> +					cur_order = -1;
>> +					vaddr = next_folio_vaddr;
>> +				}
>> +				break;
>> +			}
>> +
>> +			/* not a head nor a tail in a THP? */
>> +			if (!(page_flags & KPF_COMPOUND_TAIL))
>> +				return -1;
>> +
>> +			cur_order++;
>> +			next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
>> +		}
>> +
>> +		if (status < 0)
>> +			return status;
>> +	}
>> +	if (cur_order > 0 && cur_order < nr_orders)
>> +		orders[cur_order]++;
>> +	return 0;
>> +}
>> +
>> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
>> +			int kpageflags_fd, int orders[], int nr_orders)
>> +{
>> +	int *vaddr_orders;
>> +	int status;
>> +	int i;
>> +
>> +	vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
>> +
>> +	if (!vaddr_orders)
>> +		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
>> +
>> +	memset(vaddr_orders, 0, sizeof(int) * nr_orders);
>> +	status = gather_folio_orders(vaddr_start, len, pagemap_fd,
>> +				     kpageflags_fd, vaddr_orders, nr_orders);
>> +	if (status)
>> +		goto out;
>> +
>> +	status = 0;
>
> Nit.
> It seems redundant.
> Would you consider removing it for a bit more conciseness?
> This doesn't block my approval, of course.
> Reviewed-by: wang lian <lianux.mm@gmail.com>

Sure. Thanks.

Best Regards,
Yan, Zi