The helper gathers an folio order statistics of folios within a virtual
address range and checks it against a given order list. It aims to provide
a more precise folio order check instead of just checking the existence of
PMD folios.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
.../selftests/mm/split_huge_page_test.c | 4 +-
tools/testing/selftests/mm/vm_util.c | 173 ++++++++++++++++++
tools/testing/selftests/mm/vm_util.h | 7 +
3 files changed, 181 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 5d07b0b89226..63ac82f0b9e0 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -34,8 +34,6 @@ uint64_t pmd_pagesize;
#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"
-#define PFN_MASK ((1UL<<55)-1)
-#define KPF_THP (1UL<<22)
#define GET_ORDER(nr_pages) (31 - __builtin_clz(nr_pages))
int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
@@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
if (kpageflags_file) {
pread(kpageflags_file, &page_flags, sizeof(page_flags),
- (paddr & PFN_MASK) * sizeof(page_flags));
+ PAGEMAP_PFN(paddr) * sizeof(page_flags));
return !!(page_flags & KPF_THP);
}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 6a239aa413e2..4d952d1bc96d 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max)
return count;
}
+static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
+{
+ size_t count;
+
+ count = pread(kpageflags_fd, flags, sizeof(*flags),
+ pfn * sizeof(*flags));
+
+ if (count != sizeof(*flags))
+ return -1;
+
+ return 0;
+}
+
+static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd,
+ uint64_t *flags)
+{
+ unsigned long pfn;
+
+ pfn = pagemap_get_pfn(pagemap_fd, vaddr);
+ /*
+ * Treat non-present page as a page without any flag, so that
+ * gather_folio_orders() just record the current folio order.
+ */
+ if (pfn == -1UL) {
+ *flags = 0;
+ return 1;
+ }
+
+ if (get_pfn_flags(pfn, kpageflags_fd, flags))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * gather_folio_orders - scan through [vaddr_start, len) and record folio orders
+ * @vaddr_start: start vaddr
+ * @len: range length
+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
+ * @kpageflags_fd: file descriptor to /proc/kpageflags
+ * @orders: output folio order array
+ * @nr_orders: folio order array size
+ *
+ * gather_folio_orders() scan through [vaddr_start, len) and check all folios
+ * within the range and record their orders. All order-0 pages will be recorded.
+ * Non-present vaddr is skipped.
+ *
+ *
+ * Return: 0 - no error, -1 - unhandled cases
+ */
+static int gather_folio_orders(char *vaddr_start, size_t len,
+ int pagemap_fd, int kpageflags_fd,
+ int orders[], int nr_orders)
+{
+ uint64_t page_flags = 0;
+ int cur_order = -1;
+ char *vaddr;
+
+ if (!pagemap_fd || !kpageflags_fd)
+ return -1;
+ if (nr_orders <= 0)
+ return -1;
+
+ for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
+ char *next_folio_vaddr;
+ int status;
+
+ status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd,
+ &page_flags);
+ if (status < 0)
+ return -1;
+
+ /* skip non present vaddr */
+ if (status == 1) {
+ vaddr += psize();
+ continue;
+ }
+
+ /* all order-0 pages with possible false postive (non folio) */
+ if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ orders[0]++;
+ vaddr += psize();
+ continue;
+ }
+
+ /* skip non thp compound pages */
+ if (!(page_flags & KPF_THP)) {
+ vaddr += psize();
+ continue;
+ }
+
+ /* vpn points to part of a THP at this point */
+ if (page_flags & KPF_COMPOUND_HEAD)
+ cur_order = 1;
+ else {
+ /* not a head nor a tail in a THP? */
+ if (!(page_flags & KPF_COMPOUND_TAIL))
+ return -1;
+
+ vaddr += psize();
+ continue;
+ }
+
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+
+ if (next_folio_vaddr >= vaddr_start + len)
+ break;
+
+ while ((status = get_page_flags(next_folio_vaddr, pagemap_fd,
+ kpageflags_fd,
+ &page_flags)) >= 0) {
+ /*
+ * non present vaddr, next compound head page, or
+ * order-0 page
+ */
+ if (status == 1 ||
+ (page_flags & KPF_COMPOUND_HEAD) ||
+ !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ if (cur_order < nr_orders) {
+ orders[cur_order]++;
+ cur_order = -1;
+ vaddr = next_folio_vaddr;
+ }
+ break;
+ }
+
+ /* not a head nor a tail in a THP? */
+ if (!(page_flags & KPF_COMPOUND_TAIL))
+ return -1;
+
+ cur_order++;
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+ }
+
+ if (status < 0)
+ return status;
+ }
+ if (cur_order > 0 && cur_order < nr_orders)
+ orders[cur_order]++;
+ return 0;
+}
+
+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd,
+ int kpageflags_fd, int orders[], int nr_orders)
+{
+ int *vaddr_orders;
+ int status;
+ int i;
+
+ vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
+
+ if (!vaddr_orders)
+ ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
+
+ memset(vaddr_orders, 0, sizeof(int) * nr_orders);
+ status = gather_folio_orders(vaddr_start, len, pagemap_fd,
+ kpageflags_fd, vaddr_orders, nr_orders);
+ if (status)
+ goto out;
+
+ status = 0;
+ for (i = 0; i < nr_orders; i++)
+ if (vaddr_orders[i] != orders[i]) {
+ ksft_print_msg("order %d: expected: %d got %d\n", i,
+ orders[i], vaddr_orders[i]);
+ status = -1;
+ }
+
+out:
+ free(vaddr_orders);
+ return status;
+}
+
/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls)
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 1843ad48d32b..02e3f1e7065b 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -18,6 +18,11 @@
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
+#define KPF_COMPOUND_HEAD BIT_ULL(15)
+#define KPF_COMPOUND_TAIL BIT_ULL(16)
+#define KPF_THP BIT_ULL(22)
+
+
/*
* Ignore the checkpatch warning, we must read from x but don't want to do
* anything with it in order to trigger a read page fault. We therefore must use
@@ -85,6 +90,8 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
int64_t allocate_transhuge(void *ptr, int pagemap_fd);
unsigned long default_huge_page_size(void);
int detect_hugetlb_page_sizes(size_t sizes[], int max);
+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_file,
+ int kpageflags_file, int orders[], int nr_orders);
int uffd_register(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor);
--
2.47.2
On Tue, Aug 12, 2025 at 11:55:10AM -0400, Zi Yan wrote: [...] >+/* >+ * gather_folio_orders - scan through [vaddr_start, len) and record folio orders >+ * @vaddr_start: start vaddr >+ * @len: range length >+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap >+ * @kpageflags_fd: file descriptor to /proc/kpageflags >+ * @orders: output folio order array >+ * @nr_orders: folio order array size >+ * >+ * gather_folio_orders() scan through [vaddr_start, len) and check all folios >+ * within the range and record their orders. All order-0 pages will be recorded. I feel a little confused about the description here. Especially on the behavior when the range is not aligned on folio boundary. See following code at 1) and 2). >+ * Non-present vaddr is skipped. >+ * >+ * >+ * Return: 0 - no error, -1 - unhandled cases >+ */ >+static int gather_folio_orders(char *vaddr_start, size_t len, >+ int pagemap_fd, int kpageflags_fd, >+ int orders[], int nr_orders) >+{ >+ uint64_t page_flags = 0; >+ int cur_order = -1; >+ char *vaddr; >+ >+ if (!pagemap_fd || !kpageflags_fd) >+ return -1; If my understanding is correct, we use open() to get a file descriptor. On error it returns -1. And 0 is a possible valid value, but usually used by stdin. The code may work in most cases, but seems not right. >+ if (nr_orders <= 0) >+ return -1; >+ Maybe we want to check orders[] here too? >+ for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { >+ char *next_folio_vaddr; >+ int status; >+ >+ status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd, >+ &page_flags); >+ if (status < 0) >+ return -1; >+ >+ /* skip non present vaddr */ >+ if (status == 1) { >+ vaddr += psize(); >+ continue; >+ } >+ >+ /* all order-0 pages with possible false postive (non folio) */ Do we still false positive case? Non-present page returns 1, which is handled above. >+ if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >+ orders[0]++; >+ vaddr += psize(); >+ continue; >+ } >+ >+ /* skip non thp compound pages */ >+ if (!(page_flags & KPF_THP)) { >+ vaddr += psize(); >+ continue; >+ } >+ >+ /* vpn points to part of a THP at this point */ >+ if (page_flags & KPF_COMPOUND_HEAD) >+ cur_order = 1; >+ else { >+ /* not a head nor a tail in a THP? */ >+ if (!(page_flags & KPF_COMPOUND_TAIL)) >+ return -1; When reaches here, we know (page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL)). So we have at least one of it set. Looks not possible to hit it? >+ >+ vaddr += psize(); >+ continue; 1) In case vaddr points to the middle of a large folio, this will skip this folio and count from next one. >+ } >+ >+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); >+ >+ if (next_folio_vaddr >= vaddr_start + len) >+ break; >+ >+ while ((status = get_page_flags(next_folio_vaddr, pagemap_fd, >+ kpageflags_fd, >+ &page_flags)) >= 0) { >+ /* >+ * non present vaddr, next compound head page, or >+ * order-0 page >+ */ >+ if (status == 1 || >+ (page_flags & KPF_COMPOUND_HEAD) || >+ !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >+ if (cur_order < nr_orders) { >+ orders[cur_order]++; >+ cur_order = -1; >+ vaddr = next_folio_vaddr; >+ } >+ break; >+ } >+ >+ /* not a head nor a tail in a THP? */ >+ if (!(page_flags & KPF_COMPOUND_TAIL)) >+ return -1; >+ >+ cur_order++; >+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); 2) If (vaddr_start + len) points to the middle of a large folio and folio is more than order 1 size, we may continue the loop and still count this last folio. Because we don't check next_folio_vaddr and (vaddr_start + len). A simple chart of these case. vaddr_start + len | | v v +---------------------+ +-----------------+ |folio 1 | |folio 2 | +---------------------+ +-----------------+ folio 1 is not counted, but folio 2 is counted. So at 1) and 2) handles the boundary differently. Not sure this is designed behavior. If so I think it would be better to record in document, otherwise the behavior is not obvious to user. >+ } >+ >+ if (status < 0) >+ return status; >+ } >+ if (cur_order > 0 && cur_order < nr_orders) >+ orders[cur_order]++; Another boundary case here. If we come here because (next_folio_vaddr >= vaddr_start + len) in the for loop instead of the while loop. This means we found the folio head at vaddr, but the left range (vaddr_start + len - vaddr) is less than or equal to order 1 page size. But we haven't detected the real end of this folio. If this folio is more than order 1 size, we still count it an order 1 folio. >+ return 0; >+} >+ >+int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd, >+ int kpageflags_fd, int orders[], int nr_orders) >+{ >+ int *vaddr_orders; >+ int status; >+ int i; >+ >+ vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); >+ >+ if (!vaddr_orders) >+ ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); >+ >+ memset(vaddr_orders, 0, sizeof(int) * nr_orders); >+ status = gather_folio_orders(vaddr_start, len, pagemap_fd, >+ kpageflags_fd, vaddr_orders, nr_orders); >+ if (status) >+ goto out; >+ >+ status = 0; >+ for (i = 0; i < nr_orders; i++) >+ if (vaddr_orders[i] != orders[i]) { >+ ksft_print_msg("order %d: expected: %d got %d\n", i, >+ orders[i], vaddr_orders[i]); >+ status = -1; >+ } >+ >+out: >+ free(vaddr_orders); >+ return status; >+} -- Wei Yang Help you, Help me
On 13 Aug 2025, at 17:12, Wei Yang wrote: > On Tue, Aug 12, 2025 at 11:55:10AM -0400, Zi Yan wrote: > [...] >> +/* >> + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders >> + * @vaddr_start: start vaddr >> + * @len: range length >> + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap >> + * @kpageflags_fd: file descriptor to /proc/kpageflags >> + * @orders: output folio order array >> + * @nr_orders: folio order array size >> + * >> + * gather_folio_orders() scan through [vaddr_start, len) and check all folios >> + * within the range and record their orders. All order-0 pages will be recorded. > > I feel a little confused about the description here. Especially on the > behavior when the range is not aligned on folio boundary. I was too ambitious on this function. It is intended to just check after split folio orders. I will move the function to split_huge_page_test.c and rename it to gather_after_split_folio_orders() and check_after_split_folio_orders(). > > See following code at 1) and 2). > >> + * Non-present vaddr is skipped. >> + * >> + * >> + * Return: 0 - no error, -1 - unhandled cases >> + */ >> +static int gather_folio_orders(char *vaddr_start, size_t len, >> + int pagemap_fd, int kpageflags_fd, >> + int orders[], int nr_orders) >> +{ >> + uint64_t page_flags = 0; >> + int cur_order = -1; >> + char *vaddr; >> + >> + if (!pagemap_fd || !kpageflags_fd) >> + return -1; > > If my understanding is correct, we use open() to get a file descriptor. > > On error it returns -1. And 0 is a possible valid value, but usually used by > stdin. The code may work in most cases, but seems not right. Will fix it to if (pagemap_fd == -1 || kpageflags_fd == -1) > >> + if (nr_orders <= 0) >> + return -1; >> + > > Maybe we want to check orders[] here too? > >> + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { >> + char *next_folio_vaddr; >> + int status; >> + >> + status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd, >> + &page_flags); >> + if (status < 0) >> + return -1; >> + >> + /* skip non present vaddr */ >> + if (status == 1) { >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* all order-0 pages with possible false postive (non folio) */ > > Do we still false positive case? Non-present page returns 1, which is handled > above. Any order-0 non folio will be counted, like GFP_KERNEL pages. > >> + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >> + orders[0]++; >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* skip non thp compound pages */ >> + if (!(page_flags & KPF_THP)) { >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* vpn points to part of a THP at this point */ >> + if (page_flags & KPF_COMPOUND_HEAD) >> + cur_order = 1; >> + else { >> + /* not a head nor a tail in a THP? */ >> + if (!(page_flags & KPF_COMPOUND_TAIL)) >> + return -1; > > When reaches here, we know (page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL)). > So we have at least one of it set. > > Looks not possible to hit it? Will remove it. > >> + >> + vaddr += psize(); >> + continue; > > 1) > > In case vaddr points to the middle of a large folio, this will skip this folio > and count from next one. > >> + } >> + >> + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); >> + >> + if (next_folio_vaddr >= vaddr_start + len) >> + break; >> + >> + while ((status = get_page_flags(next_folio_vaddr, pagemap_fd, >> + kpageflags_fd, >> + &page_flags)) >= 0) { >> + /* >> + * non present vaddr, next compound head page, or >> + * order-0 page >> + */ >> + if (status == 1 || >> + (page_flags & KPF_COMPOUND_HEAD) || >> + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >> + if (cur_order < nr_orders) { >> + orders[cur_order]++; >> + cur_order = -1; >> + vaddr = next_folio_vaddr; >> + } >> + break; >> + } >> + >> + /* not a head nor a tail in a THP? */ >> + if (!(page_flags & KPF_COMPOUND_TAIL)) >> + return -1; >> + >> + cur_order++; >> + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); > > 2) > > If (vaddr_start + len) points to the middle of a large folio and folio is more > than order 1 size, we may continue the loop and still count this last folio. > Because we don't check next_folio_vaddr and (vaddr_start + len). > > A simple chart of these case. > > vaddr_start + len > | | > v v > +---------------------+ +-----------------+ > |folio 1 | |folio 2 | > +---------------------+ +-----------------+ > > folio 1 is not counted, but folio 2 is counted. > > So at 1) and 2) handles the boundary differently. Not sure this is designed > behavior. If so I think it would be better to record in document, otherwise > the behavior is not obvious to user. Will document it. > >> + } >> + >> + if (status < 0) >> + return status; >> + } >> + if (cur_order > 0 && cur_order < nr_orders) >> + orders[cur_order]++; > > Another boundary case here. > > If we come here because (next_folio_vaddr >= vaddr_start + len) in the for > loop instead of the while loop. This means we found the folio head at vaddr, > but the left range (vaddr_start + len - vaddr) is less than or equal to order > 1 page size. > > But we haven't detected the real end of this folio. If this folio is more than > order 1 size, we still count it an order 1 folio. Yes. Will document it. Thanks for the review. > >> + return 0; >> +} >> + >> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd, >> + int kpageflags_fd, int orders[], int nr_orders) >> +{ >> + int *vaddr_orders; >> + int status; >> + int i; >> + >> + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); >> + >> + if (!vaddr_orders) >> + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); >> + >> + memset(vaddr_orders, 0, sizeof(int) * nr_orders); >> + status = gather_folio_orders(vaddr_start, len, pagemap_fd, >> + kpageflags_fd, vaddr_orders, nr_orders); >> + if (status) >> + goto out; >> + >> + status = 0; >> + for (i = 0; i < nr_orders; i++) >> + if (vaddr_orders[i] != orders[i]) { >> + ksft_print_msg("order %d: expected: %d got %d\n", i, >> + orders[i], vaddr_orders[i]); >> + status = -1; >> + } >> + >> +out: >> + free(vaddr_orders); >> + return status; >> +} > > -- > Wei Yang > Help you, Help me Best Regards, Yan, Zi
> On Aug 12, 2025, at 23:55, Zi Yan <ziy@nvidia.com> wrote: > > The helper gathers an folio order statistics of folios within a virtual > address range and checks it against a given order list. It aims to provide > a more precise folio order check instead of just checking the existence of > PMD folios. > > Signed-off-by: Zi Yan <ziy@nvidia.com> > --- > .../selftests/mm/split_huge_page_test.c | 4 +- > tools/testing/selftests/mm/vm_util.c | 173 ++++++++++++++++++ > tools/testing/selftests/mm/vm_util.h | 7 + > 3 files changed, 181 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c > index 5d07b0b89226..63ac82f0b9e0 100644 > --- a/tools/testing/selftests/mm/split_huge_page_test.c > +++ b/tools/testing/selftests/mm/split_huge_page_test.c > @@ -34,8 +34,6 @@ uint64_t pmd_pagesize; > #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" > #define PATH_FMT "%s,0x%lx,0x%lx,%d" > > -#define PFN_MASK ((1UL<<55)-1) > -#define KPF_THP (1UL<<22) > #define GET_ORDER(nr_pages) (31 - __builtin_clz(nr_pages)) > > int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) > @@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) > > if (kpageflags_file) { > pread(kpageflags_file, &page_flags, sizeof(page_flags), > - (paddr & PFN_MASK) * sizeof(page_flags)); > + PAGEMAP_PFN(paddr) * sizeof(page_flags)); > > return !!(page_flags & KPF_THP); > } > diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c > index 6a239aa413e2..4d952d1bc96d 100644 > --- a/tools/testing/selftests/mm/vm_util.c > +++ b/tools/testing/selftests/mm/vm_util.c > @@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max) > return count; > } > > +static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags) > +{ > + size_t count; > + > + count = pread(kpageflags_fd, flags, sizeof(*flags), > + pfn * sizeof(*flags)); > + > + if (count != sizeof(*flags)) > + return -1; > + > + return 0; > +} > + > +static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd, > + uint64_t *flags) > +{ > + unsigned long pfn; > + > + pfn = pagemap_get_pfn(pagemap_fd, vaddr); > + /* > + * Treat non-present page as a page without any flag, so that > + * gather_folio_orders() just record the current folio order. > + */ > + if (pfn == -1UL) { > + *flags = 0; > + return 1; > + } > + > + if (get_pfn_flags(pfn, kpageflags_fd, flags)) > + return -1; > + > + return 0; > +} > + > +/* > + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders > + * @vaddr_start: start vaddr > + * @len: range length > + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap > + * @kpageflags_fd: file descriptor to /proc/kpageflags > + * @orders: output folio order array > + * @nr_orders: folio order array size > + * > + * gather_folio_orders() scan through [vaddr_start, len) and check all folios > + * within the range and record their orders. All order-0 pages will be recorded. > + * Non-present vaddr is skipped. > + * > + * > + * Return: 0 - no error, -1 - unhandled cases > + */ > +static int gather_folio_orders(char *vaddr_start, size_t len, > + int pagemap_fd, int kpageflags_fd, > + int orders[], int nr_orders) > +{ > + uint64_t page_flags = 0; > + int cur_order = -1; > + char *vaddr; > + > + if (!pagemap_fd || !kpageflags_fd) > + return -1; > + if (nr_orders <= 0) > + return -1; > + > + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { > + char *next_folio_vaddr; > + int status; > + > + status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd, > + &page_flags); > + if (status < 0) > + return -1; > + > + /* skip non present vaddr */ > + if (status == 1) { > + vaddr += psize(); > + continue; > + } > + > + /* all order-0 pages with possible false postive (non folio) */ > + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { > + orders[0]++; > + vaddr += psize(); > + continue; > + } > + > + /* skip non thp compound pages */ > + if (!(page_flags & KPF_THP)) { > + vaddr += psize(); > + continue; > + } > + > + /* vpn points to part of a THP at this point */ > + if (page_flags & KPF_COMPOUND_HEAD) > + cur_order = 1; > + else { > + /* not a head nor a tail in a THP? */ > + if (!(page_flags & KPF_COMPOUND_TAIL)) > + return -1; > + > + vaddr += psize(); > + continue; > + } > + > + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); > + > + if (next_folio_vaddr >= vaddr_start + len) > + break; > + > + while ((status = get_page_flags(next_folio_vaddr, pagemap_fd, > + kpageflags_fd, > + &page_flags)) >= 0) { > + /* > + * non present vaddr, next compound head page, or > + * order-0 page > + */ > + if (status == 1 || > + (page_flags & KPF_COMPOUND_HEAD) || > + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { > + if (cur_order < nr_orders) { > + orders[cur_order]++; > + cur_order = -1; > + vaddr = next_folio_vaddr; > + } > + break; > + } > + > + /* not a head nor a tail in a THP? */ > + if (!(page_flags & KPF_COMPOUND_TAIL)) > + return -1; > + > + cur_order++; > + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); > + } > + > + if (status < 0) > + return status; > + } > + if (cur_order > 0 && cur_order < nr_orders) > + orders[cur_order]++; > + return 0; > +} > + > +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd, > + int kpageflags_fd, int orders[], int nr_orders) > +{ > + int *vaddr_orders; > + int status; > + int i; > + > + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); > + > + if (!vaddr_orders) > + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); > + > + memset(vaddr_orders, 0, sizeof(int) * nr_orders); > + status = gather_folio_orders(vaddr_start, len, pagemap_fd, > + kpageflags_fd, vaddr_orders, nr_orders); > + if (status) > + goto out; > + > + status = 0; Nit. It seems redundant. Would you consider removing it for a bit more conciseness? This doesn't block my approval, of course. Reviewed-by: wang lian <lianux.mm@gmail.com> > + for (i = 0; i < nr_orders; i++) > + if (vaddr_orders[i] != orders[i]) { > + ksft_print_msg("order %d: expected: %d got %d\n", i, > + orders[i], vaddr_orders[i]); > + status = -1; > + } > + > +out: > + free(vaddr_orders); > + return status; > +} > + > /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ > int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, > bool miss, bool wp, bool minor, uint64_t *ioctls) > diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h > index 1843ad48d32b..02e3f1e7065b 100644 > --- a/tools/testing/selftests/mm/vm_util.h > +++ b/tools/testing/selftests/mm/vm_util.h > @@ -18,6 +18,11 @@ > #define PM_SWAP BIT_ULL(62) > #define PM_PRESENT BIT_ULL(63) > > +#define KPF_COMPOUND_HEAD BIT_ULL(15) > +#define KPF_COMPOUND_TAIL BIT_ULL(16) > +#define KPF_THP BIT_ULL(22) > + > + > /* > * Ignore the checkpatch warning, we must read from x but don't want to do > * anything with it in order to trigger a read page fault. We therefore must use > @@ -85,6 +90,8 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); > int64_t allocate_transhuge(void *ptr, int pagemap_fd); > unsigned long default_huge_page_size(void); > int detect_hugetlb_page_sizes(size_t sizes[], int max); > +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_file, > + int kpageflags_file, int orders[], int nr_orders); > > int uffd_register(int uffd, void *addr, uint64_t len, > bool miss, bool wp, bool minor); > -- > 2.47.2 > Best regards, wang lian
On 12 Aug 2025, at 23:38, wang lian wrote: >> On Aug 12, 2025, at 23:55, Zi Yan <ziy@nvidia.com> wrote: >> >> The helper gathers an folio order statistics of folios within a virtual >> address range and checks it against a given order list. It aims to provide >> a more precise folio order check instead of just checking the existence of >> PMD folios. >> >> Signed-off-by: Zi Yan <ziy@nvidia.com> >> --- >> .../selftests/mm/split_huge_page_test.c | 4 +- >> tools/testing/selftests/mm/vm_util.c | 173 ++++++++++++++++++ >> tools/testing/selftests/mm/vm_util.h | 7 + >> 3 files changed, 181 insertions(+), 3 deletions(-) >> >> diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c >> index 5d07b0b89226..63ac82f0b9e0 100644 >> --- a/tools/testing/selftests/mm/split_huge_page_test.c >> +++ b/tools/testing/selftests/mm/split_huge_page_test.c >> @@ -34,8 +34,6 @@ uint64_t pmd_pagesize; >> #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" >> #define PATH_FMT "%s,0x%lx,0x%lx,%d" >> >> -#define PFN_MASK ((1UL<<55)-1) >> -#define KPF_THP (1UL<<22) >> #define GET_ORDER(nr_pages) (31 - __builtin_clz(nr_pages)) >> >> int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) >> @@ -49,7 +47,7 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) >> >> if (kpageflags_file) { >> pread(kpageflags_file, &page_flags, sizeof(page_flags), >> - (paddr & PFN_MASK) * sizeof(page_flags)); >> + PAGEMAP_PFN(paddr) * sizeof(page_flags)); >> >> return !!(page_flags & KPF_THP); >> } >> diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c >> index 6a239aa413e2..4d952d1bc96d 100644 >> --- a/tools/testing/selftests/mm/vm_util.c >> +++ b/tools/testing/selftests/mm/vm_util.c >> @@ -338,6 +338,179 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max) >> return count; >> } >> >> +static int get_pfn_flags(unsigned long pfn, int kpageflags_fd, uint64_t *flags) >> +{ >> + size_t count; >> + >> + count = pread(kpageflags_fd, flags, sizeof(*flags), >> + pfn * sizeof(*flags)); >> + >> + if (count != sizeof(*flags)) >> + return -1; >> + >> + return 0; >> +} >> + >> +static int get_page_flags(char *vaddr, int pagemap_fd, int kpageflags_fd, >> + uint64_t *flags) >> +{ >> + unsigned long pfn; >> + >> + pfn = pagemap_get_pfn(pagemap_fd, vaddr); >> + /* >> + * Treat non-present page as a page without any flag, so that >> + * gather_folio_orders() just record the current folio order. >> + */ >> + if (pfn == -1UL) { >> + *flags = 0; >> + return 1; >> + } >> + >> + if (get_pfn_flags(pfn, kpageflags_fd, flags)) >> + return -1; >> + >> + return 0; >> +} >> + >> +/* >> + * gather_folio_orders - scan through [vaddr_start, len) and record folio orders >> + * @vaddr_start: start vaddr >> + * @len: range length >> + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap >> + * @kpageflags_fd: file descriptor to /proc/kpageflags >> + * @orders: output folio order array >> + * @nr_orders: folio order array size >> + * >> + * gather_folio_orders() scan through [vaddr_start, len) and check all folios >> + * within the range and record their orders. All order-0 pages will be recorded. >> + * Non-present vaddr is skipped. >> + * >> + * >> + * Return: 0 - no error, -1 - unhandled cases >> + */ >> +static int gather_folio_orders(char *vaddr_start, size_t len, >> + int pagemap_fd, int kpageflags_fd, >> + int orders[], int nr_orders) >> +{ >> + uint64_t page_flags = 0; >> + int cur_order = -1; >> + char *vaddr; >> + >> + if (!pagemap_fd || !kpageflags_fd) >> + return -1; >> + if (nr_orders <= 0) >> + return -1; >> + >> + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { >> + char *next_folio_vaddr; >> + int status; >> + >> + status = get_page_flags(vaddr, pagemap_fd, kpageflags_fd, >> + &page_flags); >> + if (status < 0) >> + return -1; >> + >> + /* skip non present vaddr */ >> + if (status == 1) { >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* all order-0 pages with possible false postive (non folio) */ >> + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >> + orders[0]++; >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* skip non thp compound pages */ >> + if (!(page_flags & KPF_THP)) { >> + vaddr += psize(); >> + continue; >> + } >> + >> + /* vpn points to part of a THP at this point */ >> + if (page_flags & KPF_COMPOUND_HEAD) >> + cur_order = 1; >> + else { >> + /* not a head nor a tail in a THP? */ >> + if (!(page_flags & KPF_COMPOUND_TAIL)) >> + return -1; >> + >> + vaddr += psize(); >> + continue; >> + } >> + >> + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); >> + >> + if (next_folio_vaddr >= vaddr_start + len) >> + break; >> + >> + while ((status = get_page_flags(next_folio_vaddr, pagemap_fd, >> + kpageflags_fd, >> + &page_flags)) >= 0) { >> + /* >> + * non present vaddr, next compound head page, or >> + * order-0 page >> + */ >> + if (status == 1 || >> + (page_flags & KPF_COMPOUND_HEAD) || >> + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { >> + if (cur_order < nr_orders) { >> + orders[cur_order]++; >> + cur_order = -1; >> + vaddr = next_folio_vaddr; >> + } >> + break; >> + } >> + >> + /* not a head nor a tail in a THP? */ >> + if (!(page_flags & KPF_COMPOUND_TAIL)) >> + return -1; >> + >> + cur_order++; >> + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); >> + } >> + >> + if (status < 0) >> + return status; >> + } >> + if (cur_order > 0 && cur_order < nr_orders) >> + orders[cur_order]++; >> + return 0; >> +} >> + >> +int check_folio_orders(char *vaddr_start, size_t len, int pagemap_fd, >> + int kpageflags_fd, int orders[], int nr_orders) >> +{ >> + int *vaddr_orders; >> + int status; >> + int i; >> + >> + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); >> + >> + if (!vaddr_orders) >> + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); >> + >> + memset(vaddr_orders, 0, sizeof(int) * nr_orders); >> + status = gather_folio_orders(vaddr_start, len, pagemap_fd, >> + kpageflags_fd, vaddr_orders, nr_orders); >> + if (status) >> + goto out; >> + >> + status = 0; > > Nit. > It seems redundant. > Would you consider removing it for a bit more conciseness? > This doesn't block my approval, of course. > Reviewed-by: wang lian <lianux.mm@gmail.com> Sure. Thanks. Best Regards, Yan, Zi
© 2016 - 2025 Red Hat, Inc.