... and hide it behind a kconfig option. There is really no need for
any !xen code to perform this check.
The naming is a bit off: we want to find the "normal" page when a PTE
was marked "special". So it's really not "finding a special" page.
Improve the documentation, and add a comment in the code where XEN ends
up performing the pte_mkspecial() through a hypercall. More details can
be found in commit 923b2919e2c3 ("xen/gntdev: mark userspace PTEs as
special on x86 PV guests").
Cc: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/xen/Kconfig | 1 +
drivers/xen/gntdev.c | 5 +++--
include/linux/mm.h | 18 +++++++++++++-----
mm/Kconfig | 2 ++
mm/memory.c | 12 ++++++++++--
tools/testing/vma/vma_internal.h | 18 +++++++++++++-----
6 files changed, 42 insertions(+), 14 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 24f485827e039..f9a35ed266ecf 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -138,6 +138,7 @@ config XEN_GNTDEV
depends on XEN
default m
select MMU_NOTIFIER
+ select FIND_NORMAL_PAGE
help
Allows userspace processes to use grants.
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1f21607656182..26f13b37c78e6 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -321,6 +321,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
BUG_ON(pgnr >= map->count);
pte_maddr = arbitrary_virt_to_machine(pte).maddr;
+ /* Note: this will perform a pte_mkspecial() through the hypercall. */
gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
map->grants[pgnr].ref,
map->grants[pgnr].domid);
@@ -528,7 +529,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
gntdev_put_map(priv, map);
}
-static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
+static struct page *gntdev_vma_find_normal_page(struct vm_area_struct *vma,
unsigned long addr)
{
struct gntdev_grant_map *map = vma->vm_private_data;
@@ -539,7 +540,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
static const struct vm_operations_struct gntdev_vmops = {
.open = gntdev_vma_open,
.close = gntdev_vma_close,
- .find_special_page = gntdev_vma_find_special_page,
+ .find_normal_page = gntdev_vma_find_normal_page,
};
/* ------------------------------------------------------------------ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ca7d2fa71343..3868ca1a25f9c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -657,13 +657,21 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *ilx);
#endif
+#ifdef CONFIG_FIND_NORMAL_PAGE
/*
- * Called by vm_normal_page() for special PTEs to find the
- * page for @addr. This is useful if the default behavior
- * (using pte_page()) would not find the correct page.
+ * Called by vm_normal_page() for special PTEs in @vma at @addr. This
+ * allows for returning a "normal" page from vm_normal_page() even
+ * though the PTE indicates that the "struct page" either does not exist
+ * or should not be touched: "special".
+ *
+ * Do not add new users: this really only works when a "normal" page
+ * was mapped, but then the PTE got changed to something weird (+
+ * marked special) that would not make pte_pfn() identify the originally
+ * inserted page.
*/
- struct page *(*find_special_page)(struct vm_area_struct *vma,
- unsigned long addr);
+ struct page *(*find_normal_page)(struct vm_area_struct *vma,
+ unsigned long addr);
+#endif /* CONFIG_FIND_NORMAL_PAGE */
};
#ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/Kconfig b/mm/Kconfig
index e443fe8cd6cf2..59a04d0b2e272 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1381,6 +1381,8 @@ config PT_RECLAIM
Note: now only empty user PTE page table pages will be reclaimed.
+config FIND_NORMAL_PAGE
+ def_bool n
source "mm/damon/Kconfig"
diff --git a/mm/memory.c b/mm/memory.c
index 6f806bf3cc994..002c28795d8b7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -639,6 +639,12 @@ static void print_bad_page_map(struct vm_area_struct *vma,
* trivial. Secondly, an architecture may not have a spare page table
* entry bit, which requires a more complicated scheme, described below.
*
+ * With CONFIG_FIND_NORMAL_PAGE, we might have the "special" bit set on
+ * page table entries that actually map "normal" pages: however, that page
+ * cannot be looked up through the PFN stored in the page table entry, but
+ * instead will be looked up through vm_ops->find_normal_page(). So far, this
+ * only applies to PTEs.
+ *
* A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
* special mapping (even if there are underlying and valid "struct pages").
* COWed pages of a VM_PFNMAP are always normal.
@@ -679,8 +685,10 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
{
if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
if (unlikely(special)) {
- if (vma->vm_ops && vma->vm_ops->find_special_page)
- return vma->vm_ops->find_special_page(vma, addr);
+#ifdef CONFIG_FIND_NORMAL_PAGE
+ if (vma->vm_ops && vma->vm_ops->find_normal_page)
+ return vma->vm_ops->find_normal_page(vma, addr);
+#endif /* CONFIG_FIND_NORMAL_PAGE */
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (is_zero_pfn(pfn) || is_huge_zero_pfn(pfn))
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 3639aa8dd2b06..cb1c2a8afe265 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -467,13 +467,21 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *ilx);
#endif
+#ifdef CONFIG_FIND_NORMAL_PAGE
/*
- * Called by vm_normal_page() for special PTEs to find the
- * page for @addr. This is useful if the default behavior
- * (using pte_page()) would not find the correct page.
+ * Called by vm_normal_page() for special PTEs in @vma at @addr. This
+ * allows for returning a "normal" page from vm_normal_page() even
+ * though the PTE indicates that the "struct page" either does not exist
+ * or should not be touched: "special".
+ *
+ * Do not add new users: this really only works when a "normal" page
+ * was mapped, but then the PTE got changed to something weird (+
+ * marked special) that would not make pte_pfn() identify the originally
+ * inserted page.
*/
- struct page *(*find_special_page)(struct vm_area_struct *vma,
- unsigned long addr);
+ struct page *(*find_normal_page)(struct vm_area_struct *vma,
+ unsigned long addr);
+#endif /* CONFIG_FIND_NORMAL_PAGE */
};
struct vm_unmapped_area_info {
--
2.50.1
On Mon, Aug 11, 2025 at 01:26:31PM +0200, David Hildenbrand wrote: > ... and hide it behind a kconfig option. There is really no need for > any !xen code to perform this check. > > The naming is a bit off: we want to find the "normal" page when a PTE > was marked "special". So it's really not "finding a special" page. > > Improve the documentation, and add a comment in the code where XEN ends > up performing the pte_mkspecial() through a hypercall. More details can > be found in commit 923b2919e2c3 ("xen/gntdev: mark userspace PTEs as > special on x86 PV guests"). > > Cc: David Vrabel <david.vrabel@citrix.com> > Reviewed-by: Oscar Salvador <osalvador@suse.de> > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > Reviewed-by: Wei Yang <richard.weiyang@gmail.com> > Signed-off-by: David Hildenbrand <david@redhat.com> Oh I already reviewed it. But anyway, may as well say - THANKS for this, it's great again :) > --- > drivers/xen/Kconfig | 1 + > drivers/xen/gntdev.c | 5 +++-- > include/linux/mm.h | 18 +++++++++++++----- > mm/Kconfig | 2 ++ > mm/memory.c | 12 ++++++++++-- > tools/testing/vma/vma_internal.h | 18 +++++++++++++----- > 6 files changed, 42 insertions(+), 14 deletions(-) > > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > index 24f485827e039..f9a35ed266ecf 100644 > --- a/drivers/xen/Kconfig > +++ b/drivers/xen/Kconfig > @@ -138,6 +138,7 @@ config XEN_GNTDEV > depends on XEN > default m > select MMU_NOTIFIER > + select FIND_NORMAL_PAGE > help > Allows userspace processes to use grants. > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 1f21607656182..26f13b37c78e6 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -321,6 +321,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) > BUG_ON(pgnr >= map->count); > pte_maddr = arbitrary_virt_to_machine(pte).maddr; > > + /* Note: this will perform a pte_mkspecial() through the hypercall. 
*/ > gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, > map->grants[pgnr].ref, > map->grants[pgnr].domid); > @@ -528,7 +529,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma) > gntdev_put_map(priv, map); > } > > -static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, > +static struct page *gntdev_vma_find_normal_page(struct vm_area_struct *vma, > unsigned long addr) > { > struct gntdev_grant_map *map = vma->vm_private_data; > @@ -539,7 +540,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, > static const struct vm_operations_struct gntdev_vmops = { > .open = gntdev_vma_open, > .close = gntdev_vma_close, > - .find_special_page = gntdev_vma_find_special_page, > + .find_normal_page = gntdev_vma_find_normal_page, > }; > > /* ------------------------------------------------------------------ */ > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 8ca7d2fa71343..3868ca1a25f9c 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -657,13 +657,21 @@ struct vm_operations_struct { > struct mempolicy *(*get_policy)(struct vm_area_struct *vma, > unsigned long addr, pgoff_t *ilx); > #endif > +#ifdef CONFIG_FIND_NORMAL_PAGE > /* > - * Called by vm_normal_page() for special PTEs to find the > - * page for @addr. This is useful if the default behavior > - * (using pte_page()) would not find the correct page. > + * Called by vm_normal_page() for special PTEs in @vma at @addr. This > + * allows for returning a "normal" page from vm_normal_page() even > + * though the PTE indicates that the "struct page" either does not exist > + * or should not be touched: "special". > + * > + * Do not add new users: this really only works when a "normal" page > + * was mapped, but then the PTE got changed to something weird (+ > + * marked special) that would not make pte_pfn() identify the originally > + * inserted page. 
> */ > - struct page *(*find_special_page)(struct vm_area_struct *vma, > - unsigned long addr); > + struct page *(*find_normal_page)(struct vm_area_struct *vma, > + unsigned long addr); > +#endif /* CONFIG_FIND_NORMAL_PAGE */ > }; > > #ifdef CONFIG_NUMA_BALANCING > diff --git a/mm/Kconfig b/mm/Kconfig > index e443fe8cd6cf2..59a04d0b2e272 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -1381,6 +1381,8 @@ config PT_RECLAIM > > Note: now only empty user PTE page table pages will be reclaimed. > > +config FIND_NORMAL_PAGE > + def_bool n > > source "mm/damon/Kconfig" > > diff --git a/mm/memory.c b/mm/memory.c > index 6f806bf3cc994..002c28795d8b7 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -639,6 +639,12 @@ static void print_bad_page_map(struct vm_area_struct *vma, > * trivial. Secondly, an architecture may not have a spare page table > * entry bit, which requires a more complicated scheme, described below. > * > + * With CONFIG_FIND_NORMAL_PAGE, we might have the "special" bit set on > + * page table entries that actually map "normal" pages: however, that page > + * cannot be looked up through the PFN stored in the page table entry, but > + * instead will be looked up through vm_ops->find_normal_page(). So far, this > + * only applies to PTEs. > + * > * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a > * special mapping (even if there are underlying and valid "struct pages"). > * COWed pages of a VM_PFNMAP are always normal. 
> @@ -679,8 +685,10 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma, > { > if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { > if (unlikely(special)) { > - if (vma->vm_ops && vma->vm_ops->find_special_page) > - return vma->vm_ops->find_special_page(vma, addr); > +#ifdef CONFIG_FIND_NORMAL_PAGE > + if (vma->vm_ops && vma->vm_ops->find_normal_page) > + return vma->vm_ops->find_normal_page(vma, addr); > +#endif /* CONFIG_FIND_NORMAL_PAGE */ > if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) > return NULL; > if (is_zero_pfn(pfn) || is_huge_zero_pfn(pfn)) > diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h > index 3639aa8dd2b06..cb1c2a8afe265 100644 > --- a/tools/testing/vma/vma_internal.h > +++ b/tools/testing/vma/vma_internal.h > @@ -467,13 +467,21 @@ struct vm_operations_struct { > struct mempolicy *(*get_policy)(struct vm_area_struct *vma, > unsigned long addr, pgoff_t *ilx); > #endif > +#ifdef CONFIG_FIND_NORMAL_PAGE > /* > - * Called by vm_normal_page() for special PTEs to find the > - * page for @addr. This is useful if the default behavior > - * (using pte_page()) would not find the correct page. > + * Called by vm_normal_page() for special PTEs in @vma at @addr. This > + * allows for returning a "normal" page from vm_normal_page() even > + * though the PTE indicates that the "struct page" either does not exist > + * or should not be touched: "special". > + * > + * Do not add new users: this really only works when a "normal" page > + * was mapped, but then the PTE got changed to something weird (+ > + * marked special) that would not make pte_pfn() identify the originally > + * inserted page. > */ > - struct page *(*find_special_page)(struct vm_area_struct *vma, > - unsigned long addr); > + struct page *(*find_normal_page)(struct vm_area_struct *vma, > + unsigned long addr); > +#endif /* CONFIG_FIND_NORMAL_PAGE */ > }; > > struct vm_unmapped_area_info { > -- > 2.50.1 >
© 2016 - 2025 Red Hat, Inc.