Let's provide variants of track_pfn_remap() and untrack_pfn() that won't
mess with VMAs, and replace the usage in mm/memremap.c.
Add some documentation.
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org> # x86 bits
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/x86/mm/pat/memtype.c | 14 ++++++++++++++
include/linux/pgtable.h | 39 +++++++++++++++++++++++++++++++++++++++
mm/memremap.c | 8 ++++----
3 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index fa78facc6f633..1ec8af6cad6bf 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1068,6 +1068,20 @@ int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot
return 0;
}
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
+{
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
+
+ return reserve_pfn_range(paddr, size, prot, 0);
+}
+
+void pfnmap_untrack(unsigned long pfn, unsigned long size)
+{
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
+
+ free_pfn_range(paddr, size);
+}
+
/*
* untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index be1745839871c..90f72cd358390 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
return 0;
}
+static inline int pfnmap_track(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
+{
+ return 0;
+}
+
+static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
+{
+}
+
/*
* track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
* tables copied during copy_page_range(). Will store the pfn to be
@@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
*/
int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
pgprot_t *prot);
+
+/**
+ * pfnmap_track - track a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to track
+ *
+ * Request the pfn range to be 'tracked' by a hardware implementation and
+ * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
+ *
+ * This allows for fine-grained control of memory cache behaviour at page
+ * level granularity. Tracking memory this way is persisted across VMA splits
+ * (VMA merging does not apply for VM_PFNMAP).
+ *
+ * Currently, there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
+
+/**
+ * pfnmap_untrack - untrack a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ *
+ * Untrack a pfn range previously tracked through pfnmap_track().
+ */
+void pfnmap_untrack(unsigned long pfn, unsigned long size);
extern int track_pfn_copy(struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma, unsigned long *pfn);
extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
diff --git a/mm/memremap.c b/mm/memremap.c
index 2aebc1b192da9..c417c843e9b1f 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -130,7 +130,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
}
mem_hotplug_done();
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
pgmap_array_delete(range);
}
@@ -211,8 +211,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
- range_len(range));
+ error = pfnmap_track(PHYS_PFN(range->start), range_len(range),
+ &params->pgprot);
if (error)
goto err_pfn_remap;
@@ -277,7 +277,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
if (!is_private)
kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
- untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
+ pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
err_pfn_remap:
pgmap_array_delete(range);
return error;
--
2.49.0
* David Hildenbrand <david@redhat.com> [250512 08:34]:
> Let's provide variants of track_pfn_remap() and untrack_pfn() that won't
> mess with VMAs, and replace the usage in mm/memremap.c.
>
> Add some documentation.
>
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Acked-by: Ingo Molnar <mingo@kernel.org> # x86 bits
> Signed-off-by: David Hildenbrand <david@redhat.com>
Small nit with this one, but either way:
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
> arch/x86/mm/pat/memtype.c | 14 ++++++++++++++
> include/linux/pgtable.h | 39 +++++++++++++++++++++++++++++++++++++++
> mm/memremap.c | 8 ++++----
> 3 files changed, 57 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
> index fa78facc6f633..1ec8af6cad6bf 100644
> --- a/arch/x86/mm/pat/memtype.c
> +++ b/arch/x86/mm/pat/memtype.c
> @@ -1068,6 +1068,20 @@ int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot
> return 0;
> }
>
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
> +{
> + const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
Here, the << PAGE_SHIFT isn't really needed, because..
> +
> + return reserve_pfn_range(paddr, size, prot, 0);
> +}
> +
> +void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> + const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
> +
> + free_pfn_range(paddr, size);
> +}
> +
> /*
> * untrack_pfn is called while unmapping a pfnmap for a region.
> * untrack can be called for a specific region indicated by pfn and size or
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index be1745839871c..90f72cd358390 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
> return 0;
> }
>
> +static inline int pfnmap_track(unsigned long pfn, unsigned long size,
> + pgprot_t *prot)
> +{
> + return 0;
> +}
> +
> +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> +}
> +
> /*
> * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
> * tables copied during copy_page_range(). Will store the pfn to be
> @@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
> */
> int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
> pgprot_t *prot);
> +
> +/**
> + * pfnmap_track - track a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + * @prot: the pgprot to track
> + *
> + * Request the pfn range to be 'tracked' by a hardware implementation and
> + * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
> + *
> + * This allows for fine-grained control of memory cache behaviour at page
> + * level granularity. Tracking memory this way is persisted across VMA splits
> + * (VMA merging does not apply for VM_PFNMAP).
> + *
> + * Currently, there is only one implementation for this - x86 Page Attribute
> + * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
> + *
> + * Returns 0 on success and -EINVAL on error.
> + */
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
> +
> +/**
> + * pfnmap_untrack - untrack a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + *
> + * Untrack a pfn range previously tracked through pfnmap_track().
> + */
> +void pfnmap_untrack(unsigned long pfn, unsigned long size);
> extern int track_pfn_copy(struct vm_area_struct *dst_vma,
> struct vm_area_struct *src_vma, unsigned long *pfn);
> extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 2aebc1b192da9..c417c843e9b1f 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -130,7 +130,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
> }
> mem_hotplug_done();
>
> - untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> + pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
> pgmap_array_delete(range);
> }
>
> @@ -211,8 +211,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
> if (nid < 0)
> nid = numa_mem_id();
>
> - error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
> - range_len(range));
> + error = pfnmap_track(PHYS_PFN(range->start), range_len(range),
This user (of two) converts the range->start to the pfn.
The other user is pfnmap_track_ctx_alloc() in mm/memory.c which is
called from remap_pfn_range(), which also has addr.
Couldn't we just use the address directly?
I think the same holds for untrack as well.
> + &params->pgprot);
> if (error)
> goto err_pfn_remap;
>
> @@ -277,7 +277,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
> if (!is_private)
> kasan_remove_zero_shadow(__va(range->start), range_len(range));
> err_kasan:
> - untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> + pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
> err_pfn_remap:
> pgmap_array_delete(range);
> return error;
> --
> 2.49.0
>
On 13.05.25 19:40, Liam R. Howlett wrote: > * David Hildenbrand <david@redhat.com> [250512 08:34]: >> Let's provide variants of track_pfn_remap() and untrack_pfn() that won't >> mess with VMAs, and replace the usage in mm/memremap.c. >> >> Add some documentation. >> >> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> >> Acked-by: Ingo Molnar <mingo@kernel.org> # x86 bits >> Signed-off-by: David Hildenbrand <david@redhat.com> > > Small nit with this one, but either way: > > Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Thanks! [...] > > The other user is pfnmap_track_ctx_alloc() in mm/memory.c which is > called from remap_pfn_range(), which also has addr. > > Couldn't we just use the address directly? > > I think the same holds for untrack as well. Hm, conceptually, I want the "pfntrack" interface to consume ... PFNs :) Actually, I was thinking about converting the "size" parameter to nr_pages as well, but decided to leave that for another day. ... because I really should be working on (... checking todo list ...) anything else but PAT at this point. So unless there are strong feelings, I'll leave it that way (the way the old interface also used it), and add it to my todo list (either make it an address or make size -> nr_pages). Thanks for all the review Liam! -- Cheers, David / dhildenb
© 2016 - 2026 Red Hat, Inc.