From: Shivansh Dhiman <shivansh.dhiman@amd.com>

Add NUMA mempolicy support to the filemap allocation path by introducing
new APIs that take a mempolicy argument:
- filemap_grab_folio_mpol()
- filemap_alloc_folio_mpol()
- __filemap_get_folio_mpol()

These APIs allow callers to specify a NUMA policy during page cache
allocations, enabling fine-grained control over memory placement. This is
particularly needed by KVM when using guest-memfd memory backends, where
the guest memory needs to be allocated according to the NUMA policy
specified by the VMM.

The existing non-mempolicy APIs remain unchanged and continue to use the
default allocation behavior.
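
For example, a caller that has obtained a mempolicy could allocate page
cache memory with it (illustrative sketch only; the surrounding variables
are hypothetical and not part of this patch):

	struct folio *folio;

	/* Look up or, if needed, allocate the folio under the given policy. */
	folio = filemap_grab_folio_mpol(inode->i_mapping, index, mpol);
	if (IS_ERR(folio))
		return PTR_ERR(folio);
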
Signed-off-by: Shivansh Dhiman <shivansh.dhiman@amd.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
include/linux/pagemap.h | 39 +++++++++++++++++++++++++++++++++++++++
mm/filemap.c | 30 +++++++++++++++++++++++++-----
2 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 47bfc6b1b632..f480b3b29113 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -662,15 +662,24 @@ static inline void *detach_page_private(struct page *page)
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol);
#else
static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
{
return folio_alloc_noprof(gfp, order);
}
+static inline struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp,
+ unsigned int order, struct mempolicy *mpol)
+{
+ return filemap_alloc_folio_noprof(gfp, order);
+}
#endif
#define filemap_alloc_folio(...) \
alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
+#define filemap_alloc_folio_mpol(...) \
+ alloc_hooks(filemap_alloc_folio_mpol_noprof(__VA_ARGS__))
static inline struct page *__page_cache_alloc(gfp_t gfp)
{
@@ -762,6 +771,8 @@ static inline fgf_t fgf_set_order(size_t size)
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
fgf_t fgp_flags, gfp_t gfp);
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
+ pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
fgf_t fgp_flags, gfp_t gfp);
@@ -820,6 +831,34 @@ static inline struct folio *filemap_grab_folio(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+/**
+ * filemap_grab_folio_mpol - grab a folio from the page cache.
+ * @mapping: The address space to search.
+ * @index: The page index.
+ * @mpol: The mempolicy to apply when allocating a new folio.
+ *
+ * Same as filemap_grab_folio(), except that it allocates the folio using
+ * the given memory policy.
+ *
+ * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
+ * and failed to create a folio.
+ */
+#ifdef CONFIG_NUMA
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+ pgoff_t index, struct mempolicy *mpol)
+{
+ return __filemap_get_folio_mpol(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping), mpol);
+}
+#else
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+ pgoff_t index, struct mempolicy *mpol)
+{
+ return filemap_grab_folio(mapping, index);
+}
+#endif /* CONFIG_NUMA */
+
/**
* find_get_page - find and get a page reference
* @mapping: the address_space to search
diff --git a/mm/filemap.c b/mm/filemap.c
index 804d7365680c..9abb20c4d705 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1001,11 +1001,17 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
EXPORT_SYMBOL_GPL(filemap_add_folio);
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol)
{
int n;
struct folio *folio;
+ if (mpol)
+ return folio_alloc_mpol_noprof(gfp, order, mpol,
+ NO_INTERLEAVE_INDEX,
+ numa_node_id());
+
if (cpuset_do_page_mem_spread()) {
unsigned int cpuset_mems_cookie;
do {
@@ -1018,6 +1024,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
}
return folio_alloc_noprof(gfp, order);
}
+EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
+
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+{
+ return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
+}
EXPORT_SYMBOL(filemap_alloc_folio_noprof);
#endif
@@ -1881,11 +1893,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
}
/**
- * __filemap_get_folio - Find and get a reference to a folio.
+ * __filemap_get_folio_mpol - Find and get a reference to a folio.
* @mapping: The address_space to search.
* @index: The page index.
* @fgp_flags: %FGP flags modify how the folio is returned.
* @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @mpol: The mempolicy to apply when allocating a new folio.
*
* Looks up the page cache entry at @mapping & @index.
*
@@ -1896,8 +1909,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
*
* Return: The found folio or an ERR_PTR() otherwise.
*/
-struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- fgf_t fgp_flags, gfp_t gfp)
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
+ fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
{
struct folio *folio;
@@ -1967,7 +1980,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
err = -ENOMEM;
if (order > min_order)
alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
- folio = filemap_alloc_folio(alloc_gfp, order);
+ folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
if (!folio)
continue;
@@ -2003,6 +2016,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
folio_clear_dropbehind(folio);
return folio;
}
+EXPORT_SYMBOL(__filemap_get_folio_mpol);
+
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+ fgf_t fgp_flags, gfp_t gfp)
+{
+ return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL);
+}
EXPORT_SYMBOL(__filemap_get_folio);
static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
--
2.34.1
On 2/26/25 09:25, Shivank Garg wrote:
> From: Shivansh Dhiman <shivansh.dhiman@amd.com>
>
> Add NUMA mempolicy support to the filemap allocation path by introducing
> new APIs that take a mempolicy argument:
> - filemap_grab_folio_mpol()
> - filemap_alloc_folio_mpol()
> - __filemap_get_folio_mpol()
>
> These APIs allow callers to specify a NUMA policy during page cache
> allocations, enabling fine-grained control over memory placement. This is
> particularly needed by KVM when using guest-memfd memory backends, where
> the guest memory needs to be allocated according to the NUMA policy
> specified by VMM.
>
> The existing non-mempolicy APIs remain unchanged and continue to use the
> default allocation behavior.
>
> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@amd.com>
> Signed-off-by: Shivank Garg <shivankg@amd.com>
<snip>
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1001,11 +1001,17 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
> EXPORT_SYMBOL_GPL(filemap_add_folio);
>
> #ifdef CONFIG_NUMA
> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> + struct mempolicy *mpol)
> {
> int n;
> struct folio *folio;
>
> + if (mpol)
> + return folio_alloc_mpol_noprof(gfp, order, mpol,
> + NO_INTERLEAVE_INDEX,
> + numa_node_id());
> +
> if (cpuset_do_page_mem_spread()) {
> unsigned int cpuset_mems_cookie;
> do {
> @@ -1018,6 +1024,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> }
> return folio_alloc_noprof(gfp, order);
> }
> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
> +
> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +{
> + return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
> +}
> EXPORT_SYMBOL(filemap_alloc_folio_noprof);
> #endif
Here it seems to me:

- filemap_alloc_folio_noprof() could stay unchanged
- filemap_alloc_folio_mpol_noprof() would
  - call folio_alloc_mpol_noprof() if (mpol)
  - call filemap_alloc_folio_noprof() otherwise

The code would be a bit more clearly structured that way?
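
Something like this untested sketch, just to illustrate the structure:

	struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
			struct mempolicy *mpol)
	{
		/* An explicit mempolicy bypasses the cpuset page spreading path. */
		if (mpol)
			return folio_alloc_mpol_noprof(gfp, order, mpol,
						       NO_INTERLEAVE_INDEX,
						       numa_node_id());

		/* Otherwise fall back to the existing, unchanged helper. */
		return filemap_alloc_folio_noprof(gfp, order);
	}
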
> @@ -1881,11 +1893,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
> }
>
> /**
> - * __filemap_get_folio - Find and get a reference to a folio.
> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
> * @mapping: The address_space to search.
> * @index: The page index.
> * @fgp_flags: %FGP flags modify how the folio is returned.
> * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
> + * @mpol: The mempolicy to apply when allocating a new folio.
> *
> * Looks up the page cache entry at @mapping & @index.
> *
> @@ -1896,8 +1909,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
> *
> * Return: The found folio or an ERR_PTR() otherwise.
> */
> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> - fgf_t fgp_flags, gfp_t gfp)
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
> + fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
> {
> struct folio *folio;
>
> @@ -1967,7 +1980,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> err = -ENOMEM;
> if (order > min_order)
> alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
> - folio = filemap_alloc_folio(alloc_gfp, order);
> + folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
> if (!folio)
> continue;
>
> @@ -2003,6 +2016,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> folio_clear_dropbehind(folio);
> return folio;
> }
> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
> +
> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> + fgf_t fgp_flags, gfp_t gfp)
> +{
> + return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL);
> +}
> EXPORT_SYMBOL(__filemap_get_folio);
>
> static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
Vlastimil Babka <vbabka@suse.cz> writes:
> On 2/26/25 09:25, Shivank Garg wrote:
>> From: Shivansh Dhiman <shivansh.dhiman@amd.com>
>>
>> Add NUMA mempolicy support to the filemap allocation path by introducing
>> new APIs that take a mempolicy argument:
>> - filemap_grab_folio_mpol()
>> - filemap_alloc_folio_mpol()
>> - __filemap_get_folio_mpol()
>>
>> These APIs allow callers to specify a NUMA policy during page cache
>> allocations, enabling fine-grained control over memory placement. This is
>> particularly needed by KVM when using guest-memfd memory backends, where
>> the guest memory needs to be allocated according to the NUMA policy
>> specified by VMM.
>>
>> The existing non-mempolicy APIs remain unchanged and continue to use the
>> default allocation behavior.
>>
>> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@amd.com>
>> Signed-off-by: Shivank Garg <shivankg@amd.com>
>
> <snip>
>
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -1001,11 +1001,17 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
>> EXPORT_SYMBOL_GPL(filemap_add_folio);
>>
>> #ifdef CONFIG_NUMA
>> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
>> + struct mempolicy *mpol)
>> {
>> int n;
>> struct folio *folio;
>>
>> + if (mpol)
>> + return folio_alloc_mpol_noprof(gfp, order, mpol,
>> + NO_INTERLEAVE_INDEX,
Could we pass in the interleave index instead of hard-coding it?
>> + numa_node_id());
>> +
>> if (cpuset_do_page_mem_spread()) {
>> unsigned int cpuset_mems_cookie;
>> do {
>> @@ -1018,6 +1024,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>> }
>> return folio_alloc_noprof(gfp, order);
>> }
>> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
>> +
>> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>> +{
>> + return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
>> +}
>> EXPORT_SYMBOL(filemap_alloc_folio_noprof);
>> #endif
>
> Here it seems to me:
>
> - filemap_alloc_folio_noprof() could stay unchanged
> - filemap_alloc_folio_mpol_noprof() would
> - call folio_alloc_mpol_noprof() if (mpol)
> - call filemap_alloc_folio_noprof() otherwise
>
> The code would be a bit more clearly structured that way?
>
I feel that the original proposal makes it clearer that for all filemap
folio allocations, if mpol is defined, anything to do with cpuset's page
spread is overridden. Just a slight preference though. I do also agree
that having filemap_alloc_folio_mpol_noprof() call
filemap_alloc_folio_noprof() would result in fewer changes.
>> @@ -1881,11 +1893,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>> }
>>
>> /**
>> - * __filemap_get_folio - Find and get a reference to a folio.
>> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
>> * @mapping: The address_space to search.
>> * @index: The page index.
>> * @fgp_flags: %FGP flags modify how the folio is returned.
>> * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
>> + * @mpol: The mempolicy to apply when allocating a new folio.
>> *
>> * Looks up the page cache entry at @mapping & @index.
>> *
>> @@ -1896,8 +1909,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>> *
>> * Return: The found folio or an ERR_PTR() otherwise.
>> */
>> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>> - fgf_t fgp_flags, gfp_t gfp)
>> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
>> + fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
>> {
>> struct folio *folio;
>>
>> @@ -1967,7 +1980,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>> err = -ENOMEM;
>> if (order > min_order)
>> alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
>> - folio = filemap_alloc_folio(alloc_gfp, order);
>> + folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
>> if (!folio)
>> continue;
>>
>> @@ -2003,6 +2016,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>> folio_clear_dropbehind(folio);
>> return folio;
>> }
>> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
>> +
>> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>> + fgf_t fgp_flags, gfp_t gfp)
>> +{
>> + return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL);
>> +}
>> EXPORT_SYMBOL(__filemap_get_folio);
>>
>> static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
On 2/28/2025 11:21 PM, Ackerley Tng wrote:
> Vlastimil Babka <vbabka@suse.cz> writes:
>
>> On 2/26/25 09:25, Shivank Garg wrote:
>>> From: Shivansh Dhiman <shivansh.dhiman@amd.com>
>>>
>>> Add NUMA mempolicy support to the filemap allocation path by introducing
>>> new APIs that take a mempolicy argument:
>>> - filemap_grab_folio_mpol()
>>> - filemap_alloc_folio_mpol()
>>> - __filemap_get_folio_mpol()
>>>
>>> These APIs allow callers to specify a NUMA policy during page cache
>>> allocations, enabling fine-grained control over memory placement. This is
>>> particularly needed by KVM when using guest-memfd memory backends, where
>>> the guest memory needs to be allocated according to the NUMA policy
>>> specified by VMM.
>>>
>>> The existing non-mempolicy APIs remain unchanged and continue to use the
>>> default allocation behavior.
>>>
>>> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@amd.com>
>>> Signed-off-by: Shivank Garg <shivankg@amd.com>
>>
>> <snip>
>>
>>> --- a/mm/filemap.c
>>> +++ b/mm/filemap.c
>>> @@ -1001,11 +1001,17 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
>>> EXPORT_SYMBOL_GPL(filemap_add_folio);
>>>
>>> #ifdef CONFIG_NUMA
>>> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>>> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
>>> + struct mempolicy *mpol)
>>> {
>>> int n;
>>> struct folio *folio;
>>>
>>> + if (mpol)
>>> + return folio_alloc_mpol_noprof(gfp, order, mpol,
>>> + NO_INTERLEAVE_INDEX,
>
> Could we pass in the interleave index instead of hard-coding it?
Good point.
I'll modify this to allow passing the interleave index.
>
>>> + numa_node_id());
>>> +
>>> if (cpuset_do_page_mem_spread()) {
>>> unsigned int cpuset_mems_cookie;
>>> do {
>>> @@ -1018,6 +1024,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>>> }
>>> return folio_alloc_noprof(gfp, order);
>>> }
>>> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
>>> +
>>> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>>> +{
>>> + return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
>>> +}
>>> EXPORT_SYMBOL(filemap_alloc_folio_noprof);
>>> #endif
>>
>> Here it seems to me:
>>
>> - filemap_alloc_folio_noprof() could stay unchanged
>> - filemap_alloc_folio_mpol_noprof() would
>> - call folio_alloc_mpol_noprof() if (mpol)
>> - call filemap_alloc_folio_noprof() otherwise
>>
>> The code would be a bit more clearly structured that way?
>>
>
> I feel that the original proposal makes it clearer that for all filemap
> folio allocations, if mpol is defined, anything to do with cpuset's page
> spread is overridden. Just a slight preference though. I do also agree
> that having filemap_alloc_folio_mpol_noprof() call
> filemap_alloc_folio_noprof() would result in fewer changes.
>
Your proposed structure makes sense.
I'll update the patch to incorporate these suggestions in the next version.
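
Roughly, combining both suggestions (untested; the interleave-index
parameter name is tentative):

	struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
			struct mempolicy *mpol, pgoff_t ilx)
	{
		/* With a policy, allocate directly under it using the caller's index. */
		if (mpol)
			return folio_alloc_mpol_noprof(gfp, order, mpol, ilx,
						       numa_node_id());

		/* Without a policy, keep the existing behaviour. */
		return filemap_alloc_folio_noprof(gfp, order);
	}

__filemap_get_folio_mpol() would then be able to pass down e.g. its page
cache index instead of the hard-coded NO_INTERLEAVE_INDEX.
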
Thanks,
Shivank
>>> @@ -1881,11 +1893,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>>> }
>>>
>>> /**
>>> - * __filemap_get_folio - Find and get a reference to a folio.
>>> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
>>> * @mapping: The address_space to search.
>>> * @index: The page index.
>>> * @fgp_flags: %FGP flags modify how the folio is returned.
>>> * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
>>> + * @mpol: The mempolicy to apply when allocating a new folio.
>>> *
>>> * Looks up the page cache entry at @mapping & @index.
>>> *
>>> @@ -1896,8 +1909,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>>> *
>>> * Return: The found folio or an ERR_PTR() otherwise.
>>> */
>>> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>>> - fgf_t fgp_flags, gfp_t gfp)
>>> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
>>> + fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
>>> {
>>> struct folio *folio;
>>>
>>> @@ -1967,7 +1980,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>>> err = -ENOMEM;
>>> if (order > min_order)
>>> alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
>>> - folio = filemap_alloc_folio(alloc_gfp, order);
>>> + folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
>>> if (!folio)
>>> continue;
>>>
>>> @@ -2003,6 +2016,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>>> folio_clear_dropbehind(folio);
>>> return folio;
>>> }
>>> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
>>> +
>>> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>>> + fgf_t fgp_flags, gfp_t gfp)
>>> +{
>>> + return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL);
>>> +}
>>> EXPORT_SYMBOL(__filemap_get_folio);
>>>
>>> static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,