The page_ext_next() function assumes that page extension objects for a
page order allocation always reside in the same memory section, which
may not be true and could lead to crashes. Use the new page_ext
iteration API instead.
Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
---
mm/page_owner.c | 61 +++++++++++++++++++++++--------------------------
1 file changed, 29 insertions(+), 32 deletions(-)
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 2d6360eaccbb6..c9d2c688eb981 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -229,17 +229,19 @@ static void dec_stack_record_count(depot_stack_handle_t handle,
handle);
}
-static inline void __update_page_owner_handle(struct page_ext *page_ext,
+static inline void __update_page_owner_handle(struct page *page,
depot_stack_handle_t handle,
unsigned short order,
gfp_t gfp_mask,
short last_migrate_reason, u64 ts_nsec,
pid_t pid, pid_t tgid, char *comm)
{
- int i;
+ struct page_ext_iter iter;
+ struct page_ext *page_ext;
struct page_owner *page_owner;
- for (i = 0; i < (1 << order); i++) {
+ rcu_read_lock();
+ for_each_page_ext(page, 1 << order, page_ext, iter) {
page_owner = get_page_owner(page_ext);
page_owner->handle = handle;
page_owner->order = order;
@@ -252,20 +254,22 @@ static inline void __update_page_owner_handle(struct page_ext *page_ext,
sizeof(page_owner->comm));
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_ext = page_ext_next(page_ext);
}
+ rcu_read_unlock();
}
-static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+static inline void __update_page_owner_free_handle(struct page *page,
depot_stack_handle_t handle,
unsigned short order,
pid_t pid, pid_t tgid,
u64 free_ts_nsec)
{
- int i;
+ struct page_ext_iter iter;
+ struct page_ext *page_ext;
struct page_owner *page_owner;
- for (i = 0; i < (1 << order); i++) {
+ rcu_read_lock();
+ for_each_page_ext(page, 1 << order, page_ext, iter) {
page_owner = get_page_owner(page_ext);
/* Only __reset_page_owner() wants to clear the bit */
if (handle) {
@@ -275,8 +279,8 @@ static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
page_owner->free_ts_nsec = free_ts_nsec;
page_owner->free_pid = current->pid;
page_owner->free_tgid = current->tgid;
- page_ext = page_ext_next(page_ext);
}
+ rcu_read_unlock();
}
void __reset_page_owner(struct page *page, unsigned short order)
@@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order)
page_owner = get_page_owner(page_ext);
alloc_handle = page_owner->handle;
+ page_ext_put(page_ext);
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
- __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+ __update_page_owner_free_handle(page, handle, order, current->pid,
current->tgid, free_ts_nsec);
- page_ext_put(page_ext);
if (alloc_handle != early_handle)
/*
@@ -313,19 +317,13 @@ void __reset_page_owner(struct page *page, unsigned short order)
noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
- struct page_ext *page_ext;
u64 ts_nsec = local_clock();
depot_stack_handle_t handle;
handle = save_stack(gfp_mask);
-
- page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
- __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+ __update_page_owner_handle(page, handle, order, gfp_mask, -1,
ts_nsec, current->pid, current->tgid,
current->comm);
- page_ext_put(page_ext);
inc_stack_record_count(handle, gfp_mask, 1 << order);
}
@@ -344,26 +342,24 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
void __split_page_owner(struct page *page, int old_order, int new_order)
{
- int i;
- struct page_ext *page_ext = page_ext_get(page);
+ struct page_ext_iter iter;
+ struct page_ext *page_ext;
struct page_owner *page_owner;
- if (unlikely(!page_ext))
- return;
-
- for (i = 0; i < (1 << old_order); i++) {
+ rcu_read_lock();
+ for_each_page_ext(page, 1 << old_order, page_ext, iter) {
page_owner = get_page_owner(page_ext);
page_owner->order = new_order;
- page_ext = page_ext_next(page_ext);
}
- page_ext_put(page_ext);
+ rcu_read_unlock();
}
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
- int i;
struct page_ext *old_ext;
struct page_ext *new_ext;
+ struct page_ext *page_ext;
+ struct page_ext_iter iter;
struct page_owner *old_page_owner;
struct page_owner *new_page_owner;
depot_stack_handle_t migrate_handle;
@@ -381,7 +377,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
migrate_handle = new_page_owner->handle;
- __update_page_owner_handle(new_ext, old_page_owner->handle,
+ __update_page_owner_handle(&newfolio->page, old_page_owner->handle,
old_page_owner->order, old_page_owner->gfp_mask,
old_page_owner->last_migrate_reason,
old_page_owner->ts_nsec, old_page_owner->pid,
@@ -391,7 +387,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
* will be freed after migration. Keep them until then as they may be
* useful.
*/
- __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+ __update_page_owner_free_handle(&newfolio->page, 0, old_page_owner->order,
old_page_owner->free_pid,
old_page_owner->free_tgid,
old_page_owner->free_ts_nsec);
@@ -400,11 +396,12 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
* for the new one and the old folio otherwise there will be an imbalance
* when subtracting those pages from the stack.
*/
- for (i = 0; i < (1 << new_page_owner->order); i++) {
+ rcu_read_lock();
+ for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
+ old_page_owner = get_page_owner(page_ext);
old_page_owner->handle = migrate_handle;
- old_ext = page_ext_next(old_ext);
- old_page_owner = get_page_owner(old_ext);
}
+ rcu_read_unlock();
page_ext_put(new_ext);
page_ext_put(old_ext);
@@ -813,7 +810,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
goto ext_put_continue;
/* Found early allocated page */
- __update_page_owner_handle(page_ext, early_handle, 0, 0,
+ __update_page_owner_handle(page, early_handle, 0, 0,
-1, local_clock(), current->pid,
current->tgid, current->comm);
count++;
--
2.48.1
On 24.02.25 22:59, Luiz Capitulino wrote:
> The page_ext_next() function assumes that page extension objects for a
> page order allocation always reside in the same memory section, which
> may not be true and could lead to crashes. Use the new page_ext
> iteration API instead.
>
> Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
> Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
> ---
> mm/page_owner.c | 61 +++++++++++++++++++++++--------------------------
> 1 file changed, 29 insertions(+), 32 deletions(-)
>
[...]
> void __reset_page_owner(struct page *page, unsigned short order)
> @@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order)
>
> page_owner = get_page_owner(page_ext);
> alloc_handle = page_owner->handle;
> + page_ext_put(page_ext);
>
> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
> - __update_page_owner_free_handle(page_ext, handle, order, current->pid,
> + __update_page_owner_free_handle(page, handle, order, current->pid,
> current->tgid, free_ts_nsec);
> - page_ext_put(page_ext);
I assume moving that is fine ...
but I'll note that ...
> - for (i = 0; i < (1 << new_page_owner->order); i++) {
> + rcu_read_lock();
> + for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
> + old_page_owner = get_page_owner(page_ext);
> old_page_owner->handle = migrate_handle;
> - old_ext = page_ext_next(old_ext);
> - old_page_owner = get_page_owner(old_ext);
> }
> + rcu_read_unlock();
>
> page_ext_put(new_ext);
> page_ext_put(old_ext);
... here you are not moving it?
In general, LGTM, only the remaining page_ext_put() are a bit confusing.
--
Cheers,
David / dhildenb
On 2025-02-25 11:44, David Hildenbrand wrote:
> On 24.02.25 22:59, Luiz Capitulino wrote:
>> The page_ext_next() function assumes that page extension objects for a
>> page order allocation always reside in the same memory section, which
>> may not be true and could lead to crashes. Use the new page_ext
>> iteration API instead.
>>
>> Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
>> Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
>> ---
>> mm/page_owner.c | 61 +++++++++++++++++++++++--------------------------
>> 1 file changed, 29 insertions(+), 32 deletions(-)
>>
>
> [...]
>
>> void __reset_page_owner(struct page *page, unsigned short order)
>> @@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order)
>> page_owner = get_page_owner(page_ext);
>> alloc_handle = page_owner->handle;
>> + page_ext_put(page_ext);
>> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
>> - __update_page_owner_free_handle(page_ext, handle, order, current->pid,
>> + __update_page_owner_free_handle(page, handle, order, current->pid,
>> current->tgid, free_ts_nsec);
>> - page_ext_put(page_ext);
>
> I assume moving that is fine ...
>
> but I'll note that ...
>
>> - for (i = 0; i < (1 << new_page_owner->order); i++) {
>> + rcu_read_lock();
>> + for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
>> + old_page_owner = get_page_owner(page_ext);
>> old_page_owner->handle = migrate_handle;
>> - old_ext = page_ext_next(old_ext);
>> - old_page_owner = get_page_owner(old_ext);
>> }
>> + rcu_read_unlock();
>> page_ext_put(new_ext);
>> page_ext_put(old_ext);
>
> ... here you are not moving it?
>
>
> In general, LGTM, only the remaining page_ext_put() are a bit confusing.
Which part did you find confusing: the fact that I'm not moving them up or that
we still make use of them?
For this hunk, I decided to keep them where they are because 'new_page_owner',
which is a page extension from 'new_ext', is still used in the last loop. So
I decided to free them all at the end for simplicity.
The other part is, page_ext_get() and page_ext_put() are still valid functions
for getting specific page extensions outside of loops and the usage in
__folio_copy_owner() (and a few other cases) seems valid to me.
On 25.02.25 23:30, Luiz Capitulino wrote:
> On 2025-02-25 11:44, David Hildenbrand wrote:
>> On 24.02.25 22:59, Luiz Capitulino wrote:
>>> The page_ext_next() function assumes that page extension objects for a
>>> page order allocation always reside in the same memory section, which
>>> may not be true and could lead to crashes. Use the new page_ext
>>> iteration API instead.
>>>
>>> Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
>>> Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
>>> ---
>>> mm/page_owner.c | 61 +++++++++++++++++++++++--------------------------
>>> 1 file changed, 29 insertions(+), 32 deletions(-)
>>>
>>
>> [...]
>>
>>> void __reset_page_owner(struct page *page, unsigned short order)
>>> @@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order)
>>> page_owner = get_page_owner(page_ext);
>>> alloc_handle = page_owner->handle;
>>> + page_ext_put(page_ext);
>>> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
>>> - __update_page_owner_free_handle(page_ext, handle, order, current->pid,
>>> + __update_page_owner_free_handle(page, handle, order, current->pid,
>>> current->tgid, free_ts_nsec);
>>> - page_ext_put(page_ext);
>>
>> I assume moving that is fine ...
>>
>> but I'll note that ...
>>
>>> - for (i = 0; i < (1 << new_page_owner->order); i++) {
>>> + rcu_read_lock();
>>> + for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
>>> + old_page_owner = get_page_owner(page_ext);
>>> old_page_owner->handle = migrate_handle;
>>> - old_ext = page_ext_next(old_ext);
>>> - old_page_owner = get_page_owner(old_ext);
>>> }
>>> + rcu_read_unlock();
>>> page_ext_put(new_ext);
>>> page_ext_put(old_ext);
>>
>> ... here you are not moving it?
>>
>>
>> In general, LGTM, only the remaining page_ext_put() are a bit confusing.
>
> Which part did you find confusing: the fact that I'm not moving them up or that
> we still make use of them?
How we are deferring page_ext_put() when not actually working on these
values anymore. The page_owner itself should not go away here unless we
have a serious bug.
To be precise, can't we simply do the following on top?
diff --git a/mm/page_owner.c b/mm/page_owner.c
index c9d2c688eb981..12044340adf89 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -356,26 +356,24 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
- struct page_ext *old_ext;
- struct page_ext *new_ext;
struct page_ext *page_ext;
struct page_ext_iter iter;
struct page_owner *old_page_owner;
struct page_owner *new_page_owner;
depot_stack_handle_t migrate_handle;
- old_ext = page_ext_get(&old->page);
- if (unlikely(!old_ext))
+ page_ext = page_ext_get(&old->page);
+ if (unlikely(!page_ext))
return;
+ old_page_owner = get_page_owner(page_ext);
+ page_ext_put(page_ext);
- new_ext = page_ext_get(&newfolio->page);
- if (unlikely(!new_ext)) {
- page_ext_put(old_ext);
+ page_ext = page_ext_get(&newfolio->page);
+ if (unlikely(!page_ext))
return;
- }
+ new_page_owner = get_page_owner(page_ext);
+ page_ext_put(page_ext);
- old_page_owner = get_page_owner(old_ext);
- new_page_owner = get_page_owner(new_ext);
migrate_handle = new_page_owner->handle;
__update_page_owner_handle(&newfolio->page, old_page_owner->handle,
old_page_owner->order, old_page_owner->gfp_mask,
@@ -402,9 +400,6 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
old_page_owner->handle = migrate_handle;
}
rcu_read_unlock();
-
- page_ext_put(new_ext);
- page_ext_put(old_ext);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
--
Cheers,
David / dhildenb
On 2025-02-27 08:50, David Hildenbrand wrote:
> On 25.02.25 23:30, Luiz Capitulino wrote:
>> On 2025-02-25 11:44, David Hildenbrand wrote:
>>> On 24.02.25 22:59, Luiz Capitulino wrote:
>>>> The page_ext_next() function assumes that page extension objects for a
>>>> page order allocation always reside in the same memory section, which
>>>> may not be true and could lead to crashes. Use the new page_ext
>>>> iteration API instead.
>>>>
>>>> Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
>>>> Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
>>>> ---
>>>> mm/page_owner.c | 61 +++++++++++++++++++++++--------------------------
>>>> 1 file changed, 29 insertions(+), 32 deletions(-)
>>>>
>>>
>>> [...]
>>>
>>>> void __reset_page_owner(struct page *page, unsigned short order)
>>>> @@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order)
>>>> page_owner = get_page_owner(page_ext);
>>>> alloc_handle = page_owner->handle;
>>>> + page_ext_put(page_ext);
>>>> handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
>>>> - __update_page_owner_free_handle(page_ext, handle, order, current->pid,
>>>> + __update_page_owner_free_handle(page, handle, order, current->pid,
>>>> current->tgid, free_ts_nsec);
>>>> - page_ext_put(page_ext);
>>>
>>> I assume moving that is fine ...
>>>
>>> but I'll note that ...
>>>
>>>> - for (i = 0; i < (1 << new_page_owner->order); i++) {
>>>> + rcu_read_lock();
>>>> + for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
>>>> + old_page_owner = get_page_owner(page_ext);
>>>> old_page_owner->handle = migrate_handle;
>>>> - old_ext = page_ext_next(old_ext);
>>>> - old_page_owner = get_page_owner(old_ext);
>>>> }
>>>> + rcu_read_unlock();
>>>> page_ext_put(new_ext);
>>>> page_ext_put(old_ext);
>>>
>>> ... here you are not moving it?
>>>
>>>
>>> In general, LGTM, only the remaining page_ext_put() are a bit confusing.
>>
>> Which part did you find confusing: the fact that I'm not moving them up or that
>> we still make use of them?
>
> How we are deferring page_ext_put() when not actually working on these
> values anymore. The page_owner itself should not go away here unless we
> have a serious bug.
>
> To be precise, can't we simply do the following on top?
Yes, that looks good and I like how the new API allows for simpler code.
My only concern is that if the user is not familiar with the page_ext
internals, it might not be clear what page_ext_put() is actually
protecting in which case it looks wrong that we're using a reference
returned by get_page_owner() after releasing the lock. If you think
that that's not an issue then I can apply this change on top.
>
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index c9d2c688eb981..12044340adf89 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -356,26 +356,24 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
>
> void __folio_copy_owner(struct folio *newfolio, struct folio *old)
> {
> - struct page_ext *old_ext;
> - struct page_ext *new_ext;
> struct page_ext *page_ext;
> struct page_ext_iter iter;
> struct page_owner *old_page_owner;
> struct page_owner *new_page_owner;
> depot_stack_handle_t migrate_handle;
>
> - old_ext = page_ext_get(&old->page);
> - if (unlikely(!old_ext))
> + page_ext = page_ext_get(&old->page);
> + if (unlikely(!page_ext))
> return;
> + old_page_owner = get_page_owner(page_ext);
> + page_ext_put(page_ext);
>
> - new_ext = page_ext_get(&newfolio->page);
> - if (unlikely(!new_ext)) {
> - page_ext_put(old_ext);
> + page_ext = page_ext_get(&newfolio->page);
> + if (unlikely(!page_ext))
> return;
> - }
> + new_page_owner = get_page_owner(page_ext);
> + page_ext_put(page_ext);
>
> - old_page_owner = get_page_owner(old_ext);
> - new_page_owner = get_page_owner(new_ext);
> migrate_handle = new_page_owner->handle;
> __update_page_owner_handle(&newfolio->page, old_page_owner->handle,
> old_page_owner->order, old_page_owner->gfp_mask,
> @@ -402,9 +400,6 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
> old_page_owner->handle = migrate_handle;
> }
> rcu_read_unlock();
> -
> - page_ext_put(new_ext);
> - page_ext_put(old_ext);
> }
>
> void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>
>
>> To be precise, can't we simply do the following on top?
>
> Yes, that looks good and I like how the new API allows for simpler code.
>
> My only concern is that if the user is not familiar with the page_ext
> internals, it might not be clear what page_ext_put() is actually
> protecting in which case it looks wrong that we're using a reference
> returned by get_page_owner() after releasing the lock. If you think
> that that's not an issue then I can apply this change on top.
The page_ext stuff only protects the page_ext itself, not any data stored in
there. So I assume this should be just fine.
(most of these cases shouldn't need any protection, because the page_ext
should not actually ever vanish here for memory that we are holding in our
hands; but we decided to just add it everywhere for consistency)
--
Cheers,
David / dhildenb
© 2016 - 2026 Red Hat, Inc.