__free_pages_core() is used when exposing fresh memory to the buddy
during system boot and when onlining memory in generic_online_page().
generic_online_page() is used in two cases:
1. Direct memory onlining in online_pages().
2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV
balloon and virtio-mem), when parts of a section are kept
fake-offline to be fake-onlined later on.
In 1, we already place pages to the tail of the freelist. Pages will be
freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists
via undo_isolate_page_range().
In 2, we currently don't implement a proper rule. In case of virtio-mem,
where we currently always online MAX_ORDER - 1 pages, the pages will be
placed to the HEAD of the freelist - undesireable. While the hyper-v
balloon calls generic_online_page() with single pages, usually it will
call it on successive single pages in a larger block.
The pages are fresh, so place them to the tail of the freelists and avoid
the PCP. In __free_pages_core(), remove the now superflouos call to
set_page_refcounted() and add a comment regarding page initialization and
the refcount.
Note: In 2. we currently don't shuffle. If ever relevant (page shuffling
is usually of limited use in virtualized environments), we might want to
shuffle after a sequence of generic_online_page() calls in the
relevant callers.
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
1 file changed, 24 insertions(+), 13 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d5a5f528b8ca..8a2134fe9947 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -270,7 +270,8 @@ bool pm_suspended_storage(void)
unsigned int pageblock_order __read_mostly;
#endif
-static void __free_pages_ok(struct page *page, unsigned int order);
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fop_t fop_flags);
/*
* results with 256, 32 in the lowmem_reserve sysctl:
@@ -682,7 +683,7 @@ static void bad_page(struct page *page, const char *reason)
void free_compound_page(struct page *page)
{
mem_cgroup_uncharge(page);
- __free_pages_ok(page, compound_order(page));
+ __free_pages_ok(page, compound_order(page), FOP_NONE);
}
void prep_compound_page(struct page *page, unsigned int order)
@@ -1419,17 +1420,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
spin_unlock(&zone->lock);
}
-static void free_one_page(struct zone *zone,
- struct page *page, unsigned long pfn,
- unsigned int order,
- int migratetype)
+static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn,
+ unsigned int order, int migratetype, fop_t fop_flags)
{
spin_lock(&zone->lock);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
- __free_one_page(page, pfn, zone, order, migratetype, FOP_NONE);
+ __free_one_page(page, pfn, zone, order, migratetype, fop_flags);
spin_unlock(&zone->lock);
}
@@ -1507,7 +1506,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
}
}
-static void __free_pages_ok(struct page *page, unsigned int order)
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fop_t fop_flags)
{
unsigned long flags;
int migratetype;
@@ -1519,7 +1519,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
- free_one_page(page_zone(page), page, pfn, order, migratetype);
+ free_one_page(page_zone(page), page, pfn, order, migratetype,
+ fop_flags);
local_irq_restore(flags);
}
@@ -1529,6 +1530,11 @@ void __free_pages_core(struct page *page, unsigned int order)
struct page *p = page;
unsigned int loop;
+ /*
+ * When initializing the memmap, init_single_page() sets the refcount
+ * of all pages to 1 ("allocated"/"not free"). We have to set the
+ * refcount of all involved pages to 0.
+ */
prefetchw(p);
for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
prefetchw(p + 1);
@@ -1539,8 +1545,12 @@ void __free_pages_core(struct page *page, unsigned int order)
set_page_count(p, 0);
atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
- set_page_refcounted(page);
- __free_pages(page, order);
+
+ /*
+ * Bypass PCP and place fresh pages right to the tail, primarily
+ * relevant for memory onlining.
+ */
+ __free_pages_ok(page, order, FOP_TO_TAIL);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -3171,7 +3181,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
*/
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(zone, page, pfn, 0, migratetype);
+ free_one_page(zone, page, pfn, 0, migratetype,
+ FOP_NONE);
return;
}
migratetype = MIGRATE_MOVABLE;
@@ -5063,7 +5074,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
if (order == 0) /* Via pcp? */
free_unref_page(page);
else
- __free_pages_ok(page, order);
+ __free_pages_ok(page, order, FOP_NONE);
}
void __free_pages(struct page *page, unsigned int order)
--
2.26.2
> __free_pages_core() is used when exposing fresh memory to the buddy
> during system boot and when onlining memory in generic_online_page().
>
> generic_online_page() is used in two cases:
>
> 1. Direct memory onlining in online_pages().
> 2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV
> balloon and virtio-mem), when parts of a section are kept
> fake-offline to be fake-onlined later on.
>
> In 1, we already place pages to the tail of the freelist. Pages will be
> freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists
> via undo_isolate_page_range().
>
> In 2, we currently don't implement a proper rule. In case of virtio-mem,
> where we currently always online MAX_ORDER - 1 pages, the pages will be
> placed to the HEAD of the freelist - undesireable. While the hyper-v
> balloon calls generic_online_page() with single pages, usually it will
> call it on successive single pages in a larger block.
>
> The pages are fresh, so place them to the tail of the freelists and avoid
> the PCP. In __free_pages_core(), remove the now superflouos call to
> set_page_refcounted() and add a comment regarding page initialization and
> the refcount.
>
> Note: In 2. we currently don't shuffle. If ever relevant (page shuffling
> is usually of limited use in virtualized environments), we might want to
> shuffle after a sequence of generic_online_page() calls in the
> relevant callers.
>
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> Reviewed-by: Oscar Salvador <osalvador@suse.de>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> Cc: Mel Gorman <mgorman@techsingularity.net>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
> Cc: Oscar Salvador <osalvador@suse.de>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: "K. Y. Srinivasan" <kys@microsoft.com>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
> 1 file changed, 24 insertions(+), 13 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d5a5f528b8ca..8a2134fe9947 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -270,7 +270,8 @@ bool pm_suspended_storage(void)
> unsigned int pageblock_order __read_mostly;
> #endif
>
> -static void __free_pages_ok(struct page *page, unsigned int order);
> +static void __free_pages_ok(struct page *page, unsigned int order,
> + fop_t fop_flags);
>
> /*
> * results with 256, 32 in the lowmem_reserve sysctl:
> @@ -682,7 +683,7 @@ static void bad_page(struct page *page, const char *reason)
> void free_compound_page(struct page *page)
> {
> mem_cgroup_uncharge(page);
> - __free_pages_ok(page, compound_order(page));
> + __free_pages_ok(page, compound_order(page), FOP_NONE);
> }
>
> void prep_compound_page(struct page *page, unsigned int order)
> @@ -1419,17 +1420,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
> spin_unlock(&zone->lock);
> }
>
> -static void free_one_page(struct zone *zone,
> - struct page *page, unsigned long pfn,
> - unsigned int order,
> - int migratetype)
> +static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn,
> + unsigned int order, int migratetype, fop_t fop_flags)
> {
> spin_lock(&zone->lock);
> if (unlikely(has_isolate_pageblock(zone) ||
> is_migrate_isolate(migratetype))) {
> migratetype = get_pfnblock_migratetype(page, pfn);
> }
> - __free_one_page(page, pfn, zone, order, migratetype, FOP_NONE);
> + __free_one_page(page, pfn, zone, order, migratetype, fop_flags);
> spin_unlock(&zone->lock);
> }
>
> @@ -1507,7 +1506,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
> }
> }
>
> -static void __free_pages_ok(struct page *page, unsigned int order)
> +static void __free_pages_ok(struct page *page, unsigned int order,
> + fop_t fop_flags)
> {
> unsigned long flags;
> int migratetype;
> @@ -1519,7 +1519,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
> migratetype = get_pfnblock_migratetype(page, pfn);
> local_irq_save(flags);
> __count_vm_events(PGFREE, 1 << order);
> - free_one_page(page_zone(page), page, pfn, order, migratetype);
> + free_one_page(page_zone(page), page, pfn, order, migratetype,
> + fop_flags);
> local_irq_restore(flags);
> }
>
> @@ -1529,6 +1530,11 @@ void __free_pages_core(struct page *page, unsigned int order)
> struct page *p = page;
> unsigned int loop;
>
> + /*
> + * When initializing the memmap, init_single_page() sets the refcount
> + * of all pages to 1 ("allocated"/"not free"). We have to set the
> + * refcount of all involved pages to 0.
> + */
> prefetchw(p);
> for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> prefetchw(p + 1);
> @@ -1539,8 +1545,12 @@ void __free_pages_core(struct page *page, unsigned int order)
> set_page_count(p, 0);
>
> atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
> - set_page_refcounted(page);
> - __free_pages(page, order);
> +
> + /*
> + * Bypass PCP and place fresh pages right to the tail, primarily
> + * relevant for memory onlining.
> + */
> + __free_pages_ok(page, order, FOP_TO_TAIL);
> }
>
> #ifdef CONFIG_NEED_MULTIPLE_NODES
> @@ -3171,7 +3181,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
> */
> if (migratetype >= MIGRATE_PCPTYPES) {
> if (unlikely(is_migrate_isolate(migratetype))) {
> - free_one_page(zone, page, pfn, 0, migratetype);
> + free_one_page(zone, page, pfn, 0, migratetype,
> + FOP_NONE);
> return;
> }
> migratetype = MIGRATE_MOVABLE;
> @@ -5063,7 +5074,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
> if (order == 0) /* Via pcp? */
> free_unref_page(page);
> else
> - __free_pages_ok(page, order);
> + __free_pages_ok(page, order, FOP_NONE);
> }
>
> void __free_pages(struct page *page, unsigned int order)
Acked-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
On Mon 28-09-20 20:21:09, David Hildenbrand wrote:
> __free_pages_core() is used when exposing fresh memory to the buddy
> during system boot and when onlining memory in generic_online_page().
>
> generic_online_page() is used in two cases:
>
> 1. Direct memory onlining in online_pages().
> 2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV
> balloon and virtio-mem), when parts of a section are kept
> fake-offline to be fake-onlined later on.
>
> In 1, we already place pages to the tail of the freelist. Pages will be
> freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists
> via undo_isolate_page_range().
>
> In 2, we currently don't implement a proper rule. In case of virtio-mem,
> where we currently always online MAX_ORDER - 1 pages, the pages will be
> placed to the HEAD of the freelist - undesireable. While the hyper-v
> balloon calls generic_online_page() with single pages, usually it will
> call it on successive single pages in a larger block.
>
> The pages are fresh, so place them to the tail of the freelists and avoid
> the PCP. In __free_pages_core(), remove the now superflouos call to
> set_page_refcounted() and add a comment regarding page initialization and
> the refcount.
>
> Note: In 2. we currently don't shuffle. If ever relevant (page shuffling
> is usually of limited use in virtualized environments), we might want to
> shuffle after a sequence of generic_online_page() calls in the
> relevant callers.
It took some time to get through all the freeing paths with subtle
differences but this looks reasonable. You are mentioning that this
influences a boot time free memory ordering as well but only very
briefly. I do not expect this to make a huge difference but who knows.
It makes some sense to add pages in the order they show up in the
physical address ordering.
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> Reviewed-by: Oscar Salvador <osalvador@suse.de>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> Cc: Mel Gorman <mgorman@techsingularity.net>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
> Cc: Oscar Salvador <osalvador@suse.de>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: "K. Y. Srinivasan" <kys@microsoft.com>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Signed-off-by: David Hildenbrand <david@redhat.com>
That being said I do not see any fundamental problems.
Acked-by: Michal Hocko <mhocko@suse.com>
> ---
> mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
> 1 file changed, 24 insertions(+), 13 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d5a5f528b8ca..8a2134fe9947 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -270,7 +270,8 @@ bool pm_suspended_storage(void)
> unsigned int pageblock_order __read_mostly;
> #endif
>
> -static void __free_pages_ok(struct page *page, unsigned int order);
> +static void __free_pages_ok(struct page *page, unsigned int order,
> + fop_t fop_flags);
>
> /*
> * results with 256, 32 in the lowmem_reserve sysctl:
> @@ -682,7 +683,7 @@ static void bad_page(struct page *page, const char *reason)
> void free_compound_page(struct page *page)
> {
> mem_cgroup_uncharge(page);
> - __free_pages_ok(page, compound_order(page));
> + __free_pages_ok(page, compound_order(page), FOP_NONE);
> }
>
> void prep_compound_page(struct page *page, unsigned int order)
> @@ -1419,17 +1420,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
> spin_unlock(&zone->lock);
> }
>
> -static void free_one_page(struct zone *zone,
> - struct page *page, unsigned long pfn,
> - unsigned int order,
> - int migratetype)
> +static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn,
> + unsigned int order, int migratetype, fop_t fop_flags)
> {
> spin_lock(&zone->lock);
> if (unlikely(has_isolate_pageblock(zone) ||
> is_migrate_isolate(migratetype))) {
> migratetype = get_pfnblock_migratetype(page, pfn);
> }
> - __free_one_page(page, pfn, zone, order, migratetype, FOP_NONE);
> + __free_one_page(page, pfn, zone, order, migratetype, fop_flags);
> spin_unlock(&zone->lock);
> }
>
> @@ -1507,7 +1506,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
> }
> }
>
> -static void __free_pages_ok(struct page *page, unsigned int order)
> +static void __free_pages_ok(struct page *page, unsigned int order,
> + fop_t fop_flags)
> {
> unsigned long flags;
> int migratetype;
> @@ -1519,7 +1519,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
> migratetype = get_pfnblock_migratetype(page, pfn);
> local_irq_save(flags);
> __count_vm_events(PGFREE, 1 << order);
> - free_one_page(page_zone(page), page, pfn, order, migratetype);
> + free_one_page(page_zone(page), page, pfn, order, migratetype,
> + fop_flags);
> local_irq_restore(flags);
> }
>
> @@ -1529,6 +1530,11 @@ void __free_pages_core(struct page *page, unsigned int order)
> struct page *p = page;
> unsigned int loop;
>
> + /*
> + * When initializing the memmap, init_single_page() sets the refcount
> + * of all pages to 1 ("allocated"/"not free"). We have to set the
> + * refcount of all involved pages to 0.
> + */
> prefetchw(p);
> for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> prefetchw(p + 1);
> @@ -1539,8 +1545,12 @@ void __free_pages_core(struct page *page, unsigned int order)
> set_page_count(p, 0);
>
> atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
> - set_page_refcounted(page);
> - __free_pages(page, order);
> +
> + /*
> + * Bypass PCP and place fresh pages right to the tail, primarily
> + * relevant for memory onlining.
> + */
> + __free_pages_ok(page, order, FOP_TO_TAIL);
> }
>
> #ifdef CONFIG_NEED_MULTIPLE_NODES
> @@ -3171,7 +3181,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
> */
> if (migratetype >= MIGRATE_PCPTYPES) {
> if (unlikely(is_migrate_isolate(migratetype))) {
> - free_one_page(zone, page, pfn, 0, migratetype);
> + free_one_page(zone, page, pfn, 0, migratetype,
> + FOP_NONE);
> return;
> }
> migratetype = MIGRATE_MOVABLE;
> @@ -5063,7 +5074,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
> if (order == 0) /* Via pcp? */
> free_unref_page(page);
> else
> - __free_pages_ok(page, order);
> + __free_pages_ok(page, order, FOP_NONE);
> }
>
> void __free_pages(struct page *page, unsigned int order)
> --
> 2.26.2
>
--
Michal Hocko
SUSE Labs
On 02.10.20 15:41, Michal Hocko wrote: > On Mon 28-09-20 20:21:09, David Hildenbrand wrote: >> __free_pages_core() is used when exposing fresh memory to the buddy >> during system boot and when onlining memory in generic_online_page(). >> >> generic_online_page() is used in two cases: >> >> 1. Direct memory onlining in online_pages(). >> 2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV >> balloon and virtio-mem), when parts of a section are kept >> fake-offline to be fake-onlined later on. >> >> In 1, we already place pages to the tail of the freelist. Pages will be >> freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists >> via undo_isolate_page_range(). >> >> In 2, we currently don't implement a proper rule. In case of virtio-mem, >> where we currently always online MAX_ORDER - 1 pages, the pages will be >> placed to the HEAD of the freelist - undesireable. While the hyper-v >> balloon calls generic_online_page() with single pages, usually it will >> call it on successive single pages in a larger block. >> >> The pages are fresh, so place them to the tail of the freelists and avoid >> the PCP. In __free_pages_core(), remove the now superflouos call to >> set_page_refcounted() and add a comment regarding page initialization and >> the refcount. >> >> Note: In 2. we currently don't shuffle. If ever relevant (page shuffling >> is usually of limited use in virtualized environments), we might want to >> shuffle after a sequence of generic_online_page() calls in the >> relevant callers. > > It took some time to get through all the freeing paths with subtle > differences but this looks reasonable. You are mentioning that this > influences a boot time free memory ordering as well but only very > briefly. I do not expect this to make a huge difference but who knows. > It makes some sense to add pages in the order they show up in the > physical address ordering. I think boot memory is mostly exposed in the physical address ordering. In that case, higher addresses will now be used less likely immediately after this patch. I also don't think it's an issue - if we still detect it's an issue it's fairly easy to change again. Thanks! -- Thanks, David / dhildenb
On Mon, Sep 28, 2020 at 08:21:09PM +0200, David Hildenbrand wrote:
>__free_pages_core() is used when exposing fresh memory to the buddy
>during system boot and when onlining memory in generic_online_page().
>
>generic_online_page() is used in two cases:
>
>1. Direct memory onlining in online_pages().
>2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV
> balloon and virtio-mem), when parts of a section are kept
> fake-offline to be fake-onlined later on.
>
>In 1, we already place pages to the tail of the freelist. Pages will be
>freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists
>via undo_isolate_page_range().
>
>In 2, we currently don't implement a proper rule. In case of virtio-mem,
>where we currently always online MAX_ORDER - 1 pages, the pages will be
>placed to the HEAD of the freelist - undesireable. While the hyper-v
>balloon calls generic_online_page() with single pages, usually it will
>call it on successive single pages in a larger block.
>
>The pages are fresh, so place them to the tail of the freelists and avoid
>the PCP. In __free_pages_core(), remove the now superflouos call to
>set_page_refcounted() and add a comment regarding page initialization and
>the refcount.
>
>Note: In 2. we currently don't shuffle. If ever relevant (page shuffling
>is usually of limited use in virtualized environments), we might want to
>shuffle after a sequence of generic_online_page() calls in the
>relevant callers.
>
>Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
>Reviewed-by: Oscar Salvador <osalvador@suse.de>
>Cc: Andrew Morton <akpm@linux-foundation.org>
>Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
>Cc: Mel Gorman <mgorman@techsingularity.net>
>Cc: Michal Hocko <mhocko@kernel.org>
>Cc: Dave Hansen <dave.hansen@intel.com>
>Cc: Vlastimil Babka <vbabka@suse.cz>
>Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>Cc: Oscar Salvador <osalvador@suse.de>
>Cc: Mike Rapoport <rppt@kernel.org>
>Cc: "K. Y. Srinivasan" <kys@microsoft.com>
>Cc: Haiyang Zhang <haiyangz@microsoft.com>
>Cc: Stephen Hemminger <sthemmin@microsoft.com>
>Cc: Wei Liu <wei.liu@kernel.org>
>Signed-off-by: David Hildenbrand <david@redhat.com>
>---
> mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
> 1 file changed, 24 insertions(+), 13 deletions(-)
>
>diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>index d5a5f528b8ca..8a2134fe9947 100644
>--- a/mm/page_alloc.c
>+++ b/mm/page_alloc.c
>@@ -270,7 +270,8 @@ bool pm_suspended_storage(void)
> unsigned int pageblock_order __read_mostly;
> #endif
>
>-static void __free_pages_ok(struct page *page, unsigned int order);
>+static void __free_pages_ok(struct page *page, unsigned int order,
>+ fop_t fop_flags);
>
> /*
> * results with 256, 32 in the lowmem_reserve sysctl:
>@@ -682,7 +683,7 @@ static void bad_page(struct page *page, const char *reason)
> void free_compound_page(struct page *page)
> {
> mem_cgroup_uncharge(page);
>- __free_pages_ok(page, compound_order(page));
>+ __free_pages_ok(page, compound_order(page), FOP_NONE);
> }
>
> void prep_compound_page(struct page *page, unsigned int order)
>@@ -1419,17 +1420,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
> spin_unlock(&zone->lock);
> }
>
>-static void free_one_page(struct zone *zone,
>- struct page *page, unsigned long pfn,
>- unsigned int order,
>- int migratetype)
>+static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn,
>+ unsigned int order, int migratetype, fop_t fop_flags)
> {
> spin_lock(&zone->lock);
> if (unlikely(has_isolate_pageblock(zone) ||
> is_migrate_isolate(migratetype))) {
> migratetype = get_pfnblock_migratetype(page, pfn);
> }
>- __free_one_page(page, pfn, zone, order, migratetype, FOP_NONE);
>+ __free_one_page(page, pfn, zone, order, migratetype, fop_flags);
> spin_unlock(&zone->lock);
> }
>
>@@ -1507,7 +1506,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
> }
> }
>
>-static void __free_pages_ok(struct page *page, unsigned int order)
>+static void __free_pages_ok(struct page *page, unsigned int order,
>+ fop_t fop_flags)
> {
> unsigned long flags;
> int migratetype;
>@@ -1519,7 +1519,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
> migratetype = get_pfnblock_migratetype(page, pfn);
> local_irq_save(flags);
> __count_vm_events(PGFREE, 1 << order);
>- free_one_page(page_zone(page), page, pfn, order, migratetype);
>+ free_one_page(page_zone(page), page, pfn, order, migratetype,
>+ fop_flags);
> local_irq_restore(flags);
> }
>
>@@ -1529,6 +1530,11 @@ void __free_pages_core(struct page *page, unsigned int order)
> struct page *p = page;
> unsigned int loop;
>
>+ /*
>+ * When initializing the memmap, init_single_page() sets the refcount
If my code is the latest version.
s/init_single_page/__init_single_page/
Besides this.
Reviewed-by: Wei Yang <richard.weiyang@linux.alibaba.com>
>+ * of all pages to 1 ("allocated"/"not free"). We have to set the
>+ * refcount of all involved pages to 0.
>+ */
> prefetchw(p);
> for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
> prefetchw(p + 1);
>@@ -1539,8 +1545,12 @@ void __free_pages_core(struct page *page, unsigned int order)
> set_page_count(p, 0);
>
> atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>- set_page_refcounted(page);
>- __free_pages(page, order);
>+
>+ /*
>+ * Bypass PCP and place fresh pages right to the tail, primarily
>+ * relevant for memory onlining.
>+ */
>+ __free_pages_ok(page, order, FOP_TO_TAIL);
> }
>
> #ifdef CONFIG_NEED_MULTIPLE_NODES
>@@ -3171,7 +3181,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
> */
> if (migratetype >= MIGRATE_PCPTYPES) {
> if (unlikely(is_migrate_isolate(migratetype))) {
>- free_one_page(zone, page, pfn, 0, migratetype);
>+ free_one_page(zone, page, pfn, 0, migratetype,
>+ FOP_NONE);
> return;
> }
> migratetype = MIGRATE_MOVABLE;
>@@ -5063,7 +5074,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
> if (order == 0) /* Via pcp? */
> free_unref_page(page);
> else
>- __free_pages_ok(page, order);
>+ __free_pages_ok(page, order, FOP_NONE);
> }
>
> void __free_pages(struct page *page, unsigned int order)
>--
>2.26.2
--
Wei Yang
Help you, Help me
On 29.09.20 11:36, Wei Yang wrote:
> On Mon, Sep 28, 2020 at 08:21:09PM +0200, David Hildenbrand wrote:
>> __free_pages_core() is used when exposing fresh memory to the buddy
>> during system boot and when onlining memory in generic_online_page().
>>
>> generic_online_page() is used in two cases:
>>
>> 1. Direct memory onlining in online_pages().
>> 2. Deferred memory onlining in memory-ballooning-like mechanisms (HyperV
>> balloon and virtio-mem), when parts of a section are kept
>> fake-offline to be fake-onlined later on.
>>
>> In 1, we already place pages to the tail of the freelist. Pages will be
>> freed to MIGRATE_ISOLATE lists first and moved to the tail of the freelists
>> via undo_isolate_page_range().
>>
>> In 2, we currently don't implement a proper rule. In case of virtio-mem,
>> where we currently always online MAX_ORDER - 1 pages, the pages will be
>> placed to the HEAD of the freelist - undesireable. While the hyper-v
>> balloon calls generic_online_page() with single pages, usually it will
>> call it on successive single pages in a larger block.
>>
>> The pages are fresh, so place them to the tail of the freelists and avoid
>> the PCP. In __free_pages_core(), remove the now superflouos call to
>> set_page_refcounted() and add a comment regarding page initialization and
>> the refcount.
>>
>> Note: In 2. we currently don't shuffle. If ever relevant (page shuffling
>> is usually of limited use in virtualized environments), we might want to
>> shuffle after a sequence of generic_online_page() calls in the
>> relevant callers.
>>
>> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
>> Reviewed-by: Oscar Salvador <osalvador@suse.de>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
>> Cc: Mel Gorman <mgorman@techsingularity.net>
>> Cc: Michal Hocko <mhocko@kernel.org>
>> Cc: Dave Hansen <dave.hansen@intel.com>
>> Cc: Vlastimil Babka <vbabka@suse.cz>
>> Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>> Cc: Oscar Salvador <osalvador@suse.de>
>> Cc: Mike Rapoport <rppt@kernel.org>
>> Cc: "K. Y. Srinivasan" <kys@microsoft.com>
>> Cc: Haiyang Zhang <haiyangz@microsoft.com>
>> Cc: Stephen Hemminger <sthemmin@microsoft.com>
>> Cc: Wei Liu <wei.liu@kernel.org>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>> mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
>> 1 file changed, 24 insertions(+), 13 deletions(-)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index d5a5f528b8ca..8a2134fe9947 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -270,7 +270,8 @@ bool pm_suspended_storage(void)
>> unsigned int pageblock_order __read_mostly;
>> #endif
>>
>> -static void __free_pages_ok(struct page *page, unsigned int order);
>> +static void __free_pages_ok(struct page *page, unsigned int order,
>> + fop_t fop_flags);
>>
>> /*
>> * results with 256, 32 in the lowmem_reserve sysctl:
>> @@ -682,7 +683,7 @@ static void bad_page(struct page *page, const char *reason)
>> void free_compound_page(struct page *page)
>> {
>> mem_cgroup_uncharge(page);
>> - __free_pages_ok(page, compound_order(page));
>> + __free_pages_ok(page, compound_order(page), FOP_NONE);
>> }
>>
>> void prep_compound_page(struct page *page, unsigned int order)
>> @@ -1419,17 +1420,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>> spin_unlock(&zone->lock);
>> }
>>
>> -static void free_one_page(struct zone *zone,
>> - struct page *page, unsigned long pfn,
>> - unsigned int order,
>> - int migratetype)
>> +static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn,
>> + unsigned int order, int migratetype, fop_t fop_flags)
>> {
>> spin_lock(&zone->lock);
>> if (unlikely(has_isolate_pageblock(zone) ||
>> is_migrate_isolate(migratetype))) {
>> migratetype = get_pfnblock_migratetype(page, pfn);
>> }
>> - __free_one_page(page, pfn, zone, order, migratetype, FOP_NONE);
>> + __free_one_page(page, pfn, zone, order, migratetype, fop_flags);
>> spin_unlock(&zone->lock);
>> }
>>
>> @@ -1507,7 +1506,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
>> }
>> }
>>
>> -static void __free_pages_ok(struct page *page, unsigned int order)
>> +static void __free_pages_ok(struct page *page, unsigned int order,
>> + fop_t fop_flags)
>> {
>> unsigned long flags;
>> int migratetype;
>> @@ -1519,7 +1519,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
>> migratetype = get_pfnblock_migratetype(page, pfn);
>> local_irq_save(flags);
>> __count_vm_events(PGFREE, 1 << order);
>> - free_one_page(page_zone(page), page, pfn, order, migratetype);
>> + free_one_page(page_zone(page), page, pfn, order, migratetype,
>> + fop_flags);
>> local_irq_restore(flags);
>> }
>>
>> @@ -1529,6 +1530,11 @@ void __free_pages_core(struct page *page, unsigned int order)
>> struct page *p = page;
>> unsigned int loop;
>>
>> + /*
>> + * When initializing the memmap, init_single_page() sets the refcount
>
> If my code is the latest version.
>
> s/init_single_page/__init_single_page/
Indeed - thanks!
--
Thanks,
David / dhildenb
© 2016 - 2026 Red Hat, Inc.