For !CONFIG_SPARSEMEM_VMEMMAP, memmap page accounting is currently done
upfront in sparse_buffer_init(). However, sparse_buffer_alloc() may
return NULL on failure, in which case pages that were never handed out
remain accounted.
Also, memmap pages may be allocated either from the memblock allocator
during early boot or from the buddy allocator. When removed via
arch_remove_memory(), accounting of memmap pages must reflect the
original allocation source.
To ensure correctness:
* Account memmap pages after successful allocation in sparse_init_nid()
and section_activate().
* Account memmap pages in section_deactivate() based on allocation
source.
Cc: stable@vger.kernel.org
Fixes: 15995a352474 ("mm: report per-page metadata information")
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
---
v4:
* Add Fixes and Suggested-by tags.
v3:
* Account memmap pages for !CONFIG_SPARSEMEM_VMEMMAP and only when memmap
allocation succeeds. Thanks Wei Yang.
v2:
* Account memmap pages for !CONFIG_SPARSEMEM_VMEMMAP in
section_deactivate(). Thanks David.
* https://lore.kernel.org/all/20250804151328.2326642-1-sumanthk@linux.ibm.com/
v1:
* Account memmap pages for early sections.
* https://lore.kernel.org/all/20250804084015.270570-1-sumanthk@linux.ibm.com/
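As a reference for reviewers, a standalone model of the memmap-size
arithmetic this patch relies on; the page size, struct page size, and
section size below are assumed typical x86-64 values, not anything taken
from the patch itself:

    #include <stdio.h>

    /* Same rounding helper the kernel uses. */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* Assumed example geometry: 4 KiB pages, 64-byte struct page,
         * 128 MiB sections => 32768 PFNs per section. */
        unsigned long page_size = 4096;
        unsigned long struct_page_size = 64;
        unsigned long pages_per_section = (128UL << 20) / page_size;

        /* 32768 * 64 B = 2 MiB of memmap => 512 pages per section,
         * i.e. what DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
         * PAGE_SIZE) evaluates to with these values. */
        printf("memmap pages per section: %lu\n",
               DIV_ROUND_UP(pages_per_section * struct_page_size,
                            page_size));
        return 0;
    }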
mm/sparse-vmemmap.c | 5 -----
mm/sparse.c | 15 +++++++++------
2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index fd2ab5118e13..41aa0493eb03 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -578,11 +578,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
if (r < 0)
return NULL;
- if (system_state == SYSTEM_BOOTING)
- memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
- else
- memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
-
return pfn_to_page(pfn);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index 066cbf82acb8..24323122f6cb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -454,9 +454,6 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
*/
sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
sparsemap_buf_end = sparsemap_buf + size;
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
- memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
-#endif
}
static void __init sparse_buffer_fini(void)
@@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
sparse_buffer_fini();
goto failed;
}
+ memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
+ PAGE_SIZE));
sparse_init_early_section(nid, map, pnum, 0);
}
}
@@ -680,7 +679,6 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
unsigned long start = (unsigned long) pfn_to_page(pfn);
unsigned long end = start + nr_pages * sizeof(struct page);
- memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
vmemmap_free(start, end, altmap);
}
static void free_map_bootmem(struct page *memmap)
@@ -856,10 +854,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* The memmap of early sections is always fully populated. See
* section_activate() and pfn_valid() .
*/
- if (!section_is_early)
+ if (!section_is_early) {
+ memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
depopulate_section_memmap(pfn, nr_pages, altmap);
- else if (memmap)
+ } else if (memmap) {
+ memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
+ PAGE_SIZE)));
free_map_bootmem(memmap);
+ }
if (empty)
ms->section_mem_map = (unsigned long)NULL;
@@ -904,6 +906,7 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
section_deactivate(pfn, nr_pages, altmap);
return ERR_PTR(-ENOMEM);
}
+ memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
return memmap;
}
--
2.48.1
On 07.08.25 20:35, Sumanth Korikkar wrote:
[...]
> @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
>  			sparse_buffer_fini();
>  			goto failed;
>  		}
> +		memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
> +						   PAGE_SIZE));

IIRC, we can have partially populated boot sections, where only some
subsections actually have a memmap ... so this calculation is possibly
wrong in some cases.

-- 
Cheers,

David / dhildenb
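A quick standalone calculation of the overcounting David is hinting at,
under assumed x86-64 geometry (128 MiB sections split into 64 subsections
of 2 MiB each, 4 KiB pages, 64-byte struct page; all illustrative values,
not taken from the thread):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned long pages_per_section = 32768; /* 128 MiB / 4 KiB */
        unsigned long sz_struct_page = 64;
        unsigned long subsections = 64, populated = 16;

        /* What the new call charges: the full section's memmap. */
        unsigned long charged =
            DIV_ROUND_UP(pages_per_section * sz_struct_page, 4096); /* 512 */

        /* What a partially populated boot section would actually use
         * if only 16 of 64 subsections carry a memmap. */
        unsigned long actual =
            DIV_ROUND_UP(pages_per_section * populated / subsections
                         * sz_struct_page, 4096);                    /* 128 */

        printf("charged=%lu pages, actual=%lu pages\n", charged, actual);
        return 0;
    }

Whether that case can actually occur for !CONFIG_SPARSEMEM_VMEMMAP is
what the reply below addresses.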
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 066cbf82acb8..24323122f6cb 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -454,9 +454,6 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
> >  	 */
> >  	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
> >  	sparsemap_buf_end = sparsemap_buf + size;
> > -#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > -	memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
> > -#endif
> >  }
> >
> >  static void __init sparse_buffer_fini(void)
> > @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
> >  			sparse_buffer_fini();
> >  			goto failed;
> >  		}
> > +		memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
> > +						   PAGE_SIZE));
>
> IIRC, we can have partially populated boot sections, where only some
> subsections actually have a memmap ... so this calculation is possibly
> wrong in some cases.

In section_activate():

	/*
	 * The early init code does not consider partially populated
	 * initial sections, it simply assumes that memory will never be
	 * referenced.  If we hot-add memory into such a section then we
	 * do not need to populate the memmap and can simply reuse what
	 * is already there.
	 */
	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
		return pfn_to_page(pfn);

The patch skips the accounting here, based on the comment above about
partially populated initial sections.

	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
	if (!memmap) {
		section_deactivate(pfn, nr_pages, altmap);
		return ERR_PTR(-ENOMEM);
	}
	memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));

Bookkeeping is performed only for a newly allocated memmap. Before this
patch, __populate_section_memmap() likewise only accounted a non-NULL
memmap. This patch makes a similar change, but covers memmap accounting
for both the CONFIG_SPARSEMEM_VMEMMAP and !CONFIG_SPARSEMEM_VMEMMAP
cases, with the accounting based on the allocation source.

Let me know if this sounds right. Thank you.
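As an aside, the activate/deactivate pairing described above can be
modeled in a few lines of standalone C. The struct and the two helpers
below are invented for illustration; only the two counters mirror the
kernel's memmap_pages/memmap_boot_pages bookkeeping:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's global counters. */
    static long memmap_boot_pages; /* memblock-allocated (early boot) */
    static long memmap_pages;      /* buddy-allocated (hotplug) */

    struct section {
        bool early;    /* memmap came from memblock at boot */
        long nr_pages; /* memmap pages charged for this section */
    };

    /* Charge on successful allocation, to the matching counter. */
    static void account_activate(struct section *ms)
    {
        if (ms->early)
            memmap_boot_pages += ms->nr_pages;
        else
            memmap_pages += ms->nr_pages;
    }

    /* On removal, reverse exactly the counter the pages were charged to. */
    static void account_deactivate(struct section *ms)
    {
        if (ms->early)
            memmap_boot_pages -= ms->nr_pages;
        else
            memmap_pages -= ms->nr_pages;
    }

    int main(void)
    {
        struct section boot = { .early = true,  .nr_pages = 512 };
        struct section hot  = { .early = false, .nr_pages = 512 };

        account_activate(&boot);
        account_activate(&hot);
        account_deactivate(&hot); /* hot-remove: only the buddy counter drops */

        printf("boot=%ld hot=%ld\n", memmap_boot_pages, memmap_pages);
        return 0;
    }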
On Fri, 8 Aug 2025 16:23:52 +0200 David Hildenbrand <david@redhat.com> wrote:

> > @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
> >  			sparse_buffer_fini();
> >  			goto failed;
> >  		}
> > +		memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
> > +						   PAGE_SIZE));
>
> IIRC, we can have partially populated boot sections, where only some
> subsections actually have a memmap ... so this calculation is possibly
> wrong in some cases.

The patch (which has cc:stable!) has been in state "stuck" for 12 days
due to "possibly wrong".

Could someone please find a way to get it unstuck?
On Thu, Aug 07, 2025 at 08:35:45PM +0200, Sumanth Korikkar wrote:
> For !CONFIG_SPARSEMEM_VMEMMAP, memmap page accounting is currently done
> upfront in sparse_buffer_init(). However, sparse_buffer_alloc() may
> return NULL on failure, in which case pages that were never handed out
> remain accounted.
[...]
> Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>

LGTM

Reviewed-by: Wei Yang <richard.weiyang@gmail.com>

-- 
Wei Yang
Help you, Help me