From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
to make it clear that KHO operates on pages rather than on a random
physical address.
The kho_preserve_pages() will be also used in upcoming support for
vmalloc preservation.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 include/linux/kexec_handover.h |  5 +++--
 kernel/kexec_handover.c        | 25 +++++++++++--------------
 mm/memblock.c                  |  4 +++-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..cc5c49b0612b 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -18,6 +18,7 @@ enum kho_event {
 
 struct folio;
 struct notifier_block;
+struct page;
 
 #define DECLARE_KHOSER_PTR(name, type)	\
 	union {					\
@@ -42,7 +43,7 @@ struct kho_serialization;
 bool kho_is_enabled(void);
 
 int kho_preserve_folio(struct folio *folio);
-int kho_preserve_phys(phys_addr_t phys, size_t size);
+int kho_preserve_pages(struct page *page, unsigned int nr_pages);
 struct folio *kho_restore_folio(phys_addr_t phys);
 int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
@@ -65,7 +66,7 @@ static inline int kho_preserve_folio(struct folio *folio)
 	return -EOPNOTSUPP;
 }
 
-static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
+static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index f421acc58c1f..3ad59c5f9eaa 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -698,26 +698,23 @@ int kho_preserve_folio(struct folio *folio)
 EXPORT_SYMBOL_GPL(kho_preserve_folio);
 
 /**
- * kho_preserve_phys - preserve a physically contiguous range across kexec.
- * @phys: physical address of the range.
- * @size: size of the range.
+ * kho_preserve_pages - preserve contiguous pages across kexec
+ * @page: first page in the list.
+ * @nr_pages: number of pages.
  *
- * Instructs KHO to preserve the memory range from @phys to @phys + @size
- * across kexec.
+ * Preserve a contiguous list of order 0 pages. Must be restored using
+ * kho_restore_page() on each order 0 page.
  *
  * Return: 0 on success, error code on failure
  */
-int kho_preserve_phys(phys_addr_t phys, size_t size)
+int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 {
-	unsigned long pfn = PHYS_PFN(phys);
+	struct kho_mem_track *track = &kho_out.ser.track;
+	const unsigned long start_pfn = page_to_pfn(page);
+	const unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long pfn = start_pfn;
 	unsigned long failed_pfn = 0;
-	const unsigned long start_pfn = pfn;
-	const unsigned long end_pfn = PHYS_PFN(phys + size);
 	int err = 0;
-	struct kho_mem_track *track = &kho_out.ser.track;
-
-	if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
-		return -EINVAL;
 
 	while (pfn < end_pfn) {
 		const unsigned int order =
@@ -737,7 +734,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(kho_preserve_phys);
+EXPORT_SYMBOL_GPL(kho_preserve_pages);
 
 /* Handling for debug/kho/out */
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c..6ec3eaa4e8d1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
 
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
+		struct page *page = phys_to_page(map->start);
+		unsigned int nr_pages = map->size >> PAGE_SHIFT;
 
-		err |= kho_preserve_phys(map->start, map->size);
+		err |= kho_preserve_pages(page, nr_pages);
 	}
 
 	err |= kho_preserve_folio(page_folio(kho_fdt));
--
2.50.1
Hi Mike,

On Wed, Sep 17 2025, Mike Rapoport wrote:

> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> to make it clear that KHO operates on pages rather than on a random
> physical address.
>
> kho_preserve_pages() will also be used in the upcoming support for
> vmalloc preservation.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
>  include/linux/kexec_handover.h |  5 +++--
>  kernel/kexec_handover.c        | 25 +++++++++++--------------
>  mm/memblock.c                  |  4 +++-
>  3 files changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
> index 348844cffb13..cc5c49b0612b 100644
> --- a/include/linux/kexec_handover.h
> +++ b/include/linux/kexec_handover.h
> @@ -18,6 +18,7 @@ enum kho_event {
>
>  struct folio;
>  struct notifier_block;
> +struct page;
>
>  #define DECLARE_KHOSER_PTR(name, type)	\
>  	union {					\
> @@ -42,7 +43,7 @@ struct kho_serialization;
>  bool kho_is_enabled(void);
>
>  int kho_preserve_folio(struct folio *folio);
> -int kho_preserve_phys(phys_addr_t phys, size_t size);
> +int kho_preserve_pages(struct page *page, unsigned int nr_pages);
>  struct folio *kho_restore_folio(phys_addr_t phys);
>  int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
>  int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
> @@ -65,7 +66,7 @@ static inline int kho_preserve_folio(struct folio *folio)
>  	return -EOPNOTSUPP;
>  }
>
> -static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
> +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages)
>  {
>  	return -EOPNOTSUPP;
>  }
> diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
> index f421acc58c1f..3ad59c5f9eaa 100644
> --- a/kernel/kexec_handover.c
> +++ b/kernel/kexec_handover.c
> @@ -698,26 +698,23 @@ int kho_preserve_folio(struct folio *folio)
>  EXPORT_SYMBOL_GPL(kho_preserve_folio);
>
>  /**
> - * kho_preserve_phys - preserve a physically contiguous range across kexec.
> - * @phys: physical address of the range.
> - * @size: size of the range.
> + * kho_preserve_pages - preserve contiguous pages across kexec
> + * @page: first page in the list.
> + * @nr_pages: number of pages.
>   *
> - * Instructs KHO to preserve the memory range from @phys to @phys + @size
> - * across kexec.
> + * Preserve a contiguous list of order 0 pages. Must be restored using
> + * kho_restore_page() on each order 0 page.

This is not true. The pages are preserved with the maximum order
possible.

	while (pfn < end_pfn) {
		const unsigned int order =
			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));

		err = __kho_preserve_order(track, pfn, order);
	[...]

So four 0-order pages will be preserved as one 2-order page. Restoring
them as four 0-order pages is wrong. And my proposed patch for checking
the magic [0] will uncover this exact bug.

I think you should either change the logic to always preserve at order
0, or maybe add a kho_restore_pages() that replicates the same order
calculation.
[0] https://lore.kernel.org/lkml/20250917125725.665-2-pratyush@kernel.org/

>   *
>   * Return: 0 on success, error code on failure
>   */
> -int kho_preserve_phys(phys_addr_t phys, size_t size)
> +int kho_preserve_pages(struct page *page, unsigned int nr_pages)
>  {
> -	unsigned long pfn = PHYS_PFN(phys);
> +	struct kho_mem_track *track = &kho_out.ser.track;
> +	const unsigned long start_pfn = page_to_pfn(page);
> +	const unsigned long end_pfn = start_pfn + nr_pages;
> +	unsigned long pfn = start_pfn;
>  	unsigned long failed_pfn = 0;
> -	const unsigned long start_pfn = pfn;
> -	const unsigned long end_pfn = PHYS_PFN(phys + size);
>  	int err = 0;
> -	struct kho_mem_track *track = &kho_out.ser.track;
> -
> -	if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
> -		return -EINVAL;
>
>  	while (pfn < end_pfn) {
>  		const unsigned int order =
> @@ -737,7 +734,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
>
>  	return err;
>  }
> -EXPORT_SYMBOL_GPL(kho_preserve_phys);
> +EXPORT_SYMBOL_GPL(kho_preserve_pages);
>
>  /* Handling for debug/kho/out */
>
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 117d963e677c..6ec3eaa4e8d1 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
>
>  	for (i = 0; i < reserved_mem_count; i++) {
>  		struct reserve_mem_table *map = &reserved_mem_table[i];
> +		struct page *page = phys_to_page(map->start);
> +		unsigned int nr_pages = map->size >> PAGE_SHIFT;
>
> -		err |= kho_preserve_phys(map->start, map->size);
> +		err |= kho_preserve_pages(page, nr_pages);

Unrelated to this patch, but since there is no
kho_restore_{phys,pages}(), won't the reserve_mem memory end up with
uninitialized struct pages, since preserved pages are
memblock_reserved_mark_noinit()?

That would also be a case for kho_restore_pages() I suppose?

-- 
Regards,
Pratyush Yadav
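[Editor's illustration] To make the coalescing Pratyush describes concrete, here is a short trace of that order computation for two hypothetical ranges; the pfn values are invented for the example and do not come from the patch:

/*
 * Illustrative only: how the preserve loop picks orders.
 *
 * start_pfn = 0x100, nr_pages = 4 (end_pfn = 0x104):
 *   pfn 0x100: order = min(count_trailing_zeros(0x100), ilog2(4))
 *                    = min(8, 2) = 2
 *              -> the four order-0 pages are recorded as one order-2 chunk
 *
 * start_pfn = 0x101, nr_pages = 3 (end_pfn = 0x104):
 *   pfn 0x101: order = min(0, ilog2(3)) = 0  -> one order-0 chunk
 *   pfn 0x102: order = min(1, ilog2(2)) = 1  -> one order-1 chunk
 *
 * Restoring such a range with kho_restore_page() on every individual
 * order-0 page would therefore not match the orders that were recorded.
 */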
Hi Pratyush,

On Thu, Sep 18, 2025 at 12:32:08PM +0200, Pratyush Yadav wrote:
> Hi Mike,
>
> On Wed, Sep 17 2025, Mike Rapoport wrote:
>
> >  /**
> > - * kho_preserve_phys - preserve a physically contiguous range across kexec.
> > - * @phys: physical address of the range.
> > - * @size: size of the range.
> > + * kho_preserve_pages - preserve contiguous pages across kexec
> > + * @page: first page in the list.
> > + * @nr_pages: number of pages.
> >   *
> > - * Instructs KHO to preserve the memory range from @phys to @phys + @size
> > - * across kexec.
> > + * Preserve a contiguous list of order 0 pages. Must be restored using
> > + * kho_restore_page() on each order 0 page.
>
> This is not true. The pages are preserved with the maximum order
> possible.
>
> 	while (pfn < end_pfn) {
> 		const unsigned int order =
> 			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
>
> 		err = __kho_preserve_order(track, pfn, order);
> 	[...]
>
> So four 0-order pages will be preserved as one 2-order page. Restoring
> them as four 0-order pages is wrong. And my proposed patch for checking
> the magic [0] will uncover this exact bug.
>
> I think you should either change the logic to always preserve at order
> 0, or maybe add a kho_restore_pages() that replicates the same order
> calculation.

Heh, it seems I shot myself in the foot when I suggested to move the
sanity checks to kho_restore_page() :-D

We surely don't want to preserve contiguous chunks of order-0 pages as
order 0, so kho_restore_pages() it is.

> [0] https://lore.kernel.org/lkml/20250917125725.665-2-pratyush@kernel.org/
>
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 117d963e677c..6ec3eaa4e8d1 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> >
> >  	for (i = 0; i < reserved_mem_count; i++) {
> >  		struct reserve_mem_table *map = &reserved_mem_table[i];
> > +		struct page *page = phys_to_page(map->start);
> > +		unsigned int nr_pages = map->size >> PAGE_SHIFT;
> >
> > -		err |= kho_preserve_phys(map->start, map->size);
> > +		err |= kho_preserve_pages(page, nr_pages);
>
> Unrelated to this patch, but since there is no
> kho_restore_{phys,pages}(), won't the reserve_mem memory end up with
> uninitialized struct pages, since preserved pages are
> memblock_reserved_mark_noinit()?

True, this is something we need to fix.

> That would also be a case for kho_restore_pages() I suppose?

Yes, just need to find the right place to stick it. We cannot call
kho_restore_pages() in reserve_mem_kho_revive() because at that point
there's still no memory map.

-- 
Sincerely yours,
Mike.
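[Editor's illustration] A minimal sketch of what the kho_restore_pages() agreed on above could look like, mirroring the order computation of the preserve side. The function signature and the kho_restore_page() call are assumptions made for the example (this thread does not show them), not code from this series:

/*
 * Sketch only: restore a contiguous range previously preserved with
 * kho_preserve_pages(). Walks the range with the same order choice as
 * the preserve loop so every chunk is restored at the order it was
 * recorded with. Assumes, hypothetically, that kho_restore_page()
 * takes the physical address of a preserved chunk and returns its
 * head page, or NULL on failure.
 */
struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages)
{
	const unsigned long start_pfn = PHYS_PFN(phys);
	const unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long pfn = start_pfn;

	while (pfn < end_pfn) {
		/* mirror the order chosen by kho_preserve_pages() */
		const unsigned int order =
			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
		struct page *page = kho_restore_page(PFN_PHYS(pfn));

		if (!page)
			return NULL;
		pfn += 1 << order;
	}

	return pfn_to_page(start_pfn);
}

Where such a helper would be called for the reserve_mem case is still open, since, as noted above, the memory map is not yet available in reserve_mem_kho_revive().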