[PATCH v4 2/4] kho: replace kho_preserve_phys() with kho_preserve_pages()

Posted by Mike Rapoport 2 weeks ago
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>

Rename kho_preserve_phys() to kho_preserve_pages() to make it clear
that KHO operates on pages rather than on an arbitrary physical address
range.

kho_preserve_pages() will also be used in the upcoming support for
vmalloc preservation.

Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 include/linux/kexec_handover.h |  5 +++--
 kernel/kexec_handover.c        | 25 +++++++++++--------------
 mm/memblock.c                  |  4 +++-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..cc5c49b0612b 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -18,6 +18,7 @@ enum kho_event {
 
 struct folio;
 struct notifier_block;
+struct page;
 
 #define DECLARE_KHOSER_PTR(name, type) \
 	union {                        \
@@ -42,7 +43,7 @@ struct kho_serialization;
 bool kho_is_enabled(void);
 
 int kho_preserve_folio(struct folio *folio);
-int kho_preserve_phys(phys_addr_t phys, size_t size);
+int kho_preserve_pages(struct page *page, unsigned int nr_pages);
 struct folio *kho_restore_folio(phys_addr_t phys);
 int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
@@ -65,7 +66,7 @@ static inline int kho_preserve_folio(struct folio *folio)
 	return -EOPNOTSUPP;
 }
 
-static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
+static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index f421acc58c1f..3ad59c5f9eaa 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -698,26 +698,23 @@ int kho_preserve_folio(struct folio *folio)
 EXPORT_SYMBOL_GPL(kho_preserve_folio);
 
 /**
- * kho_preserve_phys - preserve a physically contiguous range across kexec.
- * @phys: physical address of the range.
- * @size: size of the range.
+ * kho_preserve_pages - preserve contiguous pages across kexec
+ * @page: first page in the list.
+ * @nr_pages: number of pages.
  *
- * Instructs KHO to preserve the memory range from @phys to @phys + @size
- * across kexec.
+ * Preserve a contiguous list of order 0 pages. Must be restored using
+ * kho_restore_page() on each order 0 page.
  *
  * Return: 0 on success, error code on failure
  */
-int kho_preserve_phys(phys_addr_t phys, size_t size)
+int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 {
-	unsigned long pfn = PHYS_PFN(phys);
+	struct kho_mem_track *track = &kho_out.ser.track;
+	const unsigned long start_pfn = page_to_pfn(page);
+	const unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long pfn = start_pfn;
 	unsigned long failed_pfn = 0;
-	const unsigned long start_pfn = pfn;
-	const unsigned long end_pfn = PHYS_PFN(phys + size);
 	int err = 0;
-	struct kho_mem_track *track = &kho_out.ser.track;
-
-	if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
-		return -EINVAL;
 
 	while (pfn < end_pfn) {
 		const unsigned int order =
@@ -737,7 +734,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(kho_preserve_phys);
+EXPORT_SYMBOL_GPL(kho_preserve_pages);
 
 /* Handling for debug/kho/out */
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c..6ec3eaa4e8d1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
 
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
+		struct page *page = phys_to_page(map->start);
+		unsigned int nr_pages = map->size >> PAGE_SHIFT;
 
-		err |= kho_preserve_phys(map->start, map->size);
+		err |= kho_preserve_pages(page, nr_pages);
 	}
 
 	err |= kho_preserve_folio(page_folio(kho_fdt));
-- 
2.50.1
Re: [PATCH v4 2/4] kho: replace kho_preserve_phys() with kho_preserve_pages()
Posted by Pratyush Yadav 2 weeks ago
Hi Mike,

On Wed, Sep 17 2025, Mike Rapoport wrote:

> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> Rename kho_preserve_phys() to kho_preserve_pages() to make it clear
> that KHO operates on pages rather than on an arbitrary physical address
> range.
>
> kho_preserve_pages() will also be used in the upcoming support for
> vmalloc preservation.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
>  include/linux/kexec_handover.h |  5 +++--
>  kernel/kexec_handover.c        | 25 +++++++++++--------------
>  mm/memblock.c                  |  4 +++-
>  3 files changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
> index 348844cffb13..cc5c49b0612b 100644
> --- a/include/linux/kexec_handover.h
> +++ b/include/linux/kexec_handover.h
> @@ -18,6 +18,7 @@ enum kho_event {
>  
>  struct folio;
>  struct notifier_block;
> +struct page;
>  
>  #define DECLARE_KHOSER_PTR(name, type) \
>  	union {                        \
> @@ -42,7 +43,7 @@ struct kho_serialization;
>  bool kho_is_enabled(void);
>  
>  int kho_preserve_folio(struct folio *folio);
> -int kho_preserve_phys(phys_addr_t phys, size_t size);
> +int kho_preserve_pages(struct page *page, unsigned int nr_pages);
>  struct folio *kho_restore_folio(phys_addr_t phys);
>  int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
>  int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
> @@ -65,7 +66,7 @@ static inline int kho_preserve_folio(struct folio *folio)
>  	return -EOPNOTSUPP;
>  }
>  
> -static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
> +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages)
>  {
>  	return -EOPNOTSUPP;
>  }
> diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
> index f421acc58c1f..3ad59c5f9eaa 100644
> --- a/kernel/kexec_handover.c
> +++ b/kernel/kexec_handover.c
> @@ -698,26 +698,23 @@ int kho_preserve_folio(struct folio *folio)
>  EXPORT_SYMBOL_GPL(kho_preserve_folio);
>  
>  /**
> - * kho_preserve_phys - preserve a physically contiguous range across kexec.
> - * @phys: physical address of the range.
> - * @size: size of the range.
> + * kho_preserve_pages - preserve contiguous pages across kexec
> + * @page: first page in the list.
> + * @nr_pages: number of pages.
>   *
> - * Instructs KHO to preserve the memory range from @phys to @phys + @size
> - * across kexec.
> + * Preserve a contiguous list of order 0 pages. Must be restored using
> + * kho_restore_page() on each order 0 page.

This is not true. The pages are preserved with the maximum order
possible.

	while (pfn < end_pfn) {
		const unsigned int order =
			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));

		err = __kho_preserve_order(track, pfn, order);
		[...]

So four 0-order pages will be preserved as one 2-order page. Restoring
them as four 0-order pages is wrong. And my proposed patch for checking
the magic [0] will uncover this exact bug.

I think you should either change the logic to always preserve at order
0, or maybe add a kho_restore_pages() that replicates the same order
calculation.

[0] https://lore.kernel.org/lkml/20250917125725.665-2-pratyush@kernel.org/
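
For illustration, the restore side could walk the range with the same
order calculation as the preserve side. A minimal sketch, assuming a
kho_restore_page()-style helper that takes the physical address plus the
order the chunk was preserved with (that signature is hypothetical):

	struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages)
	{
		const unsigned long start_pfn = PHYS_PFN(phys);
		const unsigned long end_pfn = start_pfn + nr_pages;
		unsigned long pfn = start_pfn;

		while (pfn < end_pfn) {
			/* same order choice as on the preserve side */
			const unsigned int order =
				min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));

			if (!kho_restore_page(PFN_PHYS(pfn), order))
				return NULL;
			pfn += 1 << order;
		}

		return pfn_to_page(start_pfn);
	}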

>   *
>   * Return: 0 on success, error code on failure
>   */
> -int kho_preserve_phys(phys_addr_t phys, size_t size)
> +int kho_preserve_pages(struct page *page, unsigned int nr_pages)
>  {
> -	unsigned long pfn = PHYS_PFN(phys);
> +	struct kho_mem_track *track = &kho_out.ser.track;
> +	const unsigned long start_pfn = page_to_pfn(page);
> +	const unsigned long end_pfn = start_pfn + nr_pages;
> +	unsigned long pfn = start_pfn;
>  	unsigned long failed_pfn = 0;
> -	const unsigned long start_pfn = pfn;
> -	const unsigned long end_pfn = PHYS_PFN(phys + size);
>  	int err = 0;
> -	struct kho_mem_track *track = &kho_out.ser.track;
> -
> -	if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
> -		return -EINVAL;
>  
>  	while (pfn < end_pfn) {
>  		const unsigned int order =
> @@ -737,7 +734,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
>  
>  	return err;
>  }
> -EXPORT_SYMBOL_GPL(kho_preserve_phys);
> +EXPORT_SYMBOL_GPL(kho_preserve_pages);
>  
>  /* Handling for debug/kho/out */
>  
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 117d963e677c..6ec3eaa4e8d1 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
>  
>  	for (i = 0; i < reserved_mem_count; i++) {
>  		struct reserve_mem_table *map = &reserved_mem_table[i];
> +		struct page *page = phys_to_page(map->start);
> +		unsigned int nr_pages = map->size >> PAGE_SHIFT;
>  
> -		err |= kho_preserve_phys(map->start, map->size);
> +		err |= kho_preserve_pages(page, nr_pages);

Unrelated to this patch, but since there is no
kho_restore_{phys,pages}(), won't the reserve_mem memory end up with
uninitialized struct pages, since preserved pages are
memblock_reserved_mark_noinit()?

That would also be a case for kho_restore_pages() I suppose?

-- 
Regards,
Pratyush Yadav
Re: [PATCH v4 2/4] kho: replace kho_preserve_phys() with kho_preserve_pages()
Posted by Mike Rapoport 1 week, 6 days ago
Hi Pratyush,

On Thu, Sep 18, 2025 at 12:32:08PM +0200, Pratyush Yadav wrote:
> Hi Mike,
> 
> On Wed, Sep 17 2025, Mike Rapoport wrote:
> 
> >  /**
> > - * kho_preserve_phys - preserve a physically contiguous range across kexec.
> > - * @phys: physical address of the range.
> > - * @size: size of the range.
> > + * kho_preserve_pages - preserve contiguous pages across kexec
> > + * @page: first page in the list.
> > + * @nr_pages: number of pages.
> >   *
> > - * Instructs KHO to preserve the memory range from @phys to @phys + @size
> > - * across kexec.
> > + * Preserve a contiguous list of order 0 pages. Must be restored using
> > + * kho_restore_page() on each order 0 page.
> 
> This is not true. The pages are preserved with the maximum order
> possible.
> 
> 	while (pfn < end_pfn) {
> 		const unsigned int order =
> 			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
> 
> 		err = __kho_preserve_order(track, pfn, order);
> 		[...]
> 
> So four 0-order pages will be preserved as one 2-order page. Restoring
> them as four 0-order pages is wrong. And my proposed patch for checking
> the magic [0] will uncover this exact bug.
> 
> I think you should either change the logic to always preserve at order
> 0, or maybe add a kho_restore_pages() that replicates the same order
> calculation.

Heh, it seems I shot myself in the foot when I suggested to move the sanity
checks to kho_restore_page() :-D

We surely don't want to preserve contiguous chunks of order-0 pages as
order 0, so kho_restore_pages() it is.
 
> [0] https://lore.kernel.org/lkml/20250917125725.665-2-pratyush@kernel.org/
> 
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 117d963e677c..6ec3eaa4e8d1 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -2516,8 +2516,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> >  
> >  	for (i = 0; i < reserved_mem_count; i++) {
> >  		struct reserve_mem_table *map = &reserved_mem_table[i];
> > +		struct page *page = phys_to_page(map->start);
> > +		unsigned int nr_pages = map->size >> PAGE_SHIFT;
> >  
> > -		err |= kho_preserve_phys(map->start, map->size);
> > +		err |= kho_preserve_pages(page, nr_pages);
> 
> Unrelated to this patch, but since there is no
> kho_restore_{phys,pages}(), won't the reserve_mem memory end up with
> uninitialized struct pages, since preserved pages are
> memblock_reserved_mark_noinit()?

True, this is something we need to fix.
 
> That would also be a case for kho_restore_pages() I suppose?

Yes, just need to find the right place to stick it.
We cannot call kho_restore_pages() in reserve_mem_kho_revive() because at
that point there's still no memory map.

-- 
Sincerely yours,
Mike.