[PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges

Pasha Tatashin posted 30 patches 1 month, 4 weeks ago
There is a newer version of this series
[PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Pasha Tatashin 1 month, 4 weeks ago
From: Changyuan Lyu <changyuanl@google.com>

Allow users of KHO to cancel a previous preservation by adding the
necessary interfaces to unpreserve folios and physical memory ranges.

Signed-off-by: Changyuan Lyu <changyuanl@google.com>
Co-developed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
 include/linux/kexec_handover.h | 12 +++++
 kernel/kexec_handover.c        | 90 +++++++++++++++++++++++++++++-----
 2 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index cabdff5f50a2..383e9460edb9 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -33,7 +33,9 @@ struct folio;
 bool kho_is_enabled(void);
 
 int kho_preserve_folio(struct folio *folio);
+int kho_unpreserve_folio(struct folio *folio);
 int kho_preserve_phys(phys_addr_t phys, size_t size);
+int kho_unpreserve_phys(phys_addr_t phys, size_t size);
 struct folio *kho_restore_folio(phys_addr_t phys);
 int kho_add_subtree(const char *name, void *fdt);
 void kho_remove_subtree(void *fdt);
@@ -58,11 +60,21 @@ static inline int kho_preserve_folio(struct folio *folio)
 	return -EOPNOTSUPP;
 }
 
+static inline int kho_unpreserve_folio(struct folio *folio)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
 {
 	return -EOPNOTSUPP;
 }
 
+static inline int kho_unpreserve_phys(phys_addr_t phys, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline struct folio *kho_restore_folio(phys_addr_t phys)
 {
 	return NULL;
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 8a4894e8ac71..b2e99aefbb32 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -136,26 +136,33 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
 	return elm;
 }
 
-static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
-			     unsigned long end_pfn)
+static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
+				   unsigned int order)
 {
 	struct kho_mem_phys_bits *bits;
 	struct kho_mem_phys *physxa;
+	const unsigned long pfn_high = pfn >> order;
 
-	while (pfn < end_pfn) {
-		const unsigned int order =
-			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
-		const unsigned long pfn_high = pfn >> order;
+	physxa = xa_load(&track->orders, order);
+	if (!physxa)
+		return;
 
-		physxa = xa_load(&track->orders, order);
-		if (!physxa)
-			continue;
+	bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
+	if (!bits)
+		return;
 
-		bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
-		if (!bits)
-			continue;
+	clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+}
 
-		clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
+			     unsigned long end_pfn)
+{
+	unsigned int order;
+
+	while (pfn < end_pfn) {
+		order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+
+		__kho_unpreserve_order(track, pfn, order);
 
 		pfn += 1 << order;
 	}
@@ -667,6 +674,30 @@ int kho_preserve_folio(struct folio *folio)
 }
 EXPORT_SYMBOL_GPL(kho_preserve_folio);
 
+/**
+ * kho_unpreserve_folio - unpreserve a folio.
+ * @folio: folio to unpreserve.
+ *
+ * Instructs KHO to unpreserve a folio that was preserved by
+ * kho_preserve_folio() before. The provided @folio (pfn and order)
+ * must exactly match a previously preserved folio.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_unpreserve_folio(struct folio *folio)
+{
+	const unsigned long pfn = folio_pfn(folio);
+	const unsigned int order = folio_order(folio);
+	struct kho_mem_track *track = &kho_out.track;
+
+	if (kho_out.finalized)
+		return -EBUSY;
+
+	__kho_unpreserve_order(track, pfn, order);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
+
 /**
  * kho_preserve_phys - preserve a physically contiguous range across kexec.
  * @phys: physical address of the range.
@@ -712,6 +743,39 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
 }
 EXPORT_SYMBOL_GPL(kho_preserve_phys);
 
+/**
+ * kho_unpreserve_phys - unpreserve a physically contiguous range.
+ * @phys: physical address of the range.
+ * @size: size of the range.
+ *
+ * Instructs KHO to unpreserve the memory range from @phys to @phys + @size.
+ * The @phys address must be aligned to @size, and @size must be a
+ * power-of-2 multiple of PAGE_SIZE.
+ * This call must exactly match the granularity at which memory was originally
+ * preserved (i.e. by a kho_preserve_phys() call with the same @phys and
+ * @size). Unpreserving arbitrary sub-ranges of larger preserved blocks is not
+ * supported.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_unpreserve_phys(phys_addr_t phys, size_t size)
+{
+	struct kho_mem_track *track = &kho_out.track;
+	unsigned long pfn = PHYS_PFN(phys);
+	unsigned long end_pfn = PHYS_PFN(phys + size);
+
+	if (kho_out.finalized)
+		return -EBUSY;
+
+	if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
+		return -EINVAL;
+
+	__kho_unpreserve(track, pfn, end_pfn);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kho_unpreserve_phys);
+
 static int __kho_abort(void)
 {
 	int err = 0;
-- 
2.50.1.565.gc32cd1483b-goog
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Jason Gunthorpe 1 month, 3 weeks ago
On Thu, Aug 07, 2025 at 01:44:13AM +0000, Pasha Tatashin wrote:
> +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> +{

Why are we adding phys apis? Didn't we talk about this before and
agree not to expose these?

The places using it are goofy:

+static int luo_fdt_setup(void)
+{
+       fdt_out = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                          get_order(LUO_FDT_SIZE));

+       ret = kho_preserve_phys(__pa(fdt_out), LUO_FDT_SIZE);

+       WARN_ON_ONCE(kho_unpreserve_phys(__pa(fdt_out), LUO_FDT_SIZE));

It literally allocated a page and then for some reason switches to
phys with an open coded __pa??

This is ugly, if you want a helper to match __get_free_pages() then
make one that works on void * directly. You can get the order of the
void * directly from the struct page IIRC when using GFP_COMP.

Which is perhaps another comment, if this __get_free_pages() is going
to be a common pattern (and I guess it will be) then the API should be
streamlined a lot more:

 void *kho_alloc_preserved_memory(gfp, size);
 void kho_free_preserved_memory(void *);

Which can wrap get_free_pages and the preserve logic, and gives
a nice path to possibly someday supporting non-PAGE_SIZE allocations.

Jason
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Mike Rapoport 1 month, 2 weeks ago
On Thu, Aug 14, 2025 at 10:22:33AM -0300, Jason Gunthorpe wrote:
> On Thu, Aug 07, 2025 at 01:44:13AM +0000, Pasha Tatashin wrote:
> > +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> > +{
> 
> Why are we adding phys apis? Didn't we talk about this before and
> agree not to expose these?
> 
> The places using it are goofy:
> 
> +static int luo_fdt_setup(void)
> +{
> +       fdt_out = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +                                          get_order(LUO_FDT_SIZE));
> 
> +       ret = kho_preserve_phys(__pa(fdt_out), LUO_FDT_SIZE);
> 
> +       WARN_ON_ONCE(kho_unpreserve_phys(__pa(fdt_out), LUO_FDT_SIZE));
> 
> It literally allocated a page and then for some reason switches to
> phys with an open coded __pa??
> 
> This is ugly, if you want a helper to match __get_free_pages() then
> make one that works on void * directly. You can get the order of the
> void * directly from the struct page IIRC when using GFP_COMP.
> 
> Which is perhaps another comment, if this __get_free_pages() is going
> to be a common pattern (and I guess it will be) then the API should be
> streamlined alot more:
> 
>  void *kho_alloc_preserved_memory(gfp, size);
>  void kho_free_preserved_memory(void *);

This looks backwards to me. KHO should not deal with memory allocation;
its responsibility is to preserve/restore the memory objects it supports.

For __get_free_pages() the natural KHO API is kho_(un)preserve_pages().
With struct page/memdesc we always have page_to_<specialized object> from
one side and page_to_pfn from the other side.

Then the folio and phys/virt APIs just become thin wrappers around the _page
APIs. And down the road we can add slab and maybe vmalloc.

Once folios no longer overlap struct page, we'll have a hard time with only
kho_preserve_folio() for memory that is not actually a folio (i.e. memory
that is neither anon nor page cache).
 
> Which can wrapper the get_free_pages and the preserve logic and gives
> a nice path to possibly someday supporting non-PAGE_SIZE allocations.
> 
> Jason
> 

-- 
Sincerely yours,
Mike.
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Jason Gunthorpe 1 month, 2 weeks ago
On Fri, Aug 15, 2025 at 12:12:10PM +0300, Mike Rapoport wrote:
> > Which is perhaps another comment, if this __get_free_pages() is going
> > to be a common pattern (and I guess it will be) then the API should be
> > streamlined alot more:
> > 
> >  void *kho_alloc_preserved_memory(gfp, size);
> >  void kho_free_preserved_memory(void *);
> 
> This looks backwards to me. KHO should not deal with memory allocation,
> it's responsibility to preserve/restore memory objects it supports.

Then maybe those are luo_ helpers

But having users open code __get_free_pages() and convert to/from
struct page, phys, etc is not a great idea.

The use case is simply to get some memory to preserve, it should work
in terms of void *. We don't support slab today so this has to be
emulated with full pages, but this detail should not leak out of the
API.

Jason
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Pasha Tatashin 1 week, 6 days ago
On Mon, Aug 18, 2025 at 9:55 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Fri, Aug 15, 2025 at 12:12:10PM +0300, Mike Rapoport wrote:
> > > Which is perhaps another comment, if this __get_free_pages() is going
> > > to be a common pattern (and I guess it will be) then the API should be
> > > streamlined alot more:
> > >
> > >  void *kho_alloc_preserved_memory(gfp, size);
> > >  void kho_free_preserved_memory(void *);
> >
> > This looks backwards to me. KHO should not deal with memory allocation,
> > it's responsibility to preserve/restore memory objects it supports.
>
> Then maybe those are luo_ helpers
>
> But having users open code __get_free_pages() and convert to/from
> struct page, phys, etc is not a great idea.

I added:

void *luo_contig_alloc_preserve(size_t size);
void luo_contig_free_unpreserve(void *mem, size_t size);

Allocate contiguous, zeroed, and preserved memory.

Pasha
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Pasha Tatashin 1 month, 3 weeks ago
On Thu, Aug 14, 2025 at 1:22 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Thu, Aug 07, 2025 at 01:44:13AM +0000, Pasha Tatashin wrote:
> > +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> > +{
>
> Why are we adding phys apis? Didn't we talk about this before and
> agree not to expose these?

It is already there, this patch simply completes a lacking unpreserve part.

We can talk about removing it in the future, but the phys interface
provides the benefit of being able to preserve objects whose length is
not a power of two.

>
> The places using it are goofy:
>
> +static int luo_fdt_setup(void)
> +{
> +       fdt_out = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +                                          get_order(LUO_FDT_SIZE));
>
> +       ret = kho_preserve_phys(__pa(fdt_out), LUO_FDT_SIZE);
>
> +       WARN_ON_ONCE(kho_unpreserve_phys(__pa(fdt_out), LUO_FDT_SIZE));
>
> It literally allocated a page and then for some reason switches to
> phys with an open coded __pa??
>
> This is ugly, if you want a helper to match __get_free_pages() then
> make one that works on void * directly. You can get the order of the
> void * directly from the struct page IIRC when using GFP_COMP.

I will make these changes.

>
> Which is perhaps another comment, if this __get_free_pages() is going
> to be a common pattern (and I guess it will be) then the API should be
> streamlined alot more:
>
>  void *kho_alloc_preserved_memory(gfp, size);
>  void kho_free_preserved_memory(void *);

Hm, not all GFP flags are compatible with KHO preserve. We could
add this or a similar API, but first let's make KHO completely
stateless: remove the finalize and abort parts from it.

>
> Which can wrapper the get_free_pages and the preserve logic and gives
> a nice path to possibly someday supporting non-PAGE_SIZE allocations.
>
> Jason
Re: [PATCH v3 07/30] kho: add interfaces to unpreserve folios and physical memory ranges
Posted by Jason Gunthorpe 1 month, 3 weeks ago
On Thu, Aug 14, 2025 at 03:05:04PM +0000, Pasha Tatashin wrote:
> On Thu, Aug 14, 2025 at 1:22 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Thu, Aug 07, 2025 at 01:44:13AM +0000, Pasha Tatashin wrote:
> > > +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> > > +{
> >
> > Why are we adding phys apis? Didn't we talk about this before and
> > agree not to expose these?
> 
> It is already there, this patch simply completes a lacking unpreserve part.

This patch yes, but that is because the later patches intend to use
it, which I argue those patches should not.

There should not be any users of these phys interfaces because they
make no sense. The API preserves folios and brings allocated folios
back on the other side. None of that is phys.

> > Which is perhaps another comment, if this __get_free_pages() is going
> > to be a common pattern (and I guess it will be) then the API should be
> > streamlined alot more:
> >
> >  void *kho_alloc_preserved_memory(gfp, size);
> >  void kho_free_preserved_memory(void *);
> 
> Hm, not all GFP flags are compatible with KHO preserve, but we could
> add this or similar API, but first let's make KHO completely
> stateless: remove, finalize and abort parts from it.

Right, in those cases we often WARN_ON and mask the invalid flags.

Jason