Integrate the LUO with the KHO framework to enable passing LUO state
across a kexec reboot.
When LUO transitions to the "prepared" state, it tells KHO to
finalize, so that all memory segments that were added to the KHO
preservation list are preserved. Once in the "prepared" state, no new
segments can be preserved. If LUO is canceled, it also tells KHO to
cancel the serialization, so that LUO can later re-enter the prepared
state.
This patch introduces the following changes:
- During the KHO finalization phase allocate FDT blob.
- Populate this FDT with a LUO compatibility string ("luo-v1").
LUO now depends on `CONFIG_KEXEC_HANDOVER`. The core state transition
logic (`luo_do_*_calls`) remains unimplemented in this patch.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/liveupdate.h | 6 +
include/linux/liveupdate/abi/luo.h | 54 +++++++
kernel/liveupdate/luo_core.c | 243 ++++++++++++++++++++++++++++-
kernel/liveupdate/luo_internal.h | 17 ++
mm/mm_init.c | 4 +
5 files changed, 323 insertions(+), 1 deletion(-)
create mode 100644 include/linux/liveupdate/abi/luo.h
create mode 100644 kernel/liveupdate/luo_internal.h
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index 730b76625fec..0be8804fc42a 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -13,6 +13,8 @@
#ifdef CONFIG_LIVEUPDATE
+void __init liveupdate_init(void);
+
/* Return true if live update orchestrator is enabled */
bool liveupdate_enabled(void);
@@ -21,6 +23,10 @@ int liveupdate_reboot(void);
#else /* CONFIG_LIVEUPDATE */
+/* No-op variant of liveupdate_init(). */
+static inline void liveupdate_init(void) { }
+
static inline bool liveupdate_enabled(void)
{
return false;
diff --git a/include/linux/liveupdate/abi/luo.h b/include/linux/liveupdate/abi/luo.h
new file mode 100644
index 000000000000..9483a294287f
--- /dev/null
+++ b/include/linux/liveupdate/abi/luo.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: Live Update Orchestrator ABI
+ *
+ * This header defines the stable Application Binary Interface used by the
+ * Live Update Orchestrator to pass state from a pre-update kernel to a
+ * post-update kernel. The ABI is built upon the Kexec HandOver framework
+ * and uses a Flattened Device Tree to describe the preserved data.
+ *
+ * This interface is a contract. Any modification to the FDT structure, node
+ * properties, compatible strings, or the layout of the `__packed` serialization
+ * structures defined here constitutes a breaking change. Such changes require
+ * incrementing the version number in the relevant `_COMPATIBLE` string to
+ * prevent a new kernel from misinterpreting data from an old kernel.
+ *
+ * FDT Structure Overview:
+ * The entire LUO state is encapsulated within a single KHO entry named "LUO".
+ * This entry contains an FDT with the following layout:
+ *
+ * .. code-block:: none
+ *
+ * / {
+ * compatible = "luo-v1";
+ * liveupdate-number = <...>;
+ * };
+ *
+ * Main LUO Node (/):
+ *
+ * - compatible: "luo-v1"
+ * Identifies the overall LUO ABI version.
+ * - liveupdate-number: u64
+ * A counter tracking the number of successful live updates performed.
+ */
+
+#ifndef _LINUX_LIVEUPDATE_ABI_LUO_H
+#define _LINUX_LIVEUPDATE_ABI_LUO_H
+
+/*
+ * The LUO FDT hooks all LUO state for sessions, fds, etc.
+ * In the root it also carries the "liveupdate-number" 64-bit property that
+ * corresponds to the number of live-updates performed on this machine.
+ */
+#define LUO_FDT_SIZE PAGE_SIZE
+#define LUO_FDT_KHO_ENTRY_NAME "LUO"
+#define LUO_FDT_COMPATIBLE "luo-v1"
+#define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number"
+
+#endif /* _LINUX_LIVEUPDATE_ABI_LUO_H */
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index 0e1ab19fa1cd..c1bd236bccb0 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -42,11 +42,23 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kexec_handover.h>
#include <linux/kobject.h>
+#include <linux/libfdt.h>
#include <linux/liveupdate.h>
+#include <linux/liveupdate/abi/luo.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include "luo_internal.h"
+#include "kexec_handover_internal.h"
static struct {
bool enabled;
+ void *fdt_out; /* outgoing LUO FDT, set by luo_fdt_setup() once added to KHO */
+ void *fdt_in; /* incoming LUO FDT, set by luo_early_startup() from the KHO subtree */
+ u64 liveupdate_num; /* LUO_FDT_LIVEUPDATE_NUM value copied from the incoming FDT */
} luo_global;
static int __init early_liveupdate_param(char *buf)
@@ -55,6 +67,122 @@ static int __init early_liveupdate_param(char *buf)
}
early_param("liveupdate", early_liveupdate_param);
+/*
+ * Parse the LUO state handed over through KHO, if there is any.
+ *
+ * Live update is disabled when KHO is disabled. Otherwise the
+ * LUO_FDT_KHO_ENTRY_NAME subtree is looked up; a missing subtree (-ENOENT)
+ * is not treated as an error. When the subtree is found, its root node must
+ * be compatible with LUO_FDT_COMPATIBLE and must carry a
+ * LUO_FDT_LIVEUPDATE_NUM property of exactly sizeof(u64) bytes, which is
+ * copied into luo_global.liveupdate_num.
+ *
+ * Return: 0 on success or when there is nothing to retrieve, a negative
+ * error code otherwise.
+ */
+static int __init luo_early_startup(void)
+{
+	phys_addr_t fdt_phys;
+	int err, ln_size;
+	const void *ptr;
+
+	if (!kho_is_enabled()) {
+		if (liveupdate_enabled())
+			pr_warn("Disabling liveupdate because KHO is disabled\n");
+		luo_global.enabled = false;
+		return 0;
+	}
+
+	/* Retrieve LUO subtree, and verify its format. */
+	err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
+	if (err) {
+		if (err != -ENOENT) {
+			pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
+			       LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
+			return err;
+		}
+
+		/* No LUO subtree in KHO: nothing to parse. */
+		return 0;
+	}
+
+	luo_global.fdt_in = __va(fdt_phys);
+	err = fdt_node_check_compatible(luo_global.fdt_in, 0,
+					LUO_FDT_COMPATIBLE);
+	if (err) {
+		pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
+		       LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
+
+		return -EINVAL;
+	}
+
+	ln_size = 0;
+	ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
+			  &ln_size);
+	if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
+		pr_err("Unable to get live update number '%s' [%d]\n",
+		       LUO_FDT_LIVEUPDATE_NUM, ln_size);
+
+		return -EINVAL;
+	}
+
+	/* Copy byte-wise instead of dereferencing the property pointer. */
+	memcpy(&luo_global.liveupdate_num, ptr,
+	       sizeof(luo_global.liveupdate_num));
+	/* liveupdate_num is a u64, so print it as unsigned. */
+	pr_info("Retrieved live update data, liveupdate number: %llu\n",
+		luo_global.liveupdate_num);
+
+	return 0;
+}
+
+/*
+ * Boot-time entry point for LUO: process the incoming state and disable live
+ * update if that processing fails.
+ */
+void __init liveupdate_init(void)
+{
+	int ret = luo_early_startup();
+
+	if (!ret)
+		return;
+
+	pr_err("The incoming tree failed to initialize properly [%pe], disabling live update\n",
+	       ERR_PTR(ret));
+	luo_global.enabled = false;
+}
+
+/*
+ * Called during boot to create the outgoing LUO FDT tree.
+ *
+ * Allocates LUO_FDT_SIZE bytes of preserved memory, writes an FDT whose root
+ * node carries the LUO_FDT_COMPATIBLE string and the LUO_FDT_LIVEUPDATE_NUM
+ * property (the incoming live update number plus one), and adds it to KHO
+ * under LUO_FDT_KHO_ENTRY_NAME. On failure the memory is unpreserved and
+ * freed again.
+ *
+ * NOTE(review): the libfdt return codes are OR-ed together into err and
+ * returned as-is, so a failure value is presumably not a meaningful errno;
+ * confirm whether any caller depends on the exact value.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+static int __init luo_fdt_setup(void)
+{
+ const u64 ln = luo_global.liveupdate_num + 1; /* value stored in the outgoing FDT */
+ void *fdt_out;
+ int err;
+
+ fdt_out = luo_alloc_preserve(LUO_FDT_SIZE);
+ if (IS_ERR(fdt_out)) {
+ pr_err("failed to allocate/preserve FDT memory\n");
+ return PTR_ERR(fdt_out);
+ }
+
+ err = fdt_create(fdt_out, LUO_FDT_SIZE);
+ err |= fdt_finish_reservemap(fdt_out);
+ err |= fdt_begin_node(fdt_out, ""); /* root node */
+ err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
+ err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
+ err |= fdt_end_node(fdt_out);
+ err |= fdt_finish(fdt_out);
+ if (err) /* any of the calls above failed */
+ goto exit_free;
+
+ err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
+ if (err)
+ goto exit_free;
+ luo_global.fdt_out = fdt_out; /* published only after KHO accepted it */
+
+ return 0;
+
+exit_free:
+ luo_free_unpreserve(fdt_out, LUO_FDT_SIZE);
+ pr_err("failed to prepare LUO FDT: %d\n", err);
+
+ return err;
+}
+
+/*
+ * Create the outgoing LUO FDT when live update is enabled, and disable live
+ * update if that fails.
+ *
+ * A late initcall is sufficient here because the outgoing data structures
+ * are only used after the kernel has entered userspace.
+ */
+static int __init luo_late_startup(void)
+{
+	int ret = 0;
+
+	if (liveupdate_enabled()) {
+		ret = luo_fdt_setup();
+		if (ret)
+			luo_global.enabled = false;
+	}
+
+	return ret;
+}
+late_initcall(luo_late_startup);
+
/* Public Functions */
/**
@@ -69,7 +197,22 @@ early_param("liveupdate", early_liveupdate_param);
*/
int liveupdate_reboot(void)
{
- return 0;
+ int err;
+
+ if (!liveupdate_enabled())
+ return 0;
+
+ err = kho_finalize();
+ if (err) {
+ pr_err("kho_finalize failed %d\n", err);
+ /*
+ * kho_finalize() may return libfdt errors. To avoid passing
+ * unknown error codes to userspace, report -EAGAIN instead.
+ */
+ err = -EAGAIN;
+ }
+
+ return err;
}
/**
@@ -84,3 +227,101 @@ bool liveupdate_enabled(void)
{
return luo_global.enabled;
}
+
+/**
+ * luo_alloc_preserve - Allocate zeroed memory and preserve it through KHO.
+ * @size: Number of bytes the caller needs; must not be zero.
+ *
+ * Allocates a physically contiguous, zeroed folio that is large enough to
+ * hold @size bytes and registers it with KHO so that it is preserved across
+ * a kexec.
+ *
+ * Note: the allocation is made in whole pages, and the number of pages is
+ * rounded up to a power of two.
+ *
+ * Return: virtual address of the preserved memory on success. On failure an
+ * ERR_PTR() is returned: -EINVAL if @size is zero, -E2BIG if the required
+ * order exceeds MAX_PAGE_ORDER, -ENOMEM if the allocation fails, or the
+ * error reported by kho_preserve_folio().
+ */
+void *luo_alloc_preserve(size_t size)
+{
+	struct folio *folio;
+	int order;
+	int err;
+
+	if (!size)
+		return ERR_PTR(-EINVAL);
+
+	order = get_order(size);
+	if (order > MAX_PAGE_ORDER)
+		return ERR_PTR(-E2BIG);
+
+	folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
+	if (!folio)
+		return ERR_PTR(-ENOMEM);
+
+	err = kho_preserve_folio(folio);
+	if (!err)
+		return folio_address(folio);
+
+	/* Preservation failed: release the folio before reporting the error. */
+	folio_put(folio);
+	return ERR_PTR(err);
+}
+
+/**
+ * luo_free_unpreserve - Unpreserve and free memory.
+ * @mem:  Pointer returned by luo_alloc_preserve(). NULL is ignored.
+ * @size: The size that was requested at allocation time. It is only used for
+ *        a sanity check of the resulting order; zero is ignored.
+ *
+ * Removes the folio backing @mem from KHO preservation and drops the folio
+ * reference. A failure to unpreserve triggers a one-time warning, and the
+ * folio reference is dropped regardless.
+ */
+void luo_free_unpreserve(void *mem, size_t size)
+{
+	struct folio *folio;
+
+	if (!mem || !size)
+		return;
+
+	/* Reject sizes that could never have been allocated. */
+	if (WARN_ON_ONCE(get_order(size) > MAX_PAGE_ORDER))
+		return;
+
+	folio = virt_to_folio(mem);
+	WARN_ON_ONCE(kho_unpreserve_folio(folio));
+	folio_put(folio);
+}
+
+/**
+ * luo_free_restore - Restore and free memory after kexec.
+ * @mem:  Pointer to the memory (in the new kernel's address space)
+ *        that was allocated by the old kernel. NULL is ignored.
+ * @size: The original size requested during allocation. It is only used for
+ *        a sanity check of the resulting order; zero is ignored.
+ *
+ * This function is intended to be called in the new kernel (post-kexec)
+ * to take ownership of and free a memory region that was preserved by the
+ * old kernel using luo_alloc_preserve().
+ *
+ * It first restores the folio from KHO (using its physical address) and
+ * then releases it. A warning is emitted, and nothing is freed, if the
+ * folio cannot be restored.
+ */
+void luo_free_restore(void *mem, size_t size)
+{
+	struct folio *folio;
+	unsigned int order;
+
+	if (!mem || !size)
+		return;
+
+	order = get_order(size);
+	if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
+		return;
+
+	folio = kho_restore_folio(__pa(mem));
+	if (WARN_ON(!folio))
+		return;
+
+	/*
+	 * Release through the folio, as luo_free_unpreserve() does, so that
+	 * the free does not depend on an order recomputed from @size.
+	 */
+	folio_put(folio);
+}
diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h
new file mode 100644
index 000000000000..29f47a69be0b
--- /dev/null
+++ b/kernel/liveupdate/luo_internal.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _LINUX_LUO_INTERNAL_H
+#define _LINUX_LUO_INTERNAL_H
+
+#include <linux/liveupdate.h>
+
+void *luo_alloc_preserve(size_t size);
+void luo_free_unpreserve(void *mem, size_t size);
+void luo_free_restore(void *mem, size_t size);
+
+#endif /* _LINUX_LUO_INTERNAL_H */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c6812b4dbb2e..20c850a52167 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -21,6 +21,7 @@
#include <linux/buffer_head.h>
#include <linux/kmemleak.h>
#include <linux/kfence.h>
+#include <linux/liveupdate.h>
#include <linux/page_ext.h>
#include <linux/pti.h>
#include <linux/pgtable.h>
@@ -2703,6 +2704,9 @@ void __init mm_core_init(void)
*/
kho_memory_init();
+ /* Live Update should follow right after KHO is initialized */
+ liveupdate_init();
+
memblock_free_all();
mem_init();
kmem_cache_init();
--
2.51.2.1041.gc1ab5b90ca-goog
On Fri, Nov 07, 2025 at 04:03:00PM -0500, Pasha Tatashin wrote:
> Integrate the LUO with the KHO framework to enable passing LUO state
> across a kexec reboot.
>
> When LUO is transitioned to a "prepared" state, it tells KHO to
> finalize, so all memory segments that were added to KHO preservation
> list are getting preserved. After "Prepared" state no new segments
> can be preserved. If LUO is canceled, it also tells KHO to cancel the
> serialization, and therefore, later LUO can go back into the prepared
> state.
>
> This patch introduces the following changes:
> - During the KHO finalization phase allocate FDT blob.
> - Populate this FDT with a LUO compatibility string ("luo-v1").
>
> LUO now depends on `CONFIG_KEXEC_HANDOVER`. The core state transition
> logic (`luo_do_*_calls`) remains unimplemented in this patch.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
> include/linux/liveupdate.h | 6 +
> include/linux/liveupdate/abi/luo.h | 54 +++++++
> kernel/liveupdate/luo_core.c | 243 ++++++++++++++++++++++++++++-
> kernel/liveupdate/luo_internal.h | 17 ++
> mm/mm_init.c | 4 +
> 5 files changed, 323 insertions(+), 1 deletion(-)
> create mode 100644 include/linux/liveupdate/abi/luo.h
> create mode 100644 kernel/liveupdate/luo_internal.h
>
> diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
> index 730b76625fec..0be8804fc42a 100644
> --- a/include/linux/liveupdate.h
> +++ b/include/linux/liveupdate.h
> @@ -13,6 +13,8 @@
>
> #ifdef CONFIG_LIVEUPDATE
>
> +void __init liveupdate_init(void);
> +
> /* Return true if live update orchestrator is enabled */
> bool liveupdate_enabled(void);
>
> @@ -21,6 +23,10 @@ int liveupdate_reboot(void);
>
> #else /* CONFIG_LIVEUPDATE */
>
> +static inline void liveupdate_init(void)
> +{
> +}
The common practice is to place the braces on the same line as the function
declaration.
...
> +static int __init luo_early_startup(void)
> +{
> + phys_addr_t fdt_phys;
> + int err, ln_size;
> + const void *ptr;
> +
> + if (!kho_is_enabled()) {
> + if (liveupdate_enabled())
> + pr_warn("Disabling liveupdate because KHO is disabled\n");
> + luo_global.enabled = false;
> + return 0;
> + }
> +
> + /* Retrieve LUO subtree, and verify its format. */
> + err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
> + if (err) {
> + if (err != -ENOENT) {
> + pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
> + LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
> + return err;
> + }
> +
> + return 0;
> + }
> +
> + luo_global.fdt_in = __va(fdt_phys);
phys_to_virt is clearer, isn't it?
> + err = fdt_node_check_compatible(luo_global.fdt_in, 0,
> + LUO_FDT_COMPATIBLE);
...
> +void __init liveupdate_init(void)
> +{
> + int err;
> +
> + err = luo_early_startup();
> + if (err) {
> + pr_err("The incoming tree failed to initialize properly [%pe], disabling live update\n",
> + ERR_PTR(err));
> + luo_global.enabled = false;
> + }
> +}
> +
> +/* Called during boot to create LUO fdt tree */
^ create outgoing
> +static int __init luo_late_startup(void)
> +{
> + int err;
> +
> + if (!liveupdate_enabled())
> + return 0;
> +
> + err = luo_fdt_setup();
> + if (err)
> + luo_global.enabled = false;
> +
> + return err;
> +}
> +late_initcall(luo_late_startup);
It would be nice to have a comment explaining why late_initcall() is fine
and why there's no need to initialize the outgoing fdt earlier.
> +/**
> + * luo_alloc_preserve - Allocate, zero, and preserve memory.
I think this and the "free" counterparts would be useful for any KHO users,
even those that don't need LUO.
> + * @size: The number of bytes to allocate.
> + *
> + * Allocates a physically contiguous block of zeroed pages that is large
> + * enough to hold @size bytes. The allocated memory is then registered with
> + * KHO for preservation across a kexec.
> + *
> + * Note: The actual allocated size will be rounded up to the nearest
> + * power-of-two page boundary.
> + *
> + * @return A virtual pointer to the allocated and preserved memory on success,
> + * or an ERR_PTR() encoded error on failure.
> + */
> +void *luo_alloc_preserve(size_t size)
> +{
> + struct folio *folio;
> + int order, ret;
> +
> + if (!size)
> + return ERR_PTR(-EINVAL);
> +
> + order = get_order(size);
> + if (order > MAX_PAGE_ORDER)
> + return ERR_PTR(-E2BIG);
High order allocations would likely fail or at least cause a heavy reclaim.
For now it seems that we won't be needing really large contiguous chunks so
maybe limiting this to PAGE_ALLOC_COSTLY_ORDER?
Later if we'd need higher order allocations we can try to allocate with
__GFP_NORETRY or __GFP_RETRY_MAYFAIL with a fallback to vmalloc.
> +
> + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
> + if (!folio)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = kho_preserve_folio(folio);
> + if (ret) {
> + folio_put(folio);
> + return ERR_PTR(ret);
> + }
> +
> + return folio_address(folio);
> +}
> +
--
Sincerely yours,
Mike.
On Fri, Nov 14, 2025 at 6:30 AM Mike Rapoport <rppt@kernel.org> wrote:
>
> On Fri, Nov 07, 2025 at 04:03:00PM -0500, Pasha Tatashin wrote:
> > Integrate the LUO with the KHO framework to enable passing LUO state
> > across a kexec reboot.
> >
> > When LUO is transitioned to a "prepared" state, it tells KHO to
> > finalize, so all memory segments that were added to KHO preservation
> > list are getting preserved. After "Prepared" state no new segments
> > can be preserved. If LUO is canceled, it also tells KHO to cancel the
> > serialization, and therefore, later LUO can go back into the prepared
> > state.
> >
> > This patch introduces the following changes:
> > - During the KHO finalization phase allocate FDT blob.
> > - Populate this FDT with a LUO compatibility string ("luo-v1").
> >
> > LUO now depends on `CONFIG_KEXEC_HANDOVER`. The core state transition
> > logic (`luo_do_*_calls`) remains unimplemented in this patch.
> >
> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> > ---
> > include/linux/liveupdate.h | 6 +
> > include/linux/liveupdate/abi/luo.h | 54 +++++++
> > kernel/liveupdate/luo_core.c | 243 ++++++++++++++++++++++++++++-
> > kernel/liveupdate/luo_internal.h | 17 ++
> > mm/mm_init.c | 4 +
> > 5 files changed, 323 insertions(+), 1 deletion(-)
> > create mode 100644 include/linux/liveupdate/abi/luo.h
> > create mode 100644 kernel/liveupdate/luo_internal.h
> >
> > diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
> > index 730b76625fec..0be8804fc42a 100644
> > --- a/include/linux/liveupdate.h
> > +++ b/include/linux/liveupdate.h
> > @@ -13,6 +13,8 @@
> >
> > #ifdef CONFIG_LIVEUPDATE
> >
> > +void __init liveupdate_init(void);
> > +
> > /* Return true if live update orchestrator is enabled */
> > bool liveupdate_enabled(void);
> >
> > @@ -21,6 +23,10 @@ int liveupdate_reboot(void);
> >
> > #else /* CONFIG_LIVEUPDATE */
> >
> > +static inline void liveupdate_init(void)
> > +{
> > +}
>
> The common practice is to place brackets at the same line with function
> declaration.
Sure.
>
> ...
>
> > +static int __init luo_early_startup(void)
> > +{
> > + phys_addr_t fdt_phys;
> > + int err, ln_size;
> > + const void *ptr;
> > +
> > + if (!kho_is_enabled()) {
> > + if (liveupdate_enabled())
> > + pr_warn("Disabling liveupdate because KHO is disabled\n");
> > + luo_global.enabled = false;
> > + return 0;
> > + }
> > +
> > + /* Retrieve LUO subtree, and verify its format. */
> > + err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
> > + if (err) {
> > + if (err != -ENOENT) {
> > + pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
> > + LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
> > + return err;
> > + }
> > +
> > + return 0;
> > + }
> > +
> > + luo_global.fdt_in = __va(fdt_phys);
>
> phys_to_virt is clearer, isn't it?
Sure
>
> > + err = fdt_node_check_compatible(luo_global.fdt_in, 0,
> > + LUO_FDT_COMPATIBLE);
>
> ...
>
> > +void __init liveupdate_init(void)
> > +{
> > + int err;
> > +
> > + err = luo_early_startup();
> > + if (err) {
> > + pr_err("The incoming tree failed to initialize properly [%pe], disabling live update\n",
> > + ERR_PTR(err));
> > + luo_global.enabled = false;
> > + }
> > +}
> > +
> > +/* Called during boot to create LUO fdt tree */
>
> ^ create outgoing
OK
>
> > +static int __init luo_late_startup(void)
> > +{
> > + int err;
> > +
> > + if (!liveupdate_enabled())
> > + return 0;
> > +
> > + err = luo_fdt_setup();
> > + if (err)
> > + luo_global.enabled = false;
> > +
> > + return err;
> > +}
> > +late_initcall(luo_late_startup);
>
> It would be nice to have a comment explaining why late_initcall() is fine
> and why there's no need to initialize the outgoing fdt earlier.
I will add a comment; basically it is fine because the outgoing data
structures are only used after we enter userspace.
>
> > +/**
> > + * luo_alloc_preserve - Allocate, zero, and preserve memory.
>
> I think this and the "free" counterparts would be useful for any KHO users,
> even those that don't need LUO.
I will move them to KHO.
>
> > + * @size: The number of bytes to allocate.
> > + *
> > + * Allocates a physically contiguous block of zeroed pages that is large
> > + * enough to hold @size bytes. The allocated memory is then registered with
> > + * KHO for preservation across a kexec.
> > + *
> > + * Note: The actual allocated size will be rounded up to the nearest
> > + * power-of-two page boundary.
> > + *
> > + * @return A virtual pointer to the allocated and preserved memory on success,
> > + * or an ERR_PTR() encoded error on failure.
> > + */
> > +void *luo_alloc_preserve(size_t size)
> > +{
> > + struct folio *folio;
> > + int order, ret;
> > +
> > + if (!size)
> > + return ERR_PTR(-EINVAL);
> > +
> > + order = get_order(size);
> > + if (order > MAX_PAGE_ORDER)
> > + return ERR_PTR(-E2BIG);
>
> High order allocations would likely fail or at least cause a heavy reclaim.
> For now it seems that we won't be needing really large contiguous chunks so
> maybe limiting this to PAGE_ALLOC_COSTLY_ORDER?
Let's use MAX_PAGE_ORDER for now, my concern is that
PAGE_ALLOC_COSTLY_ORDER is too fragile to make it part of ABI. If
allocation fails, the user will have to deal with it, as we return a
proper error code.
> Later if we'd need higher order allocations we can try to allocate with
> __GFP_NORETRY or __GFP_RETRY_MAYFAIL with a fallback to vmalloc.
>
> > +
> > + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
> > + if (!folio)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + ret = kho_preserve_folio(folio);
> > + if (ret) {
> > + folio_put(folio);
> > + return ERR_PTR(ret);
> > + }
> > +
> > + return folio_address(folio);
> > +}
> > +
>
> --
> Sincerely yours,
> Mike.
On Fri, Nov 07, 2025 at 04:03:00PM -0500, Pasha Tatashin wrote:
> Integrate the LUO with the KHO framework to enable passing LUO state
> across a kexec reboot.
>
> When LUO is transitioned to a "prepared" state, it tells KHO to
> finalize, so all memory segments that were added to KHO preservation
> list are getting preserved. After "Prepared" state no new segments
> can be preserved. If LUO is canceled, it also tells KHO to cancel the
> serialization, and therefore, later LUO can go back into the prepared
> state.
>
> This patch introduces the following changes:
> - During the KHO finalization phase allocate FDT blob.
> - Populate this FDT with a LUO compatibility string ("luo-v1").
>
> LUO now depends on `CONFIG_KEXEC_HANDOVER`. The core state transition
> logic (`luo_do_*_calls`) remains unimplemented in this patch.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
...
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index c6812b4dbb2e..20c850a52167 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -21,6 +21,7 @@
> #include <linux/buffer_head.h>
> #include <linux/kmemleak.h>
> #include <linux/kfence.h>
> +#include <linux/liveupdate.h>
> #include <linux/page_ext.h>
> #include <linux/pti.h>
> #include <linux/pgtable.h>
> @@ -2703,6 +2704,9 @@ void __init mm_core_init(void)
> */
> kho_memory_init();
>
> + /* Live Update should follow right after KHO is initialized */
> + liveupdate_init();
> +
Why do you think it should be immediately after kho_memory_init()?
Any reason this can't be called from start_kernel() or even later as an
early_initcall() or core_initcall()?
> memblock_free_all();
> mem_init();
> kmem_cache_init();
> --
> 2.51.2.1041.gc1ab5b90ca-goog
>
>
--
Sincerely yours,
Mike.
> > 	kho_memory_init();
> >
> > +	/* Live Update should follow right after KHO is initialized */
> > +	liveupdate_init();
> > +
>
> Why do you think it should be immediately after kho_memory_init()?
> Any reason this can't be called from start_kernel() or even later as an
> early_initcall() or core_initcall()?

Unfortunately, no, even here it is too late, and we might need to find
a way to move the kho_init/liveupdate_init earlier. We must be able to
preserve HugeTLB pages, and those are reserved earlier in boot.

Pasha
On Tue, Nov 11, 2025 at 3:39 PM Pasha Tatashin <pasha.tatashin@soleen.com> wrote:
>
> > > 	kho_memory_init();
> > >
> > > +	/* Live Update should follow right after KHO is initialized */
> > > +	liveupdate_init();
> > > +
> >
> > Why do you think it should be immediately after kho_memory_init()?
> > Any reason this can't be called from start_kernel() or even later as an
> > early_initcall() or core_initcall()?
>
> Unfortunately, no, even here it is too late, and we might need to find
> a way to move the kho_init/liveupdate_init earlier. We must be able to
> preserve HugeTLB pages, and those are reserved earlier in boot.

Just to clarify: liveupdate_init() is needed to start using the
liveupdate_flb_incoming_* API, and FLB data is needed during HugeTLB
reservation.

Pasha
On Tue, Nov 11, 2025 at 03:42:24PM -0500, Pasha Tatashin wrote: > On Tue, Nov 11, 2025 at 3:39 PM Pasha Tatashin > <pasha.tatashin@soleen.com> wrote: > > > > > > kho_memory_init(); > > > > > > > > + /* Live Update should follow right after KHO is initialized */ > > > > + liveupdate_init(); > > > > + > > > > > > Why do you think it should be immediately after kho_memory_init()? > > > Any reason this can't be called from start_kernel() or even later as an > > > early_initcall() or core_initall()? > > > > Unfortunately, no, even here it is too late, and we might need to find > > a way to move the kho_init/liveupdate_init earlier. We must be able to > > preserve HugeTLB pages, and those are reserved earlier in boot. > > Just to clarify: liveupdate_init() is needed to start using: > liveupdate_flb_incoming_* API, and FLB data is needed during HugeTLB > reservation. Since flb is "file-lifecycle-bound", it implies *file*. Early memory reservations in hugetlb are not bound to files, they end up in file objects way later. So I think for now we can move liveupdate_init() later in boot and we will solve the problem of hugetlb reservations when we add support for hugetlb. > Pasha -- Sincerely yours, Mike.
On Wed, Nov 12, 2025 at 5:21 AM Mike Rapoport <rppt@kernel.org> wrote: > > On Tue, Nov 11, 2025 at 03:42:24PM -0500, Pasha Tatashin wrote: > > On Tue, Nov 11, 2025 at 3:39 PM Pasha Tatashin > > <pasha.tatashin@soleen.com> wrote: > > > > > > > > kho_memory_init(); > > > > > > > > > > + /* Live Update should follow right after KHO is initialized */ > > > > > + liveupdate_init(); > > > > > + > > > > > > > > Why do you think it should be immediately after kho_memory_init()? > > > > Any reason this can't be called from start_kernel() or even later as an > > > > early_initcall() or core_initall()? > > > > > > Unfortunately, no, even here it is too late, and we might need to find > > > a way to move the kho_init/liveupdate_init earlier. We must be able to > > > preserve HugeTLB pages, and those are reserved earlier in boot. > > > > Just to clarify: liveupdate_init() is needed to start using: > > liveupdate_flb_incoming_* API, and FLB data is needed during HugeTLB > > reservation. > > Since flb is "file-lifecycle-bound", it implies *file*. Early memory > reservations in hugetlb are not bound to files, they end up in file objects > way later. FLB global objects act similarly to subsystem-wide data, except their data has a clear creation and destruction time tied to preserved files. When the first file of a particular type is added to LUO, this global data is created; when the last file of that type is removed (unpreserved or finished), this global data is destroyed, this is why its life is bound to file lifecycle. Crucially, this global data is accessible at any time while LUO owns the associated files spanning the early boot update boundary. > So I think for now we can move liveupdate_init() later in boot and we will > solve the problem of hugetlb reservations when we add support for hugetlb. HugeTLB reserves memory early in boot. If we already have preserved HugeTLB pages via LUO/KHO, we must ensure they are counted against the boot-time reservation. 
For example, if hugetlb_cma_reserve() needs to reserve ten 1G pages, but LUO has already preserved seven, we only need to reserve three new pages and the rest are going to be restored with the files. Since this count is contained in the FLB global object, that data needs to be available during the early reservation phase. (Pratyush is working on HugeTLB preservation and can explain further). Pasha
On Wed, Nov 12, 2025 at 07:46:23AM -0500, Pasha Tatashin wrote: > On Wed, Nov 12, 2025 at 5:21 AM Mike Rapoport <rppt@kernel.org> wrote: > > > > On Tue, Nov 11, 2025 at 03:42:24PM -0500, Pasha Tatashin wrote: > > > On Tue, Nov 11, 2025 at 3:39 PM Pasha Tatashin > > > <pasha.tatashin@soleen.com> wrote: > > > > > > > > > > kho_memory_init(); > > > > > > > > > > > > + /* Live Update should follow right after KHO is initialized */ > > > > > > + liveupdate_init(); > > > > > > + > > > > > > > > > > Why do you think it should be immediately after kho_memory_init()? > > > > > Any reason this can't be called from start_kernel() or even later as an > > > > > early_initcall() or core_initall()? > > > > > > > > Unfortunately, no, even here it is too late, and we might need to find > > > > a way to move the kho_init/liveupdate_init earlier. We must be able to > > > > preserve HugeTLB pages, and those are reserved earlier in boot. > > > > > > Just to clarify: liveupdate_init() is needed to start using: > > > liveupdate_flb_incoming_* API, and FLB data is needed during HugeTLB > > > reservation. > > > > Since flb is "file-lifecycle-bound", it implies *file*. Early memory > > reservations in hugetlb are not bound to files, they end up in file objects > > way later. > > FLB global objects act similarly to subsystem-wide data, except their > data has a clear creation and destruction time tied to preserved > files. When the first file of a particular type is added to LUO, this > global data is created; when the last file of that type is removed > (unpreserved or finished), this global data is destroyed, this is why > its life is bound to file lifecycle. Crucially, this global data is > accessible at any time while LUO owns the associated files spanning > the early boot update boundary. But there are no files at mm_core_init(). I'm really confused here. 
> > So I think for now we can move liveupdate_init() later in boot and we will > > solve the problem of hugetlb reservations when we add support for hugetlb. > > HugeTLB reserves memory early in boot. If we already have preserved > HugeTLB pages via LUO/KHO, we must ensure they are counted against the > boot-time reservation. For example, if hugetlb_cma_reserve() needs to > reserve ten 1G pages, but LUO has already preserved seven, we only > need to reserve three new pages and the rest are going to be restored > with the files. > > Since this count is contained in the FLB global object, that data > needs to be available during the early reservation phase. (Pratyush is > working on HugeTLB preservation and can explain further). Not sure I really follow the design here, but in my understanding the gist here is that hugetlb reservations need to be aware of the preserved state. If that's the case, we definitely can move liveupdate_init() to an initcall and revisit this when hugetlb support for luo comes along. > Pasha > -- Sincerely yours, Mike.
> > FLB global objects act similarly to subsystem-wide data, except their > > data has a clear creation and destruction time tied to preserved > > files. When the first file of a particular type is added to LUO, this > > global data is created; when the last file of that type is removed > > (unpreserved or finished), this global data is destroyed, this is why > > its life is bound to file lifecycle. Crucially, this global data is > > accessible at any time while LUO owns the associated files spanning > > the early boot update boundary. > > But there are no files at mm_core_init(). I'm really confused here. This isn't about the files themselves, but about the subsystem global data. The files are only used to describe the lifetime of this global data. I think mm_core_init() is too late, and the call would need to be moved earlier to work correctly with subsystems. At the very least, we will have to add some early FDT parsing to retrieve data during early boot, but that would be part of the HugeTLB preservation work. I can move liveupdate_init() inside kho_memory_init(), so we don't need to modify mm_core_init(). Or, rename kho_memory_init to kho_and_liveupdate_memory_init() and combine the two calls into a single function in kexec_handover.c. > > > So I think for now we can move liveupdate_init() later in boot and we will > > > solve the problem of hugetlb reservations when we add support for hugetlb. > > > > HugeTLB reserves memory early in boot. If we already have preserved > > HugeTLB pages via LUO/KHO, we must ensure they are counted against the > > boot-time reservation. For example, if hugetlb_cma_reserve() needs to > > reserve ten 1G pages, but LUO has already preserved seven, we only > > need to reserve three new pages and the rest are going to be restored > > with the files. > > > > Since this count is contained in the FLB global object, that data > > needs to be available during the early reservation phase. 
(Pratyush is > > working on HugeTLB preservation and can explain further). > > Not sure I really follow the design here, but in my understanding the gist > here is that hugetlb reservations need to be aware of the preserved state. > If that's the case, we definitely can move liveupdate_init() to an initcall > and revisit this when hugetlb support for luo comes along. This will break the in-kernel tests that ensure FLB data is accessible and works correctly during early boot, as they use early_initcall(liveupdate_test_early_init);. We cannot rely on early_initcall() for liveupdate_init() because it would compete with the test. We also can't move the test to a later initcall, as that would break the verification of what FLB is promising: early access to global data by subsystems that need it (PCI, IOMMU Core, HugeTLB, etc.). Thanks, Pasha
On Wed, Nov 12, 2025 at 10:14 AM Pasha Tatashin <pasha.tatashin@soleen.com> wrote: > > > > FLB global objects act similarly to subsystem-wide data, except their > > > data has a clear creation and destruction time tied to preserved > > > files. When the first file of a particular type is added to LUO, this > > > global data is created; when the last file of that type is removed > > > (unpreserved or finished), this global data is destroyed, this is why > > > its life is bound to file lifecycle. Crucially, this global data is > > > accessible at any time while LUO owns the associated files spanning > > > the early boot update boundary. > > > > But there are no files at mm_core_init(). I'm really confused here. > > This isn't about the files themselves, but about the subsystem global > data. The files are only used to describe the lifetime of this global > data. > > I think mm_core_init() is too late, and the call would need to be > moved earlier to work correctly with subsystems. At the very least, we > will have to add some early FDT parsing to retrieve data during early > boot, but that would be part of the HugeTLB preservation work. > > I can move liveupdate_init() inside kho_memory_init(), so we don't > need to modify mm_core_init(). Or, rename kho_memory_init to > kho_and_liveupdate_memory_init() and combine the two calls into a > single function in kexec_handover.c. > > > > > So I think for now we can move liveupdate_init() later in boot and we will > > > > solve the problem of hugetlb reservations when we add support for hugetlb. > > > > > > HugeTLB reserves memory early in boot. If we already have preserved > > > HugeTLB pages via LUO/KHO, we must ensure they are counted against the > > > boot-time reservation. For example, if hugetlb_cma_reserve() needs to > > > reserve ten 1G pages, but LUO has already preserved seven, we only > > > need to reserve three new pages and the rest are going to be restored > > > with the files. 
> > > > > > Since this count is contained in the FLB global object, that data > > > needs to be available during the early reservation phase. (Pratyush is > > > working on HugeTLB preservation and can explain further). > > > > Not sure I really follow the design here, but in my understanding the gist > > here is that hugetlb reservations need to be aware of the preserved state. > > If that's the case, we definitely can move liveupdate_init() to an initcall > > and revisit this when hugetlb support for luo comes along. > > This will break the in-kernel tests that ensure FLB data is accessible > and works correctly during early boot, as they use > early_initcall(liveupdate_test_early_init);. We had a chat, so we agreed to move liveupdate_init() into early_initcall() and liveupdate_test_early_init into somewhere later initcall. And when HugeTLB support is added we will introduce a variant for read-only access to do it early in boot from setup_arch(). > We cannot rely on early_initcall() for liveupdate_init() because it > would compete with the test. We also can't move the test to a later > initcall, as that would break the verification of what FLB is > promising: early access to global data by subsystems that need it > (PCI, IOMMU Core, HugeTLB, etc.). > > Thanks, > Pasha
On Fri, Nov 07, 2025 at 04:03:00PM -0500, Pasha Tatashin wrote:
> Integrate the LUO with the KHO framework to enable passing LUO state
> across a kexec reboot.
>
> When LUO is transitioned to a "prepared" state, it tells KHO to
> finalize, so all memory segments that were added to KHO preservation
> list are getting preserved. After "Prepared" state no new segments
> can be preserved. If LUO is canceled, it also tells KHO to cancel the
> serialization, and therefore, later LUO can go back into the prepared
> state.
>
> This patch introduces the following changes:
> - During the KHO finalization phase allocate FDT blob.
> - Populate this FDT with a LUO compatibility string ("luo-v1").
>
> LUO now depends on `CONFIG_KEXEC_HANDOVER`. The core state transition
> logic (`luo_do_*_calls`) remains unimplemented in this patch.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
...
> @@ -69,7 +197,22 @@ early_param("liveupdate", early_liveupdate_param);
> */
> int liveupdate_reboot(void)
> {
> - return 0;
> + int err;
> +
> + if (!liveupdate_enabled())
> + return 0;
> +
> + err = kho_finalize();
kho_finalize() should be really called from kernel_kexec().
We avoided it because of the concern that memory allocations that late in
reboot could be an issue. But I looked at hibernate() and it does
allocations on reboot->hibernate path, so adding kho_finalize() as the
first step of kernel_kexec() seems fine.
And if we prioritize stateless memory tracking in KHO, it won't be a
concern at all.
> + if (err) {
> + pr_err("kho_finalize failed %d\n", err);
> + /*
> + * kho_finalize() may return libfdt errors, to avoid passing to
> + * userspace unknown errors, change this to EAGAIN.
> + */
> + err = -EAGAIN;
> + }
> +
> + return err;
> }
>
> /**
--
Sincerely yours,
Mike.
>
> kho_finalize() should be really called from kernel_kexec().
>
> We avoided it because of the concern that memory allocations that late in
> reboot could be an issue. But I looked at hibernate() and it does
> allocations on reboot->hibernate path, so adding kho_finalize() as the
> first step of kernel_kexec() seems fine.
This isn't a regular reboot; it's a live update. The
liveupdate_reboot() is designed to be reversible and allows us to
return an error, undoing the freeze() operations via unfreeze() in
case of failure.
This is why this call is placed first in reboot(), before any
irreversible reboot notifiers or shutdown callbacks are performed. If
an allocation problem occurs in KHO, the error is simply reported back
to userspace, and the live update is safely aborted.
> And if we prioritize stateless memory tracking in KHO, it won't be a
> concern at all.
We are prioritizing stateless KHO work ;-) +Jason Miu
Once KHO is stateless, the kho_finalize() is going to be removed.
>
> > + if (err) {
> > + pr_err("kho_finalize failed %d\n", err);
> > + /*
> > + * kho_finalize() may return libfdt errors, to avoid passing to
> > + * userspace unknown errors, change this to EAGAIN.
> > + */
> > + err = -EAGAIN;
> > + }
> > +
> > + return err;
> > }
> >
> > /**
>
> --
> Sincerely yours,
> Mike.
On Mon, Nov 10, 2025 at 10:43:43AM -0500, Pasha Tatashin wrote: > > > > kho_finalize() should be really called from kernel_kexec(). > > > > We avoided it because of the concern that memory allocations that late in > > reboot could be an issue. But I looked at hibernate() and it does > > allocations on reboot->hibernate path, so adding kho_finalize() as the > > first step of kernel_kexec() seems fine. > > This isn't a regular reboot; it's a live update. The > liveupdate_reboot() is designed to be reversible and allows us to > return an error, undoing the freeze() operations via unfreeze() in > case of failure. > > This is why this call is placed first in reboot(), before any > irreversible reboot notifiers or shutdown callbacks are performed. If > an allocation problem occurs in KHO, the error is simply reported back > to userspace, and the live update update is safely aborted. This is fine. But what I don't like is that we can't use kho without liveupdate. We are making debugfs optional, we have a way to call kho_finalize() on the reboot path and it does not seem an issue to do it even without liveupdate. But then we force kho_finalize() into liveupdate_reboot() allowing weird configurations where kho is there but it's unusable. What I'd like to see is that we can finalize KHO on kexec reboot path even when liveupdate is not compiled and until then the patch that makes KHO debugfs optional should not go further IMO. Another thing I didn't check in this series yet is how finalization driven from debugfs interacts with liveupdate internal handling? > > And if we prioritize stateless memory tracking in KHO, it won't be a > > concern at all. > > We are prioritizing stateless KHO work ;-) +Jason Miu > Once KHO is stateless, the kho_finalize() is going to be removed. There's still fdt_finish(), but it can't fail in practice. -- Sincerely yours, Mike.
Hi Mike, Thank you for review, my comments below: > > This is why this call is placed first in reboot(), before any > > irreversible reboot notifiers or shutdown callbacks are performed. If > > an allocation problem occurs in KHO, the error is simply reported back > > to userspace, and the live update update is safely aborted. > > This is fine. But what I don't like is that we can't use kho without > liveupdate. We are making debugfs optional, we have a way to call Yes you can: you can disable liveupdate (i.e. not supply liveupdate=1 via kernel parameter) and use KHO the old way: drive it from the userspace. However, if liveupdate is enabled, liveupdate becomes the driver of KHO as unfortunately KHO has these weird states at the moment. > kho_finalize() on the reboot path and it does not seem an issue to do it > even without liveupdate. But then we force kho_finalize() into > liveupdate_reboot() allowing weird configurations where kho is there but > it's unusable. What do you mean KHO is there but unusable, we should not have such a state... > What I'd like to see is that we can finalize KHO on kexec reboot path even > when liveupdate is not compiled and until then the patch that makes KHO > debugfs optional should not go further IMO. > > Another thing I didn't check in this series yet is how finalization driven > from debugfs interacts with liveupdate internal handling? I think what we can do is the following: - Remove "Kconfig: make debugfs optional" from this series, and instead make that change as part of stateless KHO work. - This will ensure that when liveupdate=0 always KHO finalize is fully support the old way. - When liveupdate=1 always disable KHO debugfs "finalize" API, and allow liveupdate to drive it automatically. It would add another liveupdate_enable() check to KHO, and is going to be removed as part of stateless KHO work. Pasha
Hi Pasha, On Tue, Nov 11, 2025 at 03:57:39PM -0500, Pasha Tatashin wrote: > Hi Mike, > > Thank you for review, my comments below: > > > > This is why this call is placed first in reboot(), before any > > > irreversible reboot notifiers or shutdown callbacks are performed. If > > > an allocation problem occurs in KHO, the error is simply reported back > > > to userspace, and the live update update is safely aborted. The call to liveupdate_reboot() is just before kernel_kexec(). Why we don't move it there? And all the liveupdate_reboot() does if kho_finalize() fails it's massaging the error value before returning it to userspace. Why kernel_kexec() can't do the same? > > This is fine. But what I don't like is that we can't use kho without > > liveupdate. We are making debugfs optional, we have a way to call > > Yes you can: you can disable liveupdate (i.e. not supply liveupdate=1 > via kernel parameter) and use KHO the old way: drive it from the > userspace. However, if liveupdate is enabled, liveupdate becomes the > driver of KHO as unfortunately KHO has these weird states at the > moment. The "weird state" is the point where KHO builds its FDT. Replacing the current memory tracker with one that does not require serialization won't change it. We still need a way to tell KHO that "there won't be new nodes in FDT, pack it". > > kho_finalize() on the reboot path and it does not seem an issue to do it > > even without liveupdate. But then we force kho_finalize() into > > liveupdate_reboot() allowing weird configurations where kho is there but > > it's unusable. > > What do you mean KHO is there but unusable, we should not have such a state... If you compile a kernel with KEXEC_HANDOVER=y, KEXEC_HANDOVER_DEBUGFS=n and LIVEUPDATE=n and boot with kho=1 there is nothing to trigger kho_finalize(). 
> > What I'd like to see is that we can finalize KHO on kexec reboot path even > > when liveupdate is not compiled and until then the patch that makes KHO > > debugfs optional should not go further IMO. > > > > Another thing I didn't check in this series yet is how finalization driven > > from debugfs interacts with liveupdate internal handling? > > I think what we can do is the following: > - Remove "Kconfig: make debugfs optional" from this series, and > instead make that change as part of stateless KHO work. > - This will ensure that when liveupdate=0 always KHO finalize is fully > support the old way. > - When liveupdate=1 always disable KHO debugfs "finalize" API, and > allow liveupdate to drive it automatically. It would add another > liveupdate_enable() check to KHO, and is going to be removed as part > of stateless KHO work. KHO should not call into liveupdate. That's layering violation. And "stateless KHO" does not really make it stateless, it only removes the memory serialization from kho_finalize(), but it's still required to pack the FDT. I think we should allow kho finalization in some form from kernel_kexec(). When kho=1 and liveupdate=0, it will actually create the FDT if there was no previous trigger from debugfs or it will continue with FDT created by explicit request via debugfs. When liveupdate=1, liveupdate_reboot() may call a function that actually finalizes the state to allow safe rollback (although in the current patches it does not seem necessary). And then kho_finalize() called from kernel_kexec() will just continue with the state created by liveupdate_reboot(). If we already finalized the kho state via debugfs, liveupdate_reboot() can either error out or reset that state. > Pasha > -- Sincerely yours, Mike.
On Wed, Nov 12, 2025 at 8:25 AM Mike Rapoport <rppt@kernel.org> wrote: > > Hi Pasha, > > On Tue, Nov 11, 2025 at 03:57:39PM -0500, Pasha Tatashin wrote: > > Hi Mike, > > > > Thank you for review, my comments below: > > > > > > This is why this call is placed first in reboot(), before any > > > > irreversible reboot notifiers or shutdown callbacks are performed. If > > > > an allocation problem occurs in KHO, the error is simply reported back > > > > to userspace, and the live update update is safely aborted. > > The call to liveupdate_reboot() is just before kernel_kexec(). Why we don't > move it there? Yes, I can move that call into kernel_kexec(). > And all the liveupdate_reboot() does if kho_finalize() fails it's massaging > the error value before returning it to userspace. Why kernel_kexec() can't > do the same? We could do that. It would look something like this: if (liveupdate_enabled()) kho_finalize(); Because we want to do kho_finalize() from kernel_kexec only when we do live update. > > > This is fine. But what I don't like is that we can't use kho without > > > liveupdate. We are making debugfs optional, we have a way to call This is exactly the fix I proposed: 1. When live-update is enabled, always disable "finalize" debugfs API. 2. When live-update is disabled, always enable "finalize" debugfs API. Once KHO is stateless the "finalize" debugfs API is going to be removed, and KHO debugfs itself can be optional. > > Yes you can: you can disable liveupdate (i.e. not supply liveupdate=1 > > via kernel parameter) and use KHO the old way: drive it from the > > userspace. However, if liveupdate is enabled, liveupdate becomes the > > driver of KHO as unfortunately KHO has these weird states at the > > moment. > > The "weird state" is the point where KHO builds its FDT. Replacing the > current memory tracker with one that does not require serialization won't > change it. We still need a way to tell KHO that "there won't be new nodes > in FDT, pack it". 
> see my answer below > > > kho_finalize() on the reboot path and it does not seem an issue to do it > > > even without liveupdate. But then we force kho_finalize() into > > > liveupdate_reboot() allowing weird configurations where kho is there but > > > it's unusable. > > > > What do you mean KHO is there but unusable, we should not have such a state... > > If you compile a kernel with KEXEC_HANDOVER=y, KEXEC_HANDOVER_DEBUGFS=n and > LIVEUPDATE=n and boot with kho=1 there is nothing to trigger > kho_finalize(). > > > > What I'd like to see is that we can finalize KHO on kexec reboot path even > > > when liveupdate is not compiled and until then the patch that makes KHO > > > debugfs optional should not go further IMO. > > > > > > Another thing I didn't check in this series yet is how finalization driven > > > from debugfs interacts with liveupdate internal handling? > > > > I think what we can do is the following: > > - Remove "Kconfig: make debugfs optional" from this series, and > > instead make that change as part of stateless KHO work. > > - This will ensure that when liveupdate=0 always KHO finalize is fully > > support the old way. > > - When liveupdate=1 always disable KHO debugfs "finalize" API, and > > allow liveupdate to drive it automatically. It would add another > > liveupdate_enable() check to KHO, and is going to be removed as part > > of stateless KHO work. > > KHO should not call into liveupdate. That's layering violation. > And "stateless KHO" does not really make it stateless, it only removes the > memory serialization from kho_finalize(), but it's still required to pack > the FDT. This touches on a point I've raised in the KHO sync meetings: to be effective, the "stateless KHO" work must also make subtree add/remove stateless. There should not be a separate "finalize" state just to finish the FDT. The KHO FDT is tiny (only one page), and there are only a handful of subtrees. 
Adding and removing subtrees is cheap; we should be able to open FDT, modify it, and finish FDT on every operation. There's no need for a special finalization state at kexec time. KHO should be totally stateless. > I think we should allow kho finalization in some form from kernel_kexec(). If we achieve that, we wouldn't need a kho_finalize() call from kernel_kexec() at all. All KHO operations should be allowed at any time once KHO is initialized, and they shouldn't depend on the machine state. So, even late in shutdown or early in boot, it should be possible to preserve KHO memory or a subtree. I'm not saying it's a good idea to do that late in shutdown (as preservation may fail), but that should be the caller's problem. Thanks, Pasha
On Wed, Nov 12, 2025 at 09:58:27AM -0500, Pasha Tatashin wrote: > On Wed, Nov 12, 2025 at 8:25 AM Mike Rapoport <rppt@kernel.org> wrote: > > > > Hi Pasha, > > > > On Tue, Nov 11, 2025 at 03:57:39PM -0500, Pasha Tatashin wrote: > > > Hi Mike, > > > > > > Thank you for review, my comments below: > > > > > > > > This is why this call is placed first in reboot(), before any > > > > > irreversible reboot notifiers or shutdown callbacks are performed. If > > > > > an allocation problem occurs in KHO, the error is simply reported back > > > > > to userspace, and the live update update is safely aborted. > > > > The call to liveupdate_reboot() is just before kernel_kexec(). Why we don't > > move it there? > > Yes, I can move that call into kernel_kexec(). > > > And all the liveupdate_reboot() does if kho_finalize() fails it's massaging > > the error value before returning it to userspace. Why kernel_kexec() can't > > do the same? > > We could do that. It would look something like this: > > if (liveupdate_enabled()) > kho_finalize(); > > Because we want to do kho_finalize() from kernel_kexec only when we do > live update. > > > > > This is fine. But what I don't like is that we can't use kho without > > > > liveupdate. We are making debugfs optional, we have a way to call > > This is exactly the fix I proposed: > > 1. When live-update is enabled, always disable "finalize" debugfs API. > 2. When live-update is disabled, always enable "finalize" debugfs API. I don't mind the concept, what I do mind is sprinkling liveupdate_enabled() in KHO. How about we kill debugfs/kho/out/abort and make kho_finalize() overwrite an existing FDT if there was any? Abort was required to allow rollback for subsystems that had kho notifiers, but now notifiers are gone and kho_abort() only frees the memory serialization data. I don't see an issue with kho_finalize() from debugfs being a tad slower because of a call to kho_abort() and the liveupdate path anyway won't incur that penalty. 
> > KHO should not call into liveupdate. That's layering violation. > > And "stateless KHO" does not really make it stateless, it only removes the > > memory serialization from kho_finalize(), but it's still required to pack > > the FDT. > > This touches on a point I've raised in the KHO sync meetings: to be > effective, the "stateless KHO" work must also make subtree add/remove > stateless. There should not be a separate "finalize" state just to > finish the FDT. The KHO FDT is tiny (only one page), and there are > only a handful of subtrees. Adding and removing subtrees is cheap; we > should be able to open FDT, modify it, and finish FDT on every > operation. There's no need for a special finalization state at kexec > time. KHO should be totally stateless. And as the first step we can drop 'if (!kho_out.finalized)' from kho_fill_kimage(). We might need to massage the check for valid FDT in kho_populate() to avoid unnecessary noise, but largely there's no issue with always passing KHO data in kimage. > Thanks, > Pasha -- Sincerely yours, Mike.
On Thu, Nov 13, 2025 at 11:32 AM Mike Rapoport <rppt@kernel.org> wrote: > > On Wed, Nov 12, 2025 at 09:58:27AM -0500, Pasha Tatashin wrote: > > On Wed, Nov 12, 2025 at 8:25 AM Mike Rapoport <rppt@kernel.org> wrote: > > > > > > Hi Pasha, > > > > > > On Tue, Nov 11, 2025 at 03:57:39PM -0500, Pasha Tatashin wrote: > > > > Hi Mike, > > > > > > > > Thank you for review, my comments below: > > > > > > > > > > This is why this call is placed first in reboot(), before any > > > > > > irreversible reboot notifiers or shutdown callbacks are performed. If > > > > > > an allocation problem occurs in KHO, the error is simply reported back > > > > > > to userspace, and the live update update is safely aborted. > > > > > > The call to liveupdate_reboot() is just before kernel_kexec(). Why we don't > > > move it there? > > > > Yes, I can move that call into kernel_kexec(). > > > > > And all the liveupdate_reboot() does if kho_finalize() fails it's massaging > > > the error value before returning it to userspace. Why kernel_kexec() can't > > > do the same? > > > > We could do that. It would look something like this: > > > > if (liveupdate_enabled()) > > kho_finalize(); > > > > Because we want to do kho_finalize() from kernel_kexec only when we do > > live update. > > > > > > > This is fine. But what I don't like is that we can't use kho without > > > > > liveupdate. We are making debugfs optional, we have a way to call > > > > This is exactly the fix I proposed: > > > > 1. When live-update is enabled, always disable "finalize" debugfs API. > > 2. When live-update is disabled, always enable "finalize" debugfs API. > > I don't mind the concept, what I do mind is sprinkling liveupdate_enabled() > in KHO. Sure, let's just unconditionally do kho_fill_kimage(). > How about we kill debugfs/kho/out/abort and make kho_finalize() overwrite > an existing FDT if there was any? 
> > Abort was required to allow rollback for subsystems that had kho notifiers, > but now notifiers are gone and kho_abort() only frees the memory > serialization data. I don't see an issue with kho_finalize() from debugfs > being a tad slower because of a call to kho_abort() and the liveupdate path > anyway won't incur that penalty. Sounds good to me. > > > KHO should not call into liveupdate. That's layering violation. > > > And "stateless KHO" does not really make it stateless, it only removes the > > > memory serialization from kho_finalize(), but it's still required to pack > > > the FDT. > > > > This touches on a point I've raised in the KHO sync meetings: to be > > effective, the "stateless KHO" work must also make subtree add/remove > > stateless. There should not be a separate "finalize" state just to > > finish the FDT. The KHO FDT is tiny (only one page), and there are > > only a handful of subtrees. Adding and removing subtrees is cheap; we > > should be able to open FDT, modify it, and finish FDT on every > > operation. There's no need for a special finalization state at kexec > > time. KHO should be totally stateless. > > And as the first step we can drop 'if (!kho_out.finalized)' from > kho_fill_kimage(). We might need to massage the check for valid FDT in > kho_populate() to avoid unnecessary noise, but largely there's no issue > with always passing KHO data in kimage. Sounds good, let me work on this patch. Pasha
© 2016 - 2026 Red Hat, Inc.