From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
The KHO framework uses a notifier chain as the mechanism for clients to
participate in the finalization process. While this works for a single,
central state machine, it is too restrictive for kernel-internal
components like pstore/reserve_mem or IMA. These components need a
simpler, direct way to register their state for preservation (e.g.,
during their initcall) without being part of a complex,
shutdown-time notifier sequence. The notifier model forces all
participants into a single finalization flow and makes direct
preservation from an arbitrary context difficult.
This patch refactors the client participation model by removing the
notifier chain and introducing a direct API for managing FDT subtrees.
The core kho_finalize() and kho_abort() state machine remains, but
clients now register their data with KHO beforehand.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/kexec_handover.h | 28 +----
kernel/kexec_handover.c | 184 +++++++++++++++----------------
kernel/kexec_handover_debug.c | 17 +--
kernel/kexec_handover_internal.h | 5 +-
mm/memblock.c | 60 ++--------
5 files changed, 118 insertions(+), 176 deletions(-)
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 04d0108db98e..2faf290803ce 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -10,14 +10,7 @@ struct kho_scratch {
phys_addr_t size;
};
-/* KHO Notifier index */
-enum kho_event {
- KEXEC_KHO_FINALIZE = 0,
- KEXEC_KHO_ABORT = 1,
-};
-
struct folio;
-struct notifier_block;
struct page;
#define DECLARE_KHOSER_PTR(name, type) \
@@ -37,8 +30,6 @@ struct page;
(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
})
-struct kho_serialization;
-
struct kho_vmalloc_chunk;
struct kho_vmalloc {
DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *);
@@ -57,12 +48,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
struct folio *kho_restore_folio(phys_addr_t phys);
struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_add_subtree(const char *name, void *fdt);
+void kho_remove_subtree(void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
-int register_kho_notifier(struct notifier_block *nb);
-int unregister_kho_notifier(struct notifier_block *nb);
-
void kho_memory_init(void);
void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
@@ -114,23 +103,16 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
return NULL;
}
-static inline int kho_add_subtree(struct kho_serialization *ser,
- const char *name, void *fdt)
+static inline int kho_add_subtree(const char *name, void *fdt)
{
return -EOPNOTSUPP;
}
-static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+static inline void kho_remove_subtree(void *fdt)
{
- return -EOPNOTSUPP;
}
-static inline int register_kho_notifier(struct notifier_block *nb)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int unregister_kho_notifier(struct notifier_block *nb)
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
{
return -EOPNOTSUPP;
}
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index f0f6c6b8ad83..e0dc0ed565ef 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -15,7 +15,6 @@
#include <linux/libfdt.h>
#include <linux/list.h>
#include <linux/memblock.h>
-#include <linux/notifier.h>
#include <linux/page-isolation.h>
#include <linux/vmalloc.h>
@@ -99,33 +98,34 @@ struct kho_mem_track {
struct khoser_mem_chunk;
-struct kho_serialization {
- struct page *fdt;
- struct kho_mem_track track;
- /* First chunk of serialized preserved memory map */
- struct khoser_mem_chunk *preserved_mem_map;
+struct kho_sub_fdt {
+ struct list_head l;
+ const char *name;
+ void *fdt;
};
struct kho_out {
- struct blocking_notifier_head chain_head;
+ void *fdt;
+ bool finalized;
+ struct mutex lock; /* protects KHO FDT finalization */
- struct dentry *dir;
+ struct list_head sub_fdts;
+ struct mutex fdts_lock;
- struct mutex lock; /* protects KHO FDT finalization */
+ struct kho_mem_track track;
+ /* First chunk of serialized preserved memory map */
+ struct khoser_mem_chunk *preserved_mem_map;
- struct kho_serialization ser;
- bool finalized;
+ struct kho_debugfs dbg;
};
static struct kho_out kho_out = {
- .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
.lock = __MUTEX_INITIALIZER(kho_out.lock),
- .ser = {
- .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list),
- .track = {
- .orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
- },
+ .track = {
+ .orders = XARRAY_INIT(kho_out.track.orders, 0),
},
+ .sub_fdts = LIST_HEAD_INIT(kho_out.sub_fdts),
+ .fdts_lock = __MUTEX_INITIALIZER(kho_out.fdts_lock),
.finalized = false,
};
@@ -366,14 +366,14 @@ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
}
}
-static int kho_mem_serialize(struct kho_serialization *ser)
+static int kho_mem_serialize(struct kho_out *kho_out)
{
struct khoser_mem_chunk *first_chunk = NULL;
struct khoser_mem_chunk *chunk = NULL;
struct kho_mem_phys *physxa;
unsigned long order;
- xa_for_each(&ser->track.orders, order, physxa) {
+ xa_for_each(&kho_out->track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
@@ -401,7 +401,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
}
}
- ser->preserved_mem_map = first_chunk;
+ kho_out->preserved_mem_map = first_chunk;
return 0;
@@ -660,28 +660,8 @@ static void __init kho_reserve_scratch(void)
kho_enable = false;
}
-struct kho_out {
- struct blocking_notifier_head chain_head;
- struct mutex lock; /* protects KHO FDT finalization */
- struct kho_serialization ser;
- bool finalized;
- struct kho_debugfs dbg;
-};
-
-static struct kho_out kho_out = {
- .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
- .lock = __MUTEX_INITIALIZER(kho_out.lock),
- .ser = {
- .track = {
- .orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
- },
- },
- .finalized = false,
-};
-
/**
* kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
- * @ser: serialization control object passed by KHO notifiers.
* @name: name of the sub tree.
* @fdt: the sub tree blob.
*
@@ -695,34 +675,45 @@ static struct kho_out kho_out = {
*
* Return: 0 on success, error code on failure
*/
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
+int kho_add_subtree(const char *name, void *fdt)
{
- int err = 0;
- u64 phys = (u64)virt_to_phys(fdt);
- void *root = page_to_virt(ser->fdt);
+ struct kho_sub_fdt *sub_fdt;
+ int err;
- err |= fdt_begin_node(root, name);
- err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
- err |= fdt_end_node(root);
+ sub_fdt = kmalloc(sizeof(*sub_fdt), GFP_KERNEL);
+ if (!sub_fdt)
+ return -ENOMEM;
- if (err)
- return err;
+ INIT_LIST_HEAD(&sub_fdt->l);
+ sub_fdt->name = name;
+ sub_fdt->fdt = fdt;
+
+ mutex_lock(&kho_out.fdts_lock);
+ list_add_tail(&sub_fdt->l, &kho_out.sub_fdts);
+ err = kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
+ mutex_unlock(&kho_out.fdts_lock);
- return kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
+ return err;
}
EXPORT_SYMBOL_GPL(kho_add_subtree);
-int register_kho_notifier(struct notifier_block *nb)
+void kho_remove_subtree(void *fdt)
{
- return blocking_notifier_chain_register(&kho_out.chain_head, nb);
-}
-EXPORT_SYMBOL_GPL(register_kho_notifier);
+ struct kho_sub_fdt *sub_fdt;
+
+ mutex_lock(&kho_out.fdts_lock);
+ list_for_each_entry(sub_fdt, &kho_out.sub_fdts, l) {
+ if (sub_fdt->fdt == fdt) {
+ list_del(&sub_fdt->l);
+ kfree(sub_fdt);
+ kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
+ break;
+ }
+ }
+ mutex_unlock(&kho_out.fdts_lock);
-int unregister_kho_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
}
-EXPORT_SYMBOL_GPL(unregister_kho_notifier);
+EXPORT_SYMBOL_GPL(kho_remove_subtree);
/**
* kho_preserve_folio - preserve a folio across kexec.
@@ -737,7 +728,7 @@ int kho_preserve_folio(struct folio *folio)
{
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
return __kho_preserve_order(track, pfn, order);
}
@@ -755,7 +746,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
*/
int kho_preserve_pages(struct page *page, unsigned int nr_pages)
{
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn = start_pfn;
@@ -851,7 +842,7 @@ static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur
static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk)
{
- struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_mem_track *track = &kho_out.track;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
__kho_unpreserve(track, pfn, pfn + 1);
@@ -1033,11 +1024,11 @@ EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
static int __kho_abort(void)
{
- int err;
+ int err = 0;
unsigned long order;
struct kho_mem_phys *physxa;
- xa_for_each(&kho_out.ser.track.orders, order, physxa) {
+ xa_for_each(&kho_out.track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
@@ -1047,17 +1038,13 @@ static int __kho_abort(void)
xa_destroy(&physxa->phys_bits);
kfree(physxa);
}
- xa_destroy(&kho_out.ser.track.orders);
+ xa_destroy(&kho_out.track.orders);
- if (kho_out.ser.preserved_mem_map) {
- kho_mem_ser_free(kho_out.ser.preserved_mem_map);
- kho_out.ser.preserved_mem_map = NULL;
+ if (kho_out.preserved_mem_map) {
+ kho_mem_ser_free(kho_out.preserved_mem_map);
+ kho_out.preserved_mem_map = NULL;
}
- err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
- NULL);
- err = notifier_to_errno(err);
-
if (err)
pr_err("Failed to abort KHO finalization: %d\n", err);
@@ -1084,7 +1071,7 @@ int kho_abort(void)
kho_out.finalized = false;
- kho_debugfs_cleanup(&kho_out.dbg);
+ kho_debugfs_fdt_remove(&kho_out.dbg, kho_out.fdt);
unlock:
mutex_unlock(&kho_out.lock);
@@ -1095,41 +1082,46 @@ static int __kho_finalize(void)
{
int err = 0;
u64 *preserved_mem_map;
- void *fdt = page_to_virt(kho_out.ser.fdt);
+ void *root = kho_out.fdt;
+ struct kho_sub_fdt *fdt;
- err |= fdt_create(fdt, PAGE_SIZE);
- err |= fdt_finish_reservemap(fdt);
- err |= fdt_begin_node(fdt, "");
- err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
+ err |= fdt_create(root, PAGE_SIZE);
+ err |= fdt_finish_reservemap(root);
+ err |= fdt_begin_node(root, "");
+ err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
/**
* Reserve the preserved-memory-map property in the root FDT, so
* that all property definitions will precede subnodes created by
* KHO callers.
*/
- err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
+ err |= fdt_property_placeholder(root, PROP_PRESERVED_MEMORY_MAP,
sizeof(*preserved_mem_map),
(void **)&preserved_mem_map);
if (err)
goto abort;
- err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
+ err = kho_preserve_folio(virt_to_folio(kho_out.fdt));
if (err)
goto abort;
- err = blocking_notifier_call_chain(&kho_out.chain_head,
- KEXEC_KHO_FINALIZE, &kho_out.ser);
- err = notifier_to_errno(err);
+ err = kho_mem_serialize(&kho_out);
if (err)
goto abort;
- err = kho_mem_serialize(&kho_out.ser);
- if (err)
- goto abort;
+ *preserved_mem_map = (u64)virt_to_phys(kho_out.preserved_mem_map);
- *preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
+ mutex_lock(&kho_out.fdts_lock);
+ list_for_each_entry(fdt, &kho_out.sub_fdts, l) {
+ phys_addr_t phys = virt_to_phys(fdt->fdt);
- err |= fdt_end_node(fdt);
- err |= fdt_finish(fdt);
+ err |= fdt_begin_node(root, fdt->name);
+ err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
+ err |= fdt_end_node(root);
+ };
+ mutex_unlock(&kho_out.fdts_lock);
+
+ err |= fdt_end_node(root);
+ err |= fdt_finish(root);
abort:
if (err) {
@@ -1160,7 +1152,7 @@ int kho_finalize(void)
kho_out.finalized = true;
ret = kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
- page_to_virt(kho_out.ser.fdt), true);
+ kho_out.fdt, true);
unlock:
mutex_unlock(&kho_out.lock);
@@ -1252,15 +1244,17 @@ static __init int kho_init(void)
{
int err = 0;
const void *fdt = kho_get_fdt();
+ struct page *fdt_page;
if (!kho_enable)
return 0;
- kho_out.ser.fdt = alloc_page(GFP_KERNEL);
- if (!kho_out.ser.fdt) {
+ fdt_page = alloc_page(GFP_KERNEL);
+ if (!fdt_page) {
err = -ENOMEM;
goto err_free_scratch;
}
+ kho_out.fdt = page_to_virt(fdt_page);
err = kho_debugfs_init();
if (err)
@@ -1288,8 +1282,8 @@ static __init int kho_init(void)
return 0;
err_free_fdt:
- put_page(kho_out.ser.fdt);
- kho_out.ser.fdt = NULL;
+ put_page(fdt_page);
+ kho_out.fdt = NULL;
err_free_scratch:
for (int i = 0; i < kho_scratch_cnt; i++) {
void *start = __va(kho_scratch[i].addr);
@@ -1300,7 +1294,7 @@ static __init int kho_init(void)
kho_enable = false;
return err;
}
-late_initcall(kho_init);
+fs_initcall(kho_init);
static void __init kho_release_scratch(void)
{
@@ -1436,7 +1430,7 @@ int kho_fill_kimage(struct kimage *image)
if (!kho_out.finalized)
return 0;
- image->kho.fdt = page_to_phys(kho_out.ser.fdt);
+ image->kho.fdt = virt_to_phys(kho_out.fdt);
scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
scratch = (struct kexec_buf){
diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c
index b88d138a97be..af4bad225630 100644
--- a/kernel/kexec_handover_debug.c
+++ b/kernel/kexec_handover_debug.c
@@ -61,14 +61,17 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
}
-void kho_debugfs_cleanup(struct kho_debugfs *dbg)
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
{
- struct fdt_debugfs *ff, *tmp;
-
- list_for_each_entry_safe(ff, tmp, &dbg->fdt_list, list) {
- debugfs_remove(ff->file);
- list_del(&ff->list);
- kfree(ff);
+ struct fdt_debugfs *ff;
+
+ list_for_each_entry(ff, &dbg->fdt_list, list) {
+ if (ff->wrapper.data == fdt) {
+ debugfs_remove(ff->file);
+ list_del(&ff->list);
+ kfree(ff);
+ break;
+ }
}
}
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
index f6f172ddcae4..229a05558b99 100644
--- a/kernel/kexec_handover_internal.h
+++ b/kernel/kexec_handover_internal.h
@@ -30,7 +30,7 @@ void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
int kho_out_debugfs_init(struct kho_debugfs *dbg);
int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
const void *fdt, bool root);
-void kho_debugfs_cleanup(struct kho_debugfs *dbg);
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
#else
static inline int kho_debugfs_init(void) { return 0; }
static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
@@ -38,7 +38,8 @@ static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
const void *fdt, bool root) { return 0; }
-static inline void kho_debugfs_cleanup(struct kho_debugfs *dbg) {}
+static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
+ void *fdt) { }
#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index e23e16618e9b..c4b2d4e4c715 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
#define MEMBLOCK_KHO_FDT "memblock"
#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
-static struct page *kho_fdt;
-
-static int reserve_mem_kho_finalize(struct kho_serialization *ser)
-{
- int err = 0, i;
-
- for (i = 0; i < reserved_mem_count; i++) {
- struct reserve_mem_table *map = &reserved_mem_table[i];
- struct page *page = phys_to_page(map->start);
- unsigned int nr_pages = map->size >> PAGE_SHIFT;
-
- err |= kho_preserve_pages(page, nr_pages);
- }
-
- err |= kho_preserve_folio(page_folio(kho_fdt));
- err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
-
- return notifier_from_errno(err);
-}
-
-static int reserve_mem_kho_notifier(struct notifier_block *self,
- unsigned long cmd, void *v)
-{
- switch (cmd) {
- case KEXEC_KHO_FINALIZE:
- return reserve_mem_kho_finalize((struct kho_serialization *)v);
- case KEXEC_KHO_ABORT:
- return NOTIFY_DONE;
- default:
- return NOTIFY_BAD;
- }
-}
-
-static struct notifier_block reserve_mem_kho_nb = {
- .notifier_call = reserve_mem_kho_notifier,
-};
static int __init prepare_kho_fdt(void)
{
int err = 0, i;
+ struct page *fdt_page;
void *fdt;
- kho_fdt = alloc_page(GFP_KERNEL);
- if (!kho_fdt)
+ fdt_page = alloc_page(GFP_KERNEL);
+ if (!fdt_page)
return -ENOMEM;
- fdt = page_to_virt(kho_fdt);
+ fdt = page_to_virt(fdt_page);
err |= fdt_create(fdt, PAGE_SIZE);
err |= fdt_finish_reservemap(fdt);
@@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
for (i = 0; i < reserved_mem_count; i++) {
struct reserve_mem_table *map = &reserved_mem_table[i];
+ struct page *page = phys_to_page(map->start);
+ unsigned int nr_pages = map->size >> PAGE_SHIFT;
+ err |= kho_preserve_pages(page, nr_pages);
err |= fdt_begin_node(fdt, map->name);
err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
@@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
err |= fdt_end_node(fdt);
}
err |= fdt_end_node(fdt);
-
err |= fdt_finish(fdt);
+ err |= kho_preserve_folio(page_folio(fdt_page));
+ err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+
if (err) {
pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
- put_page(kho_fdt);
- kho_fdt = NULL;
+ put_page(fdt_page);
}
return err;
@@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
err = prepare_kho_fdt();
if (err)
return err;
-
- err = register_kho_notifier(&reserve_mem_kho_nb);
- if (err) {
- put_page(kho_fdt);
- kho_fdt = NULL;
- }
-
return err;
}
late_initcall(reserve_mem_init);
--
2.51.0.536.g15c5d4f767-goog
On Mon, Sep 29 2025, Pasha Tatashin wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> The KHO framework uses a notifier chain as the mechanism for clients to
> participate in the finalization process. While this works for a single,
> central state machine, it is too restrictive for kernel-internal
> components like pstore/reserve_mem or IMA. These components need a
> simpler, direct way to register their state for preservation (e.g.,
> during their initcall) without being part of a complex,
> shutdown-time notifier sequence. The notifier model forces all
> participants into a single finalization flow and makes direct
> preservation from an arbitrary context difficult.
> This patch refactors the client participation model by removing the
> notifier chain and introducing a direct API for managing FDT subtrees.
>
> The core kho_finalize() and kho_abort() state machine remains, but
> clients now register their data with KHO beforehand.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
[...]
> diff --git a/mm/memblock.c b/mm/memblock.c
> index e23e16618e9b..c4b2d4e4c715 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
> #define MEMBLOCK_KHO_FDT "memblock"
> #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
> #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
> -static struct page *kho_fdt;
> -
> -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> -{
> - int err = 0, i;
> -
> - for (i = 0; i < reserved_mem_count; i++) {
> - struct reserve_mem_table *map = &reserved_mem_table[i];
> - struct page *page = phys_to_page(map->start);
> - unsigned int nr_pages = map->size >> PAGE_SHIFT;
> -
> - err |= kho_preserve_pages(page, nr_pages);
> - }
> -
> - err |= kho_preserve_folio(page_folio(kho_fdt));
> - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
> -
> - return notifier_from_errno(err);
> -}
> -
> -static int reserve_mem_kho_notifier(struct notifier_block *self,
> - unsigned long cmd, void *v)
> -{
> - switch (cmd) {
> - case KEXEC_KHO_FINALIZE:
> - return reserve_mem_kho_finalize((struct kho_serialization *)v);
> - case KEXEC_KHO_ABORT:
> - return NOTIFY_DONE;
> - default:
> - return NOTIFY_BAD;
> - }
> -}
> -
> -static struct notifier_block reserve_mem_kho_nb = {
> - .notifier_call = reserve_mem_kho_notifier,
> -};
>
> static int __init prepare_kho_fdt(void)
> {
> int err = 0, i;
> + struct page *fdt_page;
> void *fdt;
>
> - kho_fdt = alloc_page(GFP_KERNEL);
> - if (!kho_fdt)
> + fdt_page = alloc_page(GFP_KERNEL);
> + if (!fdt_page)
> return -ENOMEM;
>
> - fdt = page_to_virt(kho_fdt);
> + fdt = page_to_virt(fdt_page);
>
> err |= fdt_create(fdt, PAGE_SIZE);
> err |= fdt_finish_reservemap(fdt);
> @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
> err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
> for (i = 0; i < reserved_mem_count; i++) {
> struct reserve_mem_table *map = &reserved_mem_table[i];
> + struct page *page = phys_to_page(map->start);
> + unsigned int nr_pages = map->size >> PAGE_SHIFT;
>
> + err |= kho_preserve_pages(page, nr_pages);
> err |= fdt_begin_node(fdt, map->name);
> err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
> err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
> @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
> err |= fdt_end_node(fdt);
> }
> err |= fdt_end_node(fdt);
> -
> err |= fdt_finish(fdt);
>
> + err |= kho_preserve_folio(page_folio(fdt_page));
> + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
> +
> if (err) {
> pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> - put_page(kho_fdt);
> - kho_fdt = NULL;
> + put_page(fdt_page);
This adds subtree to KHO even if the FDT might be invalid. And then
leaves a dangling reference in KHO to the FDT in case of an error. I
think you should either do this check after
kho_preserve_folio(page_folio(fdt_page)) and do a clean error check for
kho_add_subtree(), or call kho_remove_subtree() in the error block.
I prefer the former since if kho_add_subtree() is the one that fails,
there is little sense in removing a subtree that was never added.
> }
>
> return err;
> @@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
> err = prepare_kho_fdt();
> if (err)
> return err;
> -
> - err = register_kho_notifier(&reserve_mem_kho_nb);
> - if (err) {
> - put_page(kho_fdt);
> - kho_fdt = NULL;
> - }
> -
> return err;
> }
> late_initcall(reserve_mem_init);
--
Regards,
Pratyush Yadav
On Mon, Oct 6, 2025 at 1:01 PM Pratyush Yadav <pratyush@kernel.org> wrote:
>
> On Mon, Sep 29 2025, Pasha Tatashin wrote:
>
> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> >
> > The KHO framework uses a notifier chain as the mechanism for clients to
> > participate in the finalization process. While this works for a single,
> > central state machine, it is too restrictive for kernel-internal
> > components like pstore/reserve_mem or IMA. These components need a
> > simpler, direct way to register their state for preservation (e.g.,
> > during their initcall) without being part of a complex,
> > shutdown-time notifier sequence. The notifier model forces all
> > participants into a single finalization flow and makes direct
> > preservation from an arbitrary context difficult.
> > This patch refactors the client participation model by removing the
> > notifier chain and introducing a direct API for managing FDT subtrees.
> >
> > The core kho_finalize() and kho_abort() state machine remains, but
> > clients now register their data with KHO beforehand.
> >
> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> [...]
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index e23e16618e9b..c4b2d4e4c715 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
> > #define MEMBLOCK_KHO_FDT "memblock"
> > #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
> > #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
> > -static struct page *kho_fdt;
> > -
> > -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> > -{
> > - int err = 0, i;
> > -
> > - for (i = 0; i < reserved_mem_count; i++) {
> > - struct reserve_mem_table *map = &reserved_mem_table[i];
> > - struct page *page = phys_to_page(map->start);
> > - unsigned int nr_pages = map->size >> PAGE_SHIFT;
> > -
> > - err |= kho_preserve_pages(page, nr_pages);
> > - }
> > -
> > - err |= kho_preserve_folio(page_folio(kho_fdt));
> > - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
> > -
> > - return notifier_from_errno(err);
> > -}
> > -
> > -static int reserve_mem_kho_notifier(struct notifier_block *self,
> > - unsigned long cmd, void *v)
> > -{
> > - switch (cmd) {
> > - case KEXEC_KHO_FINALIZE:
> > - return reserve_mem_kho_finalize((struct kho_serialization *)v);
> > - case KEXEC_KHO_ABORT:
> > - return NOTIFY_DONE;
> > - default:
> > - return NOTIFY_BAD;
> > - }
> > -}
> > -
> > -static struct notifier_block reserve_mem_kho_nb = {
> > - .notifier_call = reserve_mem_kho_notifier,
> > -};
> >
> > static int __init prepare_kho_fdt(void)
> > {
> > int err = 0, i;
> > + struct page *fdt_page;
> > void *fdt;
> >
> > - kho_fdt = alloc_page(GFP_KERNEL);
> > - if (!kho_fdt)
> > + fdt_page = alloc_page(GFP_KERNEL);
> > + if (!fdt_page)
> > return -ENOMEM;
> >
> > - fdt = page_to_virt(kho_fdt);
> > + fdt = page_to_virt(fdt_page);
> >
> > err |= fdt_create(fdt, PAGE_SIZE);
> > err |= fdt_finish_reservemap(fdt);
> > @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
> > err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
> > for (i = 0; i < reserved_mem_count; i++) {
> > struct reserve_mem_table *map = &reserved_mem_table[i];
> > + struct page *page = phys_to_page(map->start);
> > + unsigned int nr_pages = map->size >> PAGE_SHIFT;
> >
> > + err |= kho_preserve_pages(page, nr_pages);
> > err |= fdt_begin_node(fdt, map->name);
> > err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
> > err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
> > @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
> > err |= fdt_end_node(fdt);
> > }
> > err |= fdt_end_node(fdt);
> > -
> > err |= fdt_finish(fdt);
> >
> > + err |= kho_preserve_folio(page_folio(fdt_page));
> > + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
> > +
> > if (err) {
> > pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> > - put_page(kho_fdt);
> > - kho_fdt = NULL;
> > + put_page(fdt_page);
>
> This adds subtree to KHO even if the FDT might be invalid. And then
> leaves a dangling reference in KHO to the FDT in case of an error. I
> think you should either do this check after
> kho_preserve_folio(page_folio(fdt_page)) and do a clean error check for
> kho_add_subtree(), or call kho_remove_subtree() in the error block.
I agree, I do not like these err |= stuff, we should be checking
errors cleanly, and do proper clean-ups.
> I prefer the former since if kho_add_subtree() is the one that fails,
> there is little sense in removing a subtree that was never added.
>
> > }
> >
> > return err;
> > @@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
> > err = prepare_kho_fdt();
> > if (err)
> > return err;
> > -
> > - err = register_kho_notifier(&reserve_mem_kho_nb);
> > - if (err) {
> > - put_page(kho_fdt);
> > - kho_fdt = NULL;
> > - }
> > -
> > return err;
> > }
> > late_initcall(reserve_mem_init);
>
> --
> Regards,
> Pratyush Yadav
On Mon, Oct 06 2025, Pasha Tatashin wrote:
> On Mon, Oct 6, 2025 at 1:01 PM Pratyush Yadav <pratyush@kernel.org> wrote:
>>
>> On Mon, Sep 29 2025, Pasha Tatashin wrote:
>>
>> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>> >
>> > The KHO framework uses a notifier chain as the mechanism for clients to
>> > participate in the finalization process. While this works for a single,
>> > central state machine, it is too restrictive for kernel-internal
>> > components like pstore/reserve_mem or IMA. These components need a
>> > simpler, direct way to register their state for preservation (e.g.,
>> > during their initcall) without being part of a complex,
>> > shutdown-time notifier sequence. The notifier model forces all
>> > participants into a single finalization flow and makes direct
>> > preservation from an arbitrary context difficult.
>> > This patch refactors the client participation model by removing the
>> > notifier chain and introducing a direct API for managing FDT subtrees.
>> >
>> > The core kho_finalize() and kho_abort() state machine remains, but
>> > clients now register their data with KHO beforehand.
>> >
>> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
>> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>> [...]
>> > diff --git a/mm/memblock.c b/mm/memblock.c
>> > index e23e16618e9b..c4b2d4e4c715 100644
>> > --- a/mm/memblock.c
>> > +++ b/mm/memblock.c
>> > @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
>> > #define MEMBLOCK_KHO_FDT "memblock"
>> > #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
>> > #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
>> > -static struct page *kho_fdt;
>> > -
>> > -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
>> > -{
>> > - int err = 0, i;
>> > -
>> > - for (i = 0; i < reserved_mem_count; i++) {
>> > - struct reserve_mem_table *map = &reserved_mem_table[i];
>> > - struct page *page = phys_to_page(map->start);
>> > - unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> > -
>> > - err |= kho_preserve_pages(page, nr_pages);
>> > - }
>> > -
>> > - err |= kho_preserve_folio(page_folio(kho_fdt));
>> > - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
>> > -
>> > - return notifier_from_errno(err);
>> > -}
>> > -
>> > -static int reserve_mem_kho_notifier(struct notifier_block *self,
>> > - unsigned long cmd, void *v)
>> > -{
>> > - switch (cmd) {
>> > - case KEXEC_KHO_FINALIZE:
>> > - return reserve_mem_kho_finalize((struct kho_serialization *)v);
>> > - case KEXEC_KHO_ABORT:
>> > - return NOTIFY_DONE;
>> > - default:
>> > - return NOTIFY_BAD;
>> > - }
>> > -}
>> > -
>> > -static struct notifier_block reserve_mem_kho_nb = {
>> > - .notifier_call = reserve_mem_kho_notifier,
>> > -};
>> >
>> > static int __init prepare_kho_fdt(void)
>> > {
>> > int err = 0, i;
>> > + struct page *fdt_page;
>> > void *fdt;
>> >
>> > - kho_fdt = alloc_page(GFP_KERNEL);
>> > - if (!kho_fdt)
>> > + fdt_page = alloc_page(GFP_KERNEL);
>> > + if (!fdt_page)
>> > return -ENOMEM;
>> >
>> > - fdt = page_to_virt(kho_fdt);
>> > + fdt = page_to_virt(fdt_page);
>> >
>> > err |= fdt_create(fdt, PAGE_SIZE);
>> > err |= fdt_finish_reservemap(fdt);
>> > @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
>> > err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
>> > for (i = 0; i < reserved_mem_count; i++) {
>> > struct reserve_mem_table *map = &reserved_mem_table[i];
>> > + struct page *page = phys_to_page(map->start);
>> > + unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> >
>> > + err |= kho_preserve_pages(page, nr_pages);
>> > err |= fdt_begin_node(fdt, map->name);
>> > err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
>> > err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
>> > @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
>> > err |= fdt_end_node(fdt);
>> > }
>> > err |= fdt_end_node(fdt);
>> > -
>> > err |= fdt_finish(fdt);
>> >
>> > + err |= kho_preserve_folio(page_folio(fdt_page));
>> > + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
>> > +
>> > if (err) {
>> > pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
>> > - put_page(kho_fdt);
>> > - kho_fdt = NULL;
>> > + put_page(fdt_page);
>>
>> This adds subtree to KHO even if the FDT might be invalid. And then
>> leaves a dangling reference in KHO to the FDT in case of an error. I
>> think you should either do this check after
>> kho_preserve_folio(page_folio(fdt_page)) and do a clean error check for
>> kho_add_subtree(), or call kho_remove_subtree() in the error block.
>
> I agree, I do not like these err |= stuff, we should be checking
> errors cleanly, and do proper clean-ups.
Yeah, this is mainly a byproduct of using FDTs. Getting and setting
simple properties also needs error checking and that can get tedious
real quick. Which is why this pattern has shown up I suppose.
>
>> I prefer the former since if kho_add_subtree() is the one that fails,
>> there is little sense in removing a subtree that was never added.
>>
>> > }
>> >
>> > return err;
>> > @@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
>> > err = prepare_kho_fdt();
>> > if (err)
>> > return err;
>> > -
>> > - err = register_kho_notifier(&reserve_mem_kho_nb);
>> > - if (err) {
>> > - put_page(kho_fdt);
>> > - kho_fdt = NULL;
>> > - }
>> > -
>> > return err;
>> > }
>> > late_initcall(reserve_mem_init);
>>
>> --
>> Regards,
>> Pratyush Yadav
--
Regards,
Pratyush Yadav
On Tue, Oct 7, 2025 at 8:10 AM Pratyush Yadav <pratyush@kernel.org> wrote:
>
> On Mon, Oct 06 2025, Pasha Tatashin wrote:
>
> > On Mon, Oct 6, 2025 at 1:01 PM Pratyush Yadav <pratyush@kernel.org> wrote:
> >>
> >> On Mon, Sep 29 2025, Pasha Tatashin wrote:
> >>
> >> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> >> >
> >> > The KHO framework uses a notifier chain as the mechanism for clients to
> >> > participate in the finalization process. While this works for a single,
> >> > central state machine, it is too restrictive for kernel-internal
> >> > components like pstore/reserve_mem or IMA. These components need a
> >> > simpler, direct way to register their state for preservation (e.g.,
> >> > during their initcall) without being part of a complex,
> >> > shutdown-time notifier sequence. The notifier model forces all
> >> > participants into a single finalization flow and makes direct
> >> > preservation from an arbitrary context difficult.
> >> > This patch refactors the client participation model by removing the
> >> > notifier chain and introducing a direct API for managing FDT subtrees.
> >> >
> >> > The core kho_finalize() and kho_abort() state machine remains, but
> >> > clients now register their data with KHO beforehand.
> >> >
> >> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> >> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> >> [...]
> >> > diff --git a/mm/memblock.c b/mm/memblock.c
> >> > index e23e16618e9b..c4b2d4e4c715 100644
> >> > --- a/mm/memblock.c
> >> > +++ b/mm/memblock.c
> >> > @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
> >> > #define MEMBLOCK_KHO_FDT "memblock"
> >> > #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
> >> > #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
> >> > -static struct page *kho_fdt;
> >> > -
> >> > -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> >> > -{
> >> > - int err = 0, i;
> >> > -
> >> > - for (i = 0; i < reserved_mem_count; i++) {
> >> > - struct reserve_mem_table *map = &reserved_mem_table[i];
> >> > - struct page *page = phys_to_page(map->start);
> >> > - unsigned int nr_pages = map->size >> PAGE_SHIFT;
> >> > -
> >> > - err |= kho_preserve_pages(page, nr_pages);
> >> > - }
> >> > -
> >> > - err |= kho_preserve_folio(page_folio(kho_fdt));
> >> > - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
> >> > -
> >> > - return notifier_from_errno(err);
> >> > -}
> >> > -
> >> > -static int reserve_mem_kho_notifier(struct notifier_block *self,
> >> > - unsigned long cmd, void *v)
> >> > -{
> >> > - switch (cmd) {
> >> > - case KEXEC_KHO_FINALIZE:
> >> > - return reserve_mem_kho_finalize((struct kho_serialization *)v);
> >> > - case KEXEC_KHO_ABORT:
> >> > - return NOTIFY_DONE;
> >> > - default:
> >> > - return NOTIFY_BAD;
> >> > - }
> >> > -}
> >> > -
> >> > -static struct notifier_block reserve_mem_kho_nb = {
> >> > - .notifier_call = reserve_mem_kho_notifier,
> >> > -};
> >> >
> >> > static int __init prepare_kho_fdt(void)
> >> > {
> >> > int err = 0, i;
> >> > + struct page *fdt_page;
> >> > void *fdt;
> >> >
> >> > - kho_fdt = alloc_page(GFP_KERNEL);
> >> > - if (!kho_fdt)
> >> > + fdt_page = alloc_page(GFP_KERNEL);
> >> > + if (!fdt_page)
> >> > return -ENOMEM;
> >> >
> >> > - fdt = page_to_virt(kho_fdt);
> >> > + fdt = page_to_virt(fdt_page);
> >> >
> >> > err |= fdt_create(fdt, PAGE_SIZE);
> >> > err |= fdt_finish_reservemap(fdt);
> >> > @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
> >> > err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
> >> > for (i = 0; i < reserved_mem_count; i++) {
> >> > struct reserve_mem_table *map = &reserved_mem_table[i];
> >> > + struct page *page = phys_to_page(map->start);
> >> > + unsigned int nr_pages = map->size >> PAGE_SHIFT;
> >> >
> >> > + err |= kho_preserve_pages(page, nr_pages);
> >> > err |= fdt_begin_node(fdt, map->name);
> >> > err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
> >> > err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
> >> > @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
> >> > err |= fdt_end_node(fdt);
> >> > }
> >> > err |= fdt_end_node(fdt);
> >> > -
> >> > err |= fdt_finish(fdt);
> >> >
> >> > + err |= kho_preserve_folio(page_folio(fdt_page));
> >> > + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
> >> > +
> >> > if (err) {
> >> > pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> >> > - put_page(kho_fdt);
> >> > - kho_fdt = NULL;
> >> > + put_page(fdt_page);
> >>
> >> This adds subtree to KHO even if the FDT might be invalid. And then
> >> leaves a dangling reference in KHO to the FDT in case of an error. I
> >> think you should either do this check after
> >> kho_preserve_folio(page_folio(fdt_page)) and do a clean error check for
> >> kho_add_subtree(), or call kho_remove_subtree() in the error block.
> >
> > I agree, I do not like these err |= stuff, we should be checking
> > errors cleanly, and do proper clean-ups.
>
> Yeah, this is mainly a byproduct of using FDTs. Getting and setting
> simple properties also needs error checking and that can get tedious
> real quick. Which is why this pattern has shown up I suppose.
Exactly. This is also why it's important to replace FDT with something
more sensible for general-purpose live update purposes.
By the way, I forgot to address this comment in the v5 of the KHO
series I sent out yesterday. Could you please take another look? If
everything else is good, I will refresh that series so we can ask
Andrew to take in the KHO patches. That would simplify the LUO series.
Pasha
>
> >
> >> I prefer the former since if kho_add_subtree() is the one that fails,
> >> there is little sense in removing a subtree that was never added.
> >>
> >> > }
> >> >
> >> > return err;
> >> > @@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
> >> > err = prepare_kho_fdt();
> >> > if (err)
> >> > return err;
> >> > -
> >> > - err = register_kho_notifier(&reserve_mem_kho_nb);
> >> > - if (err) {
> >> > - put_page(kho_fdt);
> >> > - kho_fdt = NULL;
> >> > - }
> >> > -
> >> > return err;
> >> > }
> >> > late_initcall(reserve_mem_init);
> >>
> >> --
> >> Regards,
> >> Pratyush Yadav
>
> --
> Regards,
> Pratyush Yadav
On Tue, Oct 07 2025, Pasha Tatashin wrote:
> On Tue, Oct 7, 2025 at 8:10 AM Pratyush Yadav <pratyush@kernel.org> wrote:
>>
>> On Mon, Oct 06 2025, Pasha Tatashin wrote:
>>
>> > On Mon, Oct 6, 2025 at 1:01 PM Pratyush Yadav <pratyush@kernel.org> wrote:
>> >>
>> >> On Mon, Sep 29 2025, Pasha Tatashin wrote:
>> >>
>> >> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>> >> >
>> >> > The KHO framework uses a notifier chain as the mechanism for clients to
>> >> > participate in the finalization process. While this works for a single,
>> >> > central state machine, it is too restrictive for kernel-internal
>> >> > components like pstore/reserve_mem or IMA. These components need a
>> >> > simpler, direct way to register their state for preservation (e.g.,
>> >> > during their initcall) without being part of a complex,
>> >> > shutdown-time notifier sequence. The notifier model forces all
>> >> > participants into a single finalization flow and makes direct
>> >> > preservation from an arbitrary context difficult.
>> >> > This patch refactors the client participation model by removing the
>> >> > notifier chain and introducing a direct API for managing FDT subtrees.
>> >> >
>> >> > The core kho_finalize() and kho_abort() state machine remains, but
>> >> > clients now register their data with KHO beforehand.
>> >> >
>> >> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
>> >> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>> >> [...]
>> >> > diff --git a/mm/memblock.c b/mm/memblock.c
>> >> > index e23e16618e9b..c4b2d4e4c715 100644
>> >> > --- a/mm/memblock.c
>> >> > +++ b/mm/memblock.c
>> >> > @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
>> >> > #define MEMBLOCK_KHO_FDT "memblock"
>> >> > #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
>> >> > #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
>> >> > -static struct page *kho_fdt;
>> >> > -
>> >> > -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
>> >> > -{
>> >> > - int err = 0, i;
>> >> > -
>> >> > - for (i = 0; i < reserved_mem_count; i++) {
>> >> > - struct reserve_mem_table *map = &reserved_mem_table[i];
>> >> > - struct page *page = phys_to_page(map->start);
>> >> > - unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> >> > -
>> >> > - err |= kho_preserve_pages(page, nr_pages);
>> >> > - }
>> >> > -
>> >> > - err |= kho_preserve_folio(page_folio(kho_fdt));
>> >> > - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
>> >> > -
>> >> > - return notifier_from_errno(err);
>> >> > -}
>> >> > -
>> >> > -static int reserve_mem_kho_notifier(struct notifier_block *self,
>> >> > - unsigned long cmd, void *v)
>> >> > -{
>> >> > - switch (cmd) {
>> >> > - case KEXEC_KHO_FINALIZE:
>> >> > - return reserve_mem_kho_finalize((struct kho_serialization *)v);
>> >> > - case KEXEC_KHO_ABORT:
>> >> > - return NOTIFY_DONE;
>> >> > - default:
>> >> > - return NOTIFY_BAD;
>> >> > - }
>> >> > -}
>> >> > -
>> >> > -static struct notifier_block reserve_mem_kho_nb = {
>> >> > - .notifier_call = reserve_mem_kho_notifier,
>> >> > -};
>> >> >
>> >> > static int __init prepare_kho_fdt(void)
>> >> > {
>> >> > int err = 0, i;
>> >> > + struct page *fdt_page;
>> >> > void *fdt;
>> >> >
>> >> > - kho_fdt = alloc_page(GFP_KERNEL);
>> >> > - if (!kho_fdt)
>> >> > + fdt_page = alloc_page(GFP_KERNEL);
>> >> > + if (!fdt_page)
>> >> > return -ENOMEM;
>> >> >
>> >> > - fdt = page_to_virt(kho_fdt);
>> >> > + fdt = page_to_virt(fdt_page);
>> >> >
>> >> > err |= fdt_create(fdt, PAGE_SIZE);
>> >> > err |= fdt_finish_reservemap(fdt);
>> >> > @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
>> >> > err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
>> >> > for (i = 0; i < reserved_mem_count; i++) {
>> >> > struct reserve_mem_table *map = &reserved_mem_table[i];
>> >> > + struct page *page = phys_to_page(map->start);
>> >> > + unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> >> >
>> >> > + err |= kho_preserve_pages(page, nr_pages);
>> >> > err |= fdt_begin_node(fdt, map->name);
>> >> > err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
>> >> > err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
>> >> > @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
>> >> > err |= fdt_end_node(fdt);
>> >> > }
>> >> > err |= fdt_end_node(fdt);
>> >> > -
>> >> > err |= fdt_finish(fdt);
>> >> >
>> >> > + err |= kho_preserve_folio(page_folio(fdt_page));
>> >> > + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
>> >> > +
>> >> > if (err) {
>> >> > pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
>> >> > - put_page(kho_fdt);
>> >> > - kho_fdt = NULL;
>> >> > + put_page(fdt_page);
>> >>
>> >> This adds subtree to KHO even if the FDT might be invalid. And then
>> >> leaves a dangling reference in KHO to the FDT in case of an error. I
>> >> think you should either do this check after
>> >> kho_preserve_folio(page_folio(fdt_page)) and do a clean error check for
>> >> kho_add_subtree(), or call kho_remove_subtree() in the error block.
>> >
>> > I agree, I do not like these err |= stuff, we should be checking
>> > errors cleanly, and do proper clean-ups.
>>
>> Yeah, this is mainly a byproduct of using FDTs. Getting and setting
>> simple properties also needs error checking and that can get tedious
>> real quick. Which is why this pattern has shown up I suppose.
>
> Exactly. This is also why it's important to replace FDT with something
> more sensible for general-purpose live update purposes.
>
> By the way, I forgot to address this comment in the v5 of the KHO
> series I sent out yesterday. Could you please take another look? If
> everything else is good, I will refresh that series so we can ask
> Andrew to take in the KHO patches. That would simplify the LUO series.
Good idea. Will take a look.
[...]
--
Regards,
Pratyush Yadav
Hi Pasha, On Mon, Sep 29 2025, Pasha Tatashin wrote: > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > The KHO framework uses a notifier chain as the mechanism for clients to > participate in the finalization process. While this works for a single, > central state machine, it is too restrictive for kernel-internal > components like pstore/reserve_mem or IMA. These components need a > simpler, direct way to register their state for preservation (e.g., > during their initcall) without being part of a complex, > shutdown-time notifier sequence. The notifier model forces all > participants into a single finalization flow and makes direct > preservation from an arbitrary context difficult. > This patch refactors the client participation model by removing the > notifier chain and introducing a direct API for managing FDT subtrees. > > The core kho_finalize() and kho_abort() state machine remains, but > clients now register their data with KHO beforehand. > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com> This patch breaks build of test_kho.c (under CONFIG_TEST_KEXEC_HANDOVER): lib/test_kho.c:49:14: error: ‘KEXEC_KHO_ABORT’ undeclared (first use in this function) 49 | case KEXEC_KHO_ABORT: | ^~~~~~~~~~~~~~~ [...] lib/test_kho.c:51:14: error: ‘KEXEC_KHO_FINALIZE’ undeclared (first use in this function) 51 | case KEXEC_KHO_FINALIZE: | ^~~~~~~~~~~~~~~~~~ [...] I think you need to update it as well to drop notifier usage. [...] -- Regards, Pratyush Yadav
Hi,
On Mon, Oct 06 2025, Pratyush Yadav wrote:
> Hi Pasha,
>
> On Mon, Sep 29 2025, Pasha Tatashin wrote:
>
>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>
>> The KHO framework uses a notifier chain as the mechanism for clients to
>> participate in the finalization process. While this works for a single,
>> central state machine, it is too restrictive for kernel-internal
>> components like pstore/reserve_mem or IMA. These components need a
>> simpler, direct way to register their state for preservation (e.g.,
>> during their initcall) without being part of a complex,
>> shutdown-time notifier sequence. The notifier model forces all
>> participants into a single finalization flow and makes direct
>> preservation from an arbitrary context difficult.
>> This patch refactors the client participation model by removing the
>> notifier chain and introducing a direct API for managing FDT subtrees.
>>
>> The core kho_finalize() and kho_abort() state machine remains, but
>> clients now register their data with KHO beforehand.
>>
>> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
>> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>
> This patch breaks build of test_kho.c (under CONFIG_TEST_KEXEC_HANDOVER):
>
> lib/test_kho.c:49:14: error: ‘KEXEC_KHO_ABORT’ undeclared (first use in this function)
> 49 | case KEXEC_KHO_ABORT:
> | ^~~~~~~~~~~~~~~
> [...]
> lib/test_kho.c:51:14: error: ‘KEXEC_KHO_FINALIZE’ undeclared (first use in this function)
> 51 | case KEXEC_KHO_FINALIZE:
> | ^~~~~~~~~~~~~~~~~~
> [...]
>
> I think you need to update it as well to drop notifier usage.
Here's the fix. Build passes now and the test succeeds under my qemu
test setup.
--- 8< ---
From a8e6b5dfef38bfbcd41f3dd08598cb79a0701d7e Mon Sep 17 00:00:00 2001
From: Pratyush Yadav <pratyush@kernel.org>
Date: Mon, 6 Oct 2025 18:35:20 +0200
Subject: [PATCH] fixup! kho: drop notifiers
Update KHO test to drop the notifiers as well.
Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
---
lib/test_kho.c | 32 +++-----------------------------
1 file changed, 3 insertions(+), 29 deletions(-)
diff --git a/lib/test_kho.c b/lib/test_kho.c
index fe8504e3407b5..e9462a1e4b93b 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -38,33 +38,6 @@ struct kho_test_state {
static struct kho_test_state kho_test_state;
-static int kho_test_notifier(struct notifier_block *self, unsigned long cmd,
- void *v)
-{
- struct kho_test_state *state = &kho_test_state;
- struct kho_serialization *ser = v;
- int err = 0;
-
- switch (cmd) {
- case KEXEC_KHO_ABORT:
- return NOTIFY_DONE;
- case KEXEC_KHO_FINALIZE:
- /* Handled below */
- break;
- default:
- return NOTIFY_BAD;
- }
-
- err |= kho_preserve_folio(state->fdt);
- err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt));
-
- return err ? NOTIFY_BAD : NOTIFY_DONE;
-}
-
-static struct notifier_block kho_test_nb = {
- .notifier_call = kho_test_notifier,
-};
-
static int kho_test_save_data(struct kho_test_state *state, void *fdt)
{
phys_addr_t *folios_info;
@@ -111,6 +84,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state)
fdt = folio_address(state->fdt);
+ err |= kho_preserve_folio(state->fdt);
err |= fdt_create(fdt, fdt_size);
err |= fdt_finish_reservemap(fdt);
@@ -194,7 +168,7 @@ static int kho_test_save(void)
if (err)
goto err_free_folios;
- err = register_kho_notifier(&kho_test_nb);
+ err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
if (err)
goto err_free_fdt;
@@ -309,7 +283,7 @@ static void kho_test_cleanup(void)
static void __exit kho_test_exit(void)
{
- unregister_kho_notifier(&kho_test_nb);
+ kho_remove_subtree(folio_address(kho_test_state.fdt));
kho_test_cleanup();
}
module_exit(kho_test_exit);
--
Regards,
Pratyush Yadav
On Mon, Oct 6, 2025 at 10:30 AM Pratyush Yadav <pratyush@kernel.org> wrote: > > Hi Pasha, > > On Mon, Sep 29 2025, Pasha Tatashin wrote: > > > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > > > The KHO framework uses a notifier chain as the mechanism for clients to > > participate in the finalization process. While this works for a single, > > central state machine, it is too restrictive for kernel-internal > > components like pstore/reserve_mem or IMA. These components need a > > simpler, direct way to register their state for preservation (e.g., > > during their initcall) without being part of a complex, > > shutdown-time notifier sequence. The notifier model forces all > > participants into a single finalization flow and makes direct > > preservation from an arbitrary context difficult. > > This patch refactors the client participation model by removing the > > notifier chain and introducing a direct API for managing FDT subtrees. > > > > The core kho_finalize() and kho_abort() state machine remains, but > > clients now register their data with KHO beforehand. > > > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com> > > This patch breaks build of test_kho.c (under CONFIG_TEST_KEXEC_HANDOVER): > > lib/test_kho.c:49:14: error: ‘KEXEC_KHO_ABORT’ undeclared (first use in this function) > 49 | case KEXEC_KHO_ABORT: > | ^~~~~~~~~~~~~~~ > [...] > lib/test_kho.c:51:14: error: ‘KEXEC_KHO_FINALIZE’ undeclared (first use in this function) > 51 | case KEXEC_KHO_FINALIZE: > | ^~~~~~~~~~~~~~~~~~ > [...] > > I think you need to update it as well to drop notifier usage. Yes, thank you Pratyush. I missed this change in my patch. Pasha
© 2016 - 2026 Red Hat, Inc.