Especially for support of Xenstore-stubdom live update some memory must
be handed over to the new kernel without moving it around: as the
9pfs device used for storing and retrieving the state of Xenstore
needs to be kept operational across kexec (it can't be reopened due to
Xenstore not being available without access to the device), the ring
pages need to be accessible via active grants by the backend all the
time.
Add the basic support for that by reserving a pre-defined number of
memory pages at the top of the memory. This memory area will be
handed over to the new kernel via specifying it as a module in
struct hvm_start_info.
The contents of the memory area are described via a generic table of
contents in the last page of the memory.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
Config.mk | 2 ++
arch/x86/kexec.c | 77 +++++++++++++++++++++++++++++++++++++++++++
arch/x86/mm.c | 18 ++++++++++
arch/x86/setup.c | 28 ++++++++++++++++
include/kernel.h | 1 +
include/kexec.h | 45 +++++++++++++++++++++++++
include/x86/arch_mm.h | 1 +
kexec.c | 3 ++
mm.c | 6 ++++
9 files changed, 181 insertions(+)
diff --git a/Config.mk b/Config.mk
index b9675e61..0e4e86d8 100644
--- a/Config.mk
+++ b/Config.mk
@@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
$(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
$(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
+CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
+
$(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
CONFIG-x += CONFIG_LIBXS
diff --git a/arch/x86/kexec.c b/arch/x86/kexec.c
index 804e7b6d..7fb98473 100644
--- a/arch/x86/kexec.c
+++ b/arch/x86/kexec.c
@@ -201,10 +201,73 @@ static unsigned long kexec_param_loc;
static unsigned int kexec_param_size;
static unsigned long kexec_param_mem;
+/*
+ * Check whether the kernel was started with a valid kexec module.
+ *
+ * The module address and size are obtained via get_module(). A module is
+ * accepted only if its size is a non-zero multiple of PAGE_SIZE, its last
+ * page starts with the KEXECMOD_EYECATCHER signature, and the n_pages value
+ * stored there equals the number of module pages minus the last one.
+ *
+ * Returns a pointer to the struct kexec_module at the start of the module's
+ * last page, or NULL if there is no module or it fails any of the checks.
+ */
+static struct kexec_module *kexec_check_module(void)
+{
+    unsigned long mod_size;
+    unsigned long mod;
+    struct kexec_module *module_ptr;
+
+    mod = get_module(&mod_size);
+    if ( !mod )
+        return NULL;
+
+    /*
+     * Size must be a non-zero multiple of PAGE_SIZE. Rejecting a size of 0
+     * avoids looking at the page in front of the module below.
+     */
+    if ( !mod_size || (mod_size & ~PAGE_MASK) )
+        return NULL;
+
+    /* Kexec module description is at start of the last page of the module. */
+    module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
+
+    /* Check eye catcher. */
+    if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
+                sizeof(module_ptr->eye_catcher)) )
+        return NULL;
+
+    /* The last page holds the admin data, all others are allocatable. */
+    if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
+        return NULL;
+
+    return module_ptr;
+}
+
+/*
+ * Callback handed to iterate_memory_range() by kexec_module().
+ *
+ * For a memory range [from, to) large enough to hold
+ * CONFIG_KEXEC_MODULE_PAGES pages, remember the start address of a module
+ * placed at the very end of the range in kexec_mod_start, but only if that
+ * address is higher than the one found so far.
+ */
+static void get_mod_addr(unsigned long from, unsigned long to)
+{
+    unsigned long needed = PFN_PHYS(CONFIG_KEXEC_MODULE_PAGES);
+    unsigned long candidate;
+
+    /* Range too small for the module: nothing to do. */
+    if ( to - from < needed )
+        return;
+
+    /* Place the module at the top of the range, keep the highest one. */
+    candidate = to - needed;
+    if ( candidate > kexec_mod_start )
+        kexec_mod_start = candidate;
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+/*
+ * Check for a kexec module and create the kexec memory area if needed.
+ *
+ * If the kernel was started with a valid kexec module, that module is reused
+ * as is. Otherwise, if CONFIG_KEXEC_MODULE_PAGES is non-zero, a new module of
+ * that many pages is placed at the end of the highest fitting memory range
+ * reported via get_mod_addr() between start_pfn and max_pfn (capped at
+ * 4 GiB). Its last page is initialized as the administrative page and the
+ * whole area is registered as a reserved range.
+ *
+ * To be called after arch_init_mm() and before init_page_allocator().
+ *
+ * NOTE(review): when an existing module is reused, kexec_mod_start is not set
+ * and no reserved range is registered in this function - confirm that this is
+ * handled elsewhere.
+ */
+void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
+{
+    /* Reuse already existing kexec module. */
+    mod_ptr = kexec_check_module();
+    if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )
+    {
+        /* Only consider memory below 4 GiB. */
+        max_pfn = min(max_pfn, PHYS_PFN(0xffffffff));
+
+        iterate_memory_range(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn),
+                             get_mod_addr);
+        BUG_ON(!kexec_mod_start);
+
+        /* The administrative data is located in the last module page. */
+        mod_ptr = (void *)(kexec_mod_start +
+                           ((CONFIG_KEXEC_MODULE_PAGES - 1) << PAGE_SHIFT));
+        memset(mod_ptr, 0, PAGE_SIZE);
+        memcpy(mod_ptr->eye_catcher, KEXECMOD_EYECATCHER,
+               sizeof(mod_ptr->eye_catcher));
+        mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1;
+        memset(mod_ptr->pg2rec, KEXECMOD_PG_FREE, mod_ptr->n_pages);
+
+        /*
+         * The record table follows pg2rec[], which has n_pages (not
+         * CONFIG_KEXEC_MODULE_PAGES) entries. Add one byte of padding for an
+         * odd n_pages in order to keep the 16-bit table entries aligned.
+         */
+        mod_ptr->recs_off = sizeof(struct kexec_module) +
+                            mod_ptr->n_pages + (mod_ptr->n_pages & 1);
+
+        set_reserved_range(kexec_mod_start, (unsigned long)mod_ptr + PAGE_SIZE);
+    }
+}
+
void kexec_set_param_loc(const char *cmdline)
{
kexec_param_size = sizeof(struct hvm_start_info);
kexec_param_size += e820_entries * sizeof(struct hvm_memmap_table_entry);
+ if ( mod_ptr )
+ kexec_param_size += sizeof(struct hvm_modlist_entry);
kexec_param_size += strlen(cmdline) + 1;
kexec_last_addr = (kexec_last_addr + 7) & ~7UL;
@@ -218,6 +281,7 @@ int kexec_get_entry(const char *cmdline)
void *next;
struct hvm_start_info *info;
struct hvm_memmap_table_entry *mmap;
+ struct hvm_modlist_entry *mod;
unsigned int order;
unsigned int i;
@@ -252,6 +316,19 @@ int kexec_get_entry(const char *cmdline)
info->memmap_entries = mmap - (struct hvm_memmap_table_entry *)next;
next = mmap;
+ if ( mod_ptr )
+ {
+ mod = next;
+ memset(mod, 0, sizeof(*mod));
+ info->nr_modules = 1;
+ info->modlist_paddr = kexec_param_loc +
+ (unsigned long)next - kexec_param_mem;
+ mod->paddr = kexec_mod_start;
+ mod->size = PFN_PHYS(mod_ptr->n_pages + 1);
+ mod->cmdline_paddr = 0;
+ next = mod + 1;
+ }
+
info->cmdline_paddr = kexec_param_loc + (unsigned long)next - kexec_param_mem;
strcpy(next, cmdline);
diff --git a/arch/x86/mm.c b/arch/x86/mm.c
index 7c3c83be..bdff38fd 100644
--- a/arch/x86/mm.c
+++ b/arch/x86/mm.c
@@ -151,6 +151,24 @@ static struct {
unsigned long size;
} reserved_range[MAX_RSV_RANGES];
+/*
+ * Record [start, end) in the first unused entry of reserved_range[].
+ *
+ * An entry counts as unused while its size is 0. Having no unused entry
+ * left is a BUG().
+ */
+void set_reserved_range(unsigned long start, unsigned long end)
+{
+    unsigned int slot = 0;
+
+    /* Skip all entries already in use. */
+    while ( slot < MAX_RSV_RANGES && reserved_range[slot].size )
+        slot++;
+
+    if ( slot < MAX_RSV_RANGES )
+    {
+        reserved_range[slot].start = start;
+        reserved_range[slot].size = end - start;
+
+        return;
+    }
+
+    BUG();
+}
+
void check_memory_range(unsigned long *from, unsigned long *to)
{
unsigned int m;
diff --git a/arch/x86/setup.c b/arch/x86/setup.c
index 2085ee5c..299ff8c7 100644
--- a/arch/x86/setup.c
+++ b/arch/x86/setup.c
@@ -127,6 +127,13 @@ static void print_start_of_day(void *p)
printk(" cmd_line: %s\n", cmdline);
printk(" stack: %p-%p\n", stack, stack + sizeof(stack));
}
+
+/*
+ * Return the module start address and store the module length in *size.
+ *
+ * Both values are taken unmodified from start_info_ptr (mod_start and
+ * mod_len). *size is always written. kexec_check_module() treats a return
+ * value of 0 as "no module".
+ */
+unsigned long get_module(unsigned long *size)
+{
+ *size = start_info_ptr->mod_len;
+
+ return start_info_ptr->mod_start;
+}
#else
/*
@@ -182,6 +189,27 @@ static void print_start_of_day(void *p)
printk(" stack: %p-%p\n", stack, stack + sizeof(stack));
arch_print_memmap();
}
+
+/*
+ * Return the address (paddr) of the first module listed in
+ * hvm_start_info_ptr and store its size in *size.
+ *
+ * Returns 0 with *size set to 0 if no module is present. If more than one
+ * module is present, a message is printed and the data of the first module
+ * is returned.
+ */
+unsigned long get_module(unsigned long *size)
+{
+    struct hvm_modlist_entry *mod;
+
+    if ( !hvm_start_info_ptr->nr_modules )
+    {
+        /* Always write the output parameter, as the other variant does. */
+        *size = 0;
+
+        return 0;
+    }
+
+    if ( hvm_start_info_ptr->nr_modules > 1 )
+    {
+        printk("get_module() called with more than 1 module present\n");
+        printk("First module data returned\n");
+    }
+
+    /* modlist_paddr is used directly as the address of the module list. */
+    mod = (struct hvm_modlist_entry *)(unsigned long)
+          hvm_start_info_ptr->modlist_paddr;
+
+    *size = mod->size;
+
+    return mod->paddr;
+}
#endif
/*
diff --git a/include/kernel.h b/include/kernel.h
index 161d7571..e11b04cf 100644
--- a/include/kernel.h
+++ b/include/kernel.h
@@ -10,5 +10,6 @@ void post_suspend(int canceled);
void do_exit(void) __attribute__((noreturn));
void arch_do_exit(void);
void stop_kernel(void);
+unsigned long get_module(unsigned long *size);
#endif /* _KERNEL_H_ */
diff --git a/include/kexec.h b/include/kexec.h
index b89c3000..0200005f 100644
--- a/include/kexec.h
+++ b/include/kexec.h
@@ -2,6 +2,48 @@
#define _KEXEC_H
#include <mini-os/elf.h>
+/*
+ * Kexec module used to hand over memory across kexec().
+ *
+ * This is an ABI which should be modified only in a compatible way.
+ * struct kexec_module is located at the start of the last page of the module.
+ *
+ * The module can contain data/pages of multiple users. Each user has its own
+ * record, whose layout depends on the user. Records are located via a table
+ * of record offsets.
+ *
+ * All admin data (struct kexec_module, record offset table and records) must
+ * fit into the last page of the module.
+ *
+ * Layout of the module:
+ *   pages 0 ... n_pages - 1   allocatable pages
+ *   last page:
+ *     struct kexec_module     fixed part, at the start of the page
+ *     pg2rec[n_pages]         directly following the fixed part
+ *     record table            array of struct kexec_module_rec at recs_off
+ *     records                 located via the offsets in the record table
+ */
+struct kexec_module {
+ /* Signature identifying a kexec module, set to KEXECMOD_EYECATCHER. */
+ uint8_t eye_catcher[8];
+#define KEXECMOD_EYECATCHER "KexecMem"
+ uint16_t n_pages; /* Number of allocatable pages in the module */
+ /* (the last page is not counted). */
+ uint16_t n_records; /* Size of record table (max. 255). */
+#define KEXECMOD_REC_MAX 255
+ uint16_t recs_off; /* Offset to record table from start of page. */
+ /* The record table is an array of */
+ /* struct kexec_module_rec. */
+ uint8_t pg2rec[]; /* Mapping of allocatable module pages to their */
+ /* associated record: one entry per page, holding */
+ /* the record number (starting from 0) the page is */
+ /* allocated for. Any number of pages can belong */
+ /* to the same record. Free pages have value 255. */
+#define KEXECMOD_PG_FREE 255
+};
+
+/*
+ * Entry of the record table found at kexec_module.recs_off.
+ *
+ * Each entry describes one record. The record data is stored in the same
+ * page as struct kexec_module (the last page of the module).
+ */
+struct kexec_module_rec {
+ uint16_t offset; /* Offset to record from start of the last */
+ /* page of the module. */
+ uint8_t type; /* Type of record. */
+#define KEXECMOD_REC_NONE 0
+ uint8_t size; /* Size of record. */
+};
+
+#ifndef CONFIG_KEXEC_MODULE_PAGES
+#define CONFIG_KEXEC_MODULE_PAGES 0
+#endif
+
+extern unsigned long kexec_mod_start;
+extern struct kexec_module *mod_ptr;
+
/* One element of kexec actions (last element must have action KEXEC_CALL): */
struct kexec_action {
enum {
@@ -60,4 +102,7 @@ int kexec_move_used_pages(unsigned long boundary, unsigned long kernel,
unsigned long kernel_size);
void kexec_move_used_pages_undo(void);
+/* Check for kexec module and create kexec memory if needed. */
+void kexec_module(unsigned long start_pfn, unsigned long max_pfn);
+
#endif /* _KEXEC_H */
diff --git a/include/x86/arch_mm.h b/include/x86/arch_mm.h
index a1b975dc..ae56781e 100644
--- a/include/x86/arch_mm.h
+++ b/include/x86/arch_mm.h
@@ -289,6 +289,7 @@ unsigned long alloc_virt_kernel(unsigned n_pages);
void arch_mm_pre_suspend(void);
void arch_mm_post_suspend(int canceled);
+void set_reserved_range(unsigned long start, unsigned long end);
#ifndef CONFIG_PARAVIRT
void arch_print_memmap(void);
diff --git a/kexec.c b/kexec.c
index 2607c819..ded29882 100644
--- a/kexec.c
+++ b/kexec.c
@@ -250,3 +250,6 @@ int kexec_add_action(int action, void *dest, void *src, unsigned int len)
return 0;
}
+
+unsigned long kexec_mod_start;
+struct kexec_module *mod_ptr;
diff --git a/mm.c b/mm.c
index 8c41d2f2..b5e8d801 100644
--- a/mm.c
+++ b/mm.c
@@ -44,6 +44,7 @@
#include <mini-os/lib.h>
#include <mini-os/xmalloc.h>
#include <mini-os/e820.h>
+#include <mini-os/kexec.h>
/*
* ALLOCATION BITMAP
@@ -503,6 +504,11 @@ void init_mm(void)
arch_init_mm(&start_pfn, &max_pfn);
get_max_pages();
+#ifdef CONFIG_KEXEC
+ /* To be called after arch_init_mm() and before init_page_allocator(). */
+ kexec_module(start_pfn, max_pfn);
+#endif
+
/* Now we can initialise the page allocator. */
init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
printk("MM: done\n");
--
2.43.0
On 2025-07-02 04:12, Juergen Gross wrote:
> Especially for support of Xenstore-stubdom live update some memory must
> be handed over to the new kernel without moving it around: as the
> 9pfs device used for storing and retrieving the state of Xenstore
> needs to be kept operational across kexec (it can't be reopened due to
> Xenstore not being available without access to the device), the ring
> pages need to be accessible via active grants by the backend all the
> time.
>
> Add the basic support for that by reserving a pre-defined number of
> memory pages at the top of the memory. This memory area will be
> handed over to the new kernel via specifying it as a module in
> struct hvm_start_info.
>
> The contents of the memory area are described via a generic table of
> contents in the last page of the memory.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> Config.mk | 2 ++
> arch/x86/kexec.c | 77 +++++++++++++++++++++++++++++++++++++++++++
> arch/x86/mm.c | 18 ++++++++++
> arch/x86/setup.c | 28 ++++++++++++++++
> include/kernel.h | 1 +
> include/kexec.h | 45 +++++++++++++++++++++++++
> include/x86/arch_mm.h | 1 +
> kexec.c | 3 ++
> mm.c | 6 ++++
> 9 files changed, 181 insertions(+)
>
> diff --git a/Config.mk b/Config.mk
> index b9675e61..0e4e86d8 100644
> --- a/Config.mk
> +++ b/Config.mk
> @@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
> $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
> $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
>
> +CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
> +
I don't know Makefiles well enough to review the preceding patch. This
doesn't seem to be used?
> $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
>
> CONFIG-x += CONFIG_LIBXS
> diff --git a/arch/x86/kexec.c b/arch/x86/kexec.c
> index 804e7b6d..7fb98473 100644
> --- a/arch/x86/kexec.c
> +++ b/arch/x86/kexec.c
> @@ -201,10 +201,73 @@ static unsigned long kexec_param_loc;
> static unsigned int kexec_param_size;
> static unsigned long kexec_param_mem;
>
> +static struct kexec_module *kexec_check_module(void)
> +{
> + unsigned long mod_size;
> + unsigned long mod;
> + struct kexec_module *module_ptr;
> +
> + mod = get_module(&mod_size);
> + if ( !mod )
> + return NULL;
> + /* Size must be a multiple of PAGE_SIZE. */
> + if ( mod_size & ~PAGE_MASK )
> + return NULL;
> +
> + /* Kxec module description is at start of the last page of the module. */
Kexec
> + module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
> +
> + /* Check eye catcher. */
> + if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
> + sizeof(module_ptr->eye_catcher)) )
> + return NULL;
> + if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
> + return NULL;
> +
> + return module_ptr;
> +}
> +#define min(a, b) ((a) < (b) ? (a) : (b))
> +void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
> +{
> + /* Reuse already existing kexec module. */
> + mod_ptr = kexec_check_module();
> + if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )
What if CONFIG_KEXEC_MODULE_PAGES changes between the old and the new
stubdom?
> + {
> + max_pfn = min(max_pfn, PHYS_PFN(0xffffffff));
> +
> + iterate_memory_range(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn),
> + get_mod_addr);
> + BUG_ON(!kexec_mod_start);
> +
> + mod_ptr = (void *)(kexec_mod_start +
> + ((CONFIG_KEXEC_MODULE_PAGES - 1) << PAGE_SHIFT));
> + memset(mod_ptr, 0, PAGE_SIZE);
> + memcpy(mod_ptr->eye_catcher, KEXECMOD_EYECATCHER,
> + sizeof(mod_ptr->eye_catcher));
> + mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1;
> + memset(mod_ptr->pg2rec, KEXECMOD_PG_FREE, mod_ptr->n_pages);
I was wondering about a BUILD_BUG_ON for CONFIG_KEXEC_MODULE_PAGES
versus some limit but I can't think of one.
> + mod_ptr->recs_off = sizeof(struct kexec_module) +
> + CONFIG_KEXEC_MODULE_PAGES + (mod_ptr->n_pages & 1);
mod_ptr->n_pages & 1 is to ensure 16bit alignment?
mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1, and pg2rec is n_pages,
so using CONFIG_KEXEC_MODULE_PAGES makes this off by 1?
> +
> + set_reserved_range(kexec_mod_start, (unsigned long)mod_ptr + PAGE_SIZE);
> + }
> +}
> +
> @@ -252,6 +316,19 @@ int kexec_get_entry(const char *cmdline)
> info->memmap_entries = mmap - (struct hvm_memmap_table_entry *)next;
> next = mmap;
>
> + if ( mod_ptr )
> + {
> + mod = next;
> + memset(mod, 0, sizeof(*mod));
> + info->nr_modules = 1;
> + info->modlist_paddr = kexec_param_loc +
Looking at this again, I wonder if kexec_param_loc would be better named
_pa or _paddr.
> + (unsigned long)next - kexec_param_mem;
> + mod->paddr = kexec_mod_start;
> + mod->size = PFN_PHYS(mod_ptr->n_pages + 1);
> + mod->cmdline_paddr = 0;
> + next = mod + 1;
> + }
> +
> info->cmdline_paddr = kexec_param_loc + (unsigned long)next - kexec_param_mem;
> strcpy(next, cmdline);
>
> diff --git a/include/kexec.h b/include/kexec.h
> index b89c3000..0200005f 100644
> --- a/include/kexec.h
> +++ b/include/kexec.h
> @@ -2,6 +2,48 @@
> #define _KEXEC_H
> #include <mini-os/elf.h>
>
> +/*
> + * Kexec module used to hand over memory across kexec().
> + *
> + * This is an ABI which should be modified only in a compatible way.
> + * struct kexec_module is located at the start of the last page of the module.
Why is kexec_module, which seems like a header, placed in the last page?
> + *
> + * The module can contain data/pages of multiple users. Each user has an own
> + * record which layout is depending on the user. Records are linked via a table
> + * of record offsets.
> + *
> + * All admin data (struct kexec_module, record offset table and records) must
> + * fit into the last page of the module.
> + */
> +struct kexec_module {
> + uint8_t eye_catcher[8];
> +#define KEXECMOD_EYECATCHER "KexecMem"
> + uint16_t n_pages; /* Number of allocatable pages in the module. */
> + uint16_t n_records; /* Size of record table (max. 255). */
> +#define KEXECMOD_REC_MAX 255
> + uint16_t recs_off; /* Offset to record table from start of page. */
> + /* The record table is an array of */
> + /* struct kexec_module_rec. */
> + uint8_t pg2rec[]; /* One entry per allocatable module page, value */
> + /* is record number (starting from 0) associated */
> + /* with it. Free pages have value 255. */
This reads like it is 1 page per record...
> +#define KEXECMOD_PG_FREE 255
> +};
> +
> +struct kexec_module_rec {
> + uint16_t offset; /* Offset to record from start of page. */
... but then why would you need an offset? How do you identify which
"page" or do you mean from the start of the module?
> + uint8_t type; /* Type of record. */
> +#define KEXECMOD_REC_NONE 0
> + uint8_t size; /* Size of record. */
> +};
> +
> +#ifndef CONFIG_KEXEC_MODULE_PAGES
> +#define CONFIG_KEXEC_MODULE_PAGES 0
> +#endif
> +
> +extern unsigned long kexec_mod_start;
> +extern struct kexec_module *mod_ptr;
> +
> /* One element of kexec actions (last element must have action KEXEC_CALL): */
> struct kexec_action {
> enum {
> diff --git a/kexec.c b/kexec.c
> index 2607c819..ded29882 100644
> --- a/kexec.c
> +++ b/kexec.c
> @@ -250,3 +250,6 @@ int kexec_add_action(int action, void *dest, void *src, unsigned int len)
>
> return 0;
> }
> +
> +unsigned long kexec_mod_start;
> +struct kexec_module *mod_ptr;
Personally, I find it a little unusual to add variables at the end of
the file. But it's fine and you don't have to change it.
Regards,
Jason
On 11.07.25 01:21, Jason Andryuk wrote:
> On 2025-07-02 04:12, Juergen Gross wrote:
>> Especially for support of Xenstore-stubdom live update some memory must
>> be handed over to the new kernel without moving it around: as the
>> 9pfs device used for storing and retrieving the state of Xenstore
>> needs to be kept operational across kexec (it can't be reopened due to
>> Xenstore not being available without access to the device), the ring
>> pages need to be accessible via active grants by the backend all the
>> time.
>>
>> Add the basic support for that by reserving a pre-defined number of
>> memory pages at the top of the memory. This memory area will be
>> handed over to the new kernel via specifying it as a module in
>> struct hvm_start_info.
>>
>> The contents of the memory area are described via a generic table of
>> contents in the last page of the memory.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> Config.mk | 2 ++
>> arch/x86/kexec.c | 77 +++++++++++++++++++++++++++++++++++++++++++
>> arch/x86/mm.c | 18 ++++++++++
>> arch/x86/setup.c | 28 ++++++++++++++++
>> include/kernel.h | 1 +
>> include/kexec.h | 45 +++++++++++++++++++++++++
>> include/x86/arch_mm.h | 1 +
>> kexec.c | 3 ++
>> mm.c | 6 ++++
>> 9 files changed, 181 insertions(+)
>>
>> diff --git a/Config.mk b/Config.mk
>> index b9675e61..0e4e86d8 100644
>> --- a/Config.mk
>> +++ b/Config.mk
>> @@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
>> $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
>> $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
>> +CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
>> +
>
> I don't know Makefiles well enough to review the preceding patch. This doesn't
> seem to be used?
It is.
>
>> $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
>> CONFIG-x += CONFIG_LIBXS
>> diff --git a/arch/x86/kexec.c b/arch/x86/kexec.c
>> index 804e7b6d..7fb98473 100644
>> --- a/arch/x86/kexec.c
>> +++ b/arch/x86/kexec.c
>> @@ -201,10 +201,73 @@ static unsigned long kexec_param_loc;
>> static unsigned int kexec_param_size;
>> static unsigned long kexec_param_mem;
>> +static struct kexec_module *kexec_check_module(void)
>> +{
>> + unsigned long mod_size;
>> + unsigned long mod;
>> + struct kexec_module *module_ptr;
>> +
>> + mod = get_module(&mod_size);
>> + if ( !mod )
>> + return NULL;
>> + /* Size must be a multiple of PAGE_SIZE. */
>> + if ( mod_size & ~PAGE_MASK )
>> + return NULL;
>> +
>> + /* Kxec module description is at start of the last page of the module. */
>
> Kexec
Yes. Seen it already.
>
>> + module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
>> +
>> + /* Check eye catcher. */
>> + if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
>> + sizeof(module_ptr->eye_catcher)) )
>> + return NULL;
>> + if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
>> + return NULL;
>> +
>> + return module_ptr;
>> +}
>
>> +#define min(a, b) ((a) < (b) ? (a) : (b))
>> +void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
>> +{
>> + /* Reuse already existing kexec module. */
>> + mod_ptr = kexec_check_module();
>> + if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )
>
> What if CONFIG_KEXEC_MODULE_PAGES changes between the old and the new stubdom?
Right now this wouldn't really matter. The CONFIG value is used
only for sizing the module if we are not started via kexec().
>
>> + {
>> + max_pfn = min(max_pfn, PHYS_PFN(0xffffffff));
>> +
>> + iterate_memory_range(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn),
>> + get_mod_addr);
>> + BUG_ON(!kexec_mod_start);
>> +
>> + mod_ptr = (void *)(kexec_mod_start +
>> + ((CONFIG_KEXEC_MODULE_PAGES - 1) << PAGE_SHIFT));
>> + memset(mod_ptr, 0, PAGE_SIZE);
>> + memcpy(mod_ptr->eye_catcher, KEXECMOD_EYECATCHER,
>> + sizeof(mod_ptr->eye_catcher));
>> + mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1;
>> + memset(mod_ptr->pg2rec, KEXECMOD_PG_FREE, mod_ptr->n_pages);
>
> I was wondering about a BUILD_BUG_ON for CONFIG_KEXEC_MODULE_PAGES versus some
> limit but I can't think of one.
I don't think we need that. If the value is too high, it will be
just a bug.
>> + mod_ptr->recs_off = sizeof(struct kexec_module) +
>> + CONFIG_KEXEC_MODULE_PAGES + (mod_ptr->n_pages & 1);
>
> mod_ptr->n_pages & 1 is to ensure 16bit alignment?
>
> mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1, and pg2rec is n_pages, so
> using CONFIG_KEXEC_MODULE_PAGES makes this off by 1?
Thanks for catching this. I failed to fix that when switching to
"mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1" (initially I didn't
have the "- 1", but this required too many corrections by 1 at other
places).
>
>> +
>> + set_reserved_range(kexec_mod_start, (unsigned long)mod_ptr + PAGE_SIZE);
>> + }
>> +}
>> +
>
>> @@ -252,6 +316,19 @@ int kexec_get_entry(const char *cmdline)
>> info->memmap_entries = mmap - (struct hvm_memmap_table_entry *)next;
>> next = mmap;
>> + if ( mod_ptr )
>> + {
>> + mod = next;
>> + memset(mod, 0, sizeof(*mod));
>> + info->nr_modules = 1;
>> + info->modlist_paddr = kexec_param_loc +
>
> Looking at this again, I wonder if kexec_param_loc would be better named _pa or
> _paddr.
Yes, good idea.
>
>> + (unsigned long)next - kexec_param_mem;
>> + mod->paddr = kexec_mod_start;
>> + mod->size = PFN_PHYS(mod_ptr->n_pages + 1);
>> + mod->cmdline_paddr = 0;
>> + next = mod + 1;
>> + }
>> +
>> info->cmdline_paddr = kexec_param_loc + (unsigned long)next -
>> kexec_param_mem;
>> strcpy(next, cmdline);
>
>
>> diff --git a/include/kexec.h b/include/kexec.h
>> index b89c3000..0200005f 100644
>> --- a/include/kexec.h
>> +++ b/include/kexec.h
>> @@ -2,6 +2,48 @@
>> #define _KEXEC_H
>> #include <mini-os/elf.h>
>> +/*
>> + * Kexec module used to hand over memory across kexec().
>> + *
>> + * This is an ABI which should be modified only in a compatible way.
>> + * struct kexec_module is located at the start of the last page of the module.
>
> Why is kexec_module, which seems like a header, placed in the last page?
In case we ever need to grow the module (e.g. when adding new data
to it and with that having to handle different CONFIG_KEXEC_MODULE_PAGES
values between old and new build), it will be easier, as the module will
normally be located at the end of the usable memory, so we wouldn't have
to move the struct kexec_module around.
>
>> + *
>> + * The module can contain data/pages of multiple users. Each user has an own
>> + * record which layout is depending on the user. Records are linked via a table
>> + * of record offsets.
>> + *
>> + * All admin data (struct kexec_module, record offset table and records) must
>> + * fit into the last page of the module.
>> + */
>> +struct kexec_module {
>> + uint8_t eye_catcher[8];
>> +#define KEXECMOD_EYECATCHER "KexecMem"
>> + uint16_t n_pages; /* Number of allocatable pages in the module. */
>> + uint16_t n_records; /* Size of record table (max. 255). */
>> +#define KEXECMOD_REC_MAX 255
>> + uint16_t recs_off; /* Offset to record table from start of page. */
>> + /* The record table is an array of */
>> + /* struct kexec_module_rec. */
>> + uint8_t pg2rec[]; /* One entry per allocatable module page, value */
>> + /* is record number (starting from 0) associated */
>> + /* with it. Free pages have value 255. */
>
> This reads like it is 1 page per record...
No, this means that each used page is associated with a record, but
there can be [0 ... n_pages] associated with each record.
>
>> +#define KEXECMOD_PG_FREE 255
>> +};
>> +
>> +struct kexec_module_rec {
>> + uint16_t offset; /* Offset to record from start of page. */
>
> ... but then why would you need an offset? How do you identify which "page" or
> do you mean from the start of the module?
The record itself needs to contain the data for finding the pages
associated with it. See patch 18 for the details of the 9pfs record
added there.
It will use all 17 allocatable pages from the module (1 for the main
9pfs shared interface page plus 16 for the shared rings), while the
record itself will hold the grant reference of the 9pfs interface
page, which serves as the key for locating the page itself and the
ring pages.
>
>> + uint8_t type; /* Type of record. */
>> +#define KEXECMOD_REC_NONE 0
>> + uint8_t size; /* Size of record. */
>> +};
>> +
>> +#ifndef CONFIG_KEXEC_MODULE_PAGES
>> +#define CONFIG_KEXEC_MODULE_PAGES 0
>> +#endif
>> +
>> +extern unsigned long kexec_mod_start;
>> +extern struct kexec_module *mod_ptr;
>> +
>> /* One element of kexec actions (last element must have action KEXEC_CALL): */
>> struct kexec_action {
>> enum {
>
>> diff --git a/kexec.c b/kexec.c
>> index 2607c819..ded29882 100644
>> --- a/kexec.c
>> +++ b/kexec.c
>> @@ -250,3 +250,6 @@ int kexec_add_action(int action, void *dest, void *src,
>> unsigned int len)
>> return 0;
>> }
>> +
>> +unsigned long kexec_mod_start;
>> +struct kexec_module *mod_ptr;
>
> Personally, I find it a little unusual to add variables at the end of the file.
> But it's fine and you don't have to change it.
Patch 13 will add the common code below those additions. :-)
Juergen
On 2025-07-11 03:49, Jürgen Groß wrote:
> On 11.07.25 01:21, Jason Andryuk wrote:
>> On 2025-07-02 04:12, Juergen Gross wrote:
>>> Especially for support of Xenstore-stubdom live update some memory must
>>> be handed over to the new kernel without moving it around: as the
>>> 9pfs device used for storing and retrieving the state of Xenstore
>>> needs to be kept operational across kexec (it can't be reopened due to
>>> Xenstore not being available without access to the device), the ring
>>> pages need to be accessible via active grants by the backend all the
>>> time.
>>>
>>> Add the basic support for that by reserving a pre-defined number of
>>> memory pages at the top of the memory. This memory area will be
>>> handed over to the new kernel via specifying it as a module in
>>> struct hvm_start_info.
>>>
>>> The contents of the memory area are described via a generic table of
>>> contents in the last page of the memory.
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> diff --git a/Config.mk b/Config.mk
>>> index b9675e61..0e4e86d8 100644
>>> --- a/Config.mk
>>> +++ b/Config.mk
>>> @@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
>>> $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
>>> $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
>>> +CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
>>> +
>>
>> I don't know Makefiles well enough to review the preceding patch.
>> This doesn't seem to be used?
>
> It is.
Oh, so setting CONFIG-val-y, through the Makefile magic, creates
-DCONFIG_KEXEC_MODULE_PAGES=$n
I said I didn't know Makefiles well :)
>>
>>> $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
>>> CONFIG-x += CONFIG_LIBXS
>>> + module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
>>> +
>>> + /* Check eye catcher. */
>>> + if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
>>> + sizeof(module_ptr->eye_catcher)) )
>>> + return NULL;
>>> + if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
>>> + return NULL;
>>> +
>>> + return module_ptr;
>>> +}
>>
>>> +#define min(a, b) ((a) < (b) ? (a) : (b))
>>> +void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
>>> +{
>>> + /* Reuse already existing kexec module. */
>>> + mod_ptr = kexec_check_module();
>>> + if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )
>>
>> What if CONFIG_KEXEC_MODULE_PAGES changes between the old and the new
>> stubdom?
>
> Right now this wouldn't really matter. The CONFIG value is used
> only for sizing the module if we are not started via kexec().
When I wrote this I was thinking of:
A -- kexec --> B -- kexec --> C
A: CONFIG_KEXEC_MODULE_PAGES=4
B: CONFIG_KEXEC_MODULE_PAGES=6
When B is running, it has the 4 pages from A, but it expects 6 available
for its use....
>>> diff --git a/include/kexec.h b/include/kexec.h
>>> index b89c3000..0200005f 100644
>>> --- a/include/kexec.h
>>> +++ b/include/kexec.h
>>> @@ -2,6 +2,48 @@
>>> #define _KEXEC_H
>>> #include <mini-os/elf.h>
>>> +/*
>>> + * Kexec module used to hand over memory across kexec().
>>> + *
>>> + * This is an ABI which should be modified only in a compatible way.
>>> + * struct kexec_module is located at the start of the last page of
>>> the module.
>>
>> Why is kexec_module, which seems like a header, placed in the last page?
>
> In case we ever need to grow the module (e.g. when adding new data
> to it and with that having to handle different CONFIG_KEXEC_MODULE_PAGES
> values between old and new build), it will be easier, as the module will
> normally be located at the end of the usable memory, so we wouldn't have
> to move the struct kexec_module around.
.. and this avoids that issue since the extra pages can be grabbed
without moving struct kexec_module.
>>
>>> + *
>>> + * The module can contain data/pages of multiple users. Each user
>>> has an own
>>> + * record which layout is depending on the user. Records are linked
>>> via a table
>>> + * of record offsets.
>>> + *
>>> + * All admin data (struct kexec_module, record offset table and
>>> records) must
>>> + * fit into the last page of the module.
>>> + */
>>> +struct kexec_module {
>>> + uint8_t eye_catcher[8];
>>> +#define KEXECMOD_EYECATCHER "KexecMem"
>>> + uint16_t n_pages; /* Number of allocatable pages in the
>>> module. */
>>> + uint16_t n_records; /* Size of record table (max.
>>> 255). */
>>> +#define KEXECMOD_REC_MAX 255
>>> + uint16_t recs_off; /* Offset to record table from start of
>>> page. */
>>> + /* The record table is an array
>>> of */
>>> + /* struct
>>> kexec_module_rec. */
>>> + uint8_t pg2rec[]; /* One entry per allocatable module
>>> page, value */
>>> + /* is record number (starting from 0)
>>> associated */
>>> + /* with it. Free pages have value
>>> 255. */
>>
>> This reads like it is 1 page per record...
>
> No, this means that each used page is associated with a record, but
> there can be [0 ... n_pages] associated with each record.
Maybe:
"Mapping of module pages to associated module record. Allocated pages
are indicated by their record number (starting from 0). Free pages have
value 255."?
>>
>>> +#define KEXECMOD_PG_FREE 255
>>> +};
>>> +
>>> +struct kexec_module_rec {
>>> + uint16_t offset; /* Offset to record from start of
>>> page. */
>>
>> ... but then why would you need an offset? How do you identify which
>> "page" or do you mean from the start of the module?
>
> The record itself needs to contain the data for finding the pages
> associated with it. See patch 18 for the details of the 9pfs record
> added there.
>
> It will use all 17 allocatable pages from the module (1 for the main
> 9pfs shared interface page plus 16 for the shared rings), while the
> record itself will hold the grant reference of the 9pfs interface
> page, which serves as the key for locating the page itself and the
> ring pages.
I was missing the overall view. I think I have it now from reading
patch 13.
From kexec_mod_start, we have:
(CONFIG_KEXEC_MODULE_PAGES - 1) pages (allocated as pages - referenced
by pg2rec)
struct kexec_module *mod_ptr is the start of the final page.
pg2rec[n_pages]
struct kexec_module_rec mod_recs[n_records]
< extra data for each mod_recs found by .offset and .size >
I didn't realize the mod_recs were separate from their "extra data", and
that the extra data took up the end of the page. If that's correct, I think
it would be helpful to describe the overall layout.
Thanks,
Jason
>>
>>> + uint8_t type; /* Type of
>>> record. */
>>> +#define KEXECMOD_REC_NONE 0
>>> + uint8_t size; /* Size of
>>> record. */
>>> +};
>>> +
>>> +#ifndef CONFIG_KEXEC_MODULE_PAGES
>>> +#define CONFIG_KEXEC_MODULE_PAGES 0
>>> +#endif
>>> +
>>> +extern unsigned long kexec_mod_start;
>>> +extern struct kexec_module *mod_ptr;
>>> +
>>> /* One element of kexec actions (last element must have action
>>> KEXEC_CALL): */
>>> struct kexec_action {
>>> enum {
>>
On 14.07.25 23:22, Jason Andryuk wrote:
> On 2025-07-11 03:49, Jürgen Groß wrote:
>> On 11.07.25 01:21, Jason Andryuk wrote:
>>> On 2025-07-02 04:12, Juergen Gross wrote:
>>>> Especially for support of Xenstore-stubdom live update some memory must
>>>> be handed over to the new kernel without moving it around: as the
>>>> 9pfs device used for storing and retrieving the state of Xenstore
>>>> needs to be kept operational across kexec (it can't be reopened due to
>>>> Xenstore not being available without access to the device), the ring
>>>> pages need to be accessible via active grants by the backend all the
>>>> time.
>>>>
>>>> Add the basic support for that by reserving a pre-defined number of
>>>> memory pages at the top of the memory. This memory area will be
>>>> handed over to the new kernel via specifying it as a module in
>>>> struct hvm_start_info.
>>>>
>>>> The contents of the memory area are described via a generic table of
>>>> contents in the last page of the memory.
>>>>
>>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>
>>>> diff --git a/Config.mk b/Config.mk
>>>> index b9675e61..0e4e86d8 100644
>>>> --- a/Config.mk
>>>> +++ b/Config.mk
>>>> @@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
>>>> $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
>>>> $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
>>>> +CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
>>>> +
>>>
>>> I don't know Makefiles well enough to review the preceding patch. This
>>> doesn't seem to be used?
>>
>> It is.
>
> Oh, so setting CONFIG-val-y, through the Makefile magic, creates
> -DCONFIG_KEXEC_MODULE_PAGES=$n
>
> I said I didn't know Makefiles well :)
>
>>>
>>>> $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
>>>> CONFIG-x += CONFIG_LIBXS
>
>>>> + module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
>>>> +
>>>> + /* Check eye catcher. */
>>>> + if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
>>>> + sizeof(module_ptr->eye_catcher)) )
>>>> + return NULL;
>>>> + if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
>>>> + return NULL;
>>>> +
>>>> + return module_ptr;
>>>> +}
>>>
>>>> +#define min(a, b) ((a) < (b) ? (a) : (b))
>>>> +void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
>>>> +{
>>>> + /* Reuse already existing kexec module. */
>>>> + mod_ptr = kexec_check_module();
>>>> + if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )
>>>
>>> What if CONFIG_KEXEC_MODULE_PAGES changes between the old and the new stubdom?
>>
>> Right now this wouldn't really matter. The CONFIG value is used
>> only for sizing the module if we are not started via kexec().
>
> When I wrote this I was thinking of:
> A -- kexec --> B -- kexec --> C
>
> A: CONFIG_KEXEC_MODULE_PAGES=4
> B: CONFIG_KEXEC_MODULE_PAGES=6
>
> When B is running, it has the 4 pages from A, but it expects 6 available for its
> use....
Depends...
Looking at the first user (xenstore-stubdom), things are a little bit more
complicated.
All the allocated pages (so everything apart from the last page containing the
records) are used for the 9pfs device shared pages. This number will only
change if the ring buffer size is changed (let's skip the case where another
use case for kexec module pages comes up). As the device is being kept open
across kexec(), the size of the ring buffer will be kept, so there is no reason
to use more kexec module pages. This is working fine, as all the relevant
parameters of the 9pfs device are taken from the kexec 9pfs record or the
shared pages themselves after kexec(), instead of using the builtin defaults.
This could only change if we added support for reconnecting the 9pfs device
after kexec() using a different ring buffer size (and/or maybe multiple rings).
In this case we'd need to implement the capability to resize the kexec module,
which is possible with the current design, but not needed yet.
>
>>>> diff --git a/include/kexec.h b/include/kexec.h
>>>> index b89c3000..0200005f 100644
>>>> --- a/include/kexec.h
>>>> +++ b/include/kexec.h
>>>> @@ -2,6 +2,48 @@
>>>> #define _KEXEC_H
>>>> #include <mini-os/elf.h>
>>>> +/*
>>>> + * Kexec module used to hand over memory across kexec().
>>>> + *
>>>> + * This is an ABI which should be modified only in a compatible way.
>>>> + * struct kexec_module is located at the start of the last page of the module.
>>>
>>> Why is kexec_module, which seems like a header, placed in the last page?
>>
>> In case we ever need to grow the module (e.g. when adding new data
>> to it and with that having to handle different CONFIG_KEXEC_MODULE_PAGES
>> values between old and new build), it will be easier, as the module will
>> normally be located at the end of the usable memory, so we wouldn't have
>> to move the struct kexec_module around.
>
> .. and this avoids that issue since the extra pages can be grabbed without
> moving struct kexec_module.
Right.
>
>>>
>>>> + *
>>>> + * The module can contain data/pages of multiple users. Each user has an own
>>>> + * record which layout is depending on the user. Records are linked via a
>>>> table
>>>> + * of record offsets.
>>>> + *
>>>> + * All admin data (struct kexec_module, record offset table and records) must
>>>> + * fit into the last page of the module.
>>>> + */
>>>> +struct kexec_module {
>>>> + uint8_t eye_catcher[8];
>>>> +#define KEXECMOD_EYECATCHER "KexecMem"
>>>> + uint16_t n_pages; /* Number of allocatable pages in the
>>>> module. */
>>>> + uint16_t n_records; /* Size of record table (max.
>>>> 255). */
>>>> +#define KEXECMOD_REC_MAX 255
>>>> + uint16_t recs_off; /* Offset to record table from start of
>>>> page. */
>>>> + /* The record table is an array
>>>> of */
>>>> + /* struct
>>>> kexec_module_rec. */
>>>> + uint8_t pg2rec[]; /* One entry per allocatable module page,
>>>> value */
>>>> + /* is record number (starting from 0)
>>>> associated */
>>>> + /* with it. Free pages have value
>>>> 255. */
>>>
>>> This reads like it is 1 page per record...
>>
>> No, this means that each used page is associated with a record, but
>> there can be [0 ... n_pages] associated with each record.
>
> Maybe:
> "Mapping of module pages to associated module record. Allocated pages are
> indicated by their record number (starting from 0). Free pages have value 255."?
Fine with me.
>
>>>
>>>> +#define KEXECMOD_PG_FREE 255
>>>> +};
>>>> +
>>>> +struct kexec_module_rec {
>>>> + uint16_t offset; /* Offset to record from start of
>>>> page. */
>>>
>>> ... but then why would you need an offset? How do you identify which "page"
>>> or do you mean from the start of the module?
>>
>> The record itself needs to contain the data for finding the pages
>> associated with it. See patch 18 for the details of the 9pfs record
>> added there.
>>
>> It will use all 17 allocatable pages from the module (1 for the main
>> 9pfs shared interface page plus 16 for the shared rings), while the
>> record itself will hold the grant reference of the 9pfs interface
>> page, which serves as the key for locating the page itself and the
>> ring pages.
>
> I was missing the overall view. I think I have it now from reading patch 13.
>
> From kexec_mod_start, we have:
> (CONFIG_KEXEC_MODULE_PAGES - 1) pages (allocated as pages - referenced by pg2rec)
> struct kexec_module *mod_ptr is the start of the final page.
> pg2rec[n_pages]
> struct kexec_module_rec mod_recs[n_records]
> < extra data for each mod_recs found by .offset and .size >
>
> I didn't realize the mod_recs were separate from their "extra data", and that
> the extra data took up the end of the page. If that's correct, I think it would
> be helpful to describe the overall layout.
I'll change the kexec module description comment.
Juergen
On 02.07.25 10:12, Juergen Gross wrote:
> Especially for support of Xenstore-stubdom live update some memory must
> be handed over to the new kernel without moving it around: as the
> 9pfs device used for storing and retrieving the state of Xenstore
> needs to be kept operational across kexec (it can't be reopened due to
> Xenstore not being available without access to the device), the ring
> pages need to be accessible via active grants by the backend all the
> time.
>
> Add the basic support for that by reserving a pre-defined number of
> memory pages at the top of the memory. This memory area will be
> handed over to the new kernel via specifying it as a module in
> struct hvm_start_info.
>
> The contents of the memory area are described via a generic table of
> contents in the last page of the memory.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> Config.mk | 2 ++
> arch/x86/kexec.c | 77 +++++++++++++++++++++++++++++++++++++++++++
> arch/x86/mm.c | 18 ++++++++++
> arch/x86/setup.c | 28 ++++++++++++++++
> include/kernel.h | 1 +
> include/kexec.h | 45 +++++++++++++++++++++++++
> include/x86/arch_mm.h | 1 +
> kexec.c | 3 ++
> mm.c | 6 ++++
> 9 files changed, 181 insertions(+)
>
> diff --git a/Config.mk b/Config.mk
> index b9675e61..0e4e86d8 100644
> --- a/Config.mk
> +++ b/Config.mk
> @@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
> $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
> $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
>
> +CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
> +
> $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
>
> CONFIG-x += CONFIG_LIBXS
> diff --git a/arch/x86/kexec.c b/arch/x86/kexec.c
> index 804e7b6d..7fb98473 100644
> --- a/arch/x86/kexec.c
> +++ b/arch/x86/kexec.c
> @@ -201,10 +201,73 @@ static unsigned long kexec_param_loc;
> static unsigned int kexec_param_size;
> static unsigned long kexec_param_mem;
>
> +static struct kexec_module *kexec_check_module(void)
> +{
> + unsigned long mod_size;
> + unsigned long mod;
> + struct kexec_module *module_ptr;
> +
> + mod = get_module(&mod_size);
> + if ( !mod )
> + return NULL;
> + /* Size must be a multiple of PAGE_SIZE. */
> + if ( mod_size & ~PAGE_MASK )
> + return NULL;
> +
> + /* Kexec module description is at start of the last page of the module. */
> + module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
> +
> + /* Check eye catcher. */
> + if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
> + sizeof(module_ptr->eye_catcher)) )
> + return NULL;
> + if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
> + return NULL;
The following assignment is missing here:
+ kexec_mod_start = mod;
Otherwise a second live update won't work.
I'll wait for more feedback before sending out V2.
Juergen
© 2016 - 2025 Red Hat, Inc.