During kexec, the kernel jumps to the new kernel in relocate_kernel(),
which is implemented in assembly; the 32-bit and 64-bit builds each
have their own version.
Currently, for both 32-bit and 64-bit, the last two parameters of
relocate_kernel() are both 'unsigned int', but each actually conveys
only a boolean, i.e., one bit of information. A single 'unsigned int'
has enough space to carry two bits of information, so there is no need
to pass the two booleans in two separate 'unsigned int' parameters.
Consolidate the last two function parameters of relocate_kernel() into a
single 'unsigned int' and pass flags instead.
Only consolidate the 64-bit version, although a similar optimization
could be done for the 32-bit version too. Don't bother changing the
32-bit version while it is working, since doing so would require an
assembly code change.
Signed-off-by: Kai Huang <kai.huang@intel.com>
---
arch/x86/include/asm/kexec.h | 12 ++++++++++--
arch/x86/kernel/machine_kexec_64.c | 22 +++++++++++++---------
arch/x86/kernel/relocate_kernel_64.S | 19 +++++++++----------
3 files changed, 32 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index f2ad77929d6e..5f09791dc4e9 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -13,6 +13,15 @@
# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */
#endif
+#ifdef CONFIG_X86_64
+
+#include <linux/bits.h>
+
+#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0)
+#define RELOC_KERNEL_HOST_MEM_ACTIVE BIT(1)
+
+#endif
+
# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
@@ -121,8 +130,7 @@ typedef unsigned long
relocate_kernel_fn(unsigned long indirection_page,
unsigned long pa_control_page,
unsigned long start_address,
- unsigned int preserve_context,
- unsigned int host_mem_enc_active);
+ unsigned int flags);
#endif
extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 697fb99406e6..25cff38f5e60 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -384,16 +384,10 @@ void __nocfi machine_kexec(struct kimage *image)
{
unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
relocate_kernel_fn *relocate_kernel_ptr;
- unsigned int host_mem_enc_active;
+ unsigned int relocate_kernel_flags;
int save_ftrace_enabled;
void *control_page;
- /*
- * This must be done before load_segments() since if call depth tracking
- * is used then GS must be valid to make any function calls.
- */
- host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
-
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
save_processor_state();
@@ -427,6 +421,17 @@ void __nocfi machine_kexec(struct kimage *image)
*/
relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start;
+ relocate_kernel_flags = 0;
+ if (image->preserve_context)
+ relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT;
+
+ /*
+ * This must be done before load_segments() since if call depth tracking
+ * is used then GS must be valid to make any function calls.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ relocate_kernel_flags |= RELOC_KERNEL_HOST_MEM_ACTIVE;
+
/*
* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
@@ -443,8 +448,7 @@ void __nocfi machine_kexec(struct kimage *image)
image->start = relocate_kernel_ptr((unsigned long)image->head,
virt_to_phys(control_page),
image->start,
- image->preserve_context,
- host_mem_enc_active);
+ relocate_kernel_flags);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index ea604f4d0b52..1dfa323b33d5 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
* %rdi indirection_page
* %rsi pa_control_page
* %rdx start address
- * %rcx preserve_context
- * %r8 host_mem_enc_active
+ * %rcx flags: RELOC_KERNEL_*
*/
/* Save the CPU context, used for jumping back */
@@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
/* save indirection list for jumping back */
movq %rdi, pa_backup_pages_map(%rip)
- /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
+ /* Save the flags to %r11 as swap_pages clobbers %rcx. */
movq %rcx, %r11
/* setup a new stack at the end of the physical control page */
@@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/*
* %rdi indirection page
* %rdx start address
- * %r8 host_mem_enc_active
* %r9 page table page
- * %r11 preserve_context
+ * %r11 flags: RELOC_KERNEL_*
* %r13 original CR4 when relocate_kernel() was invoked
*/
@@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* entries that will conflict with the now unencrypted memory
* used by kexec. Flush the caches before copying the kernel.
*/
- testq %r8, %r8
+ testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11
jz .Lsme_off
wbinvd
.Lsme_off:
@@ -220,7 +218,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
movq %cr3, %rax
movq %rax, %cr3
- testq %r11, %r11 /* preserve_context */
+ testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11
jnz .Lrelocate
/*
@@ -273,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
ANNOTATE_NOENDBR
andq $PAGE_MASK, %r8
lea PAGE_SIZE(%r8), %rsp
- movl $1, %r11d /* Ensure preserve_context flag is set */
+ movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d /* Ensure preserve_context flag is set */
call swap_pages
movq kexec_va_control_page(%rip), %rax
0: addq $virtual_mapped - 0b, %rax
@@ -321,7 +319,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
UNWIND_HINT_END_OF_STACK
/*
* %rdi indirection page
- * %r11 preserve_context
+ * %r11 flags: RELOC_KERNEL_*
*/
movq %rdi, %rcx /* Put the indirection_page in %rcx */
xorl %edi, %edi
@@ -357,7 +355,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
movq %rdi, %rdx /* Save destination page to %rdx */
movq %rsi, %rax /* Save source page to %rax */
- testq %r11, %r11 /* Only actually swap for ::preserve_context */
+ /* Only actually swap for ::preserve_context */
+ testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11
jz .Lnoswap
/* copy source page to swap page */
--
2.50.0
On 7/17/25 16:46, Kai Huang wrote: > During kexec, the kernel jumps to the new kernel in relocate_kernel(), > which is implemented in assembly and both 32-bit and 64-bit have their > own version. > > Currently, for both 32-bit and 64-bit, the last two parameters of the > relocate_kernel() are both 'unsigned int' but actually they only convey > a boolean, i.e., one bit information. The 'unsigned int' has enough > space to carry two bits information therefore there's no need to pass > the two booleans in two separate 'unsigned int'. > > Consolidate the last two function parameters of relocate_kernel() into a > single 'unsigned int' and pass flags instead. > > Only consolidate the 64-bit version albeit the similar optimization can > be done for the 32-bit version too. Don't bother changing the 32-bit > version while it is working (since assembly code change is required). > > Signed-off-by: Kai Huang <kai.huang@intel.com> > --- > arch/x86/include/asm/kexec.h | 12 ++++++++++-- > arch/x86/kernel/machine_kexec_64.c | 22 +++++++++++++--------- > arch/x86/kernel/relocate_kernel_64.S | 19 +++++++++---------- > 3 files changed, 32 insertions(+), 21 deletions(-) > > diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h > index f2ad77929d6e..5f09791dc4e9 100644 > --- a/arch/x86/include/asm/kexec.h > +++ b/arch/x86/include/asm/kexec.h > @@ -13,6 +13,15 @@ > # define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ > #endif > > +#ifdef CONFIG_X86_64 > + > +#include <linux/bits.h> > + > +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) > +#define RELOC_KERNEL_HOST_MEM_ACTIVE BIT(1) This isn't as descriptive with "ENC" removed from the name. It's almost like you read this and think it should always be 1 because the kernel always has host memory active. 
> + > +#endif > + > # define KEXEC_CONTROL_PAGE_SIZE 4096 > # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 > > @@ -121,8 +130,7 @@ typedef unsigned long > relocate_kernel_fn(unsigned long indirection_page, > unsigned long pa_control_page, > unsigned long start_address, > - unsigned int preserve_context, > - unsigned int host_mem_enc_active); > + unsigned int flags); > #endif > extern relocate_kernel_fn relocate_kernel; > #define ARCH_HAS_KIMAGE_ARCH > diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c > index 697fb99406e6..25cff38f5e60 100644 > --- a/arch/x86/kernel/machine_kexec_64.c > +++ b/arch/x86/kernel/machine_kexec_64.c > @@ -384,16 +384,10 @@ void __nocfi machine_kexec(struct kimage *image) > { > unsigned long reloc_start = (unsigned long)__relocate_kernel_start; > relocate_kernel_fn *relocate_kernel_ptr; > - unsigned int host_mem_enc_active; > + unsigned int relocate_kernel_flags; > int save_ftrace_enabled; > void *control_page; > > - /* > - * This must be done before load_segments() since if call depth tracking > - * is used then GS must be valid to make any function calls. > - */ > - host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); > - > #ifdef CONFIG_KEXEC_JUMP > if (image->preserve_context) > save_processor_state(); > @@ -427,6 +421,17 @@ void __nocfi machine_kexec(struct kimage *image) > */ > relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start; > > + relocate_kernel_flags = 0; > + if (image->preserve_context) > + relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT; > + > + /* > + * This must be done before load_segments() since if call depth tracking > + * is used then GS must be valid to make any function calls. > + */ > + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) > + relocate_kernel_flags |= RELOC_KERNEL_HOST_MEM_ACTIVE; > + > /* > * The segment registers are funny things, they have both a > * visible and an invisible part. 
Whenever the visible part is > @@ -443,8 +448,7 @@ void __nocfi machine_kexec(struct kimage *image) > image->start = relocate_kernel_ptr((unsigned long)image->head, > virt_to_phys(control_page), > image->start, > - image->preserve_context, > - host_mem_enc_active); > + relocate_kernel_flags); > > #ifdef CONFIG_KEXEC_JUMP > if (image->preserve_context) > diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S > index ea604f4d0b52..1dfa323b33d5 100644 > --- a/arch/x86/kernel/relocate_kernel_64.S > +++ b/arch/x86/kernel/relocate_kernel_64.S > @@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) > * %rdi indirection_page > * %rsi pa_control_page > * %rdx start address > - * %rcx preserve_context > - * %r8 host_mem_enc_active > + * %rcx flags: RELOC_KERNEL_* > */ > > /* Save the CPU context, used for jumping back */ > @@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) > /* save indirection list for jumping back */ > movq %rdi, pa_backup_pages_map(%rip) > > - /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ > + /* Save the flags to %r11 as swap_pages clobbers %rcx. */ > movq %rcx, %r11 > > /* setup a new stack at the end of the physical control page */ > @@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > /* > * %rdi indirection page > * %rdx start address > - * %r8 host_mem_enc_active > * %r9 page table page > - * %r11 preserve_context > + * %r11 flags: RELOC_KERNEL_* > * %r13 original CR4 when relocate_kernel() was invoked > */ > > @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > * entries that will conflict with the now unencrypted memory > * used by kexec. Flush the caches before copying the kernel. > */ > - testq %r8, %r8 > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 Hmmm... can't both bits be set at the same time? If so, then this will fail. This should be doing bit tests now. 
> jz .Lsme_off > wbinvd > .Lsme_off: > @@ -220,7 +218,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > movq %cr3, %rax > movq %rax, %cr3 > > - testq %r11, %r11 /* preserve_context */ > + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 > jnz .Lrelocate > > /* > @@ -273,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > ANNOTATE_NOENDBR > andq $PAGE_MASK, %r8 > lea PAGE_SIZE(%r8), %rsp > - movl $1, %r11d /* Ensure preserve_context flag is set */ > + movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d /* Ensure preserve_context flag is set */ And this will clear any value that was in r11 vs setting a single bit. Not sure it currently has any effect because r8 (where the memory encryption setting was held) is modified just before this. But if any bits are added in the future that are needed past here, this will be a problem. > call swap_pages > movq kexec_va_control_page(%rip), %rax > 0: addq $virtual_mapped - 0b, %rax > @@ -321,7 +319,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) > UNWIND_HINT_END_OF_STACK > /* > * %rdi indirection page > - * %r11 preserve_context > + * %r11 flags: RELOC_KERNEL_* > */ > movq %rdi, %rcx /* Put the indirection_page in %rcx */ > xorl %edi, %edi > @@ -357,7 +355,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) > movq %rdi, %rdx /* Save destination page to %rdx */ > movq %rsi, %rax /* Save source page to %rax */ > > - testq %r11, %r11 /* Only actually swap for ::preserve_context */ > + /* Only actually swap for ::preserve_context */ > + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 Ditto here on the bit testing. Thanks, Tom > jz .Lnoswap > > /* copy source page to swap page */
On Mon, 2025-07-21 at 09:40 -0500, Tom Lendacky wrote: > On 7/17/25 16:46, Kai Huang wrote: > > During kexec, the kernel jumps to the new kernel in relocate_kernel(), > > which is implemented in assembly and both 32-bit and 64-bit have their > > own version. > > > > Currently, for both 32-bit and 64-bit, the last two parameters of the > > relocate_kernel() are both 'unsigned int' but actually they only convey > > a boolean, i.e., one bit information. The 'unsigned int' has enough > > space to carry two bits information therefore there's no need to pass > > the two booleans in two separate 'unsigned int'. > > > > Consolidate the last two function parameters of relocate_kernel() into a > > single 'unsigned int' and pass flags instead. > > > > Only consolidate the 64-bit version albeit the similar optimization can > > be done for the 32-bit version too. Don't bother changing the 32-bit > > version while it is working (since assembly code change is required). > > > > Signed-off-by: Kai Huang <kai.huang@intel.com> > > --- > > arch/x86/include/asm/kexec.h | 12 ++++++++++-- > > arch/x86/kernel/machine_kexec_64.c | 22 +++++++++++++--------- > > arch/x86/kernel/relocate_kernel_64.S | 19 +++++++++---------- > > 3 files changed, 32 insertions(+), 21 deletions(-) > > > > diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h > > index f2ad77929d6e..5f09791dc4e9 100644 > > --- a/arch/x86/include/asm/kexec.h > > +++ b/arch/x86/include/asm/kexec.h > > @@ -13,6 +13,15 @@ > > # define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ > > #endif > > > > +#ifdef CONFIG_X86_64 > > + > > +#include <linux/bits.h> > > + > > +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) > > +#define RELOC_KERNEL_HOST_MEM_ACTIVE BIT(1) > > This isn't as descriptive with "ENC" removed from the name. It's almost > like you read this and think it should always be 1 because the kernel > always has host memory active. Hi Tom, Thanks for review. Right. I'll add "ENC" to the name. 
> > > + > > +#endif > > + > > # define KEXEC_CONTROL_PAGE_SIZE 4096 > > # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 > > > > @@ -121,8 +130,7 @@ typedef unsigned long > > relocate_kernel_fn(unsigned long indirection_page, > > unsigned long pa_control_page, > > unsigned long start_address, > > - unsigned int preserve_context, > > - unsigned int host_mem_enc_active); > > + unsigned int flags); > > #endif > > extern relocate_kernel_fn relocate_kernel; > > #define ARCH_HAS_KIMAGE_ARCH > > diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c > > index 697fb99406e6..25cff38f5e60 100644 > > --- a/arch/x86/kernel/machine_kexec_64.c > > +++ b/arch/x86/kernel/machine_kexec_64.c > > @@ -384,16 +384,10 @@ void __nocfi machine_kexec(struct kimage *image) > > { > > unsigned long reloc_start = (unsigned long)__relocate_kernel_start; > > relocate_kernel_fn *relocate_kernel_ptr; > > - unsigned int host_mem_enc_active; > > + unsigned int relocate_kernel_flags; > > int save_ftrace_enabled; > > void *control_page; > > > > - /* > > - * This must be done before load_segments() since if call depth tracking > > - * is used then GS must be valid to make any function calls. > > - */ > > - host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); > > - > > #ifdef CONFIG_KEXEC_JUMP > > if (image->preserve_context) > > save_processor_state(); > > @@ -427,6 +421,17 @@ void __nocfi machine_kexec(struct kimage *image) > > */ > > relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start; > > > > + relocate_kernel_flags = 0; > > + if (image->preserve_context) > > + relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT; > > + > > + /* > > + * This must be done before load_segments() since if call depth tracking > > + * is used then GS must be valid to make any function calls. 
> > + */ > > + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) > > + relocate_kernel_flags |= RELOC_KERNEL_HOST_MEM_ACTIVE; > > + > > /* > > * The segment registers are funny things, they have both a > > * visible and an invisible part. Whenever the visible part is > > @@ -443,8 +448,7 @@ void __nocfi machine_kexec(struct kimage *image) > > image->start = relocate_kernel_ptr((unsigned long)image->head, > > virt_to_phys(control_page), > > image->start, > > - image->preserve_context, > > - host_mem_enc_active); > > + relocate_kernel_flags); > > > > #ifdef CONFIG_KEXEC_JUMP > > if (image->preserve_context) > > diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S > > index ea604f4d0b52..1dfa323b33d5 100644 > > --- a/arch/x86/kernel/relocate_kernel_64.S > > +++ b/arch/x86/kernel/relocate_kernel_64.S > > @@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) > > * %rdi indirection_page > > * %rsi pa_control_page > > * %rdx start address > > - * %rcx preserve_context > > - * %r8 host_mem_enc_active > > + * %rcx flags: RELOC_KERNEL_* > > */ > > > > /* Save the CPU context, used for jumping back */ > > @@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) > > /* save indirection list for jumping back */ > > movq %rdi, pa_backup_pages_map(%rip) > > > > - /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ > > + /* Save the flags to %r11 as swap_pages clobbers %rcx. 
*/ > > movq %rcx, %r11 > > > > /* setup a new stack at the end of the physical control page */ > > @@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > /* > > * %rdi indirection page > > * %rdx start address > > - * %r8 host_mem_enc_active > > * %r9 page table page > > - * %r11 preserve_context > > + * %r11 flags: RELOC_KERNEL_* > > * %r13 original CR4 when relocate_kernel() was invoked > > */ > > > > @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > * entries that will conflict with the now unencrypted memory > > * used by kexec. Flush the caches before copying the kernel. > > */ > > - testq %r8, %r8 > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > > Hmmm... can't both bits be set at the same time? If so, then this will > fail. This should be doing bit tests now. TEST instruction performs logical AND of the two operands, therefore the above equals to: set ZF if "R11 AND BIT(1) == 0". Whether there's any other bits set in R11 doesn't impact the above, right? > > > jz .Lsme_off > > wbinvd > > .Lsme_off: > > @@ -220,7 +218,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > movq %cr3, %rax > > movq %rax, %cr3 > > > > - testq %r11, %r11 /* preserve_context */ > > + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 > > jnz .Lrelocate > > > > /* > > @@ -273,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > ANNOTATE_NOENDBR > > andq $PAGE_MASK, %r8 > > lea PAGE_SIZE(%r8), %rsp > > - movl $1, %r11d /* Ensure preserve_context flag is set */ > > + movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d /* Ensure preserve_context flag is set */ > > And this will clear any value that was in r11 vs setting a single bit. > Not sure it currently has any effect because r8 (where the memory > encryption setting was held) is modified just before this. But if any > bits are added in the future that are needed past here, this will be a > problem. Right. It's just for the call swap_pages right after it. 
Nothing else later uses RELOC_KERNEL_PRESERVE_CONTEXT or RELOC_KERNEL_HOST_MEM_ACTIVE. Maybe we can add a comment to remind that all other flags are not restored so if someone wants to add a new bit and use it at a later he/she can see? /* * Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so swap_pages * can do things correctly. Note this doesn't restore any other * RELOC_KERNEL_* flags that were passed to relocate_kernel(). */ > > > call swap_pages > > movq kexec_va_control_page(%rip), %rax > > 0: addq $virtual_mapped - 0b, %rax > > @@ -321,7 +319,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) > > UNWIND_HINT_END_OF_STACK > > /* > > * %rdi indirection page > > - * %r11 preserve_context > > + * %r11 flags: RELOC_KERNEL_* > > */ > > movq %rdi, %rcx /* Put the indirection_page in %rcx */ > > xorl %edi, %edi > > @@ -357,7 +355,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) > > movq %rdi, %rdx /* Save destination page to %rdx */ > > movq %rsi, %rax /* Save source page to %rax */ > > > > - testq %r11, %r11 /* Only actually swap for ::preserve_context */ > > + /* Only actually swap for ::preserve_context */ > > + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 > > Ditto here on the bit testing. I don't see any problem? Please see above.
On 7/21/25 16:20, Huang, Kai wrote: > On Mon, 2025-07-21 at 09:40 -0500, Tom Lendacky wrote: >> On 7/17/25 16:46, Kai Huang wrote: >>> During kexec, the kernel jumps to the new kernel in relocate_kernel(), >>> which is implemented in assembly and both 32-bit and 64-bit have their >>> own version. >>> >>> Currently, for both 32-bit and 64-bit, the last two parameters of the >>> relocate_kernel() are both 'unsigned int' but actually they only convey >>> a boolean, i.e., one bit information. The 'unsigned int' has enough >>> space to carry two bits information therefore there's no need to pass >>> the two booleans in two separate 'unsigned int'. >>> >>> Consolidate the last two function parameters of relocate_kernel() into a >>> single 'unsigned int' and pass flags instead. >>> >>> Only consolidate the 64-bit version albeit the similar optimization can >>> be done for the 32-bit version too. Don't bother changing the 32-bit >>> version while it is working (since assembly code change is required). >>> >>> Signed-off-by: Kai Huang <kai.huang@intel.com> >>> --- >>> arch/x86/include/asm/kexec.h | 12 ++++++++++-- >>> arch/x86/kernel/machine_kexec_64.c | 22 +++++++++++++--------- >>> arch/x86/kernel/relocate_kernel_64.S | 19 +++++++++---------- >>> 3 files changed, 32 insertions(+), 21 deletions(-) >>> >>> @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >>> * entries that will conflict with the now unencrypted memory >>> * used by kexec. Flush the caches before copying the kernel. >>> */ >>> - testq %r8, %r8 >>> + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 >> >> Hmmm... can't both bits be set at the same time? If so, then this will >> fail. This should be doing bit tests now. > > TEST instruction performs logical AND of the two operands, therefore the > above equals to: > > set ZF if "R11 AND BIT(1) == 0". > > Whether there's any other bits set in R11 doesn't impact the above, right? > Doh! My bad, yes, not sure what I was thinking there. 
Thanks, Tom >> >>> jz .Lsme_off >>> wbinvd >>> .Lsme_off: >>> @@ -220,7 +218,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >>> movq %cr3, %rax >>> movq %rax, %cr3 >>> >>> - testq %r11, %r11 /* preserve_context */ >>> + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 >>> jnz .Lrelocate >>> >>> /* >>> @@ -273,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >>> ANNOTATE_NOENDBR >>> andq $PAGE_MASK, %r8 >>> lea PAGE_SIZE(%r8), %rsp >>> - movl $1, %r11d /* Ensure preserve_context flag is set */ >>> + movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d /* Ensure preserve_context flag is set */ >> >> And this will clear any value that was in r11 vs setting a single bit. >> Not sure it currently has any effect because r8 (where the memory >> encryption setting was held) is modified just before this. But if any >> bits are added in the future that are needed past here, this will be a >> problem. > > Right. It's just for the > > call swap_pages > > right after it. Nothing else later uses RELOC_KERNEL_PRESERVE_CONTEXT or > RELOC_KERNEL_HOST_MEM_ACTIVE. > > Maybe we can add a comment to remind that all other flags are not restored > so if someone wants to add a new bit and use it at a later he/she can see? > > /* > * Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so swap_pages > * can do things correctly. Note this doesn't restore any other > * RELOC_KERNEL_* flags that were passed to relocate_kernel(). 
> */ >> >>> call swap_pages >>> movq kexec_va_control_page(%rip), %rax >>> 0: addq $virtual_mapped - 0b, %rax >>> @@ -321,7 +319,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) >>> UNWIND_HINT_END_OF_STACK >>> /* >>> * %rdi indirection page >>> - * %r11 preserve_context >>> + * %r11 flags: RELOC_KERNEL_* >>> */ >>> movq %rdi, %rcx /* Put the indirection_page in %rcx */ >>> xorl %edi, %edi >>> @@ -357,7 +355,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) >>> movq %rdi, %rdx /* Save destination page to %rdx */ >>> movq %rsi, %rax /* Save source page to %rax */ >>> >>> - testq %r11, %r11 /* Only actually swap for ::preserve_context */ >>> + /* Only actually swap for ::preserve_context */ >>> + testq $RELOC_KERNEL_PRESERVE_CONTEXT, %r11 >> >> Ditto here on the bit testing. > > I don't see any problem? Please see above.
On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: > > > > @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > > > * entries that will conflict with the now unencrypted memory > > > > * used by kexec. Flush the caches before copying the kernel. > > > > */ > > > > - testq %r8, %r8 > > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > > > > > > Hmmm... can't both bits be set at the same time? If so, then this will > > > fail. This should be doing bit tests now. > > > > TEST instruction performs logical AND of the two operands, therefore the > > above equals to: > > > > set ZF if "R11 AND BIT(1) == 0". > > > > Whether there's any other bits set in R11 doesn't impact the above, right? > > > > Doh! My bad, yes, not sure what I was thinking there. > Np and thanks! I'll address your other comments but I'll see whether Boris has any other comments first.
On Mon, Jul 21, 2025 at 09:36:48PM +0000, Huang, Kai wrote: > Np and thanks! I'll address your other comments but I'll see whether Boris > has any other comments first. Nah, he hasn't. This looks exactly like what I had in mind so thanks for doing it. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Tue, 2025-07-22 at 16:58 +0200, Borislav Petkov wrote: > On Mon, Jul 21, 2025 at 09:36:48PM +0000, Huang, Kai wrote: > > Np and thanks! I'll address your other comments but I'll see whether Boris > > has any other comments first. > > Nah, he hasn't. This looks exactly like what I had in mind so thanks for > doing it. > Hi Boris, Thanks for the feedback. I guess we don't need to wait for the other TDX patches to respin this, so I can quickly address comments from Tom and hpa and send out v2 of this patch as a standalone one if you prefer? :-)
On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: >On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: >> > > > @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >> > > > * entries that will conflict with the now unencrypted memory >> > > > * used by kexec. Flush the caches before copying the kernel. >> > > > */ >> > > > - testq %r8, %r8 >> > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 >> > > >> > > Hmmm... can't both bits be set at the same time? If so, then this will >> > > fail. This should be doing bit tests now. >> > >> > TEST instruction performs logical AND of the two operands, therefore the >> > above equals to: >> > >> > set ZF if "R11 AND BIT(1) == 0". >> > >> > Whether there's any other bits set in R11 doesn't impact the above, right? >> > >> >> Doh! My bad, yes, not sure what I was thinking there. >> > >Np and thanks! I'll address your other comments but I'll see whether Boris >has any other comments first. > You can use testb in this case to save 3 bytes, too.
> On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > >On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: > >> > > > @@ -204,7 +202,7 @@ > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > >> > > > * entries that will conflict with the now unencrypted memory > >> > > > * used by kexec. Flush the caches before copying the kernel. > >> > > > */ > >> > > > - testq %r8, %r8 > >> > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > >> > > > >> > > Hmmm... can't both bits be set at the same time? If so, then this > >> > > will fail. This should be doing bit tests now. > >> > > >> > TEST instruction performs logical AND of the two operands, > >> > therefore the above equals to: > >> > > >> > set ZF if "R11 AND BIT(1) == 0". > >> > > >> > Whether there's any other bits set in R11 doesn't impact the above, right? > >> > > >> > >> Doh! My bad, yes, not sure what I was thinking there. > >> > > > >Np and thanks! I'll address your other comments but I'll see whether > >Boris has any other comments first. > > > > You can use testb in this case to save 3 bytes, too. Yeah I can do that, thanks for the info!
On Mon, 2025-07-21 at 23:29 +0000, Huang, Kai wrote: > > On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > > > On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: > > > > > > > @@ -204,7 +202,7 @@ > > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > > > > > > * entries that will conflict with the now unencrypted memory > > > > > > > * used by kexec. Flush the caches before copying the kernel. > > > > > > > */ > > > > > > > - testq %r8, %r8 > > > > > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > > > > > > > > > > > > Hmmm... can't both bits be set at the same time? If so, then this > > > > > > will fail. This should be doing bit tests now. > > > > > > > > > > TEST instruction performs logical AND of the two operands, > > > > > therefore the above equals to: > > > > > > > > > > set ZF if "R11 AND BIT(1) == 0". > > > > > > > > > > Whether there's any other bits set in R11 doesn't impact the above, right? > > > > > > > > > > > > > Doh! My bad, yes, not sure what I was thinking there. > > > > > > > > > > Np and thanks! I'll address your other comments but I'll see whether > > > Boris has any other comments first. > > > > > > > You can use testb in this case to save 3 bytes, too. > > Yeah I can do that, thanks for the info! I just tried. I need to do: testb $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11b in order to compile, otherwise using plain %r11 generates: arch/x86/kernel/relocate_kernel_64.S:212: Error: `%r11' not allowed with `testb' I'll do some test and if there's no problem I'll switch to use this way, assuming it still saves us 3-bytes.
On July 21, 2025 4:42:22 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: >On Mon, 2025-07-21 at 23:29 +0000, Huang, Kai wrote: >> > On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: >> > > On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: >> > > > > > > @@ -204,7 +202,7 @@ >> > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >> > > > > > > * entries that will conflict with the now unencrypted memory >> > > > > > > * used by kexec. Flush the caches before copying the kernel. >> > > > > > > */ >> > > > > > > - testq %r8, %r8 >> > > > > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 >> > > > > > >> > > > > > Hmmm... can't both bits be set at the same time? If so, then this >> > > > > > will fail. This should be doing bit tests now. >> > > > > >> > > > > TEST instruction performs logical AND of the two operands, >> > > > > therefore the above equals to: >> > > > > >> > > > > set ZF if "R11 AND BIT(1) == 0". >> > > > > >> > > > > Whether there's any other bits set in R11 doesn't impact the above, right? >> > > > > >> > > > >> > > > Doh! My bad, yes, not sure what I was thinking there. >> > > > >> > > >> > > Np and thanks! I'll address your other comments but I'll see whether >> > > Boris has any other comments first. >> > > >> > >> > You can use testb in this case to save 3 bytes, too. >> >> Yeah I can do that, thanks for the info! > >I just tried. I need to do: > > testb $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11b > >in order to compile, otherwise using plain %r11 generates: > >arch/x86/kernel/relocate_kernel_64.S:212: Error: `%r11' not allowed with >`testb' > >I'll do some test and if there's no problem I'll switch to use this way, >assuming it still saves us 3-bytes. > That works just fine.
On Mon, 2025-07-21 at 17:44 -0700, H. Peter Anvin wrote: > On July 21, 2025 4:42:22 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > > On Mon, 2025-07-21 at 23:29 +0000, Huang, Kai wrote: > > > > On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > > > > > On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: > > > > > > > > > @@ -204,7 +202,7 @@ > > > > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > > > > > > > > * entries that will conflict with the now unencrypted memory > > > > > > > > > * used by kexec. Flush the caches before copying the kernel. > > > > > > > > > */ > > > > > > > > > - testq %r8, %r8 > > > > > > > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > > > > > > > > > > > > > > > > Hmmm... can't both bits be set at the same time? If so, then this > > > > > > > > will fail. This should be doing bit tests now. > > > > > > > > > > > > > > TEST instruction performs logical AND of the two operands, > > > > > > > therefore the above equals to: > > > > > > > > > > > > > > set ZF if "R11 AND BIT(1) == 0". > > > > > > > > > > > > > > Whether there's any other bits set in R11 doesn't impact the above, right? > > > > > > > > > > > > > > > > > > > Doh! My bad, yes, not sure what I was thinking there. > > > > > > > > > > > > > > > > Np and thanks! I'll address your other comments but I'll see whether > > > > > Boris has any other comments first. > > > > > > > > > > > > > You can use testb in this case to save 3 bytes, too. > > > > > > Yeah I can do that, thanks for the info! > > > > I just tried. I need to do: > > > > testb $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11b > > > > in order to compile, otherwise using plain %r11 generates: > > > > arch/x86/kernel/relocate_kernel_64.S:212: Error: `%r11' not allowed with > > `testb' > > > > I'll do some test and if there's no problem I'll switch to use this way, > > assuming it still saves us 3-bytes. > > > > That works just fine. Yeah I have now tested. Thanks :-)
On Tue, 2025-07-22 at 00:53 +0000, Huang, Kai wrote: > On Mon, 2025-07-21 at 17:44 -0700, H. Peter Anvin wrote: > > On July 21, 2025 4:42:22 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > > > On Mon, 2025-07-21 at 23:29 +0000, Huang, Kai wrote: > > > > > On July 21, 2025 2:36:48 PM PDT, "Huang, Kai" <kai.huang@intel.com> wrote: > > > > > > On Mon, 2025-07-21 at 16:27 -0500, Tom Lendacky wrote: > > > > > > > > > > @@ -204,7 +202,7 @@ > > > > > SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) > > > > > > > > > > * entries that will conflict with the now unencrypted memory > > > > > > > > > > * used by kexec. Flush the caches before copying the kernel. > > > > > > > > > > */ > > > > > > > > > > - testq %r8, %r8 > > > > > > > > > > + testq $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11 > > > > > > > > > > > > > > > > > > Hmmm... can't both bits be set at the same time? If so, then this > > > > > > > > > will fail. This should be doing bit tests now. > > > > > > > > > > > > > > > > TEST instruction performs logical AND of the two operands, > > > > > > > > therefore the above equals to: > > > > > > > > > > > > > > > > set ZF if "R11 AND BIT(1) == 0". > > > > > > > > > > > > > > > > Whether there's any other bits set in R11 doesn't impact the above, right? > > > > > > > > > > > > > > > > > > > > > > Doh! My bad, yes, not sure what I was thinking there. > > > > > > > > > > > > > > > > > > > Np and thanks! I'll address your other comments but I'll see whether > > > > > > Boris has any other comments first. > > > > > > > > > > > > > > > > You can use testb in this case to save 3 bytes, too. > > > > > > > > Yeah I can do that, thanks for the info! > > > > > > I just tried. 
I need to do: > > > > > > testb $RELOC_KERNEL_HOST_MEM_ACTIVE, %r11b > > > > > > in order to compile, otherwise using plain %r11 generates: > > > > > > arch/x86/kernel/relocate_kernel_64.S:212: Error: `%r11' not allowed with > > > `testb' > > > > > > I'll do some test and if there's no problem I'll switch to use this way, > > > assuming it still saves us 3-bytes. > > > > > > > That works just fine. > > Yeah I have now tested. Thanks :-) Sorry for multiple emails. One minor thing related to using testb: With this patch there's one movl to R11: movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d I think it would be better to make it consistent with testb: movb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b Please let me know if there's any concern?
© 2016 - 2025 Red Hat, Inc.