[PATCH] xen: Move NX handling to a dedicated place

Julian Vetter posted 1 patch 1 week, 3 days ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20260115151658.3725784-1-julian.vetter@vates.tech
xen/arch/x86/boot/head.S         | 56 --------------------------------
xen/arch/x86/boot/trampoline.S   |  5 ++-
xen/arch/x86/cpu/intel.c         |  4 ---
xen/arch/x86/efi/efi-boot.h      | 12 -------
xen/arch/x86/include/asm/setup.h |  2 ++
xen/arch/x86/setup.c             | 46 ++++++++++++++++++++++++++
6 files changed, 50 insertions(+), 75 deletions(-)
[PATCH] xen: Move NX handling to a dedicated place
Posted by Julian Vetter 1 week, 3 days ago
Currently, CONFIG_REQUIRE_NX prevents Xen from booting if NX is disabled
in the BIOS. AMD doesn't have a software-accessible MSR to re-enable it,
so there is nothing we can do; the system is going to die anyway. On
Intel, however, NX might just be hidden via IA32_MISC_ENABLE.XD_DISABLE,
but the function to re-enable it is called after the check + panic in
efi_arch_cpu. So, this patch removes the early check and moves the
entire NX handling into a dedicated place.

Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
---
 xen/arch/x86/boot/head.S         | 56 --------------------------------
 xen/arch/x86/boot/trampoline.S   |  5 ++-
 xen/arch/x86/cpu/intel.c         |  4 ---
 xen/arch/x86/efi/efi-boot.h      | 12 -------
 xen/arch/x86/include/asm/setup.h |  2 ++
 xen/arch/x86/setup.c             | 46 ++++++++++++++++++++++++++
 6 files changed, 50 insertions(+), 75 deletions(-)

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 77bb7a9e21..7fb7fb1351 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -133,7 +133,6 @@ multiboot2_header:
 .Lbad_ldr_nbs: .asciz "ERR: Bootloader shutdown EFI x64 boot services!"
 .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
 .Lbag_alg_msg: .asciz "ERR: Xen must be loaded at a 2Mb boundary!"
-.Lno_nx_msg:   .asciz "ERR: Not an NX-capable CPU!"
 
         .section .init.data, "aw", @progbits
         .subsection 1 /* Put data here after the page tables (in x86_64.S). */
@@ -165,11 +164,6 @@ early_error: /* Here to improve the disassembly. */
 .Lnot_aligned:
         mov     $sym_offs(.Lbag_alg_msg), %ecx
         jmp     .Lget_vtb
-#ifdef CONFIG_REQUIRE_NX
-.Lno_nx:
-        mov     $sym_offs(.Lno_nx_msg), %ecx
-        jmp     .Lget_vtb
-#endif
 .Lmb2_no_bs:
         /*
          * Ditto. Additionally, here there is a chance that Xen was started
@@ -547,56 +541,6 @@ trampoline_setup:
         bt      $cpufeat_bit(X86_FEATURE_LM),%edx
         jnc     .Lbad_cpu
 
-        /*
-         * Check for NX
-         *   - If Xen was compiled requiring it simply assert it's
-         *     supported. The trampoline already has the right constant.
-         *   - Otherwise, update the trampoline EFER mask accordingly.
-         */
-        bt      $cpufeat_bit(X86_FEATURE_NX), %edx
-        jc     .Lgot_nx
-
-        /*
-         * NX appears to be unsupported, but it might be hidden.
-         *
-         * The feature is part of the AMD64 spec, but the very first Intel
-         * 64bit CPUs lacked the feature, and thereafter there was a
-         * firmware knob to disable the feature. Undo the disable if
-         * possible.
-         *
-         * All 64bit Intel CPUs support this MSR. If virtualised, expect
-         * the hypervisor to either emulate the MSR or give us NX.
-         */
-        xor     %eax, %eax
-        cpuid
-        cmp     $X86_VENDOR_INTEL_EBX, %ebx
-        jnz     .Lno_nx
-        cmp     $X86_VENDOR_INTEL_EDX, %edx
-        jnz     .Lno_nx
-        cmp     $X86_VENDOR_INTEL_ECX, %ecx
-        jnz     .Lno_nx
-
-        /* Clear the XD_DISABLE bit */
-        mov     $MSR_IA32_MISC_ENABLE, %ecx
-        rdmsr
-        btr     $2, %edx
-        jnc     .Lno_nx
-        wrmsr
-        orb     $MSR_IA32_MISC_ENABLE_XD_DISABLE >> 32, 4 + sym_esi(trampoline_misc_enable_off)
-
-        /* Check again for NX */
-        mov     $0x80000001, %eax
-        cpuid
-        bt      $cpufeat_bit(X86_FEATURE_NX), %edx
-        jnc     .Lno_nx
-
-.Lgot_nx:
-#ifndef CONFIG_REQUIRE_NX
-        /* Adjust EFER given that NX is present */
-        orb     $EFER_NXE >> 8, 1 + sym_esi(trampoline_efer)
-.Lno_nx:
-#endif
-
         /* Stash TSC to calculate a good approximation of time-since-boot */
         rdtsc
         mov     %eax,     sym_esi(boot_tsc_stamp)
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index a92e399fbe..8e8d50cbdf 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -144,10 +144,9 @@ gdt_48:
 GLOBAL(trampoline_misc_enable_off)
         .quad   0
 
-/* EFER OR-mask for boot paths.  SCE conditional on PV support, NX added when available. */
+/* EFER OR-mask for boot paths.  SCE conditional on PV support. */
 GLOBAL(trampoline_efer)
-        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV)) | \
-                (EFER_NXE * IS_ENABLED(CONFIG_REQUIRE_NX))
+        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV))
 
 GLOBAL(trampoline_xen_phys_start)
         .long   0
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index b76797cb9a..e8cf51e853 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -351,10 +351,6 @@ static void cf_check early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
 		c->x86_cache_alignment = 128;
 
-	if (c == &boot_cpu_data &&
-	    bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE)
-		printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
-
 	intel_unlock_cpuid_leaves(c);
 
 	/* CPUID workaround for Intel 0F33/0F34 CPU */
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 0194720003..8dfd549f12 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -748,18 +748,6 @@ static void __init efi_arch_cpu(void)
     if ( (eax >> 16) == 0x8000 && eax > 0x80000000U )
     {
         caps[FEATURESET_e1d] = cpuid_edx(0x80000001U);
-
-        /*
-         * This check purposefully doesn't use cpu_has_nx because
-         * cpu_has_nx bypasses the boot_cpu_data read if Xen was compiled
-         * with CONFIG_REQUIRE_NX
-         */
-        if ( IS_ENABLED(CONFIG_REQUIRE_NX) &&
-             !boot_cpu_has(X86_FEATURE_NX) )
-            blexit(L"This build of Xen requires NX support");
-
-        if ( cpu_has_nx )
-            trampoline_efer |= EFER_NXE;
     }
 }
 
diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
index b01e83a8ed..16f53725ca 100644
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -70,4 +70,6 @@ extern bool opt_dom0_msr_relaxed;
 
 #define max_init_domid (0)
 
+void nx_init(void);
+
 #endif
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 27c63d1d97..608720b717 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1119,6 +1119,50 @@ static struct domain *__init create_dom0(struct boot_info *bi)
     return d;
 }
 
+void __init nx_init(void)
+{
+    uint64_t misc_enable;
+    uint32_t eax, ebx, ecx, edx;
+
+    if ( !boot_cpu_has(X86_FEATURE_NX) )
+    {
+        /* Intel: try to unhide NX by clearing XD_DISABLE */
+        cpuid(0, &eax, &ebx, &ecx, &edx);
+        if ( ebx == X86_VENDOR_INTEL_EBX &&
+             ecx == X86_VENDOR_INTEL_ECX &&
+             edx == X86_VENDOR_INTEL_EDX )
+        {
+            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
+            {
+                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
+                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+
+                /* Re-read CPUID after having cleared XD_DISABLE */
+                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
+
+                /* Adjust misc_enable_off for secondary startup and wakeup code */
+                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
+                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
+            }
+        }
+        /* AMD: nothing we can do - NX must be enabled in BIOS */
+    }
+
+    /* Enable EFER.NXE only if NX is available */
+    if ( boot_cpu_has(X86_FEATURE_NX) )
+    {
+        if ( !(read_efer() & EFER_NXE) )
+            write_efer(read_efer() | EFER_NXE);
+
+        /* Adjust trampoline_efer for secondary startup and wakeup code */
+        bootsym(trampoline_efer) |= EFER_NXE;
+    }
+
+    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
+        panic("This build of Xen requires NX support\n");
+}
+
 /* How much of the directmap is prebuilt at compile time. */
 #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
 
@@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
     rdmsrl(MSR_EFER, this_cpu(efer));
     asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
 
+    nx_init();
+
     /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
     enable_nmis();
 
-- 
2.51.0



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Andrew Cooper 1 week, 3 days ago
On 15/01/2026 3:17 pm, Julian Vetter wrote:
> Currently the CONFIG_REQUIRE_NX prevents booting XEN, if NX is disabled
> in the BIOS. AMD doesn't have a software-accessible MSR to re-enable it,
> so there is nothing we can do. The system is going to die anyway. But on
> Intel NX might just be hidden via IA32_MISC_ENABLE.XD_DISABLE. But the
> function to re-enable it is called after the check + panic in
> efi_arch_cpu. So, this patch removes the early check and moves the
> entire NX handling into a dedicated place.
>
> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>

Sorry I didn't get around to doing the prep work I promised.

This is going along the right lines, but there are a few complexities still.

Also you'll want to split the patch into a series.  More on that when
we've sorted out a few other details.

> diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
> index a92e399fbe..8e8d50cbdf 100644
> --- a/xen/arch/x86/boot/trampoline.S
> +++ b/xen/arch/x86/boot/trampoline.S
> @@ -144,10 +144,9 @@ gdt_48:
>  GLOBAL(trampoline_misc_enable_off)
>          .quad   0
>  
> -/* EFER OR-mask for boot paths.  SCE conditional on PV support, NX added when available. */
> +/* EFER OR-mask for boot paths.  SCE conditional on PV support. */

The comment wants to stay as-was.  NX does get added when available.

>  GLOBAL(trampoline_efer)
> -        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV)) | \
> -                (EFER_NXE * IS_ENABLED(CONFIG_REQUIRE_NX))
> +        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV))
>  
>  GLOBAL(trampoline_xen_phys_start)
>          .long   0
> diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
> index b01e83a8ed..16f53725ca 100644
> --- a/xen/arch/x86/include/asm/setup.h
> +++ b/xen/arch/x86/include/asm/setup.h
> @@ -70,4 +70,6 @@ extern bool opt_dom0_msr_relaxed;
>  
>  #define max_init_domid (0)
>  
> +void nx_init(void);
> +
>  #endif
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index 27c63d1d97..608720b717 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1119,6 +1119,50 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>      return d;
>  }
>  
> +void __init nx_init(void)

This should be static if it's only used in a single file.  However, see
later for doing it a bit differently.

> +{
> +    uint64_t misc_enable;
> +    uint32_t eax, ebx, ecx, edx;
> +
> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
> +    {
> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
> +        cpuid(0, &eax, &ebx, &ecx, &edx);
> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
> +             ecx == X86_VENDOR_INTEL_ECX &&
> +             edx == X86_VENDOR_INTEL_EDX )
> +        {
> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
> +            {
> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
> +
> +                /* Re-read CPUID after having cleared XD_DISABLE */
> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
> +
> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
> +            }
> +        }
> +        /* AMD: nothing we can do - NX must be enabled in BIOS */

The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.

You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
regular CPUID.  This is a little more tricky to arrange because it needs
doing on each CPU, not just the BSP.

> +    }
> +
> +    /* Enable EFER.NXE only if NX is available */
> +    if ( boot_cpu_has(X86_FEATURE_NX) )
> +    {
> +        if ( !(read_efer() & EFER_NXE) )
> +            write_efer(read_efer() | EFER_NXE);
> +
> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
> +        bootsym(trampoline_efer) |= EFER_NXE;
> +    }
> +
> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
> +        panic("This build of Xen requires NX support\n");
> +}
> +
>  /* How much of the directmap is prebuilt at compile time. */
>  #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>  
> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>      rdmsrl(MSR_EFER, this_cpu(efer));
>      asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>  
> +    nx_init();
> +
>      /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>      enable_nmis();
>  

This is too early, as can be seen by the need to make a cpuid() call
rather than using boot_cpu_data.

The cleanup I wanted to do was to create/rework early_cpu_init() to get
things in a better order, so the panic() could go at the end here.  The
current split we've got of early/regular CPU init was inherited from
Linux and can be collapsed substantially.

The intel "unlocking" wants to move back into early_init_intel(), along
with intel_unlock_cpuid_leaves().  (This is where it used to live before
REQUIRE_NX was added).

The AMD side probe wants to live in early_amd_init()  (not that there is
one right now), but the re-enabling of the NX bit in CPUID needs to also
be in amd_init() so it gets applied to APs too.

Does this make sense?

~Andrew

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Julian Vetter 6 days, 19 hours ago
On 1/15/26 4:50 PM, Andrew Cooper wrote:
> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>> Currently the CONFIG_REQUIRE_NX prevents booting XEN, if NX is disabled
>> in the BIOS. AMD doesn't have a software-accessible MSR to re-enable it,
>> so there is nothing we can do. The system is going to die anyway. But on
>> Intel NX might just be hidden via IA32_MISC_ENABLE.XD_DISABLE. But the
>> function to re-enable it is called after the check + panic in
>> efi_arch_cpu. So, this patch removes the early check and moves the
>> entire NX handling into a dedicated place.
>>
>> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> 
> Sorry I didn't get around to doing the prep work I promised.

No problem, I assumed you were quite busy so I looked into it.

> 
> This is going along the right lines, but there are a few complexities still.

Thank you for the feedback.

> 
> Also you'll want to split the patch into a series.  More on that when
> we've sorted out a few other details.

Yes, I will do that once everything is sorted out.

> 
>> diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
>> index a92e399fbe..8e8d50cbdf 100644
>> --- a/xen/arch/x86/boot/trampoline.S
>> +++ b/xen/arch/x86/boot/trampoline.S
>> @@ -144,10 +144,9 @@ gdt_48:
>>   GLOBAL(trampoline_misc_enable_off)
>>           .quad   0
>>   
>> -/* EFER OR-mask for boot paths.  SCE conditional on PV support, NX added when available. */
>> +/* EFER OR-mask for boot paths.  SCE conditional on PV support. */
> 
> The comment wants to stay as-was.  NX does get added when available.
> 
>>   GLOBAL(trampoline_efer)
>> -        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV)) | \
>> -                (EFER_NXE * IS_ENABLED(CONFIG_REQUIRE_NX))
>> +        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV))
>>   
>>   GLOBAL(trampoline_xen_phys_start)
>>           .long   0
>> diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
>> index b01e83a8ed..16f53725ca 100644
>> --- a/xen/arch/x86/include/asm/setup.h
>> +++ b/xen/arch/x86/include/asm/setup.h
>> @@ -70,4 +70,6 @@ extern bool opt_dom0_msr_relaxed;
>>   
>>   #define max_init_domid (0)
>>   
>> +void nx_init(void);
>> +
>>   #endif
>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>> index 27c63d1d97..608720b717 100644
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -1119,6 +1119,50 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>       return d;
>>   }
>>   
>> +void __init nx_init(void)
> 
> This should be static if it's only used in a single file.  However, see
> later for doing it a bit differently.
> 
>> +{
>> +    uint64_t misc_enable;
>> +    uint32_t eax, ebx, ecx, edx;
>> +
>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>> +    {
>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>> +             ecx == X86_VENDOR_INTEL_ECX &&
>> +             edx == X86_VENDOR_INTEL_EDX )
>> +        {
>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>> +            {
>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>> +
>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>> +
>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>> +            }
>> +        }
>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
> 
> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.

Yes, you're right.
> 
> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
> regular CPUID.  This is a little more tricky to arrange because it needs
> doing on each CPU, not just the BSP.

Ok, yes, I have modified the AMD side to use MSR_K8_EXT_FEATURE_MASK to 
"unhide" it.

> 
>> +    }
>> +
>> +    /* Enable EFER.NXE only if NX is available */
>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>> +    {
>> +        if ( !(read_efer() & EFER_NXE) )
>> +            write_efer(read_efer() | EFER_NXE);
>> +
>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>> +        bootsym(trampoline_efer) |= EFER_NXE;
>> +    }
>> +
>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>> +        panic("This build of Xen requires NX support\n");
>> +}
>> +
>>   /* How much of the directmap is prebuilt at compile time. */
>>   #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>   
>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>       rdmsrl(MSR_EFER, this_cpu(efer));
>>       asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>   
>> +    nx_init();
>> +
>>       /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>       enable_nmis();
>>   
> 
> This is too early, as can be seen by the need to make a cpuid() call
> rather than using boot_cpu_data.
> 
> The cleanup I wanted to do was to create/rework early_cpu_init() to get
> things in a better order, so the panic() could go at the end here.  The
> current split we've got of early/regular CPU init was inherited from
> Linux and can be collapsed substantially.

I have tried to add the logic into the early_init_{intel,amd}()
functions, but it seems this is already too late in the boot chain. This
is why I put it into an extra function which is called earlier. It
seems there are already pages with PAGE_NX being used on the way to
early_init_{intel,amd}(), because when I put my code into
early_init_intel I get a fault and a reboot. What do you suggest?

> 
> The intel "unlocking" wants to move back into early_init_intel(), along
> with intel_unlock_cpuid_leaves().  (This is where it used to live before
> REQUIRE_NX was added).
> 
> The AMD side probe wants to live in early_amd_init()  (not that there is
> one right now), but the re-enabling of the NX bit in CPUID needs to also
> be in amd_init() so it gets applied to APs too.
> 
> Does this make sense?
> 
> ~Andrew
> 



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Andrew Cooper 6 days, 10 hours ago
On 19/01/2026 10:34 am, Julian Vetter wrote:
> On 1/15/26 4:50 PM, Andrew Cooper wrote:
>> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>>> +{
>>> +    uint64_t misc_enable;
>>> +    uint32_t eax, ebx, ecx, edx;
>>> +
>>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>>> +    {
>>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>>> +             ecx == X86_VENDOR_INTEL_ECX &&
>>> +             edx == X86_VENDOR_INTEL_EDX )
>>> +        {
>>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>>> +            {
>>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>> +
>>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>>> +
>>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>>> +            }
>>> +        }
>>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
>> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.
> Yes, you're right.
>> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
>> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
>> regular CPUID.  This is a little more tricky to arrange because it needs
>> doing on each CPU, not just the BSP.
> Ok, yes, I have modified the AMD side to use MSR_K8_EXT_FEATURE_MASK to 
> "unhide" it.

Great.  And contrary to the other thread, this really must modify the
mask MSRs rather than use setup_force_cpu_cap(), because we still need
it to be visible to PV guest kernels which can't see Xen's choice of
setup_force_cpu_cap().

>
>>> +    }
>>> +
>>> +    /* Enable EFER.NXE only if NX is available */
>>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>>> +    {
>>> +        if ( !(read_efer() & EFER_NXE) )
>>> +            write_efer(read_efer() | EFER_NXE);
>>> +
>>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>>> +        bootsym(trampoline_efer) |= EFER_NXE;
>>> +    }
>>> +
>>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>>> +        panic("This build of Xen requires NX support\n");
>>> +}
>>> +
>>>   /* How much of the directmap is prebuilt at compile time. */
>>>   #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>>   
>>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>>       rdmsrl(MSR_EFER, this_cpu(efer));
>>>       asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>>   
>>> +    nx_init();
>>> +
>>>       /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>>       enable_nmis();
>>>   
>> This is too early, as can be seen by the need to make a cpuid() call
>> rather than using boot_cpu_data.
>>
>> The cleanup I wanted to do was to create/rework early_cpu_init() to get
>> things in a better order, so the panic() could go at the end here.  The
>> current split we've got of early/regular CPU init was inherited from
>> Linux and can be collapsed substantially.
> I have tried to add the logic into the early_init_{intel,amd}() 
> functions. But it seems this is already too late in the boot chain. This 
> is why I put into an extra function which is called earlier. Because it 
> seems there are already pages with PAGE_NX being used on the way to 
> early_init_{intel,amd}(). Because when I put my code into 
> early_init_intel I get a fault and a reboot. What do you suggest?

Have you got the backtrace available?

It's probably easiest if I prototype the split I'd like to see, and you
integrate with that.

~Andrew

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Julian Vetter 3 days, 16 hours ago
On 1/19/26 20:01, Andrew Cooper wrote:
> On 19/01/2026 10:34 am, Julian Vetter wrote:
>> On 1/15/26 4:50 PM, Andrew Cooper wrote:
>>> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>>>> +{
>>>> +    uint64_t misc_enable;
>>>> +    uint32_t eax, ebx, ecx, edx;
>>>> +
>>>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>>>> +    {
>>>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>>>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>>>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>>>> +             ecx == X86_VENDOR_INTEL_ECX &&
>>>> +             edx == X86_VENDOR_INTEL_EDX )
>>>> +        {
>>>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>>>> +            {
>>>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>>> +
>>>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>>>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>>>> +
>>>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>>>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>>>> +            }
>>>> +        }
>>>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
>>> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.
>> Yes, you're right.
>>> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
>>> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
>>> regular CPUID.  This is a little more tricky to arrange because it needs
>>> doing on each CPU, not just the BSP.
>> Ok, yes, I have modified the AMD side to use MSR_K8_EXT_FEATURE_MASK to
>> "unhide" it.
> 
> Great.  And contrary to the other thread, this really must modify the
> mask MSRs rather than use setup_force_cpu_cap(), because we still need
> it to be visible to PV guest kernels which can't see Xen's choice of
> setup_force_cpu_cap().
> 
>>
>>>> +    }
>>>> +
>>>> +    /* Enable EFER.NXE only if NX is available */
>>>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>>>> +    {
>>>> +        if ( !(read_efer() & EFER_NXE) )
>>>> +            write_efer(read_efer() | EFER_NXE);
>>>> +
>>>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>>>> +        bootsym(trampoline_efer) |= EFER_NXE;
>>>> +    }
>>>> +
>>>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>>>> +        panic("This build of Xen requires NX support\n");
>>>> +}
>>>> +
>>>>    /* How much of the directmap is prebuilt at compile time. */
>>>>    #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>>>    
>>>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>>>        rdmsrl(MSR_EFER, this_cpu(efer));
>>>>        asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>>>    
>>>> +    nx_init();
>>>> +
>>>>        /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>>>        enable_nmis();
>>>>    
>>> This is too early, as can be seen by the need to make a cpuid() call
>>> rather than using boot_cpu_data.
>>>
>>> The cleanup I wanted to do was to create/rework early_cpu_init() to get
>>> things in a better order, so the panic() could go at the end here.  The
>>> current split we've got of early/regular CPU init was inherited from
>>> Linux and can be collapsed substantially.
>> I have tried to add the logic into the early_init_{intel,amd}()
>> functions. But it seems this is already too late in the boot chain. This
>> is why I put into an extra function which is called earlier. Because it
>> seems there are already pages with PAGE_NX being used on the way to
>> early_init_{intel,amd}(). Because when I put my code into
>> early_init_intel I get a fault and a reboot. What do you suggest?
> 
> Have you got the backtrace available?

Yes, here it is. Note that I previously saw that, with
'CONFIG_MICROCODE_LOADING' enabled, it faults even earlier, somewhere in
'find_cpio_data()', but with the same EC = 0x0009 (Protection violation,
Reserved bit violation).

Xen 4.22-unstable
(XEN) Xen version 4.22-unstable (julian@work) (gcc (Debian 15.2.0-12) 
15.2.0) debug=y Thu Jan 22 14:28:58 CET 2026
(XEN) Latest ChangeSet: Tue Jan 13 16:50:12 2026 +0100 git:ce886ef641
(XEN) build-id: 2e72a4b08fca3ae0f0ed9af0dd3a5de947a966d0
(XEN) CPU Vendor: Intel, Family 6 (0x6), Model 55 (0x37), Stepping 8 
(raw 00030678)
(XEN) BSP microcode revision: 0x00000836
(XEN) Bootloader: GRUB 2.12
(XEN) Command line: dom0_mem=1232M,max:1232M watchdog ucode=scan 
dom0_max_vcpus=1-1 com1=115200,8n1 console=com1
(XEN) Xen image load base address: 0xb5800000
(XEN) Video information:
(XEN)  VGA is graphics mode 800x600, 32 bpp
(XEN) Disc information:
(XEN)  Found 0 MBR signatures
(XEN)  Found 1 EDD information structures
(XEN) EFI RAM map:
(XEN)  [0000000000000000, 000000000003efff] (usable)
(XEN)  [000000000003f000, 000000000003ffff] (ACPI NVS)
(XEN)  [0000000000040000, 000000000009ffff] (usable)
(XEN)  [0000000000100000, 000000001effffff] (usable)
(XEN)  [000000001f000000, 000000001f0fffff] (reserved)
(XEN)  [000000001f100000, 000000001fffffff] (usable)
(XEN)  [0000000020000000, 00000000200fffff] (reserved)
(XEN)  [0000000020100000, 00000000b9377fff] (usable)
(XEN)  [00000000b9378000, 00000000b93a7fff] (reserved)
(XEN)  [00000000b93a8000, 00000000b94bdfff] (usable)
(XEN)  [00000000b94be000, 00000000b98d6fff] (ACPI NVS)
(XEN)  [00000000b98d7000, 00000000b9bb0fff] (reserved)
(XEN)  [00000000b9bb1000, 00000000b9bb1fff] (usable)
(XEN)  [00000000b9bb2000, 00000000b9bf3fff] (reserved)
(XEN)  [00000000b9bf4000, 00000000b9d6dfff] (usable)
(XEN)  [00000000b9d6e000, 00000000b9ff9fff] (reserved)
(XEN)  [00000000b9ffa000, 00000000b9ffffff] (usable)
(XEN)  [00000000e00f8000, 00000000e00f8fff] (reserved)
(XEN)  [00000000fed01000, 00000000fed01fff] (reserved)
(XEN)  [00000000fed08000, 00000000fed08fff] (reserved)
(XEN)  [00000000ffb00000, 00000000ffffffff] (reserved)
(XEN)  [0000000100000000, 000000013fffffff] (usable)
(XEN) Early fatal page fault at e008:ffff82d0403b38e0 
(cr2=0000000001100202, ec=0009)
(XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
(XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
(XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
(XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
(XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
(XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
(XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
(XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
(XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75 
13 48 83
(XEN) Xen stack trace from rsp=ffff82d040497d78:
(XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
(XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
(XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
(XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
(XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
(XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
(XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
(XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
(XEN) Xen call trace:
(XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
(XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
(XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
(XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
(XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
(XEN)
(XEN) Pagetable walk from 0000000001100202:
(XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
(XEN) ****************************************


> 
> It's probably easiest if I prototype the split I'd like to see, and you
> integrate with that.
> 
> ~Andrew



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Andrew Cooper 3 days, 15 hours ago
On 22/01/2026 1:48 pm, Julian Vetter wrote:
> On 1/19/26 20:01, Andrew Cooper wrote:
>> On 19/01/2026 10:34 am, Julian Vetter wrote:
>>> On 1/15/26 4:50 PM, Andrew Cooper wrote:
>>>> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>>>>> +{
>>>>> +    uint64_t misc_enable;
>>>>> +    uint32_t eax, ebx, ecx, edx;
>>>>> +
>>>>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>>>>> +    {
>>>>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>>>>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>>>>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>>>>> +             ecx == X86_VENDOR_INTEL_ECX &&
>>>>> +             edx == X86_VENDOR_INTEL_EDX )
>>>>> +        {
>>>>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>>>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>>>>> +            {
>>>>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>>>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>>>> +
>>>>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>>>>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>>>>> +
>>>>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>>>>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>>>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>>>>> +            }
>>>>> +        }
>>>>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
>>>> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.
>>> Yes, you're right.
>>>> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
>>>> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
>>>> regular CPUID.  This is a little more tricky to arrange because it needs
>>>> doing on each CPU, not just the BSP.
>>> Ok, yes, I have modified the AMD side to use MSR_K8_EXT_FEATURE_MASK to
>>> "unhide" it.
>> Great.  And contrary to the other thread, this really must modify the
>> mask MSRs rather than use setup_force_cpu_cap(), because we still need
>> it to be visible to PV guest kernels which can't see Xen's choice of
>> setup_force_cpu_cap().
>>
>>>>> +    }
>>>>> +
>>>>> +    /* Enable EFER.NXE only if NX is available */
>>>>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>>>>> +    {
>>>>> +        if ( !(read_efer() & EFER_NXE) )
>>>>> +            write_efer(read_efer() | EFER_NXE);
>>>>> +
>>>>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>>>>> +        bootsym(trampoline_efer) |= EFER_NXE;
>>>>> +    }
>>>>> +
>>>>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>>>>> +        panic("This build of Xen requires NX support\n");
>>>>> +}
>>>>> +
>>>>>    /* How much of the directmap is prebuilt at compile time. */
>>>>>    #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>>>>    
>>>>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>>>>        rdmsrl(MSR_EFER, this_cpu(efer));
>>>>>        asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>>>>    
>>>>> +    nx_init();
>>>>> +
>>>>>        /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>>>>        enable_nmis();
>>>>>    
>>>> This is too early, as can be seen by the need to make a cpuid() call
>>>> rather than using boot_cpu_data.
>>>>
>>>> The cleanup I wanted to do was to create/rework early_cpu_init() to get
>>>> things in a better order, so the panic() could go at the end here.  The
>>>> current split we've got of early/regular CPU init was inherited from
>>>> Linux and can be collapsed substantially.
>>> I have tried to add the logic into the early_init_{intel,amd}()
>>> functions. But it seems this is already too late in the boot chain. This
>>> is why I put it into an extra function which is called earlier. Because it
>>> seems there are already pages with PAGE_NX being used on the way to
>>> early_init_{intel,amd}(). Because when I put my code into
>>> early_init_intel I get a fault and a reboot. What do you suggest?
>> Have you got the backtrace available?
> Yes, here it is. Note that when I enabled 'CONFIG_MICROCODE_LOADING' 
> earlier, it faulted even sooner, somewhere in 'find_cpio_data()', but 
> with the same EC = 0x0009 (protection violation, reserved bit 
> violation).

That's to be expected.  bootstrap_map_bm() uses PAGE_HYPERVISOR which
has NX set in it.

>
> Xen 4.22-unstable
> (XEN) Xen version 4.22-unstable (julian@work) (gcc (Debian 15.2.0-12) 
> 15.2.0) debug=y Thu Jan 22 14:28:58 CET 2026
> (XEN) Latest ChangeSet: Tue Jan 13 16:50:12 2026 +0100 git:ce886ef641
> (XEN) build-id: 2e72a4b08fca3ae0f0ed9af0dd3a5de947a966d0
> (XEN) CPU Vendor: Intel, Family 6 (0x6), Model 55 (0x37), Stepping 8 
> (raw 00030678)
> (XEN) BSP microcode revision: 0x00000836
> (XEN) Bootloader: GRUB 2.12
> (XEN) Command line: dom0_mem=1232M,max:1232M watchdog ucode=scan 
> dom0_max_vcpus=1-1 com1=115200,8n1 console=com1
> (XEN) Xen image load base address: 0xb5800000
> (XEN) Video information:
> (XEN)  VGA is graphics mode 800x600, 32 bpp
> (XEN) Disc information:
> (XEN)  Found 0 MBR signatures
> (XEN)  Found 1 EDD information structures
> (XEN) EFI RAM map:
> (XEN)  [0000000000000000, 000000000003efff] (usable)
> (XEN)  [000000000003f000, 000000000003ffff] (ACPI NVS)
> (XEN)  [0000000000040000, 000000000009ffff] (usable)
> (XEN)  [0000000000100000, 000000001effffff] (usable)
> (XEN)  [000000001f000000, 000000001f0fffff] (reserved)
> (XEN)  [000000001f100000, 000000001fffffff] (usable)
> (XEN)  [0000000020000000, 00000000200fffff] (reserved)
> (XEN)  [0000000020100000, 00000000b9377fff] (usable)
> (XEN)  [00000000b9378000, 00000000b93a7fff] (reserved)
> (XEN)  [00000000b93a8000, 00000000b94bdfff] (usable)
> (XEN)  [00000000b94be000, 00000000b98d6fff] (ACPI NVS)
> (XEN)  [00000000b98d7000, 00000000b9bb0fff] (reserved)
> (XEN)  [00000000b9bb1000, 00000000b9bb1fff] (usable)
> (XEN)  [00000000b9bb2000, 00000000b9bf3fff] (reserved)
> (XEN)  [00000000b9bf4000, 00000000b9d6dfff] (usable)
> (XEN)  [00000000b9d6e000, 00000000b9ff9fff] (reserved)
> (XEN)  [00000000b9ffa000, 00000000b9ffffff] (usable)
> (XEN)  [00000000e00f8000, 00000000e00f8fff] (reserved)
> (XEN)  [00000000fed01000, 00000000fed01fff] (reserved)
> (XEN)  [00000000fed08000, 00000000fed08fff] (reserved)
> (XEN)  [00000000ffb00000, 00000000ffffffff] (reserved)
> (XEN)  [0000000100000000, 000000013fffffff] (usable)
> (XEN) Early fatal page fault at e008:ffff82d0403b38e0 
> (cr2=0000000001100202, ec=0009)
> (XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
> (XEN) CPU:    0
> (XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
> (XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
> (XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
> (XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
> (XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
> (XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
> (XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
> (XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
> (XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
> (XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
> (XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75 
> 13 48 83
> (XEN) Xen stack trace from rsp=ffff82d040497d78:
> (XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
> (XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
> (XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
> (XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
> (XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
> (XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
> (XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
> (XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
> (XEN) Xen call trace:
> (XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
> (XEN)
> (XEN) Pagetable walk from 0000000001100202:
> (XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 0:
> (XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
> (XEN) ****************************************

Huh, that means we have a bug in the pagewalk rendering.  It shouldn't
give up like that.

>> It's probably easiest if I prototype the split I'd like to see, and you
>> integrate with that.

I've had a go at this.  It's a 6 patch series and growing.  The early
logic is horribly tangled, but there's a lot to delete in it.

~Andrew

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Jan Beulich 3 days, 15 hours ago
On 22.01.2026 14:57, Andrew Cooper wrote:
> On 22/01/2026 1:48 pm, Julian Vetter wrote:
>> (XEN) Early fatal page fault at e008:ffff82d0403b38e0 
>> (cr2=0000000001100202, ec=0009)
>> (XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
>> (XEN) CPU:    0
>> (XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
>> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
>> (XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
>> (XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
>> (XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
>> (XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
>> (XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
>> (XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
>> (XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
>> (XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>> (XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
>> (XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75 
>> 13 48 83
>> (XEN) Xen stack trace from rsp=ffff82d040497d78:
>> (XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
>> (XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
>> (XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
>> (XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
>> (XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
>> (XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
>> (XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
>> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
>> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
>> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
>> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
>> (XEN)
>> (XEN) Pagetable walk from 0000000001100202:
>> (XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
>> (XEN)
>> (XEN) ****************************************
>> (XEN) Panic on CPU 0:
>> (XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
>> (XEN) ****************************************
> 
> Huh, that means we have a bug in the pagewalk rendering.  It shouldn't
> give up like that.

Is it perhaps too early for mfn_valid() to return "true" for the page table
page in question?

Jan

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Jan Beulich 3 days, 15 hours ago
On 22.01.2026 14:57, Andrew Cooper wrote:
> On 22/01/2026 1:48 pm, Julian Vetter wrote:
>> (XEN) Early fatal page fault at e008:ffff82d0403b38e0 
>> (cr2=0000000001100202, ec=0009)
>> (XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
>> (XEN) CPU:    0
>> (XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
>> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
>> (XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
>> (XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
>> (XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
>> (XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
>> (XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
>> (XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
>> (XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
>> (XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>> (XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
>> (XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75 
>> 13 48 83
>> (XEN) Xen stack trace from rsp=ffff82d040497d78:
>> (XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
>> (XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
>> (XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
>> (XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
>> (XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
>> (XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
>> (XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
>> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
>> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
>> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
>> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
>> (XEN)
>> (XEN) Pagetable walk from 0000000001100202:
>> (XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
>> (XEN)
>> (XEN) ****************************************
>> (XEN) Panic on CPU 0:
>> (XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
>> (XEN) ****************************************
> 
> Huh, that means we have a bug in the pagewalk rendering.  It shouldn't
> give up like that.

Is it perhaps too early for mfn_valid() to return "true" for the page table
page in question?

Jan

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Julian Vetter 2 days, 14 hours ago
On 1/22/26 15:11, Jan Beulich wrote:
> On 22.01.2026 14:57, Andrew Cooper wrote:
>> On 22/01/2026 1:48 pm, Julian Vetter wrote:
>>> (XEN) Early fatal page fault at e008:ffff82d0403b38e0
>>> (cr2=0000000001100202, ec=0009)
>>> (XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
>>> (XEN) CPU:    0
>>> (XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
>>> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
>>> (XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
>>> (XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
>>> (XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
>>> (XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
>>> (XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
>>> (XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
>>> (XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
>>> (XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
>>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>>> (XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
>>> (XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75
>>> 13 48 83
>>> (XEN) Xen stack trace from rsp=ffff82d040497d78:
>>> (XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
>>> (XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
>>> (XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
>>> (XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
>>> (XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
>>> (XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
>>> (XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
>>> (XEN) Xen call trace:
>>> (XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
>>> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
>>> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
>>> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
>>> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
>>> (XEN)
>>> (XEN) Pagetable walk from 0000000001100202:
>>> (XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
>>> (XEN)
>>> (XEN) ****************************************
>>> (XEN) Panic on CPU 0:
>>> (XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
>>> (XEN) ****************************************
>>
>> Huh, that means we have a bug in the pagewalk rendering.  It shouldn't
>> give up like that.
> 
> Is it perhaps too early for mfn_valid() to return "true" for the page table
> page in question?

Yes, this is indeed the problem. Thank you Jan. The mfn_valid() doesn't 
work yet, because max_page is set afterwards in __start_xen. Here is the 
actual translation:

(XEN) Xen call trace:
(XEN)    [<ffff82d0403b3820>] R memcmp+0x20/0x46
(XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
(XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
(XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
(XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
(XEN)
(XEN) Pagetable walk from 0000000001100202:
(XEN) Using simple walk without mfn_valid
(XEN) Early pagetable walk from 0000000001100202 (cr3=00000000b5d0e000):
(XEN)  L4[0x000] = 00000000b5c9d063
(XEN)  L3[0x000] = 00000000b5c99063
(XEN)  L2[0x008] = 80000000000001e3 (2MB)

I also found the actual issue with the code, and why it fails in the 
first place. Somewhere before early_init_{intel,amd}, there is 
bzimage_headroom(bootstrap_map_bm(&bi->mods[0]), bi->mods[0].size), and 
'bootstrap_map_bm()' maps the new page with __PAGE_HYPERVISOR_RO, 
which has PAGE_NX set (bit 63 in the L2 entry above). With EFER.NXE 
not yet enabled, that bit is reserved, hence the reserved bit 
violation. So I'm not sure how to work around this.

> 
> Jan
> 



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Andrew Cooper 2 days, 10 hours ago
On 23/01/2026 3:31 pm, Julian Vetter wrote:
> On 1/22/26 15:11, Jan Beulich wrote:
>> On 22.01.2026 14:57, Andrew Cooper wrote:
>>> On 22/01/2026 1:48 pm, Julian Vetter wrote:
>>>> (XEN) Early fatal page fault at e008:ffff82d0403b38e0
>>>> (cr2=0000000001100202, ec=0009)
>>>> (XEN) ----[ Xen-4.22-unstable  x86_64  debug=y  Not tainted ]----
>>>> (XEN) CPU:    0
>>>> (XEN) RIP:    e008:[<ffff82d0403b38e0>] memcmp+0x20/0x46
>>>> (XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
>>>> (XEN) rax: 0000000000000000   rbx: 0000000001100000   rcx: 0000000000000000
>>>> (XEN) rdx: 0000000000000004   rsi: ffff82d0404a0d23   rdi: 0000000001100202
>>>> (XEN) rbp: ffff82d040497d88   rsp: ffff82d040497d78   r8:  0000000000000016
>>>> (XEN) r9:  ffff82d04061a180   r10: ffff82d04061a188   r11: 0000000000000010
>>>> (XEN) r12: 0000000001100000   r13: 0000000000000001   r14: ffff82d0404d2b80
>>>> (XEN) r15: ffff82d040462750   cr0: 0000000080050033   cr4: 00000000000000a0
>>>> (XEN) cr3: 00000000b5d0e000   cr2: 0000000001100202
>>>> (XEN) fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
>>>> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
>>>> (XEN) Xen code around <ffff82d0403b38e0> (memcmp+0x20/0x46):
>>>> (XEN)  0f 1f 84 00 00 00 00 00 <0f> b6 04 0f 44 0f b6 04 0e 44 29 c0 75
>>>> 13 48 83
>>>> (XEN) Xen stack trace from rsp=ffff82d040497d78:
>>>> (XEN)    ffff82d040483f79 0000000000696630 ffff82d040497db0 ffff82d040483fd2
>>>> (XEN)    0000000000696630 ffff82d040200000 0000000000000001 ffff82d040497ef8
>>>> (XEN)    ffff82d04047c4ac 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    ffff82d04062c6d8 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000140000 0000000000000000 0000000000000001
>>>> (XEN)    0000000000000000 0000000000000000 ffff82d040497f08 ffff82d0404d2b80
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000800000000 000000010000006e 0000000000000003
>>>> (XEN)    00000000000002f8 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000099f30ba0 0000000099feeda7 0000000000000000 ffff82d040497fff
>>>> (XEN)    00000000b9cf3920 ffff82d0402043e8 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN)    0000000000000000 0000e01000000000 0000000000000000 0000000000000000
>>>> (XEN)    00000000000000a0 0000000000000000 0000000000000000 0000000000000000
>>>> (XEN) Xen call trace:
>>>> (XEN)    [<ffff82d0403b38e0>] R memcmp+0x20/0x46
>>>> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
>>>> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
>>>> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
>>>> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
>>>> (XEN)
>>>> (XEN) Pagetable walk from 0000000001100202:
>>>> (XEN)  L4[0x000] = 00000000b5c9d063 ffffffffffffffff
>>>> (XEN)
>>>> (XEN) ****************************************
>>>> (XEN) Panic on CPU 0:
>>>> (XEN) FATAL TRAP: vec 14, #PF[0009] IN INTERRUPT CONTEXT
>>>> (XEN) ****************************************
>>> Huh, that means we have a bug in the pagewalk rendering.  It shouldn't
>>> give up like that.
>> Is it perhaps too early for mfn_valid() to return "true" for the page table
>> page in question?
> Yes, this is indeed the problem. Thank you Jan. The mfn_valid() doesn't 
> work yet, because max_page is set afterwards in __start_xen. Here is the 
> actual translation:
>
> (XEN) Xen call trace:
> (XEN)    [<ffff82d0403b3820>] R memcmp+0x20/0x46
> (XEN)    [<ffff82d040483f79>] S arch/x86/bzimage.c#bzimage_check+0x2e/0x73
> (XEN)    [<ffff82d040483fd2>] F bzimage_headroom+0x14/0xa5
> (XEN)    [<ffff82d04047c4ac>] F __start_xen+0x908/0x2452
> (XEN)    [<ffff82d0402043e8>] F __high_start+0xb8/0xc0
> (XEN)
> (XEN) Pagetable walk from 0000000001100202:
> (XEN) Using simple walk without mfn_valid
> (XEN) Early pagetable walk from 0000000001100202 (cr3=00000000b5d0e000):
> (XEN)  L4[0x000] = 00000000b5c9d063
> (XEN)  L3[0x000] = 00000000b5c99063
> (XEN)  L2[0x008] = 80000000000001e3 (2MB)
>
> I also found the actual issue with the code, and why it fails in the 
> first place. Somewhere before early_init_{intel,amd}, there is 
> bzimage_headroom(bootstrap_map_bm(&bi->mods[0]), bi->mods[0].size), and 
> 'bootstrap_map_bm()' maps the new page with __PAGE_HYPERVISOR_RO, 
> which has PAGE_NX set (bit 63 in the L2 entry above). With EFER.NXE 
> not yet enabled, that bit is reserved, hence the reserved bit 
> violation. So I'm not sure how to work around this.

I'm working on a cleanup series to untangle the mess.

~Andrew

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Teddy Astie 1 week, 3 days ago
Le 15/01/2026 à 16:50, Andrew Cooper a écrit :
> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>> Currently the CONFIG_REQUIRE_NX prevents booting XEN, if NX is disabled
>> in the BIOS. AMD doesn't have a software-accessible MSR to re-enable it,
>> so there is nothing we can do. The system is going to die anyway. But on
>> Intel NX might just be hidden via IA32_MISC_ENABLE.XD_DISABLE. But the
>> function to re-enable it is called after the check + panic in
>> efi_arch_cpu. So, this patch removes the early check and moves the
>> entire NX handling into a dedicated place.
>>
>> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> 
> Sorry I didn't get around to doing the prep work I promised.
> 
> This is going along the right lines, but there are a few complexities still.
> 
> Also you'll want to split the patch into a series.  More on that when
> we've sorted out a few other details.
> 
>> diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
>> index a92e399fbe..8e8d50cbdf 100644
>> --- a/xen/arch/x86/boot/trampoline.S
>> +++ b/xen/arch/x86/boot/trampoline.S
>> @@ -144,10 +144,9 @@ gdt_48:
>>   GLOBAL(trampoline_misc_enable_off)
>>           .quad   0
>>   
>> -/* EFER OR-mask for boot paths.  SCE conditional on PV support, NX added when available. */
>> +/* EFER OR-mask for boot paths.  SCE conditional on PV support. */
> 
> The comment wants to stay as-was.  NX does get added when available.
> 
>>   GLOBAL(trampoline_efer)
>> -        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV)) | \
>> -                (EFER_NXE * IS_ENABLED(CONFIG_REQUIRE_NX))
>> +        .long   EFER_LME | (EFER_SCE * IS_ENABLED(CONFIG_PV))
>>   
>>   GLOBAL(trampoline_xen_phys_start)
>>           .long   0
>> diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
>> index b01e83a8ed..16f53725ca 100644
>> --- a/xen/arch/x86/include/asm/setup.h
>> +++ b/xen/arch/x86/include/asm/setup.h
>> @@ -70,4 +70,6 @@ extern bool opt_dom0_msr_relaxed;
>>   
>>   #define max_init_domid (0)
>>   
>> +void nx_init(void);
>> +
>>   #endif
>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>> index 27c63d1d97..608720b717 100644
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -1119,6 +1119,50 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>       return d;
>>   }
>>   
>> +void __init nx_init(void)
> 
> This should be static if it's only used in a single file.  However, see
> later for doing it a bit differently.
> 
>> +{
>> +    uint64_t misc_enable;
>> +    uint32_t eax, ebx, ecx, edx;
>> +
>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>> +    {
>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>> +             ecx == X86_VENDOR_INTEL_ECX &&
>> +             edx == X86_VENDOR_INTEL_EDX )
>> +        {
>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>> +            {
>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>> +
>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>> +
>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>> +            }
>> +        }
>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
> 
> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.
> 
> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
> regular CPUID.  This is a little more tricky to arrange because it needs
> doing on each CPU, not just the BSP.
> 

I see this MSR in the BKDG (bit 20 is NX). Are these bits stable across
AMD CPU generations?

>> +    }
>> +
>> +    /* Enable EFER.NXE only if NX is available */
>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>> +    {
>> +        if ( !(read_efer() & EFER_NXE) )
>> +            write_efer(read_efer() | EFER_NXE);
>> +
>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>> +        bootsym(trampoline_efer) |= EFER_NXE;
>> +    }
>> +
>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>> +        panic("This build of Xen requires NX support\n");
>> +}
>> +
>>   /* How much of the directmap is prebuilt at compile time. */
>>   #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>   
>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>       rdmsrl(MSR_EFER, this_cpu(efer));
>>       asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>   
>> +    nx_init();
>> +
>>       /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>       enable_nmis();
>>   
> 
> This is too early, as can be seen by the need to make a cpuid() call
> rather than using boot_cpu_data.
> 
> The cleanup I wanted to do was to create/rework early_cpu_init() to get
> things in a better order, so the panic() could go at the end here.  The
> current split we've got of early/regular CPU init was inherited from
> Linux and can be collapsed substantially.
> 
> The intel "unlocking" wants to move back into early_init_intel(), along
> with intel_unlock_cpuid_leaves().  (This is where it used to live before
> REQUIRE_NX was added).
> 
> The AMD side probe wants to live in early_amd_init()  (not that there is
> one right now), but the re-enabling of the NX bit in CPUID needs to also
> be in amd_init() so it gets applied to APs too.
> 
> Does this make sense?

Sounds good to me. I suppose there is no use of NX before this?

> 
> ~Andrew
> 

Teddy


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Andrew Cooper 1 week, 3 days ago
On 15/01/2026 4:32 pm, Teddy Astie wrote:
> Le 15/01/2026 à 16:50, Andrew Cooper a écrit :
>> On 15/01/2026 3:17 pm, Julian Vetter wrote:
>>> +{
>>> +    uint64_t misc_enable;
>>> +    uint32_t eax, ebx, ecx, edx;
>>> +
>>> +    if ( !boot_cpu_has(X86_FEATURE_NX) )
>>> +    {
>>> +        /* Intel: try to unhide NX by clearing XD_DISABLE */
>>> +        cpuid(0, &eax, &ebx, &ecx, &edx);
>>> +        if ( ebx == X86_VENDOR_INTEL_EBX &&
>>> +             ecx == X86_VENDOR_INTEL_ECX &&
>>> +             edx == X86_VENDOR_INTEL_EDX )
>>> +        {
>>> +            rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>> +            if ( misc_enable & MSR_IA32_MISC_ENABLE_XD_DISABLE )
>>> +            {
>>> +                misc_enable &= ~MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>> +                wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
>>> +
>>> +                /* Re-read CPUID after having cleared XD_DISABLE */
>>> +                boot_cpu_data.x86_capability[FEATURESET_e1d] = cpuid_edx(0x80000001U);
>>> +
>>> +                /* Adjust misc_enable_off for secondary startup and wakeup code */
>>> +                bootsym(trampoline_misc_enable_off) |= MSR_IA32_MISC_ENABLE_XD_DISABLE;
>>> +                printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
>>> +            }
>>> +        }
>>> +        /* AMD: nothing we can do - NX must be enabled in BIOS */
>> The BIOS is only hiding the CPUID bit.  It's not blocking the use of NX.
>>
>> You want to do a wrmsr_safe() trying to set EFER.NXE, and if it
>> succeeds, set the NX bit in MSR_K8_EXT_FEATURE_MASK to "unhide" it in
>> regular CPUID.  This is a little more tricky to arrange because it needs
>> doing on each CPU, not just the BSP.
>>
> I see this MSR in the BKDG (bit 20 is NX). Are these bits stable across 
> the AMD CPU generations ?

Urgh.  Almost, but not quite, and I've apparently lost a patch.

"x86/amd: Fixes for levelling setup"

The K8 RevD and earlier have their masking MSRs at different indices.

Perhaps instead of editing the masking MSRs, just use setup_force_cpu_cap(),
which confines the logic to the BSP.

>>> +    }
>>> +
>>> +    /* Enable EFER.NXE only if NX is available */
>>> +    if ( boot_cpu_has(X86_FEATURE_NX) )
>>> +    {
>>> +        if ( !(read_efer() & EFER_NXE) )
>>> +            write_efer(read_efer() | EFER_NXE);
>>> +
>>> +        /* Adjust trampoline_efer for secondary startup and wakeup code */
>>> +        bootsym(trampoline_efer) |= EFER_NXE;
>>> +    }
>>> +
>>> +    if ( IS_ENABLED(CONFIG_REQUIRE_NX) && !boot_cpu_has(X86_FEATURE_NX) )
>>> +        panic("This build of Xen requires NX support\n");
>>> +}
>>> +
>>>   /* How much of the directmap is prebuilt at compile time. */
>>>   #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
>>>   
>>> @@ -1159,6 +1203,8 @@ void asmlinkage __init noreturn __start_xen(void)
>>>       rdmsrl(MSR_EFER, this_cpu(efer));
>>>       asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
>>>   
>>> +    nx_init();
>>> +
>>>       /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
>>>       enable_nmis();
>>>   
>> This is too early, as can be seen by the need to make a cpuid() call
>> rather than using boot_cpu_data.
>>
>> The cleanup I wanted to do was to create/rework early_cpu_init() to get
>> things in a better order, so the panic() could go at the end here.  The
>> current split we've got of early/regular CPU init was inherited from
>> Linux and can be collapsed substantially.
>>
>> The intel "unlocking" wants to move back into early_init_intel(), along
>> with intel_unlock_cpuid_leaves().  (This is where it used to live before
>> REQUIRE_NX was added).
>>
>> The AMD side probe wants to live in early_amd_init()  (not that there is
>> one right now), but the re-enabling of the NX bit in CPUID needs to also
>> be in amd_init() so it gets applied to APs too.
>>
>> Does this make sense?
> Sounds good to me, I suppose there is no use of NX before this ?

NX predates 64-bit on AMD CPUs.

~Andrew

Re: [PATCH] xen: Move NX handling to a dedicated place
Posted by Jan Beulich 1 week, 3 days ago
On 15.01.2026 16:17, Julian Vetter wrote:
> Currently the CONFIG_REQUIRE_NX prevents booting XEN, if NX is disabled
> in the BIOS.

Which is what the config option's name says? IOW if this wants changing,
more wants saying here.

> AMD doesn't have a software-accessible MSR to re-enable it,
> so there is nothing we can do. The system is going to die anyway. But on
> Intel NX might just be hidden via IA32_MISC_ENABLE.XD_DISABLE. But the
> function to re-enable it is called after the check + panic in
> efi_arch_cpu. So, this patch removes the early check and moves the
> entire NX handling into a dedicated place.
> 
> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> ---
>  xen/arch/x86/boot/head.S         | 56 --------------------------------
>  xen/arch/x86/boot/trampoline.S   |  5 ++-
>  xen/arch/x86/cpu/intel.c         |  4 ---
>  xen/arch/x86/efi/efi-boot.h      | 12 -------
>  xen/arch/x86/include/asm/setup.h |  2 ++
>  xen/arch/x86/setup.c             | 46 ++++++++++++++++++++++++++
>  6 files changed, 50 insertions(+), 75 deletions(-)

Wasn't there some earlier variant of this? I.e. is this a v2 (or higher),
where it might help if changes made were briefly called out?

Still need to look at the patch as a whole ...

Jan