From: Ard Biesheuvel <ardb@kernel.org>
In order for pgtable_l5_enabled() to be reliable wherever it is used and
however early, set the associated CPU capability from asm code before
entering the startup C code.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/include/asm/cpufeature.h | 12 +++++++++---
 arch/x86/kernel/asm-offsets.c     |  8 ++++++++
 arch/x86/kernel/asm-offsets_32.c  |  9 ---------
 arch/x86/kernel/cpu/common.c      |  3 ---
 arch/x86/kernel/head_64.S         | 15 +++++++++++++++
5 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 893cbca37fe9..1b5de40e7bf7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -2,10 +2,10 @@
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
+#ifdef __KERNEL__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
-#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
-
#include <asm/asm.h>
#include <linux/bitops.h>
#include <asm/alternative.h>
@@ -137,5 +137,11 @@ static __always_inline bool _static_cpu_has(u16 bit)
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
+#else /* defined(__ASSEMBLER__) */
+.macro setup_force_cpu_cap, cap:req
+	btsl	$\cap % 32, boot_cpu_data+CPUINFO_x86_capability+4*(\cap / 32)(%rip)
+	btsl	$\cap % 32, cpu_caps_set+4*(\cap / 32)(%rip)
+.endm
+#endif /* !defined(__ASSEMBLER__) */
+#endif /* defined(__KERNEL__) */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index ad4ea6fb3b6c..6259b474073b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -33,6 +33,14 @@
static void __used common(void)
{
+ OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
+ OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
+ OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
+ OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
+ OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
+ OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
+
BLANK();
OFFSET(TASK_threadsp, task_struct, thread.sp);
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 2b411cd00a4e..e0a292db97b2 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -12,15 +12,6 @@ void foo(void);
void foo(void)
{
- OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
- OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
- OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
- OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
- OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
- OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aaa6d9e51ef1..ea49322ba151 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1672,9 +1672,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
- if (IS_ENABLED(CONFIG_X86_5LEVEL) && (native_read_cr4() & X86_CR4_LA57))
- setup_force_cpu_cap(X86_FEATURE_5LEVEL_PAGING);
-
detect_nopl();
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 069420853304..b4742942bece 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -27,6 +27,7 @@
#include <asm/fixmap.h>
#include <asm/smp.h>
#include <asm/thread_info.h>
+#include <asm/cpufeature.h>
/*
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -58,6 +59,20 @@ SYM_CODE_START_NOALIGN(startup_64)
*/
mov %rsi, %r15
+#ifdef CONFIG_X86_5LEVEL
+ /*
+ * Set the X86_FEATURE_5LEVEL_PAGING capability before calling into the
+ * C code, so that it is guaranteed to have a consistent view of any
+ * global pseudo-constants that are derived from pgtable_l5_enabled().
+ */
+ mov %cr4, %rax
+ btl $X86_CR4_LA57_BIT, %eax
+ jnc 0f
+
+ setup_force_cpu_cap X86_FEATURE_5LEVEL_PAGING
+0:
+#endif
+
/* Set up the stack for verify_cpu() */
leaq __top_init_kernel_stack(%rip), %rsp
--
2.49.0.1045.g170613ef41-goog
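For readers following along: the new setup_force_cpu_cap asm macro mirrors
the C helper of the same name. A sketch of what one invocation expands to,
using made-up bit numbers purely for illustration (the real
X86_FEATURE_5LEVEL_PAGING value comes from cpufeatures.h):

  /*
   * Illustrative expansion only: assume the feature macro expands to 530,
   * so word = 530 / 32 = 16 and bit = 530 % 32 = 18. Then
   *
   *	setup_force_cpu_cap X86_FEATURE_5LEVEL_PAGING
   *
   * assembles to:
   *
   *	btsl	$18, boot_cpu_data+CPUINFO_x86_capability+4*16(%rip)
   *	btsl	$18, cpu_caps_set+4*16(%rip)
   */

The first btsl sets the bit in boot_cpu_data's x86_capability[] array, so
cpu_feature_enabled() checks see it immediately; the second sets it in
cpu_caps_set, so later CPU (re-)identification keeps the bit forced --
matching what the C setup_force_cpu_cap() does.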
* Ard Biesheuvel <ardb+git@google.com> wrote:
> diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
> index ad4ea6fb3b6c..6259b474073b 100644
> --- a/arch/x86/kernel/asm-offsets.c
> +++ b/arch/x86/kernel/asm-offsets.c
> @@ -33,6 +33,14 @@
>
> static void __used common(void)
> {
> + OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
> + OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
> + OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
> + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
> + OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
> + OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
> + OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
> +
> BLANK();
> OFFSET(TASK_threadsp, task_struct, thread.sp);
> #ifdef CONFIG_STACKPROTECTOR
> diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
> index 2b411cd00a4e..e0a292db97b2 100644
> --- a/arch/x86/kernel/asm-offsets_32.c
> +++ b/arch/x86/kernel/asm-offsets_32.c
> @@ -12,15 +12,6 @@ void foo(void);
>
> void foo(void)
> {
> - OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
> - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
> - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
> - OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
> - OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
> - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
> - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
> - BLANK();
> -
This is needed so that we can run (well, build) the setup_force_cpu_cap
macro on x86-64 too, right?
Could you please split out this portion into a separate patch, to
simplify the more dangerous half of the patch?
> - if (IS_ENABLED(CONFIG_X86_5LEVEL) && (native_read_cr4() & X86_CR4_LA57))
> - setup_force_cpu_cap(X86_FEATURE_5LEVEL_PAGING);
> +#ifdef CONFIG_X86_5LEVEL
> + /*
> + * Set the X86_FEATURE_5LEVEL_PAGING capability before calling into the
> + * C code, so that it is guaranteed to have a consistent view of any
> + * global pseudo-constants that are derived from pgtable_l5_enabled().
> + */
> + mov %cr4, %rax
> + btl $X86_CR4_LA57_BIT, %eax
> + jnc 0f
> +
> + setup_force_cpu_cap X86_FEATURE_5LEVEL_PAGING
> +0:
> +#endif
Nice!
Thanks,
Ingo
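Context for the asm-offsets question above: the OFFSET() entries are not
runtime code. The asm-offsets.c translation unit is compiled, and kbuild
scrapes the resulting assembly to generate asm-offsets.h. A condensed
sketch of the mechanism, following include/linux/kbuild.h:

  #include <linux/stddef.h>	/* offsetof() */

  /* Embed a "->SYM value" marker string in the generated assembly. */
  #define DEFINE(sym, val) \
  	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

  #define OFFSET(sym, str, mem) \
  	DEFINE(sym, offsetof(struct str, mem))

Kbuild then rewrites each marker into a plain '#define CPUINFO_x86_capability
<offset>' in the generated asm-offsets.h. Moving the CPUINFO_* entries from
asm-offsets_32.c into common() makes those constants available to 64-bit
builds as well, which is what the setup_force_cpu_cap invocation in
head_64.S needs.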
On Wed, 14 May 2025 at 09:15, Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Ard Biesheuvel <ardb+git@google.com> wrote:
>
> > diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
> > index ad4ea6fb3b6c..6259b474073b 100644
> > --- a/arch/x86/kernel/asm-offsets.c
> > +++ b/arch/x86/kernel/asm-offsets.c
> > @@ -33,6 +33,14 @@
> >
> > static void __used common(void)
> > {
> > + OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
> > + OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
> > + OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
> > + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
> > + OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
> > + OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
> > + OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
> > +
> > BLANK();
> > OFFSET(TASK_threadsp, task_struct, thread.sp);
> > #ifdef CONFIG_STACKPROTECTOR
> > diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
> > index 2b411cd00a4e..e0a292db97b2 100644
> > --- a/arch/x86/kernel/asm-offsets_32.c
> > +++ b/arch/x86/kernel/asm-offsets_32.c
> > @@ -12,15 +12,6 @@ void foo(void);
> >
> > void foo(void)
> > {
> > - OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
> > - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
> > - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
> > - OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
> > - OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
> > - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
> > - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
> > - BLANK();
> > -
>
> This is needed so that we can run (well, build) the setup_force_cpu_cap
> macro on x86-64 too, right?
>
> Could you please split out this portion into a separate patch, to
> simplify the more dangerous half of the patch?
>
Sure.
> > - if (IS_ENABLED(CONFIG_X86_5LEVEL) && (native_read_cr4() & X86_CR4_LA57))
> > - setup_force_cpu_cap(X86_FEATURE_5LEVEL_PAGING);
>
Note that at this point, we'll likely still have to force clear the
original X86_FEATURE_LA57 bit, to address the issue that Kirill raised
that user space is now likely to conflate the "la57" cpuinfo string
with 5-level paging being in use.
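(For context: the reason the bit must be set this early is that, with this
series, pgtable_l5_enabled() is expected to be backed by the capability
bit, roughly along these lines -- a simplified sketch, not the series'
verbatim definition:

  #define pgtable_l5_enabled()	cpu_feature_enabled(X86_FEATURE_5LEVEL_PAGING)

  /* pseudo-constants derived from it, e.g.: */
  #define PGDIR_SHIFT		(pgtable_l5_enabled() ? 48 : 39)
  #define MAX_PHYSMEM_BITS	(pgtable_l5_enabled() ? 52 : 46)

If C code observed the capability flipping after some of these had already
been evaluated, the derived values would be mutually inconsistent; setting
the bit in asm before any C runs closes that window.)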
* Ard Biesheuvel <ardb@kernel.org> wrote:

> > > -	if (IS_ENABLED(CONFIG_X86_5LEVEL) && (native_read_cr4() & X86_CR4_LA57))
> > > -		setup_force_cpu_cap(X86_FEATURE_5LEVEL_PAGING);
>
> Note that at this point, we'll likely still have to force clear the
> original X86_FEATURE_LA57 bit, to address the issue that Kirill
> raised that user space is now likely to conflate the "la57" cpuinfo
> string with 5-level paging being in use.

No, I think the general outcome of your series is fine and clean in
terms of kernel-internal logic, and I wouldn't mess up that clarity
with user ABI quirks: and we can solve the /proc/cpuinfo ABI
compatibility requirement by exposing X86_FEATURE_5LEVEL_PAGING as
'la57', and renaming X86_FEATURE_LA57 to X86_FEATURE_LA57_HW and
exposing it as a (new) la57_hw flag, or so.

Thanks,

	Ingo
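The rename Ingo sketches would piggyback on how /proc/cpuinfo flag names
are generated: mkcapflags takes the quoted string in the cpufeatures.h
comment as the user-visible name. A hypothetical illustration (the bit
positions below are placeholders, not proposed values):

  /* raw CPUID-derived bit, renamed, shown under a new name: */
  #define X86_FEATURE_LA57_HW		(16*32 + 16) /* "la57_hw" 57-bit linear addresses */

  /* software-defined bit set from asm at boot, shown under the legacy name: */
  #define X86_FEATURE_5LEVEL_PAGING	(21*32 +  5) /* "la57" 5-level paging in use */

With that, existing user space keying off the 'la57' string keeps seeing a
flag that means "5-level paging is actually enabled".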
On Wed, 14 May 2025 at 09:37, Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Ard Biesheuvel <ardb@kernel.org> wrote:
>
> > > > -	if (IS_ENABLED(CONFIG_X86_5LEVEL) && (native_read_cr4() & X86_CR4_LA57))
> > > > -		setup_force_cpu_cap(X86_FEATURE_5LEVEL_PAGING);
> >
> > Note that at this point, we'll likely still have to force clear the
> > original X86_FEATURE_LA57 bit, to address the issue that Kirill
> > raised that user space is now likely to conflate the "la57" cpuinfo
> > string with 5-level paging being in use.
>
> No, I think the general outcome of your series is fine and clean in
> terms of kernel-internal logic, and I wouldn't mess up that clarity
> with user ABI quirks: and we can solve the /proc/cpuinfo ABI
> compatibility requirement by exposing X86_FEATURE_5LEVEL_PAGING as
> 'la57', and renaming X86_FEATURE_LA57 to X86_FEATURE_LA57_HW and
> exposing it as a (new) la57_hw flag, or so.
>

Ok.