[PATCH v5 09/16] x86/percpu/64: Use relative percpu offsets

Posted by Brian Gerst 2 weeks, 4 days ago
The percpu section is currently linked at absolute address 0, because
older compilers hardcoded the stack protector canary value at a fixed
offset from the start of the GS segment.  Now that the canary is a
normal percpu variable, the percpu section does not need to be linked
at a specific address.

x86-64 will now calculate the percpu offsets as the delta between the
initial percpu address and the dynamically allocated memory, like other
architectures.  Note that GSBASE is limited to the canonical address
width (48 or 57 bits, sign-extended).  As long as the kernel text,
modules, and the dynamically allocated percpu memory are all in the
negative address space, the delta will not overflow this limit.
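
For reference, the offsets end up being computed along the lines of the
existing setup_per_cpu_areas() code (illustrative sketch using the current
symbol names, not part of this patch):

	unsigned long delta;
	unsigned int cpu;

	/*
	 * Offset of the dynamically allocated percpu memory from the
	 * initial (link-time) percpu section address.
	 */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];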

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/processor.h |  6 +++++-
 arch/x86/kernel/head_64.S        | 19 +++++++++----------
 arch/x86/kernel/setup_percpu.c   | 12 ++----------
 arch/x86/kernel/vmlinux.lds.S    | 29 +----------------------------
 arch/x86/platform/pvh/head.S     |  5 ++---
 arch/x86/tools/relocs.c          | 10 +++-------
 arch/x86/xen/xen-head.S          |  9 ++++-----
 init/Kconfig                     |  2 +-
 8 files changed, 27 insertions(+), 65 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a113c3f4f558..ae50d5d4fa26 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -428,7 +428,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
 
 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
-	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#ifdef CONFIG_SMP
+	return per_cpu_offset(cpu);
+#else
+	return 0;
+#endif
 }
 
 extern asmlinkage void entry_SYSCALL32_ignore(void);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c3028b4df85f..ffbcb0aea450 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
 	/* Set up the stack for verify_cpu() */
 	leaq	__top_init_kernel_stack(%rip), %rsp
 
-	/* Setup GSBASE to allow stack canary access for C code */
+	/*
+	 * Set up GSBASE.
+	 * Note that, on SMP, the boot cpu uses init data section until
+	 * the per cpu areas are set up.
+	 */
 	movl	$MSR_GS_BASE, %ecx
-	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-	movl	%edx, %eax
-	shrq	$32,  %rdx
+	xorl	%eax, %eax
+	xorl	%edx, %edx
 	wrmsr
 
 	call	startup_64_setup_gdt_idt
@@ -353,16 +356,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
 	movl %eax,%fs
 	movl %eax,%gs
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.
+	/*
+	 * Set up GSBASE.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-#ifndef CONFIG_SMP
-	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-#endif
 	movl	%edx, %eax
 	shrq	$32, %rdx
 	wrmsr
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b30d6e180df7..1e7be9409aa2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,18 +23,10 @@
 #include <asm/cpumask.h>
 #include <asm/cpu.h>
 
-#ifdef CONFIG_X86_64
-#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
-#else
-#define BOOT_PERCPU_OFFSET 0
-#endif
-
-DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
-	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
-};
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
 EXPORT_SYMBOL(__per_cpu_offset);
 
 /*
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d61c3584f3e6..42d1c05b0207 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -99,12 +99,6 @@ const_pcpu_hot = pcpu_hot;
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(6);          /* RW_ */
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_SMP
-	percpu PT_LOAD FLAGS(6);        /* RW_ */
-#endif
-	init PT_LOAD FLAGS(7);          /* RWE */
-#endif
 	note PT_NOTE FLAGS(0);          /* ___ */
 }
 
@@ -199,21 +193,7 @@ SECTIONS
 		__init_begin = .; /* paired with __init_end */
 	}
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
-	/*
-	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-	 * output PHDR, so the next output section - .init.text - should
-	 * start another segment - init.
-	 */
-	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
-	ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
-	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
-#endif
-
 	INIT_TEXT_SECTION(PAGE_SIZE)
-#ifdef CONFIG_X86_64
-	:init
-#endif
 
 	/*
 	 * Section for code used exclusively before alternatives are run. All
@@ -330,9 +310,7 @@ SECTIONS
 		EXIT_DATA
 	}
 
-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
 	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
-#endif
 
 	RUNTIME_CONST_VARIABLES
 	RUNTIME_CONST(ptr, USER_PTR_MAX)
@@ -476,16 +454,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
  * Per-cpu symbols which need to be offset from __per_cpu_load
  * for the boot processor.
  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(fixed_percpu_data);
 INIT_PER_CPU(irq_stack_backing_store);
 
-#ifdef CONFIG_SMP
-. = ASSERT((fixed_percpu_data == 0),
-           "fixed_percpu_data is not at start of per-cpu area");
-#endif
-
 #ifdef CONFIG_MITIGATION_UNRET_ENTRY
 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
 #endif
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index b0a9a58952aa..c931e680ef15 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -165,9 +165,8 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
 	 * the per cpu areas are set up.
 	 */
 	movl $MSR_GS_BASE,%ecx
-	leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-	movq %edx, %eax
-	shrq $32, %rdx
+	xorl %eax, %eax
+	xorl %edx, %edx
 	wrmsr
 
 	/*
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 7d7fc7f0a250..8b5e2bc3d241 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -834,12 +834,7 @@ static void percpu_init(void)
  */
 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
 {
-	int shndx = sym_index(sym);
-
-	return (shndx == per_cpu_shndx) &&
-		strcmp(symname, "__init_begin") &&
-		strcmp(symname, "__per_cpu_load") &&
-		strncmp(symname, "init_per_cpu_", 13);
+	return 0;
 }
 
 
@@ -1055,7 +1050,8 @@ static int cmp_relocs(const void *va, const void *vb)
 
 static void sort_relocs(struct relocs *r)
 {
-	qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+	if (r->count)
+		qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
 }
 
 static int write32(uint32_t v, FILE *f)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index ae4672ea00bb..1796884b727d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -51,15 +51,14 @@ SYM_CODE_START(startup_xen)
 
 	leaq	__top_init_kernel_stack(%rip), %rsp
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.
+	/*
+	 * Set up GSBASE.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
-	cdq
+	xorl	%eax, %eax
+	xorl	%edx, %edx
 	wrmsr
 
 	mov	%rsi, %rdi
diff --git a/init/Kconfig b/init/Kconfig
index c521e1421ad4..b374c0de5cfd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1849,7 +1849,7 @@ config KALLSYMS_ALL
 config KALLSYMS_ABSOLUTE_PERCPU
 	bool
 	depends on KALLSYMS
-	default X86_64 && SMP
+	default n
 
 # end of the "standard kernel features (expert users)" menu
 
-- 
2.47.0
Re: [PATCH v5 09/16] x86/percpu/64: Use relative percpu offsets
Posted by Uros Bizjak 2 weeks, 3 days ago
On Tue, Nov 5, 2024 at 4:58 PM Brian Gerst <brgerst@gmail.com> wrote:
>
> The percpu section is currently linked at absolute address 0, because
> older compilers hardcoded the stack protector canary value at a fixed
> offset from the start of the GS segment.  Now that the canary is a
> normal percpu variable, the percpu section does not need to be linked
> at a specific address.
>
> x86-64 will now calculate the percpu offsets as the delta between the
> initial percpu address and the dynamically allocated memory, like other
> architectures.  Note that GSBASE is limited to the canonical address
> width (48 or 57 bits, sign-extended).  As long as the kernel text,
> modules, and the dynamically allocated percpu memory are all in the
> negative address space, the delta will not overflow this limit.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/include/asm/processor.h |  6 +++++-
>  arch/x86/kernel/head_64.S        | 19 +++++++++----------
>  arch/x86/kernel/setup_percpu.c   | 12 ++----------
>  arch/x86/kernel/vmlinux.lds.S    | 29 +----------------------------
>  arch/x86/platform/pvh/head.S     |  5 ++---
>  arch/x86/tools/relocs.c          | 10 +++-------
>  arch/x86/xen/xen-head.S          |  9 ++++-----
>  init/Kconfig                     |  2 +-
>  8 files changed, 27 insertions(+), 65 deletions(-)
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index a113c3f4f558..ae50d5d4fa26 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -428,7 +428,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
>
>  static inline unsigned long cpu_kernelmode_gs_base(int cpu)
>  {
> -       return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
> +#ifdef CONFIG_SMP
> +       return per_cpu_offset(cpu);
> +#else
> +       return 0;
> +#endif
>  }
>
>  extern asmlinkage void entry_SYSCALL32_ignore(void);
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index c3028b4df85f..ffbcb0aea450 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
>         /* Set up the stack for verify_cpu() */
>         leaq    __top_init_kernel_stack(%rip), %rsp
>
> -       /* Setup GSBASE to allow stack canary access for C code */
> +       /*
> +        * Set up GSBASE.
> +        * Note that, on SMP, the boot cpu uses init data section until
> +        * the per cpu areas are set up.
> +        */
>         movl    $MSR_GS_BASE, %ecx
> -       leaq    INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> -       movl    %edx, %eax
> -       shrq    $32,  %rdx
> +       xorl    %eax, %eax
> +       xorl    %edx, %edx

You can use cltd after "xor %eax, %eax", it is one byte shorter with
the same effect ...
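
I.e., something along the lines of (sketch only):

	xorl	%eax, %eax		/* %eax = 0 */
	cltd				/* sign-extend %eax into %edx -> %edx = 0 */
	wrmsr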

>         wrmsr
>
>         call    startup_64_setup_gdt_idt
> @@ -353,16 +356,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
>         movl %eax,%fs
>         movl %eax,%gs
>
> -       /* Set up %gs.
> -        *
> -        * The base of %gs always points to fixed_percpu_data.
> +       /*
> +        * Set up GSBASE.
>          * Note that, on SMP, the boot cpu uses init data section until
>          * the per cpu areas are set up.
>          */
>         movl    $MSR_GS_BASE,%ecx
> -#ifndef CONFIG_SMP
> -       leaq    INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> -#endif
>         movl    %edx, %eax
>         shrq    $32, %rdx
>         wrmsr
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index b30d6e180df7..1e7be9409aa2 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -23,18 +23,10 @@
>  #include <asm/cpumask.h>
>  #include <asm/cpu.h>
>
> -#ifdef CONFIG_X86_64
> -#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
> -#else
> -#define BOOT_PERCPU_OFFSET 0
> -#endif
> -
> -DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
>  EXPORT_PER_CPU_SYMBOL(this_cpu_off);
>
> -unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
> -       [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
> -};
> +unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
>  EXPORT_SYMBOL(__per_cpu_offset);
>
>  /*
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index d61c3584f3e6..42d1c05b0207 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -99,12 +99,6 @@ const_pcpu_hot = pcpu_hot;
>  PHDRS {
>         text PT_LOAD FLAGS(5);          /* R_E */
>         data PT_LOAD FLAGS(6);          /* RW_ */
> -#ifdef CONFIG_X86_64
> -#ifdef CONFIG_SMP
> -       percpu PT_LOAD FLAGS(6);        /* RW_ */
> -#endif
> -       init PT_LOAD FLAGS(7);          /* RWE */
> -#endif
>         note PT_NOTE FLAGS(0);          /* ___ */
>  }
>
> @@ -199,21 +193,7 @@ SECTIONS
>                 __init_begin = .; /* paired with __init_end */
>         }
>
> -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
> -       /*
> -        * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
> -        * output PHDR, so the next output section - .init.text - should
> -        * start another segment - init.
> -        */
> -       PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
> -       ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
> -              "per-CPU data too large - increase CONFIG_PHYSICAL_START")
> -#endif
> -
>         INIT_TEXT_SECTION(PAGE_SIZE)
> -#ifdef CONFIG_X86_64
> -       :init
> -#endif
>
>         /*
>          * Section for code used exclusively before alternatives are run. All
> @@ -330,9 +310,7 @@ SECTIONS
>                 EXIT_DATA
>         }
>
> -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
>         PERCPU_SECTION(INTERNODE_CACHE_BYTES)
> -#endif
>
>         RUNTIME_CONST_VARIABLES
>         RUNTIME_CONST(ptr, USER_PTR_MAX)
> @@ -476,16 +454,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
>   * Per-cpu symbols which need to be offset from __per_cpu_load
>   * for the boot processor.
>   */
> -#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
> +#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
>  INIT_PER_CPU(gdt_page);
>  INIT_PER_CPU(fixed_percpu_data);
>  INIT_PER_CPU(irq_stack_backing_store);
>
> -#ifdef CONFIG_SMP
> -. = ASSERT((fixed_percpu_data == 0),
> -           "fixed_percpu_data is not at start of per-cpu area");
> -#endif
> -
>  #ifdef CONFIG_MITIGATION_UNRET_ENTRY
>  . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
>  #endif
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index b0a9a58952aa..c931e680ef15 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -165,9 +165,8 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
>          * the per cpu areas are set up.
>          */
>         movl $MSR_GS_BASE,%ecx
> -       leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> -       movq %edx, %eax
> -       shrq $32, %rdx
> +       xorl %eax, %eax
> +       xorl %edx, %edx

... also here ...

>         wrmsr
>
>         /*
> diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
> index 7d7fc7f0a250..8b5e2bc3d241 100644
> --- a/arch/x86/tools/relocs.c
> +++ b/arch/x86/tools/relocs.c
> @@ -834,12 +834,7 @@ static void percpu_init(void)
>   */
>  static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
>  {
> -       int shndx = sym_index(sym);
> -
> -       return (shndx == per_cpu_shndx) &&
> -               strcmp(symname, "__init_begin") &&
> -               strcmp(symname, "__per_cpu_load") &&
> -               strncmp(symname, "init_per_cpu_", 13);
> +       return 0;
>  }
>
>
> @@ -1055,7 +1050,8 @@ static int cmp_relocs(const void *va, const void *vb)
>
>  static void sort_relocs(struct relocs *r)
>  {
> -       qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
> +       if (r->count)
> +               qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
>  }
>
>  static int write32(uint32_t v, FILE *f)
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index ae4672ea00bb..1796884b727d 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -51,15 +51,14 @@ SYM_CODE_START(startup_xen)
>
>         leaq    __top_init_kernel_stack(%rip), %rsp
>
> -       /* Set up %gs.
> -        *
> -        * The base of %gs always points to fixed_percpu_data.
> +       /*
> +        * Set up GSBASE.
>          * Note that, on SMP, the boot cpu uses init data section until
>          * the per cpu areas are set up.
>          */
>         movl    $MSR_GS_BASE,%ecx
> -       movq    $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
> -       cdq
> +       xorl    %eax, %eax
> +       xorl    %edx, %edx

... and here.

Uros.

>         wrmsr
>
>         mov     %rsi, %rdi
> diff --git a/init/Kconfig b/init/Kconfig
> index c521e1421ad4..b374c0de5cfd 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1849,7 +1849,7 @@ config KALLSYMS_ALL
>  config KALLSYMS_ABSOLUTE_PERCPU
>         bool
>         depends on KALLSYMS
> -       default X86_64 && SMP
> +       default n
>
>  # end of the "standard kernel features (expert users)" menu
>
> --
> 2.47.0
>
Re: [PATCH v5 09/16] x86/percpu/64: Use relative percpu offsets
Posted by Brian Gerst 2 weeks, 2 days ago
On Thu, Nov 7, 2024 at 6:28 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 4:58 PM Brian Gerst <brgerst@gmail.com> wrote:
> >
> > The percpu section is currently linked at absolute address 0, because
> > older compilers hardcoded the stack protector canary value at a fixed
> > offset from the start of the GS segment.  Now that the canary is a
> > normal percpu variable, the percpu section does not need to be linked
> > at a specific address.
> >
> > x86-64 will now calculate the percpu offsets as the delta between the
> > initial percpu address and the dynamically allocated memory, like other
> > architectures.  Note that GSBASE is limited to the canonical address
> > width (48 or 57 bits, sign-extended).  As long as the kernel text,
> > modules, and the dynamically allocated percpu memory are all in the
> > negative address space, the delta will not overflow this limit.
> >
> > Signed-off-by: Brian Gerst <brgerst@gmail.com>
> > ---
> >  arch/x86/include/asm/processor.h |  6 +++++-
> >  arch/x86/kernel/head_64.S        | 19 +++++++++----------
> >  arch/x86/kernel/setup_percpu.c   | 12 ++----------
> >  arch/x86/kernel/vmlinux.lds.S    | 29 +----------------------------
> >  arch/x86/platform/pvh/head.S     |  5 ++---
> >  arch/x86/tools/relocs.c          | 10 +++-------
> >  arch/x86/xen/xen-head.S          |  9 ++++-----
> >  init/Kconfig                     |  2 +-
> >  8 files changed, 27 insertions(+), 65 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> > index a113c3f4f558..ae50d5d4fa26 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -428,7 +428,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
> >
> >  static inline unsigned long cpu_kernelmode_gs_base(int cpu)
> >  {
> > -       return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
> > +#ifdef CONFIG_SMP
> > +       return per_cpu_offset(cpu);
> > +#else
> > +       return 0;
> > +#endif
> >  }
> >
> >  extern asmlinkage void entry_SYSCALL32_ignore(void);
> > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > index c3028b4df85f..ffbcb0aea450 100644
> > --- a/arch/x86/kernel/head_64.S
> > +++ b/arch/x86/kernel/head_64.S
> > @@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
> >         /* Set up the stack for verify_cpu() */
> >         leaq    __top_init_kernel_stack(%rip), %rsp
> >
> > -       /* Setup GSBASE to allow stack canary access for C code */
> > +       /*
> > +        * Set up GSBASE.
> > +        * Note that, on SMP, the boot cpu uses init data section until
> > +        * the per cpu areas are set up.
> > +        */
> >         movl    $MSR_GS_BASE, %ecx
> > -       leaq    INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> > -       movl    %edx, %eax
> > -       shrq    $32,  %rdx
> > +       xorl    %eax, %eax
> > +       xorl    %edx, %edx
>
> You can use cltd after "xor %eax, %eax", it is one byte shorter with
> the same effect ...

I suppose that would work, but I'm not sure it's worth it to
hyper-optimize boot code like this.  It's also confusing since the SDM
calls this instruction CDQ instead of CLTD.

Brian Gerst
Re: [PATCH v5 09/16] x86/percpu/64: Use relative percpu offsets
Posted by Uros Bizjak 2 weeks, 2 days ago
On Thu, Nov 7, 2024 at 1:05 PM Brian Gerst <brgerst@gmail.com> wrote:
>
> On Thu, Nov 7, 2024 at 6:28 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Nov 5, 2024 at 4:58 PM Brian Gerst <brgerst@gmail.com> wrote:
> > >
> > > The percpu section is currently linked at absolute address 0, because
> > > older compilers hardcoded the stack protector canary value at a fixed
> > > offset from the start of the GS segment.  Now that the canary is a
> > > normal percpu variable, the percpu section does not need to be linked
> > > at a specific address.
> > >
> > > x86-64 will now calculate the percpu offsets as the delta between the
> > > initial percpu address and the dynamically allocated memory, like other
> > > architectures.  Note that GSBASE is limited to the canonical address
> > > width (48 or 57 bits, sign-extended).  As long as the kernel text,
> > > modules, and the dynamically allocated percpu memory are all in the
> > > negative address space, the delta will not overflow this limit.
> > >
> > > Signed-off-by: Brian Gerst <brgerst@gmail.com>
> > > ---
> > >  arch/x86/include/asm/processor.h |  6 +++++-
> > >  arch/x86/kernel/head_64.S        | 19 +++++++++----------
> > >  arch/x86/kernel/setup_percpu.c   | 12 ++----------
> > >  arch/x86/kernel/vmlinux.lds.S    | 29 +----------------------------
> > >  arch/x86/platform/pvh/head.S     |  5 ++---
> > >  arch/x86/tools/relocs.c          | 10 +++-------
> > >  arch/x86/xen/xen-head.S          |  9 ++++-----
> > >  init/Kconfig                     |  2 +-
> > >  8 files changed, 27 insertions(+), 65 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> > > index a113c3f4f558..ae50d5d4fa26 100644
> > > --- a/arch/x86/include/asm/processor.h
> > > +++ b/arch/x86/include/asm/processor.h
> > > @@ -428,7 +428,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
> > >
> > >  static inline unsigned long cpu_kernelmode_gs_base(int cpu)
> > >  {
> > > -       return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
> > > +#ifdef CONFIG_SMP
> > > +       return per_cpu_offset(cpu);
> > > +#else
> > > +       return 0;
> > > +#endif
> > >  }
> > >
> > >  extern asmlinkage void entry_SYSCALL32_ignore(void);
> > > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > > index c3028b4df85f..ffbcb0aea450 100644
> > > --- a/arch/x86/kernel/head_64.S
> > > +++ b/arch/x86/kernel/head_64.S
> > > @@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
> > >         /* Set up the stack for verify_cpu() */
> > >         leaq    __top_init_kernel_stack(%rip), %rsp
> > >
> > > -       /* Setup GSBASE to allow stack canary access for C code */
> > > +       /*
> > > +        * Set up GSBASE.
> > > +        * Note that, on SMP, the boot cpu uses init data section until
> > > +        * the per cpu areas are set up.
> > > +        */
> > >         movl    $MSR_GS_BASE, %ecx
> > > -       leaq    INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> > > -       movl    %edx, %eax
> > > -       shrq    $32,  %rdx
> > > +       xorl    %eax, %eax
> > > +       xorl    %edx, %edx
> >
> > You can use cltd after "xor %eax, %eax", it is one byte shorter with
> > the same effect ...
>
> I suppose that would work, but I'm not sure it's worth it to
> hyper-optimize boot code like this.  It's also confusing since the SDM
> calls this instruction CDQ instead of CLTD.

No big deal, indeed.

Reviewed-by: Uros Bizjak <ubizjak@gmail.com>