[PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler

Hou Wenlong posted 43 patches 2 years, 9 months ago
[PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Hou Wenlong 2 years, 9 months ago
From: Brian Gerst <brgerst@gmail.com>

From: Brian Gerst <brgerst@gmail.com>

If the compiler supports it, use a standard per-cpu variable for the
stack protector instead of the old fixed location.  Keep the fixed
location code for compatibility with older compilers.

[Hou Wenlong: Disable it on Clang, adapt new code change and adapt
missing GS set up path in pvh_start_xen()]

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Co-developed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Cc: Thomas Garnier <thgarnie@chromium.org>
Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Cc: Kees Cook <keescook@chromium.org>
---
 arch/x86/Kconfig                      | 12 ++++++++++++
 arch/x86/Makefile                     | 21 ++++++++++++++-------
 arch/x86/entry/entry_64.S             |  6 +++++-
 arch/x86/include/asm/processor.h      | 17 ++++++++++++-----
 arch/x86/include/asm/stackprotector.h | 16 +++++++---------
 arch/x86/kernel/asm-offsets_64.c      |  2 +-
 arch/x86/kernel/cpu/common.c          | 15 +++++++--------
 arch/x86/kernel/head_64.S             | 16 ++++++++++------
 arch/x86/kernel/vmlinux.lds.S         |  4 +++-
 arch/x86/platform/pvh/head.S          |  8 ++++++++
 arch/x86/xen/xen-head.S               | 14 +++++++++-----
 11 files changed, 88 insertions(+), 43 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68e5da464b96..55cce8cdf9bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -410,6 +410,18 @@ config CC_HAS_SANE_STACKPROTECTOR
 	  the compiler produces broken code or if it does not let us control
 	  the segment on 32-bit kernels.
 
+config CC_HAS_CUSTOMIZED_STACKPROTECTOR
+	bool
+	# Although clang supports -mstack-protector-guard-reg option, it
+	# would generate GOT reference for __stack_chk_guard even with
+	# -fno-PIE flag.
+	default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))
+
+config STACKPROTECTOR_FIXED
+	bool
+	depends on X86_64 && STACKPROTECTOR
+	default !CC_HAS_CUSTOMIZED_STACKPROTECTOR
+
 menu "Processor type and features"
 
 config SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b39975977c03..57e4dbbf501d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -111,13 +111,7 @@ ifeq ($(CONFIG_X86_32),y)
         # temporary until string.h is fixed
         KBUILD_CFLAGS += -ffreestanding
 
-	ifeq ($(CONFIG_STACKPROTECTOR),y)
-		ifeq ($(CONFIG_SMP),y)
-			KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
-		else
-			KBUILD_CFLAGS += -mstack-protector-guard=global
-		endif
-	endif
+	percpu_seg := fs
 else
         BITS := 64
         UTS_MACHINE := x86_64
@@ -167,6 +161,19 @@ else
         KBUILD_CFLAGS += -mcmodel=kernel
         KBUILD_RUSTFLAGS += -Cno-redzone=y
         KBUILD_RUSTFLAGS += -Ccode-model=kernel
+
+	percpu_seg := gs
+endif
+
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+	ifneq ($(CONFIG_STACKPROTECTOR_FIXED),y)
+		ifeq ($(CONFIG_SMP),y)
+			KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) \
+					 -mstack-protector-guard-symbol=__stack_chk_guard
+		else
+			KBUILD_CFLAGS += -mstack-protector-guard=global
+		endif
+	endif
 endif
 
 #
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6f2297ebb15f..df79b7aa65bb 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -229,6 +229,10 @@ SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
 	int3
 SYM_CODE_END(entry_SYSCALL_64)
 
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data + FIXED_stack_canary
+#endif
+
 /*
  * %rdi: prev task
  * %rsi: next task
@@ -252,7 +256,7 @@ SYM_FUNC_START(__switch_to_asm)
 
 #ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
-	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
+	movq	%rbx, PER_CPU_VAR(__stack_chk_guard)
 #endif
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2a5ec5750ba7..3890f609569d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -379,6 +379,8 @@ struct irq_stack {
 } __aligned(IRQ_STACK_SIZE);
 
 #ifdef CONFIG_X86_64
+
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 struct fixed_percpu_data {
 	/*
 	 * GCC hardcodes the stack canary as %gs:40.  Since the
@@ -394,21 +396,26 @@ struct fixed_percpu_data {
 
 DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
 DECLARE_INIT_PER_CPU(fixed_percpu_data);
+#endif /* CONFIG_STACKPROTECTOR_FIXED */
 
 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#else
+#ifdef CONFIG_SMP
+	return per_cpu_offset(cpu);
+#else
+	return 0;
+#endif
+#endif
 }
 
 extern asmlinkage void ignore_sysret(void);
 
 /* Save actual FS/GS selectors and bases to current->thread */
 void current_save_fsgs(void);
-#else	/* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
-#endif	/* !X86_64 */
+#endif	/* X86_64 */
 
 struct perf_event;
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 00473a650f51..24aa0e2ad0dd 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -36,6 +36,12 @@
 
 #include <linux/sched.h>
 
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data.stack_canary
+#else
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
+#endif
+
 /*
  * Initialize the stackprotector canary value.
  *
@@ -51,25 +57,17 @@ static __always_inline void boot_init_stack_canary(void)
 {
 	unsigned long canary = get_random_canary();
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 	BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
 #endif
 
 	current->stack_canary = canary;
-#ifdef CONFIG_X86_64
-	this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
 	this_cpu_write(__stack_chk_guard, canary);
-#endif
 }
 
 static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
 {
-#ifdef CONFIG_X86_64
-	per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
 	per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
 }
 
 #else	/* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bb65371ea9df..f39baf90126c 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,7 +56,7 @@ int main(void)
 
 	BLANK();
 
-#ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 	OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
 	BLANK();
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3ea06b0b4570..972b1babf731 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2051,10 +2051,6 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
 
 #ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
-		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-
 static void wrmsrl_cstar(unsigned long val)
 {
 	/*
@@ -2102,15 +2098,18 @@ void syscall_init(void)
 	       X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF|
 	       X86_EFLAGS_AC|X86_EFLAGS_ID);
 }
-
-#else	/* CONFIG_X86_64 */
+#endif	/* CONFIG_X86_64 */
 
 #ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
+#else
 DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
 EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
 #endif
-
-#endif	/* CONFIG_X86_64 */
+#endif
 
 /*
  * Clear all 6 debug registers:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 21f0556d3ac0..61f1873d0ff7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -68,7 +68,13 @@ SYM_CODE_START_NOALIGN(startup_64)
 
 	/* Setup GSBASE to allow stack canary access for C code */
 	movl	$MSR_GS_BASE, %ecx
+#if defined(CONFIG_STACKPROTECTOR_FIXED)
 	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#elif defined(CONFIG_SMP)
+	movabs	$__per_cpu_load, %rdx
+#else
+	xorl	%edx, %edx
+#endif
 	movl	%edx, %eax
 	shrq	$32,  %rdx
 	wrmsr
@@ -283,16 +289,14 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movl %eax,%fs
 	movl %eax,%gs
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data. If the
-	 * stack protector canary is enabled, it is located at %gs:40.
+	/*
+	 * Set up GS base.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-#ifndef CONFIG_SMP
-	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#if !defined(CONFIG_SMP) && defined(CONFIG_STACKPROTECTOR_FIXED)
+	leaq	__per_cpu_load(%rip), %rdx
 #endif
 	movl	%edx, %eax
 	shrq	$32, %rdx
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 25f155205770..f02dcde9f8a8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -500,12 +500,14 @@ SECTIONS
  */
 #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
 INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(fixed_percpu_data);
 INIT_PER_CPU(irq_stack_backing_store);
 
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+INIT_PER_CPU(fixed_percpu_data);
 #ifdef CONFIG_SMP
 . = ASSERT((fixed_percpu_data == 0),
            "fixed_percpu_data is not at start of per-cpu area");
 #endif
+#endif
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index b093996b7e19..5842fe0e4f96 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -96,8 +96,16 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
 1:
 	/* Set base address in stack canary descriptor. */
 	mov $MSR_GS_BASE,%ecx
+#if defined(CONFIG_STACKPROTECTOR_FIXED)
 	mov $_pa(INIT_PER_CPU_VAR(fixed_percpu_data)), %eax
 	xor %edx, %edx
+#elif defined(CONFIG_SMP)
+	mov $__per_cpu_load, %rax
+	cdq
+#else
+	xor %eax, %eax
+	xor %edx, %edx
+#endif
 	wrmsr
 
 	call xen_prepare_pvh
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 643d02900fbb..09eaf59e8066 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -51,15 +51,19 @@ SYM_CODE_START(startup_xen)
 
 	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.  If the
-	 * stack protector canary is enabled, it is located at %gs:40.
+	/*
+	 * Set up GS base.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
+#if defined(CONFIG_STACKPROTECTOR_FIXED)
+	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#elif defined(CONFIG_SMP)
+	movabs	$__per_cpu_load, %rdx
+#else
+	xorl	%eax, %eax
+#endif
 	cdq
 	wrmsr
 
-- 
2.31.1
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Juergen Gross 2 years, 9 months ago
On 28.04.23 11:50, Hou Wenlong wrote:
> From: Brian Gerst <brgerst@gmail.com>
> 
> From: Brian Gerst <brgerst@gmail.com>
> 
> If the compiler supports it, use a standard per-cpu variable for the
> stack protector instead of the old fixed location.  Keep the fixed
> location code for compatibility with older compilers.
> 
> [Hou Wenlong: Disable it on Clang, adapt new code change and adapt
> missing GS set up path in pvh_start_xen()]
> 
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> Co-developed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Cc: Thomas Garnier <thgarnie@chromium.org>
> Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> Cc: Kees Cook <keescook@chromium.org>
> ---
>   arch/x86/Kconfig                      | 12 ++++++++++++
>   arch/x86/Makefile                     | 21 ++++++++++++++-------
>   arch/x86/entry/entry_64.S             |  6 +++++-
>   arch/x86/include/asm/processor.h      | 17 ++++++++++++-----
>   arch/x86/include/asm/stackprotector.h | 16 +++++++---------
>   arch/x86/kernel/asm-offsets_64.c      |  2 +-
>   arch/x86/kernel/cpu/common.c          | 15 +++++++--------
>   arch/x86/kernel/head_64.S             | 16 ++++++++++------
>   arch/x86/kernel/vmlinux.lds.S         |  4 +++-
>   arch/x86/platform/pvh/head.S          |  8 ++++++++
>   arch/x86/xen/xen-head.S               | 14 +++++++++-----
>   11 files changed, 88 insertions(+), 43 deletions(-)
> 

...

> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 643d02900fbb..09eaf59e8066 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -51,15 +51,19 @@ SYM_CODE_START(startup_xen)
>   
>   	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
>   
> -	/* Set up %gs.
> -	 *
> -	 * The base of %gs always points to fixed_percpu_data.  If the
> -	 * stack protector canary is enabled, it is located at %gs:40.
> +	/*
> +	 * Set up GS base.
>   	 * Note that, on SMP, the boot cpu uses init data section until
>   	 * the per cpu areas are set up.
>   	 */
>   	movl	$MSR_GS_BASE,%ecx
> -	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
> +#if defined(CONFIG_STACKPROTECTOR_FIXED)
> +	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> +#elif defined(CONFIG_SMP)
> +	movabs	$__per_cpu_load, %rdx

Shouldn't above 2 targets be %rax?

> +#else
> +	xorl	%eax, %eax
> +#endif
>   	cdq
>   	wrmsr
>   


Juergen
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Hou Wenlong 2 years, 9 months ago
On Thu, May 04, 2023 at 12:31:59PM +0200, Juergen Gross wrote:
> On 28.04.23 11:50, Hou Wenlong wrote:
> >From: Brian Gerst <brgerst@gmail.com>
> >
> >From: Brian Gerst <brgerst@gmail.com>
> >
> >If the compiler supports it, use a standard per-cpu variable for the
> >stack protector instead of the old fixed location.  Keep the fixed
> >location code for compatibility with older compilers.
> >
> >[Hou Wenlong: Disable it on Clang, adapt new code change and adapt
> >missing GS set up path in pvh_start_xen()]
> >
> >Signed-off-by: Brian Gerst <brgerst@gmail.com>
> >Co-developed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> >Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> >Cc: Thomas Garnier <thgarnie@chromium.org>
> >Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> >Cc: Kees Cook <keescook@chromium.org>
> >---
> >  arch/x86/Kconfig                      | 12 ++++++++++++
> >  arch/x86/Makefile                     | 21 ++++++++++++++-------
> >  arch/x86/entry/entry_64.S             |  6 +++++-
> >  arch/x86/include/asm/processor.h      | 17 ++++++++++++-----
> >  arch/x86/include/asm/stackprotector.h | 16 +++++++---------
> >  arch/x86/kernel/asm-offsets_64.c      |  2 +-
> >  arch/x86/kernel/cpu/common.c          | 15 +++++++--------
> >  arch/x86/kernel/head_64.S             | 16 ++++++++++------
> >  arch/x86/kernel/vmlinux.lds.S         |  4 +++-
> >  arch/x86/platform/pvh/head.S          |  8 ++++++++
> >  arch/x86/xen/xen-head.S               | 14 +++++++++-----
> >  11 files changed, 88 insertions(+), 43 deletions(-)
> >
> 
> ...
> 
> >diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> >index 643d02900fbb..09eaf59e8066 100644
> >--- a/arch/x86/xen/xen-head.S
> >+++ b/arch/x86/xen/xen-head.S
> >@@ -51,15 +51,19 @@ SYM_CODE_START(startup_xen)
> >  	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
> >-	/* Set up %gs.
> >-	 *
> >-	 * The base of %gs always points to fixed_percpu_data.  If the
> >-	 * stack protector canary is enabled, it is located at %gs:40.
> >+	/*
> >+	 * Set up GS base.
> >  	 * Note that, on SMP, the boot cpu uses init data section until
> >  	 * the per cpu areas are set up.
> >  	 */
> >  	movl	$MSR_GS_BASE,%ecx
> >-	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
> >+#if defined(CONFIG_STACKPROTECTOR_FIXED)
> >+	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> >+#elif defined(CONFIG_SMP)
> >+	movabs	$__per_cpu_load, %rdx
> 
> Shouldn't above 2 targets be %rax?
>
Ah yes, my mistake. I didn't test it on XEN guest, sorry,
I'll test XEN guest before the next submission.

Thanks.

> >+#else
> >+	xorl	%eax, %eax
> >+#endif
> >  	cdq
> >  	wrmsr
> 
> 
> Juergen

> pub  2048R/28BF132F 2014-06-02 Juergen Gross <jg@pfupf.net>
> uid                            Juergen Gross <jgross@suse.com>
> uid                            Juergen Gross <jgross@novell.com>
> uid                            Juergen Gross <jgross@suse.de>
> sub  2048R/16375B53 2014-06-02
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Nick Desaulniers 2 years, 9 months ago
On Fri, Apr 28, 2023 at 2:52 AM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
>
> From: Brian Gerst <brgerst@gmail.com>
>
> From: Brian Gerst <brgerst@gmail.com>
>
> If the compiler supports it, use a standard per-cpu variable for the
> stack protector instead of the old fixed location.  Keep the fixed
> location code for compatibility with older compilers.
>
> [Hou Wenlong: Disable it on Clang, adapt new code change and adapt
> missing GS set up path in pvh_start_xen()]
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> Co-developed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Cc: Thomas Garnier <thgarnie@chromium.org>
> Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> Cc: Kees Cook <keescook@chromium.org>
> ---
>  arch/x86/Kconfig                      | 12 ++++++++++++
>  arch/x86/Makefile                     | 21 ++++++++++++++-------
>  arch/x86/entry/entry_64.S             |  6 +++++-
>  arch/x86/include/asm/processor.h      | 17 ++++++++++++-----
>  arch/x86/include/asm/stackprotector.h | 16 +++++++---------
>  arch/x86/kernel/asm-offsets_64.c      |  2 +-
>  arch/x86/kernel/cpu/common.c          | 15 +++++++--------
>  arch/x86/kernel/head_64.S             | 16 ++++++++++------
>  arch/x86/kernel/vmlinux.lds.S         |  4 +++-
>  arch/x86/platform/pvh/head.S          |  8 ++++++++
>  arch/x86/xen/xen-head.S               | 14 +++++++++-----
>  11 files changed, 88 insertions(+), 43 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 68e5da464b96..55cce8cdf9bd 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -410,6 +410,18 @@ config CC_HAS_SANE_STACKPROTECTOR
>           the compiler produces broken code or if it does not let us control
>           the segment on 32-bit kernels.
>
> +config CC_HAS_CUSTOMIZED_STACKPROTECTOR
> +       bool
> +       # Although clang supports -mstack-protector-guard-reg option, it
> +       # would generate GOT reference for __stack_chk_guard even with
> +       # -fno-PIE flag.
> +       default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))

Hi Hou,
I've filed this bug against LLVM and will work with LLVM folks at
Intel to resolve:
https://github.com/llvm/llvm-project/issues/62481
Can you please review that report and let me know here or there if I
missed anything? Would you also mind including a link to that in the
comments in the next version of this patch?

Less relevant issues I filed looking at some related codegen:
https://github.com/llvm/llvm-project/issues/62482
https://github.com/llvm/llvm-project/issues/62480

And we should probably look into:
https://github.com/llvm/llvm-project/issues/22476


-- 
Thanks,
~Nick Desaulniers
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Hou Wenlong 2 years, 9 months ago
On Tue, May 02, 2023 at 01:27:53AM +0800, Nick Desaulniers wrote:
> On Fri, Apr 28, 2023 at 2:52 AM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> >
> > From: Brian Gerst <brgerst@gmail.com>
> >
> > From: Brian Gerst <brgerst@gmail.com>
> >
> > If the compiler supports it, use a standard per-cpu variable for the
> > stack protector instead of the old fixed location.  Keep the fixed
> > location code for compatibility with older compilers.
> >
> > [Hou Wenlong: Disable it on Clang, adapt new code change and adapt
> > missing GS set up path in pvh_start_xen()]
> >
> > Signed-off-by: Brian Gerst <brgerst@gmail.com>
> > Co-developed-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> > Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> > Cc: Thomas Garnier <thgarnie@chromium.org>
> > Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> > Cc: Kees Cook <keescook@chromium.org>
> > ---
> >  arch/x86/Kconfig                      | 12 ++++++++++++
> >  arch/x86/Makefile                     | 21 ++++++++++++++-------
> >  arch/x86/entry/entry_64.S             |  6 +++++-
> >  arch/x86/include/asm/processor.h      | 17 ++++++++++++-----
> >  arch/x86/include/asm/stackprotector.h | 16 +++++++---------
> >  arch/x86/kernel/asm-offsets_64.c      |  2 +-
> >  arch/x86/kernel/cpu/common.c          | 15 +++++++--------
> >  arch/x86/kernel/head_64.S             | 16 ++++++++++------
> >  arch/x86/kernel/vmlinux.lds.S         |  4 +++-
> >  arch/x86/platform/pvh/head.S          |  8 ++++++++
> >  arch/x86/xen/xen-head.S               | 14 +++++++++-----
> >  11 files changed, 88 insertions(+), 43 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 68e5da464b96..55cce8cdf9bd 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -410,6 +410,18 @@ config CC_HAS_SANE_STACKPROTECTOR
> >           the compiler produces broken code or if it does not let us control
> >           the segment on 32-bit kernels.
> >
> > +config CC_HAS_CUSTOMIZED_STACKPROTECTOR
> > +       bool
> > +       # Although clang supports -mstack-protector-guard-reg option, it
> > +       # would generate GOT reference for __stack_chk_guard even with
> > +       # -fno-PIE flag.
> > +       default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))
> 
> Hi Hou,
> I've filed this bug against LLVM and will work with LLVM folks at
> Intel to resolve:
> https://github.com/llvm/llvm-project/issues/62481
> Can you please review that report and let me know here or there if I
> missed anything? Would you also mind including a link to that in the
> comments in the next version of this patch?
> 
Hi Nick,

Thanks for your help, I'll include the link in the next version.
Actually, I had post an issue on github too when I test the patch on
LLVM. But no replies. :(.
https://github.com/llvm/llvm-project/issues/60116

There is another problem I met for this patch, some unexpected code
are generated:

do_one_initcall: (init/main.o)
......
movq    __stack_chk_guard(%rip), %rax
movq    %rax,0x2b0(%rsp)

The complier generates wrong instruction, no GOT reference and gs
register. I only see it in init/main.c file. I have tried to move the
function into another file and compiled it with same cflags. It could
generate right instruction for the function in another file.

The LLVM chain toolsare built by myself:
clang version 15.0.7 (https://github.com/llvm/llvm-project.git
8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)

> Less relevant issues I filed looking at some related codegen:
> https://github.com/llvm/llvm-project/issues/62482
> https://github.com/llvm/llvm-project/issues/62480
> 
> And we should probably look into:
> https://github.com/llvm/llvm-project/issues/22476
> 
>

Except for per-cpu stack canary patch, there is another issue I post on
github: https://github.com/llvm/llvm-project/issues/60096

The related patch is:
https://lore.kernel.org/lkml/175116f75c38c15d8d73a03301eab805fea13a0a.1682673543.git.houwenlong.hwl@antgroup.com/

I couldn't find the related documentation about that, hope you can help
me too.

One more problem that I didn't post is:
https://lore.kernel.org/lkml/8d6bbaf66b90cf1a8fd2c5da98f5e094b9ffcb27.1682673543.git.houwenlong.hwl@antgroup.com/

> -- 
> Thanks,
> ~Nick Desaulniers
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Nick Desaulniers 2 years, 9 months ago
On Thu, May 4, 2023 at 11:14 PM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
>
> On Tue, May 02, 2023 at 01:27:53AM +0800, Nick Desaulniers wrote:
> > On Fri, Apr 28, 2023 at 2:52 AM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> > >
> > > +config CC_HAS_CUSTOMIZED_STACKPROTECTOR
> > > +       bool
> > > +       # Although clang supports -mstack-protector-guard-reg option, it
> > > +       # would generate GOT reference for __stack_chk_guard even with
> > > +       # -fno-PIE flag.
> > > +       default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))
> >
> > Hi Hou,
> > I've filed this bug against LLVM and will work with LLVM folks at
> > Intel to resolve:
> > https://github.com/llvm/llvm-project/issues/62481
> > Can you please review that report and let me know here or there if I
> > missed anything? Would you also mind including a link to that in the
> > comments in the next version of this patch?
> >
> Hi Nick,
>
> Thanks for your help, I'll include the link in the next version.
> Actually, I had post an issue on github too when I test the patch on
> LLVM. But no replies. :(.

Ah, sorry about that.  The issue tracker is pretty high volume and
stuff gets missed.  With many users comes many bug reports.  We could
be better about triage though.  If it's specific to the Linux kernel,
https://github.com/ClangBuiltLinux/linux/issues is a better issue
tracker to use; we can move bug reports upstream to
https://github.com/llvm/llvm-project/issues/ when necessary. It's
linked off of clangbuiltlinux.github.io if you lose it.

> https://github.com/llvm/llvm-project/issues/60116
>
> There is another problem I met for this patch, some unexpected code
> are generated:
>
> do_one_initcall: (init/main.o)
> ......
> movq    __stack_chk_guard(%rip), %rax
> movq    %rax,0x2b0(%rsp)
>
> The complier generates wrong instruction, no GOT reference and gs
> register. I only see it in init/main.c file. I have tried to move the
> function into another file and compiled it with same cflags. It could
> generate right instruction for the function in another file.

The wrong instruction or the wrong operand?  This is loading the
canary into the stack slot in the fn prolog.  Perhaps the expected
cflag is not getting set (or being removed) from init/main.c? You
should be able to do:

$ make LLVM=1 init/main.o V=1

to see how clang was invoked to see if the expected flag was there, or not.

>
> The LLVM chain toolsare built by myself:
> clang version 15.0.7 (https://github.com/llvm/llvm-project.git
> 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)

Perhaps worth rebuilding with top of tree, which is clang 17.

>
> > Less relevant issues I filed looking at some related codegen:
> > https://github.com/llvm/llvm-project/issues/62482
> > https://github.com/llvm/llvm-project/issues/62480
> >
> > And we should probably look into:
> > https://github.com/llvm/llvm-project/issues/22476
> >
> >
>
> Except for per-cpu stack canary patch, there is another issue I post on
> github: https://github.com/llvm/llvm-project/issues/60096

Thanks, I'll bring that up with Intel, too.

>
> The related patch is:
> https://lore.kernel.org/lkml/175116f75c38c15d8d73a03301eab805fea13a0a.1682673543.git.houwenlong.hwl@antgroup.com/
>
> I couldn't find the related documentation about that, hope you can help
> me too.
>
> One more problem that I didn't post is:
> https://lore.kernel.org/lkml/8d6bbaf66b90cf1a8fd2c5da98f5e094b9ffcb27.1682673543.git.houwenlong.hwl@antgroup.com/

Mind filing another bug for this in llvm's issue tracker? We can
discuss there if LLD needs to be doing something different.

Thanks for uncovering these and helping us get them fixed up!
-- 
Thanks,
~Nick Desaulniers
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Hou Wenlong 2 years, 9 months ago
On Sat, May 06, 2023 at 02:02:25AM +0800, Nick Desaulniers wrote:
> On Thu, May 4, 2023 at 11:14 PM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> >
> > On Tue, May 02, 2023 at 01:27:53AM +0800, Nick Desaulniers wrote:
> > > On Fri, Apr 28, 2023 at 2:52 AM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> > > >
> > > > +config CC_HAS_CUSTOMIZED_STACKPROTECTOR
> > > > +       bool
> > > > +       # Although clang supports -mstack-protector-guard-reg option, it
> > > > +       # would generate GOT reference for __stack_chk_guard even with
> > > > +       # -fno-PIE flag.
> > > > +       default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))
> > >
> > > Hi Hou,
> > > I've filed this bug against LLVM and will work with LLVM folks at
> > > Intel to resolve:
> > > https://github.com/llvm/llvm-project/issues/62481
> > > Can you please review that report and let me know here or there if I
> > > missed anything? Would you also mind including a link to that in the
> > > comments in the next version of this patch?
> > >
> > Hi Nick,
> >
> > Thanks for your help, I'll include the link in the next version.
> > Actually, I had post an issue on github too when I test the patch on
> > LLVM. But no replies. :(.
> 
> Ah, sorry about that.  The issue tracker is pretty high volume and
> stuff gets missed.  With many users comes many bug reports.  We could
> be better about triage though.  If it's specific to the Linux kernel,
> https://github.com/ClangBuiltLinux/linux/issues is a better issue
> tracker to use; we can move bug reports upstream to
> https://github.com/llvm/llvm-project/issues/ when necessary. It's
> linked off of clangbuiltlinux.github.io if you lose it.
> 
> > https://github.com/llvm/llvm-project/issues/60116
> >
> > There is another problem I met for this patch, some unexpected code
> > are generated:
> >
> > do_one_initcall: (init/main.o)
> > ......
> > movq    __stack_chk_guard(%rip), %rax
> > movq    %rax,0x2b0(%rsp)
> >
> > The complier generates wrong instruction, no GOT reference and gs
> > register. I only see it in init/main.c file. I have tried to move the
> > function into another file and compiled it with same cflags. It could
> > generate right instruction for the function in another file.
> 
> The wrong instruction or the wrong operand?  This is loading the
> canary into the stack slot in the fn prolog.  Perhaps the expected
> cflag is not getting set (or being removed) from init/main.c? You
> should be able to do:
> 
> $ make LLVM=1 init/main.o V=1
> 
> to see how clang was invoked to see if the expected flag was there, or not.
>
Hi Nick,
The ouput is:
  clang -Wp,-MMD,init/.main.o.d  -nostdinc -I./arch/x86/include
-I./arch/x86/include/generated  -I./include -I./arch/x86/include/uapi
-I./arch/x86/include/generated/uapi -I./include/uapi
-I./include/generated/uapi -include ./include/linux/compiler-version.h
-include ./include/linux/kconfig.h -include
./include/linux/compiler_types.h -D__KERNEL__ -Werror
-fmacro-prefix-map=./= -Wall -Wundef -Werror=strict-prototypes
-Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE
-Werror=implicit-function-declaration -Werror=implicit-int
-Werror=return-type -Wno-format-security -funsigned-char -std=gnu11
--target=x86_64-linux-gnu -fintegrated-as -Werror=unknown-warning-option
-Werror=ignored-optimization-argument -Werror=option-ignored
-Werror=unused-command-line-argument -mno-sse -mno-mmx -mno-sse2
-mno-3dnow -mno-avx -fcf-protection=branch -fno-jump-tables -m64
-falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mstack-alignment=8
-mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel
-mstack-protector-guard-reg=gs
-mstack-protector-guard-symbol=__stack_chk_guard -Wno-sign-compare
-fno-asynchronous-unwind-tables -mretpoline-external-thunk
-mfunction-return=thunk-extern -fpatchable-function-entry=16,16
-fno-delete-null-pointer-checks -Wno-frame-address
-Wno-address-of-packed-member -O2 -Wframe-larger-than=2048
-fstack-protector-strong -Wno-gnu -Wno-unused-but-set-variable
-Wno-unused-const-variable -fomit-frame-pointer
-ftrivial-auto-var-init=zero
-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
-fno-stack-clash-protection -falign-functions=16
-Wdeclaration-after-statement -Wvla -Wno-pointer-sign
-Wcast-function-type -Wimplicit-fallthrough -fno-strict-overflow
-fno-stack-check -Werror=date-time -Werror=incompatible-pointer-types
-Wno-initializer-overrides -Wno-format -Wformat-extra-args
-Wformat-invalid-specifier -Wformat-zero-length -Wnonnull
-Wformat-insufficient-args -Wno-sign-compare -Wno-pointer-to-enum-cast
-Wno-tautological-constant-out-of-range-compare -Wno-unaligned-access
-fno-function-sections -fno-data-sections
-DKBUILD_MODFILE='"init/main"' -DKBUILD_BASENAME='"main"'
-DKBUILD_MODNAME='"main"' -D__KBUILD_MODNAME=kmod_main -c -o init/main.o
init/main.c

I see the expected flags in the ouput. But the generated code is wrong:
00000000000006e0 <do_one_initcall>: (init/main.o)
     .......
     6ff: 48 8b 05 00 00 00 00          movq    (%rip), %rax # 0x706 <do_one_initcall+0x26>
                0000000000000702:  R_X86_64_PC32 __stack_chk_guard-0x4
     706: 48 89 84 24 e0 02 00 00       movq    %rax, 736(%rsp)

The expected generated code should be:
0000000000000010 <name_to_dev_t>: (init/do_mounts.o)
      ......
      2c: 4c 8b 25 00 00 00 00          movq    (%rip), %r12 # 0x33 <name_to_dev_t+0x23>
                000000000000002f:  R_X86_64_REX_GOTPCRELX __stack_chk_guard-0x4
      33: 65 49 8b 04 24                movq    %gs:(%r12), %rax
      38: 48 89 44 24 30                movq    %rax, 48(%rsp)

Actually, this is the main reason why I disable per-cpu stack canary on
LLVM. This patch could be picked separately, if you have time to help me
find out the reason.

Thanks.
> >
> > The LLVM chain toolsare built by myself:
> > clang version 15.0.7 (https://github.com/llvm/llvm-project.git
> > 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
> 
> Perhaps worth rebuilding with top of tree, which is clang 17.
> 
> >
> > > Less relevant issues I filed looking at some related codegen:
> > > https://github.com/llvm/llvm-project/issues/62482
> > > https://github.com/llvm/llvm-project/issues/62480
> > >
> > > And we should probably look into:
> > > https://github.com/llvm/llvm-project/issues/22476
> > >
> > >
> >
> > Except for per-cpu stack canary patch, there is another issue I post on
> > github: https://github.com/llvm/llvm-project/issues/60096
> 
> Thanks, I'll bring that up with Intel, too.
> 
> >
> > The related patch is:
> > https://lore.kernel.org/lkml/175116f75c38c15d8d73a03301eab805fea13a0a.1682673543.git.houwenlong.hwl@antgroup.com/
> >
> > I couldn't find the related documentation about that, hope you can help
> > me too.
> >
> > One more problem that I didn't post is:
> > https://lore.kernel.org/lkml/8d6bbaf66b90cf1a8fd2c5da98f5e094b9ffcb27.1682673543.git.houwenlong.hwl@antgroup.com/
> 
> Mind filing another bug for this in llvm's issue tracker? We can
> discuss there if LLD needs to be doing something different.
> 
> Thanks for uncovering these and helping us get them fixed up!
> -- 
> Thanks,
> ~Nick Desaulniers
Re: [PATCH RFC 16/43] x86-64: Use per-cpu stack canary if supported by compiler
Posted by Fangrui Song 2 years, 9 months ago
On Fri, May 5, 2023 at 11:02 AM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Thu, May 4, 2023 at 11:14 PM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> >
> > On Tue, May 02, 2023 at 01:27:53AM +0800, Nick Desaulniers wrote:
> > > On Fri, Apr 28, 2023 at 2:52 AM Hou Wenlong <houwenlong.hwl@antgroup.com> wrote:
> > > >
> > > > +config CC_HAS_CUSTOMIZED_STACKPROTECTOR
> > > > +       bool
> > > > +       # Although clang supports -mstack-protector-guard-reg option, it
> > > > +       # would generate GOT reference for __stack_chk_guard even with
> > > > +       # -fno-PIE flag.
> > > > +       default y if (!CC_IS_CLANG && $(cc-option,-mstack-protector-guard-reg=gs))
> > >
> > > Hi Hou,
> > > I've filed this bug against LLVM and will work with LLVM folks at
> > > Intel to resolve:
> > > https://github.com/llvm/llvm-project/issues/62481
> > > Can you please review that report and let me know here or there if I
> > > missed anything? Would you also mind including a link to that in the
> > > comments in the next version of this patch?
> > >
> > Hi Nick,
> >
> > Thanks for your help, I'll include the link in the next version.
> > Actually, I had post an issue on github too when I test the patch on
> > LLVM. But no replies. :(.
>
> Ah, sorry about that.  The issue tracker is pretty high volume and
> stuff gets missed.  With many users comes many bug reports.  We could
> be better about triage though.  If it's specific to the Linux kernel,
> https://github.com/ClangBuiltLinux/linux/issues is a better issue
> tracker to use; we can move bug reports upstream to
> https://github.com/llvm/llvm-project/issues/ when necessary. It's
> linked off of clangbuiltlinux.github.io if you lose it.
>
> > https://github.com/llvm/llvm-project/issues/60116
> >
> > There is another problem I met for this patch, some unexpected code
> > are generated:
> >
> > do_one_initcall: (init/main.o)
> > ......
> > movq    __stack_chk_guard(%rip), %rax
> > movq    %rax,0x2b0(%rsp)
> >
> > The complier generates wrong instruction, no GOT reference and gs
> > register. I only see it in init/main.c file. I have tried to move the
> > function into another file and compiled it with same cflags. It could
> > generate right instruction for the function in another file.
>
> The wrong instruction or the wrong operand?  This is loading the
> canary into the stack slot in the fn prolog.  Perhaps the expected
> cflag is not getting set (or being removed) from init/main.c? You
> should be able to do:
>
> $ make LLVM=1 init/main.o V=1
>
> to see how clang was invoked to see if the expected flag was there, or not.
>
> >
> > The LLVM chain toolsare built by myself:
> > clang version 15.0.7 (https://github.com/llvm/llvm-project.git
> > 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
>
> Perhaps worth rebuilding with top of tree, which is clang 17.
>
> >
> > > Less relevant issues I filed looking at some related codegen:
> > > https://github.com/llvm/llvm-project/issues/62482
> > > https://github.com/llvm/llvm-project/issues/62480
> > >
> > > And we should probably look into:
> > > https://github.com/llvm/llvm-project/issues/22476
> > >
> > >
> >
> > Except for per-cpu stack canary patch, there is another issue I post on
> > github: https://github.com/llvm/llvm-project/issues/60096
>
> Thanks, I'll bring that up with Intel, too.
>
> >
> > The related patch is:
> > https://lore.kernel.org/lkml/175116f75c38c15d8d73a03301eab805fea13a0a.1682673543.git.houwenlong.hwl@antgroup.com/
> >
> > I couldn't find the related documentation about that, hope you can help
> > me too.
> >
> > One more problem that I didn't post is:
> > https://lore.kernel.org/lkml/8d6bbaf66b90cf1a8fd2c5da98f5e094b9ffcb27.1682673543.git.houwenlong.hwl@antgroup.com/
>
> Mind filing another bug for this in llvm's issue tracker? We can
> discuss there if LLD needs to be doing something different.
>
> Thanks for uncovering these and helping us get them fixed up!
> --
> Thanks,
> ~Nick Desaulniers

In my opinion, Clang's behavior is working as intended. The Linux
kernel should support R_X86_64_REX_GOTPCRELX, and the solution is
straightforward: treat R_X86_64_REX_GOTPCRELX the same way as
R_X86_64_PC32 (-shared -Bsymbolic), assuming that every symbol is
defined, which means that every symbol is non-preemptible.

Clang's `-fno-pic` option chooses `R_X86_64_REX_GOTPCRELX` which is
correct, although it differs from GCC's `-fno-pic` option.

The compiler doesn't know whether `__stack_chk_guard` will be provided
by the main executable (`libc.a`) or a shared object (`libc.so`,
available on some ports of glibc but not x86, on musl this is
available for all ports).
(Also see `__stack_chk_guard` on
https://maskray.me/blog/2022-12-18-control-flow-integrity)

If an `R_X86_64_32` relocation is used and `__stack_chk_guard` is
defined by a shared object, copy relocation.
We will need an ELF hack called [copy
relocation](https://maskray.me/blog/2021-01-09-copy-relocations-canonical-plt-entries-and-protected).

The instruction movq __stack_chk_guard@GOTPCREL(%rip), %rbx produces
an R_X86_64_REX_GOTPCRELX relocation.
If `__stack_chk_guard` is non-preemptible, linkers can [optimize the
access to be direct](https://maskray.me/blog/2021-08-29-all-about-global-offset-table#got-optimization).

Although we could technically use the
`-fno-direct-access-external-data` option to switch between
`R_X86_64_REX_GOTPCRELX` and `R_X86_64_32`, I think there is no
justification to complicate the compiler.



-- 
宋方睿