From: Ard Biesheuvel <ardb@kernel.org>
Specify the guard symbol for the stack cookie explicitly, rather than
positioning it exactly 40 bytes into the per-CPU area. Doing so removes
the need for the per-CPU region to be absolute rather than relative to
the placement of the per-CPU template region in the kernel image, and
this allows the special handling for absolute per-CPU symbols to be
removed entirely.
This is a worthwhile cleanup in itself, but it is also a prerequisite
for PIE codegen and PIE linking, which can replace our bespoke and
rather clunky runtime relocation handling.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/Makefile | 4 ++++
arch/x86/include/asm/init.h | 2 +-
arch/x86/include/asm/processor.h | 11 +++--------
arch/x86/include/asm/stackprotector.h | 4 ----
tools/perf/util/annotate.c | 4 ++--
5 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 6b3fe6e2aadd..b78b7623a4a9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -193,6 +193,10 @@ else
KBUILD_RUSTFLAGS += -Cno-redzone=y
KBUILD_RUSTFLAGS += -Ccode-model=kernel
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ KBUILD_CFLAGS += -mstack-protector-guard-symbol=fixed_percpu_data
+ endif
+
# Don't emit relaxable GOTPCREL relocations
KBUILD_AFLAGS_KERNEL += -Wa,-mrelax-relocations=no
KBUILD_CFLAGS_KERNEL += -Wa,-mrelax-relocations=no
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 14d72727d7ee..3ed0e8ec973f 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#define __head __section(".head.text")
+#define __head __section(".head.text") __no_stack_protector
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4a686f0e5dbf..56bc36116814 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -402,14 +402,9 @@ struct irq_stack {
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
/*
- * GCC hardcodes the stack canary as %gs:40. Since the
- * irq_stack is the object at %gs:0, we reserve the bottom
- * 48 bytes of the irq stack for the canary.
- *
- * Once we are willing to require -mstack-protector-guard-symbol=
- * support for x86_64 stackprotector, we can get rid of this.
+ * Since the irq_stack is the object at %gs:0, the bottom 8 bytes of
+ * the irq stack are reserved for the canary.
*/
- char gs_base[40];
unsigned long stack_canary;
};
@@ -418,7 +413,7 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+ return (unsigned long)&per_cpu(fixed_percpu_data, cpu);
}
extern asmlinkage void entry_SYSCALL32_ignore(void);
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 00473a650f51..d1dcd22a0a4c 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -51,10 +51,6 @@ static __always_inline void boot_init_stack_canary(void)
{
unsigned long canary = get_random_canary();
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
-
current->stack_canary = canary;
#ifdef CONFIG_X86_64
this_cpu_write(fixed_percpu_data.stack_canary, canary);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 37ce43c4eb8f..7ecfedf5edb9 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2485,10 +2485,10 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc)
{
- /* On x86_64, %gs:40 is used for stack canary */
+ /* On x86_64, %gs:0 is used for stack canary */
if (arch__is(arch, "x86")) {
if (loc->segment == INSN_SEG_X86_GS && loc->imm &&
- loc->offset == 40)
+ loc->offset == 0)
return true;
}
--
2.46.0.792.g87dc391469-goog
On Wed, Sep 25, 2024 at 5:02 PM Ard Biesheuvel <ardb+git@google.com> wrote: > > From: Ard Biesheuvel <ardb@kernel.org> > > Specify the guard symbol for the stack cookie explicitly, rather than > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > the need for the per-CPU region to be absolute rather than relative to > the placement of the per-CPU template region in the kernel image, and > this allows the special handling for absolute per-CPU symbols to be > removed entirely. > > This is a worthwhile cleanup in itself, but it is also a prerequisite > for PIE codegen and PIE linking, which can replace our bespoke and > rather clunky runtime relocation handling. > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > --- > arch/x86/Makefile | 4 ++++ > arch/x86/include/asm/init.h | 2 +- > arch/x86/include/asm/processor.h | 11 +++-------- > arch/x86/include/asm/stackprotector.h | 4 ---- > tools/perf/util/annotate.c | 4 ++-- > 5 files changed, 10 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/Makefile b/arch/x86/Makefile > index 6b3fe6e2aadd..b78b7623a4a9 100644 > --- a/arch/x86/Makefile > +++ b/arch/x86/Makefile > @@ -193,6 +193,10 @@ else > KBUILD_RUSTFLAGS += -Cno-redzone=y > KBUILD_RUSTFLAGS += -Ccode-model=kernel > > + ifeq ($(CONFIG_STACKPROTECTOR),y) > + KBUILD_CFLAGS += -mstack-protector-guard-symbol=fixed_percpu_data Looking at: > + * Since the irq_stack is the object at %gs:0, the bottom 8 bytes of > + * the irq stack are reserved for the canary. Please note that %gs:0 can also be achieved with -mstack-protector-guard-offset=0 Uros.
On Wed, Sep 25, 2024 at 5:02 PM Ard Biesheuvel <ardb+git@google.com> wrote: > > From: Ard Biesheuvel <ardb@kernel.org> > > Specify the guard symbol for the stack cookie explicitly, rather than > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > the need for the per-CPU region to be absolute rather than relative to > the placement of the per-CPU template region in the kernel image, and > this allows the special handling for absolute per-CPU symbols to be > removed entirely. > > This is a worthwhile cleanup in itself, but it is also a prerequisite > for PIE codegen and PIE linking, which can replace our bespoke and > rather clunky runtime relocation handling. I would like to point out a series that converted the stack protector guard symbol to a normal percpu variable [1], so there was no need to assume anything about the location of the guard symbol. [1] "[PATCH v4 00/16] x86-64: Stack protector and percpu improvements" https://lore.kernel.org/lkml/20240322165233.71698-1-brgerst@gmail.com/ Uros. > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > --- > arch/x86/Makefile | 4 ++++ > arch/x86/include/asm/init.h | 2 +- > arch/x86/include/asm/processor.h | 11 +++-------- > arch/x86/include/asm/stackprotector.h | 4 ---- > tools/perf/util/annotate.c | 4 ++-- > 5 files changed, 10 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/Makefile b/arch/x86/Makefile > index 6b3fe6e2aadd..b78b7623a4a9 100644 > --- a/arch/x86/Makefile > +++ b/arch/x86/Makefile > @@ -193,6 +193,10 @@ else > KBUILD_RUSTFLAGS += -Cno-redzone=y > KBUILD_RUSTFLAGS += -Ccode-model=kernel > > + ifeq ($(CONFIG_STACKPROTECTOR),y) > + KBUILD_CFLAGS += -mstack-protector-guard-symbol=fixed_percpu_data > + endif > + > # Don't emit relaxable GOTPCREL relocations > KBUILD_AFLAGS_KERNEL += -Wa,-mrelax-relocations=no > KBUILD_CFLAGS_KERNEL += -Wa,-mrelax-relocations=no > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h > index 14d72727d7ee..3ed0e8ec973f 100644 > --- a/arch/x86/include/asm/init.h > +++ b/arch/x86/include/asm/init.h > @@ -2,7 +2,7 @@ > #ifndef _ASM_X86_INIT_H > #define _ASM_X86_INIT_H > > -#define __head __section(".head.text") > +#define __head __section(".head.text") __no_stack_protector > > struct x86_mapping_info { > void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h > index 4a686f0e5dbf..56bc36116814 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -402,14 +402,9 @@ struct irq_stack { > #ifdef CONFIG_X86_64 > struct fixed_percpu_data { > /* > - * GCC hardcodes the stack canary as %gs:40. Since the > - * irq_stack is the object at %gs:0, we reserve the bottom > - * 48 bytes of the irq stack for the canary. > - * > - * Once we are willing to require -mstack-protector-guard-symbol= > - * support for x86_64 stackprotector, we can get rid of this. > + * Since the irq_stack is the object at %gs:0, the bottom 8 bytes of > + * the irq stack are reserved for the canary. > */ > - char gs_base[40]; > unsigned long stack_canary; > }; > > @@ -418,7 +413,7 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data); > > static inline unsigned long cpu_kernelmode_gs_base(int cpu) > { > - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); > + return (unsigned long)&per_cpu(fixed_percpu_data, cpu); > } > > extern asmlinkage void entry_SYSCALL32_ignore(void); > diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h > index 00473a650f51..d1dcd22a0a4c 100644 > --- a/arch/x86/include/asm/stackprotector.h > +++ b/arch/x86/include/asm/stackprotector.h > @@ -51,10 +51,6 @@ static __always_inline void boot_init_stack_canary(void) > { > unsigned long canary = get_random_canary(); > > -#ifdef CONFIG_X86_64 > - BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); > -#endif > - > current->stack_canary = canary; > #ifdef CONFIG_X86_64 > this_cpu_write(fixed_percpu_data.stack_canary, canary); > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > index 37ce43c4eb8f..7ecfedf5edb9 100644 > --- a/tools/perf/util/annotate.c > +++ b/tools/perf/util/annotate.c > @@ -2485,10 +2485,10 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) > > static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) > { > - /* On x86_64, %gs:40 is used for stack canary */ > + /* On x86_64, %gs:0 is used for stack canary */ > if (arch__is(arch, "x86")) { > if (loc->segment == INSN_SEG_X86_GS && loc->imm && > - loc->offset == 40) > + loc->offset == 0) > return true; > } > > -- > 2.46.0.792.g87dc391469-goog >
On Wed, Sep 25, 2024 at 2:33 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Wed, Sep 25, 2024 at 5:02 PM Ard Biesheuvel <ardb+git@google.com> wrote: > > > > From: Ard Biesheuvel <ardb@kernel.org> > > > > Specify the guard symbol for the stack cookie explicitly, rather than > > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > > the need for the per-CPU region to be absolute rather than relative to > > the placement of the per-CPU template region in the kernel image, and > > this allows the special handling for absolute per-CPU symbols to be > > removed entirely. > > > > This is a worthwhile cleanup in itself, but it is also a prerequisite > > for PIE codegen and PIE linking, which can replace our bespoke and > > rather clunky runtime relocation handling. > > I would like to point out a series that converted the stack protector > guard symbol to a normal percpu variable [1], so there was no need to > assume anything about the location of the guard symbol. > > [1] "[PATCH v4 00/16] x86-64: Stack protector and percpu improvements" > https://lore.kernel.org/lkml/20240322165233.71698-1-brgerst@gmail.com/ > > Uros. I plan on resubmitting that series sometime after the 6.12 merge window closes. As I recall from the last version, it was decided to wait until after the next LTS release to raise the minimum GCC version to 8.1 and avoid the need to be compatible with the old stack protector layout. Brian Gerst
On Sat, 28 Sept 2024 at 15:41, Brian Gerst <brgerst@gmail.com> wrote: > > On Wed, Sep 25, 2024 at 2:33 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Wed, Sep 25, 2024 at 5:02 PM Ard Biesheuvel <ardb+git@google.com> wrote: > > > > > > From: Ard Biesheuvel <ardb@kernel.org> > > > > > > Specify the guard symbol for the stack cookie explicitly, rather than > > > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > > > the need for the per-CPU region to be absolute rather than relative to > > > the placement of the per-CPU template region in the kernel image, and > > > this allows the special handling for absolute per-CPU symbols to be > > > removed entirely. > > > > > > This is a worthwhile cleanup in itself, but it is also a prerequisite > > > for PIE codegen and PIE linking, which can replace our bespoke and > > > rather clunky runtime relocation handling. > > > > I would like to point out a series that converted the stack protector > > guard symbol to a normal percpu variable [1], so there was no need to > > assume anything about the location of the guard symbol. > > > > [1] "[PATCH v4 00/16] x86-64: Stack protector and percpu improvements" > > https://lore.kernel.org/lkml/20240322165233.71698-1-brgerst@gmail.com/ > > > > Uros. > > I plan on resubmitting that series sometime after the 6.12 merge > window closes. As I recall from the last version, it was decided to > wait until after the next LTS release to raise the minimum GCC version > to 8.1 and avoid the need to be compatible with the old stack > protector layout. > Hi Brian, I'd be more than happy to compare notes on that - I wasn't aware of your intentions here, or I would have reached out before sending this RFC. There are two things that you would need to address for Clang support to work correctly: - the workaround I cc'ed you on the other day [0], - a workaround for the module loader so it tolerates the GOTPCRELX relocations that Clang emits [1] [0] https://lore.kernel.org/all/20241002092534.3163838-2-ardb+git@google.com/ [1] https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/commit/?id=a18121aabbdd
On Fri, Oct 4, 2024 at 9:15 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Sat, 28 Sept 2024 at 15:41, Brian Gerst <brgerst@gmail.com> wrote: > > > > On Wed, Sep 25, 2024 at 2:33 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Wed, Sep 25, 2024 at 5:02 PM Ard Biesheuvel <ardb+git@google.com> wrote: > > > > > > > > From: Ard Biesheuvel <ardb@kernel.org> > > > > > > > > Specify the guard symbol for the stack cookie explicitly, rather than > > > > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > > > > the need for the per-CPU region to be absolute rather than relative to > > > > the placement of the per-CPU template region in the kernel image, and > > > > this allows the special handling for absolute per-CPU symbols to be > > > > removed entirely. > > > > > > > > This is a worthwhile cleanup in itself, but it is also a prerequisite > > > > for PIE codegen and PIE linking, which can replace our bespoke and > > > > rather clunky runtime relocation handling. > > > > > > I would like to point out a series that converted the stack protector > > > guard symbol to a normal percpu variable [1], so there was no need to > > > assume anything about the location of the guard symbol. > > > > > > [1] "[PATCH v4 00/16] x86-64: Stack protector and percpu improvements" > > > https://lore.kernel.org/lkml/20240322165233.71698-1-brgerst@gmail.com/ > > > > > > Uros. > > > > I plan on resubmitting that series sometime after the 6.12 merge > > window closes. As I recall from the last version, it was decided to > > wait until after the next LTS release to raise the minimum GCC version > > to 8.1 and avoid the need to be compatible with the old stack > > protector layout. > > > > Hi Brian, > > I'd be more than happy to compare notes on that - I wasn't aware of > your intentions here, or I would have reached out before sending this > RFC. > > There are two things that you would need to address for Clang support > to work correctly: > - the workaround I cc'ed you on the other day [0], > - a workaround for the module loader so it tolerates the GOTPCRELX > relocations that Clang emits [1] > > > > [0] https://lore.kernel.org/all/20241002092534.3163838-2-ardb+git@google.com/ > [1] https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/commit/?id=a18121aabbdd The first patch should be applied independently as a bug fix, since it already affects the 32-bit build with clang. I don't have an environment with an older clang compiler to test the second patch, but I'll assume it will be necessary. I did run into an issue with the GOTPCRELX relocations before [1], but I thought it was just an objtool issue and didn't do more testing to know if modules were broken or not. Brian Gerst [1] https://lore.kernel.org/all/20231026160100.195099-6-brgerst@gmail.com/
On Wed, Sep 25, 2024 at 8:02 AM Ard Biesheuvel <ardb+git@google.com> wrote: > > From: Ard Biesheuvel <ardb@kernel.org> > > Specify the guard symbol for the stack cookie explicitly, rather than > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > the need for the per-CPU region to be absolute rather than relative to > the placement of the per-CPU template region in the kernel image, and > this allows the special handling for absolute per-CPU symbols to be > removed entirely. > > This is a worthwhile cleanup in itself, but it is also a prerequisite > for PIE codegen and PIE linking, which can replace our bespoke and > rather clunky runtime relocation handling. > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > --- > arch/x86/Makefile | 4 ++++ > arch/x86/include/asm/init.h | 2 +- > arch/x86/include/asm/processor.h | 11 +++-------- > arch/x86/include/asm/stackprotector.h | 4 ---- > tools/perf/util/annotate.c | 4 ++-- > 5 files changed, 10 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/Makefile b/arch/x86/Makefile > index 6b3fe6e2aadd..b78b7623a4a9 100644 > --- a/arch/x86/Makefile > +++ b/arch/x86/Makefile > @@ -193,6 +193,10 @@ else > KBUILD_RUSTFLAGS += -Cno-redzone=y > KBUILD_RUSTFLAGS += -Ccode-model=kernel > > + ifeq ($(CONFIG_STACKPROTECTOR),y) > + KBUILD_CFLAGS += -mstack-protector-guard-symbol=fixed_percpu_data > + endif > + > # Don't emit relaxable GOTPCREL relocations > KBUILD_AFLAGS_KERNEL += -Wa,-mrelax-relocations=no > KBUILD_CFLAGS_KERNEL += -Wa,-mrelax-relocations=no > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h > index 14d72727d7ee..3ed0e8ec973f 100644 > --- a/arch/x86/include/asm/init.h > +++ b/arch/x86/include/asm/init.h > @@ -2,7 +2,7 @@ > #ifndef _ASM_X86_INIT_H > #define _ASM_X86_INIT_H > > -#define __head __section(".head.text") > +#define __head __section(".head.text") __no_stack_protector > > struct x86_mapping_info { > void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h > index 4a686f0e5dbf..56bc36116814 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -402,14 +402,9 @@ struct irq_stack { > #ifdef CONFIG_X86_64 > struct fixed_percpu_data { > /* > - * GCC hardcodes the stack canary as %gs:40. Since the > - * irq_stack is the object at %gs:0, we reserve the bottom > - * 48 bytes of the irq stack for the canary. > - * > - * Once we are willing to require -mstack-protector-guard-symbol= > - * support for x86_64 stackprotector, we can get rid of this. > + * Since the irq_stack is the object at %gs:0, the bottom 8 bytes of > + * the irq stack are reserved for the canary. > */ > - char gs_base[40]; > unsigned long stack_canary; > }; > > @@ -418,7 +413,7 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data); > > static inline unsigned long cpu_kernelmode_gs_base(int cpu) > { > - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); > + return (unsigned long)&per_cpu(fixed_percpu_data, cpu); > } > > extern asmlinkage void entry_SYSCALL32_ignore(void); > diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h > index 00473a650f51..d1dcd22a0a4c 100644 > --- a/arch/x86/include/asm/stackprotector.h > +++ b/arch/x86/include/asm/stackprotector.h > @@ -51,10 +51,6 @@ static __always_inline void boot_init_stack_canary(void) > { > unsigned long canary = get_random_canary(); > > -#ifdef CONFIG_X86_64 > - BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); > -#endif > - > current->stack_canary = canary; > #ifdef CONFIG_X86_64 > this_cpu_write(fixed_percpu_data.stack_canary, canary); > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > index 37ce43c4eb8f..7ecfedf5edb9 100644 > --- a/tools/perf/util/annotate.c > +++ b/tools/perf/util/annotate.c > @@ -2485,10 +2485,10 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) > > static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) > { > - /* On x86_64, %gs:40 is used for stack canary */ > + /* On x86_64, %gs:0 is used for stack canary */ > if (arch__is(arch, "x86")) { > if (loc->segment == INSN_SEG_X86_GS && loc->imm && > - loc->offset == 40) > + loc->offset == 0) As a new perf tool can run on old kernels we may need to have this be something like: (loc->offset == 40 /* pre v6.xx kernels */ || loc->offset == 0 /* v6.xx and later */ ) We could make this dependent on the kernel by processing the os_release string: https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/env.h#n55 but that could well be more trouble than it is worth. Thanks, Ian > return true; > } > > -- > 2.46.0.792.g87dc391469-goog >
On Wed, 25 Sept 2024 at 17:54, Ian Rogers <irogers@google.com> wrote: > > On Wed, Sep 25, 2024 at 8:02 AM Ard Biesheuvel <ardb+git@google.com> wrote: > > > > From: Ard Biesheuvel <ardb@kernel.org> > > > > Specify the guard symbol for the stack cookie explicitly, rather than > > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > > the need for the per-CPU region to be absolute rather than relative to > > the placement of the per-CPU template region in the kernel image, and > > this allows the special handling for absolute per-CPU symbols to be > > removed entirely. > > > > This is a worthwhile cleanup in itself, but it is also a prerequisite > > for PIE codegen and PIE linking, which can replace our bespoke and > > rather clunky runtime relocation handling. > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > --- > > arch/x86/Makefile | 4 ++++ > > arch/x86/include/asm/init.h | 2 +- > > arch/x86/include/asm/processor.h | 11 +++-------- > > arch/x86/include/asm/stackprotector.h | 4 ---- > > tools/perf/util/annotate.c | 4 ++-- > > 5 files changed, 10 insertions(+), 15 deletions(-) > > ... > > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > > index 37ce43c4eb8f..7ecfedf5edb9 100644 > > --- a/tools/perf/util/annotate.c > > +++ b/tools/perf/util/annotate.c > > @@ -2485,10 +2485,10 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) > > > > static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) > > { > > - /* On x86_64, %gs:40 is used for stack canary */ > > + /* On x86_64, %gs:0 is used for stack canary */ > > if (arch__is(arch, "x86")) { > > if (loc->segment == INSN_SEG_X86_GS && loc->imm && > > - loc->offset == 40) > > + loc->offset == 0) > > As a new perf tool can run on old kernels we may need to have this be > something like: > (loc->offset == 40 /* pre v6.xx kernels */ || loc->offset == 0 /* > v6.xx and later */ ) > > We could make this dependent on the kernel by processing the os_release string: > https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/env.h#n55 > but that could well be more trouble than it is worth. > Yeah. I also wonder what the purpose of this feature is. At the end of this series, the stack cookie will no longer be at a fixed offset of %GS anyway, and so perf will not be able to identify it in the same manner. So it is probably better to just leave this in place, as the %gs:0 case will not exist in the field (assuming that the series lands all at once). Any idea why this deviates from other architectures? Is x86_64 the only arch that needs to identify stack canary accesses in perf? We could rename the symbol to something identifiable, and do it across all architectures, if this really serves a need (and assuming that perf has insight into the symbol table).
On Wed, Sep 25, 2024 at 10:43 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Wed, 25 Sept 2024 at 17:54, Ian Rogers <irogers@google.com> wrote: > > > > On Wed, Sep 25, 2024 at 8:02 AM Ard Biesheuvel <ardb+git@google.com> wrote: > > > > > > From: Ard Biesheuvel <ardb@kernel.org> > > > > > > Specify the guard symbol for the stack cookie explicitly, rather than > > > positioning it exactly 40 bytes into the per-CPU area. Doing so removes > > > the need for the per-CPU region to be absolute rather than relative to > > > the placement of the per-CPU template region in the kernel image, and > > > this allows the special handling for absolute per-CPU symbols to be > > > removed entirely. > > > > > > This is a worthwhile cleanup in itself, but it is also a prerequisite > > > for PIE codegen and PIE linking, which can replace our bespoke and > > > rather clunky runtime relocation handling. > > > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > > --- > > > arch/x86/Makefile | 4 ++++ > > > arch/x86/include/asm/init.h | 2 +- > > > arch/x86/include/asm/processor.h | 11 +++-------- > > > arch/x86/include/asm/stackprotector.h | 4 ---- > > > tools/perf/util/annotate.c | 4 ++-- > > > 5 files changed, 10 insertions(+), 15 deletions(-) > > > > ... > > > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > > > index 37ce43c4eb8f..7ecfedf5edb9 100644 > > > --- a/tools/perf/util/annotate.c > > > +++ b/tools/perf/util/annotate.c > > > @@ -2485,10 +2485,10 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) > > > > > > static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) > > > { > > > - /* On x86_64, %gs:40 is used for stack canary */ > > > + /* On x86_64, %gs:0 is used for stack canary */ > > > if (arch__is(arch, "x86")) { > > > if (loc->segment == INSN_SEG_X86_GS && loc->imm && > > > - loc->offset == 40) > > > + loc->offset == 0) > > > > As a new perf tool can run on old kernels we may need to have this be > > something like: > > (loc->offset == 40 /* pre v6.xx kernels */ || loc->offset == 0 /* > > v6.xx and later */ ) > > > > We could make this dependent on the kernel by processing the os_release string: > > https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/env.h#n55 > > but that could well be more trouble than it is worth. > > > > Yeah. I also wonder what the purpose of this feature is. At the end of > this series, the stack cookie will no longer be at a fixed offset of > %GS anyway, and so perf will not be able to identify it in the same > manner. So it is probably better to just leave this in place, as the > %gs:0 case will not exist in the field (assuming that the series lands > all at once). > > Any idea why this deviates from other architectures? Is x86_64 the > only arch that needs to identify stack canary accesses in perf? We > could rename the symbol to something identifiable, and do it across > all architectures, if this really serves a need (and assuming that > perf has insight into the symbol table). This is relatively new work coming from Namhyung for data type profiling and I believe is pretty much just x86 at the moment - although the ever awesome IBM made contributions for PowerPC. The data type profiling is trying to classify memory accesses which is why it cares about the stack canary instruction, the particular encoding shouldn't matter. Thanks, Ian
© 2016 - 2024 Red Hat, Inc.