Enable -ffunction-sections by default for the AutoFDO build.
With -ffunction-sections, the compiler places each function in its own
section named .text.function_name instead of placing all functions in
the .text section. In the AutoFDO build, this allows the linker to
utilize profile information to reorganize functions for improved
utilization of iCache and iTLB.
Co-developed-by: Han Shen <shenhan@google.com>
Signed-off-by: Han Shen <shenhan@google.com>
Signed-off-by: Rong Xu <xur@google.com>
Suggested-by: Sriraman Tallam <tmsriram@google.com>
---
include/asm-generic/vmlinux.lds.h | 37 ++++++++++++++++++++++++-------
scripts/Makefile.autofdo | 2 +-
2 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5df589c60401..ace617d1af9b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -95,18 +95,25 @@
* With LTO_CLANG, the linker also splits sections by default, so we need
* these macros to combine the sections during the final link.
*
+ * With AUTOFDO_CLANG, -ffunction-sections likewise places each function in
+ * its own .text.function_name section, so TEXT_MAIN must combine those too.
+ *
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
-#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \
+defined(CONFIG_AUTOFDO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
+#else
+#define TEXT_MAIN .text
+#endif
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral*
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
#else
-#define TEXT_MAIN .text
#define DATA_MAIN .data
#define SDATA_MAIN .sdata
#define RODATA_MAIN .rodata
@@ -549,6 +556,20 @@
__cpuidle_text_end = .; \
__noinstr_text_end = .;
+#ifdef CONFIG_AUTOFDO_CLANG
+#define TEXT_HOT \
+ __hot_text_start = .; \
+ *(.text.hot .text.hot.*) \
+ __hot_text_end = .;
+#define TEXT_UNLIKELY \
+ __unlikely_text_start = .; \
+ *(.text.unlikely .text.unlikely.*) \
+ __unlikely_text_end = .;
+#else
+#define TEXT_HOT *(.text.hot .text.hot.*)
+#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*)
+#endif
+
/*
* .text section. Map to function alignment to avoid address changes
* during second ld run in second ld pass when generating System.map
@@ -557,30 +578,30 @@
* code elimination or function-section is enabled. Match these symbols
* first when in these builds.
*/
-#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \
+defined(CONFIG_AUTOFDO_CLANG)
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
*(.text.asan.* .text.tsan.*) \
*(.text.unknown .text.unknown.*) \
- *(.text.unlikely .text.unlikely.*) \
+ TEXT_UNLIKELY \
. = ALIGN(PAGE_SIZE); \
- *(.text.hot .text.hot.*) \
+ TEXT_HOT \
*(TEXT_MAIN .text.fixup) \
NOINSTR_TEXT \
*(.ref.text)
#else
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
- *(.text.hot .text.hot.*) \
+ TEXT_HOT \
*(TEXT_MAIN .text.fixup) \
- *(.text.unlikely .text.unlikely.*) \
+ TEXT_UNLIKELY \
*(.text.unknown .text.unknown.*) \
NOINSTR_TEXT \
*(.ref.text) \
*(.text.asan.* .text.tsan.*)
#endif
-
/* sched.text is aling to function alignment to secure we have same
* address even at second ld pass when generating System.map */
#define SCHED_TEXT \
diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo
index 1c9f224bc221..9c9a530ef090 100644
--- a/scripts/Makefile.autofdo
+++ b/scripts/Makefile.autofdo
@@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO
endif
ifdef CLANG_AUTOFDO_PROFILE
- CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE)
+ CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections
endif
ifdef CONFIG_LTO_CLANG_THIN
--
2.47.0.rc1.288.g06298d1525-goog
On Tue, Oct 15, 2024 at 6:33 AM Rong Xu <xur@google.com> wrote: > > Enable -ffunction-sections by default for the AutoFDO build. > > With -ffunction-sections, the compiler places each function in its own > section named .text.function_name instead of placing all functions in > the .text section. In the AutoFDO build, this allows the linker to > utilize profile information to reorganize functions for improved > utilization of iCache and iTLB. > > Co-developed-by: Han Shen <shenhan@google.com> > Signed-off-by: Han Shen <shenhan@google.com> > Signed-off-by: Rong Xu <xur@google.com> > Suggested-by: Sriraman Tallam <tmsriram@google.com> > --- > include/asm-generic/vmlinux.lds.h | 37 ++++++++++++++++++++++++------- > scripts/Makefile.autofdo | 2 +- > 2 files changed, 30 insertions(+), 9 deletions(-) > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > index 5df589c60401..ace617d1af9b 100644 > --- a/include/asm-generic/vmlinux.lds.h > +++ b/include/asm-generic/vmlinux.lds.h > @@ -95,18 +95,25 @@ > * With LTO_CLANG, the linker also splits sections by default, so we need > * these macros to combine the sections during the final link. > * > + * With LTO_CLANG, the linker also splits sections by default, so we need > + * these macros to combine the sections during the final link. > + * > * RODATA_MAIN is not used because existing code already defines .rodata.x > * sections to be brought in with rodata. 
> */ > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > +defined(CONFIG_AUTOFDO_CLANG) > #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* > +#else > +#define TEXT_MAIN .text > +#endif > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* > #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* > #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* > #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* > #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* > #else > -#define TEXT_MAIN .text > #define DATA_MAIN .data > #define SDATA_MAIN .sdata > #define RODATA_MAIN .rodata > @@ -549,6 +556,20 @@ > __cpuidle_text_end = .; \ > __noinstr_text_end = .; > > +#ifdef CONFIG_AUTOFDO_CLANG > +#define TEXT_HOT \ > + __hot_text_start = .; \ > + *(.text.hot .text.hot.*) \ > + __hot_text_end = .; > +#define TEXT_UNLIKELY \ > + __unlikely_text_start = .; \ > + *(.text.unlikely .text.unlikely.*) \ > + __unlikely_text_end = .; > +#else > +#define TEXT_HOT *(.text.hot .text.hot.*) > +#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*) > +#endif Again, why is this conditional? The only difference is *_start and *_end symbols are defined when CONFIG_AUTOFDO_CLANG=y. And, where are these symbols used? > + > /* > * .text section. Map to function alignment to avoid address changes > * during second ld run in second ld pass when generating System.map > @@ -557,30 +578,30 @@ > * code elimination or function-section is enabled. Match these symbols > * first when in these builds. 
> */ > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > +defined(CONFIG_AUTOFDO_CLANG) > #define TEXT_TEXT \ > ALIGN_FUNCTION(); \ > *(.text.asan.* .text.tsan.*) \ > *(.text.unknown .text.unknown.*) \ > - *(.text.unlikely .text.unlikely.*) \ > + TEXT_UNLIKELY \ > . = ALIGN(PAGE_SIZE); \ > - *(.text.hot .text.hot.*) \ > + TEXT_HOT \ > *(TEXT_MAIN .text.fixup) \ > NOINSTR_TEXT \ > *(.ref.text) > #else > #define TEXT_TEXT \ > ALIGN_FUNCTION(); \ > - *(.text.hot .text.hot.*) \ > + TEXT_HOT \ > *(TEXT_MAIN .text.fixup) \ > - *(.text.unlikely .text.unlikely.*) \ > + TEXT_UNLIKELY \ > *(.text.unknown .text.unknown.*) \ > NOINSTR_TEXT \ > *(.ref.text) \ > *(.text.asan.* .text.tsan.*) > #endif > > - > /* sched.text is aling to function alignment to secure we have same > * address even at second ld pass when generating System.map */ > #define SCHED_TEXT \ > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > index 1c9f224bc221..9c9a530ef090 100644 > --- a/scripts/Makefile.autofdo > +++ b/scripts/Makefile.autofdo > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > endif > > ifdef CLANG_AUTOFDO_PROFILE > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > endif > > ifdef CONFIG_LTO_CLANG_THIN > -- > 2.47.0.rc1.288.g06298d1525-goog > > -- Best Regards Masahiro Yamada
The answers are the same as the reply in [PATCH v4 5/6] On Sun, Oct 20, 2024 at 7:26 PM Masahiro Yamada <masahiroy@kernel.org> wrote: > > On Tue, Oct 15, 2024 at 6:33 AM Rong Xu <xur@google.com> wrote: > > > > Enable -ffunction-sections by default for the AutoFDO build. > > > > With -ffunction-sections, the compiler places each function in its own > > section named .text.function_name instead of placing all functions in > > the .text section. In the AutoFDO build, this allows the linker to > > utilize profile information to reorganize functions for improved > > utilization of iCache and iTLB. > > > > Co-developed-by: Han Shen <shenhan@google.com> > > Signed-off-by: Han Shen <shenhan@google.com> > > Signed-off-by: Rong Xu <xur@google.com> > > Suggested-by: Sriraman Tallam <tmsriram@google.com> > > --- > > include/asm-generic/vmlinux.lds.h | 37 ++++++++++++++++++++++++------- > > scripts/Makefile.autofdo | 2 +- > > 2 files changed, 30 insertions(+), 9 deletions(-) > > > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > > index 5df589c60401..ace617d1af9b 100644 > > --- a/include/asm-generic/vmlinux.lds.h > > +++ b/include/asm-generic/vmlinux.lds.h > > @@ -95,18 +95,25 @@ > > * With LTO_CLANG, the linker also splits sections by default, so we need > > * these macros to combine the sections during the final link. > > * > > + * With LTO_CLANG, the linker also splits sections by default, so we need > > + * these macros to combine the sections during the final link. > > + * > > * RODATA_MAIN is not used because existing code already defines .rodata.x > > * sections to be brought in with rodata. 
> > */ > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > +defined(CONFIG_AUTOFDO_CLANG) > > #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* > > +#else > > +#define TEXT_MAIN .text > > +#endif > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* > > #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* > > #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* > > #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* > > #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* > > #else > > -#define TEXT_MAIN .text > > #define DATA_MAIN .data > > #define SDATA_MAIN .sdata > > #define RODATA_MAIN .rodata > > @@ -549,6 +556,20 @@ > > __cpuidle_text_end = .; \ > > __noinstr_text_end = .; > > > > +#ifdef CONFIG_AUTOFDO_CLANG > > +#define TEXT_HOT \ > > + __hot_text_start = .; \ > > + *(.text.hot .text.hot.*) \ > > + __hot_text_end = .; > > +#define TEXT_UNLIKELY \ > > + __unlikely_text_start = .; \ > > + *(.text.unlikely .text.unlikely.*) \ > > + __unlikely_text_end = .; > > +#else > > +#define TEXT_HOT *(.text.hot .text.hot.*) > > +#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*) > > +#endif > > > > Again, why is this conditional? The condition is to ensure that we don't change the default kernel build by any means. The new code will introduce a few new symbols. > > > The only difference is *_start and *_end symbols are defined > when CONFIG_AUTOFDO_CLANG=y. > > And, where are these symbols used? These new symbols are currently unreferenced within the kernel source tree. However, they provide a valuable means of identifying hot and cold sections of text, and how large they are. I think they are useful information. > > > > > > > > > > > > > + > > /* > > * .text section. 
Map to function alignment to avoid address changes > > * during second ld run in second ld pass when generating System.map > > @@ -557,30 +578,30 @@ > > * code elimination or function-section is enabled. Match these symbols > > * first when in these builds. > > */ > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > +defined(CONFIG_AUTOFDO_CLANG) > > #define TEXT_TEXT \ > > ALIGN_FUNCTION(); \ > > *(.text.asan.* .text.tsan.*) \ > > *(.text.unknown .text.unknown.*) \ > > - *(.text.unlikely .text.unlikely.*) \ > > + TEXT_UNLIKELY \ > > . = ALIGN(PAGE_SIZE); \ > > - *(.text.hot .text.hot.*) \ > > + TEXT_HOT \ > > *(TEXT_MAIN .text.fixup) \ > > NOINSTR_TEXT \ > > *(.ref.text) > > #else > > #define TEXT_TEXT \ > > ALIGN_FUNCTION(); \ > > - *(.text.hot .text.hot.*) \ > > + TEXT_HOT \ > > *(TEXT_MAIN .text.fixup) \ > > - *(.text.unlikely .text.unlikely.*) \ > > + TEXT_UNLIKELY \ > > *(.text.unknown .text.unknown.*) \ > > NOINSTR_TEXT \ > > *(.ref.text) \ > > *(.text.asan.* .text.tsan.*) > > #endif > > > > - > > /* sched.text is aling to function alignment to secure we have same > > * address even at second ld pass when generating System.map */ > > #define SCHED_TEXT \ > > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > > index 1c9f224bc221..9c9a530ef090 100644 > > --- a/scripts/Makefile.autofdo > > +++ b/scripts/Makefile.autofdo > > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > > endif > > > > ifdef CLANG_AUTOFDO_PROFILE > > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > > endif > > > > ifdef CONFIG_LTO_CLANG_THIN > > -- > > 2.47.0.rc1.288.g06298d1525-goog > > > > > > > -- > Best Regards > Masahiro Yamada
On Tue, Oct 22, 2024 at 8:32 AM Rong Xu <xur@google.com> wrote: > > The answers are the same as the reply in [PATCH v4 5/6] > > > > > > Again, why is this conditional? > > The condition is to ensure that we don't change the default kernel > build by any means. The new code will introduce a few new symbols. > Same answer. I guess you prefer unmaintainable code because you are not a maintainer. > > > > The only difference is *_start and *_end symbols are defined > > when CONFIG_AUTOFDO_CLANG=y. > > > > And, where are these symbols used? > > These new symbols are currently unreferenced within the kernel source tree. > However, they provide a valuable means of identifying hot and cold > sections of text, and how large they are. I think they are useful information. OK, then you are doing unrelated changes to include/asm-generic/vmlinux.lds.h. This patch should touch only scripts/Makefile.autofdo If you want to insert *_start and *_end markers, you can add a separate patch, explaining your motivation. > > > > > > > > > > > > > > > > > > > > > > > > > > + > > > /* > > > * .text section. Map to function alignment to avoid address changes > > > * during second ld run in second ld pass when generating System.map > > > @@ -557,30 +578,30 @@ > > > * code elimination or function-section is enabled. Match these symbols > > > * first when in these builds. > > > */ > > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > > +defined(CONFIG_AUTOFDO_CLANG) > > > #define TEXT_TEXT \ > > > ALIGN_FUNCTION(); \ > > > *(.text.asan.* .text.tsan.*) \ > > > *(.text.unknown .text.unknown.*) \ > > > - *(.text.unlikely .text.unlikely.*) \ > > > + TEXT_UNLIKELY \ > > > . 
= ALIGN(PAGE_SIZE); \ > > > - *(.text.hot .text.hot.*) \ > > > + TEXT_HOT \ > > > *(TEXT_MAIN .text.fixup) \ > > > NOINSTR_TEXT \ > > > *(.ref.text) > > > #else > > > #define TEXT_TEXT \ > > > ALIGN_FUNCTION(); \ > > > - *(.text.hot .text.hot.*) \ > > > + TEXT_HOT \ > > > *(TEXT_MAIN .text.fixup) \ > > > - *(.text.unlikely .text.unlikely.*) \ > > > + TEXT_UNLIKELY \ > > > *(.text.unknown .text.unknown.*) \ > > > NOINSTR_TEXT \ > > > *(.ref.text) \ > > > *(.text.asan.* .text.tsan.*) > > > #endif > > > > > > - > > > /* sched.text is aling to function alignment to secure we have same > > > * address even at second ld pass when generating System.map */ > > > #define SCHED_TEXT \ > > > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > > > index 1c9f224bc221..9c9a530ef090 100644 > > > --- a/scripts/Makefile.autofdo > > > +++ b/scripts/Makefile.autofdo > > > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > > > endif > > > > > > ifdef CLANG_AUTOFDO_PROFILE > > > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > > > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > > > endif > > > > > > ifdef CONFIG_LTO_CLANG_THIN > > > -- > > > 2.47.0.rc1.288.g06298d1525-goog > > > > > > > > > > > > -- > > Best Regards > > Masahiro Yamada > -- Best Regards Masahiro Yamada
© 2016 - 2024 Red Hat, Inc.