MAINTAINERS | 5 +++ Makefile | 40 ++++++++++++++++++++--- arch/Kconfig | 12 +++++++ scripts/Makefile.build | 52 +++++++++++++++++++++++++++--- scripts/Makefile.lib | 7 +++- scripts/Makefile.vmlinux_o | 16 +++++++--- scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++ scripts/head-object-list.txt | 1 + 8 files changed, 171 insertions(+), 15 deletions(-) create mode 100644 scripts/Makefile.vmlinux_thinlink
From: Rong Xu <xur@google.com>
Add distributed ThinLTO build support for the Linux kernel.
This new mode offers several advantages: (1) Increased
flexibility in handling user-specified build options.
(2) Improved user-friendliness for developers. (3) Greater
convenience for integrating with objtool and livepatch.
Note that "distributed" here is a term that distinguishes
this mode from in-process ThinLTO builds, where the linker
invokes the backend compilation; it does not necessarily
mean building in a distributed environment.
Distributed ThinLTO is enabled via the
`CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example:
> make LLVM=1 defconfig
> scripts/config -e LTO_CLANG_THIN_DIST
> make LLVM=1 oldconfig
> make LLVM=1 vmlinux -j <..>
The implementation changes the top-level Makefile with a
macro for generating `vmlinux.o` for distributed ThinLTO
builds. It uses the existing Kbuild infrastructure to
perform two recursive passes through the subdirectories.
The first pass generates LLVM IR object files, similar to
in-process ThinLTO. Following the thin-link stage, a second
pass compiles these IR files into the final native object
files. The build rules and actions for this two-pass process
are primarily implemented in `scripts/Makefile.build`.
Currently, this patch focuses on building the main kernel
image (`vmlinux`) only. Support for building kernel modules
using this method is planned for a subsequent patch.
Tested on the following arch: x86, arm64, loongarch, and
riscv.
Some implementation details can be found here:
https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934
Signed-off-by: Rong Xu <xur@google.com>
---
MAINTAINERS | 5 +++
Makefile | 40 ++++++++++++++++++++---
arch/Kconfig | 12 +++++++
scripts/Makefile.build | 52 +++++++++++++++++++++++++++---
scripts/Makefile.lib | 7 +++-
scripts/Makefile.vmlinux_o | 16 +++++++---
scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++
scripts/head-object-list.txt | 1 +
8 files changed, 171 insertions(+), 15 deletions(-)
create mode 100644 scripts/Makefile.vmlinux_thinlink
diff --git a/MAINTAINERS b/MAINTAINERS
index c966e71ea60bb..40d3bc223e4b0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5782,6 +5782,11 @@ F: scripts/Makefile.clang
F: scripts/clang-tools/
K: \b(?i:clang|llvm)\b
+CLANG/LLVM THINLTO DISTRIBUTED BUILD
+M: Rong Xu <xur@google.com>
+S: Supported
+F: scripts/Makefile.vmlinux_thinlink
+
CLK API
M: Russell King <linux@armlinux.org.uk>
L: linux-clk@vger.kernel.org
diff --git a/Makefile b/Makefile
index e65f8735c7bf6..18da9a40dd2c8 100644
--- a/Makefile
+++ b/Makefile
@@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \
outputmakefile rustavailable rustfmt rustfmtcheck
no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \
image_name
-single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/
+single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.final_o \
+ %.final_a %.o.thinlto.bc %/
config-build :=
mixed-build :=
@@ -991,10 +992,10 @@ export CC_FLAGS_SCS
endif
ifdef CONFIG_LTO_CLANG
-ifdef CONFIG_LTO_CLANG_THIN
-CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
-else
+ifdef CONFIG_LTO_CLANG_FULL
CC_FLAGS_LTO := -flto
+else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST
+CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
endif
CC_FLAGS_LTO += -fvisibility=hidden
@@ -1218,8 +1219,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE
$(call if_changed,ar_vmlinux.a)
PHONY += vmlinux_o
+ifdef CONFIG_LTO_CLANG_THIN_DIST
+vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink
+targets += vmlinux.thinlink
+
+vmlinux_final_a := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_OBJS))
+quiet_cmd_ar_vmlinux.final_a = AR $@
+ cmd_ar_vmlinux.final_a = \
+ rm -f $@; \
+ $(AR) cDPrST $@ $(vmlinux_final_a); \
+ $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt)
+
+define rule_gen_vmlinux_final_a
+ +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.final_a
+ $(call cmd_and_savecmd,ar_vmlinux.final_a)
+endef
+
+vmlinux.final_a: vmlinux.thinlink scripts/head-object-list.txt FORCE
+ $(call if_changed_rule,gen_vmlinux_final_a)
+
+targets += vmlinux.final_a
+
+vmlinux_o: vmlinux.final_a
+ $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o
+else
vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS)
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o
+endif
vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o
@:
@@ -1577,7 +1604,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \
modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \
compile_commands.json rust/test \
rust-project.json .vmlinux.objs .vmlinux.export.c \
- .builtin-dtbs-list .builtin-dtb.S
+ .builtin-dtbs-list .builtin-dtb.S \
+ .vmlinux_thinlto_bc_files vmlinux.thinlink
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
@@ -2028,6 +2056,8 @@ clean: $(clean-dirs)
-o -name '*.symtypes' -o -name 'modules.order' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
+ -o -name '*.final_a' -o -name '*.final_o' \
+ -o -name '*.o.thinlto.bc' \
-o -name '*.gcno' \
\) -type f -print \
-o -name '.tmp_*' -print \
diff --git a/arch/Kconfig b/arch/Kconfig
index b0adb665041f1..cbeeeb9b076d8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -810,6 +810,18 @@ config LTO_CLANG_THIN
https://clang.llvm.org/docs/ThinLTO.html
If unsure, say Y.
+
+config LTO_CLANG_THIN_DIST
+ bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)"
+ depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
+ select LTO_CLANG
+ help
+ This option enables Clang's ThinLTO in distributed build mode.
+ In this mode, the linker performs the thin-link, generating
+ ThinLTO index files. Subsequently, the build system explicitly
+ invokes ThinLTO backend compilation using these index files
+ and pre-linked IR objects. The resulting final object files
+ have the .final_o suffix.
endchoice
config ARCH_SUPPORTS_AUTOFDO_CLANG
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 13dcd86e74ca8..87259b0e2bfc8 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -50,18 +50,23 @@ endif
# ===========================================================================
+builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a)
+ifeq ($(thinlto_final_pass),1)
+builtin_suffix := final_a
+endif
+
# subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...)
-subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y)))
+subdir-builtin := $(sort $(filter %/built-in.$(builtin_suffix), $(real-obj-y)))
subdir-modorder := $(sort $(filter %/modules.order, $(obj-m)))
targets-for-builtin := $(extra-y)
ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),)
-targets-for-builtin += $(obj)/lib.a
+targets-for-builtin += $(obj)/lib.$(builtin_suffix)
endif
ifdef need-builtin
-targets-for-builtin += $(obj)/built-in.a
+targets-for-builtin += $(obj)/built-in.$(builtin_suffix)
endif
targets-for-modules := $(foreach x, o mod, \
@@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE
targets += $(filter-out $(subdir-builtin), $(real-obj-y))
targets += $(filter-out $(subdir-modorder), $(real-obj-m))
targets += $(lib-y) $(always-y)
+ifeq ($(builtin_suffix), final_a)
+final_o_targets := $(patsubst %.o,%.final_o,$(targets))
+targets += $(final_o_targets)
+endif # builtin_suffix == final_a: .final_o counterparts of the .o targets are tracked too
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
@@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@
$(obj)/%.lds: $(src)/%.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
+ifdef CONFIG_LTO_CLANG_THIN_DIST
+# Generate .final_o (obj) from .o (bitcode) file
+# ---------------------------------------------------------------------------
+quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@
+
+cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \
+ $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \
+ -flto=thin, $(c_flags)) \
+ -Wno-unused-command-line-argument \
+ -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \
+ $(if $(findstring ../,$<), \
+ $$(realpath --relative-to=$(srcroot) $<), $<), \
+ cp $< $@)
+
+$(obj)/%.final_o: $(obj)/%.o FORCE
+ $(call if_changed,cc_o_bc)
+endif
+
# ASN.1 grammar
# ---------------------------------------------------------------------------
quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch]
@@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
# ---------------------------------------------------------------------------
# To build objects in subdirs, we need to descend into the directories
-$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ;
+$(subdir-builtin): $(obj)/%/built-in.$(builtin_suffix): $(obj)/% ;
$(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
#
@@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@
$(obj)/built-in.a: $(real-obj-y) FORCE
$(call if_changed,ar_builtin)
+ifdef CONFIG_LTO_CLANG_THIN_DIST
+# Rule to compile a set of .final_o files into one .final_a file.
+$(obj)/built-in.final_a: $(patsubst %.o,%.final_o,$(real-obj-y)) FORCE
+ $(call if_changed,ar_builtin)
+endif
+
# This is a list of build artifacts from the current Makefile and its
# sub-directories. The timestamp should be updated when any of the member files.
@@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE
$(obj)/lib.a: $(lib-y) FORCE
$(call if_changed,ar)
+ifdef CONFIG_LTO_CLANG_THIN_DIST
+quiet_cmd_ar_final = AR $@
+ cmd_ar_final = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.final_o,$(real-prereqs))
+
+$(obj)/lib.final_a: $(patsubst %.o,%.final_o,$(lib-y)) FORCE
+ $(call if_changed,ar_final)
+endif
+
quiet_cmd_ld_multi_m = LD [M] $@
cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool)
@@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs)
PHONY += $(subdir-ym)
$(subdir-ym):
$(Q)$(MAKE) $(build)=$@ \
- need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \
+ need-builtin=$(if $(filter $@/built-in.$(builtin_suffix), $(subdir-builtin)),1) \
+ thinlto_final_pass=$(if $(filter final_a, $(builtin_suffix)),1) \
need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \
$(filter $@/%, $(single-subdir-goals))
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 2fe73cda0bddb..ce341ed8d1df3 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -34,8 +34,13 @@ else
obj-m := $(filter-out %/, $(obj-m))
endif
+builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a)
+ifeq ($(thinlto_final_pass),1)
+ builtin_suffix := final_a
+endif
+
ifdef need-builtin
-obj-y := $(patsubst %/, %/built-in.a, $(obj-y))
+obj-y := $(patsubst %/, %/built-in.$(builtin_suffix), $(obj-y))
else
obj-y := $(filter-out %/, $(obj-y))
endif
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 938c7457717ea..4f04eb2c11412 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -9,6 +9,14 @@ include $(srctree)/scripts/Kbuild.include
# for objtool
include $(srctree)/scripts/Makefile.lib
+ifeq ($(thinlto_final_pass),1)
+vmlinux_a := vmlinux.final_a
+vmlinux_libs := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_LIBS))
+else
+vmlinux_a := vmlinux.a
+vmlinux_libs := $(KBUILD_VMLINUX_LIBS)
+endif
+
# Generate a linker script to ensure correct ordering of initcalls for Clang LTO
# ---------------------------------------------------------------------------
@@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@
$(PERL) $(real-prereqs) > $@
.tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \
- vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
+ $(vmlinux_a) $(vmlinux_libs) FORCE
$(call if_changed,gen_initcalls_lds)
targets := .tmp_initcalls.lds
@@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@
$(LD) ${KBUILD_LDFLAGS} -r -o $@ \
$(vmlinux-o-ld-args-y) \
$(addprefix -T , $(initcalls-lds)) \
- --whole-archive vmlinux.a --no-whole-archive \
- --start-group $(KBUILD_VMLINUX_LIBS) --end-group \
+ --whole-archive $(vmlinux_a) --no-whole-archive \
+ --start-group $(vmlinux_libs) --end-group \
$(cmd_objtool)
define rule_ld_vmlinux.o
@@ -68,7 +76,7 @@ define rule_ld_vmlinux.o
$(call cmd,gen_objtooldep)
endef
-vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
+vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE
$(call if_changed_rule,ld_vmlinux.o)
targets += vmlinux.o
diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink
new file mode 100644
index 0000000000000..13e4026c7d45b
--- /dev/null
+++ b/scripts/Makefile.vmlinux_thinlink
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+PHONY := __default
+__default: vmlinux.thinlink
+
+include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+
+# Generate a linker script to ensure correct ordering of initcalls for Clang LTO
+# ---------------------------------------------------------------------------
+
+quiet_cmd_gen_initcalls_lds = GEN $@
+ cmd_gen_initcalls_lds = \
+ $(PYTHON3) $(srctree)/scripts/jobserver-exec \
+ $(PERL) $(real-prereqs) > $@
+
+.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \
+ vmlinux.a FORCE
+ $(call if_changed,gen_initcalls_lds)
+
+targets := .tmp_initcalls_thinlink.lds
+
+initcalls-lds := .tmp_initcalls_thinlink.lds
+
+quiet_cmd_ld_vmlinux.thinlink = LD $@
+ cmd_ld_vmlinux.thinlink = \
+ $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \
+ $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \
+ --thinlto-index-only @.vmlinux_thinlto_bc_files; \
+ touch vmlinux.thinlink
+
+vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE
+ $(call if_changed,ld_vmlinux.thinlink)
+
+targets += vmlinux.thinlink
+
+# Add FORCE to the prerequisites of a target to force it to be always rebuilt.
+# ---------------------------------------------------------------------------
+
+PHONY += FORCE
+FORCE:
+
+# Read all saved command lines and dependencies for the $(targets) we
+# may be building above, using $(if_changed{,_dep}). As an
+# optimization, we don't need to read them if the target does not
+# exist, we will rebuild anyway in that case.
+
+existing-targets := $(wildcard $(sort $(targets)))
+
+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
+
+.PHONY: $(PHONY)
diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt
index 7274dfc65af60..b300222e99a6b 100644
--- a/scripts/head-object-list.txt
+++ b/scripts/head-object-list.txt
@@ -18,6 +18,7 @@ arch/arm/kernel/head.o
arch/csky/kernel/head.o
arch/hexagon/kernel/head.o
arch/loongarch/kernel/head.o
+arch/loongarch/kernel/head.final_o
arch/m68k/68000/head.o
arch/m68k/coldfire/head.o
arch/m68k/kernel/head.o
--
2.49.0.805.g082f7c87e0-goog
Just wanted to gently follow up on this patch that I sent previously.
From a toolchain and compiler perspective, I genuinely believe that
kernel ThinLTO builds should use this. I really want to get some feedback
and reviews on this patch.
Thanks,
-Rong
On Sat, Apr 19, 2025 at 6:02 PM <xur@google.com> wrote:
>
> From: Rong Xu <xur@google.com>
>
> Add distributed ThinLTO build support for the Linux kernel.
> This new mode offers several advantages: (1) Increased
> flexibility in handling user-specified build options.
> (2) Improved user-friendliness for developers. (3) Greater
> convenience for integrating with objtool and livepatch.
>
> Note that "distributed" in this context refers to a term
> that differentiates in-process ThinLTO builds by invoking
> backend compilation through the linker, not necessarily
> building in distributed environments.
>
> Distributed ThinLTO is enabled via the
> `CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example:
> > make LLVM=1 defconfig
> > scripts/config -e LTO_CLANG_THIN_DIST
> > make LLVM=1 oldconfig
> > make LLVM=1 vmlinux -j <..>
>
> The implementation changes the top-level Makefile with a
> macro for generating `vmlinux.o` for distributed ThinLTO
> builds. It uses the existing Kbuild infrastructure to
> perform two recursive passes through the subdirectories.
> The first pass generates LLVM IR object files, similar to
> in-process ThinLTO. Following the thin-link stage, a second
> pass compiles these IR files into the final native object
> files. The build rules and actions for this two-pass process
> are primarily implemented in `scripts/Makefile.build`.
>
> Currently, this patch focuses on building the main kernel
> image (`vmlinux`) only. Support for building kernel modules
> using this method is planned for a subsequent patch.
>
> Tested on the following arch: x86, arm64, loongarch, and
> riscv.
>
> Some implementation details can be found here:
> https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934
>
> Signed-off-by: Rong Xu <xur@google.com>
> ---
> MAINTAINERS | 5 +++
> Makefile | 40 ++++++++++++++++++++---
> arch/Kconfig | 12 +++++++
> scripts/Makefile.build | 52 +++++++++++++++++++++++++++---
> scripts/Makefile.lib | 7 +++-
> scripts/Makefile.vmlinux_o | 16 +++++++---
> scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++
> scripts/head-object-list.txt | 1 +
> 8 files changed, 171 insertions(+), 15 deletions(-)
> create mode 100644 scripts/Makefile.vmlinux_thinlink
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c966e71ea60bb..40d3bc223e4b0 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -5782,6 +5782,11 @@ F: scripts/Makefile.clang
> F: scripts/clang-tools/
> K: \b(?i:clang|llvm)\b
>
> +CLANG/LLVM THINLTO DISTRIBUTED BUILD
> +M: Rong Xu <xur@google.com>
> +S: Supported
> +F: scripts/Makefile.vmlinux_thinlink
> +
> CLK API
> M: Russell King <linux@armlinux.org.uk>
> L: linux-clk@vger.kernel.org
> diff --git a/Makefile b/Makefile
> index e65f8735c7bf6..18da9a40dd2c8 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \
> outputmakefile rustavailable rustfmt rustfmtcheck
> no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \
> image_name
> -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/
> +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.final_o \
> + %.final_a %.o.thinlto.bc %/
>
> config-build :=
> mixed-build :=
> @@ -991,10 +992,10 @@ export CC_FLAGS_SCS
> endif
>
> ifdef CONFIG_LTO_CLANG
> -ifdef CONFIG_LTO_CLANG_THIN
> -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
> -else
> +ifdef CONFIG_LTO_CLANG_FULL
> CC_FLAGS_LTO := -flto
> +else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST
> +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
> endif
> CC_FLAGS_LTO += -fvisibility=hidden
>
> @@ -1218,8 +1219,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE
> $(call if_changed,ar_vmlinux.a)
>
> PHONY += vmlinux_o
> +ifdef CONFIG_LTO_CLANG_THIN_DIST
> +vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
> + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink
> +targets += vmlinux.thinlink
> +
> +vmlinux_final_a := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_OBJS))
> +quiet_cmd_ar_vmlinux.final_a = AR $@
> + cmd_ar_vmlinux.final_a = \
> + rm -f $@; \
> + $(AR) cDPrST $@ $(vmlinux_final_a); \
> + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt)
> +
> +define rule_gen_vmlinux_final_a
> + +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.final_a
> + $(call cmd_and_savecmd,ar_vmlinux.final_a)
> +endef
> +
> +vmlinux.final_a: vmlinux.thinlink scripts/head-object-list.txt FORCE
> + $(call if_changed_rule,gen_vmlinux_final_a)
> +
> +targets += vmlinux.final_a
> +
> +vmlinux_o: vmlinux.final_a
> + $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o
> +else
> vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS)
> $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o
> +endif
>
> vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o
> @:
> @@ -1577,7 +1604,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \
> modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \
> compile_commands.json rust/test \
> rust-project.json .vmlinux.objs .vmlinux.export.c \
> - .builtin-dtbs-list .builtin-dtb.S
> + .builtin-dtbs-list .builtin-dtb.S \
> + .vmlinux_thinlto_bc_files vmlinux.thinlink
>
> # Directories & files removed with 'make mrproper'
> MRPROPER_FILES += include/config include/generated \
> @@ -2028,6 +2056,8 @@ clean: $(clean-dirs)
> -o -name '*.symtypes' -o -name 'modules.order' \
> -o -name '*.c.[012]*.*' \
> -o -name '*.ll' \
> + -o -name '*.final_a' -o -name '*.final_o' \
> + -o -name '*.o.thinlto.bc' \
> -o -name '*.gcno' \
> \) -type f -print \
> -o -name '.tmp_*' -print \
> diff --git a/arch/Kconfig b/arch/Kconfig
> index b0adb665041f1..cbeeeb9b076d8 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -810,6 +810,18 @@ config LTO_CLANG_THIN
> https://clang.llvm.org/docs/ThinLTO.html
>
> If unsure, say Y.
> +
> +config LTO_CLANG_THIN_DIST
> + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)"
> + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
> + select LTO_CLANG
> + help
> + This option enables Clang's ThinLTO in distributed build mode.
> + In this mode, the linker performs the thin-link, generating
> + ThinLTO index files. Subsequently, the build system explicitly
> + invokes ThinLTO backend compilation using these index files
> + and pre-linked IR objects. The resulting final object files
> + are with the .final_o suffix.
> endchoice
>
> config ARCH_SUPPORTS_AUTOFDO_CLANG
> diff --git a/scripts/Makefile.build b/scripts/Makefile.build
> index 13dcd86e74ca8..87259b0e2bfc8 100644
> --- a/scripts/Makefile.build
> +++ b/scripts/Makefile.build
> @@ -50,18 +50,23 @@ endif
>
> # ===========================================================================
>
> +builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a)
> +ifeq ($(thinlto_final_pass),1)
> +builtin_suffix := final_a
> +endif
> +
> # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...)
> -subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y)))
> +subdir-builtin := $(sort $(filter %/built-in.$(builtin_suffix), $(real-obj-y)))
> subdir-modorder := $(sort $(filter %/modules.order, $(obj-m)))
>
> targets-for-builtin := $(extra-y)
>
> ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),)
> -targets-for-builtin += $(obj)/lib.a
> +targets-for-builtin += $(obj)/lib.$(builtin_suffix)
> endif
>
> ifdef need-builtin
> -targets-for-builtin += $(obj)/built-in.a
> +targets-for-builtin += $(obj)/built-in.$(builtin_suffix)
> endif
>
> targets-for-modules := $(foreach x, o mod, \
> @@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE
> targets += $(filter-out $(subdir-builtin), $(real-obj-y))
> targets += $(filter-out $(subdir-modorder), $(real-obj-m))
> targets += $(lib-y) $(always-y)
> +ifeq ($(builtin_suffix), final_a)
> +final_o_targets = $(patsubst,%.o,%.final_o,$(targets))
> +targets += $(final_targets)
> +endif
>
> # Linker scripts preprocessor (.lds.S -> .lds)
> # ---------------------------------------------------------------------------
> @@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@
> $(obj)/%.lds: $(src)/%.lds.S FORCE
> $(call if_changed_dep,cpp_lds_S)
>
> +ifdef CONFIG_LTO_CLANG_THIN_DIST
> +# Generate .final_o (obj) from .o (bitcode) file
> +# ---------------------------------------------------------------------------
> +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@
> +
> +cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \
> + $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \
> + -flto=thin, $(c_flags)) \
> + -Wno-unused-command-line-argument \
> + -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \
> + $(if $(findstring ../,$<), \
> + $$(realpath --relative-to=$(srcroot) $<), $<), \
> + cp $< $@)
> +
> +$(obj)/%.final_o: $(obj)/%.o FORCE
> + $(call if_changed,cc_o_bc)
> +endif
> +
> # ASN.1 grammar
> # ---------------------------------------------------------------------------
> quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch]
> @@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
> # ---------------------------------------------------------------------------
>
> # To build objects in subdirs, we need to descend into the directories
> -$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ;
> +$(subdir-builtin): $(obj)/%/built-in.$(builtin_suffix): $(obj)/% ;
> $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
>
> #
> @@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@
> $(obj)/built-in.a: $(real-obj-y) FORCE
> $(call if_changed,ar_builtin)
>
> +ifdef CONFIG_LTO_CLANG_THIN_DIST
> +# Rule to compile a set of .final_o files into one .final_a file.
> +$(obj)/built-in.final_a: $(patsubst %.o,%.final_o,$(real-obj-y)) FORCE
> + $(call if_changed,ar_builtin)
> +endif
> +
> # This is a list of build artifacts from the current Makefile and its
> # sub-directories. The timestamp should be updated when any of the member files.
>
> @@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE
> $(obj)/lib.a: $(lib-y) FORCE
> $(call if_changed,ar)
>
> +ifdef CONFIG_LTO_CLANG_THIN_DIST
> +quiet_cmd_ar_final = AR $@
> + cmd_ar_final = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.final_o,$(real-prereqs))
> +
> +$(obj)/lib.final_a: $(patsubst %.o,%.final_o,$(lib-y)) FORCE
> + $(call if_changed,ar_final)
> +endif
> +
> quiet_cmd_ld_multi_m = LD [M] $@
> cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool)
>
> @@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs)
> PHONY += $(subdir-ym)
> $(subdir-ym):
> $(Q)$(MAKE) $(build)=$@ \
> - need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \
> + need-builtin=$(if $(filter $@/built-in.$(builtin_suffix), $(subdir-builtin)),1) \
> + thinlto_final_pass=$(if $(filter final_a, $(builtin_suffix)),1) \
> need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \
> $(filter $@/%, $(single-subdir-goals))
>
> diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> index 2fe73cda0bddb..ce341ed8d1df3 100644
> --- a/scripts/Makefile.lib
> +++ b/scripts/Makefile.lib
> @@ -34,8 +34,13 @@ else
> obj-m := $(filter-out %/, $(obj-m))
> endif
>
> +builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a)
> +ifeq ($(thinlto_final_pass),1)
> + builtin_suffix := final_a
> +endif
> +
> ifdef need-builtin
> -obj-y := $(patsubst %/, %/built-in.a, $(obj-y))
> +obj-y := $(patsubst %/, %/built-in.$(builtin_suffix), $(obj-y))
> else
> obj-y := $(filter-out %/, $(obj-y))
> endif
> diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
> index 938c7457717ea..4f04eb2c11412 100644
> --- a/scripts/Makefile.vmlinux_o
> +++ b/scripts/Makefile.vmlinux_o
> @@ -9,6 +9,14 @@ include $(srctree)/scripts/Kbuild.include
> # for objtool
> include $(srctree)/scripts/Makefile.lib
>
> +ifeq ($(thinlto_final_pass),1)
> +vmlinux_a := vmlinux.final_a
> +vmlinux_libs := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_LIBS))
> +else
> +vmlinux_a := vmlinux.a
> +vmlinux_libs := $(KBUILD_VMLINUX_LIBS)
> +endif
> +
> # Generate a linker script to ensure correct ordering of initcalls for Clang LTO
> # ---------------------------------------------------------------------------
>
> @@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@
> $(PERL) $(real-prereqs) > $@
>
> .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \
> - vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
> + $(vmlinux_a) $(vmlinux_libs) FORCE
> $(call if_changed,gen_initcalls_lds)
>
> targets := .tmp_initcalls.lds
> @@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@
> $(LD) ${KBUILD_LDFLAGS} -r -o $@ \
> $(vmlinux-o-ld-args-y) \
> $(addprefix -T , $(initcalls-lds)) \
> - --whole-archive vmlinux.a --no-whole-archive \
> - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \
> + --whole-archive $(vmlinux_a) --no-whole-archive \
> + --start-group $(vmlinux_libs) --end-group \
> $(cmd_objtool)
>
> define rule_ld_vmlinux.o
> @@ -68,7 +76,7 @@ define rule_ld_vmlinux.o
> $(call cmd,gen_objtooldep)
> endef
>
> -vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
> +vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE
> $(call if_changed_rule,ld_vmlinux.o)
>
> targets += vmlinux.o
> diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink
> new file mode 100644
> index 0000000000000..13e4026c7d45b
> --- /dev/null
> +++ b/scripts/Makefile.vmlinux_thinlink
> @@ -0,0 +1,53 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +
> +PHONY := __default
> +__default: vmlinux.thinlink
> +
> +include include/config/auto.conf
> +include $(srctree)/scripts/Kbuild.include
> +
> +
> +# Generate a linker script to ensure correct ordering of initcalls for Clang LTO
> +# ---------------------------------------------------------------------------
> +
> +quiet_cmd_gen_initcalls_lds = GEN $@
> + cmd_gen_initcalls_lds = \
> + $(PYTHON3) $(srctree)/scripts/jobserver-exec \
> + $(PERL) $(real-prereqs) > $@
> +
> +.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \
> + vmlinux.a FORCE
> + $(call if_changed,gen_initcalls_lds)
> +
> +targets := .tmp_initcalls_thinlink.lds
> +
> +initcalls-lds := .tmp_initcalls_thinlink.lds
> +
> +quiet_cmd_ld_vmlinux.thinlink = LD $@
> + cmd_ld_vmlinux.thinlink = \
> + $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \
> + $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \
> + --thinlto-index-only @.vmlinux_thinlto_bc_files; \
> + touch vmlinux.thinlink
> +
> +vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE
> + $(call if_changed,ld_vmlinux.thinlink)
> +
> +targets += vmlinux.thinlink
> +
> +# Add FORCE to the prerequisites of a target to force it to be always rebuilt.
> +# ---------------------------------------------------------------------------
> +
> +PHONY += FORCE
> +FORCE:
> +
> +# Read all saved command lines and dependencies for the $(targets) we
> +# may be building above, using $(if_changed{,_dep}). As an
> +# optimization, we don't need to read them if the target does not
> +# exist, we will rebuild anyway in that case.
> +
> +existing-targets := $(wildcard $(sort $(targets)))
> +
> +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
> +
> +.PHONY: $(PHONY)
> diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt
> index 7274dfc65af60..b300222e99a6b 100644
> --- a/scripts/head-object-list.txt
> +++ b/scripts/head-object-list.txt
> @@ -18,6 +18,7 @@ arch/arm/kernel/head.o
> arch/csky/kernel/head.o
> arch/hexagon/kernel/head.o
> arch/loongarch/kernel/head.o
> +arch/loongarch/kernel/head.final_o
> arch/m68k/68000/head.o
> arch/m68k/coldfire/head.o
> arch/m68k/kernel/head.o
> --
> 2.49.0.805.g082f7c87e0-goog
>
Hi Rong,
On Tue, Apr 29, 2025 at 01:34:00PM -0700, Rong Xu wrote:
> Just wanted to gently follow up on this patch that I sent previously.
>
> From a toolchain and compiler perspective, I genuinely believe that
> kernel ThinLTO builds should use this. I really want to get some feedback
> and reviews on this patch.
My apologies for some radio silence, other regressions and issues have
been higher priority until now. I will try to take a closer look at this
soon but I do have one comment based on your comment above and the
Kconfig help text...
> On Sat, Apr 19, 2025 at 6:02 PM <xur@google.com> wrote:
...
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index b0adb665041f1..cbeeeb9b076d8 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -810,6 +810,18 @@ config LTO_CLANG_THIN
> > https://clang.llvm.org/docs/ThinLTO.html
> >
> > If unsure, say Y.
> > +
> > +config LTO_CLANG_THIN_DIST
> > + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)"
> > + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
> > + select LTO_CLANG
> > + help
> > + This option enables Clang's ThinLTO in distributed build mode.
> > + In this mode, the linker performs the thin-link, generating
> > + ThinLTO index files. Subsequently, the build system explicitly
> > + invokes ThinLTO backend compilation using these index files
> > + and pre-linked IR objects. The resulting final object files
> > + are with the .final_o suffix.
> > endchoice
This says what the option does but it does not really say why the user
would want to use this over CONFIG_LTO_CLANG_THIN; even as someone
relatively familiar with these technologies, I am not sure what this
means for the build or resulting kernel. Is there any reason why a user
would want to use in-process ThinLTO instead of distributed? You seem to
make the case above and in Discourse that the kernel would benefit from
this, so should we just wholesale replace in-process ThinLTO with the
distributed one?
Cheers,
Nathan
© 2016 - 2026 Red Hat, Inc.