arch/x86/Kconfig.cpu | 60 +++++++++++++++++++++++++++++++++++++++----- arch/x86/Makefile | 6 +++++ 2 files changed, 60 insertions(+), 6 deletions(-)
GCC 11.1 and Clang 12.0[1] allow for the following new generic
64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4. This commit
adds them as options accessible under:
Processor type and features --->
Processor family --->
Users of glibc 2.33 and above can see which level is supported
by running: /lib/ld-linux-x86-64.so.2 --help | grep supported
or: /lib64/ld-linux-x86-64.so.2 --help | grep supported
ACKNOWLEDGMENTS
This patch builds on the seminal work by Jeroen.[2]
REFERENCES
1. https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
2. http://www.linuxforge.net/docs/linux/linux-gcc.php
Signed-off-by: John Audia <therealgraysky@proton.me>
---
arch/x86/Kconfig.cpu | 60 +++++++++++++++++++++++++++++++++++++++-----
arch/x86/Makefile | 6 +++++
2 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2a7279d80460..b09a764e6dd1 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,6 +294,54 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
+config MAMD_CPU_V2
+ bool "AMD x86-64-v2"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ AMD x86-64 CPU with v2 instructions.
+ Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v2.
+
+config MAMD_CPU_V3
+ bool "AMD x86-64-v3"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ AMD x86-64 CPU with v3 instructions.
+ Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v3.
+
+config MAMD_CPU_V4
+ bool "AMD x86-64-v4"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ AMD x86-64 CPU with v4 instructions.
+ Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v4.
+
+config MINTEL_CPU_V2
+ bool "Intel x86-64-v2"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ Intel x86-64 CPU with v2 instructions.
+ Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v2.
+
+config MINTEL_CPU_V3
+ bool "Intel x86-64-v3"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ Intel x86-64 CPU with v3 instructions.
+ Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v3.
+
+config MINTEL_CPU_V4
+ bool "Intel x86-64-v4"
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ depends on X86_64
+ help
+ Intel x86-64 CPU with v4 instructions.
+ Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v4.
+
endchoice
config X86_GENERIC
@@ -318,7 +366,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
default "4" if MELAN || M486SX || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
@@ -336,11 +384,11 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
#
# P6_NOPs are a relatively minor optimization that require a family >=
@@ -356,7 +404,7 @@ config X86_USE_PPRO_CHECKSUM
config X86_P6_NOP
def_bool y
depends on X86_64
- depends on (MCORE2 || MPENTIUM4 || MPSC)
+ depends on (MCORE2 || MPENTIUM4 || MPSC || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4)
config X86_TSC
def_bool y
@@ -364,7 +412,7 @@ config X86_TSC
config X86_HAVE_PAE
def_bool y
- depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
+ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
config X86_CMPXCHG64
def_bool y
@@ -379,7 +427,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 801fd85c3ef6..3d03e687eaac 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,6 +179,12 @@ else
cflags-$(CONFIG_MCORE2) += -march=core2
cflags-$(CONFIG_MATOM) += -march=atom
cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
+ cflags-$(CONFIG_MAMD_CPU_V2) += -march=x86-64-v2
+ cflags-$(CONFIG_MAMD_CPU_V3) += -march=x86-64-v3
+ cflags-$(CONFIG_MAMD_CPU_V4) += -march=x86-64-v4
+ cflags-$(CONFIG_MINTEL_CPU_V2) += -march=x86-64-v2
+ cflags-$(CONFIG_MINTEL_CPU_V3) += -march=x86-64-v3
+ cflags-$(CONFIG_MINTEL_CPU_V4) += -march=x86-64-v4
KBUILD_CFLAGS += $(cflags-y)
rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
--
2.46.1
hi, we don't have enough knowledge how this commit causing the random early crash issue as report below. we noticed the config has below diff comparing to parent. --- /pkg/linux/x86_64-randconfig-016-20240921/clang-18/70ad4cfb4d4a9f97afd7ba12ae5c4a62e719aa44/.config 2024-09-23 14:10:14.423097567 +0800 +++ /pkg/linux/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/.config 2024-09-23 13:13:36.831871815 +0800 @@ -350,14 +350,19 @@ CONFIG_PVH=y CONFIG_PARAVIRT_CLOCK=y # CONFIG_JAILHOUSE_GUEST is not set CONFIG_ACRN_GUEST=y -CONFIG_MK8=y +# CONFIG_MK8 is not set # CONFIG_MPSC is not set # CONFIG_MCORE2 is not set # CONFIG_MATOM is not set # CONFIG_GENERIC_CPU is not set +# CONFIG_MAMD_CPU_V2 is not set +# CONFIG_MAMD_CPU_V3 is not set +CONFIG_MAMD_CPU_V4=y +# CONFIG_MINTEL_CPU_V2 is not set +# CONFIG_MINTEL_CPU_V3 is not set +# CONFIG_MINTEL_CPU_V4 is not set CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y CONFIG_X86_TSC=y CONFIG_X86_HAVE_PAE=y early crash happens 70 times out of 500 runs. for parent, keeps clean when we run same tests almost 1000 times. 70ad4cfb4d4a9f97 178c2862ab0388f7de1ca23b7b4 ---------------- --------------------------- fail:runs %reproduction fail:runs | | | :991 7% 70:500 dmesg.BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code just FYI what we observed in our tests. 
Hello, kernel test robot noticed "BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code" on: commit: 178c2862ab0388f7de1ca23b7b4718e09d8acc24 ("[PATCH] x86: add more x86-64 micro-architecture levels") url: https://github.com/intel-lab-lkp/linux/commits/John/x86-add-more-x86-64-micro-architecture-levels/20240915-190636 base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 70ad4cfb4d4a9f97afd7ba12ae5c4a62e719aa44 patch link: https://lore.kernel.org/all/W22JX8eWQctCiWIDKGjx4IUU4ZgYmKa1zPOZSKHHVZ74zpUEmVV1VoPMMNcyc-zhraUayW0d4d7OIUYZHuiEqllnAc1tB8DthZahsHZuw0Y=@proton.me/ patch subject: [PATCH] x86: add more x86-64 micro-architecture levels in testcase: trinity version: trinity-i386-abe9de86-1_20230429 with following parameters: runtime: 300s group: group-04 nr_groups: 5 compiler: clang-18 test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202409241436.b37a069e-oliver.sang@intel.com early console in setup code convert early boot stage from hang to failed BUG: kernel failed in early-boot stage, last printk: early console in setup code Linux version 6.11.0-rc7-00546-g178c2862ab03 #1 Command line: ip=::::vm-meta-98::dhcp root=/dev/ram0 RESULT_ROOT=/result/trinity/group-04-5-300s/vm-snb/debian-11.1-i386-20220923.cgz/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/454 BOOT_IMAGE=/pkg/linux/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/vmlinuz-6.11.0-rc7-00546-g178c2862ab03 branch=linux-devel/devel-hourly-20240921-005829 job=/lkp/jobs/scheduled/vm-meta-98/trinity-group-04-5-300s-debian-11.1-i386-20220923.cgz-x86_64-randconfig-016-20240921-178c2862ab03-20240923-37395-1iv09pj-434.yaml user=lkp ARCH=x86_64 kconfig=x86_64-randconfig-016-20240921 commit=178c2862ab0388f7de1ca23b7b4718e09d8acc24 intremap=posted_msi vmalloc=256M initramfs_async=0 page_owner=on carrier_timeout=60 max_uptime=1200 LKP_SERVER=internal-lkp-server selinux=0 debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 net.ifnames=0 printk.devkmsg=on panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0 drbd.minor_count=8 systemd.log_level=err ignore_loglevel console=tty0 earlyprintk=ttyS0,115200 console=ttyS0,115200 vga=normal rw rcuperf.shutdown=0 rcuscale.shutdown=0 refscale.shutdown=0 watchdog_thresh=240 audit=0 kunit.enable=0 ia32_emulation=on riscv_isa_fallback=1 Kboot worker: lkp-worker22 Elapsed time: 600 The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20240924/202409241436.b37a069e-oliver.sang@intel.com -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Tuesday, September 24th, 2024 at 3:00 AM, kernel test robot <oliver.sang@intel.com> wrote: > early crash happens 70 times out of 500 runs. > for parent, keeps clean when we run same tests almost 1000 times. Many thanks for this rigorous testing. Would you mind using the current revision of this patch (attached) or accessible at my github linked below? https://github.com/graysky2/kernel_compiler_patch
hi, John, On Tue, Sep 24, 2024 at 05:40:36PM +0000, John wrote: > On Tuesday, September 24th, 2024 at 3:00 AM, kernel test robot <oliver.sang@intel.com> wrote: > > > early crash happens 70 times out of 500 runs. > > for parent, keeps clean when we run same tests almost 1000 times. > > Many thanks for this rigorous testing. Would you mind using the current revision of this patch (attached) with this version, we cannot reproduced the early crash issue. like previous version (178c2862ab0388f7de1ca23b7b4), we still apply new version upon 70ad4cfb4d4a9f97. we run tests up to 1000 times. 70ad4cfb4d4a9f97 178c2862ab0388f7de1ca23b7b4 e9725b726c3c0bd129959c308fb ---------------- --------------------------- --------------------------- fail:runs %reproduction fail:runs %reproduction fail:runs | | | | | :991 7% 70:500 0% :1000 dmesg.BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code > or accessible at my github linked below? > > https://github.com/graysky2/kernel_compiler_patch > From 718155e6164b4bec45bcba8814c3f82e84f36db0 Mon Sep 17 00:00:00 2001 > From: graysky <therealgraysky AT proton DOT me> > Date: Mon, 16 Sep 2024 14:47:03 -0400 > > FEATURES > This patch adds additional tunings via new x86-64 ISA levels to the > Linux kernel. > > These are selectable under: > Processor type and features ---> x86-64 compiler ISA level > > ??? x86-64 A value of (1) is the default > ??? x86-64-v2 A value of (2) brings support for vector > instructions up to Streaming SIMD Extensions 4.2 (SSE4.2) > and Supplemental Streaming SIMD Extensions 3 (SSSE3), the > POPCNT instruction, and CMPXCHG16B. > ??? x86-64-v3 A value of (3) adds vector instructions up to AVX2, MOVBE, > and additional bit-manipulation instructions. > > There is also x86-64-v4 but including this makes little sense as > the kernel does not use any of the AVX512 instructions anyway. 
> > Users of glibc 2.33 and above can see which level is supported by running: > /lib/ld-linux-x86-64.so.2 --help | grep supported > Or > /lib64/ld-linux-x86-64.so.2 --help | grep supported > > BENEFITS > Small but real speed increases are measurable using a make endpoint comparing > a generic kernel to one built with one of the respective microarchs. > > See the following experimental evidence supporting this statement: > https://github.com/graysky2/kernel_compiler_patch?tab=readme-ov-file#benchmarks > > REQUIREMENTS > linux version 6.8-rc3+ > gcc version >=9.0 or clang version >=9.0 > > ACKNOWLEDGMENTS > This patch builds on the seminal work by Jeroen.[2] > > REFERENCES > 1. https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 > 2. http://www.linuxforge.net/docs/linux/linux-gcc.php > > --- > arch/x86/Kconfig.cpu | 24 ++++++++++++++++++++++++ > arch/x86/Makefile | 11 +++++++++-- > 2 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu > index 2a7279d80460..562a273be222 100644 > --- a/arch/x86/Kconfig.cpu > +++ b/arch/x86/Kconfig.cpu > @@ -308,6 +308,30 @@ config X86_GENERIC > This is really intended for distributors who need more > generic optimizations. > > +config X86_64_VERSION > + int "x86-64 compiler ISA level" > + range 1 3 > + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) > + depends on X86_64 && GENERIC_CPU > + help > + Specify a specific x86-64 compiler ISA level. > + > + There are three x86-64 ISA levels that work on top of > + the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4. > + > + x86-64-v2 brings support for vector instructions up to Streaming SIMD > + Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 > + (SSSE3), the POPCNT instruction, and CMPXCHG16B. > + > + x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional > + bit-manipulation instructions. 
> + > + x86-64-v4 is not included since the kernel does not use AVX512 instructions > + > + You can find the best version for your CPU by running one of the following: > + /lib/ld-linux-x86-64.so.2 --help | grep supported > + /lib64/ld-linux-x86-64.so.2 --help | grep supported > + > # > # Define implied options from the CPU selection here > config X86_INTERNODE_CACHE_SHIFT > diff --git a/arch/x86/Makefile b/arch/x86/Makefile > index 801fd85c3ef6..e1f88f846bed 100644 > --- a/arch/x86/Makefile > +++ b/arch/x86/Makefile > @@ -178,14 +178,21 @@ else > cflags-$(CONFIG_MPSC) += -march=nocona > cflags-$(CONFIG_MCORE2) += -march=core2 > cflags-$(CONFIG_MATOM) += -march=atom > - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic > + ifeq ($(CONFIG_X86_64_VERSION),1) > + cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic > + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic > + else > + cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION) > + rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) > + endif > + cflags-$(CONFIG_MATOM) += -march=bonnell > + cflags-$(CONFIG_MCORE2) += -march=core2 > KBUILD_CFLAGS += $(cflags-y) > > rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 > rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona > rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 > rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom > - rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic > KBUILD_RUSTFLAGS += $(rustflags-y) > > KBUILD_CFLAGS += -mno-red-zone > -- > 2.46.1 >
On 9/24/24 00:00, kernel test robot wrote: > we don't have enough knowledge how this commit causing the random > early crash issue as report below. > > we noticed the config has below diff comparing to parent.... > +# CONFIG_MAMD_CPU_V2 is not set > +# CONFIG_MAMD_CPU_V3 is not set > +CONFIG_MAMD_CPU_V4=y > +# CONFIG_MINTEL_CPU_V2 is not set > +# CONFIG_MINTEL_CPU_V3 is not set > +# CONFIG_MINTEL_CPU_V4 is not set Clang is probably being induced to use some ISA that isn't supported on Sandybridge. In any case, I think this series is very unlikely to get applied.
Yes, specifying '-march=x86-64-v3' can indeed yield significant performance improvements for CPUs that support it. I could confirm this. Please allow me a few days, as I will provide a detailed test data report from my own tests after my vacation. Given that such submissions 'pop up' in the mailing list from time to time, I hope this time we can see it through. We should have a broad discussion, comprehensive testing and a calm judgment until we reach a final conclusion on whether this modification brings more benefits or drawbacks. Link: https://github.com/graysky2/kernel_compiler_patch/issues/100 Suggested-by: WangYuli <wangyuli@uniontech.com> Tested-by: WangYuli <wangyuli@uniontech.com> Best regards, -- WangYuli
On Sun, Sep 15, 2024 at 11:05:52AM +0000, John wrote: > GCC 11.1 and Clang 12.0[1] allow for the following new generic > 64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4. This commit > adds them as options accessible under: > Processor type and features ---> > Processor family ---> > > Users of glibc 2.33 and above can see which level is supported > by running: /lib/ld-linux-x86-64.so.2 --help | grep supported > > or: /lib64/ld-linux-x86-64.so.2 --help | grep supported > > ACKNOWLEDGMENTS > This patch builds on the seminal work by Jeroen.[2] > > REFERENCES > 1. https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 > 2. http://www.linuxforge.net/docs/linux/linux-gcc.php > > Signed-off-by: John Audia <therealgraysky@proton.me> > --- > arch/x86/Kconfig.cpu | 60 +++++++++++++++++++++++++++++++++++++++----- > arch/x86/Makefile | 6 +++++ > 2 files changed, 60 insertions(+), 6 deletions(-) Patches like this one appear off and on on the mailing list and each time I ask what's the upside of maintaining this complexity? And everytime I get no reply or random handwaving. That's because -march settings have no noticeable effect on kernel code generation. Because the kernel code is already pretty much optimized when generated by the compiler and all those flavors don't bring anything additional. So this is not going anywhere. But hey, I'm always open to nice surprises... Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 9/15/24 12:49, Borislav Petkov wrote: > Patches like this one appear off and on on the mailing list and each > time I ask what's the upside of maintaining this complexity? Besides, well-known patches for this have already existed for years. So why reinvent the wheel here? E.g. graysky patch used by ZEN kernel: https://github.com/zen-kernel/zen-kernel/commit/6f32b8af8ccdb56ef2856db3631eea55b79378c6 It contains way more architectures, including ISA levels. On 9/15/24 11:05, John wrote: > GCC 11.1 and Clang 12.0[1] allow for the following new generic > 64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4. This commit > adds them as options accessible under: > Processor type and features ---> > Processor family ---> Anyway, this whole thing is actually more complicated than simply setting '-march'. Vector instructions are known to be problematic for the kernel, so they are disabled by KBUILD_CFLAGS. If you want to go with higher ISA levels than the kernel expects, an additional patch like this is required: https://github.com/zen-kernel/zen-kernel/commit/addc601c58e035e28153deeb6d441b91f1a50247
On Tuesday, September 17th, 2024 at 2:45 PM, H. Peter Anvin <hpa@zytor.com> wrote: > On September 17, 2024 8:22:38 PM GMT+02:00, John > Also, these are not uarch levels, they are ISA levels... Thank you for pointing that out. I see now the differences between ISA levels and uarches. > Besides, there are already well-known patches exist for years. So why reinventing the wheel here? > > E.g. graysky patch used by ZEN kernel: > https://github.com/zen-kernel/zen-kernel/commit/6f32b8af8ccdb56ef2856db3631eea55b79378c6 > It contains way more architectures, includig ISA levels. Yes, that is my git repo. I created the subset (just -march=x86-64-v[2,3,4]) patch specifically to post on lkml thinking that the larger patch with all of the uarches would be too complex.
On 9/19/24 00:02, John wrote: > Yes, that is my git repo. Oops, I didn't realize that. Even better then! Could you please explain where the performance gains should come from, considering that the kernel force disables all SIMD extensions? https://github.com/torvalds/linux/blob/4a39ac5b7d62679c07a3e3d12b0f6982377d8a7d/arch/x86/Makefile#L67-L80 I.e. if we won't have them anyway, what gives?
On Wednesday, September 18th, 2024 at 4:25 PM, Hanabishi <i.r.e.c.c.a.k.u.n+kernel.org@gmail.com> wrote: > Even better then! Could you please explain where the performance gains should come from, considering that the kernel force disables all SIMD extensions? > https://github.com/torvalds/linux/blob/4a39ac5b7d62679c07a3e3d12b0f6982377d8a7d/arch/x86/Makefile#L67-L80 > > I.e. if we won't have them anyway, what gives? I am not sure. Are some of the other things that -march=x86-64-v3 enables driving them? I will say that these timed benchmarks have been consistently reproducible for me. My code for the benchmark script is in that github repo as well if you would like to give it a whirl. As to the code you referenced re: disabling the SIMD extensions. Do you know why that is in place?
On 9/18/24 21:14, John wrote: > I am not sure. Are some of the other things -march=-x86-64-v3 driving them? Looking up a full table, v3 adds more than just AVX. x86-64-v2: CMPXCHG16B LAHF-SAHF POPCNT SSE3 SSE4_1 SSE4_2 SSSE3 x86-64-v3: AVX AVX2 BMI1 BMI2 F16C FMA LZCNT MOVBE OSXSAVE x86-64-v4: AVX512F AVX512BW AVX512CD AVX512DQ AVX512VL Maybe some other enabled instructions could issue some benefit. v4 seems to be useless for us though. > As to the code you referenced re: disabling the SIMD extensions. Do you know why that is in place? Not really. There is a link above pointing to a bug report discussing GCC quirks. I am not an expert in that. One day, out of curiosity, I tried to override it and build the kernel with '-mavx' (free performance, yay!). Well, it didn't even start and crashed immediately. I don't know if something has changed since then, but I guess there are reasons.
On Wednesday, September 18th, 2024 at 5:48 PM, Hanabishi <i.r.e.c.c.a.k.u.n+kernel.org@gmail.com> wrote: > One day, out of curiosity, I tried to override it and build the kernel with '-mavx' (free performance, yay!). > Well, it didn't even start and crashed immediately. > > I don't know if something has changed since then, but I guess there are reasons. I also tried commenting out the entire line. I too was able to boot into the kernel but it just rebooted before the login screen.
On 9/15/24 05:49, Borislav Petkov wrote: > So this is not going anywhere. But hey, I'm always open to nice > surprises... Oh, gah, and I just realized that this is doing "-march" and not "-mtune". So this really can build binaries that won't even run on older CPUs. That's just mean. So there needs to be a lot more justification before we go down this road.
On 9/15/24 04:05, John wrote: > +config MAMD_CPU_V2 > + bool "AMD x86-64-v2" > + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) > + depends on X86_64 > + help > + AMD x86-64 CPU with v2 instructions. > + Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v2. If these are going to be exposed to end users, we need *some* kind of help text that helps end users select among these options and what the pitfalls are. I actually don't have the foggiest idea what an "AMD x86-64 CPU with v2 instructions" even is. Even saying "AMD x86-64 CPU" isn't super helpful because "AMD x86_64" is kinda a generic way to refer to all the 64-bit x86 CPUs, Intel included. I assume that the compilers have grouped the CPUs into epochs that have some similarity. That's great and all, but we need to tell users what those are. Why are there v4's for both AMD and Intel that do the exact same thing? + cflags-$(CONFIG_MAMD_CPU_V4) += -march=x86-64-v4 ... + cflags-$(CONFIG_MINTEL_CPU_V4) += -march=x86-64-v4 Why is this copied and pasted six times? + depends on (CC_IS_GCC && GCC_VERSION > 110000)... I'm also _kinda_ surprised we don't have some kind of Kconfig option to just pass random flags into the compiler. That would be another way to do this. That would also be a, maybe, 10-line patch. Alternatively, anyone wanting to do this could just hack their makefile or (I assume) pass CFLAGS= into the build command-line. Why is something like that insufficient. In the *WORST* case, we shouldn't be doing this with bools. Do this: config X86_MARCH_VER int "Compiler Micro-Architecture Level" range 2 4 depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) depends on EXPERT depends on X86_64 help Specify a specific compiler "micro-architecture" version. You might want to do this when... You can find the best version for your CPU here... The pitfalls of this option are... 
Then you can do fun like: config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || ... + X86_MARCH_VER >= 2 which has the added advantage of never needing to be touched when v5 gets added. Oh, and this: > config X86_HAVE_PAE > def_bool y > - depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 > + depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4 is rather silly when M*_CPU_V* all: depends on X86_64 right? So, taking a step back: Please convince us that this is something we want to expose to end users in the first place, as opposed to having them hack makefiles or just allowing users a string instead of using the existing CONFIG_M* Kconfig options. Then, we can discuss the structure of these options. Should these "versions" be new "Processor family" options? Or, should they be _instead_ of selecting a "Processor family" Then, should the new Kconfig options be a series of bools, or an int? Last, how do we deal with multiple vendors? Or do we need it at all? I'm not actually sure at all why this has the AMD versus Intel distinction at all.
On Sunday, September 15th, 2024 at 7:40 AM, Dave Hansen <dave.hansen@intel.com> wrote: > In the WORST case, we shouldn't be doing this with bools. Do this: > > config X86_MARCH_VER > int "Compiler Micro-Architecture Level" > range 2 4 > depends on (CC_IS_GCC && GCC_VERSION > 110000) || > > (CC_IS_CLANG && CLANG_VERSION >= 120000) > > depends on EXPERT > depends on X86_64 > help > Specify a specific compiler "micro-architecture" version. > You might want to do this when... > You can find the best version for your CPU here... > The pitfalls of this option are... > > Then you can do fun like: > > config X86_L1_CACHE_SHIFT > int > default "7" if MPENTIUM4 || MPSC > + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || ... > + X86_MARCH_VER >= 2 > > > which has the added advantage of never needing to be touched when v5 > gets added. I like this approach much better, it is more streamlined and clean. I ran with your suggestions and the attached seems to work. I am grateful for my feedback and suggestions on the syntax. --- arch/x86/Kconfig.cpu | 27 +++++++++++++++++++-------- arch/x86/Makefile | 9 +++++++-- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2a7279d80460..2b24574f6ac5 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -308,6 +308,17 @@ config X86_GENERIC This is really intended for distributors who need more generic optimizations. +config X86_MARCH_VER + int "Compiler Micro-Architecture Level" + range 1 4 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Specify a specific compiler "micro-architecture" version. + You might want to do this when... + You can find the best version for your CPU here... + The pitfalls of this option are... 
+ # # Define implied options from the CPU selection here config X86_INTERNODE_CACHE_SHIFT @@ -318,7 +329,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || X86_MARCH_VER >= 2 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,11 +347,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || X86_MARCH_VER >= 2 config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || X86_MARCH_VER >= 2 # # P6_NOPs are a relatively minor optimization that require a family >= @@ -356,15 +367,15 @@ config X86_USE_PPRO_CHECKSUM config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || X86_MARCH_VER >= 2) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || 
MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || X86_MARCH_VER >= 2) || X86_64 config X86_HAVE_PAE def_bool y - depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 + depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || X86_MARCH_VER >= 2 config X86_CMPXCHG64 def_bool y @@ -374,12 +385,12 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || X86_MARCH_VER >= 2) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || X86_MARCH_VER >= 2) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 801fd85c3ef6..e2d0d156a919 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -178,14 +178,19 @@ else cflags-$(CONFIG_MPSC) += -march=nocona cflags-$(CONFIG_MCORE2) += -march=core2 cflags-$(CONFIG_MATOM) += -march=atom - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic + ifeq ($(CONFIG_X86_MARCH_VER),1) + 
cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic + else + cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_MARCH_VER) + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=x86-64-v$(CONFIG_X86_MARCH_VER) + endif KBUILD_CFLAGS += $(cflags-y) rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom - rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic KBUILD_RUSTFLAGS += $(rustflags-y) KBUILD_CFLAGS += -mno-red-zone -- 2.46.1
On Sunday, September 15th, 2024 at 2:42 PM, John wrote: > I like this approach much better, it is more streamlined and clean. I ran with your suggestions and the attached seems to work. I am grateful for your feedback and suggestions on the syntax. I pushed my draft incorporating your suggestions out to my GitHub at the following link. I am going to unsubscribe from LKML now (hundreds of emails per day) so please cc me on any replies or use the GitHub. Thanks. https://github.com/graysky2/kernel_compiler_patch/blob/master/lite-more-uarches-for-kernel-6.8-rc4%2B.patch
On September 17, 2024 8:22:38 PM GMT+02:00, John <therealgraysky@proton.me> wrote: >On Sunday, September 15th, 2024 at 2:42 PM, John wrote: >> I like this approach much better, it is more streamlined and clean. I ran with your suggestions and the attached seems to work. I am grateful for your feedback and suggestions on the syntax. > >I pushed my draft incorporating your suggestions out to my GitHub at the following link. I am going to unsubscribe from LKML now (hundreds of emails per day) so please cc me on any replies or use the GitHub. Thanks. > >https://github.com/graysky2/kernel_compiler_patch/blob/master/lite-more-uarches-for-kernel-6.8-rc4%2B.patch > Also, these are *not* uarch levels; they are ISA levels...
On Sunday, September 15th, 2024 at 7:40 AM, Dave Hansen <dave.hansen@intel.com> wrote: > If these are going to be exposed to end users, we need some kind of > help text that helps end users select among these options and what the > pitfalls are. > > I actually don't have the foggiest idea what an "AMD x86-64 CPU with v2 > instructions" even is. Even saying "AMD x86-64 CPU" isn't super helpful > because "AMD x86_64" is kinda a generic way to refer to all the 64-bit > x86 CPUs, Intel included. > Why are there v4's for both AMD and Intel that do the exact same thing? I did it this way to selectively include the AMD-specific and Intel-specific membership in the config options below. For example, the AMD options should be included in the X86_INTEL_USERCOPY config. > Why is this copied and pasted six times? > > + depends on (CC_IS_GCC && GCC_VERSION > 110000)... I believe the version requirement is needed for each of these new options. Please correct me if I am mistaken. > Alternatively, anyone wanting to do this could just hack their makefile > or (I assume) pass CFLAGS= into the build command-line. Why is > something like that insufficient. I believe this would work: export KCFLAGS=' -march=x86-64-v3' export KCPPFLAGS=' -march=x86-64-v3' > > config X86_HAVE_PAE > > def_bool y > > - depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 > > + depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4 > > > is rather silly when M*_CPU_V* all: > > depends on X86_64 > > right? True! 
> So, taking a step back: Please convince us that this is something we > want to expose to end users in the first place, as opposed to having > them hack makefiles or just allowing users a string instead of using the > existing CONFIG_M* Kconfig options. This was just the logical extension of the already included and now antiquated options, for example pentium-mmx, k6, etc.
On 9/15/24 05:25, John wrote: >> Why is this copied and pasted six times? >> >> + depends on (CC_IS_GCC && GCC_VERSION > 110000)... > I believe the version requirement is needed for each of these new > options. Please correct me if I am mistaken. The requirement is fine. But copying and pasting the same string without refactoring it is not. You should refactor it: bool SUPPORT_MARCH_CODEVERS depends on (CC_IS_GCC && GCC_VERSION > 110000)... depends on X86_64 and then have each site do this: +config MINTEL_CPU_V4 + bool "Intel x86-64-v4" + depends on SUPPORT_MARCH_CODEVERS + help ... >> Why are there v4's for both AMD and Intel that do the exact same >> thing? > > I did it this way to selectively include the AMD-specific and > Intel-specific membership in the config options below. For example, > the AMD options should be included in the X86_INTEL_USERCOPY config. I think you mean "the AMD options should *not* be included..." ... >> Alternatively, anyone wanting to do this could just hack their makefile >> or (I assume) pass CFLAGS= into the build command-line. Why is >> something like that insufficient. > > I believe this would work: > export KCFLAGS=' -march=x86-64-v3' > export KCPPFLAGS=' -march=x86-64-v3' So why not just have users do that? >> So, taking a step back: Please convince us that this is something we >> want to expose to end users in the first place, as opposed to having >> them hack makefiles or just allowing users a string instead of using the >> existing CONFIG_M* Kconfig options. > > This was just the logical extension of the already included and now > antiquated options, for example pentium-mmx, k6, etc. It's probably best not to extend that beast. It really is a relic of the past and, practically, all of our 64-bit builds are GENERIC_CPU=y and have been for a long time. We've moved away from the old days where you could easily compile a kernel that didn't boot. 
We're basically handing our users a big long piece of rope with which to hang themselves here. This patch makes it easy and doesn't do a great job of explaining why they'd take the risk or what the benefit is. I don't think we should do this.
© 2016 - 2024 Red Hat, Inc.