x86: add more x86-64 micro-architecture levels

[PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

GCC 11.1 and Clang 12.0[1] allow for the following new generic
64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4.  This commit
adds them as options accessible under:
 Processor type and features  --->
  Processor family --->

Users of glibc 2.33 and above can see which level is supported
by running: /lib/ld-linux-x86-64.so.2 --help | grep supported

or: /lib64/ld-linux-x86-64.so.2 --help | grep supported

ACKNOWLEDGMENTS
This patch builds on the seminal work by Jeroen.[2]

REFERENCES
1.  https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
2.  http://www.linuxforge.net/docs/linux/linux-gcc.php

Signed-off-by: John Audia <therealgraysky@proton.me>
---
 arch/x86/Kconfig.cpu | 60 +++++++++++++++++++++++++++++++++++++++-----
 arch/x86/Makefile    |  6 +++++
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2a7279d80460..b09a764e6dd1 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,6 +294,54 @@ config GENERIC_CPU
 	  Generic x86-64 CPU.
 	  Run equally well on all x86-64 CPUs.

+config MAMD_CPU_V2
+	bool "AMD x86-64-v2"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  AMD x86-64 CPU with v2 instructions.
+	  Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v2.
+
+config MAMD_CPU_V3
+	bool "AMD x86-64-v3"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  AMD x86-64-v3 CPU with v3 instructions.
+	  Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v3.
+
+config MAMD_CPU_V4
+	bool "AMD x86-64-v4"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  AMD x86-64 CPU with v4 instructions.
+	  Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v4.
+
+config MINTEL_CPU_V2
+	bool "Intel x86-64-v2"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  Intel x86-64 CPU with v2 instructions.
+	  Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v2.
+
+config MINTEL_CPU_V3
+	bool "Intel x86-64-v3"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  Intel x86-64 CPU with v3 instructions.
+	  Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v3.
+
+config MINTEL_CPU_V4
+	bool "Intel x86-64-v4"
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  Intel x86-64 CPU with v4 instructions.
+	  Run equally well on all Intel x86-64 CPUs with min support of -march=x86-64-v4.
+
 endchoice

 config X86_GENERIC
@@ -318,7 +366,7 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
 	default "4" if MELAN || M486SX || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX

@@ -336,11 +384,11 @@ config X86_ALIGNMENT_16

 config X86_INTEL_USERCOPY
 	def_bool y
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4

 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4

 #
 # P6_NOPs are a relatively minor optimization that require a family >=
@@ -356,7 +404,7 @@ config X86_USE_PPRO_CHECKSUM
 config X86_P6_NOP
 	def_bool y
 	depends on X86_64
-	depends on (MCORE2 || MPENTIUM4 || MPSC)
+	depends on (MCORE2 || MPENTIUM4 || MPSC || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4)

 config X86_TSC
 	def_bool y
@@ -364,7 +412,7 @@ config X86_TSC

 config X86_HAVE_PAE
 	def_bool y
-	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
+	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4

 config X86_CMPXCHG64
 	def_bool y
@@ -379,7 +427,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
-	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
+	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4)
 	default "5" if X86_32 && X86_CMPXCHG64
 	default "4"

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 801fd85c3ef6..3d03e687eaac 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,6 +179,12 @@ else
         cflags-$(CONFIG_MCORE2)		+= -march=core2
         cflags-$(CONFIG_MATOM)		+= -march=atom
         cflags-$(CONFIG_GENERIC_CPU)	+= -mtune=generic
+        cflags-$(CONFIG_MAMD_CPU_V2)	+= -march=x86-64-v2
+        cflags-$(CONFIG_MAMD_CPU_V3)	+= -march=x86-64-v3
+        cflags-$(CONFIG_MAMD_CPU_V4)	+= -march=x86-64-v4
+        cflags-$(CONFIG_MINTEL_CPU_V2)	+= -march=x86-64-v2
+        cflags-$(CONFIG_MINTEL_CPU_V3)	+= -march=x86-64-v3
+        cflags-$(CONFIG_MINTEL_CPU_V4)	+= -march=x86-64-v4
         KBUILD_CFLAGS += $(cflags-y)

         rustflags-$(CONFIG_MK8)		+= -Ctarget-cpu=k8
--
2.46.1

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by kernel test robot 1 year, 4 months ago

hi,

we don't have enough knowledge to explain how this commit causes the random
early crash issue reported below.

we noticed the config has below diff comparing to parent.

--- /pkg/linux/x86_64-randconfig-016-20240921/clang-18/70ad4cfb4d4a9f97afd7ba12ae5c4a62e719aa44/.config 2024-09-23 14:10:14.423097567 +0800
+++ /pkg/linux/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/.config 2024-09-23 13:13:36.831871815 +0800
@@ -350,14 +350,19 @@ CONFIG_PVH=y
 CONFIG_PARAVIRT_CLOCK=y
 # CONFIG_JAILHOUSE_GUEST is not set
 CONFIG_ACRN_GUEST=y
-CONFIG_MK8=y
+# CONFIG_MK8 is not set
 # CONFIG_MPSC is not set
 # CONFIG_MCORE2 is not set
 # CONFIG_MATOM is not set
 # CONFIG_GENERIC_CPU is not set
+# CONFIG_MAMD_CPU_V2 is not set
+# CONFIG_MAMD_CPU_V3 is not set
+CONFIG_MAMD_CPU_V4=y
+# CONFIG_MINTEL_CPU_V2 is not set
+# CONFIG_MINTEL_CPU_V3 is not set
+# CONFIG_MINTEL_CPU_V4 is not set
 CONFIG_X86_INTERNODE_CACHE_SHIFT=6
 CONFIG_X86_L1_CACHE_SHIFT=6
-CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 CONFIG_X86_TSC=y
 CONFIG_X86_HAVE_PAE=y


early crash happens 70 times out of 500 runs.
for parent, keeps clean when we run same tests almost 1000 times.

70ad4cfb4d4a9f97 178c2862ab0388f7de1ca23b7b4
---------------- ---------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
           :991          7%          70:500   dmesg.BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code

just FYI what we observed in our tests.



Hello,

kernel test robot noticed "BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code" on:

commit: 178c2862ab0388f7de1ca23b7b4718e09d8acc24 ("[PATCH] x86: add more x86-64 micro-architecture levels")
url: https://github.com/intel-lab-lkp/linux/commits/John/x86-add-more-x86-64-micro-architecture-levels/20240915-190636
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 70ad4cfb4d4a9f97afd7ba12ae5c4a62e719aa44
patch link: https://lore.kernel.org/all/W22JX8eWQctCiWIDKGjx4IUU4ZgYmKa1zPOZSKHHVZ74zpUEmVV1VoPMMNcyc-zhraUayW0d4d7OIUYZHuiEqllnAc1tB8DthZahsHZuw0Y=@proton.me/
patch subject: [PATCH] x86: add more x86-64 micro-architecture levels

in testcase: trinity
version: trinity-i386-abe9de86-1_20230429
with following parameters:

	runtime: 300s
	group: group-04
	nr_groups: 5



compiler: clang-18
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202409241436.b37a069e-oliver.sang@intel.com


early console in setup code
convert early boot stage from hang to failed
BUG: kernel failed in early-boot stage, last printk: early console in setup code
Linux version 6.11.0-rc7-00546-g178c2862ab03 #1
Command line: ip=::::vm-meta-98::dhcp root=/dev/ram0 RESULT_ROOT=/result/trinity/group-04-5-300s/vm-snb/debian-11.1-i386-20220923.cgz/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/454 BOOT_IMAGE=/pkg/linux/x86_64-randconfig-016-20240921/clang-18/178c2862ab0388f7de1ca23b7b4718e09d8acc24/vmlinuz-6.11.0-rc7-00546-g178c2862ab03 branch=linux-devel/devel-hourly-20240921-005829 job=/lkp/jobs/scheduled/vm-meta-98/trinity-group-04-5-300s-debian-11.1-i386-20220923.cgz-x86_64-randconfig-016-20240921-178c2862ab03-20240923-37395-1iv09pj-434.yaml user=lkp ARCH=x86_64 kconfig=x86_64-randconfig-016-20240921 commit=178c2862ab0388f7de1ca23b7b4718e09d8acc24 intremap=posted_msi vmalloc=256M initramfs_async=0 page_owner=on carrier_timeout=60 max_uptime=1200 LKP_SERVER=internal-lkp-server selinux=0 debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 net.ifnames=0 printk.devkmsg=on panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0 drbd.minor_count=8 systemd.log_level=err ignore_loglevel console=tty0 earlyprintk=ttyS0,115200 console=ttyS0,115200 vga=normal rw rcuperf.shutdown=0 rcuscale.shutdown=0 refscale.shutdown=0 watchdog_thresh=240 audit=0 kunit.enable=0 ia32_emulation=on riscv_isa_fallback=1

Kboot worker: lkp-worker22
Elapsed time: 600


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240924/202409241436.b37a069e-oliver.sang@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Tuesday, September 24th, 2024 at 3:00 AM, kernel test robot <oliver.sang@intel.com> wrote:

> early crash happens 70 times out of 500 runs.
> for parent, keeps clean when we run same tests almost 1000 times.

Many thanks for this rigorous testing.  Would you mind using the current revision of this patch (attached) or accessible at my github linked below?

https://github.com/graysky2/kernel_compiler_patch

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Oliver Sang 1 year, 4 months ago

hi, John,

On Tue, Sep 24, 2024 at 05:40:36PM +0000, John wrote:
> On Tuesday, September 24th, 2024 at 3:00 AM, kernel test robot <oliver.sang@intel.com> wrote:
> 
> > early crash happens 70 times out of 500 runs.
> > for parent, keeps clean when we run same tests almost 1000 times.
> 
> Many thanks for this rigorous testing.  Would you mind using the current revision of this patch (attached)

with this version, we cannot reproduce the early crash issue.

like the previous version (178c2862ab0388f7de1ca23b7b4), we still applied the new
version on top of 70ad4cfb4d4a9f97. we ran the tests up to 1000 times.

70ad4cfb4d4a9f97 178c2862ab0388f7de1ca23b7b4 e9725b726c3c0bd129959c308fb
---------------- --------------------------- ---------------------------
       fail:runs  %reproduction    fail:runs  %reproduction    fail:runs
           |             |             |             |             |
           :991          7%          70:500          0%            :1000  dmesg.BUG:kernel_failed_in_early-boot_stage,last_printk:early_console_in_setup_code


> or accessible at my github linked below?
> 
> https://github.com/graysky2/kernel_compiler_patch

> From 718155e6164b4bec45bcba8814c3f82e84f36db0 Mon Sep 17 00:00:00 2001
> From: graysky <therealgraysky AT proton DOT me>
> Date: Mon, 16 Sep 2024 14:47:03 -0400
> 
> FEATURES
> This patch adds additional tunings via new x86-64 ISA levels to the
> Linux kernel.
> 
> These are selectable under:
> 	Processor type and features ---> x86-64 compiler ISA level
> 
> • x86-64     A value of (1) is the default
> • x86-64-v2  A value of (2) brings support for vector
>              instructions up to Streaming SIMD Extensions 4.2 (SSE4.2)
> 	     and Supplemental Streaming SIMD Extensions 3 (SSSE3), the
> 	     POPCNT instruction, and CMPXCHG16B.
> • x86-64-v3  A value of (3) adds vector instructions up to AVX2, MOVBE,
>              and additional bit-manipulation instructions.
> 
> There is also x86-64-v4 but including this makes little sense as
> the kernel does not use any of the AVX512 instructions anyway.
> 
> Users of glibc 2.33 and above can see which level is supported by running:
> 	/lib/ld-linux-x86-64.so.2 --help | grep supported
> Or
> 	/lib64/ld-linux-x86-64.so.2 --help | grep supported
> 
> BENEFITS
> Small but real speed increases are measurable using a make endpoint comparing
> a generic kernel to one built with one of the respective microarchs.
> 
> See the following experimental evidence supporting this statement:
> https://github.com/graysky2/kernel_compiler_patch?tab=readme-ov-file#benchmarks
> 
> REQUIREMENTS
> linux version 6.8-rc3+
> gcc version >=9.0 or clang version >=9.0
> 
> ACKNOWLEDGMENTS
> This patch builds on the seminal work by Jeroen.[2]
> 
> REFERENCES
> 1.  https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
> 2.  http://www.linuxforge.net/docs/linux/linux-gcc.php
> 
> ---
>  arch/x86/Kconfig.cpu | 24 ++++++++++++++++++++++++
>  arch/x86/Makefile    | 11 +++++++++--
>  2 files changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
> index 2a7279d80460..562a273be222 100644
> --- a/arch/x86/Kconfig.cpu
> +++ b/arch/x86/Kconfig.cpu
> @@ -308,6 +308,30 @@ config X86_GENERIC
>  	  This is really intended for distributors who need more
>  	  generic optimizations.
> 
> +config X86_64_VERSION
> +	int "x86-64 compiler ISA level"
> +	range 1 3
> +	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
> +	depends on X86_64 && GENERIC_CPU
> +	help
> +	  Specify a specific x86-64 compiler ISA level.
> +
> +	  There are three x86-64 ISA levels that work on top of
> +	  the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
> +
> +	  x86-64-v2 brings support for vector instructions up to Streaming SIMD
> +	  Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
> +	  (SSSE3), the POPCNT instruction, and CMPXCHG16B.
> +
> +	  x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional
> +	  bit-manipulation instructions.
> +
> +	  x86-64-v4 is not included since the kernel does not use AVX512 instructions
> +
> +	  You can find the best version for your CPU by running one of the following:
> +	  /lib/ld-linux-x86-64.so.2 --help | grep supported
> +	  /lib64/ld-linux-x86-64.so.2 --help | grep supported
> +
>  #
>  # Define implied options from the CPU selection here
>  config X86_INTERNODE_CACHE_SHIFT
> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 801fd85c3ef6..e1f88f846bed 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -178,14 +178,21 @@ else
>          cflags-$(CONFIG_MPSC)		+= -march=nocona
>          cflags-$(CONFIG_MCORE2)		+= -march=core2
>          cflags-$(CONFIG_MATOM)		+= -march=atom
> -        cflags-$(CONFIG_GENERIC_CPU)	+= -mtune=generic
> +        ifeq ($(CONFIG_X86_64_VERSION),1)
> +          cflags-$(CONFIG_GENERIC_CPU)		+= -mtune=generic
> +          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
> +        else
> +          cflags-$(CONFIG_GENERIC_CPU)		+= -march=x86-64-v$(CONFIG_X86_64_VERSION)
> +          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
> +        endif
> +        cflags-$(CONFIG_MATOM) 	+= -march=bonnell
> +        cflags-$(CONFIG_MCORE2) 	+= -march=core2
>          KBUILD_CFLAGS += $(cflags-y)
> 
>          rustflags-$(CONFIG_MK8)		+= -Ctarget-cpu=k8
>          rustflags-$(CONFIG_MPSC)	+= -Ctarget-cpu=nocona
>          rustflags-$(CONFIG_MCORE2)	+= -Ctarget-cpu=core2
>          rustflags-$(CONFIG_MATOM)	+= -Ctarget-cpu=atom
> -        rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
>          KBUILD_RUSTFLAGS += $(rustflags-y)
> 
>          KBUILD_CFLAGS += -mno-red-zone
> --
> 2.46.1
>

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Dave Hansen 1 year, 4 months ago

On 9/24/24 00:00, kernel test robot wrote:
> we don't have enough knowledge how this commit causing the random
> early crash issue as report below.
> 
> we noticed the config has below diff comparing to parent....
> +# CONFIG_MAMD_CPU_V2 is not set
> +# CONFIG_MAMD_CPU_V3 is not set
> +CONFIG_MAMD_CPU_V4=y
> +# CONFIG_MINTEL_CPU_V2 is not set
> +# CONFIG_MINTEL_CPU_V3 is not set
> +# CONFIG_MINTEL_CPU_V4 is not set

Clang is probably being induced to use some ISA that isn't supported on
Sandybridge.

In any case, I think this series is very unlikely to get applied.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by WangYuli 1 year, 4 months ago

Yes, specifying '-march=x86-64-v3' can indeed yield significant performance improvements for CPUs that support it. I could confirm this.

Please allow me a few days, as I will provide a detailed test data report from my own tests after my vacation.

Given that such submissions 'pop up' in the mailing list from time to time, I hope this time we can see it through.

We should have a broad discussion, comprehensive testing and a calm judgment until we reach a final conclusion on whether this modification brings more benefits or drawbacks.

Link: https://github.com/graysky2/kernel_compiler_patch/issues/100
Suggested-by: WangYuli <wangyuli@uniontech.com>
Tested-by: WangYuli <wangyuli@uniontech.com>

Best regards,
--
WangYuli

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Borislav Petkov 1 year, 4 months ago

On Sun, Sep 15, 2024 at 11:05:52AM +0000, John wrote:
> GCC 11.1 and Clang 12.0[1] allow for the following new generic
> 64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4.  This commit
> adds them as options accessible under:
>  Processor type and features  --->
>   Processor family --->
> 
> Users of glibc 2.33 and above can see which level is supported
> by running: /lib/ld-linux-x86-64.so.2 --help | grep supported
> 
> or: /lib64/ld-linux-x86-64.so.2 --help | grep supported
> 
> ACKNOWLEDGMENTS
> This patch builds on the seminal work by Jeroen.[2]
> 
> REFERENCES
> 1.  https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
> 2.  http://www.linuxforge.net/docs/linux/linux-gcc.php
> 
> Signed-off-by: John Audia <therealgraysky@proton.me>
> ---
>  arch/x86/Kconfig.cpu | 60 +++++++++++++++++++++++++++++++++++++++-----
>  arch/x86/Makefile    |  6 +++++
>  2 files changed, 60 insertions(+), 6 deletions(-)

Patches like this one appear off and on on the mailing list and each
time I ask what's the upside of maintaining this complexity?

And every time I get no reply or random handwaving. That's because -march
settings have no noticeable effect on kernel code generation. Because
the kernel code is already pretty much optimized when generated by the
compiler and all those flavors don't bring anything additional.

So this is not going anywhere. But hey, I'm always open to nice
surprises...

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Hanabishi 1 year, 4 months ago

On 9/15/24 12:49, Borislav Petkov wrote:
> Patches like this one appear off and on on the mailing list and each
> time I ask what's the upside of maintaining this complexity?

Besides, well-known patches for this have already existed for years. So why reinvent the wheel here?

E.g. graysky patch used by ZEN kernel:
https://github.com/zen-kernel/zen-kernel/commit/6f32b8af8ccdb56ef2856db3631eea55b79378c6
It contains way more architectures, including ISA levels.

On 9/15/24 11:05, John wrote:
> GCC 11.1 and Clang 12.0[1] allow for the following new generic
> 64-bit levels: x86-64-v2, x86-64-v3, and x86-64-v4.  This commit
> adds them as options accessible under:
>   Processor type and features  --->
>    Processor family --->

Anyway, this whole thing is actually more complicated than simply setting '-march'.
Vector instructions are known to be problematic for the kernel, so they are disabled by KBUILD_CFLAGS.
If you want to go with higher ISA levels than the kernel expects, an additional patch like this is required:
https://github.com/zen-kernel/zen-kernel/commit/addc601c58e035e28153deeb6d441b91f1a50247

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Tuesday, September 17th, 2024 at 2:45 PM, H. Peter Anvin <hpa@zytor.com> wrote:

> On September 17, 2024 8:22:38 PM GMT+02:00, John 
> Also, these are not uarch levels, they are ISA levels...

Thank you for pointing that out.  I see now the differences between ISA levels and uarches.

> Besides, well-known patches for this have already existed for years. So why reinvent the wheel here?
> 
> E.g. graysky patch used by ZEN kernel:
> https://github.com/zen-kernel/zen-kernel/commit/6f32b8af8ccdb56ef2856db3631eea55b79378c6
> It contains way more architectures, including ISA levels.

Yes, that is my git repo.  I created the subset (just -march=x86-64-v[2,3,4]) patch specifically to post on lkml thinking that the larger patch with all of the uarches would be too complex.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Hanabishi 1 year, 4 months ago

On 9/19/24 00:02, John wrote:
> Yes, that is my git repo.

Oops, I didn't realize that.

Even better then! Could you please explain where the performance gains should come from, considering that the kernel force disables all SIMD extensions?
https://github.com/torvalds/linux/blob/4a39ac5b7d62679c07a3e3d12b0f6982377d8a7d/arch/x86/Makefile#L67-L80

I.e. if we won't have them anyway, what gives?

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Wednesday, September 18th, 2024 at 4:25 PM, Hanabishi <i.r.e.c.c.a.k.u.n+kernel.org@gmail.com> wrote:

> Even better then! Could you please explain where the performance gains should come from, considering that the kernel force disables all SIMD extensions?
> https://github.com/torvalds/linux/blob/4a39ac5b7d62679c07a3e3d12b0f6982377d8a7d/arch/x86/Makefile#L67-L80
> 
> I.e. if we won't have them anyway, what gives?

I am not sure.  Are some of the other things -march=x86-64-v3 enables driving them?  I will say that these timed benchmarks have been consistently reproducible for me.  My code for the benchmark script is in that github repo as well if you would like to give it a whirl.

As to the code you referenced re: disabling the SIMD extensions.  Do you know why that is in place?

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Hanabishi 1 year, 4 months ago

On 9/18/24 21:14, John wrote:
> I am not sure.  Are some of the other things -march=x86-64-v3 enables driving them?

Looking up a full table, v3 adds more than just AVX.

x86-64-v2:
CMPXCHG16B
LAHF-SAHF
POPCNT
SSE3
SSE4_1
SSE4_2
SSSE3

x86-64-v3:
AVX
AVX2
BMI1
BMI2
F16C
FMA
LZCNT
MOVBE
OSXSAVE

x86-64-v4:
AVX512F
AVX512BW
AVX512CD
AVX512DQ
AVX512VL

Maybe some other enabled instructions could issue some benefit.

v4 seems to be useless for us though.

> As to the code you referenced re: disabling the SIMD extensions.  Do you know why that is in place?

Not really. There is a link above pointing to a bug report discussing GCC quirks. I am not an expert in that.

One day, out of curiosity, I tried to override it and build the kernel with '-mavx' (free performance, yay!).
Well, it didn't even start and crashed immediately.

I don't know if something has changed since then, but I guess there are reasons.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Wednesday, September 18th, 2024 at 5:48 PM, Hanabishi <i.r.e.c.c.a.k.u.n+kernel.org@gmail.com> wrote:

> One day, out of curiosity, I tried to override it and build the kernel with '-mavx' (free performance, yay!).
> Well, it didn't even start and crashed immediately.
> 
> I don't know if something has changed since then, but I guess there are reasons.

I also tried commenting out the entire line.  I too was able to boot into the kernel but it just rebooted before the login screen.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Dave Hansen 1 year, 4 months ago

On 9/15/24 05:49, Borislav Petkov wrote:
> So this is not going anywhere. But hey, I'm always open to nice
> surprises...

Oh, gah, and I just realized that this is doing "-march" and not
"-mtune".  So this really can build binaries that won't even run on
older CPUs.

That's just mean.

So there needs to be a lot more justification before we go down this road.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Dave Hansen 1 year, 4 months ago

On 9/15/24 04:05, John wrote:
> +config MAMD_CPU_V2
> +	bool "AMD x86-64-v2"
> +	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
> +	depends on X86_64
> +	help
> +	  AMD x86-64 CPU with v2 instructions.
> +	  Run equally well on all AMD x86-64 CPUs with min support of -march=x86-64-v2.

If these are going to be exposed to end users, we need *some* kind of
help text that helps end users select among these options and what the
pitfalls are.

I actually don't have the foggiest idea what an "AMD x86-64 CPU with v2
instructions" even is.  Even saying "AMD x86-64 CPU" isn't super helpful
because "AMD x86_64" is kinda a generic way to refer to all the 64-bit
x86 CPUs, Intel included.

I assume that the compilers have grouped the CPUs into epochs that have
some similarity.  That's great and all, but we need to tell users what
those are.

Why are there v4's for both AMD and Intel that do the exact same thing?

+        cflags-$(CONFIG_MAMD_CPU_V4)	+= -march=x86-64-v4
...
+        cflags-$(CONFIG_MINTEL_CPU_V4)	+= -march=x86-64-v4

Why is this copied and pasted six times?

+	depends on (CC_IS_GCC && GCC_VERSION > 110000)...

I'm also _kinda_ surprised we don't have some kind of Kconfig option to
just pass random flags into the compiler.  That would be another way to
do this.  That would also be a, maybe, 10-line patch.

Alternatively, anyone wanting to do this could just hack their makefile
or (I assume) pass CFLAGS= into the build command-line.  Why is
something like that insufficient?

In the *WORST* case, we shouldn't be doing this with bools.  Do this:

config X86_MARCH_VER
	int "Compiler Micro-Architecture Level"
	range 2 4
	depends on (CC_IS_GCC   && GCC_VERSION   >  110000) ||
                   (CC_IS_CLANG && CLANG_VERSION >= 120000)
	depends on EXPERT
	depends on X86_64
	help
	  Specify a specific compiler "micro-architecture" version.
	  You might want to do this when...
	  You can find the best version for your CPU here...
	  The pitfalls of this option are...

Then you can do fun things like:

 config X86_L1_CACHE_SHIFT
 	int
	default "7" if MPENTIUM4 || MPSC
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || ...
+		       X86_MARCH_VER >= 2

which has the added advantage of never needing to be touched when v5
gets added.

Oh, and this:

>  config X86_HAVE_PAE
>  	def_bool y
> -	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
> +	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4

is rather silly when M*_CPU_V* all:

	depends on X86_64

right?

So, taking a step back: Please convince us that this is something we
want to expose to end users in the first place, as opposed to having
them hack makefiles or just allowing users a string instead of using the
existing CONFIG_M* Kconfig options.

Then, we can discuss the structure of these options.  Should these
"versions" be new "Processor family" options?  Or, should they be
_instead_ of selecting a "Processor family"?

Then, should the new Kconfig options be a series of bools, or an int?

Last, how do we deal with multiple vendors?  Or do we need it at all?
I'm not actually sure at all why this has the AMD versus Intel
distinction at all.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Sunday, September 15th, 2024 at 7:40 AM, Dave Hansen <dave.hansen@intel.com> wrote:

> In the WORST case, we shouldn't be doing this with bools. Do this:
> 
> config X86_MARCH_VER
> int "Compiler Micro-Architecture Level"
> range 2 4
> depends on (CC_IS_GCC && GCC_VERSION > 110000) ||
> 
> (CC_IS_CLANG && CLANG_VERSION >= 120000)
> 
> depends on EXPERT
> depends on X86_64
> help
> Specify a specific compiler "micro-architecture" version.
> You might want to do this when...
> You can find the best version for your CPU here...
> The pitfalls of this option are...
> 
> Then you can do fun like:
> 
> config X86_L1_CACHE_SHIFT
> int
> default "7" if MPENTIUM4 || MPSC
> + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || ...
> + X86_MARCH_VER >= 2
> 
> 
> which has the added advantage of never needing to be touched when v5
> gets added.

I like this approach much better, it is more streamlined and clean.  I ran with your suggestions and the attached seems to work.  I am grateful for your feedback and suggestions on the syntax.


---
 arch/x86/Kconfig.cpu | 27 +++++++++++++++++++--------
 arch/x86/Makefile    |  9 +++++++--
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2a7279d80460..2b24574f6ac5 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -308,6 +308,17 @@ config X86_GENERIC
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
+config X86_MARCH_VER
+	int "Compiler Micro-Architecture Level"
+	range 1 4
+	depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+	depends on X86_64
+	help
+	  Specify a specific compiler "micro-architecture" version.
+	  You might want to do this when...
+	  You can find the best version for your CPU here...
+	  The pitfalls of this option are...
+
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -318,7 +329,7 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || X86_MARCH_VER >= 2
 	default "4" if MELAN || M486SX || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
@@ -336,11 +347,11 @@ config X86_ALIGNMENT_16
 
 config X86_INTEL_USERCOPY
 	def_bool y
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || X86_MARCH_VER >= 2
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || X86_MARCH_VER >= 2
 
 #
 # P6_NOPs are a relatively minor optimization that require a family >=
@@ -356,15 +367,15 @@ config X86_USE_PPRO_CHECKSUM
 config X86_P6_NOP
 	def_bool y
 	depends on X86_64
-	depends on (MCORE2 || MPENTIUM4 || MPSC)
+	depends on (MCORE2 || MPENTIUM4 || MPSC || X86_MARCH_VER >= 2)
 
 config X86_TSC
 	def_bool y
-	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
+	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || X86_MARCH_VER >= 2) || X86_64
 
 config X86_HAVE_PAE
 	def_bool y
-	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
+	depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || X86_MARCH_VER >= 2
 
 config X86_CMPXCHG64
 	def_bool y
@@ -374,12 +385,12 @@ config X86_CMPXCHG64
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
+	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || X86_MARCH_VER >= 2)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
-	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
+	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8 || X86_MARCH_VER >= 2)
 	default "5" if X86_32 && X86_CMPXCHG64
 	default "4"
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 801fd85c3ef6..e2d0d156a919 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -178,14 +178,19 @@ else
         cflags-$(CONFIG_MPSC)		+= -march=nocona
         cflags-$(CONFIG_MCORE2)		+= -march=core2
         cflags-$(CONFIG_MATOM)		+= -march=atom
-        cflags-$(CONFIG_GENERIC_CPU)	+= -mtune=generic
+        ifeq ($(CONFIG_X86_MARCH_VER),1)
+          cflags-$(CONFIG_GENERIC_CPU)		+= -mtune=generic
+          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
+        else
+          cflags-$(CONFIG_GENERIC_CPU)		+= -march=x86-64-v$(CONFIG_X86_MARCH_VER)
+          rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=x86-64-v$(CONFIG_X86_MARCH_VER)
+        endif
         KBUILD_CFLAGS += $(cflags-y)
 
         rustflags-$(CONFIG_MK8)		+= -Ctarget-cpu=k8
         rustflags-$(CONFIG_MPSC)	+= -Ctarget-cpu=nocona
         rustflags-$(CONFIG_MCORE2)	+= -Ctarget-cpu=core2
         rustflags-$(CONFIG_MATOM)	+= -Ctarget-cpu=atom
-        rustflags-$(CONFIG_GENERIC_CPU)	+= -Ztune-cpu=generic
         KBUILD_RUSTFLAGS += $(rustflags-y)
 
         KBUILD_CFLAGS += -mno-red-zone
-- 
2.46.1

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Sunday, September 15th, 2024 at 2:42 PM, John wrote:
> I like this approach much better, it is more streamlined and clean. I ran with your suggestions and the attached seems to work. I am grateful for any feedback and suggestions on the syntax.

I pushed my draft incorporating your suggestions out to my github at the following link.  I am going to unsubscribe from lkml now (hundreds of emails per day) so please cc me on any replies or use the github.  Thanks.

https://github.com/graysky2/kernel_compiler_patch/blob/master/lite-more-uarches-for-kernel-6.8-rc4%2B.patch

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by H. Peter Anvin 1 year, 4 months ago

On September 17, 2024 8:22:38 PM GMT+02:00, John <therealgraysky@proton.me> wrote:
>On Sunday, September 15th, 2024 at 2:42 PM, John wrote:
>> I like this approach much better, it is more streamlined and clean. I ran with your suggestions and the attached seems to work. I am grateful for any feedback and suggestions on the syntax.
>
>I pushed my draft incorporating your suggestions out to my github at the following link.  I am going to unsubscribe from lkml now (hundreds of emails per day) so please cc me on any replies or use the github.  Thanks.
>
>https://github.com/graysky2/kernel_compiler_patch/blob/master/lite-more-uarches-for-kernel-6.8-rc4%2B.patch
>

Also, these are *not* uarch levels, they are ISA levels...

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by John 1 year, 4 months ago

On Sunday, September 15th, 2024 at 7:40 AM, Dave Hansen <dave.hansen@intel.com> wrote:
> If these are going to be exposed to end users, we need some kind of
> help text that helps end users select among these options and what the
> pitfalls are.
> 
> I actually don't have the foggiest idea what an "AMD x86-64 CPU with v2
> instructions" even is. Even saying "AMD x86-64 CPU" isn't super helpful
> because "AMD x86_64" is kinda a generic way to refer to all the 64-bit
> x86 CPUs, Intel included.

> Why are there v4's for both AMD and Intel that do the exact same thing?

I did it this way to selectively include the AMD-specific and Intel-specific membership in the config options below.  For example, the AMD options should be included in the X86_INTEL_USERCOPY config.

> Why is this copied and pasted six times?
> 
> + depends on (CC_IS_GCC && GCC_VERSION > 110000)...

I believe the version requirement is needed for each of these new options.  Please correct me if I am mistaken.

> Alternatively, anyone wanting to do this could just hack their makefile
> or (I assume) pass CFLAGS= into the build command-line. Why is
> something like that insufficient?

I believe this would work:
export KCFLAGS=' -march=x86-64-v3'
export KCPPFLAGS=' -march=x86-64-v3'

> > config X86_HAVE_PAE
> > def_bool y
> > - depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
> > + depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64 || MAMD_CPU_V2 || MAMD_CPU_V3 || MAMD_CPU_V4 || MINTEL_CPU_V2 || MINTEL_CPU_V3 || MINTEL_CPU_V4
> 
> 
> is rather silly when M*_CPU_V* all:
> 
> depends on X86_64
> 
> right?

True!
 
> So, taking a step back: Please convince us that this is something we
> want to expose to end users in the first place, as opposed to having
> them hack makefiles or just allowing users a string instead of using the
> existing CONFIG_M* Kconfig options.

This was just the logical extension of the already included and now antiquated options, for example pentium-mmx, k6, etc.

Re: [PATCH] x86: add more x86-64 micro-architecture levels

Posted by Dave Hansen 1 year, 4 months ago

On 9/15/24 05:25, John wrote:
>> Why is this copied and pasted six times?
>> 
>> + depends on (CC_IS_GCC && GCC_VERSION > 110000)...
> I believe the version requirement is needed for each of these new
> options.  Please correct me if I am mistaken.

The requirement is fine.

But copying and pasting the same string without refactoring it is not.
You should refactor it:

bool SUPPORT_MARCH_CODEVERS
	depends on (CC_IS_GCC && GCC_VERSION > 110000)...
	depends on X86_64

and then have each site do this:

+config MINTEL_CPU_V4
+	bool "Intel x86-64-v4"
+	depends on SUPPORT_MARCH_CODEVERS
+	help
...

>> Why are there v4's for both AMD and Intel that do the exact same
>> thing?
> 
> I did it this way to selectively include the AMD-specific and
> Intel-specific membership in the config options below.  For example,
> the AMD options should be included in the X86_INTEL_USERCOPY config.

I think you mean "the AMD options should *not* be included..."

...
>> Alternatively, anyone wanting to do this could just hack their makefile
>> or (I assume) pass CFLAGS= into the build command-line. Why is
>> something like that insufficient?
> 
> I believe this would work:
> export KCFLAGS=' -march=x86-64-v3'
> export KCPPFLAGS=' -march=x86-64-v3'

So why not just have users do that?

>> So, taking a step back: Please convince us that this is something we
>> want to expose to end users in the first place, as opposed to having
>> them hack makefiles or just allowing users a string instead of using the
>> existing CONFIG_M* Kconfig options.
> 
> This was just the logical extension of the already included and now
> antiquated options, for example pentium-mmx, k6, etc.

It's probably best not to extend that beast.  It really is a relic of
the past and, practically, all of our 64-bit builds are GENERIC_CPU=y
and have been for a long time.  We've moved away from the old days where
you could easily compile a kernel that didn't boot.

We're basically handing our users a big long piece of rope with which to
hang themselves here.  This patch makes it easy and doesn't do a great
job of explaining why they'd take the risk or what the benefit is.

I don't think we should do this.