Add a subsection to the percpu data for frequently accessed variables
that should remain cached on each processor. These variables should not
be accessed from other processors to avoid cacheline bouncing.
This will replace the pcpu_hot struct on x86, and open up similar
functionality to other architectures and the kernel core.
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
include/asm-generic/vmlinux.lds.h | 10 ++++++++++
include/linux/percpu-defs.h | 12 ++++++++++++
2 files changed, 22 insertions(+)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 92fc06f7da74..92dd6065fd0a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
. = ALIGN(PAGE_SIZE); \
__nosave_end = .;
+#define CACHE_HOT_DATA(align) \
+ . = ALIGN(align); \
+ *(SORT_BY_ALIGNMENT(.data..hot.*)) \
+ . = ALIGN(align);
+
#define PAGE_ALIGNED_DATA(page_align) \
. = ALIGN(page_align); \
*(.data..page_aligned) \
@@ -1065,6 +1070,10 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
+ __per_cpu_hot_start = .; \
+ *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \
+ . = ALIGN(cacheline); \
+ __per_cpu_hot_end = .; \
*(.data..percpu..read_mostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
@@ -1112,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
INIT_TASK_DATA(inittask) \
NOSAVE_DATA \
PAGE_ALIGNED_DATA(pagealigned) \
+ CACLE_HOT_DATA(cacheline) \
CACHELINE_ALIGNED_DATA(cacheline) \
READ_MOSTLY_DATA(cacheline) \
DATA_DATA \
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 40d34e032d5b..eb3393f96e5a 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -112,6 +112,18 @@
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")
+/*
+ * Declaration/definition used for per-CPU variables that are frequently
+ * accessed and should be in a single cacheline.
+ *
+ * For use only by architecture and core code.
+ */
+#define DECLARE_PER_CPU_CACHE_HOT(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name)
+
+#define DEFINE_PER_CPU_CACHE_HOT(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name)
+
/*
* Declaration/definition used for per-CPU variables that must be cacheline
* aligned under SMP conditions so that, whilst a particular instance of the
--
2.48.1
Hi Brian, kernel test robot noticed the following build errors: [auto build test ERROR on 79165720f31868d9a9f7e5a50a09d5fe510d1822] url: https://github.com/intel-lab-lkp/linux/commits/Brian-Gerst/percpu-Introduce-percpu-hot-section/20250227-021212 base: 79165720f31868d9a9f7e5a50a09d5fe510d1822 patch link: https://lore.kernel.org/r/20250226180531.1242429-2-brgerst%40gmail.com patch subject: [PATCH v2 01/11] percpu: Introduce percpu hot section config: arm64-allnoconfig (https://download.01.org/0day-ci/archive/20250228/202502280328.SFEgOJ50-lkp@intel.com/config) compiler: aarch64-linux-gcc (GCC) 14.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250228/202502280328.SFEgOJ50-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202502280328.SFEgOJ50-lkp@intel.com/ All errors (new ones prefixed by >>): >> aarch64-linux-ld:./arch/arm64/kernel/vmlinux.lds:107: syntax error -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Brian, kernel test robot noticed the following build errors: [auto build test ERROR on 79165720f31868d9a9f7e5a50a09d5fe510d1822] url: https://github.com/intel-lab-lkp/linux/commits/Brian-Gerst/percpu-Introduce-percpu-hot-section/20250227-021212 base: 79165720f31868d9a9f7e5a50a09d5fe510d1822 patch link: https://lore.kernel.org/r/20250226180531.1242429-2-brgerst%40gmail.com patch subject: [PATCH v2 01/11] percpu: Introduce percpu hot section config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20250227/202502272142.2EFoWquv-lkp@intel.com/config) compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250227/202502272142.2EFoWquv-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202502272142.2EFoWquv-lkp@intel.com/ All errors (new ones prefixed by >>): >> s390x-linux-ld: cannot find CACLE_HOT_DATA: No such file or directory -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Wed, Feb 26, 2025 at 7:05 PM Brian Gerst <brgerst@gmail.com> wrote: > > Add a subsection to the percpu data for frequently accessed variables > that should remain cached on each processor. These varables should not > be accessed from other processors to avoid cacheline bouncing. > > This will replace the pcpu_hot struct on x86, and open up similar > functionality to other architectures and the kernel core. > > Signed-off-by: Brian Gerst <brgerst@gmail.com> > --- > include/asm-generic/vmlinux.lds.h | 10 ++++++++++ > include/linux/percpu-defs.h | 12 ++++++++++++ > 2 files changed, 22 insertions(+) > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > index 92fc06f7da74..92dd6065fd0a 100644 > --- a/include/asm-generic/vmlinux.lds.h > +++ b/include/asm-generic/vmlinux.lds.h > @@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > . = ALIGN(PAGE_SIZE); \ > __nosave_end = .; > > +#define CACHE_HOT_DATA(align) \ > + . = ALIGN(align); \ > + *(SORT_BY_ALIGNMENT(.data..hot.*)) \ > + . = ALIGN(align); > + > #define PAGE_ALIGNED_DATA(page_align) \ > . = ALIGN(page_align); \ > *(.data..page_aligned) \ > @@ -1065,6 +1070,10 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > . = ALIGN(PAGE_SIZE); \ > *(.data..percpu..page_aligned) \ > . = ALIGN(cacheline); \ > + __per_cpu_hot_start = .; \ > + *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \ > + . = ALIGN(cacheline); \ > + __per_cpu_hot_end = .; \ > *(.data..percpu..read_mostly) \ > . = ALIGN(cacheline); \ > *(.data..percpu) \ > @@ -1112,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > INIT_TASK_DATA(inittask) \ > NOSAVE_DATA \ > PAGE_ALIGNED_DATA(pagealigned) \ > + CACLE_HOT_DATA(cacheline) \ There is a typo in the above macro name. Uros. 
> CACHELINE_ALIGNED_DATA(cacheline) \ > READ_MOSTLY_DATA(cacheline) \ > DATA_DATA \ > diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h > index 40d34e032d5b..eb3393f96e5a 100644 > --- a/include/linux/percpu-defs.h > +++ b/include/linux/percpu-defs.h > @@ -112,6 +112,18 @@ > #define DEFINE_PER_CPU(type, name) \ > DEFINE_PER_CPU_SECTION(type, name, "") > > +/* > + * Declaration/definition used for per-CPU variables that are frequently > + * accessed and should be in a single cacheline. > + * > + * For use only by architecture and core code. > + */ > +#define DECLARE_PER_CPU_CACHE_HOT(type, name) \ > + DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name) > + > +#define DEFINE_PER_CPU_CACHE_HOT(type, name) \ > + DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name) > + > /* > * Declaration/definition used for per-CPU variables that must be cacheline > * aligned under SMP conditions so that, whilst a particular instance of the > -- > 2.48.1 >
On Wed, Feb 26, 2025 at 2:36 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Wed, Feb 26, 2025 at 7:05 PM Brian Gerst <brgerst@gmail.com> wrote: > > > > Add a subsection to the percpu data for frequently accessed variables > > that should remain cached on each processor. These varables should not > > be accessed from other processors to avoid cacheline bouncing. > > > > This will replace the pcpu_hot struct on x86, and open up similar > > functionality to other architectures and the kernel core. > > > > Signed-off-by: Brian Gerst <brgerst@gmail.com> > > --- > > include/asm-generic/vmlinux.lds.h | 10 ++++++++++ > > include/linux/percpu-defs.h | 12 ++++++++++++ > > 2 files changed, 22 insertions(+) > > > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > > index 92fc06f7da74..92dd6065fd0a 100644 > > --- a/include/asm-generic/vmlinux.lds.h > > +++ b/include/asm-generic/vmlinux.lds.h > > @@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > > . = ALIGN(PAGE_SIZE); \ > > __nosave_end = .; > > > > +#define CACHE_HOT_DATA(align) \ > > + . = ALIGN(align); \ > > + *(SORT_BY_ALIGNMENT(.data..hot.*)) \ > > + . = ALIGN(align); > > + > > #define PAGE_ALIGNED_DATA(page_align) \ > > . = ALIGN(page_align); \ > > *(.data..page_aligned) \ > > @@ -1065,6 +1070,10 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > > . = ALIGN(PAGE_SIZE); \ > > *(.data..percpu..page_aligned) \ > > . = ALIGN(cacheline); \ > > + __per_cpu_hot_start = .; \ > > + *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \ > > + . = ALIGN(cacheline); \ > > + __per_cpu_hot_end = .; \ > > *(.data..percpu..read_mostly) \ > > . = ALIGN(cacheline); \ > > *(.data..percpu) \ > > @@ -1112,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) > > INIT_TASK_DATA(inittask) \ > > NOSAVE_DATA \ > > PAGE_ALIGNED_DATA(pagealigned) \ > > + CACLE_HOT_DATA(cacheline) \ > > There is a typo in the above macro name. 
Fixed in the next version. Brian Gerst
© 2016 - 2026 Red Hat, Inc.