[PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations

Vishal Chourasia posted 2 patches 1 month ago
There is a newer version of this series
[PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
Posted by Vishal Chourasia 1 month ago
Expedite synchronize_rcu() during SMT mode switch operations that are
initiated via the /sys/devices/system/cpu/smt/control interface.

SMT mode switch operations, e.g. from SMT 8 to SMT 1 or vice versa (and
other mode transitions), are user-driven and should therefore complete as
soon as possible. Switching SMT states involves iterating over a list of
CPUs and performing hotplug operations. These transitions were found to
take a significant amount of time to complete, particularly on
high-core-count systems, because the system was blocked on
synchronize_rcu() calls.

Below is one of the call stacks that accounted for most of the blocking
time, as reported by the offcputime bcc script for a CPU offline
operation:

    finish_task_switch
    __schedule
    schedule
    schedule_timeout
    wait_for_completion
    __wait_rcu_gp
    synchronize_rcu
    cpuidle_uninstall_idle_handler
    powernv_cpuidle_cpu_dead
    cpuhp_invoke_callback
    __cpuhp_invoke_callback_range
    _cpu_down
    cpu_device_down
    cpu_subsys_offline
    device_offline
    online_store
    dev_attr_store
    sysfs_kf_write
    kernfs_fop_write_iter
    vfs_write
    ksys_write
    system_call_exception
    system_call_common
   -                bash (29705)
        5771569  ------------------------>  Duration (us)

Signed-off-by: Vishal Chourasia <vishalc@linux.ibm.com>
---
 include/linux/rcupdate.h | 3 +++
 kernel/cpu.c             | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..f12d0d0f008d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,6 +1190,9 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
 extern int rcu_expedited;
 extern int rcu_normal;
 
+extern void rcu_expedite_gp(void);
+extern void rcu_unexpedite_gp(void);
+
 DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
 DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index edaa37419036..f5517d64d3f3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2683,6 +2683,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 		ret = -EBUSY;
 		goto out;
 	}
+	rcu_expedite_gp();
 	/* Hold cpus_write_lock() for entire batch operation. */
 	cpus_write_lock();
 	for_each_online_cpu(cpu) {
@@ -2715,6 +2716,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 	if (!ret)
 		cpu_smt_control = ctrlval;
 	cpus_write_unlock();
+	rcu_unexpedite_gp();
 	arch_smt_update();
 out:
 	cpu_maps_update_done();
@@ -2734,6 +2736,7 @@ int cpuhp_smt_enable(void)
 	int cpu, ret = 0;
 
 	cpu_maps_update_begin();
+	rcu_expedite_gp();
 	/* Hold cpus_write_lock() for entire batch operation. */
 	cpus_write_lock();
 	cpu_smt_control = CPU_SMT_ENABLED;
@@ -2750,6 +2753,7 @@ int cpuhp_smt_enable(void)
 		cpuhp_online_cpu_device(cpu);
 	}
 	cpus_write_unlock();
+	rcu_unexpedite_gp();
 	arch_smt_update();
 	cpu_maps_update_done();
 	return ret;
-- 
2.53.0
Re: [PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
Posted by Shrikanth Hegde 1 month ago

On 2/16/26 5:49 PM, Vishal Chourasia wrote:
> Expedite synchronize_rcu during the SMT mode switch operation when
> initiated via /sys/devices/system/cpu/smt/control interface
> 
> SMT mode switch operation i.e. between SMT 8 to SMT 1 or vice versa and
> others, are user driven operations and therefore should complete as soon
> as possible. Switching SMT states involves iterating over a list of CPUs
> and performing hotplug operations. It was found these transitions took
> significantly large amount of time to complete particularly on
> high-core-count systems because system was blocked on synchronize_rcu
> calls.
> 
> Below is one of the call-stacks that accounted for most of the blocking
> time overhead as reported by offcputime bcc script for CPU offline
> operation,
> 
>      finish_task_switch
>      __schedule
>      schedule
>      schedule_timeout
>      wait_for_completion
>      __wait_rcu_gp
>      synchronize_rcu
>      cpuidle_uninstall_idle_handler
>      powernv_cpuidle_cpu_dead
>      cpuhp_invoke_callback
>      __cpuhp_invoke_callback_range
>      _cpu_down
>      cpu_device_down
>      cpu_subsys_offline
>      device_offline
>      online_store
>      dev_attr_store
>      sysfs_kf_write
>      kernfs_fop_write_iter
>      vfs_write
>      ksys_write
>      system_call_exception
>      system_call_common
>     -                bash (29705)
>          5771569  ------------------------>  Duration (us)
> 
> Signed-off-by: Vishal Chourasia <vishalc@linux.ibm.com>
> ---
>   include/linux/rcupdate.h | 3 +++
>   kernel/cpu.c             | 4 ++++
>   2 files changed, 7 insertions(+)
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 7729fef249e1..f12d0d0f008d 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -1190,6 +1190,9 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
>   extern int rcu_expedited;
>   extern int rcu_normal;
>   
> +extern void rcu_expedite_gp(void);
> +extern void rcu_unexpedite_gp(void);
> +
>   DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
>   DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
>   

IMHO, the below may be better.

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f12d0d0f008d..61b80c29d53b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,8 +1190,13 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
  extern int rcu_expedited;
  extern int rcu_normal;

-extern void rcu_expedite_gp(void);
-extern void rcu_unexpedite_gp(void);
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_expedite_gp(void) { }
+static inline void rcu_unexpedite_gp(void) { }
+#else
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+#endif

  DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
  DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index dc5d614b372c..41a0d262e964 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -512,8 +512,6 @@ do {                                                \
  static inline bool rcu_gp_is_normal(void) { return true; }
  static inline bool rcu_gp_is_expedited(void) { return false; }
  static inline bool rcu_async_should_hurry(void) { return false; }
-static inline void rcu_expedite_gp(void) { }
-static inline void rcu_unexpedite_gp(void) { }
  static inline void rcu_async_hurry(void) { }
  static inline void rcu_async_relax(void) { }
  static inline bool rcu_cpu_online(int cpu) { return true; }
@@ -521,8 +519,6 @@ static inline bool rcu_cpu_online(int cpu) { return true; }
  bool rcu_gp_is_normal(void);     /* Internal RCU use. */
  bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
  bool rcu_async_should_hurry(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
  void rcu_async_hurry(void);
  void rcu_async_relax(void);
  void rcupdate_announce_bootup_oddness(void);
Re: [PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
Posted by kernel test robot 1 month ago
Hi Vishal,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20260213]
[also build test ERROR on linus/master]
[cannot apply to tip/smp/core rcu/rcu/dev v6.19 v6.19-rc8 v6.19-rc7 v6.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vishal-Chourasia/cpuhp-Optimize-SMT-switch-operation-by-batching-lock-acquisition/20260216-202247
base:   next-20260213
patch link:    https://lore.kernel.org/r/20260216121927.489062-6-vishalc%40linux.ibm.com
patch subject: [PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
config: arm-allnoconfig (https://download.01.org/0day-ci/archive/20260217/202602170049.WQD7Wcuj-lkp@intel.com/config)
compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project e86750b29fa0ff207cd43213d66dabe565417638)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260217/202602170049.WQD7Wcuj-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602170049.WQD7Wcuj-lkp@intel.com/

All errors (new ones prefixed by >>):

>> ld.lld: error: duplicate symbol: rcu_expedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_expedite_gp) in archive vmlinux.a
   >>> defined at srcutiny.c
   >>>            kernel/rcu/srcutiny.o:(.text+0x1) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: rcu_unexpedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_unexpedite_gp) in archive vmlinux.a
   >>> defined at srcutiny.c
   >>>            kernel/rcu/srcutiny.o:(.text+0x41) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: rcu_expedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_expedite_gp) in archive vmlinux.a
   >>> defined at tiny.c
   >>>            kernel/rcu/tiny.o:(.text+0x1) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: rcu_unexpedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_unexpedite_gp) in archive vmlinux.a
   >>> defined at tiny.c
   >>>            kernel/rcu/tiny.o:(.text+0x41) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: rcu_expedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_expedite_gp) in archive vmlinux.a
   >>> defined at slab_common.c
   >>>            mm/slab_common.o:(.text+0x1) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: rcu_unexpedite_gp
   >>> defined at update.c
   >>>            kernel/rcu/update.o:(rcu_unexpedite_gp) in archive vmlinux.a
   >>> defined at slab_common.c
   >>>            mm/slab_common.o:(.text+0x41) in archive vmlinux.a

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
Posted by kernel test robot 1 month ago
Hi Vishal,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20260213]
[also build test ERROR on linus/master]
[cannot apply to tip/smp/core rcu/rcu/dev v6.19 v6.19-rc8 v6.19-rc7 v6.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Vishal-Chourasia/cpuhp-Optimize-SMT-switch-operation-by-batching-lock-acquisition/20260216-202247
base:   next-20260213
patch link:    https://lore.kernel.org/r/20260216121927.489062-6-vishalc%40linux.ibm.com
patch subject: [PATCH v2 2/2] cpuhp: Expedite RCU grace periods during SMT operations
config: alpha-allnoconfig (https://download.01.org/0day-ci/archive/20260217/202602170042.5jdtaQeu-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260217/202602170042.5jdtaQeu-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602170042.5jdtaQeu-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from kernel/rcu/update.c:50:
>> kernel/rcu/rcu.h:515:20: error: static declaration of 'rcu_expedite_gp' follows non-static declaration
     515 | static inline void rcu_expedite_gp(void) { }
         |                    ^~~~~~~~~~~~~~~
   In file included from include/linux/rbtree.h:24,
                    from include/linux/mm_types.h:11,
                    from include/linux/mmzone.h:22,
                    from include/linux/topology.h:34,
                    from include/linux/irq.h:19,
                    from include/asm-generic/hardirq.h:17,
                    from arch/alpha/include/asm/hardirq.h:8,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from kernel/rcu/update.c:25:
   include/linux/rcupdate.h:1193:13: note: previous declaration of 'rcu_expedite_gp' with type 'void(void)'
    1193 | extern void rcu_expedite_gp(void);
         |             ^~~~~~~~~~~~~~~
>> kernel/rcu/rcu.h:516:20: error: static declaration of 'rcu_unexpedite_gp' follows non-static declaration
     516 | static inline void rcu_unexpedite_gp(void) { }
         |                    ^~~~~~~~~~~~~~~~~
   include/linux/rcupdate.h:1194:13: note: previous declaration of 'rcu_unexpedite_gp' with type 'void(void)'
    1194 | extern void rcu_unexpedite_gp(void);
         |             ^~~~~~~~~~~~~~~~~


vim +/rcu_expedite_gp +515 kernel/rcu/rcu.h

2b34c43cc1671c Paul E. McKenney        2017-03-14  509  
25c36329a30c8c Paul E. McKenney        2017-05-03  510  #ifdef CONFIG_TINY_RCU
25c36329a30c8c Paul E. McKenney        2017-05-03  511  /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
7414fac050d5e0 Paul E. McKenney        2017-06-12  512  static inline bool rcu_gp_is_normal(void) { return true; }
7414fac050d5e0 Paul E. McKenney        2017-06-12  513  static inline bool rcu_gp_is_expedited(void) { return false; }
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  514) static inline bool rcu_async_should_hurry(void) { return false; }
7414fac050d5e0 Paul E. McKenney        2017-06-12 @515  static inline void rcu_expedite_gp(void) { }
7414fac050d5e0 Paul E. McKenney        2017-06-12 @516  static inline void rcu_unexpedite_gp(void) { }
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  517) static inline void rcu_async_hurry(void) { }
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  518) static inline void rcu_async_relax(void) { }
2be4686d866ad5 Frederic Weisbecker     2023-10-27  519  static inline bool rcu_cpu_online(int cpu) { return true; }
25c36329a30c8c Paul E. McKenney        2017-05-03  520  #else /* #ifdef CONFIG_TINY_RCU */
25c36329a30c8c Paul E. McKenney        2017-05-03  521  bool rcu_gp_is_normal(void);     /* Internal RCU use. */
25c36329a30c8c Paul E. McKenney        2017-05-03  522  bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  523) bool rcu_async_should_hurry(void);  /* Internal RCU use. */
25c36329a30c8c Paul E. McKenney        2017-05-03  524  void rcu_expedite_gp(void);
25c36329a30c8c Paul E. McKenney        2017-05-03  525  void rcu_unexpedite_gp(void);
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  526) void rcu_async_hurry(void);
6efdda8bec2900 Joel Fernandes (Google  2023-01-12  527) void rcu_async_relax(void);
25c36329a30c8c Paul E. McKenney        2017-05-03  528  void rcupdate_announce_bootup_oddness(void);
2be4686d866ad5 Frederic Weisbecker     2023-10-27  529  bool rcu_cpu_online(int cpu);
474d0997361c07 Paul E. McKenney        2021-04-20  530  #ifdef CONFIG_TASKS_RCU_GENERIC
e21408ceec2de5 Paul E. McKenney        2020-03-16  531  void show_rcu_tasks_gp_kthreads(void);
e0a34641eb551e Arnd Bergmann           2023-06-09  532  #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
e0a34641eb551e Arnd Bergmann           2023-06-09  533  static inline void show_rcu_tasks_gp_kthreads(void) {}
e0a34641eb551e Arnd Bergmann           2023-06-09  534  #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
e0a34641eb551e Arnd Bergmann           2023-06-09  535  #endif /* #else #ifdef CONFIG_TINY_RCU */
e0a34641eb551e Arnd Bergmann           2023-06-09  536  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki