On Sun, Apr 13, 2025 at 11:00:52PM -0700, Boqun Feng wrote:
> Add a module parameter for shazptr to allow skipping the self scan in
> synchronize_shazptr(). This can force every synchronize_shazptr() to use
> the shazptr scan kthread, which helps with testing the scan kthread.
>
> Another reason users may want to set this parameter is to reduce the
> CPU cost of the self scan in synchronize_shazptr().
>
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
One nit below, but nevertheless:
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
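
(For whoever ends up testing this: given the 0644 permissions, the
parameter should be settable both at boot time via
shazptr.skip_synchronize_self_scan=1 on the kernel command line and at
runtime by writing to
/sys/module/shazptr/parameters/skip_synchronize_self_scan. See the nit
below on where the "shazptr." prefix comes from.)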
> ---
> kernel/locking/shazptr.c | 28 +++++++++++++++++++++-------
> 1 file changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/locking/shazptr.c b/kernel/locking/shazptr.c
> index a8559cb559f8..b3f7e8390eb2 100644
> --- a/kernel/locking/shazptr.c
> +++ b/kernel/locking/shazptr.c
> @@ -14,11 +14,17 @@
> #include <linux/completion.h>
> #include <linux/kthread.h>
> #include <linux/list.h>
> +#include <linux/moduleparam.h>
> #include <linux/mutex.h>
> #include <linux/shazptr.h>
> #include <linux/slab.h>
> #include <linux/sort.h>
>
> +#ifdef MODULE_PARAM_PREFIX
> +#undef MODULE_PARAM_PREFIX
> +#endif
> +#define MODULE_PARAM_PREFIX "shazptr."
I do not believe that you need this when the desired MODULE_PARAM_PREFIX
matches the name of the file, as it does in this case. For example,
kernel/rcu/tree.c needs this to get the "rcutree." prefix, but
kernel/rcu/refscale.c can do without it.
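
That is, for built-in code moduleparam.h already defaults
MODULE_PARAM_PREFIX to KBUILD_MODNAME ".", and KBUILD_MODNAME for
kernel/locking/shazptr.c is "shazptr", so unless I am missing some
kbuild twist, the declaration could be reduced to just:

	/* Disabled by default. */
	static int skip_synchronize_self_scan;
	module_param(skip_synchronize_self_scan, int, 0644);

with the #ifdef/#undef/#define block dropped entirely, and the
parameter would still show up as shazptr.skip_synchronize_self_scan.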
> +
> DEFINE_PER_CPU_SHARED_ALIGNED(void *, shazptr_slots);
> EXPORT_PER_CPU_SYMBOL_GPL(shazptr_slots);
>
> @@ -252,6 +258,10 @@ static void synchronize_shazptr_busywait(void *ptr)
> }
> }
>
> +/* Disabled by default. */
> +static int skip_synchronize_self_scan;
> +module_param(skip_synchronize_self_scan, int, 0644);
> +
> static void synchronize_shazptr_normal(void *ptr)
> {
> int cpu;
> @@ -259,15 +269,19 @@ static void synchronize_shazptr_normal(void *ptr)
>
> smp_mb(); /* Synchronize with the smp_mb() in shazptr_acquire(). */
>
> - for_each_possible_cpu(cpu) {
> - void **slot = per_cpu_ptr(&shazptr_slots, cpu);
> - void *val;
> + if (unlikely(skip_synchronize_self_scan)) {
> + blocking_grp_mask = ~0UL;
> + } else {
> + for_each_possible_cpu(cpu) {
> + void **slot = per_cpu_ptr(&shazptr_slots, cpu);
> + void *val;
>
> - /* Pair with smp_store_release() in shazptr_clear(). */
> - val = smp_load_acquire(slot);
> + /* Pair with smp_store_release() in shazptr_clear(). */
> + val = smp_load_acquire(slot);
>
> - if (val == ptr || val == SHAZPTR_WILDCARD)
> - blocking_grp_mask |= 1UL << (cpu / shazptr_scan.cpu_grp_size);
> + if (val == ptr || val == SHAZPTR_WILDCARD)
> + blocking_grp_mask |= 1UL << (cpu / shazptr_scan.cpu_grp_size);
> + }
> }
>
> /* Found blocking slots, prepare to wait. */
> --
> 2.47.1
>
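
One more note for anyone reading along: with the parameter set, the
rewritten path above simply pretends that the self scan found a match
in every CPU group. Hand-sketching the effect (my paraphrase, not code
from the patch):

	smp_mb();			/* Still pairs with shazptr_acquire(). */
	blocking_grp_mask = ~0UL;	/* Treat every CPU group as blocking. */
	/* ... fall through to the wait path, i.e. the scan kthread. */

which is what guarantees that every synchronize_shazptr() exercises
the scan kthread, as the commit log intends.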