This patch uses an on-stack cpumask to replace the percpu cfd cpumask in
smp_call_function_many_cond(). alloc_cpumask_var() may fail when
CONFIG_CPUMASK_OFFSTACK is enabled. In that extreme case, fall back to
cfd->cpumask. This is preparation for the next patch.
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
kernel/smp.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index f572716c3c7d..35948afced2e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -805,11 +805,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
int cpu, last_cpu, this_cpu = smp_processor_id();
struct call_function_data *cfd;
bool wait = scf_flags & SCF_WAIT;
+ bool preemptible_wait = true;
+ cpumask_var_t cpumask_stack;
+ struct cpumask *cpumask;
int nr_cpus = 0;
bool run_remote = false;
lockdep_assert_preemption_disabled();
+ if (!alloc_cpumask_var(&cpumask_stack, GFP_ATOMIC))
+ preemptible_wait = false;
+
/*
* Can deadlock when called with interrupts disabled.
* We allow cpu's that are not yet online though, as no one else can
@@ -831,15 +837,18 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
/* Check if we need remote execution, i.e., any CPU excluding this one. */
if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) {
cfd = this_cpu_ptr(&cfd_data);
- cpumask_and(cfd->cpumask, mask, cpu_online_mask);
- __cpumask_clear_cpu(this_cpu, cfd->cpumask);
+
+ cpumask = preemptible_wait ? cpumask_stack : cfd->cpumask;
+
+ cpumask_and(cpumask, mask, cpu_online_mask);
+ __cpumask_clear_cpu(this_cpu, cpumask);
cpumask_clear(cfd->cpumask_ipi);
- for_each_cpu(cpu, cfd->cpumask) {
+ for_each_cpu(cpu, cpumask) {
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
if (cond_func && !cond_func(cpu, info)) {
- __cpumask_clear_cpu(cpu, cfd->cpumask);
+ __cpumask_clear_cpu(cpu, cpumask);
continue;
}
@@ -890,13 +899,16 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
}
if (run_remote && wait) {
- for_each_cpu(cpu, cfd->cpumask) {
+ for_each_cpu(cpu, cpumask) {
call_single_data_t *csd;
csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock_wait(csd);
}
}
+
+ if (preemptible_wait)
+ free_cpumask_var(cpumask_stack);
}
/**
--
2.20.1
On Tue, Feb 03, 2026 at 07:23:54PM +0800, Chuyi Zhou wrote:
> This patch use on-stack cpumask to replace percpu cfd cpumask in
> smp_call_function_many_cond(). alloc_cpumask_var() may fail when
> CONFIG_CPUMASK_OFFSTACK is enabled. In such extreme case, fall back to
> cfd->cpumask. This is a preparation for the next patch.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
> kernel/smp.c | 22 +++++++++++++++++-----
> 1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/smp.c b/kernel/smp.c
> index f572716c3c7d..35948afced2e 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -805,11 +805,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> int cpu, last_cpu, this_cpu = smp_processor_id();
> struct call_function_data *cfd;
> bool wait = scf_flags & SCF_WAIT;
> + bool preemptible_wait = true;
> + cpumask_var_t cpumask_stack;
> + struct cpumask *cpumask;
> int nr_cpus = 0;
> bool run_remote = false;
>
> lockdep_assert_preemption_disabled();
>
> + if (!alloc_cpumask_var(&cpumask_stack, GFP_ATOMIC))
> + preemptible_wait = false;
> +
> /*
> * Can deadlock when called with interrupts disabled.
> * We allow cpu's that are not yet online though, as no one else can
> @@ -831,15 +837,18 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> /* Check if we need remote execution, i.e., any CPU excluding this one. */
> if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) {
> cfd = this_cpu_ptr(&cfd_data);
> - cpumask_and(cfd->cpumask, mask, cpu_online_mask);
> - __cpumask_clear_cpu(this_cpu, cfd->cpumask);
> +
> + cpumask = preemptible_wait ? cpumask_stack : cfd->cpumask;
> +
> + cpumask_and(cpumask, mask, cpu_online_mask);
> + __cpumask_clear_cpu(this_cpu, cpumask);
>
> cpumask_clear(cfd->cpumask_ipi);
> - for_each_cpu(cpu, cfd->cpumask) {
> + for_each_cpu(cpu, cpumask) {
> call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
>
> if (cond_func && !cond_func(cpu, info)) {
> - __cpumask_clear_cpu(cpu, cfd->cpumask);
> + __cpumask_clear_cpu(cpu, cpumask);
> continue;
> }
>
> @@ -890,13 +899,16 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> }
>
> if (run_remote && wait) {
> - for_each_cpu(cpu, cfd->cpumask) {
> + for_each_cpu(cpu, cpumask) {
> call_single_data_t *csd;
>
> csd = per_cpu_ptr(cfd->csd, cpu);
> csd_lock_wait(csd);
> }
> }
> +
> + if (preemptible_wait)
> + free_cpumask_var(cpumask_stack);
> }
*sigh*, even if you don't break RT, this is quite terrible, what is
wrong with something like so?
---
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -802,19 +802,18 @@ static void smp_call_function_many_cond(
unsigned int scf_flags,
smp_cond_func_t cond_func)
{
+ struct call_function_data *cfd = this_cpu_ptr(&cfd_data);
int cpu, last_cpu, this_cpu = smp_processor_id();
- struct call_function_data *cfd;
+ struct cpumask *cpumask = cfd->cpumask;
bool wait = scf_flags & SCF_WAIT;
- bool preemptible_wait = true;
cpumask_var_t cpumask_stack;
- struct cpumask *cpumask;
int nr_cpus = 0;
bool run_remote = false;
lockdep_assert_preemption_disabled();
if (!alloc_cpumask_var(&cpumask_stack, GFP_ATOMIC))
- preemptible_wait = false;
+ cpumask = cpumask_stack;
/*
* Can deadlock when called with interrupts disabled.
@@ -836,10 +835,6 @@ static void smp_call_function_many_cond(
/* Check if we need remote execution, i.e., any CPU excluding this one. */
if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) {
- cfd = this_cpu_ptr(&cfd_data);
-
- cpumask = preemptible_wait ? cpumask_stack : cfd->cpumask;
-
cpumask_and(cpumask, mask, cpu_online_mask);
__cpumask_clear_cpu(this_cpu, cpumask);
@@ -907,8 +902,7 @@ static void smp_call_function_many_cond(
}
}
- if (preemptible_wait)
- free_cpumask_var(cpumask_stack);
+ free_cpumask_var(cpumask_stack);
}
/**
On Tue, Feb 03, 2026 at 07:23:54PM +0800, Chuyi Zhou wrote:
> This patch use on-stack cpumask to replace percpu cfd cpumask in
> smp_call_function_many_cond(). alloc_cpumask_var() may fail when
> CONFIG_CPUMASK_OFFSTACK is enabled. In such extreme case, fall back to
> cfd->cpumask. This is a preparation for the next patch.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
> kernel/smp.c | 22 +++++++++++++++++-----
> 1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/smp.c b/kernel/smp.c
> index f572716c3c7d..35948afced2e 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -805,11 +805,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> int cpu, last_cpu, this_cpu = smp_processor_id();
> struct call_function_data *cfd;
> bool wait = scf_flags & SCF_WAIT;
> + bool preemptible_wait = true;
> + cpumask_var_t cpumask_stack;
> + struct cpumask *cpumask;
> int nr_cpus = 0;
> bool run_remote = false;
>
> lockdep_assert_preemption_disabled();
>
> + if (!alloc_cpumask_var(&cpumask_stack, GFP_ATOMIC))
> + preemptible_wait = false;

IIRC this breaks RT, must not allocate with preemption disabled.
在 2026/2/5 17:44, Peter Zijlstra 写道:
> On Tue, Feb 03, 2026 at 07:23:54PM +0800, Chuyi Zhou wrote:
>> This patch use on-stack cpumask to replace percpu cfd cpumask in
>> smp_call_function_many_cond(). alloc_cpumask_var() may fail when
>> CONFIG_CPUMASK_OFFSTACK is enabled. In such extreme case, fall back to
>> cfd->cpumask. This is a preparation for the next patch.
>>
>> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
>> ---
>> kernel/smp.c | 22 +++++++++++++++++-----
>> 1 file changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/smp.c b/kernel/smp.c
>> index f572716c3c7d..35948afced2e 100644
>> --- a/kernel/smp.c
>> +++ b/kernel/smp.c
>> @@ -805,11 +805,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
>> int cpu, last_cpu, this_cpu = smp_processor_id();
>> struct call_function_data *cfd;
>> bool wait = scf_flags & SCF_WAIT;
>> + bool preemptible_wait = true;
>> + cpumask_var_t cpumask_stack;
>> + struct cpumask *cpumask;
>> int nr_cpus = 0;
>> bool run_remote = false;
>>
>> lockdep_assert_preemption_disabled();
>>
>> + if (!alloc_cpumask_var(&cpumask_stack, GFP_ATOMIC))
>> + preemptible_wait = false;
>
> IIRC this breaks RT, must not allocate with preemption disabled.

Thank you for the reminder. Perhaps another feasible approach is to only consider the CONFIG_CPUMASK_OFFSTACK=n case. Of course, if we use cpus_read_lock and ensure that the caller’s context is sleepable, this issue would also be eliminated.
© 2016 - 2026 Red Hat, Inc.