Prepare a task-local IPI cpumask during thread creation and use it in
place of the percpu cfd->cpumask in smp_call_function_many_cond(). A
later patch will enable preemption during csd_lock_wait(); once a
caller can be preempted while waiting, a task-local mask keeps other
tasks scheduled onto the same CPU from clobbering the mask it is still
using. When cpumask_size() is smaller than or equal to
sizeof(unsigned long), the mask is stashed in the pointer field itself
to avoid an extra memory allocation.
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
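Note for reviewers (kept below the "---" so it stays out of the
changelog): a minimal userspace sketch of the pointer-stash trick used
for ipi_mask_ptr/ipi_mask_val. The mask_store type, MASK_BYTES, and the
helpers are invented for illustration and are not kernel API; MASK_BYTES
stands in for cpumask_size() and is a compile-time constant here, so
only the inline path is exercised, though both paths compile.

#include <stdlib.h>

#define MASK_BYTES sizeof(unsigned long)	/* stand-in for cpumask_size() */

struct mask_store {
	union {
		unsigned long *ptr;	/* heap-allocated mask */
		unsigned long  val;	/* inline storage when the mask fits */
	};
};

static int mask_is_inline(void)
{
	return MASK_BYTES <= sizeof(unsigned long);
}

/* Like smp_task_ipi_mask_alloc(): skip the allocation when inlined. */
static int mask_alloc(struct mask_store *s)
{
	if (mask_is_inline())
		return 0;
	s->ptr = malloc(MASK_BYTES);
	return s->ptr ? 0 : -1;
}

/* Like smp_task_ipi_mask(): return whichever storage is in use. */
static unsigned long *mask_bits(struct mask_store *s)
{
	return mask_is_inline() ? &s->val : s->ptr;
}

/* Like smp_task_ipi_mask_free(): nothing to free when inlined. */
static void mask_free(struct mask_store *s)
{
	if (!mask_is_inline())
		free(s->ptr);
}

int main(void)
{
	struct mask_store s;

	if (mask_alloc(&s))
		return 1;
	*mask_bits(&s) = 0x5UL;	/* pretend CPUs 0 and 2 are set */
	mask_free(&s);
	return 0;
}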
include/linux/sched.h | 6 +++++
include/linux/smp.h | 20 +++++++++++++++
kernel/fork.c | 9 ++++++-
kernel/smp.c | 59 ++++++++++++++++++++++++++++++++++++++-----
4 files changed, 87 insertions(+), 7 deletions(-)
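Also illustrative only: the patch flips the ipi_mask_inlined static key
in setup_nr_cpu_ids(), once the possible-CPU count is final, rather than
deciding at compile time. A rough userspace analog of that decide-once
boot-time gate follows; all names are invented, and the byte count is a
simplification of cpumask_size(), which actually rounds up to whole
unsigned longs.

#include <stdio.h>

static int masks_inlined;	/* stand-in for the ipi_mask_inlined static key */

static void setup_mask_mode(unsigned int nr_cpu_ids)
{
	unsigned int mask_bytes = (nr_cpu_ids + 7) / 8;	/* ~cpumask_size() */

	/* Decide once, after the final CPU count is known. */
	masks_inlined = mask_bytes <= sizeof(unsigned long);
}

int main(void)
{
	setup_mask_mode(8);	/* 8 possible CPUs: mask fits in one word */
	printf("inlined: %d\n", masks_inlined);	/* prints 1 on LP64 */

	setup_mask_mode(1024);	/* 1024 CPUs: 128 bytes, must allocate */
	printf("inlined: %d\n", masks_inlined);	/* prints 0 */
	return 0;
}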
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5d3dbc9cdf..6daab67caacc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1346,6 +1346,12 @@ struct task_struct {
struct list_head perf_event_list;
struct perf_ctx_data __rcu *perf_ctx_data;
#endif
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPTION)
+ union {
+ cpumask_t *ipi_mask_ptr;
+ unsigned long ipi_mask_val;
+ };
+#endif
#ifdef CONFIG_DEBUG_PREEMPT
unsigned long preempt_disable_ip;
#endif
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 1ebd88026119..c7b8cc82ad3c 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -167,6 +167,12 @@ void smp_call_function_many(const struct cpumask *mask,
int smp_call_function_any(const struct cpumask *mask,
smp_call_func_t func, void *info, int wait);
+#ifdef CONFIG_PREEMPTION
+int smp_task_ipi_mask_alloc(struct task_struct *task);
+void smp_task_ipi_mask_free(struct task_struct *task);
+cpumask_t *smp_task_ipi_mask(struct task_struct *cur);
+#endif
+
void kick_all_cpus_sync(void);
void wake_up_all_idle_cpus(void);
bool cpus_peek_for_pending_ipi(const struct cpumask *mask);
@@ -306,4 +312,18 @@ bool csd_lock_is_stuck(void);
static inline bool csd_lock_is_stuck(void) { return false; }
#endif
+#if !defined(CONFIG_SMP) || !defined(CONFIG_PREEMPTION)
+static inline int smp_task_ipi_mask_alloc(struct task_struct *task)
+{
+ return 0;
+}
+static inline void smp_task_ipi_mask_free(struct task_struct *task)
+{
+}
+static inline cpumask_t *smp_task_ipi_mask(struct task_struct *cur)
+{
+ return NULL;
+}
+#endif
+
#endif /* __LINUX_SMP_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index bc2bf58b93b6..7082eb1c02c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -533,6 +533,7 @@ void free_task(struct task_struct *tsk)
#endif
release_user_cpus_ptr(tsk);
scs_release(tsk);
+ smp_task_ipi_mask_free(tsk);
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -930,10 +931,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
account_kernel_stack(tsk, 1);
- err = scs_prepare(tsk, node);
+ err = smp_task_ipi_mask_alloc(tsk);
if (err)
goto free_stack;
+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_ipi_mask;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -1004,6 +1009,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
return tsk;
+free_ipi_mask:
+ smp_task_ipi_mask_free(tsk);
free_stack:
exit_task_stack_account(tsk);
free_thread_stack(tsk);
diff --git a/kernel/smp.c b/kernel/smp.c
index 80daf9dd4a25..446e3f80007e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -785,6 +785,44 @@ int smp_call_function_any(const struct cpumask *mask,
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
+static DEFINE_STATIC_KEY_FALSE(ipi_mask_inlined);
+
+#ifdef CONFIG_PREEMPTION
+
+int smp_task_ipi_mask_alloc(struct task_struct *task)
+{
+ if (static_branch_unlikely(&ipi_mask_inlined))
+ return 0;
+
+ task->ipi_mask_ptr = kmalloc(cpumask_size(), GFP_KERNEL);
+ if (!task->ipi_mask_ptr)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void smp_task_ipi_mask_free(struct task_struct *task)
+{
+ if (static_branch_unlikely(&ipi_mask_inlined))
+ return;
+
+ kfree(task->ipi_mask_ptr);
+}
+
+cpumask_t *smp_task_ipi_mask(struct task_struct *cur)
+{
+ /*
+ * When cpumask_size() fits in an unsigned long, the mask is
+ * stored in the union's ipi_mask_val word itself, so no
+ * separate allocation is needed.
+ */
+ if (static_branch_unlikely(&ipi_mask_inlined))
+ return (cpumask_t *)&cur->ipi_mask_val;
+
+ return cur->ipi_mask_ptr;
+}
+#endif
+
/*
* Flags to be used as scf_flags argument of smp_call_function_many_cond().
*
@@ -802,11 +840,18 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
int cpu, last_cpu, this_cpu = smp_processor_id();
struct call_function_data *cfd;
bool wait = scf_flags & SCF_WAIT;
+ struct cpumask *cpumask, *task_mask;
+ bool preemptible_wait;
int nr_cpus = 0;
bool run_remote = false;
lockdep_assert_preemption_disabled();
+ task_mask = smp_task_ipi_mask(current);
+ preemptible_wait = task_mask && preemptible();
+ cfd = this_cpu_ptr(&cfd_data);
+ cpumask = preemptible_wait ? task_mask : cfd->cpumask;
+
/*
* Can deadlock when called with interrupts disabled.
* We allow cpu's that are not yet online though, as no one else can
@@ -827,16 +872,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
/* Check if we need remote execution, i.e., any CPU excluding this one. */
if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) {
- cfd = this_cpu_ptr(&cfd_data);
- cpumask_and(cfd->cpumask, mask, cpu_online_mask);
- __cpumask_clear_cpu(this_cpu, cfd->cpumask);
+ cpumask_and(cpumask, mask, cpu_online_mask);
+ __cpumask_clear_cpu(this_cpu, cpumask);
cpumask_clear(cfd->cpumask_ipi);
- for_each_cpu(cpu, cfd->cpumask) {
+ for_each_cpu(cpu, cpumask) {
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
if (cond_func && !cond_func(cpu, info)) {
- __cpumask_clear_cpu(cpu, cfd->cpumask);
+ __cpumask_clear_cpu(cpu, cpumask);
continue;
}
@@ -887,7 +931,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
}
if (run_remote && wait) {
- for_each_cpu(cpu, cfd->cpumask) {
+ for_each_cpu(cpu, cpumask) {
call_single_data_t *csd;
csd = per_cpu_ptr(cfd->csd, cpu);
@@ -1003,6 +1047,9 @@ EXPORT_SYMBOL(nr_cpu_ids);
void __init setup_nr_cpu_ids(void)
{
set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
+
+ if (IS_ENABLED(CONFIG_PREEMPTION) && cpumask_size() <= sizeof(unsigned long))
+ static_branch_enable(&ipi_mask_inlined);
}
/* Called by boot processor to activate the rest. */
--
2.20.1