on_selected_cpus() holds a global lock even when concurrent callers
target non-overlapping sets of CPUs. This is a scalability bottleneck.
To avoid it:
1. Remove the global lock.
2. Make call_data_struct per-CPU.
3. Track which CPUs are currently running on_selected_cpus() using a
global CPU mask. This tells CPUs running the interrupt which per-CPU
call_data_structs to look at.
Since the call data is now per-CPU, skip waiting for CPUs to "check in"
for async calls. Instead, defer that wait until the next time
on_selected_cpus() is called on the same CPU, by which point there
should be nothing left to wait for.
Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
---
xen/common/smp.c | 101 +++++++++++++++++++++++++++++++----------------
1 file changed, 66 insertions(+), 35 deletions(-)
diff --git a/xen/common/smp.c b/xen/common/smp.c
index a011f541f1ea..e592e8453fb3 100644
--- a/xen/common/smp.c
+++ b/xen/common/smp.c
@@ -24,13 +24,15 @@
/*
* Structure and data for smp_call_function()/on_selected_cpus().
*/
-static DEFINE_SPINLOCK(call_lock);
-static struct call_data_struct {
+struct call_data_struct {
void (*func) (void *info);
void *info;
int wait;
- cpumask_t selected;
-} call_data;
+ cpumask_t selected __cacheline_aligned;
+};
+
+DEFINE_PER_CPU(struct call_data_struct, call_data);
+static cpumask_t tasks;
void smp_call_function(
void (*func) (void *info),
@@ -50,55 +52,84 @@ void on_selected_cpus(
void *info,
int wait)
{
+ struct call_data_struct *data;
+ unsigned int cpu = smp_processor_id();
+
ASSERT(local_irq_is_enabled());
ASSERT(cpumask_subset(selected, &cpu_online_map));
- spin_lock(&call_lock);
+ if ( cpumask_empty(selected) )
+ return;
+
+ data = &this_cpu(call_data);
- cpumask_copy(&call_data.selected, selected);
+ if ( !data->wait )
+ {
+ /* Wait for any previous async call to complete */
+ while ( !cpumask_empty(&data->selected) )
+ cpu_relax();
+
+ cpumask_clear_cpu(cpu, &tasks);
+ }
- if ( cpumask_empty(&call_data.selected) )
- goto out;
+ data->func = func;
+ data->info = info;
+ data->wait = wait;
- call_data.func = func;
- call_data.info = info;
- call_data.wait = wait;
+ smp_wmb();
- smp_send_call_function_mask(&call_data.selected);
+ cpumask_copy(&data->selected, selected);
- while ( !cpumask_empty(&call_data.selected) )
- cpu_relax();
+ cpumask_set_cpu(cpu, &tasks);
-out:
- spin_unlock(&call_lock);
+ smp_send_call_function_mask(&data->selected);
+
+ if ( wait )
+ {
+ while ( !cpumask_empty(&data->selected) )
+ cpu_relax();
+
+ cpumask_clear_cpu(cpu, &tasks);
+ }
}
void smp_call_function_interrupt(void)
{
- void (*func)(void *info) = call_data.func;
- void *info = call_data.info;
unsigned int cpu = smp_processor_id();
-
- if ( !cpumask_test_cpu(cpu, &call_data.selected) )
- return;
+ unsigned int i;
+ struct call_data_struct *data;
+ void (*func)(void *info);
+ void *info;
irq_enter();
- if ( unlikely(!func) )
- {
- cpumask_clear_cpu(cpu, &call_data.selected);
- }
- else if ( call_data.wait )
- {
- (*func)(info);
- smp_mb();
- cpumask_clear_cpu(cpu, &call_data.selected);
- }
- else
+ for_each_cpu ( i, &tasks )
{
- smp_mb();
- cpumask_clear_cpu(cpu, &call_data.selected);
- (*func)(info);
+ data = &per_cpu(call_data, i);
+
+ if ( !cpumask_test_cpu(cpu, &data->selected) )
+ continue;
+
+ smp_rmb();
+ func = data->func;
+ info = data->info;
+
+ if ( unlikely(!func) )
+ {
+ cpumask_clear_cpu(cpu, &data->selected);
+ }
+ else if ( data->wait )
+ {
+ (*func)(info);
+ smp_mb();
+ cpumask_clear_cpu(cpu, &data->selected);
+ }
+ else
+ {
+ smp_mb();
+ cpumask_clear_cpu(cpu, &data->selected);
+ (*func)(info);
+ }
}
irq_exit();
--
2.53.0
© 2016 - 2026 Red Hat, Inc.