Lockless SMP function call and TLB flushing

[PATCH v1 2/3] xen/smp: Rewrite on_selected_cpus() to be lockless
Posted by Ross Lagerwall 4 hours ago
on_selected_cpus() holds a global lock even when concurrent callers
target non-overlapping sets of CPUs. This is a scalability bottleneck.
To avoid it:

1. Remove the global lock.
2. Make call_data_struct per-CPU.
3. Track which CPUs have an on_selected_cpus() call outstanding using a
   global CPU mask. This tells CPUs handling the call function
   interrupt which per-CPU call_data_structs to look at.

Since the call data is now per-CPU, skip waiting for CPUs to "check in"
for async calls. Instead, defer that wait until the next time
on_selected_cpus() is called on the same CPU, by which point there
should be nothing left to wait for.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
---
 xen/common/smp.c | 101 +++++++++++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 35 deletions(-)

diff --git a/xen/common/smp.c b/xen/common/smp.c
index a011f541f1ea..e592e8453fb3 100644
--- a/xen/common/smp.c
+++ b/xen/common/smp.c
@@ -24,13 +24,15 @@
 /*
  * Structure and data for smp_call_function()/on_selected_cpus().
  */
-static DEFINE_SPINLOCK(call_lock);
-static struct call_data_struct {
+struct call_data_struct {
     void (*func) (void *info);
     void *info;
     int wait;
-    cpumask_t selected;
-} call_data;
+    cpumask_t selected __cacheline_aligned;
+};
+
+DEFINE_PER_CPU(struct call_data_struct, call_data);
+static cpumask_t tasks;
 
 void smp_call_function(
     void (*func) (void *info),
@@ -50,55 +52,84 @@ void on_selected_cpus(
     void *info,
     int wait)
 {
+    struct call_data_struct *data;
+    unsigned int cpu = smp_processor_id();
+
     ASSERT(local_irq_is_enabled());
     ASSERT(cpumask_subset(selected, &cpu_online_map));
 
-    spin_lock(&call_lock);
+    if ( cpumask_empty(selected) )
+        return;
+
+    data = &this_cpu(call_data);
 
-    cpumask_copy(&call_data.selected, selected);
+    if ( !data->wait )
+    {
+        /* Wait for any previous async call to complete */
+        while ( !cpumask_empty(&data->selected) )
+            cpu_relax();
+
+        cpumask_clear_cpu(cpu, &tasks);
+    }
 
-    if ( cpumask_empty(&call_data.selected) )
-        goto out;
+    data->func = func;
+    data->info = info;
+    data->wait = wait;
 
-    call_data.func = func;
-    call_data.info = info;
-    call_data.wait = wait;
+    smp_wmb();
 
-    smp_send_call_function_mask(&call_data.selected);
+    cpumask_copy(&data->selected, selected);
 
-    while ( !cpumask_empty(&call_data.selected) )
-        cpu_relax();
+    cpumask_set_cpu(cpu, &tasks);
 
-out:
-    spin_unlock(&call_lock);
+    smp_send_call_function_mask(&data->selected);
+
+    if ( wait )
+    {
+        while ( !cpumask_empty(&data->selected) )
+            cpu_relax();
+
+        cpumask_clear_cpu(cpu, &tasks);
+    }
 }
 
 void smp_call_function_interrupt(void)
 {
-    void (*func)(void *info) = call_data.func;
-    void *info = call_data.info;
     unsigned int cpu = smp_processor_id();
-
-    if ( !cpumask_test_cpu(cpu, &call_data.selected) )
-        return;
+    unsigned int i;
+    struct call_data_struct *data;
+    void (*func)(void *info);
+    void *info;
 
     irq_enter();
 
-    if ( unlikely(!func) )
-    {
-        cpumask_clear_cpu(cpu, &call_data.selected);
-    }
-    else if ( call_data.wait )
-    {
-        (*func)(info);
-        smp_mb();
-        cpumask_clear_cpu(cpu, &call_data.selected);
-    }
-    else
+    for_each_cpu ( i, &tasks )
     {
-        smp_mb();
-        cpumask_clear_cpu(cpu, &call_data.selected);
-        (*func)(info);
+        data = &per_cpu(call_data, i);
+
+        if ( !cpumask_test_cpu(cpu, &data->selected) )
+            continue;
+
+        smp_rmb();
+        func = data->func;
+        info = data->info;
+
+        if ( unlikely(!func) )
+        {
+            cpumask_clear_cpu(cpu, &data->selected);
+        }
+        else if ( data->wait )
+        {
+            (*func)(info);
+            smp_mb();
+            cpumask_clear_cpu(cpu, &data->selected);
+        }
+        else
+        {
+            smp_mb();
+            cpumask_clear_cpu(cpu, &data->selected);
+            (*func)(info);
+        }
     }
 
     irq_exit();
-- 
2.53.0
[PATCH v1 1/3] x86/hap: Wait for remote CPUs during TLB flush
[PATCH v1 2/3] xen/smp: Rewrite on_selected_cpus() to be lockless
[PATCH v1 3/3] x86/smp: Rewrite TLB flush using on_selected_cpus()