[PATCH 1/3] xen/sched: introduce cpupool_update_node_affinity()

Posted by Juergen Gross 3 years, 6 months ago
Add a new function cpupool_update_node_affinity() for updating the node
affinities of all domains in a cpupool.

In order to avoid multiple allocations of cpumasks, split
domain_update_node_affinity() into a wrapper doing the needed
allocations and a work function which can also be called by
cpupool_update_node_affinity().

This will later help to pre-allocate the cpumasks in order to avoid
allocations in stop-machine context.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/sched/core.c    | 61 ++++++++++++++++++++-----------------
 xen/common/sched/cpupool.c | 62 +++++++++++++++++++++++++++-----------
 xen/common/sched/private.h |  8 +++++
 3 files changed, 87 insertions(+), 44 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index f689b55783..c8d1034d3d 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -1790,28 +1790,14 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
     return ret;
 }
 
-void domain_update_node_affinity(struct domain *d)
+void domain_update_node_affinity_noalloc(struct domain *d,
+                                         const cpumask_t *online,
+                                         struct affinity_masks *affinity)
 {
-    cpumask_var_t dom_cpumask, dom_cpumask_soft;
     cpumask_t *dom_affinity;
-    const cpumask_t *online;
     struct sched_unit *unit;
     unsigned int cpu;
 
-    /* Do we have vcpus already? If not, no need to update node-affinity. */
-    if ( !d->vcpu || !d->vcpu[0] )
-        return;
-
-    if ( !zalloc_cpumask_var(&dom_cpumask) )
-        return;
-    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
-    {
-        free_cpumask_var(dom_cpumask);
-        return;
-    }
-
-    online = cpupool_domain_master_cpumask(d);
-
     spin_lock(&d->node_affinity_lock);
 
     /*
@@ -1830,22 +1816,21 @@ void domain_update_node_affinity(struct domain *d)
          */
         for_each_sched_unit ( d, unit )
         {
-            cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
-            cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
-                       unit->cpu_soft_affinity);
+            cpumask_or(affinity->hard, affinity->hard, unit->cpu_hard_affinity);
+            cpumask_or(affinity->soft, affinity->soft, unit->cpu_soft_affinity);
         }
         /* Filter out non-online cpus */
-        cpumask_and(dom_cpumask, dom_cpumask, online);
-        ASSERT(!cpumask_empty(dom_cpumask));
+        cpumask_and(affinity->hard, affinity->hard, online);
+        ASSERT(!cpumask_empty(affinity->hard));
         /* And compute the intersection between hard, online and soft */
-        cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
+        cpumask_and(affinity->soft, affinity->soft, affinity->hard);
 
         /*
          * If not empty, the intersection of hard, soft and online is the
          * narrowest set we want. If empty, we fall back to hard&online.
          */
-        dom_affinity = cpumask_empty(dom_cpumask_soft) ?
-                           dom_cpumask : dom_cpumask_soft;
+        dom_affinity = cpumask_empty(affinity->soft) ? affinity->hard
+                                                     : affinity->soft;
 
         nodes_clear(d->node_affinity);
         for_each_cpu ( cpu, dom_affinity )
@@ -1853,9 +1838,31 @@ void domain_update_node_affinity(struct domain *d)
     }
 
     spin_unlock(&d->node_affinity_lock);
+}
+
+void domain_update_node_affinity(struct domain *d)
+{
+    struct affinity_masks masks;
+    const cpumask_t *online;
+
+    /* Do we have vcpus already? If not, no need to update node-affinity. */
+    if ( !d->vcpu || !d->vcpu[0] )
+        return;
+
+    if ( !zalloc_cpumask_var(&masks.hard) )
+        return;
+    if ( !zalloc_cpumask_var(&masks.soft) )
+    {
+        free_cpumask_var(masks.hard);
+        return;
+    }
+
+    online = cpupool_domain_master_cpumask(d);
+
+    domain_update_node_affinity_noalloc(d, online, &masks);
 
-    free_cpumask_var(dom_cpumask_soft);
-    free_cpumask_var(dom_cpumask);
+    free_cpumask_var(masks.soft);
+    free_cpumask_var(masks.hard);
 }
 
 typedef long ret_t;
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index 2afe54f54d..1463dcd767 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -410,6 +410,48 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
     return ret;
 }
 
+/* Update affinities of all domains in a cpupool. */
+static int cpupool_alloc_affin_masks(struct affinity_masks *masks)
+{
+    if ( !alloc_cpumask_var(&masks->hard) )
+        return -ENOMEM;
+    if ( alloc_cpumask_var(&masks->soft) )
+        return 0;
+
+    free_cpumask_var(masks->hard);
+    return -ENOMEM;
+}
+
+static void cpupool_free_affin_masks(struct affinity_masks *masks)
+{
+    free_cpumask_var(masks->soft);
+    free_cpumask_var(masks->hard);
+}
+
+static void cpupool_update_node_affinity(const struct cpupool *c)
+{
+    const cpumask_t *online = c->res_valid;
+    struct affinity_masks masks;
+    struct domain *d;
+
+    if ( cpupool_alloc_affin_masks(&masks) )
+        return;
+
+    rcu_read_lock(&domlist_read_lock);
+    for_each_domain_in_cpupool(d, c)
+    {
+        if ( d->vcpu && d->vcpu[0] )
+        {
+            cpumask_clear(masks.hard);
+            cpumask_clear(masks.soft);
+            domain_update_node_affinity_noalloc(d, online, &masks);
+        }
+    }
+    rcu_read_unlock(&domlist_read_lock);
+
+    cpupool_free_affin_masks(&masks);
+}
+
 /*
  * assign a specific cpu to a cpupool
  * cpupool_lock must be held
@@ -417,7 +459,6 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
 static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
 {
     int ret;
-    struct domain *d;
     const cpumask_t *cpus;
 
     cpus = sched_get_opt_cpumask(c->gran, cpu);
@@ -442,12 +483,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
 
     rcu_read_unlock(&sched_res_rculock);
 
-    rcu_read_lock(&domlist_read_lock);
-    for_each_domain_in_cpupool(d, c)
-    {
-        domain_update_node_affinity(d);
-    }
-    rcu_read_unlock(&domlist_read_lock);
+    cpupool_update_node_affinity(c);
 
     return 0;
 }
@@ -456,18 +492,14 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
 {
     int cpu = cpupool_moving_cpu;
     const cpumask_t *cpus;
-    struct domain *d;
     int ret;
 
     if ( c != cpupool_cpu_moving )
         return -EADDRNOTAVAIL;
 
-    /*
-     * We need this for scanning the domain list, both in
-     * cpu_disable_scheduler(), and at the bottom of this function.
-     */
     rcu_read_lock(&domlist_read_lock);
     ret = cpu_disable_scheduler(cpu);
+    rcu_read_unlock(&domlist_read_lock);
 
     rcu_read_lock(&sched_res_rculock);
     cpus = get_sched_res(cpu)->cpus;
@@ -494,11 +526,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
     }
     rcu_read_unlock(&sched_res_rculock);
 
-    for_each_domain_in_cpupool(d, c)
-    {
-        domain_update_node_affinity(d);
-    }
-    rcu_read_unlock(&domlist_read_lock);
+    cpupool_update_node_affinity(c);
 
     return ret;
 }
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index a870320146..de0cf63ce8 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -593,6 +593,14 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step,
         cpumask_copy(mask, unit->cpu_hard_affinity);
 }
 
+struct affinity_masks {
+    cpumask_var_t hard;
+    cpumask_var_t soft;
+};
+
+void domain_update_node_affinity_noalloc(struct domain *d,
+                                         const cpumask_t *online,
+                                         struct affinity_masks *affinity);
 void sched_rm_cpu(unsigned int cpu);
 const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
 void schedule_dump(struct cpupool *c);
-- 
2.35.3
Re: [PATCH 1/3] xen/sched: introduce cpupool_update_node_affinity()
Posted by Jan Beulich 3 years, 6 months ago
On 02.08.2022 15:27, Juergen Gross wrote:
> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -1790,28 +1790,14 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
>      return ret;
>  }
>  
> -void domain_update_node_affinity(struct domain *d)
> +void domain_update_node_affinity_noalloc(struct domain *d,
> +                                         const cpumask_t *online,
> +                                         struct affinity_masks *affinity)
>  {
> -    cpumask_var_t dom_cpumask, dom_cpumask_soft;
>      cpumask_t *dom_affinity;
> -    const cpumask_t *online;
>      struct sched_unit *unit;
>      unsigned int cpu;
>  
> -    /* Do we have vcpus already? If not, no need to update node-affinity. */
> -    if ( !d->vcpu || !d->vcpu[0] )
> -        return;
> -
> -    if ( !zalloc_cpumask_var(&dom_cpumask) )
> -        return;
> -    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
> -    {
> -        free_cpumask_var(dom_cpumask);
> -        return;
> -    }

Instead of splitting the function, did you consider using
cond_zalloc_cpumask_var() here, thus allowing (but not requiring)
callers to pre-allocate the masks? That would imo mean quite a bit less
code churn.
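
For illustration, a minimal sketch of that alternative (an editorial
sketch, not code from the series; it assumes cond_zalloc_cpumask_var()
allocates a mask on first use and merely clears an already allocated
one, and that all callers are adjusted to pass the masks in):

void domain_update_node_affinity(struct domain *d,
                                 struct affinity_masks *affinity)
{
    /* Do we have vcpus already? If not, no need to update node-affinity. */
    if ( !d->vcpu || !d->vcpu[0] )
        return;

    /* Allocate on the first call, merely clear on subsequent calls. */
    if ( !cond_zalloc_cpumask_var(&affinity->hard) ||
         !cond_zalloc_cpumask_var(&affinity->soft) )
        return;

    /* ... accumulate unit affinities and update d->node_affinity ... */
}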

> --- a/xen/common/sched/cpupool.c
> +++ b/xen/common/sched/cpupool.c
> @@ -410,6 +410,48 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
>      return ret;
>  }
>  
> +/* Update affinities of all domains in a cpupool. */
> +static int cpupool_alloc_affin_masks(struct affinity_masks *masks)
> +{
> +    if ( !alloc_cpumask_var(&masks->hard) )
> +        return -ENOMEM;
> +    if ( alloc_cpumask_var(&masks->soft) )
> +        return 0;
> +
> +    free_cpumask_var(masks->hard);
> +    return -ENOMEM;
> +}

Wouldn't this be a nice general helper function, also usable from
outside of this CU?

As a nit - right now the only caller treats the return value as boolean,
so perhaps the function should return bool instead?
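
For reference, such a bool variant might look like this (a sketch, not
part of the posted patch):

static bool cpupool_alloc_affin_masks(struct affinity_masks *masks)
{
    if ( alloc_cpumask_var(&masks->hard) )
    {
        if ( alloc_cpumask_var(&masks->soft) )
            return true;

        free_cpumask_var(masks->hard);
    }

    return false;
}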

Jan
Re: [PATCH 1/3] xen/sched: introduce cpupool_update_node_affinity()
Posted by Juergen Gross 3 years, 6 months ago
On 03.08.22 09:50, Jan Beulich wrote:
> On 02.08.2022 15:27, Juergen Gross wrote:
>> --- a/xen/common/sched/core.c
>> +++ b/xen/common/sched/core.c
>> @@ -1790,28 +1790,14 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
>>       return ret;
>>   }
>>   
>> -void domain_update_node_affinity(struct domain *d)
>> +void domain_update_node_affinity_noalloc(struct domain *d,
>> +                                         const cpumask_t *online,
>> +                                         struct affinity_masks *affinity)
>>   {
>> -    cpumask_var_t dom_cpumask, dom_cpumask_soft;
>>       cpumask_t *dom_affinity;
>> -    const cpumask_t *online;
>>       struct sched_unit *unit;
>>       unsigned int cpu;
>>   
>> -    /* Do we have vcpus already? If not, no need to update node-affinity. */
>> -    if ( !d->vcpu || !d->vcpu[0] )
>> -        return;
>> -
>> -    if ( !zalloc_cpumask_var(&dom_cpumask) )
>> -        return;
>> -    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
>> -    {
>> -        free_cpumask_var(dom_cpumask);
>> -        return;
>> -    }
> 
> Instead of splitting the function, did you consider using
> cond_zalloc_cpumask_var() here, thus allowing (but not requiring)
> callers to pre-allocate the masks? That would imo mean quite a bit less
> code churn.

This would require changing all callers of domain_update_node_affinity()
to add the additional mask parameter. The struct affinity_masks, so far
local to common/sched, would then need to be made globally visible.

I'm not sure this is a good idea.

> 
>> --- a/xen/common/sched/cpupool.c
>> +++ b/xen/common/sched/cpupool.c
>> @@ -410,6 +410,48 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
>>       return ret;
>>   }
>>   
>> +/* Update affinities of all domains in a cpupool. */
>> +static int cpupool_alloc_affin_masks(struct affinity_masks *masks)
>> +{
>> +    if ( !alloc_cpumask_var(&masks->hard) )
>> +        return -ENOMEM;
>> +    if ( alloc_cpumask_var(&masks->soft) )
>> +        return 0;
>> +
>> +    free_cpumask_var(masks->hard);
>> +    return -ENOMEM;
>> +}
> 
> Wouldn't this be a nice general helper function, also usable from
> outside of this CU?

I considered that, but wasn't sure it would really be helpful. The only
potential other user would be domain_update_node_affinity(), which would
require using the zalloc variant of the allocation in the helper (not
that this would be a major problem, though).

> As a nit - right now the only caller treats the return value as boolean,
> so perhaps the function should return bool instead?

I can do that.


Juergen
Re: [PATCH 1/3] xen/sched: introduce cpupool_update_node_affinity()
Posted by Jan Beulich 3 years, 6 months ago
On 03.08.2022 10:01, Juergen Gross wrote:
> On 03.08.22 09:50, Jan Beulich wrote:
>> On 02.08.2022 15:27, Juergen Gross wrote:
>>> --- a/xen/common/sched/core.c
>>> +++ b/xen/common/sched/core.c
>>> @@ -1790,28 +1790,14 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
>>>       return ret;
>>>   }
>>>   
>>> -void domain_update_node_affinity(struct domain *d)
>>> +void domain_update_node_affinity_noalloc(struct domain *d,
>>> +                                         const cpumask_t *online,
>>> +                                         struct affinity_masks *affinity)
>>>   {
>>> -    cpumask_var_t dom_cpumask, dom_cpumask_soft;
>>>       cpumask_t *dom_affinity;
>>> -    const cpumask_t *online;
>>>       struct sched_unit *unit;
>>>       unsigned int cpu;
>>>   
>>> -    /* Do we have vcpus already? If not, no need to update node-affinity. */
>>> -    if ( !d->vcpu || !d->vcpu[0] )
>>> -        return;
>>> -
>>> -    if ( !zalloc_cpumask_var(&dom_cpumask) )
>>> -        return;
>>> -    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
>>> -    {
>>> -        free_cpumask_var(dom_cpumask);
>>> -        return;
>>> -    }
>>
>> Instead of splitting the function, did you consider using
>> cond_zalloc_cpumask_var() here, thus allowing (but not requiring)
>> callers to pre-allocate the masks? That would imo mean quite a bit less
>> code churn.
> 
> This would require changing all callers of domain_update_node_affinity()
> to add the additional mask parameter. The struct affinity_masks, so far
> local to common/sched, would then need to be made globally visible.
> 
> I'm not sure this is a good idea.

Hmm, I see there are quite a few callers (so there would be code churn
elsewhere). But I don't think the struct would need making globally
visible - the majority of callers could simply pass NULL, making the
function use a local instance of the struct instead. Personally I think
that would still be neater than having a _noalloc-suffixed variant of a
function (especially one with an already long name, as in this case).
But I guess this is then up to you / the scheduler maintainers.

>>> --- a/xen/common/sched/cpupool.c
>>> +++ b/xen/common/sched/cpupool.c
>>> @@ -410,6 +410,48 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
>>>       return ret;
>>>   }
>>>   
>>> +/* Update affinities of all domains in a cpupool. */
>>> +static int cpupool_alloc_affin_masks(struct affinity_masks *masks)
>>> +{
>>> +    if ( !alloc_cpumask_var(&masks->hard) )
>>> +        return -ENOMEM;
>>> +    if ( alloc_cpumask_var(&masks->soft) )
>>> +        return 0;
>>> +
>>> +    free_cpumask_var(masks->hard);
>>> +    return -ENOMEM;
>>> +}
>>
>> Wouldn't this be a nice general helper function, also usable from
>> outside of this CU?
> 
> I considered that, but wasn't sure it would really be helpful. The only
> potential other user would be domain_update_node_affinity(), which would
> require using the zalloc variant of the allocation in the helper (not
> that this would be a major problem, though).

I was actually thinking the other way around - the clearing of the masks
might better move into what is domain_update_node_affinity_noalloc() in
this version of the patch, so the helper could continue to use the non-
clearing allocations.
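
In code form, that suggestion amounts to something like (sketch only,
using the names from this version of the patch):

void domain_update_node_affinity_noalloc(struct domain *d,
                                         const cpumask_t *online,
                                         struct affinity_masks *affinity)
{
    /* Clear here, so callers can use the non-zeroing allocations. */
    cpumask_clear(affinity->hard);
    cpumask_clear(affinity->soft);

    /* ... accumulation and node-affinity update as in the patch ... */
}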

Jan
Re: [PATCH 1/3] xen/sched: introduce cpupool_update_node_affinity()
Posted by Juergen Gross 3 years, 6 months ago
On 03.08.22 10:30, Jan Beulich wrote:
> On 03.08.2022 10:01, Juergen Gross wrote:
>> On 03.08.22 09:50, Jan Beulich wrote:
>>> On 02.08.2022 15:27, Juergen Gross wrote:
>>>> --- a/xen/common/sched/core.c
>>>> +++ b/xen/common/sched/core.c
>>>> @@ -1790,28 +1790,14 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
>>>>        return ret;
>>>>    }
>>>>    
>>>> -void domain_update_node_affinity(struct domain *d)
>>>> +void domain_update_node_affinity_noalloc(struct domain *d,
>>>> +                                         const cpumask_t *online,
>>>> +                                         struct affinity_masks *affinity)
>>>>    {
>>>> -    cpumask_var_t dom_cpumask, dom_cpumask_soft;
>>>>        cpumask_t *dom_affinity;
>>>> -    const cpumask_t *online;
>>>>        struct sched_unit *unit;
>>>>        unsigned int cpu;
>>>>    
>>>> -    /* Do we have vcpus already? If not, no need to update node-affinity. */
>>>> -    if ( !d->vcpu || !d->vcpu[0] )
>>>> -        return;
>>>> -
>>>> -    if ( !zalloc_cpumask_var(&dom_cpumask) )
>>>> -        return;
>>>> -    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
>>>> -    {
>>>> -        free_cpumask_var(dom_cpumask);
>>>> -        return;
>>>> -    }
>>>
>>> Instead of splitting the function, did you consider using
>>> cond_zalloc_cpumask_var() here, thus allowing (but not requiring)
>>> callers to pre-allocate the masks? That would imo mean quite a bit less
>>> code churn.
>>
>> This would require changing all callers of domain_update_node_affinity()
>> to add the additional mask parameter. The struct affinity_masks, so far
>> local to common/sched, would then need to be made globally visible.
>>
>> I'm not sure this is a good idea.
> 
> Hmm, I see there are quite a few callers (so there would be code churn
> elsewhere). But I don't think the struct would need making globally
> visible - the majority of callers could simply pass NULL, making the
> function use a local instance of the struct instead. Personally I think
> that would still be neater than having a _noalloc-suffixed variant of a
> function (especially one with an already long name, as in this case).

Hmm, true.

I could even rename the real function to domain_update_node_aff() and
add an inline domain_update_node_affinity() wrapper passing NULL for the
mask parameter.
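
Such a rename might look like this (a sketch of the idea just outlined;
the final shape would of course depend on the next version of the series):

void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity);

static inline void domain_update_node_affinity(struct domain *d)
{
    domain_update_node_aff(d, NULL);
}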

> But I guess this is then up to you / the scheduler maintainers.
> 
>>>> --- a/xen/common/sched/cpupool.c
>>>> +++ b/xen/common/sched/cpupool.c
>>>> @@ -410,6 +410,48 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
>>>>        return ret;
>>>>    }
>>>>    
>>>> +/* Update affinities of all domains in a cpupool. */
>>>> +static int cpupool_alloc_affin_masks(struct affinity_masks *masks)
>>>> +{
>>>> +    if ( !alloc_cpumask_var(&masks->hard) )
>>>> +        return -ENOMEM;
>>>> +    if ( alloc_cpumask_var(&masks->soft) )
>>>> +        return 0;
>>>> +
>>>> +    free_cpumask_var(masks->hard);
>>>> +    return -ENOMEM;
>>>> +}
>>>
>>> Wouldn't this be a nice general helper function, also usable from
>>> outside of this CU?
>>
>> I considered that, but wasn't sure it would really be helpful. The only
>> potential other user would be domain_update_node_affinity(), which would
>> require using the zalloc variant of the allocation in the helper (not
>> that this would be a major problem, though).
> 
> I was actually thinking the other way around - the clearing of the masks
> might better move into what is domain_update_node_affinity_noalloc() in
> this version of the patch, so the helper could continue to use the non-
> clearing allocations.

I guess with cond_zalloc_cpumask_var() this would come for free.


Juergen