[Xen-devel] [PATCH v2 31/48] xen/sched: add support for multiple vcpus per sched unit where missing

Posted by Juergen Gross 6 years, 6 months ago
In several places, support for multiple vcpus per sched unit is still
missing. Add that missing support (with the exception of the initial
allocation) and the helpers needed for it.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2:
- fix vcpu_runstate_helper()
V1:
- add special handling for idle unit in unit_runnable() and
  unit_runnable_state()
V2:
- handle affinity_broken correctly (Jan Beulich)
---
 xen/common/domain.c        |  5 ++-
 xen/common/schedule.c      | 36 ++++++++++---------
 xen/include/xen/sched-if.h | 88 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 95 insertions(+), 34 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3797f954f5..0c763ceb25 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1255,7 +1255,10 @@ int vcpu_reset(struct vcpu *v)
     v->async_exception_mask = 0;
     memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
 #endif
-    v->affinity_broken = 0;
+    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
+        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
+    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
+        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
     clear_bit(_VPF_blocked, &v->pause_flags);
     clear_bit(_VPF_in_reset, &v->pause_flags);
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 2ecb76e3b9..1f45fc7373 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -240,8 +240,9 @@ static inline void vcpu_runstate_change(
     s_time_t delta;
     struct sched_unit *unit = v->sched_unit;
 
-    ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
+    if ( v->runstate.state == new_state )
+        return;
 
     vcpu_urgent_count_update(v);
 
@@ -263,15 +264,16 @@ static inline void vcpu_runstate_change(
 static inline void sched_unit_runstate_change(struct sched_unit *unit,
     bool running, s_time_t new_entry_time)
 {
-    struct vcpu *v = unit->vcpu_list;
+    struct vcpu *v;
 
-    if ( running )
-        vcpu_runstate_change(v, v->new_state, new_entry_time);
-    else
-        vcpu_runstate_change(v,
-            ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
-             (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
-            new_entry_time);
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( running )
+            vcpu_runstate_change(v, v->new_state, new_entry_time);
+        else
+            vcpu_runstate_change(v,
+                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
+                new_entry_time);
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
@@ -1035,9 +1037,9 @@ int cpu_disable_scheduler(unsigned int cpu)
             if ( cpumask_empty(&online_affinity) &&
                  cpumask_test_cpu(cpu, unit->cpu_hard_affinity) )
             {
-                if ( unit->vcpu_list->affinity_broken )
+                if ( sched_check_affinity_broken(unit) )
                 {
-                    /* The vcpu is temporarily pinned, can't move it. */
+                    /* The unit is temporarily pinned, can't move it. */
                     unit_schedule_unlock_irqrestore(lock, flags, unit);
                     ret = -EADDRINUSE;
                     break;
@@ -1395,17 +1397,17 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
             ret = 0;
             v->affinity_broken &= ~reason;
         }
-        if ( !ret && !v->affinity_broken )
+        if ( !ret && !sched_check_affinity_broken(unit) )
             sched_set_affinity(v, unit->cpu_hard_affinity_saved, NULL);
     }
     else if ( cpu < nr_cpu_ids )
     {
         if ( (v->affinity_broken & reason) ||
-             (v->affinity_broken && v->processor != cpu) )
+             (sched_check_affinity_broken(unit) && v->processor != cpu) )
             ret = -EBUSY;
         else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
         {
-            if ( !v->affinity_broken )
+            if ( !sched_check_affinity_broken(unit) )
             {
                 cpumask_copy(unit->cpu_hard_affinity_saved,
                              unit->cpu_hard_affinity);
@@ -1701,14 +1703,14 @@ static void sched_switch_units(struct sched_resource *sd,
              (next->vcpu_list->runstate.state == RUNSTATE_runnable) ?
              (now - next->state_entry_time) : 0, prev->next_time);
 
-    ASSERT(prev->vcpu_list->runstate.state == RUNSTATE_running);
+    ASSERT(unit_running(prev));
 
     TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
              next->domain->domain_id, next->unit_id);
 
     sched_unit_runstate_change(prev, false, now);
 
-    ASSERT(next->vcpu_list->runstate.state != RUNSTATE_running);
+    ASSERT(!unit_running(next));
     sched_unit_runstate_change(next, true, now);
 
     /*
@@ -1829,7 +1831,7 @@ void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
             while ( atomic_read(&next->rendezvous_out_cnt) )
                 cpu_relax();
     }
-    else if ( vprev != vnext )
+    else if ( vprev != vnext && sched_granularity == 1 )
         context_saved(vprev);
 }
 
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index aa896f49ef..e1d61a05b7 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -67,32 +67,70 @@ static inline bool is_idle_unit(const struct sched_unit *unit)
 
 static inline bool is_unit_online(const struct sched_unit *unit)
 {
-    return is_vcpu_online(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( is_vcpu_online(v) )
+            return true;
+
+    return false;
+}
+
+static inline unsigned int unit_running(const struct sched_unit *unit)
+{
+    return unit->runstate_cnt[RUNSTATE_running];
 }
 
 static inline bool unit_runnable(const struct sched_unit *unit)
 {
-    return vcpu_runnable(unit->vcpu_list);
+    struct vcpu *v;
+
+    if ( is_idle_unit(unit) )
+        return true;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( vcpu_runnable(v) )
+            return true;
+
+    return false;
 }
 
 static inline bool unit_runnable_state(const struct sched_unit *unit)
 {
     struct vcpu *v;
-    bool runnable;
+    bool runnable, ret = false;
+
+    if ( is_idle_unit(unit) )
+        return true;
+
+    for_each_sched_unit_vcpu ( unit, v )
+    {
+        runnable = vcpu_runnable(v);
+
+        v->new_state = runnable ? RUNSTATE_running
+                                : (v->pause_flags & VPF_blocked)
+                                  ? RUNSTATE_blocked : RUNSTATE_offline;
 
-    v = unit->vcpu_list;
-    runnable = vcpu_runnable(v);
+        if ( runnable )
+            ret = true;
+    }
 
-    v->new_state = runnable ? RUNSTATE_running
-                            : (v->pause_flags & VPF_blocked)
-                              ? RUNSTATE_blocked : RUNSTATE_offline;
-    return runnable;
+    return ret;
 }
 
 static inline void sched_set_res(struct sched_unit *unit,
                                  struct sched_resource *res)
 {
-    unit->vcpu_list->processor = res->processor;
+    int cpu = cpumask_first(res->cpus);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+    {
+        ASSERT(cpu < nr_cpu_ids);
+        v->processor = cpu;
+        cpu = cpumask_next(cpu, res->cpus);
+    }
+
     unit->res = res;
 }
 
@@ -104,25 +142,37 @@ static inline unsigned int sched_unit_cpu(struct sched_unit *unit)
 static inline void sched_set_pause_flags(struct sched_unit *unit,
                                          unsigned int bit)
 {
-    __set_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        __set_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_clear_pause_flags(struct sched_unit *unit,
                                            unsigned int bit)
 {
-    __clear_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        __clear_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
                                                 unsigned int bit)
 {
-    set_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        set_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
                                                   unsigned int bit)
 {
-    clear_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        clear_bit(bit, &v->pause_flags);
 }
 
 static inline struct sched_unit *sched_idle_unit(unsigned int cpu)
@@ -448,12 +498,18 @@ static inline int sched_adjust_cpupool(const struct scheduler *s,
 
 static inline void sched_unit_pause_nosync(struct sched_unit *unit)
 {
-    vcpu_pause_nosync(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        vcpu_pause_nosync(v);
 }
 
 static inline void sched_unit_unpause(struct sched_unit *unit)
 {
-    vcpu_unpause(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        vcpu_unpause(v);
 }
 
 #define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \
-- 
2.16.4


Re: [Xen-devel] [PATCH v2 31/48] xen/sched: add support for multiple vcpus per sched unit where missing
Posted by Jan Beulich 6 years, 5 months ago
On 09.08.2019 16:58, Juergen Gross wrote:
> V1:
> - add special handling for idle unit in unit_runnable() and
>   unit_runnable_state()

Why was this done? Isn't vcpu_runnable() going to always return
true for idle vCPU-s?

> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -1255,7 +1255,10 @@ int vcpu_reset(struct vcpu *v)
>      v->async_exception_mask = 0;
>      memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
>  #endif
> -    v->affinity_broken = 0;
> +    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
> +    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);

Shouldn't this live in an earlier patch? I guess the same question
is applicable to almost everything here, as also already mentioned
e.g. in the previous patch.

>  static inline void sched_set_res(struct sched_unit *unit,
>                                   struct sched_resource *res)
>  {
> -    unit->vcpu_list->processor = res->processor;
> +    int cpu = cpumask_first(res->cpus);

unsigned int

>  static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
>                                                  unsigned int bit)
>  {
> -    set_bit(bit, &unit->vcpu_list->pause_flags);
> +    struct vcpu *v;
> +
> +    for_each_sched_unit_vcpu ( unit, v )
> +        set_bit(bit, &v->pause_flags);
>  }
>  
>  static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
>                                                    unsigned int bit)
>  {
> -    clear_bit(bit, &unit->vcpu_list->pause_flags);
> +    struct vcpu *v;
> +
> +    for_each_sched_unit_vcpu ( unit, v )
> +        clear_bit(bit, &v->pause_flags);
>  }

The atomicity is (necessarily) limited here - it's atomic for an
individual vCPU, but not atomic for a unit as a whole. If this
is indeed a useful hybrid, then I think it would be nice if there
was a comment next to these functions clarifying under what
conditions use of them is correct without it also being correct
to use their non-atomic counterparts (e.g. due to use of an
external lock).

Jan

Re: [Xen-devel] [PATCH v2 31/48] xen/sched: add support for multiple vcpus per sched unit where missing
Posted by Juergen Gross 6 years, 4 months ago
On 11.09.19 12:43, Jan Beulich wrote:
> On 09.08.2019 16:58, Juergen Gross wrote:
>> V1:
>> - add special handling for idle unit in unit_runnable() and
>>    unit_runnable_state()
> 
> Why was this done? Isn't vcpu_runnable() going to always return
> true for idle vCPU-s?

The problem is the loop handling of for_each_sched_unit_vcpu().

The loop ends as soon as vcpu->sched_unit != unit. But this can be
true for idle vcpus in case they are temporarily active for a normal
domain's unit.
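
Roughly, as a sketch (the real macro is introduced earlier in the
series; this is just its relevant shape):

/* Sketch only - see the earlier patch for the actual definition. */
#define for_each_sched_unit_vcpu(u, v)                           \
    for ( (v) = (u)->vcpu_list;                                  \
          (v) != NULL && (v)->sched_unit == (u);                 \
          (v) = (v)->next_in_list )

So for the idle unit the first vcpu may already fail the
(v)->sched_unit == (u) check while it is lent to a normal domain's
unit, the loop body never runs, and unit_runnable() would wrongly
report the idle unit as not runnable - hence the special casing.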

> 
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -1255,7 +1255,10 @@ int vcpu_reset(struct vcpu *v)
>>       v->async_exception_mask = 0;
>>       memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
>>   #endif
>> -    v->affinity_broken = 0;
>> +    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
>> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
>> +    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
>> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
> 
> Shouldn't this live in an earlier patch? I guess the same question
> is applicable to almost everything here, as also already mentioned
> e.g. in the previous patch.

Hmm, this _is_ a missing part for multiple vcpus per unit.

I wouldn't know which other patch would be more suitable.

> 
>>   static inline void sched_set_res(struct sched_unit *unit,
>>                                    struct sched_resource *res)
>>   {
>> -    unit->vcpu_list->processor = res->processor;
>> +    int cpu = cpumask_first(res->cpus);
> 
> unsigned int

Yes.
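
I.e. (sketch of the corrected helper for v3, only the type changed):

static inline void sched_set_res(struct sched_unit *unit,
                                 struct sched_resource *res)
{
    unsigned int cpu = cpumask_first(res->cpus);
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
    {
        ASSERT(cpu < nr_cpu_ids);
        v->processor = cpu;
        cpu = cpumask_next(cpu, res->cpus);
    }

    unit->res = res;
}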

> 
>>   static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
>>                                                   unsigned int bit)
>>   {
>> -    set_bit(bit, &unit->vcpu_list->pause_flags);
>> +    struct vcpu *v;
>> +
>> +    for_each_sched_unit_vcpu ( unit, v )
>> +        set_bit(bit, &v->pause_flags);
>>   }
>>   
>>   static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
>>                                                     unsigned int bit)
>>   {
>> -    clear_bit(bit, &unit->vcpu_list->pause_flags);
>> +    struct vcpu *v;
>> +
>> +    for_each_sched_unit_vcpu ( unit, v )
>> +        clear_bit(bit, &v->pause_flags);
>>   }
> 
> The atomicity is (necessarily) limited here - it's atomic for an
> individual vCPU, but not atomic for a unit as a whole. If this
> is indeed a useful hybrid, then I think it would be nice if there
> was a comment next to these functions clarifying under what
> conditions use of them is correct without it also being correct
> to use their non-atomic counterparts (e.g. due to use of an
> external lock).

Okay, I'll add that.
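
Something along these lines, as a first sketch:

/*
 * The {set,clear}_bit() calls are atomic per vcpu only, not for the
 * unit as a whole: a concurrent observer may see the flag changed on
 * some of the unit's vcpus but not yet on others. Callers requiring
 * unit-wide atomicity must serialize via an external lock (e.g. the
 * scheduler lock).
 */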


Juergen
