[PATCH v2] xen/sched: fix restore_vcpu_affinity() by removing it

Juergen Gross posted 1 patch 1 year, 6 months ago
Test gitlab-ci passed
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20221021105026.16186-1-jgross@suse.com
xen/arch/x86/acpi/power.c |  3 --
xen/common/sched/core.c   | 78 ---------------------------------------
xen/include/xen/sched.h   |  1 -
3 files changed, 82 deletions(-)
[PATCH v2] xen/sched: fix restore_vcpu_affinity() by removing it
Posted by Juergen Gross 1 year, 6 months ago
When the system is coming up after having been suspended,
restore_vcpu_affinity() is called for each domain in order to adjust
the vcpu's affinity settings in case a cpu didn't come to live again.

The way restore_vcpu_affinity() is doing that is wrong, because the
specific scheduler isn't being informed about a possible migration of
the vcpu to another cpu. Additionally the migration is often even
happening if all cpus are running again, as it is done without check
whether it is really needed.

As cpupool management is already calling cpu_disable_scheduler() for
cpus not having come up again, and cpu_disable_scheduler() is taking
care of eventually needed vcpu migration in the proper way, there is
simply no need for restore_vcpu_affinity().

So just remove restore_vcpu_affinity() completely, together with the
no longer used sched_reset_affinity_broken().

Fixes: 8a04eaa8ea83 ("xen/sched: move some per-vcpu items to struct sched_unit")
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Acked-by: Dario Faggioli <dfaggioli@suse.com>
Release-acked-by: Henry Wang <Henry.Wang@arm.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- also remove sched_reset_affinity_broken() (Jan Beulich)
---
 xen/arch/x86/acpi/power.c |  3 --
 xen/common/sched/core.c   | 78 ---------------------------------------
 xen/include/xen/sched.h   |  1 -
 3 files changed, 82 deletions(-)

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index 1bb4d78392..b76f673acb 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -159,10 +159,7 @@ static void thaw_domains(void)
 
     rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
-    {
-        restore_vcpu_affinity(d);
         domain_unpause(d);
-    }
     rcu_read_unlock(&domlist_read_lock);
 }
 
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 83455fbde1..23fa6845a8 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -1188,84 +1188,6 @@ static bool sched_check_affinity_broken(const struct sched_unit *unit)
     return false;
 }
 
-static void sched_reset_affinity_broken(const struct sched_unit *unit)
-{
-    struct vcpu *v;
-
-    for_each_sched_unit_vcpu ( unit, v )
-        v->affinity_broken = false;
-}
-
-void restore_vcpu_affinity(struct domain *d)
-{
-    unsigned int cpu = smp_processor_id();
-    struct sched_unit *unit;
-
-    ASSERT(system_state == SYS_STATE_resume);
-
-    rcu_read_lock(&sched_res_rculock);
-
-    for_each_sched_unit ( d, unit )
-    {
-        spinlock_t *lock;
-        unsigned int old_cpu = sched_unit_master(unit);
-        struct sched_resource *res;
-
-        ASSERT(!unit_runnable(unit));
-
-        /*
-         * Re-assign the initial processor as after resume we have no
-         * guarantee the old processor has come back to life again.
-         *
-         * Therefore, here, before actually unpausing the domains, we should
-         * set v->processor of each of their vCPUs to something that will
-         * make sense for the scheduler of the cpupool in which they are in.
-         */
-        lock = unit_schedule_lock_irq(unit);
-
-        cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
-                    cpupool_domain_master_cpumask(d));
-        if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
-        {
-            if ( sched_check_affinity_broken(unit) )
-            {
-                sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
-                sched_reset_affinity_broken(unit);
-                cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
-                            cpupool_domain_master_cpumask(d));
-            }
-
-            if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
-            {
-                /* Affinity settings of one vcpu are for the complete unit. */
-                printk(XENLOG_DEBUG "Breaking affinity for %pv\n",
-                       unit->vcpu_list);
-                sched_set_affinity(unit, &cpumask_all, NULL);
-                cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
-                            cpupool_domain_master_cpumask(d));
-            }
-        }
-
-        res = get_sched_res(cpumask_any(cpumask_scratch_cpu(cpu)));
-        sched_set_res(unit, res);
-
-        spin_unlock_irq(lock);
-
-        /* v->processor might have changed, so reacquire the lock. */
-        lock = unit_schedule_lock_irq(unit);
-        res = sched_pick_resource(unit_scheduler(unit), unit);
-        sched_set_res(unit, res);
-        spin_unlock_irq(lock);
-
-        if ( old_cpu != sched_unit_master(unit) )
-            sched_move_irqs(unit);
-    }
-
-    rcu_read_unlock(&sched_res_rculock);
-
-    domain_update_node_affinity(d);
-}
-
 /*
  * This function is used by cpu_hotplug code via cpu notifier chain
  * and from cpupools to switch schedulers on a cpu.
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 557b3229f6..072e4846aa 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -1019,7 +1019,6 @@ void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
 void sched_setup_dom0_vcpus(struct domain *d);
 int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
 int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
-void restore_vcpu_affinity(struct domain *d);
 int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
                          struct xen_domctl_vcpuaffinity *vcpuaff);
 
-- 
2.35.3


Re: [PATCH v2] xen/sched: fix restore_vcpu_affinity() by removing it
Posted by Andrew Cooper 1 year, 6 months ago
On 21/10/2022 11:50, Juergen Gross wrote:
> When the system is coming up after having been suspended,
> restore_vcpu_affinity() is called for each domain in order to adjust
> the vcpu's affinity settings in case a cpu didn't come to live again.
>
> The way restore_vcpu_affinity() is doing that is wrong, because the
> specific scheduler isn't being informed about a possible migration of
> the vcpu to another cpu. Additionally the migration is often even
> happening if all cpus are running again, as it is done without check
> whether it is really needed.
>
> As cpupool management is already calling cpu_disable_scheduler() for
> cpus not having come up again, and cpu_disable_scheduler() is taking
> care of eventually needed vcpu migration in the proper way, there is
> simply no need for restore_vcpu_affinity().
>
> So just remove restore_vcpu_affinity() completely, together with the
> no longer used sched_reset_affinity_broken().
>
> Fixes: 8a04eaa8ea83 ("xen/sched: move some per-vcpu items to struct sched_unit")
> Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
> Acked-by: Dario Faggioli <dfaggioli@suse.com>
> Release-acked-by: Henry Wang <Henry.Wang@arm.com>
> Signed-off-by: Juergen Gross <jgross@suse.com>

For whomever commits this, Marek's T-by on v1 specifically included the
delta including in v2, and is therefore still applicable.

~Andrew

> ---
> V2:
> - also remove sched_reset_affinity_broken() (Jan Beulich)
> ---
>  xen/arch/x86/acpi/power.c |  3 --
>  xen/common/sched/core.c   | 78 ---------------------------------------
>  xen/include/xen/sched.h   |  1 -
>  3 files changed, 82 deletions(-)
>
> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
> index 1bb4d78392..b76f673acb 100644
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -159,10 +159,7 @@ static void thaw_domains(void)
>  
>      rcu_read_lock(&domlist_read_lock);
>      for_each_domain ( d )
> -    {
> -        restore_vcpu_affinity(d);
>          domain_unpause(d);
> -    }
>      rcu_read_unlock(&domlist_read_lock);
>  }
>  
> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> index 83455fbde1..23fa6845a8 100644
> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -1188,84 +1188,6 @@ static bool sched_check_affinity_broken(const struct sched_unit *unit)
>      return false;
>  }
>  
> -static void sched_reset_affinity_broken(const struct sched_unit *unit)
> -{
> -    struct vcpu *v;
> -
> -    for_each_sched_unit_vcpu ( unit, v )
> -        v->affinity_broken = false;
> -}
> -
> -void restore_vcpu_affinity(struct domain *d)
> -{
> -    unsigned int cpu = smp_processor_id();
> -    struct sched_unit *unit;
> -
> -    ASSERT(system_state == SYS_STATE_resume);
> -
> -    rcu_read_lock(&sched_res_rculock);
> -
> -    for_each_sched_unit ( d, unit )
> -    {
> -        spinlock_t *lock;
> -        unsigned int old_cpu = sched_unit_master(unit);
> -        struct sched_resource *res;
> -
> -        ASSERT(!unit_runnable(unit));
> -
> -        /*
> -         * Re-assign the initial processor as after resume we have no
> -         * guarantee the old processor has come back to life again.
> -         *
> -         * Therefore, here, before actually unpausing the domains, we should
> -         * set v->processor of each of their vCPUs to something that will
> -         * make sense for the scheduler of the cpupool in which they are in.
> -         */
> -        lock = unit_schedule_lock_irq(unit);
> -
> -        cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
> -                    cpupool_domain_master_cpumask(d));
> -        if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
> -        {
> -            if ( sched_check_affinity_broken(unit) )
> -            {
> -                sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
> -                sched_reset_affinity_broken(unit);
> -                cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
> -                            cpupool_domain_master_cpumask(d));
> -            }
> -
> -            if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
> -            {
> -                /* Affinity settings of one vcpu are for the complete unit. */
> -                printk(XENLOG_DEBUG "Breaking affinity for %pv\n",
> -                       unit->vcpu_list);
> -                sched_set_affinity(unit, &cpumask_all, NULL);
> -                cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
> -                            cpupool_domain_master_cpumask(d));
> -            }
> -        }
> -
> -        res = get_sched_res(cpumask_any(cpumask_scratch_cpu(cpu)));
> -        sched_set_res(unit, res);
> -
> -        spin_unlock_irq(lock);
> -
> -        /* v->processor might have changed, so reacquire the lock. */
> -        lock = unit_schedule_lock_irq(unit);
> -        res = sched_pick_resource(unit_scheduler(unit), unit);
> -        sched_set_res(unit, res);
> -        spin_unlock_irq(lock);
> -
> -        if ( old_cpu != sched_unit_master(unit) )
> -            sched_move_irqs(unit);
> -    }
> -
> -    rcu_read_unlock(&sched_res_rculock);
> -
> -    domain_update_node_affinity(d);
> -}
> -
>  /*
>   * This function is used by cpu_hotplug code via cpu notifier chain
>   * and from cpupools to switch schedulers on a cpu.
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 557b3229f6..072e4846aa 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -1019,7 +1019,6 @@ void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
>  void sched_setup_dom0_vcpus(struct domain *d);
>  int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
>  int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
> -void restore_vcpu_affinity(struct domain *d);
>  int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
>                           struct xen_domctl_vcpuaffinity *vcpuaff);
>