From: Juergen Gross
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross, George Dunlap, Dario Faggioli
Date: Fri, 27 Sep 2019 09:00:47 +0200
Message-Id: <20190927070050.12405-44-jgross@suse.com>
In-Reply-To: <20190927070050.12405-1-jgross@suse.com>
References: <20190927070050.12405-1-jgross@suse.com>
Subject: [Xen-devel] [PATCH v4 43/46] xen/sched: support differing granularity in schedule_cpu_[add/rm]()

With core scheduling active, schedule_cpu_[add/rm]() has to cope with
different scheduling granularities: a cpu not in any cpupool is subject
to granularity 1 (cpu scheduling), while a cpu in a cpupool might be in
a scheduling resource with more than one cpu.
Handle that by having arrays of old/new pdata and vdata and by looping
over those where appropriate.

Additionally, the scheduling resource(s) must either be merged or split.

Signed-off-by: Juergen Gross
Reviewed-by: Dario Faggioli
---
 xen/common/cpupool.c  |  18 ++--
 xen/common/schedule.c | 226 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 204 insertions(+), 40 deletions(-)

diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index 13dffaadcf..04c3b3c04b 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -536,6 +536,7 @@ static void cpupool_cpu_remove(unsigned int cpu)
         ret = cpupool_unassign_cpu_finish(cpupool0);
         BUG_ON(ret);
     }
+    cpumask_clear_cpu(cpu, &cpupool_free_cpus);
 }
 
 /*
@@ -585,20 +586,19 @@ static void cpupool_cpu_remove_forced(unsigned int cpu)
     struct cpupool **c;
     int ret;
 
-    if ( cpumask_test_cpu(cpu, &cpupool_free_cpus) )
-        cpumask_clear_cpu(cpu, &cpupool_free_cpus);
-    else
+    for_each_cpupool ( c )
     {
-        for_each_cpupool(c)
+        if ( cpumask_test_cpu(cpu, (*c)->cpu_valid) )
         {
-            if ( cpumask_test_cpu(cpu, (*c)->cpu_valid) )
-            {
-                ret = cpupool_unassign_cpu(*c, cpu);
-                BUG_ON(ret);
-            }
+            ret = cpupool_unassign_cpu_start(*c, cpu);
+            BUG_ON(ret);
+            ret = cpupool_unassign_cpu_finish(*c);
+            BUG_ON(ret);
         }
     }
 
+    cpumask_clear_cpu(cpu, &cpupool_free_cpus);
+
     rcu_read_lock(&sched_res_rculock);
     sched_rm_cpu(cpu);
     rcu_read_unlock(&sched_res_rculock);
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index bab24104cd..89238f801d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -416,27 +416,30 @@ static void sched_unit_add_vcpu(struct sched_unit *unit, struct vcpu *v)
     unit->runstate_cnt[v->runstate.state]++;
 }
 
-static struct sched_unit *sched_alloc_unit(struct vcpu *v)
+static struct sched_unit *sched_alloc_unit_mem(void)
 {
-    struct sched_unit *unit, **prev_unit;
-    struct domain *d = v->domain;
-    unsigned int gran = cpupool_get_granularity(d->cpupool);
+    struct sched_unit *unit;
 
-    for_each_sched_unit ( d, unit )
-        if ( unit->unit_id / gran == v->vcpu_id / gran )
-            break;
+    unit = xzalloc(struct sched_unit);
+    if ( !unit )
+        return NULL;
 
-    if ( unit )
+    if ( !zalloc_cpumask_var(&unit->cpu_hard_affinity) ||
+         !zalloc_cpumask_var(&unit->cpu_hard_affinity_saved) ||
+         !zalloc_cpumask_var(&unit->cpu_soft_affinity) )
     {
-        sched_unit_add_vcpu(unit, v);
-        return unit;
+        sched_free_unit_mem(unit);
+        unit = NULL;
     }
 
-    if ( (unit = xzalloc(struct sched_unit)) == NULL )
-        return NULL;
+    return unit;
+}
+
+static void sched_domain_insert_unit(struct sched_unit *unit, struct domain *d)
+{
+    struct sched_unit **prev_unit;
 
     unit->domain = d;
-    sched_unit_add_vcpu(unit, v);
 
     for ( prev_unit = &d->sched_unit_list; *prev_unit;
           prev_unit = &(*prev_unit)->next_in_list )
@@ -446,17 +449,31 @@ static struct sched_unit *sched_alloc_unit(struct vcpu *v)
 
     unit->next_in_list = *prev_unit;
     *prev_unit = unit;
+}
 
-    if ( !zalloc_cpumask_var(&unit->cpu_hard_affinity) ||
-         !zalloc_cpumask_var(&unit->cpu_hard_affinity_saved) ||
-         !zalloc_cpumask_var(&unit->cpu_soft_affinity) )
-        goto fail;
+
+static struct sched_unit *sched_alloc_unit(struct vcpu *v)
+{
+    struct sched_unit *unit;
+    struct domain *d = v->domain;
+    unsigned int gran = cpupool_get_granularity(d->cpupool);
 
-    return unit;
+    for_each_sched_unit ( d, unit )
+        if ( unit->unit_id / gran == v->vcpu_id / gran )
+            break;
 
-  fail:
-    sched_free_unit(unit, v);
-    return NULL;
+    if ( unit )
+    {
+        sched_unit_add_vcpu(unit, v);
+        return unit;
+    }
+
+    if ( (unit = sched_alloc_unit_mem()) == NULL )
+        return NULL;
+
+    sched_unit_add_vcpu(unit, v);
+    sched_domain_insert_unit(unit, d);
+
+    return unit;
 }
 
 static unsigned int sched_select_initial_cpu(const struct vcpu *v)
@@ -2404,18 +2421,28 @@ static void poll_timer_fn(void *data)
     vcpu_unblock(v);
 }
 
-static int cpu_schedule_up(unsigned int cpu)
+static struct sched_resource *sched_alloc_res(void)
 {
     struct sched_resource *sr;
 
     sr = xzalloc(struct sched_resource);
     if ( sr == NULL )
-        return -ENOMEM;
+        return NULL;
     if ( !zalloc_cpumask_var(&sr->cpus) )
     {
         xfree(sr);
-        return -ENOMEM;
+        return NULL;
     }
+    return sr;
+}
+
+static int cpu_schedule_up(unsigned int cpu)
+{
+    struct sched_resource *sr;
+
+    sr = sched_alloc_res();
+    if ( sr == NULL )
+        return -ENOMEM;
 
     sr->master_cpu = cpu;
     cpumask_copy(sr->cpus, cpumask_of(cpu));
@@ -2465,6 +2492,8 @@ static void sched_res_free(struct rcu_head *head)
     struct sched_resource *sr = container_of(head, struct sched_resource, rcu);
 
     free_cpumask_var(sr->cpus);
+    if ( sr->sched_unit_idle )
+        sched_free_unit_mem(sr->sched_unit_idle);
     xfree(sr);
 }
 
@@ -2481,6 +2510,8 @@ static void cpu_schedule_down(unsigned int cpu)
     cpumask_clear_cpu(cpu, &sched_res_mask);
     set_sched_res(cpu, NULL);
 
+    /* Keep idle unit. */
+    sr->sched_unit_idle = NULL;
     call_rcu(&sr->rcu, sched_res_free);
 
     rcu_read_unlock(&sched_res_rculock);
@@ -2560,6 +2591,30 @@ static struct notifier_block cpu_schedule_nfb = {
     .notifier_call = cpu_schedule_callback
 };
 
+static const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt,
+                                              unsigned int cpu)
+{
+    const cpumask_t *mask;
+
+    switch ( opt )
+    {
+    case SCHED_GRAN_cpu:
+        mask = cpumask_of(cpu);
+        break;
+    case SCHED_GRAN_core:
+        mask = per_cpu(cpu_sibling_mask, cpu);
+        break;
+    case SCHED_GRAN_socket:
+        mask = per_cpu(cpu_core_mask, cpu);
+        break;
+    default:
+        ASSERT_UNREACHABLE();
+        return NULL;
+    }
+
+    return mask;
+}
+
 /* Initialise the data structures. */
 void __init scheduler_init(void)
 {
@@ -2715,6 +2770,46 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
      */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
+    if ( cpupool_get_granularity(c) > 1 )
+    {
+        const cpumask_t *mask;
+        unsigned int cpu_iter, idx = 0;
+        struct sched_unit *old_unit, *master_unit;
+        struct sched_resource *sr_old;
+
+        /*
+         * We need to merge multiple idle_vcpu units and sched_resource structs
+         * into one. As the free cpus all share the same lock we are fine doing
+         * that now. The worst which could happen would be someone waiting for
+         * the lock, thus dereferencing sched_res->schedule_lock. This is the
+         * reason we are freeing struct sched_res via call_rcu() to avoid the
+         * lock pointer suddenly disappearing.
+         */
+        mask = sched_get_opt_cpumask(c->gran, cpu);
+        master_unit = idle_vcpu[cpu]->sched_unit;
+
+        for_each_cpu ( cpu_iter, mask )
+        {
+            if ( idx )
+                cpumask_clear_cpu(cpu_iter, &sched_res_mask);
+
+            per_cpu(sched_res_idx, cpu_iter) = idx++;
+
+            if ( cpu == cpu_iter )
+                continue;
+
+            old_unit = idle_vcpu[cpu_iter]->sched_unit;
+            sr_old = get_sched_res(cpu_iter);
+            kill_timer(&sr_old->s_timer);
+            idle_vcpu[cpu_iter]->sched_unit = master_unit;
+            master_unit->runstate_cnt[RUNSTATE_running]++;
+            set_sched_res(cpu_iter, sr);
+            cpumask_set_cpu(cpu_iter, sr->cpus);
+
+            call_rcu(&sr_old->rcu, sched_res_free);
+        }
+    }
+
     new_lock = sched_switch_sched(new_ops, cpu, ppriv, vpriv);
 
     sr->scheduler = new_ops;
@@ -2752,33 +2847,100 @@ out:
  */
 int schedule_cpu_rm(unsigned int cpu)
 {
-    struct vcpu *idle;
     void *ppriv_old, *vpriv_old;
-    struct sched_resource *sr;
+    struct sched_resource *sr, **sr_new = NULL;
+    struct sched_unit *unit;
     struct scheduler *old_ops;
     spinlock_t *old_lock;
     unsigned long flags;
+    int idx, ret = -ENOMEM;
+    unsigned int cpu_iter;
 
     rcu_read_lock(&sched_res_rculock);
 
     sr = get_sched_res(cpu);
     old_ops = sr->scheduler;
 
+    if ( sr->granularity > 1 )
+    {
+        sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1);
+        if ( !sr_new )
+            goto out;
+        for ( idx = 0; idx < sr->granularity - 1; idx++ )
+        {
+            sr_new[idx] = sched_alloc_res();
+            if ( sr_new[idx] )
+            {
+                sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem();
+                if ( !sr_new[idx]->sched_unit_idle )
+                {
+                    sched_res_free(&sr_new[idx]->rcu);
+                    sr_new[idx] = NULL;
+                }
+            }
+            if ( !sr_new[idx] )
+            {
+                for ( idx--; idx >= 0; idx-- )
+                    sched_res_free(&sr_new[idx]->rcu);
+                goto out;
+            }
+            sr_new[idx]->curr = sr_new[idx]->sched_unit_idle;
+            sr_new[idx]->scheduler = &sched_idle_ops;
+            sr_new[idx]->granularity = 1;
+
+            /* We want the lock not to change when replacing the resource. */
+            sr_new[idx]->schedule_lock = sr->schedule_lock;
+        }
+    }
+
+    ret = 0;
     ASSERT(sr->cpupool != NULL);
     ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
     ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid));
 
-    idle = idle_vcpu[cpu];
-
     sched_do_tick_suspend(old_ops, cpu);
 
     /* See comment in schedule_cpu_add() regarding lock switching. */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
-    vpriv_old = idle->sched_unit->priv;
+    vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
     ppriv_old = sr->sched_priv;
 
-    idle->sched_unit->priv = NULL;
+    idx = 0;
+    for_each_cpu ( cpu_iter, sr->cpus )
+    {
+        per_cpu(sched_res_idx, cpu_iter) = 0;
+        if ( cpu_iter == cpu )
+        {
+            idle_vcpu[cpu_iter]->sched_unit->priv = NULL;
+        }
+        else
+        {
+            /* Initialize unit. */
+            unit = sr_new[idx]->sched_unit_idle;
+            unit->res = sr_new[idx];
+            unit->is_running = true;
+            sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]);
+            sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain);
+
+            /* Adjust cpu masks of resources (old and new). */
+            cpumask_clear_cpu(cpu_iter, sr->cpus);
+            cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus);
+
+            /* Init timer. */
+            init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
+
+            /* Last resource initializations and insert resource pointer. */
+            sr_new[idx]->master_cpu = cpu_iter;
+            set_sched_res(cpu_iter, sr_new[idx]);
+
+            /* Last action: set the new lock pointer. */
+            smp_mb();
+            sr_new[idx]->schedule_lock = &sched_free_cpu_lock;
+
+            idx++;
+        }
+    }
     sr->scheduler = &sched_idle_ops;
     sr->sched_priv = NULL;
 
@@ -2796,9 +2958,11 @@ int schedule_cpu_rm(unsigned int cpu)
     sr->granularity = 1;
     sr->cpupool = NULL;
 
+out:
     rcu_read_unlock(&sched_res_rculock);
+    xfree(sr_new);
 
-    return 0;
+    return ret;
 }
 
 struct scheduler *scheduler_get_default(void)
-- 
2.16.4
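
To make the merge/split mechanics easier to follow, here is a standalone
toy model (not Xen code: the types, names and bitmask representation
below are made up for illustration). schedule_cpu_add() with granularity
> 1 funnels all sibling cpus of a granularity unit onto one master
sched_resource and frees the now redundant per-cpu resources (deferred
via call_rcu() in the real code), while schedule_cpu_rm() re-creates a
single-cpu resource for every sibling and detaches it from the master
again:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 8

/* Toy stand-in for struct sched_resource. */
struct resource {
    int master_cpu;
    unsigned int cpus;        /* bitmask of cpus served by this resource */
};

/* Toy stand-in for the per-cpu scheduling resource pointers. */
static struct resource *res_of[NR_CPUS];

static struct resource *alloc_res(int cpu)
{
    struct resource *sr = calloc(1, sizeof(*sr));

    sr->master_cpu = cpu;
    sr->cpus = 1u << cpu;
    return sr;
}

/* Merge: attach every cpu in mask to cpu's resource (schedule_cpu_add()). */
static void model_cpu_add(int cpu, unsigned int mask)
{
    struct resource *sr = res_of[cpu];
    int i;

    for ( i = 0; i < NR_CPUS; i++ )
    {
        if ( !(mask & (1u << i)) || i == cpu )
            continue;
        free(res_of[i]);      /* the real code defers this via call_rcu() */
        res_of[i] = sr;
        sr->cpus |= 1u << i;
    }
}

/* Split: give each sibling its own resource back (schedule_cpu_rm()). */
static void model_cpu_rm(int cpu)
{
    struct resource *sr = res_of[cpu];
    int i;

    for ( i = 0; i < NR_CPUS; i++ )
    {
        if ( !(sr->cpus & (1u << i)) || i == cpu )
            continue;
        res_of[i] = alloc_res(i);
        sr->cpus &= ~(1u << i);
    }
}

int main(void)
{
    int i;

    for ( i = 0; i < NR_CPUS; i++ )
        res_of[i] = alloc_res(i);

    model_cpu_add(0, 0x3);    /* "core" granularity: merge cpus 0 and 1 */
    printf("after add: cpu1 is served by master cpu%d\n",
           res_of[1]->master_cpu);

    model_cpu_rm(0);          /* split back into per-cpu resources */
    printf("after rm:  cpu1 is served by master cpu%d\n",
           res_of[1]->master_cpu);

    return 0;
}

Compiled with any C99 compiler this prints master cpu0 after the merge
and master cpu1 after the split, mirroring the before/after view the
patch establishes for real scheduling resources.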
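
One subtlety in schedule_cpu_rm() deserves spelling out: each
replacement resource initially aliases the old resource's schedule_lock
("We want the lock not to change when replacing the resource"), so a
concurrent pcpu_schedule_lock() that already fetched a resource pointer
keeps dereferencing a lock pointer that stays valid, and the old struct
itself is only freed via call_rcu(). Only once a new resource is fully
initialised and published does its lock pointer switch, with the
smp_mb() ensuring the switch cannot be observed before the
initialisation. A compressed sketch of that ordering, with C11 atomics
standing in for Xen's primitives (only schedule_lock and
sched_free_cpu_lock are names from the patch; everything else is
illustrative):

#include <stdatomic.h>

typedef int spinlock_t;                    /* placeholder lock type */

static spinlock_t sched_free_cpu_lock;

struct res {
    _Atomic(spinlock_t *) schedule_lock;
    int fully_initialised;                 /* stands in for all other state */
};

static void replace_resource(struct res *old, struct res *new_sr)
{
    /* Step 1: alias the old lock, so racing lockers stay correct. */
    atomic_store_explicit(&new_sr->schedule_lock,
                          atomic_load(&old->schedule_lock),
                          memory_order_relaxed);

    new_sr->fully_initialised = 1;         /* finish all other init */

    /*
     * Step 2: switch the lock pointer last; the release store plays the
     * role of the patch's smp_mb() before the final pointer update.
     */
    atomic_store_explicit(&new_sr->schedule_lock, &sched_free_cpu_lock,
                          memory_order_release);
}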