Subject: [PATCH 1/2] xen: credit2: factor cpu to runqueue matching in a function
From: Dario Faggioli
To: xen-devel@lists.xenproject.org
Cc: George Dunlap
Date: Wed, 29 Apr 2020 19:36:30 +0200
Message-ID: <158818178990.24327.6732870355943077303.stgit@Palanthas>
In-Reply-To: <158818022727.24327.14309662489731832234.stgit@Palanthas>
References: <158818022727.24327.14309662489731832234.stgit@Palanthas>
User-Agent: StGit/0.21

Just move the big if() condition into an inline function.

No functional change intended.
Signed-off-by: Dario Faggioli
---
Cc: George Dunlap
---
 xen/common/sched/credit2.c |   28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 34f05c3e2a..697c9f917d 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -838,6 +838,20 @@ static inline bool same_core(unsigned int cpua, unsigned int cpub)
            cpu_to_core(cpua) == cpu_to_core(cpub);
 }
 
+static inline bool
+cpu_runqueue_match(const struct csched2_runqueue_data *rqd, unsigned int cpu)
+{
+    unsigned int peer_cpu = rqd->pick_bias;
+
+    BUG_ON(cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
+
+    /* OPT_RUNQUEUE_CPU will never find an existing runqueue. */
+    return opt_runqueue == OPT_RUNQUEUE_ALL ||
+           (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
+           (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
+           (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu));
+}
+
 static struct csched2_runqueue_data *
 cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
 {
@@ -855,21 +869,11 @@ cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
     rqd_ins = &prv->rql;
     list_for_each_entry ( rqd, &prv->rql, rql )
     {
-        unsigned int peer_cpu;
-
         /* Remember first unused queue index. */
         if ( !rqi_unused && rqd->id > rqi )
             rqi_unused = true;
 
-        peer_cpu = rqd->pick_bias;
-        BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
-               cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
-
-        /* OPT_RUNQUEUE_CPU will never find an existing runqueue. */
-        if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
-             (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
-             (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
-             (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
+        if ( cpu_runqueue_match(rqd, cpu) )
         {
             rqd_valid = true;
             break;
@@ -3744,6 +3748,8 @@ csched2_alloc_pdata(const struct scheduler *ops, int cpu)
     struct csched2_pcpu *spc;
     struct csched2_runqueue_data *rqd;
 
+    BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID);
+
     spc = xzalloc(struct csched2_pcpu);
     if ( spc == NULL )
         return ERR_PTR(-ENOMEM);
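
For context (an illustration, not part of the patch): the opt_runqueue value
that the new cpu_runqueue_match() helper inspects is set by the existing
credit2_runqueue= boot option. For example, booting with

    credit2_runqueue=socket

means the helper reports a match only when an existing runqueue's pick_bias
CPU and the CPU being added satisfy same_socket(), while credit2_runqueue=all
matches any existing runqueue.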

Subject: [PATCH 2/2] xen: credit2: limit the max number of CPUs in a runqueue
From: Dario Faggioli
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich
Date: Wed, 29 Apr 2020 19:36:35 +0200
Message-ID: <158818179558.24327.11334680191217289878.stgit@Palanthas>
In-Reply-To: <158818022727.24327.14309662489731832234.stgit@Palanthas>
References: <158818022727.24327.14309662489731832234.stgit@Palanthas>
User-Agent: StGit/0.21

In Credit2, CPUs (can) share runqueues, depending on the topology. For
instance, with per-socket runqueues (the default), all the CPUs that are
part of the same socket share a runqueue.

On platforms with a huge number of CPUs per socket, that could be a
problem. An example is AMD EPYC2 servers, where we can have up to 128
CPUs in a socket.

It is of course possible to define other, still topology-based, runqueue
arrangements (e.g., per-LLC, per-DIE, etc). But that may still result in
runqueues with too many CPUs on other/future platforms.

Therefore, let's set a limit on the max number of CPUs that can share a
Credit2 runqueue. The actual value is configurable (at boot time), the
default being 16. If, for instance, there are more than 16 CPUs in a
socket, they'll be split among two (or more) runqueues.

Note: with core scheduling enabled, this parameter sets the max number of
*scheduling resources* that can share a runqueue. Therefore, with
granularity set to core (and assuming 2 threads per core), we will have
at most 16 cores per runqueue, which corresponds to 32 threads. But that
is fine, considering how core scheduling works.
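As a usage illustration (example values only; the parameter name is the one
introduced by the patch below), the cap is a boot-time option and the
resulting split is simple arithmetic:

    # Hypothetical Xen command-line fragment: keep per-socket runqueues,
    # but never let more than 16 CPUs share one of them.
    sched_credit2_max_cpus_runqueue=16 credit2_runqueue=socket

    # On a host with 128 CPUs per socket, the default cap of 16 then
    # yields 128 / 16 = 8 runqueues per socket.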
Signed-off-by: Dario Faggioli
---
Cc: Andrew Cooper
Cc: George Dunlap
Cc: Jan Beulich
Cc: Juergen Gross
---
 xen/common/sched/cpupool.c |    2 -
 xen/common/sched/credit2.c |  104 ++++++++++++++++++++++++++++++++++++++++++++++--
 xen/common/sched/private.h |    2 +
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d40345b585..0227457285 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -37,7 +37,7 @@ static cpumask_t cpupool_locked_cpus;
 
 static DEFINE_SPINLOCK(cpupool_lock);
 
-static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
+enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
 static unsigned int __read_mostly sched_granularity = 1;
 
 #ifdef CONFIG_HAS_SCHED_GRANULARITY
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 697c9f917d..abe4d048c8 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -471,6 +471,16 @@ static int __init parse_credit2_runqueue(const char *s)
 }
 custom_param("credit2_runqueue", parse_credit2_runqueue);
 
+/*
+ * How many CPUs will be put, at most, in the same runqueue.
+ * Runqueues are still arranged according to the host topology (and
+ * according to the value of the 'credit2_runqueue' parameter). But
+ * we also have a cap to the number of CPUs that share runqueues.
+ * As soon as we reach the limit, a new runqueue will be created.
+ */
+static unsigned int __read_mostly opt_max_cpus_runqueue = 16;
+integer_param("sched_credit2_max_cpus_runqueue", opt_max_cpus_runqueue);
+
 /*
  * Per-runqueue data
  */
@@ -852,14 +862,61 @@ cpu_runqueue_match(const struct csched2_runqueue_data *rqd, unsigned int cpu)
            (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu));
 }
 
+/* Additional checks, to avoid separating siblings in different runqueues. */
+static bool
+cpu_runqueue_smt_match(const struct csched2_runqueue_data *rqd, unsigned int cpu)
+{
+    unsigned int nr_sibl = cpumask_weight(per_cpu(cpu_sibling_mask, cpu));
+    unsigned int rcpu, nr_smts = 0;
+
+    /*
+     * If we put the CPU in this runqueue, we must be sure that there will
+     * be enough room for accepting its hyperthread sibling(s) here as well.
+     */
+    cpumask_clear(cpumask_scratch_cpu(cpu));
+    for_each_cpu ( rcpu, &rqd->active )
+    {
+        ASSERT(rcpu != cpu);
+        if ( !cpumask_test_cpu(rcpu, cpumask_scratch_cpu(cpu)) )
+        {
+            /*
+             * For each CPU already in the runqueue, account for it and for
+             * its sibling(s), independently from whether such sibling(s) are
+             * in the runqueue already or not.
+             *
+             * Of course, if there are sibling CPUs in the runqueue already,
+             * only count them once.
+             */
+            cpumask_or(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                       per_cpu(cpu_sibling_mask, rcpu));
+            nr_smts += nr_sibl;
+        }
+    }
+    /*
+     * We know that neither the CPU, nor any of its sibling are here,
+     * or we wouldn't even have entered the function.
+     */
+    ASSERT(!cpumask_intersects(cpumask_scratch_cpu(cpu),
+                               per_cpu(cpu_sibling_mask, cpu)));
+
+    /* Try adding CPU and its sibling(s) to the count and check... */
+    nr_smts += nr_sibl;
+
+    if ( nr_smts <= opt_max_cpus_runqueue )
+        return true;
+
+    return false;
+}
+
 static struct csched2_runqueue_data *
 cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
 {
     struct csched2_runqueue_data *rqd, *rqd_new;
+    struct csched2_runqueue_data *rqd_valid = NULL;
     struct list_head *rqd_ins;
     unsigned long flags;
     int rqi = 0;
-    bool rqi_unused = false, rqd_valid = false;
+    bool rqi_unused = false;
 
     /* Prealloc in case we need it - not allowed with interrupts off. */
     rqd_new = xzalloc(struct csched2_runqueue_data);
@@ -873,11 +930,44 @@ cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
         if ( !rqi_unused && rqd->id > rqi )
             rqi_unused = true;
 
-        if ( cpu_runqueue_match(rqd, cpu) )
+        /*
+         * Check whether the CPU should (according to the topology) and also
+         * can (if there aren't too many already) go in this runqueue.
+         */
+        if ( rqd->refcnt < opt_max_cpus_runqueue &&
+             cpu_runqueue_match(rqd, cpu) )
         {
-            rqd_valid = true;
-            break;
+            cpumask_t *siblings = per_cpu(cpu_sibling_mask, cpu);
+
+            dprintk(XENLOG_DEBUG, "CPU %d matches runq %d, cpus={%*pbl} (max %d)\n",
+                    cpu, rqd->id, CPUMASK_PR(&rqd->active),
+                    opt_max_cpus_runqueue);
+
+            /*
+             * If we're using core (or socket!) scheduling, or we don't have
+             * hyperthreading, no need to do any further checking.
+             *
+             * If no (to both), but our sibling is already in this runqueue,
+             * then it's also ok for the CPU to stay in this runqueue..
+             *
+             * Otherwise, do some more checks, to better account for SMT.
+             */
+            if ( opt_sched_granularity != SCHED_GRAN_cpu ||
+                 cpumask_weight(siblings) <= 1 ||
+                 cpumask_intersects(&rqd->active, siblings) )
+            {
+                dprintk(XENLOG_DEBUG, "runq %d selected\n", rqd->id);
+                rqd_valid = rqd;
+                break;
+            }
+            else if ( cpu_runqueue_smt_match(rqd, cpu) )
+            {
+                dprintk(XENLOG_DEBUG, "considering runq %d...\n", rqd->id);
+                rqd_valid = rqd;
+            }
         }
+        else
+            dprintk(XENLOG_DEBUG, "ignoring runq %d\n", rqd->id);
 
         if ( !rqi_unused )
         {
@@ -900,6 +990,12 @@ cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
         rqd->pick_bias = cpu;
         rqd->id = rqi;
     }
+    else
+        rqd = rqd_valid;
+
+    printk(XENLOG_INFO "CPU %d (sibling={%*pbl}) will go to runqueue %d with {%*pbl}\n",
+           cpu, CPUMASK_PR(per_cpu(cpu_sibling_mask, cpu)), rqd->id,
+           CPUMASK_PR(&rqd->active));
 
     rqd->refcnt++;
 
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 367811a12f..e964e3f407 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -30,6 +30,8 @@ enum sched_gran {
     SCHED_GRAN_socket
 };
 
+extern enum sched_gran opt_sched_granularity;
+
 /*
  * In order to allow a scheduler to remap the lock->cpu mapping,
  * we have a per-cpu pointer, along with a pre-allocated set of
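
To make the SMT accounting above concrete, here is a worked example with
illustrative numbers (2 threads per core and the default cap of 16, neither
of which is mandated by the patch): when cpu_runqueue_smt_match() examines a
runqueue whose active mask already holds 7 full cores, the loop accumulates
nr_smts = 7 * 2 = 14; adding the incoming CPU and its sibling gives
14 + 2 = 16 <= opt_max_cpus_runqueue, so the CPU is accepted. Had the
runqueue already held 8 full cores, the check would be 16 + 2 = 18 > 16, the
function would return false, and the CPU would go looking for (or create) a
different runqueue.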