[libvirt PATCH] qemu: fix detection of vCPU pids when multiple dies are present

Daniel P. Berrangé posted 1 patch 4 years, 1 month ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200313164616.1417745-1-berrange@redhat.com
src/qemu/qemu_domain.c       | 10 ++++++++--
src/qemu/qemu_monitor.c      |  2 ++
src/qemu/qemu_monitor.h      |  2 ++
src/qemu/qemu_monitor_json.c |  4 ++++
4 files changed, 16 insertions(+), 2 deletions(-)
[libvirt PATCH] qemu: fix detection of vCPU pids when multiple dies are present
Posted by Daniel P. Berrangé 4 years, 1 month ago
The logic for querying hotpluggable CPUs needs to sort the list
of CPUs returned by QEMU. Unfortunately our sorting method failed
to use the die_id field, so CPUs were not correctly sorted.

This is seen when configuring a guest with partially populated
CPUs

  <vcpu placement='static' current='1'>16</vcpu>
  <cpu...>
    <topology sockets='4' dies='2' cores='1' threads='2'/>
  </cpu>

Then trying to start it would fail:

  # virsh -c qemu:///system start demo
  error: Failed to start domain demo
  error: internal error: qemu didn't report thread id for vcpu '0'

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 src/qemu/qemu_domain.c       | 10 ++++++++--
 src/qemu/qemu_monitor.c      |  2 ++
 src/qemu/qemu_monitor.h      |  2 ++
 src/qemu/qemu_monitor_json.c |  4 ++++
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 4b467afa81..acb93d24ae 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -13537,8 +13537,14 @@ qemuDomainRefreshVcpuInfo(virQEMUDriverPtr driver,
         }
 
         if (validTIDs)
-            VIR_DEBUG("vCPU[%zu] PID %llu is valid",
-                      i, (unsigned long long)info[i].tid);
+            VIR_DEBUG("vCPU[%zu] PID %llu is valid "
+                      "(node=%d socket=%d die=%d core=%d thread=%d)",
+                      i, (unsigned long long)info[i].tid,
+                      info[i].node_id,
+                      info[i].socket_id,
+                      info[i].die_id,
+                      info[i].core_id,
+                      info[i].thread_id);
     }
 
     VIR_DEBUG("Extracting vCPU information validTIDs=%d", validTIDs);
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index e3d621f038..017b818f73 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -1615,6 +1615,7 @@ qemuMonitorCPUInfoClear(qemuMonitorCPUInfoPtr cpus,
         cpus[i].id = 0;
         cpus[i].qemu_id = -1;
         cpus[i].socket_id = -1;
+        cpus[i].die_id = -1;
         cpus[i].core_id = -1;
         cpus[i].thread_id = -1;
         cpus[i].node_id = -1;
@@ -1770,6 +1771,7 @@ qemuMonitorGetCPUInfoHotplug(struct qemuMonitorQueryHotpluggableCpusEntry *hotpl
         vcpus[mastervcpu].hotpluggable = !!hotplugvcpus[i].alias ||
                                          !vcpus[mastervcpu].online;
         vcpus[mastervcpu].socket_id = hotplugvcpus[i].socket_id;
+        vcpus[mastervcpu].die_id = hotplugvcpus[i].die_id;
         vcpus[mastervcpu].core_id = hotplugvcpus[i].core_id;
         vcpus[mastervcpu].thread_id = hotplugvcpus[i].thread_id;
         vcpus[mastervcpu].node_id = hotplugvcpus[i].node_id;
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 6ccea909e9..be4195e734 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -561,6 +561,7 @@ struct qemuMonitorQueryHotpluggableCpusEntry {
     /* topology information -1 if qemu didn't report given parameter */
     int node_id;
     int socket_id;
+    int die_id;
     int core_id;
     int thread_id;
 
@@ -583,6 +584,7 @@ struct _qemuMonitorCPUInfo {
     /* topology info for hotplug purposes. Hotplug of given vcpu impossible if
      * all entries are -1 */
     int socket_id;
+    int die_id;
     int core_id;
     int thread_id;
     int node_id;
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 3eac80c060..04affb851f 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -8565,6 +8565,7 @@ qemuMonitorJSONProcessHotpluggableCpusReply(virJSONValuePtr vcpu,
 
     ignore_value(virJSONValueObjectGetNumberInt(props, "node-id", &entry->node_id));
     ignore_value(virJSONValueObjectGetNumberInt(props, "socket-id", &entry->socket_id));
+    ignore_value(virJSONValueObjectGetNumberInt(props, "die-id", &entry->die_id));
     ignore_value(virJSONValueObjectGetNumberInt(props, "core-id", &entry->core_id));
     ignore_value(virJSONValueObjectGetNumberInt(props, "thread-id", &entry->thread_id));
 
@@ -8599,6 +8600,9 @@ qemuMonitorQueryHotpluggableCpusEntrySort(const void *p1,
     if (a->socket_id != b->socket_id)
         return a->socket_id - b->socket_id;
 
+    if (a->die_id != b->die_id)
+        return a->die_id - b->die_id;
+
     if (a->core_id != b->core_id)
         return a->core_id - b->core_id;
 
-- 
2.24.1

Re: [libvirt PATCH] qemu: fix detection of vCPU pids when multiple dies are present
Posted by Peter Krempa 4 years, 1 month ago
On Fri, Mar 13, 2020 at 16:46:16 +0000, Daniel Berrange wrote:
> The logic for querying hotpluggable CPUs needs to sort the list
> of CPUs returned by QEMU. Unfortunately our sorting method failed
> to use the die_id field, so CPUs were not correctly sorted.
> 
> This is seen when configuring a guest with partially populated
> CPUs
> 
>   <vcpu placement='static' current='1'>16</vcpu>
>   <cpu...>
>     <topology sockets='4' dies='2' cores='1' threads='2'/>
>   </cpu>
> 
> Then trying to start it would fail:
> 
>   # virsh -c qemu:///system start demo
>   error: Failed to start domain demo
>   error: internal error: qemu didn't report thread id for vcpu '0'
> 
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> ---
>  src/qemu/qemu_domain.c       | 10 ++++++++--
>  src/qemu/qemu_monitor.c      |  2 ++
>  src/qemu/qemu_monitor.h      |  2 ++
>  src/qemu/qemu_monitor_json.c |  4 ++++
>  4 files changed, 16 insertions(+), 2 deletions(-)

Please add a test entry to 'testQemuMonitorCPUInfo' test case for this
purpose. You'll need to extend testQemuMonitorCPUInfoFormat.

You can use:

Reviewed-by: Peter Krempa <pkrempa@redhat.com>

on this patch, but the test change should be reviewed separately and
also should go in together with this patch.

Re: [libvirt PATCH] qemu: fix detection of vCPU pids when multiple dies are present
Posted by Daniel P. Berrangé 4 years, 1 month ago
On Mon, Mar 16, 2020 at 09:53:13AM +0100, Peter Krempa wrote:
> On Fri, Mar 13, 2020 at 16:46:16 +0000, Daniel Berrange wrote:
> > The logic for querying hotpluggable CPUs needs to sort the list
> > of CPUs returned by QEMU. Unfortunately our sorting method failed
> > to use the die_id field, so CPUs were not correctly sorted.
> > 
> > This is seen when configuring a guest with partially populated
> > CPUs
> > 
> >   <vcpu placement='static' current='1'>16</vcpu>
> >   <cpu...>
> >     <topology sockets='4' dies='2' cores='1' threads='2'/>
> >   </cpu>
> > 
> > Then trying to start it would fail:
> > 
> >   # virsh -c qemu:///system start demo
> >   error: Failed to start domain demo
> >   error: internal error: qemu didn't report thread id for vcpu '0'
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >  src/qemu/qemu_domain.c       | 10 ++++++++--
> >  src/qemu/qemu_monitor.c      |  2 ++
> >  src/qemu/qemu_monitor.h      |  2 ++
> >  src/qemu/qemu_monitor_json.c |  4 ++++
> >  4 files changed, 16 insertions(+), 2 deletions(-)
> 
> Please add a test entry to 'testQemuMonitorCPUInfo' test case for this
> purpose. You'll need to extend testQemuMonitorCPUInfoFormat.

That test suite doesn't illustrate the brokenness AFAICT. I added
the data files and it still passes, even without the fix.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [libvirt PATCH] qemu: fix detection of vCPU pids when multiple dies are present
Posted by Daniel P. Berrangé 4 years, 1 month ago
On Mon, Mar 16, 2020 at 01:06:47PM +0000, Daniel P. Berrangé wrote:
> On Mon, Mar 16, 2020 at 09:53:13AM +0100, Peter Krempa wrote:
> > On Fri, Mar 13, 2020 at 16:46:16 +0000, Daniel Berrange wrote:
> > > The logic for querying hotpluggable CPUs needs to sort the list
> > > of CPUs returned by QEMU. Unfortunately our sorting method failed
> > > to use the die_id field, so CPUs were not correctly sorted.
> > > 
> > > This is seen when configuring a guest with partially populated
> > > CPUs
> > > 
> > >   <vcpu placement='static' current='1'>16</vcpu>
> > >   <cpu...>
> > >     <topology sockets='4' dies='2' cores='1' threads='2'/>
> > >   </cpu>
> > > 
> > > Then trying to start it would fail:
> > > 
> > >   # virsh -c qemu:///system start demo
> > >   error: Failed to start domain demo
> > >   error: internal error: qemu didn't report thread id for vcpu '0'
> > > 
> > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > > ---
> > >  src/qemu/qemu_domain.c       | 10 ++++++++--
> > >  src/qemu/qemu_monitor.c      |  2 ++
> > >  src/qemu/qemu_monitor.h      |  2 ++
> > >  src/qemu/qemu_monitor_json.c |  4 ++++
> > >  4 files changed, 16 insertions(+), 2 deletions(-)
> > 
> > Please add a test entry to 'testQemuMonitorCPUInfo' test case for this
> > purpose. You'll need to extend testQemuMonitorCPUInfoFormat.
> 
> That test suite doesn't illustrate the brokenness AFAICT. I added
> the data files and it still passes, even without the fix.

Actually this was a bug in the test case addition which made it silently
do the wrong thing.

I passed current CPUs instead of max CPUs to the test case and thus

qemuMonitorGetCPUInfoHotplug() ended up hitting:

    if (totalvcpus != maxvcpus) {
        VIR_DEBUG("expected '%zu' total vcpus got '%zu'", maxvcpus, totalvcpus);
        return -1;
    }

qemuMonitorGetCPUInfo()  ignores this error though and falls back to
reporting some generic info. I'm not convinced that qemuMonitorGetCPUInfo
is right in ignoring all errors from qemuMonitorGetCPUInfoHotplug but
I've not investigated further.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|