[libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled

John Ferlan posted 1 patch 5 years, 4 months ago
Test syntax-check passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20181126233806.28276-1-jferlan@redhat.com
There is a newer version of this series
src/qemu/qemu_capabilities.c | 45 ++++++++++++++++++++++++++++++++++++
src/qemu/qemu_capspriv.h     |  2 ++
tests/qemucapabilitiestest.c |  3 +++
3 files changed, 50 insertions(+)
[libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by John Ferlan 5 years, 4 months ago
Support for nested KVM is handled via a kernel module configuration
adjustment which if done after libvirtd is started and/or the last
QEMU capabilities adjustment can result in the inability to start a
guest and use nested KVM until the capabilities cache is invalidated.
This is because without knowing, the CPU settings for a guest may not
add the vmx=on to/for the guest config.

Thus, let's fetch and save the setting during initialization and then
when the capabilities are checked for various host related adjustments
that could affect whether the capabilities cache is updated add a check
whether the nested value was set for Intel, AMD, or s390 to force a
refetch of the capabilities.

Signed-off-by: John Ferlan <jferlan@redhat.com>
---
 v1 was part of an RFC:

 https://www.redhat.com/archives/libvir-list/2018-November/msg00494.html

 This patch alters that code slightly to add the check Marc Hartmayer
 requested for S390 and to use "kvm" in the API names and variables to
 make it clearer that it's not CapsIsNested but CapsKVMIsNested.

 If it's felt the new check should slide down further in virQEMUCapsIsValid
 then that's fine - just let me know what it should follow.

 src/qemu/qemu_capabilities.c | 45 ++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_capspriv.h     |  2 ++
 tests/qemucapabilitiestest.c |  3 +++
 3 files changed, 50 insertions(+)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index fde27010e4..c377733fe6 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -40,6 +40,7 @@
 #include "virnodesuspend.h"
 #include "virnuma.h"
 #include "virhostcpu.h"
+#include "virkmod.h"
 #include "qemu_monitor.h"
 #include "virstring.h"
 #include "qemu_hostdev.h"
@@ -557,6 +558,7 @@ struct _virQEMUCaps {
     virObject parent;
 
     bool usedQMP;
+    bool kvmIsNested;
 
     char *binary;
     time_t ctime;
@@ -1528,6 +1530,7 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps)
         return NULL;
 
     ret->usedQMP = qemuCaps->usedQMP;
+    ret->kvmIsNested = qemuCaps->kvmIsNested;
 
     if (VIR_STRDUP(ret->binary, qemuCaps->binary) < 0)
         goto error;
@@ -3587,6 +3590,9 @@ virQEMUCapsLoadCache(virArch hostArch,
     virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_KVM);
     virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_QEMU);
 
+    qemuCaps->kvmIsNested = virXPathBoolean("count(./kvmIsNested) > 0",
+                                            ctxt) > 0;
+
     ret = 0;
  cleanup:
     VIR_FREE(str);
@@ -3806,6 +3812,9 @@ virQEMUCapsFormatCache(virQEMUCapsPtr qemuCaps)
     if (qemuCaps->sevCapabilities)
         virQEMUCapsFormatSEVInfo(qemuCaps, &buf);
 
+    if (qemuCaps->kvmIsNested)
+        virBufferAddLit(&buf, "<kvmIsNested/>\n");
+
     virBufferAdjustIndent(&buf, -2);
     virBufferAddLit(&buf, "</qemuCaps>\n");
 
@@ -3846,6 +3855,30 @@ virQEMUCapsSaveFile(void *data,
 }
 
 
+static bool
+virQEMUCapsKVMIsNested(void)
+{
+    VIR_AUTOFREE(char *) kConfig = NULL;
+
+    /* Intel, AMD, and s390 related checks */
+    if ((kConfig = virKModConfig()) &&
+        (strstr(kConfig, "kvm_intel nested=1") ||
+         strstr(kConfig, "kvm_amd nested=1") ||
+         strstr(kConfig, "kvm nested=1")))
+        return true;
+    return false;
+}
+
+
+void
+virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps)
+{
+    /* For qemucapabilitiestest to avoid printing the </kvmIsNested> on
+     * hosts with nested set in the kernel */
+    qemuCaps->kvmIsNested = false;
+}
+
+
 static bool
 virQEMUCapsIsValid(void *data,
                    void *privData)
@@ -3854,6 +3887,7 @@ virQEMUCapsIsValid(void *data,
     virQEMUCapsCachePrivPtr priv = privData;
     bool kvmUsable;
     struct stat sb;
+    bool kvmIsNested;
 
     if (!qemuCaps->binary)
         return true;
@@ -3886,6 +3920,15 @@ virQEMUCapsIsValid(void *data,
         return false;
     }
 
+    /* Check if someone changed the nested={0|1} value for the kernel from
+     * the previous time we checked. If so, then refresh the capabilities. */
+    kvmIsNested = virQEMUCapsKVMIsNested();
+    if (kvmIsNested != qemuCaps->kvmIsNested) {
+        VIR_WARN("changed kernel nested kvm value was %d", qemuCaps->kvmIsNested);
+        qemuCaps->kvmIsNested = kvmIsNested;
+        return false;
+    }
+
     if (!virQEMUCapsGuestIsNative(priv->hostArch, qemuCaps->arch)) {
         VIR_DEBUG("Guest arch (%s) is not native to host arch (%s), "
                   "skipping KVM-related checks",
@@ -4472,6 +4515,8 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
     if (virQEMUCapsInitQMPMonitor(qemuCaps, cmd->mon) < 0)
         goto cleanup;
 
+    qemuCaps->kvmIsNested = virQEMUCapsKVMIsNested();
+
     if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM)) {
         virQEMUCapsInitQMPCommandAbort(cmd);
         if ((rc = virQEMUCapsInitQMPCommandRun(cmd, true)) != 0) {
diff --git a/src/qemu/qemu_capspriv.h b/src/qemu/qemu_capspriv.h
index 8d1a40fe74..edfe2cd6f6 100644
--- a/src/qemu/qemu_capspriv.h
+++ b/src/qemu/qemu_capspriv.h
@@ -48,6 +48,8 @@ int
 virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps,
                           qemuMonitorPtr mon);
 
+void virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps);
+
 int
 virQEMUCapsInitQMPMonitorTCG(virQEMUCapsPtr qemuCaps,
                              qemuMonitorPtr mon);
diff --git a/tests/qemucapabilitiestest.c b/tests/qemucapabilitiestest.c
index 8fe5a55e1d..90942c6fce 100644
--- a/tests/qemucapabilitiestest.c
+++ b/tests/qemucapabilitiestest.c
@@ -63,6 +63,9 @@ testQemuCaps(const void *opaque)
                                   qemuMonitorTestGetMonitor(mon)) < 0)
         goto cleanup;
 
+    /* Don't apply what the host has... force clear for testing purposes */
+    virQEMUCapsClearKVMIsNested(capsActual);
+
     if (virQEMUCapsGet(capsActual, QEMU_CAPS_KVM)) {
         qemuMonitorResetCommandID(qemuMonitorTestGetMonitor(mon));
         if (virQEMUCapsInitQMPMonitorTCG(capsActual,
-- 
2.17.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by Andrea Bolognani 5 years, 4 months ago
On Mon, 2018-11-26 at 18:38 -0500, John Ferlan wrote:
[...]
> +static bool
> +virQEMUCapsKVMIsNested(void)
> +{
> +    VIR_AUTOFREE(char *) kConfig = NULL;
> +
> +    /* Intel, AMD, and s390 related checks */
> +    if ((kConfig = virKModConfig()) &&
> +        (strstr(kConfig, "kvm_intel nested=1") ||
> +         strstr(kConfig, "kvm_amd nested=1") ||
> +         strstr(kConfig, "kvm nested=1")))
> +        return true;
> +    return false;
> +}

I might be doing it wrong, but I'm pretty sure I've enabled nested
virtualization properly on my laptop given that I can successfully
run 'modprobe kvm_intel' inside the L1 guest, and yet I get

  # modprobe -c | grep -c nested=1
  0

both in the L0 host and the L1 guest, so this check doesn't seem
accurate to me.

Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
configuration, but the *static* one! So if you enable nested KVM
support by doing

  # modprobe -r kvm_intel
  # modprobe kvm_intel nested=1

like I did, then the check above will not report it as enabled even
though it is; conversely, if you drop the appropriate config snippet
in /etc/modprobe.d/ but don't reload the module it will report it as
enabled even though it's not!

As an added bonus, if you have random whitespace or additional
options in the configuration line for the module, both of which are
completely legal, then the string matching will fail :)

We will probably also need to add a completely different check for
POWER9 hosts, where nested KVM support is enabled through a machine
type property of the L1 guest rather than a setting on the host.
I'll look into that as soon as I can get my hands on some suitable
hardware.

One final remark about the naming: <kvmIsNested/> seems wrong to me,
as IIUC it's not part of the capabilities of the L1 guest (where
KVM is, indeed, nested) but rather of the L0 host, which makes
<kvmSupportsNesting/> or something like that a much better choice
in my opinion.

-- 
Andrea Bolognani / Red Hat / Virtualization

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by John Ferlan 5 years, 4 months ago

On 11/27/18 12:05 PM, Andrea Bolognani wrote:
> On Mon, 2018-11-26 at 18:38 -0500, John Ferlan wrote:
> [...]
>> +static bool
>> +virQEMUCapsKVMIsNested(void)
>> +{
>> +    VIR_AUTOFREE(char *) kConfig = NULL;
>> +
>> +    /* Intel, AMD, and s390 related checks */
>> +    if ((kConfig = virKModConfig()) &&
>> +        (strstr(kConfig, "kvm_intel nested=1") ||
>> +         strstr(kConfig, "kvm_amd nested=1") ||
>> +         strstr(kConfig, "kvm nested=1")))
>> +        return true;
>> +    return false;
>> +}
> 
> I might be doing it wrong, but I'm pretty sure I've enabled nested
> virtualization properly on my laptop given that I can successfully
> run 'modprobe kvm_intel' inside the L1 guest, and yet I get
> 
>   # modprobe -c | grep -c nested=1
>   0
> 
> both in the L0 host and the L1 guest, so this check doesn't seem
> accurate to me.
> 
> Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
> configuration, but the *static* one! So if you enable nested KVM
> support by doing
> 
>   # modprobe -r kvm_intel
>   # modprobe kvm_intel nested=1
> 
> like I did, then the check above will not report it as enabled even
> though it is; conversely, if you drop the appropriate config snippet
> in /etc/modprobe.d/ but don't reload the module it will report it as
> enabled even though it's not!

Ugh, sigh... Yep, I was thinking primarily the static config option
since we had helpers to read. Of course that won't be enough. Joy, more
code to probe... Maybe it is easier to just say - clear your
capabilities cache if you alter that particular kernel value.

> 
> As an added bonus, if you have random whitespace or additional
> options in the configuration line for the module, both of which are
> completely legal, then the string matching will fail :)
> 

So much for the easy way out.

> We will probably also need to add a completely different check for
> POWER9 hosts, where nested KVM support is enabled through a machine
> type property of the L1 guest rather than a setting on the host.
> I'll look into that as soon as I can get my hands on some suitable
> hardware.
> 
> One final remark about the naming: <kvmIsNested/> seems wrong to me,
> as IIUC it's not part of the capabilities of the L1 guest (where
> KVM is, indeed, nested) but rather of the L0 host, which makes
> <kvmSupportsNesting/> or something like that a much better choice
> in my opinion.
> 

Naming is hard ;-)  I like your naming better though.

John

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by Andrea Bolognani 5 years, 4 months ago
On Tue, 2018-11-27 at 14:15 -0500, John Ferlan wrote:
> On 11/27/18 12:05 PM, Andrea Bolognani wrote:
> > Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
> > configuration, but the *static* one! So if you enable nested KVM
> > support by doing
> > 
> >   # modprobe -r kvm_intel
> >   # modprobe kvm_intel nested=1
> > 
> > like I did, then the check above will not report it as enabled even
> > though it is; conversely, if you drop the appropriate config snippet
> > in /etc/modprobe.d/ but don't reload the module it will report it as
> > enabled even though it's not!
> 
> Ugh, sigh... Yep, I was thinking primarily the static config option
> since we had helpers to read. Of course that won't be enough. Joy, more
> code to probe... Maybe it is easier to just say - clear your
> capabilities cache if you alter that particular kernel value.

You should be able to just read the contents of

  /sys/module/kvm_{amd,intel}/parameters/nested

for x86_64; not sure whether a similar trick will work on s390, but
it will definitely *not* work on ppc64, and I haven't even started
thinking about aarch64 yet!

Of course there's the usual caveats about whether this has been
available in the kernel for a long enough time that it's okay for us
to rely on it, and whether the approach is not entirely flawed for
reasons that I'm not aware of :)

-- 
Andrea Bolognani / Red Hat / Virtualization

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by Daniel P. Berrangé 5 years, 4 months ago
On Tue, Nov 27, 2018 at 02:15:39PM -0500, John Ferlan wrote:
> 
> 
> On 11/27/18 12:05 PM, Andrea Bolognani wrote:
> > On Mon, 2018-11-26 at 18:38 -0500, John Ferlan wrote:
> > [...]
> >> +static bool
> >> +virQEMUCapsKVMIsNested(void)
> >> +{
> >> +    VIR_AUTOFREE(char *) kConfig = NULL;
> >> +
> >> +    /* Intel, AMD, and s390 related checks */
> >> +    if ((kConfig = virKModConfig()) &&
> >> +        (strstr(kConfig, "kvm_intel nested=1") ||
> >> +         strstr(kConfig, "kvm_amd nested=1") ||
> >> +         strstr(kConfig, "kvm nested=1")))
> >> +        return true;
> >> +    return false;
> >> +}
> > 
> > I might be doing it wrong, but I'm pretty sure I've enabled nested
> > virtualization properly on my laptop given that I can successfully
> > run 'modprobe kvm_intel' inside the L1 guest, and yet I get
> > 
> >   # modprobe -c | grep -c nested=1
> >   0
> > 
> > both in the L0 host and the L1 guest, so this check doesn't seem
> > accurate to me.
> > 
> > Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
> > configuration, but the *static* one! So if you enable nested KVM
> > support by doing
> > 
> >   # modprobe -r kvm_intel
> >   # modprobe kvm_intel nested=1
> > 
> > like I did, then the check above will not report it as enabled even
> > though it is; conversely, if you drop the appropriate config snippet
> > in /etc/modprobe.d/ but don't reload the module it will report it as
> > enabled even though it's not!
> 
> Ugh, sigh... Yep, I was thinking primarily the static config option
> since we had helpers to read. Of course that won't be enough. Joy, more
> code to probe... Maybe it is easier to just say - clear your
> capabilities cache if you alter that particular kernel value.

Surely its already easier just to ask the kernel for the live status

$ cat  /sys/module/kvm_intel/parameters/nested
1

likewise for other kmods

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by John Ferlan 5 years, 4 months ago

On 11/28/18 5:02 AM, Daniel P. Berrangé wrote:
> On Tue, Nov 27, 2018 at 02:15:39PM -0500, John Ferlan wrote:
>>
>>
>> On 11/27/18 12:05 PM, Andrea Bolognani wrote:
>>> On Mon, 2018-11-26 at 18:38 -0500, John Ferlan wrote:
>>> [...]
>>>> +static bool
>>>> +virQEMUCapsKVMIsNested(void)
>>>> +{
>>>> +    VIR_AUTOFREE(char *) kConfig = NULL;
>>>> +
>>>> +    /* Intel, AMD, and s390 related checks */
>>>> +    if ((kConfig = virKModConfig()) &&
>>>> +        (strstr(kConfig, "kvm_intel nested=1") ||
>>>> +         strstr(kConfig, "kvm_amd nested=1") ||
>>>> +         strstr(kConfig, "kvm nested=1")))
>>>> +        return true;
>>>> +    return false;
>>>> +}
>>>
>>> I might be doing it wrong, but I'm pretty sure I've enabled nested
>>> virtualization properly on my laptop given that I can successfully
>>> run 'modprobe kvm_intel' inside the L1 guest, and yet I get
>>>
>>>   # modprobe -c | grep -c nested=1
>>>   0
>>>
>>> both in the L0 host and the L1 guest, so this check doesn't seem
>>> accurate to me.
>>>
>>> Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
>>> configuration, but the *static* one! So if you enable nested KVM
>>> support by doing
>>>
>>>   # modprobe -r kvm_intel
>>>   # modprobe kvm_intel nested=1
>>>
>>> like I did, then the check above will not report it as enabled even
>>> though it is; conversely, if you drop the appropriate config snippet
>>> in /etc/modprobe.d/ but don't reload the module it will report it as
>>> enabled even though it's not!
>>
>> Ugh, sigh... Yep, I was thinking primarily the static config option
>> since we had helpers to read. Of course that won't be enough. Joy, more
>> code to probe... Maybe it is easier to just say - clear your
>> capabilities cache if you alter that particular kernel value.
> 
> Surely its already easier just to ask the kernel for the live status
> 
> $ cat  /sys/module/kvm_intel/parameters/nested
> 1
> 

My commentary was more towards I was hoping to "reuse" the exising kmod
helpers ;-) before I had thought about or written any code to read the
live data.

The other "annoying" part is that you have "1" in your output and I have
"Y" or "N". So much for "consistency".

John

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by Daniel P. Berrangé 5 years, 4 months ago
On Wed, Nov 28, 2018 at 07:13:05AM -0500, John Ferlan wrote:
> 
> 
> On 11/28/18 5:02 AM, Daniel P. Berrangé wrote:
> > On Tue, Nov 27, 2018 at 02:15:39PM -0500, John Ferlan wrote:
> >>
> >>
> >> On 11/27/18 12:05 PM, Andrea Bolognani wrote:
> >>> On Mon, 2018-11-26 at 18:38 -0500, John Ferlan wrote:
> >>> [...]
> >>>> +static bool
> >>>> +virQEMUCapsKVMIsNested(void)
> >>>> +{
> >>>> +    VIR_AUTOFREE(char *) kConfig = NULL;
> >>>> +
> >>>> +    /* Intel, AMD, and s390 related checks */
> >>>> +    if ((kConfig = virKModConfig()) &&
> >>>> +        (strstr(kConfig, "kvm_intel nested=1") ||
> >>>> +         strstr(kConfig, "kvm_amd nested=1") ||
> >>>> +         strstr(kConfig, "kvm nested=1")))
> >>>> +        return true;
> >>>> +    return false;
> >>>> +}
> >>>
> >>> I might be doing it wrong, but I'm pretty sure I've enabled nested
> >>> virtualization properly on my laptop given that I can successfully
> >>> run 'modprobe kvm_intel' inside the L1 guest, and yet I get
> >>>
> >>>   # modprobe -c | grep -c nested=1
> >>>   0
> >>>
> >>> both in the L0 host and the L1 guest, so this check doesn't seem
> >>> accurate to me.
> >>>
> >>> Oh, wait, I get it now: 'modprobe -c' doesn't dump the *current* host
> >>> configuration, but the *static* one! So if you enable nested KVM
> >>> support by doing
> >>>
> >>>   # modprobe -r kvm_intel
> >>>   # modprobe kvm_intel nested=1
> >>>
> >>> like I did, then the check above will not report it as enabled even
> >>> though it is; conversely, if you drop the appropriate config snippet
> >>> in /etc/modprobe.d/ but don't reload the module it will report it as
> >>> enabled even though it's not!
> >>
> >> Ugh, sigh... Yep, I was thinking primarily the static config option
> >> since we had helpers to read. Of course that won't be enough. Joy, more
> >> code to probe... Maybe it is easier to just say - clear your
> >> capabilities cache if you alter that particular kernel value.
> > 
> > Surely its already easier just to ask the kernel for the live status
> > 
> > $ cat  /sys/module/kvm_intel/parameters/nested
> > 1
> > 
> 
> My commentary was more towards I was hoping to "reuse" the exising kmod
> helpers ;-) before I had thought about or written any code to read the
> live data.
> 
> The other "annoying" part is that you have "1" in your output and I have
> "Y" or "N". So much for "consistency".

Actually I just invented that output from memory and got it wrong :)


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v2] qemu: Add check for whether KVM nesting is enabled
Posted by Jiri Denemark 5 years, 4 months ago
On Mon, Nov 26, 2018 at 18:38:06 -0500, John Ferlan wrote:
> Support for nested KVM is handled via a kernel module configuration
> adjustment which if done after libvirtd is started and/or the last
> QEMU capabilities adjustment can result in the inability to start a
> guest and use nested KVM until the capabilities cache is invalidated.
> This is because without knowing, the CPU settings for a guest may not
> add the vmx=on to/for the guest config.
> 
> Thus, let's fetch and save the setting during initialization and then
> when the capabilities are checked for various host related adjustments
> that could affect whether the capabilities cache is updated add a check
> whether the nested value was set for Intel, AMD, or s390 to force a
> refetch of the capabilities.
> 
> Signed-off-by: John Ferlan <jferlan@redhat.com>
> ---
>  v1 was part of an RFC:
> 
>  https://www.redhat.com/archives/libvir-list/2018-November/msg00494.html
> 
>  This patch alters that code slightly to add the check Marc Hartmayer
>  requested for S390 and to use "kvm" in the API names and variables to
>  make it clearer that it's not CapsIsNested but CapsKVMIsNested.
> 
>  If it's felt the new check should slide down further in virQEMUCapsIsValid
>  then that's fine - just let me know what it should follow.
> 
>  src/qemu/qemu_capabilities.c | 45 ++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_capspriv.h     |  2 ++
>  tests/qemucapabilitiestest.c |  3 +++
>  3 files changed, 50 insertions(+)
> 
> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> index fde27010e4..c377733fe6 100644
> --- a/src/qemu/qemu_capabilities.c
> +++ b/src/qemu/qemu_capabilities.c
> @@ -40,6 +40,7 @@
>  #include "virnodesuspend.h"
>  #include "virnuma.h"
>  #include "virhostcpu.h"
> +#include "virkmod.h"
>  #include "qemu_monitor.h"
>  #include "virstring.h"
>  #include "qemu_hostdev.h"
> @@ -557,6 +558,7 @@ struct _virQEMUCaps {
>      virObject parent;
>  
>      bool usedQMP;
> +    bool kvmIsNested;
>  
>      char *binary;
>      time_t ctime;
> @@ -1528,6 +1530,7 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps)
>          return NULL;
>  
>      ret->usedQMP = qemuCaps->usedQMP;
> +    ret->kvmIsNested = qemuCaps->kvmIsNested;
>  
>      if (VIR_STRDUP(ret->binary, qemuCaps->binary) < 0)
>          goto error;
> @@ -3587,6 +3590,9 @@ virQEMUCapsLoadCache(virArch hostArch,
>      virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_KVM);
>      virQEMUCapsInitHostCPUModel(qemuCaps, hostArch, VIR_DOMAIN_VIRT_QEMU);
>  
> +    qemuCaps->kvmIsNested = virXPathBoolean("count(./kvmIsNested) > 0",

I think the XPath expression could be as simple as "./kvmIsNested" or
maybe "boolean(./kvmIsNested)", but it doesn't really matter.

> +                                            ctxt) > 0;
> +
>      ret = 0;
>   cleanup:
>      VIR_FREE(str);
> @@ -3806,6 +3812,9 @@ virQEMUCapsFormatCache(virQEMUCapsPtr qemuCaps)
>      if (qemuCaps->sevCapabilities)
>          virQEMUCapsFormatSEVInfo(qemuCaps, &buf);
>  
> +    if (qemuCaps->kvmIsNested)
> +        virBufferAddLit(&buf, "<kvmIsNested/>\n");
> +
>      virBufferAdjustIndent(&buf, -2);
>      virBufferAddLit(&buf, "</qemuCaps>\n");
>  
> @@ -3846,6 +3855,30 @@ virQEMUCapsSaveFile(void *data,
>  }
>  
>  
> +static bool
> +virQEMUCapsKVMIsNested(void)
> +{
> +    VIR_AUTOFREE(char *) kConfig = NULL;
> +
> +    /* Intel, AMD, and s390 related checks */
> +    if ((kConfig = virKModConfig()) &&
> +        (strstr(kConfig, "kvm_intel nested=1") ||
> +         strstr(kConfig, "kvm_amd nested=1") ||
> +         strstr(kConfig, "kvm nested=1")))
> +        return true;
> +    return false;
> +}
> +
> +
> +void
> +virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps)
> +{
> +    /* For qemucapabilitiestest to avoid printing the </kvmIsNested> on
> +     * hosts with nested set in the kernel */
> +    qemuCaps->kvmIsNested = false;
> +}

I don't see why this function should be needed (see below for more
details).

> +
> +
>  static bool
>  virQEMUCapsIsValid(void *data,
>                     void *privData)
> @@ -3854,6 +3887,7 @@ virQEMUCapsIsValid(void *data,
>      virQEMUCapsCachePrivPtr priv = privData;
>      bool kvmUsable;
>      struct stat sb;
> +    bool kvmIsNested;
>  
>      if (!qemuCaps->binary)
>          return true;
> @@ -3886,6 +3920,15 @@ virQEMUCapsIsValid(void *data,
>          return false;
>      }
>  
> +    /* Check if someone changed the nested={0|1} value for the kernel from
> +     * the previous time we checked. If so, then refresh the capabilities. */
> +    kvmIsNested = virQEMUCapsKVMIsNested();
> +    if (kvmIsNested != qemuCaps->kvmIsNested) {
> +        VIR_WARN("changed kernel nested kvm value was %d", qemuCaps->kvmIsNested);
> +        qemuCaps->kvmIsNested = kvmIsNested;
> +        return false;
> +    }
> +
>      if (!virQEMUCapsGuestIsNative(priv->hostArch, qemuCaps->arch)) {
>          VIR_DEBUG("Guest arch (%s) is not native to host arch (%s), "
>                    "skipping KVM-related checks",
> @@ -4472,6 +4515,8 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
>      if (virQEMUCapsInitQMPMonitor(qemuCaps, cmd->mon) < 0)
>          goto cleanup;
>  
> +    qemuCaps->kvmIsNested = virQEMUCapsKVMIsNested();
> +

This assignment should be done in the caller, i.e., in
virQEMUCapsNewForBinaryInternal. Probably somewhere close to setting
microcodeVersion since both should be done only if KVM was enabled.

>      if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM)) {
>          virQEMUCapsInitQMPCommandAbort(cmd);
>          if ((rc = virQEMUCapsInitQMPCommandRun(cmd, true)) != 0) {
> diff --git a/src/qemu/qemu_capspriv.h b/src/qemu/qemu_capspriv.h
> index 8d1a40fe74..edfe2cd6f6 100644
> --- a/src/qemu/qemu_capspriv.h
> +++ b/src/qemu/qemu_capspriv.h
> @@ -48,6 +48,8 @@ int
>  virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps,
>                            qemuMonitorPtr mon);
>  
> +void virQEMUCapsClearKVMIsNested(virQEMUCapsPtr qemuCaps);
> +
>  int
>  virQEMUCapsInitQMPMonitorTCG(virQEMUCapsPtr qemuCaps,
>                               qemuMonitorPtr mon);
> diff --git a/tests/qemucapabilitiestest.c b/tests/qemucapabilitiestest.c
> index 8fe5a55e1d..90942c6fce 100644
> --- a/tests/qemucapabilitiestest.c
> +++ b/tests/qemucapabilitiestest.c
> @@ -63,6 +63,9 @@ testQemuCaps(const void *opaque)
>                                    qemuMonitorTestGetMonitor(mon)) < 0)
>          goto cleanup;
>  
> +    /* Don't apply what the host has... force clear for testing purposes */
> +    virQEMUCapsClearKVMIsNested(capsActual);
> +

We call virQEMUCapsInitQMPMonitor to parse the QEMU replies files via a
fake monitor. Your code sets kvmIsNested inside virQEMUCapsInitQMP
(which is one level above virQEMUCapsInitQMPMonitor in the call stack)
and thus it should never be set according to the host environment. And
even less so when kvmIsNested is set properly in
virQEMUCapsNewForBinaryInternal.

>      if (virQEMUCapsGet(capsActual, QEMU_CAPS_KVM)) {
>          qemuMonitorResetCommandID(qemuMonitorTestGetMonitor(mon));
>          if (virQEMUCapsInitQMPMonitorTCG(capsActual,

Jirka

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list