[v2] Introduce NVMe support

[libvirt] [PATCH v2 07/39] qemu: Explicitly add/remove /dev/vfio/vfio to/from NS/CGroups

Posted by Michal Privoznik 6 years, 4 months ago

In near future, the decision what to do with /dev/vfio/vfio with
respect to domain namespace and CGroup is going to be moved out
of qemuDomainGetHostdevPath() because there will be some other
types of devices than hostdevs that need access to VFIO.

All functions that I'm changing assume that hostdev we are
adding/removing to VM is not in the definition yet (because of
how qemuDomainNeedsVFIO() is written). Fortunately, this
assumption is true.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_cgroup.c | 48 +++++++++++++++++++++++++++++++++++++++++-
 src/qemu/qemu_domain.c | 36 +++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 4d6f0c33cd..f110b49d16 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -25,6 +25,7 @@
 #include "qemu_domain.h"
 #include "qemu_process.h"
 #include "qemu_extdevice.h"
+#include "qemu_hostdev.h"
 #include "vircgroup.h"
 #include "virlog.h"
 #include "viralloc.h"
@@ -360,6 +361,17 @@ qemuTeardownInputCgroup(virDomainObjPtr vm,
 }
 
 
+/**
+ * qemuSetupHostdevCgroup:
+ * vm: domain object
+ * @dev: device to allow
+ *
+ * For given host device @dev allow access to in Cgroups.
+ * Note, @dev must not be in @vm's definition.
+ *
+ * Returns: 0 on success,
+ *         -1 otherwise.
+ */
 int
 qemuSetupHostdevCgroup(virDomainObjPtr vm,
                        virDomainHostdevDefPtr dev)
@@ -386,6 +398,17 @@ qemuSetupHostdevCgroup(virDomainObjPtr vm,
             goto cleanup;
     }
 
+    if (qemuHostdevNeedsVFIO(dev) &&
+        !qemuDomainNeedsVFIO(vm->def)) {
+        VIR_DEBUG("Cgroup allow %s perms=%d", QEMU_DEV_VFIO, VIR_CGROUP_DEVICE_RW);
+        rv = virCgroupAllowDevicePath(priv->cgroup, QEMU_DEV_VFIO,
+                                      VIR_CGROUP_DEVICE_RW, false);
+        virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
+                                 QEMU_DEV_VFIO, "rw", rv);
+        if (rv < 0)
+            goto cleanup;
+    }
+
     ret = 0;
 
  cleanup:
@@ -396,9 +419,21 @@ qemuSetupHostdevCgroup(virDomainObjPtr vm,
     return ret;
 }
 
+
+/**
+ * qemuTeardownHostdevCgroup:
+ * @vm: doamin object
+ * @dev: device to tear down
+ *
+ * For given host device @dev deny access to it in CGroups.
+ * Note, @dev must not be in @vm's definition.
+ *
+ * Returns: 0 on success,
+ *         -1 otherwise.
+ */
 int
 qemuTeardownHostdevCgroup(virDomainObjPtr vm,
-                       virDomainHostdevDefPtr dev)
+                          virDomainHostdevDefPtr dev)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     char **path = NULL;
@@ -422,6 +457,17 @@ qemuTeardownHostdevCgroup(virDomainObjPtr vm,
             goto cleanup;
     }
 
+    if (qemuHostdevNeedsVFIO(dev) &&
+        !qemuDomainNeedsVFIO(vm->def)) {
+        VIR_DEBUG("Cgroup deny " QEMU_DEV_VFIO);
+        rv = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO,
+                                     VIR_CGROUP_DEVICE_RWM, false);
+        virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
+                                 QEMU_DEV_VFIO, "rwm", rv);
+        if (rv < 0)
+            goto cleanup;
+    }
+
     ret = 0;
  cleanup:
     for (i = 0; i < npaths; i++)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 6502c6191c..02b6e590cd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -13540,6 +13540,10 @@ qemuDomainSetupHostdev(virQEMUDriverConfigPtr cfg ATTRIBUTE_UNUSED,
             goto cleanup;
     }
 
+    if (qemuHostdevNeedsVFIO(dev) &&
+        qemuDomainCreateDevice(QEMU_DEV_VFIO, data, false) < 0)
+        goto cleanup;
+
     ret = 0;
  cleanup:
     for (i = 0; i < npaths; i++)
@@ -14576,6 +14580,17 @@ qemuDomainNamespaceTeardownDisk(virDomainObjPtr vm ATTRIBUTE_UNUSED,
 }
 
 
+/**
+ * qemuDomainNamespaceSetupHostdev:
+ * @vm: domain object
+ * @hostdev: hostdev to create in @vm's namespace
+ *
+ * For given @hostdev, create its devfs representation (if it has one) in
+ * domain namespace. Note, @hostdev must not be in @vm's definition.
+ *
+ * Returns: 0 on success,
+ *         -1 otherwise.
+ */
 int
 qemuDomainNamespaceSetupHostdev(virDomainObjPtr vm,
                                 virDomainHostdevDefPtr hostdev)
@@ -14590,6 +14605,11 @@ qemuDomainNamespaceSetupHostdev(virDomainObjPtr vm,
     if (qemuDomainNamespaceMknodPaths(vm, (const char **)paths, npaths) < 0)
         goto cleanup;
 
+    if (qemuHostdevNeedsVFIO(hostdev) &&
+        !qemuDomainNeedsVFIO(vm->def) &&
+        qemuDomainNamespaceMknodPath(vm, QEMU_DEV_VFIO) < 0)
+        goto cleanup;
+
     ret = 0;
  cleanup:
     for (i = 0; i < npaths; i++)
@@ -14599,6 +14619,17 @@ qemuDomainNamespaceSetupHostdev(virDomainObjPtr vm,
 }
 
 
+/**
+ * qemuDomainNamespaceTeardownHostdev:
+ * @vm: domain object
+ * @hostdev: hostdev to remove in @vm's namespace
+ *
+ * For given @hostdev, remove its devfs representation (if it has one) in
+ * domain namespace. Note, @hostdev must not be in @vm's definition.
+ *
+ * Returns: 0 on success,
+ *         -1 otherwise.
+ */
 int
 qemuDomainNamespaceTeardownHostdev(virDomainObjPtr vm,
                                    virDomainHostdevDefPtr hostdev)
@@ -14614,6 +14645,11 @@ qemuDomainNamespaceTeardownHostdev(virDomainObjPtr vm,
     if (qemuDomainNamespaceUnlinkPaths(vm, (const char **)paths, npaths) < 0)
         goto cleanup;
 
+    if (qemuHostdevNeedsVFIO(hostdev) &&
+        !qemuDomainNeedsVFIO(vm->def) &&
+        qemuDomainNamespaceUnlinkPath(vm, QEMU_DEV_VFIO) < 0)
+        goto cleanup;
+
     ret = 0;
  cleanup:
     for (i = 0; i < npaths; i++)
-- 
2.21.0

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v2 07/39] qemu: Explicitly add/remove /dev/vfio/vfio to/from NS/CGroups

Posted by Cole Robinson 6 years, 3 months ago

On 9/26/19 12:12 PM, Michal Privoznik wrote:
> In near future, the decision what to do with /dev/vfio/vfio with
> respect to domain namespace and CGroup is going to be moved out
> of qemuDomainGetHostdevPath() because there will be some other
> types of devices than hostdevs that need access to VFIO.
> 
> All functions that I'm changing assume that hostdev we are
> adding/removing to VM is not in the definition yet (because of
> how qemuDomainNeedsVFIO() is written). Fortunately, this
> assumption is true.
> 

qemuProcessLaunch ->
  qemuSetupCgroup ->
    qemuSetupDevicesCgroup ->

has

    for (i = 0; i < vm->def->nhostdevs; i++) {

        if (qemuSetupHostdevCgroup(vm, vm->def->hostdevs[i]) < 0)

            goto cleanup;

    }

So that above paragraph doesn't seem correct. If I apply up to patch
#17, this breaks VM startup with a PCI passthrough device, but caveat
only with cgroupv1. Apparently cgroupv2 doesn't have any notion of
allowDevice ? or at least there's no impl there.

> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
> ---
>  src/qemu/qemu_cgroup.c | 48 +++++++++++++++++++++++++++++++++++++++++-
>  src/qemu/qemu_domain.c | 36 +++++++++++++++++++++++++++++++
>  2 files changed, 83 insertions(+), 1 deletion(-)
> 
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index 4d6f0c33cd..f110b49d16 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -25,6 +25,7 @@
>  #include "qemu_domain.h"
>  #include "qemu_process.h"
>  #include "qemu_extdevice.h"
> +#include "qemu_hostdev.h"
>  #include "vircgroup.h"
>  #include "virlog.h"
>  #include "viralloc.h"
> @@ -360,6 +361,17 @@ qemuTeardownInputCgroup(virDomainObjPtr vm,
>  }
>  
>  
> +/**
> + * qemuSetupHostdevCgroup:
> + * vm: domain object
> + * @dev: device to allow
> + *
> + * For given host device @dev allow access to in Cgroups.
> + * Note, @dev must not be in @vm's definition.
> + *
> + * Returns: 0 on success,
> + *         -1 otherwise.
> + */
>  int
>  qemuSetupHostdevCgroup(virDomainObjPtr vm,
>                         virDomainHostdevDefPtr dev)
> @@ -386,6 +398,17 @@ qemuSetupHostdevCgroup(virDomainObjPtr vm,
>              goto cleanup;
>      }
>  
> +    if (qemuHostdevNeedsVFIO(dev) &&
> +        !qemuDomainNeedsVFIO(vm->def)) {
> +        VIR_DEBUG("Cgroup allow %s perms=%d", QEMU_DEV_VFIO, VIR_CGROUP_DEVICE_RW);
> +        rv = virCgroupAllowDevicePath(priv->cgroup, QEMU_DEV_VFIO,
> +                                      VIR_CGROUP_DEVICE_RW, false);
> +        virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
> +                                 QEMU_DEV_VFIO, "rw", rv);
> +        if (rv < 0)
> +            goto cleanup;
> +    }
> +
>      ret = 0;
>

So on VM startup this code path isn't hit, because dev is already in
vm->def, so the if() condition will never be true.

However this patch itself doesn't break things, because
qemuDomainGetHostdevPath will also return /dev/vfio/vfio if the device
needs it. I guess later patches undo that somehow but I didn't look into
yet why that is.

Is the !qemuDomainNeedsVFIO even necessary? The existing code will
already call virCgroupAllowDevicePath(/dev/vfio/vfio) multiple times if
the device has multiple VFIO devices attached so apparently that's not
problematic.

- Cole

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v2 07/39] qemu: Explicitly add/remove /dev/vfio/vfio to/from NS/CGroups

Posted by Michal Privoznik 6 years, 3 months ago

On 10/18/19 12:10 AM, Cole Robinson wrote:
> On 9/26/19 12:12 PM, Michal Privoznik wrote:
>> In near future, the decision what to do with /dev/vfio/vfio with
>> respect to domain namespace and CGroup is going to be moved out
>> of qemuDomainGetHostdevPath() because there will be some other
>> types of devices than hostdevs that need access to VFIO.
>>
>> All functions that I'm changing assume that hostdev we are
>> adding/removing to VM is not in the definition yet (because of
>> how qemuDomainNeedsVFIO() is written). Fortunately, this
>> assumption is true.
>>
> 
> qemuProcessLaunch ->
>    qemuSetupCgroup ->
>      qemuSetupDevicesCgroup ->
> 
> has
> 
>      for (i = 0; i < vm->def->nhostdevs; i++) {
> 
>          if (qemuSetupHostdevCgroup(vm, vm->def->hostdevs[i]) < 0)
> 
>              goto cleanup;
> 
>      }
> 
> So that above paragraph doesn't seem correct. If I apply up to patch
> #17, this breaks VM startup with a PCI passthrough device, but caveat
> only with cgroupv1. Apparently cgroupv2 doesn't have any notion of
> allowDevice ? or at least there's no impl there.

Yeah, cgroupv2 doesn't implement devices controller just yet.

> 
>> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
>> ---
>>   src/qemu/qemu_cgroup.c | 48 +++++++++++++++++++++++++++++++++++++++++-
>>   src/qemu/qemu_domain.c | 36 +++++++++++++++++++++++++++++++
>>   2 files changed, 83 insertions(+), 1 deletion(-)
>>

>> @@ -386,6 +398,17 @@ qemuSetupHostdevCgroup(virDomainObjPtr vm,
>>               goto cleanup;
>>       }
>>   
>> +    if (qemuHostdevNeedsVFIO(dev) &&
>> +        !qemuDomainNeedsVFIO(vm->def)) {
>> +        VIR_DEBUG("Cgroup allow %s perms=%d", QEMU_DEV_VFIO, VIR_CGROUP_DEVICE_RW);
>> +        rv = virCgroupAllowDevicePath(priv->cgroup, QEMU_DEV_VFIO,
>> +                                      VIR_CGROUP_DEVICE_RW, false);
>> +        virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
>> +                                 QEMU_DEV_VFIO, "rw", rv);
>> +        if (rv < 0)
>> +            goto cleanup;
>> +    }
>> +
>>       ret = 0;
>>
> 
> So on VM startup this code path isn't hit, because dev is already in
> vm->def, so the if() condition will never be true.
> 
> However this patch itself doesn't break things, because
> qemuDomainGetHostdevPath will also return /dev/vfio/vfio if the device
> needs it. I guess later patches undo that somehow but I didn't look into
> yet why that is.
> 
> Is the !qemuDomainNeedsVFIO even necessary? The existing code will
> already call virCgroupAllowDevicePath(/dev/vfio/vfio) multiple times if
> the device has multiple VFIO devices attached so apparently that's not
> problematic.

Ah, good catch. It's not necessary. Will fix and repost.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list