[libvirt] [PATCH] lxc: Fix 'domblkstat' error with attached disk devices.

jcfaracco@gmail.com posted 1 patch 4 years, 6 months ago
Test syntax-check passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20191021035436.14063-1-jcfaracco@gmail.com
src/lxc/lxc_cgroup.c | 29 ++++++++++++++++++++++++++++-
src/lxc/lxc_cgroup.h |  4 ++++
src/lxc/lxc_driver.c |  8 ++++----
3 files changed, 36 insertions(+), 5 deletions(-)
[libvirt] [PATCH] lxc: Fix 'domblkstat' error with attached disk devices.
Posted by jcfaracco@gmail.com 4 years, 6 months ago
From: Julio Faracco <jcfaracco@gmail.com>

LXC was not working when you attached a new disk that points to block
device. See: https://bugzilla.redhat.com/show_bug.cgi?id=1697115.
Command line from virsh was showing problems with alias first (this
feature is not supported) and after, problems to query block device. It
was happening because extra disks were not being included into
cgroup blkio.throttle properties and libvirt could not query this info.
Applying those devices into 'allowed' list is not enough, libvirt should
reset blkio.throttle as a workaround to include disks into container's
cgroup directory.

Before:

    virsh # domblkstat CentOS hda
    error: Failed to get block stats for domain 'CentOS' device 'hda'
    error: internal error: domain stats query failed

Now:

    virsh # domblkstat CentOS hda
    hda rd_req 0
    hda rd_bytes 0
    hda wr_req 0
    hda wr_bytes 0

Signed-off-by: Julio Faracco <jcfaracco@gmail.com>
---
 src/lxc/lxc_cgroup.c | 29 ++++++++++++++++++++++++++++-
 src/lxc/lxc_cgroup.h |  4 ++++
 src/lxc/lxc_driver.c |  8 ++++----
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 5efb495b56..de6d892521 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -302,6 +302,24 @@ virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
     return 0;
 }
 
+int
+virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
+                                     const char *path)
+{
+    if (virCgroupSetBlkioDeviceReadBps(cgroup, path, 0) < 0)
+        return -1;
+
+    if (virCgroupSetBlkioDeviceWriteBps(cgroup, path, 0) < 0)
+        return -1;
+
+    if (virCgroupSetBlkioDeviceReadIops(cgroup, path, 0) < 0)
+        return -1;
+
+    if (virCgroupSetBlkioDeviceWriteIops(cgroup, path, 0) < 0)
+        return -1;
+
+    return 0;
+}
 
 static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                       virCgroupPtr cgroup)
@@ -309,6 +327,7 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
     int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
     int ret = -1;
     size_t i;
+    const char *src_path = NULL;
     static virLXCCgroupDevicePolicy devices[] = {
         {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
         {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
@@ -346,8 +365,16 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
             !virStorageSourceIsBlockLocal(def->disks[i]->src))
             continue;
 
+        /* Workaround to include disks into blkio.throttle.
+         * To include it, we need to reset any feature of
+         * blkio.throttle.* */
+        src_path = virDomainDiskGetSource(def->disks[i]);
+        if (virLXCCgroupResetBlkioDeviceThrottle(cgroup,
+                                                 src_path) < 0)
+            goto cleanup;
+
         if (virCgroupAllowDevicePath(cgroup,
-                                     virDomainDiskGetSource(def->disks[i]),
+                                     src_path,
                                      (def->disks[i]->src->readonly ?
                                       VIR_CGROUP_DEVICE_READ :
                                       VIR_CGROUP_DEVICE_RW) |
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index 63e9e837b0..3d643a4fea 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -46,3 +46,7 @@ int
 virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev,
                                   const char *path,
                                   void *opaque);
+
+int
+virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
+                                     const char *path);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 204a3ed522..87da55f308 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -2339,14 +2339,14 @@ lxcDomainBlockStats(virDomainPtr dom,
         goto endjob;
     }
 
-    if (!disk->info.alias) {
+    if (!disk->src->path) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("missing disk device alias name for %s"), disk->dst);
         goto endjob;
     }
 
     ret = virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
-                                            disk->info.alias,
+                                            disk->src->path,
                                             &stats->rd_bytes,
                                             &stats->wr_bytes,
                                             &stats->rd_req,
@@ -2424,14 +2424,14 @@ lxcDomainBlockStatsFlags(virDomainPtr dom,
             goto endjob;
         }
 
-        if (!disk->info.alias) {
+        if (!disk->src->path) {
             virReportError(VIR_ERR_INTERNAL_ERROR,
                            _("missing disk device alias name for %s"), disk->dst);
             goto endjob;
         }
 
         if (virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
-                                              disk->info.alias,
+                                              disk->src->path,
                                               &rd_bytes,
                                               &wr_bytes,
                                               &rd_req,
-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] lxc: Fix 'domblkstat' error with attached disk devices.
Posted by Cole Robinson 4 years, 5 months ago
On 10/20/19 11:54 PM, jcfaracco@gmail.com wrote:
> From: Julio Faracco <jcfaracco@gmail.com>
> 

I think if you set your gitconfig correctly, you can avoid this 'From:'
showing up. I have:

[sendemail]
    from = "Cole Robinson <crobinso@redhat.com>"
[user]
    name = Cole Robinson
    email = crobinso@redhat.com



> LXC was not working when you attached a new disk that points to block
> device. See: https://bugzilla.redhat.com/show_bug.cgi?id=1697115.
> Command line from virsh was showing problems with alias first (this
> feature is not supported) and after, problems to query block device. 

If you are fixing two distinct issues, this should at be at least two
separate patches. Otherwise review is more difficult among other things

It
> was happening because extra disks were not being included into
> cgroup blkio.throttle properties and libvirt could not query this info.
> Applying those devices into 'allowed' list is not enough, libvirt should
> reset blkio.throttle as a workaround to include disks into container's
> cgroup directory.
> 
> Before:
> 
>     virsh # domblkstat CentOS hda
>     error: Failed to get block stats for domain 'CentOS' device 'hda'
>     error: internal error: domain stats query failed
> 
> Now:
> 
>     virsh # domblkstat CentOS hda
>     hda rd_req 0
>     hda rd_bytes 0
>     hda wr_req 0
>     hda wr_bytes 0
> 
> Signed-off-by: Julio Faracco <jcfaracco@gmail.com>
> ---
>  src/lxc/lxc_cgroup.c | 29 ++++++++++++++++++++++++++++-
>  src/lxc/lxc_cgroup.h |  4 ++++
>  src/lxc/lxc_driver.c |  8 ++++----
>  3 files changed, 36 insertions(+), 5 deletions(-)
> 
> diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
> index 5efb495b56..de6d892521 100644
> --- a/src/lxc/lxc_cgroup.c
> +++ b/src/lxc/lxc_cgroup.c
> @@ -302,6 +302,24 @@ virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
>      return 0;
>  }
>  
> +int
> +virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
> +                                     const char *path)
> +{
> +    if (virCgroupSetBlkioDeviceReadBps(cgroup, path, 0) < 0)
> +        return -1;
> +
> +    if (virCgroupSetBlkioDeviceWriteBps(cgroup, path, 0) < 0)
> +        return -1;
> +
> +    if (virCgroupSetBlkioDeviceReadIops(cgroup, path, 0) < 0)
> +        return -1;
> +
> +    if (virCgroupSetBlkioDeviceWriteIops(cgroup, path, 0) < 0)
> +        return -1;
> +
> +    return 0;
> +}
>  
>  static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
>                                        virCgroupPtr cgroup)
> @@ -309,6 +327,7 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
>      int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
>      int ret = -1;
>      size_t i;
> +    const char *src_path = NULL;
>      static virLXCCgroupDevicePolicy devices[] = {
>          {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
>          {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
> @@ -346,8 +365,16 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
>              !virStorageSourceIsBlockLocal(def->disks[i]->src))
>              continue;
>  
> +        /* Workaround to include disks into blkio.throttle.
> +         * To include it, we need to reset any feature of
> +         * blkio.throttle.* */
> +        src_path = virDomainDiskGetSource(def->disks[i]);
> +        if (virLXCCgroupResetBlkioDeviceThrottle(cgroup,
> +                                                 src_path) < 0)
> +            goto cleanup;
> +

I'll admit I don't really know how cgroups work here. But it seems odd.
Why exactly is this needed? How does blkstats fail without this, after
the alias piece is fixed? Is something similar needed for the qemu
driver, and if not, why the difference?

Also FWIW, not sure if you have fedora 31 installed anywhere, but seems
like the lxc driver is completely broken with cgroupv2:

https://bugzilla.redhat.com/show_bug.cgi?id=1770763

The suggestion in the bug sounds simple though. If you wanted to
separately take a stab at that I'm motivated to test and review. Just a
thought

>          if (virCgroupAllowDevicePath(cgroup,
> -                                     virDomainDiskGetSource(def->disks[i]),
> +                                     src_path,
>                                       (def->disks[i]->src->readonly ?
>                                        VIR_CGROUP_DEVICE_READ :
>                                        VIR_CGROUP_DEVICE_RW) |
> diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
> index 63e9e837b0..3d643a4fea 100644
> --- a/src/lxc/lxc_cgroup.h
> +++ b/src/lxc/lxc_cgroup.h
> @@ -46,3 +46,7 @@ int
>  virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev,
>                                    const char *path,
>                                    void *opaque);
> +
> +int
> +virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
> +                                     const char *path);
> diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
> index 204a3ed522..87da55f308 100644
> --- a/src/lxc/lxc_driver.c
> +++ b/src/lxc/lxc_driver.c
> @@ -2339,14 +2339,14 @@ lxcDomainBlockStats(virDomainPtr dom,
>          goto endjob;
>      }
>  
> -    if (!disk->info.alias) {
> +    if (!disk->src->path) {
>          virReportError(VIR_ERR_INTERNAL_ERROR,
>                         _("missing disk device alias name for %s"), disk->dst);
>          goto endjob;
>      }
> 

I think this whole conditional can be deleted. Earlier in the function
!path is already handled.

>      ret = virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
> -                                            disk->info.alias,
> +                                            disk->src->path,
>                                              &stats->rd_bytes,
>                                              &stats->wr_bytes,
>                                              &stats->rd_req,
> @@ -2424,14 +2424,14 @@ lxcDomainBlockStatsFlags(virDomainPtr dom,
>              goto endjob;
>          }
>  
> -        if (!disk->info.alias) {
> +        if (!disk->src->path) {
>              virReportError(VIR_ERR_INTERNAL_ERROR,
>                             _("missing disk device alias name for %s"), disk->dst);
>              goto endjob;
>          }
>  

Same with this block. The alias changes make sense though

Thanks,
Cole

>          if (virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
> -                                              disk->info.alias,
> +                                              disk->src->path,
>                                                &rd_bytes,
>                                                &wr_bytes,
>                                                &rd_req,
> 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] lxc: Fix 'domblkstat' error with attached disk devices.
Posted by Julio Faracco 4 years, 5 months ago
Hi Cole!

Thanks for your review and suggestion.
I admit that I'm not a CGroups expert.
IHMO, when you allow a device (with virCgroupAllowDevice) for a
specific CGroup that device would be automatically added into
io_serviced monitor.
But this is not happening. I tested with a stand alone container using
Docker and LXC. Same results when you attach a block device.
The best approach that I found was resetting throttle to include them
into io_service of that specific LXC instance.
It worked (and as it is an inclusion, reset does not affect block
read/write stats).

I'm free to accept suggestions of CGroups experts from here. I avoided
to ask it on Kernel Cgroups mailing list.

About aliases, there are two problems here: alias is not working for
LXC (minor bug and irrelevant) and block path makes more sense to
check io_serviced stats than alias.

I will handle other points about your comment.

Thanks again!

--
Julio Cesar Faracco

Em qui., 14 de nov. de 2019 às 18:34, Cole Robinson
<crobinso@redhat.com> escreveu:
>
> On 10/20/19 11:54 PM, jcfaracco@gmail.com wrote:
> > From: Julio Faracco <jcfaracco@gmail.com>
> >
>
> I think if you set your gitconfig correctly, you can avoid this 'From:'
> showing up. I have:
>
> [sendemail]
>     from = "Cole Robinson <crobinso@redhat.com>"
> [user]
>     name = Cole Robinson
>     email = crobinso@redhat.com
>
>
>
> > LXC was not working when you attached a new disk that points to block
> > device. See: https://bugzilla.redhat.com/show_bug.cgi?id=1697115.
> > Command line from virsh was showing problems with alias first (this
> > feature is not supported) and after, problems to query block device.
>
> If you are fixing two distinct issues, this should at be at least two
> separate patches. Otherwise review is more difficult among other things
>
> It
> > was happening because extra disks were not being included into
> > cgroup blkio.throttle properties and libvirt could not query this info.
> > Applying those devices into 'allowed' list is not enough, libvirt should
> > reset blkio.throttle as a workaround to include disks into container's
> > cgroup directory.
> >
> > Before:
> >
> >     virsh # domblkstat CentOS hda
> >     error: Failed to get block stats for domain 'CentOS' device 'hda'
> >     error: internal error: domain stats query failed
> >
> > Now:
> >
> >     virsh # domblkstat CentOS hda
> >     hda rd_req 0
> >     hda rd_bytes 0
> >     hda wr_req 0
> >     hda wr_bytes 0
> >
> > Signed-off-by: Julio Faracco <jcfaracco@gmail.com>
> > ---
> >  src/lxc/lxc_cgroup.c | 29 ++++++++++++++++++++++++++++-
> >  src/lxc/lxc_cgroup.h |  4 ++++
> >  src/lxc/lxc_driver.c |  8 ++++----
> >  3 files changed, 36 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
> > index 5efb495b56..de6d892521 100644
> > --- a/src/lxc/lxc_cgroup.c
> > +++ b/src/lxc/lxc_cgroup.c
> > @@ -302,6 +302,24 @@ virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
> >      return 0;
> >  }
> >
> > +int
> > +virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
> > +                                     const char *path)
> > +{
> > +    if (virCgroupSetBlkioDeviceReadBps(cgroup, path, 0) < 0)
> > +        return -1;
> > +
> > +    if (virCgroupSetBlkioDeviceWriteBps(cgroup, path, 0) < 0)
> > +        return -1;
> > +
> > +    if (virCgroupSetBlkioDeviceReadIops(cgroup, path, 0) < 0)
> > +        return -1;
> > +
> > +    if (virCgroupSetBlkioDeviceWriteIops(cgroup, path, 0) < 0)
> > +        return -1;
> > +
> > +    return 0;
> > +}
> >
> >  static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
> >                                        virCgroupPtr cgroup)
> > @@ -309,6 +327,7 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
> >      int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
> >      int ret = -1;
> >      size_t i;
> > +    const char *src_path = NULL;
> >      static virLXCCgroupDevicePolicy devices[] = {
> >          {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
> >          {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
> > @@ -346,8 +365,16 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
> >              !virStorageSourceIsBlockLocal(def->disks[i]->src))
> >              continue;
> >
> > +        /* Workaround to include disks into blkio.throttle.
> > +         * To include it, we need to reset any feature of
> > +         * blkio.throttle.* */
> > +        src_path = virDomainDiskGetSource(def->disks[i]);
> > +        if (virLXCCgroupResetBlkioDeviceThrottle(cgroup,
> > +                                                 src_path) < 0)
> > +            goto cleanup;
> > +
>
> I'll admit I don't really know how cgroups work here. But it seems odd.
> Why exactly is this needed? How does blkstats fail without this, after
> the alias piece is fixed? Is something similar needed for the qemu
> driver, and if not, why the difference?
>
> Also FWIW, not sure if you have fedora 31 installed anywhere, but seems
> like the lxc driver is completely broken with cgroupv2:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=1770763
>
> The suggestion in the bug sounds simple though. If you wanted to
> separately take a stab at that I'm motivated to test and review. Just a
> thought
>
> >          if (virCgroupAllowDevicePath(cgroup,
> > -                                     virDomainDiskGetSource(def->disks[i]),
> > +                                     src_path,
> >                                       (def->disks[i]->src->readonly ?
> >                                        VIR_CGROUP_DEVICE_READ :
> >                                        VIR_CGROUP_DEVICE_RW) |
> > diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
> > index 63e9e837b0..3d643a4fea 100644
> > --- a/src/lxc/lxc_cgroup.h
> > +++ b/src/lxc/lxc_cgroup.h
> > @@ -46,3 +46,7 @@ int
> >  virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev,
> >                                    const char *path,
> >                                    void *opaque);
> > +
> > +int
> > +virLXCCgroupResetBlkioDeviceThrottle(virCgroupPtr cgroup,
> > +                                     const char *path);
> > diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
> > index 204a3ed522..87da55f308 100644
> > --- a/src/lxc/lxc_driver.c
> > +++ b/src/lxc/lxc_driver.c
> > @@ -2339,14 +2339,14 @@ lxcDomainBlockStats(virDomainPtr dom,
> >          goto endjob;
> >      }
> >
> > -    if (!disk->info.alias) {
> > +    if (!disk->src->path) {
> >          virReportError(VIR_ERR_INTERNAL_ERROR,
> >                         _("missing disk device alias name for %s"), disk->dst);
> >          goto endjob;
> >      }
> >
>
> I think this whole conditional can be deleted. Earlier in the function
> !path is already handled.
>
> >      ret = virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
> > -                                            disk->info.alias,
> > +                                            disk->src->path,
> >                                              &stats->rd_bytes,
> >                                              &stats->wr_bytes,
> >                                              &stats->rd_req,
> > @@ -2424,14 +2424,14 @@ lxcDomainBlockStatsFlags(virDomainPtr dom,
> >              goto endjob;
> >          }
> >
> > -        if (!disk->info.alias) {
> > +        if (!disk->src->path) {
> >              virReportError(VIR_ERR_INTERNAL_ERROR,
> >                             _("missing disk device alias name for %s"), disk->dst);
> >              goto endjob;
> >          }
> >
>
> Same with this block. The alias changes make sense though
>
> Thanks,
> Cole
>
> >          if (virCgroupGetBlkioIoDeviceServiced(priv->cgroup,
> > -                                              disk->info.alias,
> > +                                              disk->src->path,
> >                                                &rd_bytes,
> >                                                &wr_bytes,
> >                                                &rd_req,
> >
>


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list