[libvirt] [PATCH] Avoid hidden cgroup mount points

Juan Hernandez posted 1 patch 6 years, 9 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20170706150331.27644-1-jhernand@redhat.com
src/util/vircgroup.c                | 23 ++++++++++++++---------
tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++++++++
tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
tests/vircgrouptest.c               |  1 +
4 files changed, 61 insertions(+), 9 deletions(-)
create mode 100644 tests/vircgroupdata/kubevirt.mounts
create mode 100644 tests/vircgroupdata/kubevirt.parsed
[libvirt] [PATCH] Avoid hidden cgroup mount points
Posted by Juan Hernandez 6 years, 9 months ago
Currently the scan of the /proc/mounts file used to find cgroup mount
points doesn't take into account that mount points may hidden by other
mount points. For, example in certain Kubernetes environments the
/proc/mounts contains the following lines:

  cgroup /sys/fs/cgroup/net_prio,net_cls cgroup ...
  tmpfs /sys/fs/cgroup tmpfs ...
  cgroup /sys/fs/cgroup/net_cls,net_prio cgroup ...

In this particular environment the first mount point is hidden by the
second one. The correct mount point is the third one, but libvirt will
never process it because it only checks the first mount point for each
controller (net_cls in this case). So libvirt will try to use the first
mount point, which doesn't actually exist, and the complete detection
process will fail.

To avoid that issue this patch changes the virCgroupDetectMountsFromFile
function so that when there are duplicates it takes the information from
the last line in /proc/mounts. This requires removing the previous
explicit condition to skip duplicates, and adding code to free the
memory used by the processing of duplicated lines.

Related-To: https://bugzilla.redhat.com/1468214
Related-To: https://github.com/kubevirt/libvirt/issues/4
Signed-off-by: Juan Hernandez <jhernand@redhat.com>
---
 src/util/vircgroup.c                | 23 ++++++++++++++---------
 tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++++++++
 tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
 tests/vircgrouptest.c               |  1 +
 4 files changed, 61 insertions(+), 9 deletions(-)
 create mode 100644 tests/vircgroupdata/kubevirt.mounts
 create mode 100644 tests/vircgroupdata/kubevirt.parsed

diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 5aa1db5..41d90e7 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -397,6 +397,7 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
             const char *typestr = virCgroupControllerTypeToString(i);
             int typelen = strlen(typestr);
             char *tmp = entry.mnt_opts;
+            struct virCgroupController *controller = &group->controllers[i];
             while (tmp) {
                 char *next = strchr(tmp, ',');
                 int len;
@@ -406,18 +407,21 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
                 } else {
                     len = strlen(tmp);
                 }
-                /* NB, the same controller can appear >1 time in mount list
-                 * due to bind mounts from one location to another. Pick the
-                 * first entry only
-                 */
-                if (typelen == len && STREQLEN(typestr, tmp, len) &&
-                    !group->controllers[i].mountPoint) {
+
+                if (typelen == len && STREQLEN(typestr, tmp, len)) {
                     char *linksrc;
                     struct stat sb;
                     char *tmp2;
 
-                    if (VIR_STRDUP(group->controllers[i].mountPoint,
-                                   entry.mnt_dir) < 0)
+                    /* Note that the lines in /proc/mounts have the same
+                     * order than the mount operations, and that there may
+                     * be duplicates due to bind mounts. This means
+                     * that the same mount point may be processed more than
+                     * once. We need to save the results of the last one,
+                     * and we need to be careful to release the memory used
+                     * by previous processing. */
+                    VIR_FREE(controller->mountPoint);
+                    if (VIR_STRDUP(controller->mountPoint, entry.mnt_dir) < 0)
                         goto error;
 
                     tmp2 = strrchr(entry.mnt_dir, '/');
@@ -453,7 +457,8 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
                                 VIR_WARN("Expecting a symlink at %s for controller %s",
                                          linksrc, typestr);
                             } else {
-                                group->controllers[i].linkPoint = linksrc;
+                                VIR_FREE(controller->linkPoint);
+                                controller->linkPoint = linksrc;
                             }
                         }
                     }
diff --git a/tests/vircgroupdata/kubevirt.mounts b/tests/vircgroupdata/kubevirt.mounts
new file mode 100644
index 0000000..b0d31bb
--- /dev/null
+++ b/tests/vircgroupdata/kubevirt.mounts
@@ -0,0 +1,36 @@
+tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,mode=755 0 0
+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
+cgroup /sys/fs/cgroup/cpuacct,cpu cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
+cgroup /sys/fs/cgroup/net_prio,net_cls cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
+tmpfs /host-sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
+cgroup /host-sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
+cgroup /host-sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
+cgroup /host-sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
+cgroup /host-sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
+cgroup /host-sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
+cgroup /host-sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
+cgroup /host-sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
+cgroup /host-sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
+cgroup /host-sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
+cgroup /host-sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
+cgroup /host-sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
+tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
+cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
+cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
diff --git a/tests/vircgroupdata/kubevirt.parsed b/tests/vircgroupdata/kubevirt.parsed
new file mode 100644
index 0000000..3377af0
--- /dev/null
+++ b/tests/vircgroupdata/kubevirt.parsed
@@ -0,0 +1,10 @@
+cpu          /sys/fs/cgroup/cpu,cpuacct
+cpuacct      /sys/fs/cgroup/cpu,cpuacct
+cpuset       /sys/fs/cgroup/cpuset
+memory       /sys/fs/cgroup/memory
+devices      /sys/fs/cgroup/devices
+freezer      /sys/fs/cgroup/freezer
+blkio        /sys/fs/cgroup/blkio
+net_cls      /sys/fs/cgroup/net_cls,net_prio
+perf_event   /sys/fs/cgroup/perf_event
+name=systemd /sys/fs/cgroup/systemd
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
index 8af5e2c..b932b1a 100644
--- a/tests/vircgrouptest.c
+++ b/tests/vircgrouptest.c
@@ -885,6 +885,7 @@ mymain(void)
     DETECT_MOUNTS("cgroups3");
     DETECT_MOUNTS("all-in-one");
     DETECT_MOUNTS("no-cgroups");
+    DETECT_MOUNTS("kubevirt");
 
     if (virTestRun("New cgroup for self", testCgroupNewForSelf, NULL) < 0)
         ret = -1;
-- 
2.9.4

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] Avoid hidden cgroup mount points
Posted by Martin Kletzander 6 years, 9 months ago
On Thu, Jul 06, 2017 at 05:03:31PM +0200, Juan Hernandez wrote:
>Currently the scan of the /proc/mounts file used to find cgroup mount
>points doesn't take into account that mount points may hidden by other
>mount points. For, example in certain Kubernetes environments the
>/proc/mounts contains the following lines:
>
>  cgroup /sys/fs/cgroup/net_prio,net_cls cgroup ...
>  tmpfs /sys/fs/cgroup tmpfs ...
>  cgroup /sys/fs/cgroup/net_cls,net_prio cgroup ...
>
>In this particular environment the first mount point is hidden by the
>second one. The correct mount point is the third one, but libvirt will
>never process it because it only checks the first mount point for each
>controller (net_cls in this case). So libvirt will try to use the first
>mount point, which doesn't actually exist, and the complete detection
>process will fail.
>
>To avoid that issue this patch changes the virCgroupDetectMountsFromFile
>function so that when there are duplicates it takes the information from
>the last line in /proc/mounts. This requires removing the previous
>explicit condition to skip duplicates, and adding code to free the
>memory used by the processing of duplicated lines.
>
>Related-To: https://bugzilla.redhat.com/1468214
>Related-To: https://github.com/kubevirt/libvirt/issues/4
>Signed-off-by: Juan Hernandez <jhernand@redhat.com>
>---
> src/util/vircgroup.c                | 23 ++++++++++++++---------
> tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++++++++
> tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
> tests/vircgrouptest.c               |  1 +
> 4 files changed, 61 insertions(+), 9 deletions(-)
> create mode 100644 tests/vircgroupdata/kubevirt.mounts
> create mode 100644 tests/vircgroupdata/kubevirt.parsed
>
>diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
>index 5aa1db5..41d90e7 100644
>--- a/src/util/vircgroup.c
>+++ b/src/util/vircgroup.c
>@@ -397,6 +397,7 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>             const char *typestr = virCgroupControllerTypeToString(i);
>             int typelen = strlen(typestr);
>             char *tmp = entry.mnt_opts;
>+            struct virCgroupController *controller = &group->controllers[i];
>             while (tmp) {
>                 char *next = strchr(tmp, ',');
>                 int len;
>@@ -406,18 +407,21 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>                 } else {
>                     len = strlen(tmp);
>                 }
>-                /* NB, the same controller can appear >1 time in mount list
>-                 * due to bind mounts from one location to another. Pick the
>-                 * first entry only
>-                 */
>-                if (typelen == len && STREQLEN(typestr, tmp, len) &&
>-                    !group->controllers[i].mountPoint) {
>+
>+                if (typelen == len && STREQLEN(typestr, tmp, len)) {
>                     char *linksrc;
>                     struct stat sb;
>                     char *tmp2;
>
>-                    if (VIR_STRDUP(group->controllers[i].mountPoint,
>-                                   entry.mnt_dir) < 0)
>+                    /* Note that the lines in /proc/mounts have the same
>+                     * order than the mount operations, and that there may
>+                     * be duplicates due to bind mounts. This means
>+                     * that the same mount point may be processed more than
>+                     * once. We need to save the results of the last one,
>+                     * and we need to be careful to release the memory used
>+                     * by previous processing. */
>+                    VIR_FREE(controller->mountPoint);

You need to free the linkPoint here as well as it is no longer valid if
we are throwing out the previous entry.

There's also pre-existing leak with linksrc in this function.

>+                    if (VIR_STRDUP(controller->mountPoint, entry.mnt_dir) < 0)
>                         goto error;
>
>                     tmp2 = strrchr(entry.mnt_dir, '/');
>@@ -453,7 +457,8 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>                                 VIR_WARN("Expecting a symlink at %s for controller %s",
>                                          linksrc, typestr);
>                             } else {
>-                                group->controllers[i].linkPoint = linksrc;
>+                                VIR_FREE(controller->linkPoint);
>+                                controller->linkPoint = linksrc;
>                             }
>                         }
>                     }
>diff --git a/tests/vircgroupdata/kubevirt.mounts b/tests/vircgroupdata/kubevirt.mounts
>new file mode 100644
>index 0000000..b0d31bb
>--- /dev/null
>+++ b/tests/vircgroupdata/kubevirt.mounts
>@@ -0,0 +1,36 @@
>+tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /sys/fs/cgroup/cpuacct,cpu cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /sys/fs/cgroup/net_prio,net_cls cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>+tmpfs /host-sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /host-sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /host-sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /host-sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /host-sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /host-sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /host-sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /host-sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /host-sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /host-sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /host-sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /host-sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>+tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0

It's not clear what we're testing here, I would either add just two
controllers mounted in three places (just keep the net_{cls,prio}) or
copy some existing test and add two lines so that the diff later in the
history shows exactly what is being tested if used with '-C', e.g.:

diff --git a/tests/vircgroupdata/cgroups2.mounts b/tests/vircgroupdata/cgroups4.mounts
similarity index 86%
copy from tests/vircgroupdata/cgroups2.mounts
copy to tests/vircgroupdata/cgroups4.mounts
index 21ab867d71ed..2b06c2ea1e16 100644
--- a/tests/vircgroupdata/cgroups2.mounts
+++ b/tests/vircgroupdata/cgroups4.mounts
@@ -10,8 +10,10 @@ shm /dev/shm tmpfs rw,nosuid,nodev,noexec,relatime 0 0
 debugfs /sys/kernel/debug debugfs rw,nosuid,nodev,noexec,relatime 0 0
 cgroup_root /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,size=10240k,mode=755 0 0
 openrc /sys/fs/cgroup/openrc cgroup rw,nosuid,nodev,noexec,relatime,release_agent=/lib64/rc/sh/cgroup-release-agent.sh,name=openrc 0 0
+cpuset /some/random/location/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
 cpuset /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
 cpu /sys/fs/cgroup/cpu cgroup rw,nosuid,nodev,noexec,relatime,cpu 0 0
+cpuacct /some/random/location cgroup rw,nosuid,nodev,noexec,relatime,cpuacct 0 0
 cpuacct /sys/fs/cgroup/cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct 0 0
 memory /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
 devices /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
@@ -20,3 +22,4 @@ blkio /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
 perf_event /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
 hugetlb /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
 binfmt_misc /proc/sys/fs/binfmt_misc binfmt_misc rw,nosuid,nodev,noexec,relatime 0 0
+freezer /some/random/location/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
diff --git a/tests/vircgroupdata/cgroups2.parsed b/tests/vircgroupdata/cgroups4.parsed
similarity index 100%
copy from tests/vircgroupdata/cgroups2.parsed
copy to tests/vircgroupdata/cgroups4.parsed
--

You see how clear that is from the above diff?

Other than that I think this works nicely, so ACK if you are OK with the
proposed changes, I'll squash them in and push it.

Have a nice day,
Martin
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] Avoid hidden cgroup mount points
Posted by Juan Antonio Hernandez Fernandez 6 years, 9 months ago
On Tue, Jul 11, 2017 at 9:25 AM, Martin Kletzander <mkletzan@redhat.com>
wrote:

> On Thu, Jul 06, 2017 at 05:03:31PM +0200, Juan Hernandez wrote:
>
>> Currently the scan of the /proc/mounts file used to find cgroup mount
>> points doesn't take into account that mount points may hidden by other
>> mount points. For, example in certain Kubernetes environments the
>> /proc/mounts contains the following lines:
>>
>>  cgroup /sys/fs/cgroup/net_prio,net_cls cgroup ...
>>  tmpfs /sys/fs/cgroup tmpfs ...
>>  cgroup /sys/fs/cgroup/net_cls,net_prio cgroup ...
>>
>> In this particular environment the first mount point is hidden by the
>> second one. The correct mount point is the third one, but libvirt will
>> never process it because it only checks the first mount point for each
>> controller (net_cls in this case). So libvirt will try to use the first
>> mount point, which doesn't actually exist, and the complete detection
>> process will fail.
>>
>> To avoid that issue this patch changes the virCgroupDetectMountsFromFile
>> function so that when there are duplicates it takes the information from
>> the last line in /proc/mounts. This requires removing the previous
>> explicit condition to skip duplicates, and adding code to free the
>> memory used by the processing of duplicated lines.
>>
>> Related-To: https://bugzilla.redhat.com/1468214
>> Related-To: https://github.com/kubevirt/libvirt/issues/4
>> Signed-off-by: Juan Hernandez <jhernand@redhat.com>
>> ---
>> src/util/vircgroup.c                | 23 ++++++++++++++---------
>> tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++
>> ++++++
>> tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
>> tests/vircgrouptest.c               |  1 +
>> 4 files changed, 61 insertions(+), 9 deletions(-)
>> create mode 100644 tests/vircgroupdata/kubevirt.mounts
>> create mode 100644 tests/vircgroupdata/kubevirt.parsed
>>
>> diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
>> index 5aa1db5..41d90e7 100644
>> --- a/src/util/vircgroup.c
>> +++ b/src/util/vircgroup.c
>> @@ -397,6 +397,7 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>             const char *typestr = virCgroupControllerTypeToString(i);
>>             int typelen = strlen(typestr);
>>             char *tmp = entry.mnt_opts;
>> +            struct virCgroupController *controller =
>> &group->controllers[i];
>>             while (tmp) {
>>                 char *next = strchr(tmp, ',');
>>                 int len;
>> @@ -406,18 +407,21 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>                 } else {
>>                     len = strlen(tmp);
>>                 }
>> -                /* NB, the same controller can appear >1 time in mount
>> list
>> -                 * due to bind mounts from one location to another. Pick
>> the
>> -                 * first entry only
>> -                 */
>> -                if (typelen == len && STREQLEN(typestr, tmp, len) &&
>> -                    !group->controllers[i].mountPoint) {
>> +
>> +                if (typelen == len && STREQLEN(typestr, tmp, len)) {
>>                     char *linksrc;
>>                     struct stat sb;
>>                     char *tmp2;
>>
>> -                    if (VIR_STRDUP(group->controllers[i].mountPoint,
>> -                                   entry.mnt_dir) < 0)
>> +                    /* Note that the lines in /proc/mounts have the same
>> +                     * order than the mount operations, and that there
>> may
>> +                     * be duplicates due to bind mounts. This means
>> +                     * that the same mount point may be processed more
>> than
>> +                     * once. We need to save the results of the last one,
>> +                     * and we need to be careful to release the memory
>> used
>> +                     * by previous processing. */
>> +                    VIR_FREE(controller->mountPoint);
>>
>
> You need to free the linkPoint here as well as it is no longer valid if
> we are throwing out the previous entry.
>
> There's also pre-existing leak with linksrc in this function.
>
>
> +                    if (VIR_STRDUP(controller->mountPoint,
>> entry.mnt_dir) < 0)
>>                         goto error;
>>
>>                     tmp2 = strrchr(entry.mnt_dir, '/');
>> @@ -453,7 +457,8 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>                                 VIR_WARN("Expecting a symlink at %s for
>> controller %s",
>>                                          linksrc, typestr);
>>                             } else {
>> -                                group->controllers[i].linkPoint =
>> linksrc;
>> +                                VIR_FREE(controller->linkPoint);
>> +                                controller->linkPoint = linksrc;
>>                             }
>>                         }
>>                     }
>> diff --git a/tests/vircgroupdata/kubevirt.mounts
>> b/tests/vircgroupdata/kubevirt.mounts
>> new file mode 100644
>> index 0000000..b0d31bb
>> --- /dev/null
>> +++ b/tests/vircgroupdata/kubevirt.mounts
>> @@ -0,0 +1,36 @@
>> +tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,mode=755 0 0
>> +cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuacct,cpu cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
>> 0 0
>> +cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
>> 0 0
>> +cgroup /sys/fs/cgroup/net_prio,net_cls cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>> +tmpfs /host-sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755
>> 0 0
>> +cgroup /host-sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /host-sys/fs/cgroup/cpu,cpuacct cgroup
>> rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>> +cgroup /host-sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /host-sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /host-sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /host-sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /host-sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /host-sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /host-sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /host-sys/fs/cgroup/perf_event cgroup
>> rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>> +cgroup /host-sys/fs/cgroup/net_cls,net_prio cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>> +tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>> +cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
>> 0 0
>> +cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
>> 0 0
>> +cgroup /sys/fs/cgroup/net_cls,net_prio cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>>
>
> It's not clear what we're testing here, I would either add just two
> controllers mounted in three places (just keep the net_{cls,prio}) or
> copy some existing test and add two lines so that the diff later in the
> history shows exactly what is being tested if used with '-C', e.g.:
>
> diff --git a/tests/vircgroupdata/cgroups2.mounts
> b/tests/vircgroupdata/cgroups4.mounts
> similarity index 86%
> copy from tests/vircgroupdata/cgroups2.mounts
> copy to tests/vircgroupdata/cgroups4.mounts
> index 21ab867d71ed..2b06c2ea1e16 100644
> --- a/tests/vircgroupdata/cgroups2.mounts
> +++ b/tests/vircgroupdata/cgroups4.mounts
> @@ -10,8 +10,10 @@ shm /dev/shm tmpfs rw,nosuid,nodev,noexec,relatime 0 0
> debugfs /sys/kernel/debug debugfs rw,nosuid,nodev,noexec,relatime 0 0
> cgroup_root /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,size=10240k,mode=755
> 0 0
> openrc /sys/fs/cgroup/openrc cgroup rw,nosuid,nodev,noexec,relatim
> e,release_agent=/lib64/rc/sh/cgroup-release-agent.sh,name=openrc 0 0
> +cpuset /some/random/location/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
> 0 0
> cpuset /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
> 0 0
> cpu /sys/fs/cgroup/cpu cgroup rw,nosuid,nodev,noexec,relatime,cpu 0 0
> +cpuacct /some/random/location cgroup rw,nosuid,nodev,noexec,relatime,cpuacct
> 0 0
> cpuacct /sys/fs/cgroup/cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct
> 0 0
> memory /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
> 0 0
> devices /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
> 0 0
> @@ -20,3 +22,4 @@ blkio /sys/fs/cgroup/blkio cgroup
> rw,nosuid,nodev,noexec,relatime,blkio 0 0
> perf_event /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
> 0 0
> hugetlb /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
> 0 0
> binfmt_misc /proc/sys/fs/binfmt_misc binfmt_misc
> rw,nosuid,nodev,noexec,relatime 0 0
> +freezer /some/random/location/freezer cgroup
> rw,nosuid,nodev,noexec,relatime,freezer 0 0
> diff --git a/tests/vircgroupdata/cgroups2.parsed
> b/tests/vircgroupdata/cgroups4.parsed
> similarity index 100%
> copy from tests/vircgroupdata/cgroups2.parsed
> copy to tests/vircgroupdata/cgroups4.parsed
> --
>
> You see how clear that is from the above diff?
>
> Other than that I think this works nicely, so ACK if you are OK with the
> proposed changes, I'll squash them in and push it.
>
>
Your suggestions look good to me, so ACK, feel free to squash and push
Martin.
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list