Changeset
src/lxc/lxc_container.c  |  58 +------------------
src/lxc/lxc_container.h  |   4 ++
src/lxc/lxc_controller.c | 145 +++++++++++++++++++++++++++--------------------
3 files changed, 87 insertions(+), 120 deletions(-)
Git apply log
Switched to a new branch '20180415212539.19974-1-rstoyanov1@gmail.com'
Applying: lxc: Make lxcContainerMountFSBlock non static
Applying: lxc: Move up virLXCControllerAppendNBDPids
Applying: lxc: Mount NBD devices before clone
To https://github.com/patchew-project/libvirt
 * [new tag]         patchew/20180415212539.19974-1-rstoyanov1@gmail.com -> patchew/20180415212539.19974-1-rstoyanov1@gmail.com
Test passed: syntax-check

loading

[libvirt] [RFC 0/3] LXC with block device and enabled userns
Posted by Radostin Stoyanov, 4 days ago
Problem background
------------------

The LXC driver has support for the filesystem types "file" and "block"
that allow a disk image to be mounted in the guest (container). [1]

However, when user-namespace is enabled (uid/gid mapping is used) the
mount of the root filesystem block device fails. [2]

According to "man 7 user_namespaces":

	Mounting block-based filesystems can be done only by a process that holds
	CAP_SYS_ADMIN in the initial user namespace.


Suggested approach
------------------
Mount the root file system block device before the clone() call, then set
filesystem type to VIR_DOMAIN_FS_TYPE_MOUNT and filesystem source to the folder
where it was mounted.


Issues encountered
--------------------

This patch series implements the basic idea of the mentioned approach.
In result, a container with configured idmap and NBD filesystem is able to start.

However, on guest shutdown this kernel error [3] occurs.

Similar messages [4] occur on shutdown when NBD filesystem is used with LXC
container without idmap.

Perhaps, one reason could be that on guest shutdown the LXC driver kills qemu-nbd
process without sending disconnect for the specified device.


References
---------- 

[1] https://libvirt.org/formatdomain.html#elementsFilesystems
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1328946
[3] https://pastebin.com/raw/jMBk5mtG
[4] https://pastebin.com/raw/wTKbuRP9

Radostin Stoyanov (3):
  lxc: Make lxcContainerMountFSBlock non static
  lxc: Move up virLXCControllerAppendNBDPids
  lxc: Mount NBD devices before clone

 src/lxc/lxc_container.c  |  58 +------------------
 src/lxc/lxc_container.h  |   4 ++
 src/lxc/lxc_controller.c | 145 +++++++++++++++++++++++++++--------------------
 3 files changed, 87 insertions(+), 120 deletions(-)

-- 
2.14.3

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC 1/3] lxc: Make lxcContainerMountFSBlock non static
Posted by Radostin Stoyanov, 4 days ago
---
 src/lxc/lxc_container.c | 5 +----
 src/lxc/lxc_container.h | 4 ++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 532fd0be0..3b8cb966e 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -113,9 +113,6 @@ struct __lxc_child_argv {
     int *nsInheritFDs;
 };
 
-static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
-                                    const char *srcprefix,
-                                    const char *sec_mount_options);
 
 
 /*
@@ -1499,7 +1496,7 @@ static int lxcContainerMountFSBlockHelper(virDomainFSDefPtr fs,
 }
 
 
-static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
+int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
                                     const char *srcprefix,
                                     const char *sec_mount_options)
 {
diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h
index 641e2d460..3a3564b79 100644
--- a/src/lxc/lxc_container.h
+++ b/src/lxc/lxc_container.h
@@ -68,4 +68,8 @@ int lxcContainerChown(virDomainDefPtr def, const char *path);
 
 bool lxcIsBasicMountLocation(const char *path);
 
+int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
+                             const char *srcprefix,
+                             const char *sec_mount_options);
+
 #endif /* LXC_CONTAINER_H */
-- 
2.14.3

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC 2/3] lxc: Move up virLXCControllerAppendNBDPids
Posted by Radostin Stoyanov, 4 days ago
There is no functional change in this patch.

It only moves virLXCControllerAppendNBDPids above
virLXCControllerSetupNBDDeviceFS.
---
 src/lxc/lxc_controller.c | 96 ++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 507bffda0..61d9ed07b 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -481,6 +481,55 @@ static int virLXCControllerSetupLoopDeviceDisk(virDomainDiskDefPtr disk)
 }
 
 
+static int virLXCControllerAppendNBDPids(virLXCControllerPtr ctrl,
+                                         const char *dev)
+{
+    char *pidpath = NULL;
+    pid_t *pids = NULL;
+    size_t npids = 0;
+    size_t i;
+    int ret = -1;
+    size_t loops = 0;
+    pid_t pid;
+
+    if (!STRPREFIX(dev, "/dev/") ||
+        virAsprintf(&pidpath, "/sys/devices/virtual/block/%s/pid", dev + 5) < 0)
+        goto cleanup;
+
+    /* Wait for the pid file to appear */
+    while (!virFileExists(pidpath)) {
+        /* wait for 100ms before checking again, but don't do it for ever */
+        if (errno == ENOENT && loops < 10) {
+            usleep(100 * 1000);
+            loops++;
+        } else {
+            virReportSystemError(errno,
+                                 _("Cannot check NBD device %s pid"),
+                                 dev + 5);
+            goto cleanup;
+        }
+    }
+
+    if (virPidFileReadPath(pidpath, &pid) < 0)
+        goto cleanup;
+
+    if (virProcessGetPids(pid, &npids, &pids) < 0)
+        goto cleanup;
+
+    for (i = 0; i < npids; i++) {
+        if (VIR_APPEND_ELEMENT(ctrl->nbdpids, ctrl->nnbdpids, pids[i]) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+
+ cleanup:
+    VIR_FREE(pids);
+    VIR_FREE(pidpath);
+    return ret;
+}
+
+
 static int virLXCControllerSetupNBDDeviceFS(virDomainFSDefPtr fs)
 {
     char *dev;
@@ -545,53 +594,6 @@ static int virLXCControllerSetupNBDDeviceDisk(virDomainDiskDefPtr disk)
     return 0;
 }
 
-static int virLXCControllerAppendNBDPids(virLXCControllerPtr ctrl,
-                                         const char *dev)
-{
-    char *pidpath = NULL;
-    pid_t *pids = NULL;
-    size_t npids = 0;
-    size_t i;
-    int ret = -1;
-    size_t loops = 0;
-    pid_t pid;
-
-    if (!STRPREFIX(dev, "/dev/") ||
-        virAsprintf(&pidpath, "/sys/devices/virtual/block/%s/pid", dev + 5) < 0)
-        goto cleanup;
-
-    /* Wait for the pid file to appear */
-    while (!virFileExists(pidpath)) {
-        /* wait for 100ms before checking again, but don't do it for ever */
-        if (errno == ENOENT && loops < 10) {
-            usleep(100 * 1000);
-            loops++;
-        } else {
-            virReportSystemError(errno,
-                                 _("Cannot check NBD device %s pid"),
-                                 dev + 5);
-            goto cleanup;
-        }
-    }
-
-    if (virPidFileReadPath(pidpath, &pid) < 0)
-        goto cleanup;
-
-    if (virProcessGetPids(pid, &npids, &pids) < 0)
-        goto cleanup;
-
-    for (i = 0; i < npids; i++) {
-        if (VIR_APPEND_ELEMENT(ctrl->nbdpids, ctrl->nnbdpids, pids[i]) < 0)
-            goto cleanup;
-    }
-
-    ret = 0;
-
- cleanup:
-    VIR_FREE(pids);
-    VIR_FREE(pidpath);
-    return ret;
-}
 
 static int virLXCControllerSetupLoopDevices(virLXCControllerPtr ctrl)
 {
-- 
2.14.3

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC 3/3] lxc: Mount NBD devices before clone
Posted by Radostin Stoyanov, 4 days ago
When user-namespace is enabled we are not allowed
to mount block/NBD devices.

Instead, mount /dev/nbdX to /run/libvirt/lxc/<domain>.root
and set:

	fs->src->path = /run/libvirt/lxc/<domain>.root
	fs->type = VIR_DOMAIN_FS_TYPE_MOUNT
---
 src/lxc/lxc_container.c  | 53 ------------------------------------------------
 src/lxc/lxc_controller.c | 49 +++++++++++++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 69 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 3b8cb966e..420bb20ab 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -658,55 +658,6 @@ static int lxcContainerResolveSymlinks(virDomainFSDefPtr fs, bool gentle)
     return 0;
 }
 
-static int lxcContainerPrepareRoot(virDomainDefPtr def,
-                                   virDomainFSDefPtr root,
-                                   const char *sec_mount_options)
-{
-    char *dst;
-    char *tmp;
-
-    VIR_DEBUG("Prepare root %d", root->type);
-
-    if (root->type == VIR_DOMAIN_FS_TYPE_MOUNT)
-        return 0;
-
-    if (root->type == VIR_DOMAIN_FS_TYPE_FILE) {
-        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                       _("Unexpected root filesystem without loop device"));
-        return -1;
-    }
-
-    if (root->type != VIR_DOMAIN_FS_TYPE_BLOCK) {
-        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-                       _("Unsupported root filesystem type %s"),
-                       virDomainFSTypeToString(root->type));
-        return -1;
-    }
-
-    if (lxcContainerResolveSymlinks(root, false) < 0)
-        return -1;
-
-    if (virAsprintf(&dst, "%s/%s.root",
-                    LXC_STATE_DIR, def->name) < 0)
-        return -1;
-
-    tmp = root->dst;
-    root->dst = dst;
-
-    if (lxcContainerMountFSBlock(root, "", sec_mount_options) < 0) {
-        root->dst = tmp;
-        VIR_FREE(dst);
-        return -1;
-    }
-
-    root->dst = tmp;
-    root->type = VIR_DOMAIN_FS_TYPE_MOUNT;
-    VIR_FREE(root->src->path);
-    root->src->path = dst;
-
-    return 0;
-}
-
 static int lxcContainerPivotRoot(virDomainFSDefPtr root)
 {
     int ret;
@@ -1755,10 +1706,6 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
     if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0)
         goto cleanup;
 
-    /* Ensure the root filesystem is mounted */
-    if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0)
-        goto cleanup;
-
     /* Gives us a private root, leaving all parent OS mounts on /.oldroot */
     if (lxcContainerPivotRoot(root) < 0)
         goto cleanup;
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 61d9ed07b..d1ae60b1d 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -530,9 +530,12 @@ static int virLXCControllerAppendNBDPids(virLXCControllerPtr ctrl,
 }
 
 
-static int virLXCControllerSetupNBDDeviceFS(virDomainFSDefPtr fs)
+static int virLXCControllerSetupNBDDeviceFS(virLXCControllerPtr ctrl,
+                                            virDomainFSDefPtr fs)
 {
-    char *dev;
+    char *dev, *dst, *tmp, *sec_mount_options;
+    virDomainDefPtr def = ctrl->def;
+    virSecurityManagerPtr securityDriver = ctrl->securityManager;
 
     if (fs->format <= VIR_STORAGE_FILE_NONE) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
@@ -540,22 +543,42 @@ static int virLXCControllerSetupNBDDeviceFS(virDomainFSDefPtr fs)
         return -1;
     }
 
+    if (virAsprintf(&dst, "%s/%s.root/",
+                    LXC_STATE_DIR, def->name) < 0)
+        return -1;
+
+    if (!(sec_mount_options = virSecurityManagerGetMountOptions(securityDriver, def)))
+        return -1;
+
     if (virFileNBDDeviceAssociate(fs->src->path,
                                   fs->format,
                                   fs->readonly,
                                   &dev) < 0)
         return -1;
 
-    VIR_DEBUG("Changing fs %s to use type=block for dev %s",
-              fs->src->path, dev);
-    /*
-     * We now change it into a block device type, so that
-     * the rest of container setup 'just works'
-     */
-    fs->type = VIR_DOMAIN_FS_TYPE_BLOCK;
     VIR_FREE(fs->src->path);
     fs->src->path = dev;
 
+    tmp = fs->dst;
+    fs->dst = dst;
+
+    if (lxcContainerMountFSBlock(fs, "", sec_mount_options) < 0) {
+        fs->dst = tmp;
+        VIR_FREE(dst);
+        return -1;
+    }
+
+    fs->dst = tmp;
+    fs->type = VIR_DOMAIN_FS_TYPE_MOUNT;
+
+    /* The NBD device will be cleaned up while the cgroup will end.
+     * For this we need to remember the qemu-nbd pid and add it to
+     * the cgroup*/
+    if (virLXCControllerAppendNBDPids(ctrl, fs->src->path) < 0)
+
+    VIR_FREE(fs->src->path);
+    fs->src->path = dst;
+
     return 0;
 }
 
@@ -637,13 +660,7 @@ static int virLXCControllerSetupLoopDevices(virLXCControllerPtr ctrl)
             }
             ctrl->loopDevFds[ctrl->nloopDevs - 1] = fd;
         } else if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_NBD) {
-            if (virLXCControllerSetupNBDDeviceFS(fs) < 0)
-                goto cleanup;
-
-            /* The NBD device will be cleaned up while the cgroup will end.
-             * For this we need to remember the qemu-nbd pid and add it to
-             * the cgroup*/
-            if (virLXCControllerAppendNBDPids(ctrl, fs->src->path) < 0)
+            if (virLXCControllerSetupNBDDeviceFS(ctrl, fs) < 0)
                 goto cleanup;
         } else {
             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-- 
2.14.3

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list