[PATCH v2] lxc_container: Increase stack size for lxcContainerChild()

Michal Privoznik posted 1 patch 9 months, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/3e4a9d0c0c23e9c9e787461e7a5f9447ac77ce37.1691160754.git.mprivozn@redhat.com
src/lxc/lxc_container.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
[PATCH v2] lxc_container: Increase stack size for lxcContainerChild()
Posted by Michal Privoznik 9 months, 1 week ago
When spawning a new container (via clone()) we allocate stack for
lxcContainerChild(). So far, we allocate 4 pages for the stack
and this used to be enough until we started rewriting everything
to glib. With glib we switched to g_strerror() which localizes
errno strings and thus increases stack usage, while the
previously used strerror_r() was more compact.

Fortunately, the solution is easy - just increase how much stack
the child can use (16 pages ought to be enough for anybody).

And while at it, lets use mmap() for allocation which offer some
nice features:

MAP_STACK - align allocation to be suitable for stack (even
            though, currently ignored on Linux),
MAP_GROWSDOWN - kernel guards out of bounds access from child

Resolves: https://gitlab.com/libvirt/libvirt/-/issues/511
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---

This is a v2 of:

https://listman.redhat.com/archives/libvir-list/2023-August/241127.html

diff to v1:
- switched from g_new0() to mmap() for additional security

 src/lxc/lxc_container.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 63cf283285..c215b83848 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -26,6 +26,7 @@
 #include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <unistd.h>
 #include <mntent.h>
 #include <sys/reboot.h>
@@ -2132,9 +2133,10 @@ int lxcContainerStart(virDomainDef *def,
 {
     pid_t pid;
     int cflags;
-    int stacksize = getpagesize() * 4;
-    g_autofree char *stack = NULL;
+    int stacksize = getpagesize() * 16;
+    char *stack = NULL;
     char *stacktop;
+    int ret = -1;
     lxc_child_argv_t args = {
         .config = def,
         .securityDriver = securityDriver,
@@ -2150,7 +2152,14 @@ int lxcContainerStart(virDomainDef *def,
     };
 
     /* allocate a stack for the container */
-    stack = g_new0(char, stacksize);
+    stack = mmap(NULL, stacksize, PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN | MAP_STACK,
+                 -1, 0);
+    if (stack == MAP_FAILED) {
+        virReportSystemError(errno, "%s",
+                             _("Unable to allocate stack"));
+        return -1;
+    }
 
     stacktop = stack + stacksize;
 
@@ -2160,7 +2169,7 @@ int lxcContainerStart(virDomainDef *def,
         if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) < 0) {
             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                            _("Kernel doesn't support user namespace"));
-            return -1;
+            goto cleanup;
         }
         VIR_DEBUG("Enable user namespace");
         cflags |= CLONE_NEWUSER;
@@ -2175,7 +2184,7 @@ int lxcContainerStart(virDomainDef *def,
             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                            _("Config asks for inherit net namespace "
                              "as well as private network interfaces"));
-            return -1;
+            goto cleanup;
         }
         VIR_DEBUG("Inheriting a net namespace");
     }
@@ -2199,10 +2208,16 @@ int lxcContainerStart(virDomainDef *def,
     if (pid < 0) {
         virReportSystemError(errno, "%s",
                              _("Failed to run clone container"));
-        return -1;
+        goto cleanup;
     }
 
-    return pid;
+    ret = pid;
+ cleanup:
+    if (munmap(stack, stacksize) < 0) {
+        VIR_WARN("Unable to munmap() stack: %s", g_strerror(errno));
+    }
+
+    return ret;
 }
 
 int lxcContainerChown(virDomainDef *def, const char *path)
-- 
2.41.0
Re: [PATCH v2] lxc_container: Increase stack size for lxcContainerChild()
Posted by Martin Kletzander 9 months ago
On Fri, Aug 04, 2023 at 04:53:22PM +0200, Michal Privoznik wrote:
>When spawning a new container (via clone()) we allocate stack for
>lxcContainerChild(). So far, we allocate 4 pages for the stack
>and this used to be enough until we started rewriting everything
>to glib. With glib we switched to g_strerror() which localizes
>errno strings and thus increases stack usage, while the
>previously used strerror_r() was more compact.
>
>Fortunately, the solution is easy - just increase how much stack
>the child can use (16 pages ought to be enough for anybody).
>
>And while at it, lets use mmap() for allocation which offer some
>nice features:
>
>MAP_STACK - align allocation to be suitable for stack (even
>            though, currently ignored on Linux),
>MAP_GROWSDOWN - kernel guards out of bounds access from child
>
>Resolves: https://gitlab.com/libvirt/libvirt/-/issues/511
>Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
>---
>
>This is a v2 of:
>
>https://listman.redhat.com/archives/libvir-list/2023-August/241127.html
>
>diff to v1:
>- switched from g_new0() to mmap() for additional security
>
> src/lxc/lxc_container.c | 29 ++++++++++++++++++++++-------
> 1 file changed, 22 insertions(+), 7 deletions(-)
>
>diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
>index 63cf283285..c215b83848 100644
>--- a/src/lxc/lxc_container.c
>+++ b/src/lxc/lxc_container.c
>@@ -26,6 +26,7 @@
> #include <sys/ioctl.h>
> #include <sys/mount.h>
> #include <sys/stat.h>
>+#include <sys/mman.h>
> #include <unistd.h>
> #include <mntent.h>
> #include <sys/reboot.h>
>@@ -2132,9 +2133,10 @@ int lxcContainerStart(virDomainDef *def,
> {
>     pid_t pid;
>     int cflags;
>-    int stacksize = getpagesize() * 4;
>-    g_autofree char *stack = NULL;
>+    int stacksize = getpagesize() * 16;
>+    char *stack = NULL;
>     char *stacktop;
>+    int ret = -1;
>     lxc_child_argv_t args = {
>         .config = def,
>         .securityDriver = securityDriver,
>@@ -2150,7 +2152,14 @@ int lxcContainerStart(virDomainDef *def,
>     };
>
>     /* allocate a stack for the container */
>-    stack = g_new0(char, stacksize);
>+    stack = mmap(NULL, stacksize, PROT_READ | PROT_WRITE,
>+                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN | MAP_STACK,
>+                 -1, 0);
>+    if (stack == MAP_FAILED) {
>+        virReportSystemError(errno, "%s",
>+                             _("Unable to allocate stack"));
>+        return -1;
>+    }
>
>     stacktop = stack + stacksize;
>
>@@ -2160,7 +2169,7 @@ int lxcContainerStart(virDomainDef *def,
>         if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) < 0) {
>             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                            _("Kernel doesn't support user namespace"));
>-            return -1;
>+            goto cleanup;
>         }
>         VIR_DEBUG("Enable user namespace");
>         cflags |= CLONE_NEWUSER;
>@@ -2175,7 +2184,7 @@ int lxcContainerStart(virDomainDef *def,
>             virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                            _("Config asks for inherit net namespace "
>                              "as well as private network interfaces"));
>-            return -1;
>+            goto cleanup;
>         }
>         VIR_DEBUG("Inheriting a net namespace");
>     }
>@@ -2199,10 +2208,16 @@ int lxcContainerStart(virDomainDef *def,
>     if (pid < 0) {
>         virReportSystemError(errno, "%s",
>                              _("Failed to run clone container"));
>-        return -1;
>+        goto cleanup;
>     }
>
>-    return pid;
>+    ret = pid;
>+ cleanup:
>+    if (munmap(stack, stacksize) < 0) {
>+        VIR_WARN("Unable to munmap() stack: %s", g_strerror(errno));
>+    }

No need for the curly brackets here.

Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

>+
>+    return ret;
> }
>
> int lxcContainerChown(virDomainDef *def, const char *path)
>-- 
>2.41.0
>