[PATCH] [v4] linux-user: add option to intercept execve() syscalls

Ricardo Jesus posted 1 patch 3 years, 8 months ago
Test docker-quick@centos7 failed
Test docker-mingw@fedora failed
Test checkpatch failed
Test FreeBSD failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20200730160106.16613-1-rj.bcjesus@gmail.com
Maintainers: Laurent Vivier <laurent@vivier.eu>
linux-user/main.c    |   8 +++
linux-user/qemu.h    |   1 +
linux-user/syscall.c | 138 +++++++++++++++++++++++++++++++++++++++----
3 files changed, 136 insertions(+), 11 deletions(-)
[PATCH] [v4] linux-user: add option to intercept execve() syscalls
Posted by Ricardo Jesus 3 years, 8 months ago
This patch is based on the original work by Petros Angelatos [1], which
I have updated to the current git master. The following commit message
is largely based on Petros' original message.

Using QEMU user mode emulation under a chroot currently requires
binfmt_misc. This requirement can be avoided if QEMU never issues a
raw execve() to the host system.

Introduce a new option, --execve, that uses the current QEMU interpreter
to intercept execve().

qemu_execve() will prepend the interpreter path, similar to what
binfmt_misc would do, and then pass the modified execve() to the host.

[1] https://patchwork.ozlabs.org/patch/582756/

Signed-off-by: Ricardo Jesus <rj.bcjesus@gmail.com>
---
 linux-user/main.c    |   8 +++
 linux-user/qemu.h    |   1 +
 linux-user/syscall.c | 138 +++++++++++++++++++++++++++++++++++++++----
 3 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 75c9785157..52f6860b45 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -107,6 +107,7 @@ static void usage(int exitcode);
 
 static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
 const char *qemu_uname_release;
+const char *qemu_execve_path;
 
 /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
    we allocate a bigger stack. Need a better solution, for example
@@ -337,6 +338,11 @@ static void handle_arg_guest_base(const char *arg)
     have_guest_base = true;
 }
 
+static void handle_arg_execve(const char *arg)
+{
+    qemu_execve_path = strdup(arg); /* --execve / QEMU_EXECVE value */
+}
+
 static void handle_arg_reserved_va(const char *arg)
 {
     char *p;
@@ -441,6 +447,8 @@ static const struct qemu_argument arg_table[] = {
      "uname",      "set qemu uname release string to 'uname'"},
     {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
      "address",    "set guest_base address to 'address'"},
+    {"execve",     "QEMU_EXECVE",      true,  handle_arg_execve,
+     "",           "use this interpreter when a process calls execve()"},
     {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
      "size",       "reserve 'size' bytes for guest virtual address space"},
     {"d",          "QEMU_LOG",         true,  handle_arg_log,
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 5c964389c1..d1b0d5716b 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -160,6 +160,7 @@ void init_task_state(TaskState *ts);
 void task_settid(TaskState *);
 void stop_all_tasks(void);
 extern const char *qemu_uname_release;
+extern const char *qemu_execve_path;
 extern unsigned long mmap_min_addr;
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 945fc25279..a266895a61 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -115,6 +115,7 @@
 #ifdef HAVE_DRM_H
 #include <libdrm/drm.h>
 #endif
+#include <linux/binfmts.h>
 #include "linux_loop.h"
 #include "uname.h"
 
@@ -7674,6 +7675,131 @@ static target_timer_t get_timer_id(abi_long arg)
     return timerid;
 }
 
+/*
+ * qemu_execve(): execve() the guest program, optionally re-entering QEMU.
+ *
+ * If qemu_execve_path is unset or empty, exec @filename directly.  Otherwise
+ * exec qemu_execve_path with "--execve <path> -0 <argv0>" prepended; for
+ * "#!" scripts the interpreter (and its optional argument) is inserted too.
+ * Returns target values and target errnos.
+ *
+ * execve() is not interruptible, but safe_execve() is still required: a
+ * signal arriving before the host syscall would be lost once the process
+ * leaves QEMU's control; it is either taken as a guest signal first or
+ * left for the new program.
+static abi_long qemu_execve(char *filename, char *argv[],
+                  char *envp[])
+{
+    char *i_arg = NULL, *i_name = NULL;
+    char **new_argp;
+    int argc, fd, ret, i, offset = 5;
+    char *cp;
+    char buf[BINPRM_BUF_SIZE] = { 0 }; /* zeroed so short reads end in NUL */
+
+    /* no interpreter configured: plain host execve */
+    if (qemu_execve_path == NULL || *qemu_execve_path == 0) {
+        return get_errno(safe_execve(filename, argv, envp));
+    }
+
+    for (argc = 0; argv[argc] != NULL; argc++) {
+        /* nothing */ ;
+    }
+
+    fd = open(filename, O_RDONLY);
+    if (fd == -1) {
+        return get_errno(fd);
+    }
+
+    ret = read(fd, buf, BINPRM_BUF_SIZE);
+    if (ret == -1) {
+        close(fd);
+        return get_errno(ret);
+    }
+
+    /* if we have less than 2 bytes, we can guess it is not executable */
+    if (ret < 2) {
+        close(fd);
+        return -host_to_target_errno(ENOEXEC);
+    }
+
+    close(fd);
+
+    /* adapted from the kernel
+     * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c
+     */
+    if ((buf[0] == '#') && (buf[1] == '!')) {
+        /*
+         * This section does the #! interpretation.
+         * Sorta complicated, but hopefully it will work.  -TYT
+         */
+
+        buf[BINPRM_BUF_SIZE - 1] = '\0';
+        cp = strchr(buf, '\n');
+        if (cp == NULL) {
+            cp = buf + BINPRM_BUF_SIZE - 1;
+        }
+        *cp = '\0';
+        while (cp > buf) {
+            cp--;
+            if ((*cp == ' ') || (*cp == '\t')) {
+                *cp = '\0';
+            } else {
+                break;
+            }
+        }
+        for (cp = buf + 2; (*cp == ' ') || (*cp == '\t'); cp++) {
+            /* nothing */ ;
+        }
+        if (*cp == '\0') {
+            return -host_to_target_errno(ENOEXEC); /* no interpreter */
+        }
+        i_name = cp;
+        i_arg = NULL;
+        for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
+            /* nothing */ ;
+        }
+        while ((*cp == ' ') || (*cp == '\t')) {
+            *cp++ = '\0';
+        }
+        if (*cp) {
+            i_arg = cp;
+        }
+
+        if (i_arg) {
+            offset += 2;
+        } else {
+            offset += 1;
+        }
+    }
+
+    new_argp = alloca((argc + offset + 1) * sizeof(void *));
+
+    /* Copy the original arguments, shifted right by offset */
+    for (i = 0; i < argc; i++) {
+        new_argp[i + offset] = argv[i];
+    }
+
+    new_argp[0] = (char *)qemu_execve_path; /* no copy needed for exec */
+    new_argp[1] = (char *)"--execve";
+    new_argp[2] = (char *)qemu_execve_path;
+    new_argp[3] = (char *)"-0";
+    new_argp[offset] = filename; /* replaces the copied argv[0] */
+    new_argp[argc + offset] = NULL;
+
+    if (i_name) {
+        new_argp[4] = i_name;
+        new_argp[5] = i_name;
+
+        if (i_arg) {
+            new_argp[6] = i_arg;
+        }
+    } else {
+        new_argp[4] = argv[0];
+    }
+
+    return get_errno(safe_execve(qemu_execve_path, new_argp, envp));
+}
+
 static int target_to_host_cpu_mask(unsigned long *host_mask,
                                    size_t host_size,
                                    abi_ulong target_addr,
@@ -8023,17 +8149,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
 
             if (!(p = lock_user_string(arg1)))
                 goto execve_efault;
-            /* Although execve() is not an interruptible syscall it is
-             * a special case where we must use the safe_syscall wrapper:
-             * if we allow a signal to happen before we make the host
-             * syscall then we will 'lose' it, because at the point of
-             * execve the process leaves QEMU's control. So we use the
-             * safe syscall wrapper to ensure that we either take the
-             * signal as a guest signal, or else it does not happen
-             * before the execve completes and makes it the other
-             * program's problem.
-             */
-            ret = get_errno(safe_execve(p, argp, envp));
+            ret = qemu_execve(p, argp, envp);
             unlock_user(p, arg1, 0);
 
             goto execve_end;
-- 
2.27.0