:p
atchew
Login
These patches, along with a few more hacks [1] I didn't include in this patchset, allowed me to run arm64 and armv7 versions of the dind image on amd64. [1] https://github.com/yamt/qemu/tree/linux-user-for-docker You can find my test setup here: https://github.com/yamt/garbage/tree/master/binfmt-aarch64-install YAMAMOTO Takashi (5): linux-user: handle /proc/self/exe for execve linux-user: make exec_path realpath linux-user: Fix the execfd case of /proc/self/exe open linux-user: dup the execfd on start up linux-user: Implement pivot_root linux-user/main.c | 14 +++++++++++++- linux-user/qemu.h | 2 ++ linux-user/syscall.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 55 insertions(+), 4 deletions(-) -- 2.21.1 (Apple Git-122.3)
It seems to be fairly common to execve /proc/self/exe in the Docker and Go communities these days. At least, moby "reexec" and runc "libcontainer" do that. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, #endif case TARGET_NR_execve: { + const char *path; char **argp, **envp; int argc, envc; abi_ulong gp; @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, * before the execve completes and makes it the other * program's problem. */ - ret = get_errno(safe_execve(p, argp, envp)); + path = p; + if (is_proc_myself(path, "exe")) { + path = exec_path; + } + ret = get_errno(safe_execve(path, argp, envp)); unlock_user(p, arg1, 0); goto execve_end; -- 2.21.1 (Apple Git-122.3)
Otherwise, it can be easily fooled by the user app using chdir(). Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/linux-user/main.c b/linux-user/main.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -XXX,XX +XXX,XX @@ #endif char *exec_path; +char exec_path_store[PATH_MAX]; int singlestep; static const char *argv0; @@ -XXX,XX +XXX,XX @@ static int parse_args(int argc, char **argv) exit(EXIT_FAILURE); } - exec_path = argv[optind]; + exec_path = realpath(argv[optind], exec_path_store); + if (exec_path == NULL) { + exec_path = argv[optind]; + } return optind; } -- 2.21.1 (Apple Git-122.3)
It's problematic to return AT_EXECFD as it is because the user app would close it. This patch opens it via /proc/self/fd instead. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, if (is_proc_myself(pathname, "exe")) { int execfd = qemu_getauxval(AT_EXECFD); - return execfd ? execfd : safe_openat(dirfd, exec_path, flags, mode); + if (execfd) { + char filename[PATH_MAX]; + int ret; + + snprintf(filename, sizeof(filename), "/proc/self/fd/%d", execfd); + ret = safe_openat(dirfd, filename, flags, mode); + if (ret != -1) { + return ret; + } + } + return safe_openat(dirfd, exec_path, flags, mode); } for (fake_open = fakes; fake_open->filename; fake_open++) { -- 2.21.1 (Apple Git-122.3)
So that it can be used for other purposes (e.g. syscall.c) after the elf loader closed it. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/main.c | 8 ++++++++ linux-user/qemu.h | 2 ++ linux-user/syscall.c | 5 ++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -XXX,XX +XXX,XX @@ char *exec_path; char exec_path_store[PATH_MAX]; +int exec_fd = -1; int singlestep; static const char *argv0; @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) cpu->opaque = ts; task_settid(ts); + /* + * dup execfd to a global so that it can be used after loader_exec + * closes it. + */ + + exec_fd = dup(execfd); + ret = loader_exec(execfd, exec_path, target_argv, target_environ, regs, info, &bprm); if (ret != 0) { diff --git a/linux-user/qemu.h b/linux-user/qemu.h index XXXXXXX..XXXXXXX 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -XXX,XX +XXX,XX @@ typedef struct TaskState { } __attribute__((aligned(16))) TaskState; extern char *exec_path; +extern int exec_fd; + void init_task_state(TaskState *ts); void task_settid(TaskState *); void stop_all_tasks(void); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, }; if (is_proc_myself(pathname, "exe")) { - int execfd = qemu_getauxval(AT_EXECFD); - if (execfd) { + if (exec_fd != -1) { char filename[PATH_MAX]; int ret; - snprintf(filename, sizeof(filename), "/proc/self/fd/%d", execfd); + snprintf(filename, sizeof(filename), "/proc/self/fd/%d", exec_fd); ret = safe_openat(dirfd, filename, flags, mode); if (ret != -1) { return ret; -- 2.21.1 (Apple Git-122.3)
Used by runc. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ #include <sys/prctl.h> #include <sys/resource.h> #include <sys/swap.h> +#include <sys/syscall.h> #include <linux/capability.h> #include <sched.h> #include <sys/timex.h> @@ -XXX,XX +XXX,XX @@ static int host_to_target_cpu_mask(const unsigned long *host_mask, return 0; } +static int pivot_root(const char *new_root, const char *put_old) +{ + return syscall(SYS_pivot_root, new_root, put_old); +} + /* This is an internal helper for do_syscall so that it is easier * to have a single return point, so that actions, such as logging * of syscall results, can be performed. @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, return ret; #endif +#if defined(TARGET_NR_pivot_root) + case TARGET_NR_pivot_root: + { + void *p2; + p = lock_user_string(arg1); /* new_root */ + p2 = lock_user_string(arg2); /* put_old */ + if (!p || !p2) { + ret = -TARGET_EFAULT; + } else { + ret = get_errno(pivot_root(p, p2)); + } + unlock_user(p2, arg2, 0); + unlock_user(p, arg1, 0); + } + return ret; +#endif + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); return -TARGET_ENOSYS; -- 2.21.1 (Apple Git-122.3)
These patches allowed me to run arm64 and armv7 versions of the dind image on amd64. This patchset includes a few patches marked [!MERGE], which are not for the upstream merge. They are included here just to show the context to reviewers. You can find my test setup here: https://github.com/yamt/garbage/tree/master/binfmt-aarch64-install YAMAMOTO Takashi (11): linux-user: handle /proc/self/exe for execve linux-user: Fix the execfd case of /proc/self/exe open linux-user: dup the execfd on start up linux-user: make exec_path realpath linux-user: Implement pivot_root linux-user: add get_exe_path linux-user: simplify is_proc_myself linux-user: Implement exec of /proc/$pid/exe of qemu process linux-user: Make the qemu detection for /proc/$pid/exe a bit conservative linux-user: a crude hack for libcontainer (CLONE_PARENT) [!MERGE] linux-user: always assume preserve_argv0 for now [!MERGE] linux-user/main.c | 57 ++++++++++++++- linux-user/qemu.h | 2 + linux-user/syscall.c | 171 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 219 insertions(+), 11 deletions(-) -- 2.21.1 (Apple Git-122.3)
It seems to be fairly common to execve /proc/self/exe in the Docker and Go communities these days. At least, moby "reexec" and runc "libcontainer" do that. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, #endif case TARGET_NR_execve: { + const char *path; char **argp, **envp; int argc, envc; abi_ulong gp; @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, * before the execve completes and makes it the other * program's problem. */ - ret = get_errno(safe_execve(p, argp, envp)); + path = p; + if (is_proc_myself(path, "exe")) { + path = exec_path; + } + ret = get_errno(safe_execve(path, argp, envp)); unlock_user(p, arg1, 0); goto execve_end; -- 2.21.1 (Apple Git-122.3)
It's problematic to return AT_EXECFD as it is because the user app would close it. This patch opens it via /proc/self/fd instead. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, if (is_proc_myself(pathname, "exe")) { int execfd = qemu_getauxval(AT_EXECFD); - return execfd ? execfd : safe_openat(dirfd, exec_path, flags, mode); + if (execfd) { + char filename[PATH_MAX]; + int ret; + + snprintf(filename, sizeof(filename), "/proc/self/fd/%d", execfd); + ret = safe_openat(dirfd, filename, flags, mode); + if (ret != -1) { + return ret; + } + } + return safe_openat(dirfd, exec_path, flags, mode); } for (fake_open = fakes; fake_open->filename; fake_open++) { -- 2.21.1 (Apple Git-122.3)
So that it can be used for other purposes (e.g. syscall.c) after the elf loader closed it. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/main.c | 10 +++++++++- linux-user/qemu.h | 2 ++ linux-user/syscall.c | 5 ++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -XXX,XX +XXX,XX @@ #endif char *exec_path; +int exec_fd = -1; int singlestep; static const char *argv0; @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) * Manage binfmt-misc open-binary flag */ execfd = qemu_getauxval(AT_EXECFD); - if (execfd == 0) { + if (execfd > 0) { + /* + * dup execfd to a global so that it can be used after loader_exec + * closes it. + */ + + exec_fd = dup(execfd); + } else { execfd = open(exec_path, O_RDONLY); if (execfd < 0) { printf("Error while loading %s: %s\n", exec_path, strerror(errno)); diff --git a/linux-user/qemu.h b/linux-user/qemu.h index XXXXXXX..XXXXXXX 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -XXX,XX +XXX,XX @@ typedef struct TaskState { } __attribute__((aligned(16))) TaskState; extern char *exec_path; +extern int exec_fd; + void init_task_state(TaskState *ts); void task_settid(TaskState *); void stop_all_tasks(void); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, }; if (is_proc_myself(pathname, "exe")) { - int execfd = qemu_getauxval(AT_EXECFD); - if (execfd) { + if (exec_fd != -1) { char filename[PATH_MAX]; int ret; - snprintf(filename, sizeof(filename), "/proc/self/fd/%d", execfd); + snprintf(filename, sizeof(filename), "/proc/self/fd/%d", exec_fd); ret = safe_openat(dirfd, filename, flags, mode); if (ret != -1) { return ret; -- 2.21.1 (Apple Git-122.3)
Otherwise, it can be easily fooled by the user app using chdir(). Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/main.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/linux-user/main.c b/linux-user/main.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -XXX,XX +XXX,XX @@ #endif char *exec_path; +char exec_path_store[PATH_MAX]; int exec_fd = -1; int singlestep; @@ -XXX,XX +XXX,XX @@ static int parse_args(int argc, char **argv) exit(EXIT_FAILURE); } - exec_path = argv[optind]; + /* + * Try to get the realpath of the executable to avoid being + * fooled by chdir in the user app. + * + * Note: realpath here can fail for some use cases. + * For example, runc executes an unlinked binary via + * /proc/self/fd. + * It isn't fatal as long as we have an exec fd. + * (Otherwise, we will fail to load the binary.) + */ + exec_path = realpath(argv[optind], exec_path_store); + if (exec_path == NULL) { + exec_path = argv[optind]; + } return optind; } -- 2.21.1 (Apple Git-122.3)
Used by runc. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int host_to_target_cpu_mask(const unsigned long *host_mask, return 0; } +#if defined(TARGET_NR_pivot_root) && defined(__NR_pivot_root) +_syscall2(int, pivot_root, const char *, new_root, const char *, put_old) +#endif + /* This is an internal helper for do_syscall so that it is easier * to have a single return point, so that actions, such as logging * of syscall results, can be performed. @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, return ret; #endif +#if defined(TARGET_NR_pivot_root) + case TARGET_NR_pivot_root: + { + void *p2; + p = lock_user_string(arg1); /* new_root */ + p2 = lock_user_string(arg2); /* put_old */ + if (!p || !p2) { + ret = -TARGET_EFAULT; + } else { + ret = get_errno(pivot_root(p, p2)); + } + unlock_user(p2, arg2, 0); + unlock_user(p, arg1, 0); + } + return ret; +#endif + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); return -TARGET_ENOSYS; -- 2.21.1 (Apple Git-122.3)
Refactor to prepare the special cases for /proc/$pid/exe where pid is not the calling process. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 48 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int open_self_auxv(void *cpu_env, int fd) return 0; } +static const char *get_exe_path(int pid, char *buf, size_t bufsize) +{ + if (pid == getpid()) { + return exec_path; + } + + return NULL; +} + +static int is_proc_file(const char *filename, int *pidp, const char *entry) +{ + if (!strncmp(filename, "/proc/", strlen("/proc/"))) { + int pid; + + filename += strlen("/proc/"); + if (!strncmp(filename, "self/", strlen("self/"))) { + pid = getpid(); + filename += strlen("self/"); + } else if (*filename >= '1' && *filename <= '9') { + pid = 0; + while (*filename >= '0' && *filename <= '9') { + pid = pid * 10 + *filename - '0'; + filename++; + } + if (*filename != '/') { + return 0; + } + filename++; + } else { + return 0; + } + if (!strcmp(filename, entry)) { + *pidp = pid; + return 1; + } + } + return 0; +} + static int is_proc_myself(const char *filename, const char *entry) { if (!strncmp(filename, "/proc/", strlen("/proc/"))) { @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, abi_ulong addr; char **q; int total_size = 0; + int pid; + char path_store[PATH_MAX]; argc = 0; guest_argp = arg2; @@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, * program's problem. 
*/ path = p; - if (is_proc_myself(path, "exe")) { - path = exec_path; + if (is_proc_file(path, &pid, "exe")) { + path = get_exe_path(pid, path_store, sizeof(path_store)); + if (path == NULL) { + path = p; + } } ret = get_errno(safe_execve(path, argp, envp)); unlock_user(p, arg1, 0); -- 2.21.1 (Apple Git-122.3)
Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int is_proc_file(const char *filename, int *pidp, const char *entry) static int is_proc_myself(const char *filename, const char *entry) { - if (!strncmp(filename, "/proc/", strlen("/proc/"))) { - filename += strlen("/proc/"); - if (!strncmp(filename, "self/", strlen("self/"))) { - filename += strlen("self/"); - } else if (*filename >= '1' && *filename <= '9') { - char myself[80]; - snprintf(myself, sizeof(myself), "%d/", getpid()); - if (!strncmp(filename, myself, strlen(myself))) { - filename += strlen(myself); - } else { - return 0; - } - } else { - return 0; - } - if (!strcmp(filename, entry)) { - return 1; - } - } - return 0; + int pid; + + return is_proc_file(filename, &pid, entry) && pid == getpid(); } #if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN) || \ -- 2.21.1 (Apple Git-122.3)
dockerd makes runc invoke dockerd using /proc/$pid/exe. This commit makes it work when both of dockerd and runc are emulated by qemu linux-user. In that case, we (the qemu interpreting runc) need to invoke the real executable (dockerd), where /proc/$pid/exe in question is the qemu command interpreting dockerd. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int open_self_auxv(void *cpu_env, int fd) static const char *get_exe_path(int pid, char *buf, size_t bufsize) { + ssize_t ssz; + int fd; + if (pid == getpid()) { return exec_path; } + /* dockerd makes runc invoke dockerd using "/proc/${dockerd_pid}/exe". */ + snprintf(buf, bufsize, "/proc/%d/cmdline", pid); + fd = open(buf, O_RDONLY); + if (fd == -1) { + return NULL; + } + ssz = read(fd, buf, bufsize); + if (ssz != -1) { + const char *argv0; + const char *argv1; + const char *cp; + const char *ep; + const char *slash; + + cp = buf; + ep = cp + ssz; + + argv0 = cp; + while (*cp != 0) { + cp++; + if (cp >= ep) { + goto fail; + } + } + + cp++; + if (cp >= ep) { + goto fail; + } + + argv1 = cp; + while (*cp != 0) { + cp++; + if (cp >= ep) { + goto fail; + } + } + + /* + * XXX a bit too loose detection of qemu. + * maybe we can compare /proc/$pid/exe with ours. + */ + slash = strrchr(argv0, '/'); + if (slash != NULL) { + argv0 = slash + 1; /* basename */ + } + if (strncmp(argv0, "qemu-", sizeof("qemu-") - 1)) { + goto fail; + } + + close(fd); + return argv1; + } +fail: + close(fd); + return NULL; } -- 2.21.1 (Apple Git-122.3)
Perform the qemu special case only when the binary seems the same as our own executable. This is enough for my use case (docker and runc) where the involved qemu binaries are always for the same arch. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int open_self_auxv(void *cpu_env, int fd) return 0; } +static int is_same_qemu(const char *their_exe) +{ + struct stat our_st; + struct stat their_st; + + if (stat("/proc/self/exe", &our_st) != 0) { + return 0; + } + if (stat(their_exe, &their_st) != 0) { + return 0; + } + if (our_st.st_dev != their_st.st_dev) { + return 0; + } + if (our_st.st_ino != their_st.st_ino) { + return 0; + } + return 1; +} + static const char *get_exe_path(int pid, char *buf, size_t bufsize) { ssize_t ssz; @@ -XXX,XX +XXX,XX @@ static const char *get_exe_path(int pid, char *buf, size_t bufsize) } /* dockerd makes runc invoke dockerd using "/proc/${dockerd_pid}/exe". */ + + /* + * Check that it's the same qemu binary as ours + * to avoid false positives. + * + * While ideally we want to allow different qemu binaries, + * (E.g. linux-user for a different arch) + * I can't think of any reliable way to detect the cases. + */ + snprintf(buf, bufsize, "/proc/%d/exe", pid); + if (!is_same_qemu(buf)) { + return NULL; + } + snprintf(buf, bufsize, "/proc/%d/cmdline", pid); fd = open(buf, O_RDONLY); if (fd == -1) { @@ -XXX,XX +XXX,XX @@ static const char *get_exe_path(int pid, char *buf, size_t bufsize) } } - /* - * XXX a bit too loose detection of qemu. - * maybe we can compare /proc/$pid/exe with ours. - */ slash = strrchr(argv0, '/'); if (slash != NULL) { argv0 = slash + 1; /* basename */ -- 2.21.1 (Apple Git-122.3)
runc uses clone() with a combination of flags which we don't support. This commit works around it by ignoring CLONE_PARENT. [!MERGE] because this is just a crude hack for a very specific application. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/syscall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -XXX,XX +XXX,XX @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, pthread_mutex_destroy(&info.mutex); pthread_mutex_unlock(&clone_lock); } else { + flags &= ~CLONE_PARENT; /* XXX crude hack for libcontainer. */ + /* if no CLONE_VM, we consider it is a fork */ if (flags & CLONE_INVALID_FORK_FLAGS) { return -TARGET_EINVAL; -- 2.21.1 (Apple Git-122.3)
Just because the kernel I'm using is not new enough. [!MERGE] because this is specific to my environment and would break others. Signed-off-by: YAMAMOTO Takashi <yamamoto@midokura.com> --- linux-user/main.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/linux-user/main.c b/linux-user/main.c index XXXXXXX..XXXXXXX 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) /* * get binfmt_misc flags */ +#if 0 preserve_argv0 = !!(qemu_getauxval(AT_FLAGS) & AT_FLAGS_PRESERVE_ARGV0); +#else +/* + * my kernel doesn't have the following commit. this is a crude workaround. + +commit 2347961b11d4079deace3c81dceed460c08a8fc1 +Author: Laurent Vivier <laurent@vivier.eu> +Date: Tue Jan 28 14:25:39 2020 +0100 + + binfmt_misc: pass binfmt_misc flags to the interpreter + + It can be useful to the interpreter to know which flags are in use. + + For instance, knowing if the preserve-argv[0] is in use would + allow to skip the pathname argument. + + This patch uses an unused auxiliary vector, AT_FLAGS, to add a + flag to inform interpreter if the preserve-argv[0] is enabled. + + Note by Helge Deller: + The real-world user of this patch is qemu-user, which needs to know + if it has to preserve the argv[0]. See Debian bug #970460. + + Signed-off-by: Laurent Vivier <laurent@vivier.eu> + Reviewed-by: YunQiang Su <ysu@wavecomp.com> + URL: http://bugs.debian.org/970460 + Signed-off-by: Helge Deller <deller@gmx.de> + + */ + preserve_argv0 = true; +#endif /* * Manage binfmt-misc preserve-arg[0] flag -- 2.21.1 (Apple Git-122.3)