linux-user/main.c | 22 ++++++++++++++++++++++ linux-user/syscall.c | 20 +++++++++++++++----- linux-user/user-internals.h | 4 ++++ 3 files changed, 41 insertions(+), 5 deletions(-)
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.
The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children. Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
linux-user/main.c | 22 ++++++++++++++++++++++
linux-user/syscall.c | 20 +++++++++++++++-----
linux-user/user-internals.h | 4 ++++
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..dfb303a1f2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
uintptr_t guest_base;
bool have_guest_base;
+bool qemu_dup_for_children;
+int qemu_argc;
+char ** qemu_argv;
+
/*
* Used to implement backwards-compatibility for the `-strace`, and
* QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
perf_enable_jitdump();
}
+static void handle_arg_qemu_children(const char *arg)
+{
+ qemu_dup_for_children = true;
+}
+
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
#ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
"", "Generate a /tmp/perf-${pid}.map file for perf"},
{"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
"", "Generate a jit-${pid}.dump file for perf"},
+ {"qemu-children",
+ "QEMU_CHILDREN", false, handle_arg_qemu_children,
+ "", "Run child processes (created with execve) with qemu "
+ "(as instantiated for the parent)"},
{NULL, NULL, false, NULL, NULL, NULL}
};
@@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
optind = parse_args(argc, argv);
+ if (qemu_dup_for_children) {
+ int i;
+ qemu_argc = optind;
+ qemu_argv = g_new0(char *, qemu_argc);
+ for (i = 0; i < optind; ++i) {
+ qemu_argv[i] = strdup(argv[i]);
+ }
+ }
+
qemu_set_log_filename_flags(last_log_filename,
last_log_mask | (enable_strace * LOG_STRACE),
&error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d5415674d..732ef89054 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
abi_long pathname, abi_long guest_argp,
abi_long guest_envp, int flags, bool is_execveat)
{
- int ret;
+ int ret, argp_offset;
char **argp, **envp;
int argc, envc;
abi_ulong gp;
abi_ulong addr;
char **q;
void *p;
+ bool through_qemu = !is_execveat && qemu_dup_for_children;
argc = 0;
@@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
envc++;
}
- argp = g_new0(char *, argc + 1);
+ argp_offset = through_qemu ? qemu_argc : 0;
+ argp = g_new0(char *, argc + argp_offset + 1);
envp = g_new0(char *, envc + 1);
- for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp)) {
goto execve_efault;
}
@@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
}
const char *exe = p;
- if (is_proc_myself(p, "exe")) {
+ if (through_qemu) {
+ int i;
+ for (i = 0; i < argp_offset; ++i) {
+ argp[i] = qemu_argv[i];
+ }
+ exe = qemu_argv[0];
+ }
+ else if (is_proc_myself(p, "exe")) {
exe = exec_path;
}
+
ret = is_execveat
? safe_execveat(dirfd, exe, argp, envp, flags)
: safe_execve(exe, argp, envp);
@@ -8553,7 +8563,7 @@ execve_efault:
ret = -TARGET_EFAULT;
execve_end:
- for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp) || !addr) {
break;
}
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 5c7f173ceb..0719e65ff4 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
extern const char *qemu_uname_release;
extern unsigned long mmap_min_addr;
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char ** qemu_argv;
+
typedef struct IOCTLEntry IOCTLEntry;
typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
--
2.34.1
Noah Goldstein <goldstein.w.n@gmail.com> writes: > The new option '-qemu-children' makes it so that on `execve` the child > process will be launch by the same `qemu` executable that is currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. Broadly I don't see anything wrong with the patch. However my principle concern is how test it. Currently nothing in check-tcg ensures this mechanism works and stresses the argument copying. > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > --- > linux-user/main.c | 22 ++++++++++++++++++++++ > linux-user/syscall.c | 20 +++++++++++++++----- > linux-user/user-internals.h | 4 ++++ > 3 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..dfb303a1f2 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char ** qemu_argv; > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > + "", "Run child processes (created with execve) with qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + int i; > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); > + } > + } > + > qemu_set_log_filename_flags(last_log_filename, > last_log_mask | (enable_strace * LOG_STRACE), > &error_fatal); > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 9d5415674d..732ef89054 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > abi_long pathname, abi_long guest_argp, > abi_long guest_envp, int flags, bool is_execveat) > { > - int ret; > + int ret, argp_offset; > char **argp, **envp; > int argc, envc; > abi_ulong gp; > abi_ulong addr; > char **q; > void *p; > + bool through_qemu = !is_execveat && qemu_dup_for_children; > > argc = 0; > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > envc++; > } > > - argp = g_new0(char *, argc + 1); > + argp_offset = through_qemu ? qemu_argc : 0; > + argp = g_new0(char *, argc + argp_offset + 1); > envp = g_new0(char *, envc + 1); > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp)) { > goto execve_efault; > } > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } > + else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + > ret = is_execveat > ? safe_execveat(dirfd, exe, argp, envp, flags) > : safe_execve(exe, argp, envp); > @@ -8553,7 +8563,7 @@ execve_efault: > ret = -TARGET_EFAULT; > > execve_end: > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp) || !addr) { > break; > } > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > index 5c7f173ceb..0719e65ff4 100644 > --- a/linux-user/user-internals.h > +++ b/linux-user/user-internals.h > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > extern const char *qemu_uname_release; > extern unsigned long mmap_min_addr; > > +extern bool qemu_dup_for_children; > +extern int qemu_argc; > +extern char ** qemu_argv; > + > typedef struct IOCTLEntry IOCTLEntry; > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, -- Alex Bennée Virtualisation Tech Lead @ Linaro
On Tue, Oct 29, 2024 at 10:23 AM Alex Bennée <alex.bennee@linaro.org> wrote: > > Noah Goldstein <goldstein.w.n@gmail.com> writes: > > > The new option '-qemu-children' makes it so that on `execve` the child > > process will be launch by the same `qemu` executable that is currently > > running along with its current commandline arguments. > > > > The motivation for the change is to make it so that plugins running > > through `qemu` can continue to run on children. Why not just > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > emulation, and can sometimes be useful for elf files that can run > > natively. Enabling `binfmt` for all natively runnable elf files may > > not be desirable. > > Broadly I don't see anything wrong with the patch. However my principle > concern is how test it. Currently nothing in check-tcg ensures this > mechanism works and stresses the argument copying. Fair enough. I will add some tests. > > > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > --- > > linux-user/main.c | 22 ++++++++++++++++++++++ > > linux-user/syscall.c | 20 +++++++++++++++----- > > linux-user/user-internals.h | 4 ++++ > > 3 files changed, 41 insertions(+), 5 deletions(-) > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > index 8143a0d4b0..dfb303a1f2 100644 > > --- a/linux-user/main.c > > +++ b/linux-user/main.c > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > uintptr_t guest_base; > > bool have_guest_base; > > > > +bool qemu_dup_for_children; > > +int qemu_argc; > > +char ** qemu_argv; > > + > > /* > > * Used to implement backwards-compatibility for the `-strace`, and > > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > perf_enable_jitdump(); > > } > > > > +static void handle_arg_qemu_children(const char *arg) > > +{ > > + qemu_dup_for_children = true; > > +} > > + > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > #ifdef CONFIG_PLUGIN > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > "", "Generate a jit-${pid}.dump file for perf"}, > > + {"qemu-children", > > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > > + "", "Run child processes (created with execve) with qemu " > > + "(as instantiated for the parent)"}, > > {NULL, NULL, false, NULL, NULL, NULL} > > }; > > > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > > > optind = parse_args(argc, argv); > > > > + if (qemu_dup_for_children) { > > + int i; > > + qemu_argc = optind; > > + qemu_argv = g_new0(char *, qemu_argc); > > + for (i = 0; i < optind; ++i) { > > + qemu_argv[i] = strdup(argv[i]); > > + } > > + } > > + > > qemu_set_log_filename_flags(last_log_filename, > > last_log_mask | (enable_strace * LOG_STRACE), > > &error_fatal); > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > > index 9d5415674d..732ef89054 100644 > > --- a/linux-user/syscall.c > > +++ b/linux-user/syscall.c > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > abi_long pathname, abi_long guest_argp, > > abi_long guest_envp, int flags, bool is_execveat) > > { > > - int ret; > > + int ret, argp_offset; > > char **argp, **envp; > > int argc, envc; > > abi_ulong gp; > > abi_ulong addr; > > char **q; > > void *p; > > + bool through_qemu = !is_execveat && qemu_dup_for_children; > > > > argc = 0; > > > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > envc++; > > } > > > > - argp = g_new0(char *, argc + 1); > > + argp_offset = through_qemu ? qemu_argc : 0; > > + argp = g_new0(char *, argc + argp_offset + 1); > > envp = g_new0(char *, envc + 1); > > > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp)) { > > goto execve_efault; > > } > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > } > > > > const char *exe = p; > > - if (is_proc_myself(p, "exe")) { > > + if (through_qemu) { > > + int i; > > + for (i = 0; i < argp_offset; ++i) { > > + argp[i] = qemu_argv[i]; > > + } > > + exe = qemu_argv[0]; > > + } > > + else if (is_proc_myself(p, "exe")) { > > exe = exec_path; > > } > > + > > ret = is_execveat > > ? safe_execveat(dirfd, exe, argp, envp, flags) > > : safe_execve(exe, argp, envp); > > @@ -8553,7 +8563,7 @@ execve_efault: > > ret = -TARGET_EFAULT; > > > > execve_end: > > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp) || !addr) { > > break; > > } > > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > > index 5c7f173ceb..0719e65ff4 100644 > > --- a/linux-user/user-internals.h > > +++ b/linux-user/user-internals.h > > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > > extern const char *qemu_uname_release; > > extern unsigned long mmap_min_addr; > > > > +extern bool qemu_dup_for_children; > > +extern int qemu_argc; > > +extern char ** qemu_argv; > > + > > typedef struct IOCTLEntry IOCTLEntry; > > > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > > -- > Alex Bennée > Virtualisation Tech Lead @ Linaro
On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > The new option '-qemu-children' makes it so that on `execve` the > child > process will be launch by the same `qemu` executable that is > currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. Another reason to have this is that one may not have root permissions to configure binfmt-misc. There was a similar patch posted to the mailing list some years back, which I used to cherry-pick when I needed this. I'm not sure what happened to that discussion though. > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > --- > linux-user/main.c | 22 ++++++++++++++++++++++ > linux-user/syscall.c | 20 +++++++++++++++----- > linux-user/user-internals.h | 4 ++++ > 3 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..dfb303a1f2 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char ** qemu_argv; Style: ** belong to the variable name. There are a couple other issues, please check the output of git format-patch -1 --stdout | ./scripts/checkpatch.pl - > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. Without this, the QEMU_LOG can be > overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = > { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, > handle_arg_qemu_children, > + "", "Run child processes (created with execve) with > qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + int i; I get the following build error: qemu/linux-user/main.c: In function ‘main’: qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a previous local [-Werror=shadow=compatible-local] 746 | int i; | ^ qemu/linux-user/main.c:699:9: note: shadowed declaration is here 699 | int i; | ^ I don't think this variable is needed at all. > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); > + } > + } > + > qemu_set_log_filename_flags(last_log_filename, > last_log_mask | (enable_strace * > LOG_STRACE), > &error_fatal); > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 9d5415674d..732ef89054 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, > int dirfd, > abi_long pathname, abi_long guest_argp, > abi_long guest_envp, int flags, bool > is_execveat) > { > - int ret; > + int ret, argp_offset; > char **argp, **envp; > int argc, envc; > abi_ulong gp; > abi_ulong addr; > char **q; > void *p; > + bool through_qemu = !is_execveat && qemu_dup_for_children; Wouldn't it be better to check for dirfd == AT_FDCWD? > argc = 0; > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, > int dirfd, > envc++; > } > > - argp = g_new0(char *, argc + 1); > + argp_offset = through_qemu ? qemu_argc : 0; > + argp = g_new0(char *, argc + argp_offset + 1); > envp = g_new0(char *, envc + 1); > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), > q++) { > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += > sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp)) { > goto execve_efault; > } > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int > dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } > + else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + > ret = is_execveat > ? safe_execveat(dirfd, exe, argp, envp, flags) > : safe_execve(exe, argp, envp); > @@ -8553,7 +8563,7 @@ execve_efault: > ret = -TARGET_EFAULT; > > execve_end: > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), > q++) { > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += > sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp) || !addr) { > break; > } > diff --git a/linux-user/user-internals.h b/linux-user/user- > internals.h > index 5c7f173ceb..0719e65ff4 100644 > --- a/linux-user/user-internals.h > +++ b/linux-user/user-internals.h > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > extern const char *qemu_uname_release; > extern unsigned long mmap_min_addr; > > +extern bool qemu_dup_for_children; > +extern int qemu_argc; > +extern char ** qemu_argv; > + > typedef struct IOCTLEntry IOCTLEntry; > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t > *buf_temp,
Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: >> The new option '-qemu-children' makes it so that on `execve` the >> child >> process will be launch by the same `qemu` executable that is >> currently >> running along with its current commandline arguments. >> >> The motivation for the change is to make it so that plugins running >> through `qemu` can continue to run on children. Why not just >> `binfmt`?: Plugins can be desirable regardless of system/architecture >> emulation, and can sometimes be useful for elf files that can run >> natively. Enabling `binfmt` for all natively runnable elf files may >> not be desirable. > > Another reason to have this is that one may not have root permissions > to configure binfmt-misc. A little note on that: binfmt_misc is now part of the user namespace (since linux v6.7), so you can configure binfmt_misc as a non root user in a given namepace. There is helper to use it with unshare from util-linux, you can do things like that: With 'F' flag, load the interpreter from the initial namespace: $ /bin/qemu-m68k-static --version qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers $ unshare --map-root-user --fork --pid --load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu-m68k-static:OCF" --root=chroot/m68k/sid # QEMU_VERSION= ls qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers # /qemu-m68k --version qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers Without 'F' flag, from inside the namespace: $ unshare --map-root-user --fork --pid --load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-m68k:OC" --root=chroot/m68k/sid # QEMU_VERSION= ls qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers # /qemu-m68k --version qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers Thanks, Laurent
On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > The new option '-qemu-children' makes it so that on `execve` the > > > child > > > process will be launch by the same `qemu` executable that is > > > currently > > > running along with its current commandline arguments. > > > > > > The motivation for the change is to make it so that plugins > > > running > > > through `qemu` can continue to run on children. Why not just > > > `binfmt`?: Plugins can be desirable regardless of > > > system/architecture > > > emulation, and can sometimes be useful for elf files that can run > > > natively. Enabling `binfmt` for all natively runnable elf files > > > may > > > not be desirable. > > > > Another reason to have this is that one may not have root > > permissions > > to configure binfmt-misc. > > A little note on that: binfmt_misc is now part of the user namespace > (since linux v6.7), so you can > configure binfmt_misc as a non root user in a given namepace. > > There is helper to use it with unshare from util-linux, you can do > things like that: > > With 'F' flag, load the interpreter from the initial namespace: > > $ /bin/qemu-m68k-static --version > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > developers > $ unshare --map-root-user --fork --pid > --load-interp=":qemu- > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\ > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu > -m68k-static:OCF" > --root=chroot/m68k/sid > # QEMU_VERSION= ls > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > developers > # /qemu-m68k --version > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > developers > > Without 'F' flag, from inside the namespace: > > $ unshare --map-root-user --fork --pid > --load-interp=":qemu- > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\ > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu- > m68k:OC" > --root=chroot/m68k/sid > # QEMU_VERSION= ls > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > developers > # /qemu-m68k --version > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > developers > > Thanks, > Laurent > Thanks for posting this, I wasn't aware of this feature and it looks really useful. IIUC it also resolves the main problem this patch is dealing with: Enabling `binfmt` for all natively runnable elf files may not be desirable.
On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > The new option '-qemu-children' makes it so that on `execve` the > > > > child > > > > process will be launch by the same `qemu` executable that is > > > > currently > > > > running along with its current commandline arguments. > > > > > > > > The motivation for the change is to make it so that plugins > > > > running > > > > through `qemu` can continue to run on children. Why not just > > > > `binfmt`?: Plugins can be desirable regardless of > > > > system/architecture > > > > emulation, and can sometimes be useful for elf files that can run > > > > natively. Enabling `binfmt` for all natively runnable elf files > > > > may > > > > not be desirable. > > > > > > Another reason to have this is that one may not have root > > > permissions > > > to configure binfmt-misc. > > > > A little note on that: binfmt_misc is now part of the user namespace > > (since linux v6.7), so you can > > configure binfmt_misc as a non root user in a given namepace. > > > > There is helper to use it with unshare from util-linux, you can do > > things like that: > > > > With 'F' flag, load the interpreter from the initial namespace: > > > > $ /bin/qemu-m68k-static --version > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > developers > > $ unshare --map-root-user --fork --pid > > --load-interp=":qemu- > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\ > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu > > -m68k-static:OCF" > > --root=chroot/m68k/sid > > # QEMU_VERSION= ls > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > developers > > # /qemu-m68k --version > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > developers > > > > Without 'F' flag, from inside the namespace: > > > > $ unshare --map-root-user --fork --pid > > --load-interp=":qemu- > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\ > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu- > > m68k:OC" > > --root=chroot/m68k/sid > > # QEMU_VERSION= ls > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > developers > > # /qemu-m68k --version > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > developers > > > > Thanks, > > Laurent > > > > Thanks for posting this, I wasn't aware of this feature and it looks > really useful. > > IIUC it also resolves the main problem this patch is dealing with: I might misunderstand, but I don't think it does in the sense that it still might not be desirable to use the same qemu flags for the entire class of executables. I.e the original motivating case was wanting to attach some plugins to a process and its children and AFAICT binfmt still doesn't give that level of control. > > Enabling `binfmt` for all natively runnable elf files may > not be desirable.
On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> > wrote: > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > The new option '-qemu-children' makes it so that on `execve` > > > > > the > > > > > child > > > > > process will be launch by the same `qemu` executable that is > > > > > currently > > > > > running along with its current commandline arguments. > > > > > > > > > > The motivation for the change is to make it so that plugins > > > > > running > > > > > through `qemu` can continue to run on children. Why not just > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > system/architecture > > > > > emulation, and can sometimes be useful for elf files that can > > > > > run > > > > > natively. Enabling `binfmt` for all natively runnable elf > > > > > files > > > > > may > > > > > not be desirable. > > > > > > > > Another reason to have this is that one may not have root > > > > permissions > > > > to configure binfmt-misc. > > > > > > A little note on that: binfmt_misc is now part of the user > > > namespace > > > (since linux v6.7), so you can > > > configure binfmt_misc as a non root user in a given namepace. > > > > > > There is helper to use it with unshare from util-linux, you can > > > do > > > things like that: > > > > > > With 'F' flag, load the interpreter from the initial > > > namespace: > > > > > > $ /bin/qemu-m68k-static --version > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > > developers > > > $ unshare --map-root-user --fork --pid > > > --load-interp=":qemu- > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 > > > 0\\x > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ > > > x00\ > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/ > > > qemu > > > -m68k-static:OCF" > > > --root=chroot/m68k/sid > > > # QEMU_VERSION= ls > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > > developers > > > # /qemu-m68k --version > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > developers > > > > > > Without 'F' flag, from inside the namespace: > > > > > > $ unshare --map-root-user --fork --pid > > > --load-interp=":qemu- > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 > > > 0\\x > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ > > > x00\ > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu > > > - > > > m68k:OC" > > > --root=chroot/m68k/sid > > > # QEMU_VERSION= ls > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > developers > > > # /qemu-m68k --version > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > developers > > > > > > Thanks, > > > Laurent > > > > > > > Thanks for posting this, I wasn't aware of this feature and it > > looks > > really useful. > > > > IIUC it also resolves the main problem this patch is dealing with: > > I might misunderstand, but I don't think it does in the sense > that it still might not be desirable to use the same qemu flags > for the entire class of executables. > > I.e the original motivating case was wanting to attach > some plugins to a process and its children and AFAICT > binfmt still doesn't give that level of control. I think if you start a process in a user namespace, which has a binfmt_misc handler for a certain class of binaries, then this handler will affect only this process and its children, and not the rest of the system. > > Enabling `binfmt` for all natively runnable elf files may > > not be desirable.
Le 02/10/2024 à 16:53, Ilya Leoshkevich a écrit : > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: >> On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> >> wrote: >>> >>> On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: >>>> Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : >>>>> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: >>>>>> The new option '-qemu-children' makes it so that on `execve` >>>>>> the >>>>>> child >>>>>> process will be launch by the same `qemu` executable that is >>>>>> currently >>>>>> running along with its current commandline arguments. >>>>>> >>>>>> The motivation for the change is to make it so that plugins >>>>>> running >>>>>> through `qemu` can continue to run on children. Why not just >>>>>> `binfmt`?: Plugins can be desirable regardless of >>>>>> system/architecture >>>>>> emulation, and can sometimes be useful for elf files that can >>>>>> run >>>>>> natively. Enabling `binfmt` for all natively runnable elf >>>>>> files >>>>>> may >>>>>> not be desirable. >>>>> >>>>> Another reason to have this is that one may not have root >>>>> permissions >>>>> to configure binfmt-misc. >>>> >>>> A little note on that: binfmt_misc is now part of the user >>>> namespace >>>> (since linux v6.7), so you can >>>> configure binfmt_misc as a non root user in a given namepace. >>>> >>>> There is helper to use it with unshare from util-linux, you can >>>> do >>>> things like that: >>>> >>>> With 'F' flag, load the interpreter from the initial >>>> namespace: >>>> >>>> $ /bin/qemu-m68k-static --version >>>> qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) >>>> Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project >>>> developers >>>> $ unshare --map-root-user --fork --pid >>>> --load-interp=":qemu- >>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 >>>> 0\\x >>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ >>>> x00\ >>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/ >>>> qemu >>>> -m68k-static:OCF" >>>> --root=chroot/m68k/sid >>>> # QEMU_VERSION= ls >>>> qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) >>>> Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project >>>> developers >>>> # /qemu-m68k --version >>>> qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) >>>> Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project >>>> developers >>>> >>>> Without 'F' flag, from inside the namespace: >>>> >>>> $ unshare --map-root-user --fork --pid >>>> --load-interp=":qemu- >>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 >>>> 0\\x >>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ >>>> x00\ >>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu >>>> - >>>> m68k:OC" >>>> --root=chroot/m68k/sid >>>> # QEMU_VERSION= ls >>>> qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) >>>> Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project >>>> developers >>>> # /qemu-m68k --version >>>> qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) >>>> Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project >>>> developers >>>> >>>> Thanks, >>>> Laurent >>>> >>> >>> Thanks for posting this, I wasn't aware of this feature and it >>> looks >>> really useful. >>> >>> IIUC it also resolves the main problem this patch is dealing with: >> >> I might misunderstand, but I don't think it does in the sense >> that it still might not be desirable to use the same qemu flags >> for the entire class of executables. >> >> I.e the original motivating case was wanting to attach >> some plugins to a process and its children and AFAICT >> binfmt still doesn't give that level of control. > > I think if you start a process in a user namespace, which has a > binfmt_misc handler for a certain class of binaries, then this handler > will affect only this process and its children, and not the rest of the > system. > Yes, the binfmt_misc configuration is only available in the given namespace. Thanks, Laurent
On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> > > wrote: > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > > The new option '-qemu-children' makes it so that on `execve` > > > > > > the > > > > > > child > > > > > > process will be launch by the same `qemu` executable that is > > > > > > currently > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > The motivation for the change is to make it so that plugins > > > > > > running > > > > > > through `qemu` can continue to run on children. Why not just > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > system/architecture > > > > > > emulation, and can sometimes be useful for elf files that can > > > > > > run > > > > > > natively. Enabling `binfmt` for all natively runnable elf > > > > > > files > > > > > > may > > > > > > not be desirable. > > > > > > > > > > Another reason to have this is that one may not have root > > > > > permissions > > > > > to configure binfmt-misc. > > > > > > > > A little note on that: binfmt_misc is now part of the user > > > > namespace > > > > (since linux v6.7), so you can > > > > configure binfmt_misc as a non root user in a given namepace. > > > > > > > > There is helper to use it with unshare from util-linux, you can > > > > do > > > > things like that: > > > > > > > > With 'F' flag, load the interpreter from the initial > > > > namespace: > > > > > > > > $ /bin/qemu-m68k-static --version > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > > > developers > > > > $ unshare --map-root-user --fork --pid > > > > --load-interp=":qemu- > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 > > > > 0\\x > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ > > > > x00\ > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/ > > > > qemu > > > > -m68k-static:OCF" > > > > --root=chroot/m68k/sid > > > > # QEMU_VERSION= ls > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project > > > > developers > > > > # /qemu-m68k --version > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > > developers > > > > > > > > Without 'F' flag, from inside the namespace: > > > > > > > > $ unshare --map-root-user --fork --pid > > > > --load-interp=":qemu- > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0 > > > > 0\\x > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\ > > > > x00\ > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu > > > > - > > > > m68k:OC" > > > > --root=chroot/m68k/sid > > > > # QEMU_VERSION= ls > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > > developers > > > > # /qemu-m68k --version > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project > > > > developers > > > > > > > > Thanks, > > > > Laurent > > > > > > > > > > Thanks for posting this, I wasn't aware of this feature and it > > > looks > > > really useful. > > > > > > IIUC it also resolves the main problem this patch is dealing with: > > > > I might misunderstand, but I don't think it does in the sense > > that it still might not be desirable to use the same qemu flags > > for the entire class of executables. > > > > I.e the original motivating case was wanting to attach > > some plugins to a process and its children and AFAICT > > binfmt still doesn't give that level of control. > > I think if you start a process in a user namespace, which has a > binfmt_misc handler for a certain class of binaries, then this handler > will affect only this process and its children, and not the rest of the > system. It won't also affect other binaries in the user namespace? > > > > Enabling `binfmt` for all natively runnable elf files may > > > not be desirable. >
On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote: > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com> > wrote: > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich > > > <iii@linux.ibm.com> > > > wrote: > > > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > > > The new option '-qemu-children' makes it so that on > > > > > > > `execve` > > > > > > > the > > > > > > > child > > > > > > > process will be launch by the same `qemu` executable that > > > > > > > is > > > > > > > currently > > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > > > The motivation for the change is to make it so that > > > > > > > plugins > > > > > > > running > > > > > > > through `qemu` can continue to run on children. Why not > > > > > > > just > > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > > system/architecture > > > > > > > emulation, and can sometimes be useful for elf files that > > > > > > > can > > > > > > > run > > > > > > > natively. Enabling `binfmt` for all natively runnable elf > > > > > > > files > > > > > > > may > > > > > > > not be desirable. > > > > > > > > > > > > Another reason to have this is that one may not have root > > > > > > permissions > > > > > > to configure binfmt-misc. > > > > > > > > > > A little note on that: binfmt_misc is now part of the user > > > > > namespace > > > > > (since linux v6.7), so you can > > > > > configure binfmt_misc as a non root user in a given namepace. > > > > > > > > > > There is helper to use it with unshare from util-linux, you > > > > > can > > > > > do > > > > > things like that: > > > > > > > > > > With 'F' flag, load the interpreter from the initial > > > > > namespace: > > > > > > > > > > $ /bin/qemu-m68k-static --version > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > Project > > > > > developers > > > > > $ unshare --map-root-user --fork --pid > > > > > --load-interp=":qemu- > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00 > > > > > \\x0 > > > > > 0\\x > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x > > > > > fe\\ > > > > > x00\ > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/ > > > > > bin/ > > > > > qemu > > > > > -m68k-static:OCF" > > > > > --root=chroot/m68k/sid > > > > > # QEMU_VERSION= ls > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > Project > > > > > developers > > > > > # /qemu-m68k --version > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > Project > > > > > developers > > > > > > > > > > Without 'F' flag, from inside the namespace: > > > > > > > > > > $ unshare --map-root-user --fork --pid > > > > > --load-interp=":qemu- > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00 > > > > > \\x0 > > > > > 0\\x > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x > > > > > fe\\ > > > > > x00\ > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/ > > > > > qemu > > > > > - > > > > > m68k:OC" > > > > > --root=chroot/m68k/sid > > > > > # QEMU_VERSION= ls > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > Project > > > > > developers > > > > > # /qemu-m68k --version > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > Project > > > > > developers > > > > > > > > > > Thanks, > > > > > Laurent > > > > > > > > > > > > > Thanks for posting this, I wasn't aware of this feature and it > > > > looks > > > > really useful. > > > > > > > > IIUC it also resolves the main problem this patch is dealing > > > > with: > > > > > > I might misunderstand, but I don't think it does in the sense > > > that it still might not be desirable to use the same qemu flags > > > for the entire class of executables. > > > > > > I.e the original motivating case was wanting to attach > > > some plugins to a process and its children and AFAICT > > > binfmt still doesn't give that level of control. > > > > I think if you start a process in a user namespace, which has a > > binfmt_misc handler for a certain class of binaries, then this > > handler > > will affect only this process and its children, and not the rest of > > the > > system. > > It won't also affect other binaries in the user namespace? It would, but you should be able to create a user namespace just for your program. It should also be possible to nest user namespaces. > > > > Enabling `binfmt` for all natively runnable elf files may > > > > not be desirable.
On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote: > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com> > > wrote: > > > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich > > > > <iii@linux.ibm.com> > > > > wrote: > > > > > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > > > > The new option '-qemu-children' makes it so that on > > > > > > > > `execve` > > > > > > > > the > > > > > > > > child > > > > > > > > process will be launch by the same `qemu` executable that > > > > > > > > is > > > > > > > > currently > > > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > > > > > The motivation for the change is to make it so that > > > > > > > > plugins > > > > > > > > running > > > > > > > > through `qemu` can continue to run on children. Why not > > > > > > > > just > > > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > > > system/architecture > > > > > > > > emulation, and can sometimes be useful for elf files that > > > > > > > > can > > > > > > > > run > > > > > > > > natively. Enabling `binfmt` for all natively runnable elf > > > > > > > > files > > > > > > > > may > > > > > > > > not be desirable. > > > > > > > > > > > > > > Another reason to have this is that one may not have root > > > > > > > permissions > > > > > > > to configure binfmt-misc. > > > > > > > > > > > > A little note on that: binfmt_misc is now part of the user > > > > > > namespace > > > > > > (since linux v6.7), so you can > > > > > > configure binfmt_misc as a non root user in a given namepace. > > > > > > > > > > > > There is helper to use it with unshare from util-linux, you > > > > > > can > > > > > > do > > > > > > things like that: > > > > > > > > > > > > With 'F' flag, load the interpreter from the initial > > > > > > namespace: > > > > > > > > > > > > $ /bin/qemu-m68k-static --version > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > Project > > > > > > developers > > > > > > $ unshare --map-root-user --fork --pid > > > > > > --load-interp=":qemu- > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00 > > > > > > \\x0 > > > > > > 0\\x > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x > > > > > > fe\\ > > > > > > x00\ > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/ > > > > > > bin/ > > > > > > qemu > > > > > > -m68k-static:OCF" > > > > > > --root=chroot/m68k/sid > > > > > > # QEMU_VERSION= ls > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > Project > > > > > > developers > > > > > > # /qemu-m68k --version > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > Project > > > > > > developers > > > > > > > > > > > > Without 'F' flag, from inside the namespace: > > > > > > > > > > > > $ unshare --map-root-user --fork --pid > > > > > > --load-interp=":qemu- > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00 > > > > > > \\x0 > > > > > > 0\\x > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x > > > > > > fe\\ > > > > > > x00\ > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/ > > > > > > qemu > > > > > > - > > > > > > m68k:OC" > > > > > > --root=chroot/m68k/sid > > > > > > # QEMU_VERSION= ls > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > Project > > > > > > developers > > > > > > # /qemu-m68k --version > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > Project > > > > > > developers > > > > > > > > > > > > Thanks, > > > > > > Laurent > > > > > > > > > > > > > > > > Thanks for posting this, I wasn't aware of this feature and it > > > > > looks > > > > > really useful. > > > > > > > > > > IIUC it also resolves the main problem this patch is dealing > > > > > with: > > > > > > > > I might misunderstand, but I don't think it does in the sense > > > > that it still might not be desirable to use the same qemu flags > > > > for the entire class of executables. > > > > > > > > I.e the original motivating case was wanting to attach > > > > some plugins to a process and its children and AFAICT > > > > binfmt still doesn't give that level of control. > > > > > > I think if you start a process in a user namespace, which has a > > > binfmt_misc handler for a certain class of binaries, then this > > > handler > > > will affect only this process and its children, and not the rest of > > > the > > > system. > > > > It won't also affect other binaries in the user namespace? > > It would, but you should be able to create a user namespace just > for your program. It should also be possible to nest user namespaces. Okay fair enough. Still pro this patch as an easier means but guess it loses any necessity. To be clear, are you rejecting? > > > > > > Enabling `binfmt` for all natively runnable elf files may > > > > > not be desirable. >
On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote: > On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com> > wrote: > > > > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote: > > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich > > > <iii@linux.ibm.com> > > > wrote: > > > > > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich > > > > > <iii@linux.ibm.com> > > > > > wrote: > > > > > > > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein > > > > > > > > wrote: > > > > > > > > > The new option '-qemu-children' makes it so that on > > > > > > > > > `execve` > > > > > > > > > the > > > > > > > > > child > > > > > > > > > process will be launch by the same `qemu` executable > > > > > > > > > that > > > > > > > > > is > > > > > > > > > currently > > > > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > > > > > > > The motivation for the change is to make it so that > > > > > > > > > plugins > > > > > > > > > running > > > > > > > > > through `qemu` can continue to run on children. Why > > > > > > > > > not > > > > > > > > > just > > > > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > > > > system/architecture > > > > > > > > > emulation, and can sometimes be useful for elf files > > > > > > > > > that > > > > > > > > > can > > > > > > > > > run > > > > > > > > > natively. Enabling `binfmt` for all natively runnable > > > > > > > > > elf > > > > > > > > > files > > > > > > > > > may > > > > > > > > > not be desirable. > > > > > > > > > > > > > > > > Another reason to have this is that one may not have > > > > > > > > root > > > > > > > > permissions > > > > > > > > to configure binfmt-misc. > > > > > > > > > > > > > > A little note on that: binfmt_misc is now part of the > > > > > > > user > > > > > > > namespace > > > > > > > (since linux v6.7), so you can > > > > > > > configure binfmt_misc as a non root user in a given > > > > > > > namepace. > > > > > > > > > > > > > > There is helper to use it with unshare from util-linux, > > > > > > > you > > > > > > > can > > > > > > > do > > > > > > > things like that: > > > > > > > > > > > > > > With 'F' flag, load the interpreter from the initial > > > > > > > namespace: > > > > > > > > > > > > > > $ /bin/qemu-m68k-static --version > > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > > Project > > > > > > > developers > > > > > > > $ unshare --map-root-user --fork --pid > > > > > > > --load-interp=":qemu- > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\ > > > > > > > \x00 > > > > > > > \\x0 > > > > > > > 0\\x > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf > > > > > > > f\\x > > > > > > > fe\\ > > > > > > > x00\ > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x > > > > > > > ff:/ > > > > > > > bin/ > > > > > > > qemu > > > > > > > -m68k-static:OCF" > > > > > > > --root=chroot/m68k/sid > > > > > > > # QEMU_VERSION= ls > > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > > Project > > > > > > > developers > > > > > > > # /qemu-m68k --version > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > Project > > > > > > > developers > > > > > > > > > > > > > > Without 'F' flag, from inside the namespace: > > > > > > > > > > > > > > $ unshare --map-root-user --fork --pid > > > > > > > --load-interp=":qemu- > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\ > > > > > > > \x00 > > > > > > > \\x0 > > > > > > > 0\\x > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf > > > > > > > f\\x > > > > > > > fe\\ > > > > > > > x00\ > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x > > > > > > > ff:/ > > > > > > > qemu > > > > > > > - > > > > > > > m68k:OC" > > > > > > > --root=chroot/m68k/sid > > > > > > > # QEMU_VERSION= ls > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > Project > > > > > > > developers > > > > > > > # /qemu-m68k --version > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > Project > > > > > > > developers > > > > > > > > > > > > > > Thanks, > > > > > > > Laurent > > > > > > > > > > > > > > > > > > > Thanks for posting this, I wasn't aware of this feature and > > > > > > it > > > > > > looks > > > > > > really useful. > > > > > > > > > > > > IIUC it also resolves the main problem this patch is > > > > > > dealing > > > > > > with: > > > > > > > > > > I might misunderstand, but I don't think it does in the sense > > > > > that it still might not be desirable to use the same qemu > > > > > flags > > > > > for the entire class of executables. > > > > > > > > > > I.e the original motivating case was wanting to attach > > > > > some plugins to a process and its children and AFAICT > > > > > binfmt still doesn't give that level of control. > > > > > > > > I think if you start a process in a user namespace, which has a > > > > binfmt_misc handler for a certain class of binaries, then this > > > > handler > > > > will affect only this process and its children, and not the > > > > rest of > > > > the > > > > system. > > > > > > It won't also affect other binaries in the user namespace? > > > > It would, but you should be able to create a user namespace just > > for your program. It should also be possible to nest user > > namespaces. > > Okay fair enough. Still pro this patch as an easier means > but guess it loses any necessity. > > To be clear, are you rejecting? No, personally I still find it useful, because the systems with old kernels will be around for quite some time. > > > > > > Enabling `binfmt` for all natively runnable elf files may > > > > > > not be desirable.
On Wed, Oct 2, 2024 at 11:35 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote: > > On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com> > > wrote: > > > > > > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote: > > > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich > > > > <iii@linux.ibm.com> > > > > wrote: > > > > > > > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote: > > > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich > > > > > > <iii@linux.ibm.com> > > > > > > wrote: > > > > > > > > > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote: > > > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit : > > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein > > > > > > > > > wrote: > > > > > > > > > > The new option '-qemu-children' makes it so that on > > > > > > > > > > `execve` > > > > > > > > > > the > > > > > > > > > > child > > > > > > > > > > process will be launch by the same `qemu` executable > > > > > > > > > > that > > > > > > > > > > is > > > > > > > > > > currently > > > > > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > > > > > > > > > The motivation for the change is to make it so that > > > > > > > > > > plugins > > > > > > > > > > running > > > > > > > > > > through `qemu` can continue to run on children. Why > > > > > > > > > > not > > > > > > > > > > just > > > > > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > > > > > system/architecture > > > > > > > > > > emulation, and can sometimes be useful for elf files > > > > > > > > > > that > > > > > > > > > > can > > > > > > > > > > run > > > > > > > > > > natively. Enabling `binfmt` for all natively runnable > > > > > > > > > > elf > > > > > > > > > > files > > > > > > > > > > may > > > > > > > > > > not be desirable. > > > > > > > > > > > > > > > > > > Another reason to have this is that one may not have > > > > > > > > > root > > > > > > > > > permissions > > > > > > > > > to configure binfmt-misc. > > > > > > > > > > > > > > > > A little note on that: binfmt_misc is now part of the > > > > > > > > user > > > > > > > > namespace > > > > > > > > (since linux v6.7), so you can > > > > > > > > configure binfmt_misc as a non root user in a given > > > > > > > > namepace. > > > > > > > > > > > > > > > > There is helper to use it with unshare from util-linux, > > > > > > > > you > > > > > > > > can > > > > > > > > do > > > > > > > > things like that: > > > > > > > > > > > > > > > > With 'F' flag, load the interpreter from the initial > > > > > > > > namespace: > > > > > > > > > > > > > > > > $ /bin/qemu-m68k-static --version > > > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > > > Project > > > > > > > > developers > > > > > > > > $ unshare --map-root-user --fork --pid > > > > > > > > --load-interp=":qemu- > > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\ > > > > > > > > \x00 > > > > > > > > \\x0 > > > > > > > > 0\\x > > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf > > > > > > > > f\\x > > > > > > > > fe\\ > > > > > > > > x00\ > > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x > > > > > > > > ff:/ > > > > > > > > bin/ > > > > > > > > qemu > > > > > > > > -m68k-static:OCF" > > > > > > > > --root=chroot/m68k/sid > > > > > > > > # QEMU_VERSION= ls > > > > > > > > qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40) > > > > > > > > Copyright (c) 2003-2023 Fabrice Bellard and the QEMU > > > > > > > > Project > > > > > > > > developers > > > > > > > > # /qemu-m68k --version > > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > > Project > > > > > > > > developers > > > > > > > > > > > > > > > > Without 'F' flag, from inside the namespace: > > > > > > > > > > > > > > > > $ unshare --map-root-user --fork --pid > > > > > > > > --load-interp=":qemu- > > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\ > > > > > > > > \x00 > > > > > > > > \\x0 > > > > > > > > 0\\x > > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf > > > > > > > > f\\x > > > > > > > > fe\\ > > > > > > > > x00\ > > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x > > > > > > > > ff:/ > > > > > > > > qemu > > > > > > > > - > > > > > > > > m68k:OC" > > > > > > > > --root=chroot/m68k/sid > > > > > > > > # QEMU_VERSION= ls > > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > > Project > > > > > > > > developers > > > > > > > > # /qemu-m68k --version > > > > > > > > qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95) > > > > > > > > Copyright (c) 2003-2022 Fabrice Bellard and the QEMU > > > > > > > > Project > > > > > > > > developers > > > > > > > > > > > > > > > > Thanks, > > > > > > > > Laurent > > > > > > > > > > > > > > > > > > > > > > Thanks for posting this, I wasn't aware of this feature and > > > > > > > it > > > > > > > looks > > > > > > > really useful. > > > > > > > > > > > > > > IIUC it also resolves the main problem this patch is > > > > > > > dealing > > > > > > > with: > > > > > > > > > > > > I might misunderstand, but I don't think it does in the sense > > > > > > that it still might not be desirable to use the same qemu > > > > > > flags > > > > > > for the entire class of executables. > > > > > > > > > > > > I.e the original motivating case was wanting to attach > > > > > > some plugins to a process and its children and AFAICT > > > > > > binfmt still doesn't give that level of control. > > > > > > > > > > I think if you start a process in a user namespace, which has a > > > > > binfmt_misc handler for a certain class of binaries, then this > > > > > handler > > > > > will affect only this process and its children, and not the > > > > > rest of > > > > > the > > > > > system. > > > > > > > > It won't also affect other binaries in the user namespace? > > > > > > It would, but you should be able to create a user namespace just > > > for your program. It should also be possible to nest user > > > namespaces. > > > > Okay fair enough. Still pro this patch as an easier means > > but guess it loses any necessity. > > > > To be clear, are you rejecting? > > No, personally I still find it useful, because the systems with old > kernels will be around for quite some time. :) > > > > > > > > Enabling `binfmt` for all natively runnable elf files may > > > > > > > not be desirable. >
On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > The new option '-qemu-children' makes it so that on `execve` the > > child > > process will be launch by the same `qemu` executable that is > > currently > > running along with its current commandline arguments. > > > > The motivation for the change is to make it so that plugins running > > through `qemu` can continue to run on children. Why not just > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > emulation, and can sometimes be useful for elf files that can run > > natively. Enabling `binfmt` for all natively runnable elf files may > > not be desirable. > > Another reason to have this is that one may not have root permissions > to configure binfmt-misc. > +1 > There was a similar patch posted to the mailing list some years back, > which I used to cherry-pick when I needed this. I'm not sure what > happened to that discussion though. Yes(ish): https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > --- > > linux-user/main.c | 22 ++++++++++++++++++++++ > > linux-user/syscall.c | 20 +++++++++++++++----- > > linux-user/user-internals.h | 4 ++++ > > 3 files changed, 41 insertions(+), 5 deletions(-) > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > index 8143a0d4b0..dfb303a1f2 100644 > > --- a/linux-user/main.c > > +++ b/linux-user/main.c > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > uintptr_t guest_base; > > bool have_guest_base; > > > > +bool qemu_dup_for_children; > > +int qemu_argc; > > +char ** qemu_argv; > > Style: ** belong to the variable name. > There are a couple other issues, please check the output of > > git format-patch -1 --stdout | ./scripts/checkpatch.pl - > Will Do. > > + > > /* > > * Used to implement backwards-compatibility for the `-strace`, and > > * QEMU_STRACE options. Without this, the QEMU_LOG can be > > overwritten by > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > perf_enable_jitdump(); > > } > > > > +static void handle_arg_qemu_children(const char *arg) > > +{ > > + qemu_dup_for_children = true; > > +} > > + > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > #ifdef CONFIG_PLUGIN > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = > > { > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > "", "Generate a jit-${pid}.dump file for perf"}, > > + {"qemu-children", > > + "QEMU_CHILDREN", false, > > handle_arg_qemu_children, > > + "", "Run child processes (created with execve) with > > qemu " > > + "(as instantiated for the parent)"}, > > {NULL, NULL, false, NULL, NULL, NULL} > > }; > > > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > > > optind = parse_args(argc, argv); > > > > + if (qemu_dup_for_children) { > > + int i; > > I get the following build error: > > qemu/linux-user/main.c: In function ‘main’: > qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a > previous local [-Werror=shadow=compatible-local] > 746 | int i; > | ^ > qemu/linux-user/main.c:699:9: note: shadowed declaration is here > 699 | int i; > | ^ > > I don't think this variable is needed at all. > Bah sorry. > > + qemu_argc = optind; > > + qemu_argv = g_new0(char *, qemu_argc); > > + for (i = 0; i < optind; ++i) { > > + qemu_argv[i] = strdup(argv[i]); > > + } > > + } > > + > > qemu_set_log_filename_flags(last_log_filename, > > last_log_mask | (enable_strace * > > LOG_STRACE), > > &error_fatal); > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > > index 9d5415674d..732ef89054 100644 > > --- a/linux-user/syscall.c > > +++ b/linux-user/syscall.c > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, > > int dirfd, > > abi_long pathname, abi_long guest_argp, > > abi_long guest_envp, int flags, bool > > is_execveat) > > { > > - int ret; > > + int ret, argp_offset; > > char **argp, **envp; > > int argc, envc; > > abi_ulong gp; > > abi_ulong addr; > > char **q; > > void *p; > > + bool through_qemu = !is_execveat && qemu_dup_for_children; > > Wouldn't it be better to check for dirfd == AT_FDCWD? Yeah that works. Ill fix in V2. > > > argc = 0; > > > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, > > int dirfd, > > envc++; > > } > > > > - argp = g_new0(char *, argc + 1); > > + argp_offset = through_qemu ? qemu_argc : 0; > > + argp = g_new0(char *, argc + argp_offset + 1); > > envp = g_new0(char *, envc + 1); > > > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), > > q++) { > > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += > > sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp)) { > > goto execve_efault; > > } > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int > > dirfd, > > } > > > > const char *exe = p; > > - if (is_proc_myself(p, "exe")) { > > + if (through_qemu) { > > + int i; > > + for (i = 0; i < argp_offset; ++i) { > > + argp[i] = qemu_argv[i]; > > + } > > + exe = qemu_argv[0]; > > + } > > + else if (is_proc_myself(p, "exe")) { > > exe = exec_path; > > } > > + > > ret = is_execveat > > ? safe_execveat(dirfd, exe, argp, envp, flags) > > : safe_execve(exe, argp, envp); > > @@ -8553,7 +8563,7 @@ execve_efault: > > ret = -TARGET_EFAULT; > > > > execve_end: > > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), > > q++) { > > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += > > sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp) || !addr) { > > break; > > } > > diff --git a/linux-user/user-internals.h b/linux-user/user- > > internals.h > > index 5c7f173ceb..0719e65ff4 100644 > > --- a/linux-user/user-internals.h > > +++ b/linux-user/user-internals.h > > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > > extern const char *qemu_uname_release; > > extern unsigned long mmap_min_addr; > > > > +extern bool qemu_dup_for_children; > > +extern int qemu_argc; > > +extern char ** qemu_argv; > > + > > typedef struct IOCTLEntry IOCTLEntry; > > > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t > > *buf_temp, >
On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote: > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> > wrote: > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > The new option '-qemu-children' makes it so that on `execve` the > > > child > > > process will be launch by the same `qemu` executable that is > > > currently > > > running along with its current commandline arguments. > > > > > > The motivation for the change is to make it so that plugins > > > running > > > through `qemu` can continue to run on children. Why not just > > > `binfmt`?: Plugins can be desirable regardless of > > > system/architecture > > > emulation, and can sometimes be useful for elf files that can run > > > natively. Enabling `binfmt` for all natively runnable elf files > > > may > > > not be desirable. > > > > Another reason to have this is that one may not have root > > permissions > > to configure binfmt-misc. > > > +1 > > > There was a similar patch posted to the mailing list some years > > back, > > which I used to cherry-pick when I needed this. I'm not sure what > > happened to that discussion though. > > Yes(ish): > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ Thanks for finding this! Don't we need the shebang handling here as well? Laurent, do you per chance know why was it not accepted back then?Unfortunately I cannot find any discussion associated with v3 or v4 [1]. There were some concerns regarding v1 [2], but from what I can see they all were addressed. [1] https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/ [2] https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote: > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> > > wrote: > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > The new option '-qemu-children' makes it so that on `execve` the > > > > child > > > > process will be launch by the same `qemu` executable that is > > > > currently > > > > running along with its current commandline arguments. > > > > > > > > The motivation for the change is to make it so that plugins > > > > running > > > > through `qemu` can continue to run on children. Why not just > > > > `binfmt`?: Plugins can be desirable regardless of > > > > system/architecture > > > > emulation, and can sometimes be useful for elf files that can run > > > > natively. Enabling `binfmt` for all natively runnable elf files > > > > may > > > > not be desirable. > > > > > > Another reason to have this is that one may not have root > > > permissions > > > to configure binfmt-misc. > > > > > +1 > > > > > There was a similar patch posted to the mailing list some years > > > back, > > > which I used to cherry-pick when I needed this. I'm not sure what > > > happened to that discussion though. > > > > Yes(ish): > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ > > Thanks for finding this! Don't we need the shebang handling here as > well? > I don't think so. In this case we aren't making it so execve can point to some arbitrary impl, just that we propagate the current running qemu env. > Laurent, do you per chance know why was it not accepted back > then?Unfortunately I cannot find any discussion associated with v3 or > v4 > [1]. There were some concerns regarding v1 [2], but from what I can see > they all were addressed. > > [1] > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/ > [2] > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote: > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> > > > wrote: > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > The new option '-qemu-children' makes it so that on `execve` the > > > > > child > > > > > process will be launch by the same `qemu` executable that is > > > > > currently > > > > > running along with its current commandline arguments. > > > > > > > > > > The motivation for the change is to make it so that plugins > > > > > running > > > > > through `qemu` can continue to run on children. Why not just > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > system/architecture > > > > > emulation, and can sometimes be useful for elf files that can run > > > > > natively. Enabling `binfmt` for all natively runnable elf files > > > > > may > > > > > not be desirable. > > > > > > > > Another reason to have this is that one may not have root > > > > permissions > > > > to configure binfmt-misc. > > > > > > > +1 > > > > > > > There was a similar patch posted to the mailing list some years > > > > back, > > > > which I used to cherry-pick when I needed this. I'm not sure what > > > > happened to that discussion though. > > > > > > Yes(ish): > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ > > > > Thanks for finding this! Don't we need the shebang handling here as > > well? > > > I don't think so. In this case we aren't making it so execve can point to > some arbitrary impl, just that we propagate the current running qemu > env. > ping > > Laurent, do you per chance know why was it not accepted back > > then?Unfortunately I cannot find any discussion associated with v3 or > > v4 > > [1]. There were some concerns regarding v1 [2], but from what I can see > > they all were addressed. > > > > [1] > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/ > > [2] > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > > > > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote: > > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> > > > > wrote: > > > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > > The new option '-qemu-children' makes it so that on `execve` the > > > > > > child > > > > > > process will be launch by the same `qemu` executable that is > > > > > > currently > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > The motivation for the change is to make it so that plugins > > > > > > running > > > > > > through `qemu` can continue to run on children. Why not just > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > system/architecture > > > > > > emulation, and can sometimes be useful for elf files that can run > > > > > > natively. Enabling `binfmt` for all natively runnable elf files > > > > > > may > > > > > > not be desirable. > > > > > > > > > > Another reason to have this is that one may not have root > > > > > permissions > > > > > to configure binfmt-misc. > > > > > > > > > +1 > > > > > > > > > There was a similar patch posted to the mailing list some years > > > > > back, > > > > > which I used to cherry-pick when I needed this. I'm not sure what > > > > > happened to that discussion though. > > > > > > > > Yes(ish): > > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ > > > > > > Thanks for finding this! Don't we need the shebang handling here as > > > well? > > > > > I don't think so. In this case we aren't making it so execve can point to > > some arbitrary impl, just that we propagate the current running qemu > > env. > > > > ping ping2 > > > Laurent, do you per chance know why was it not accepted back > > > then?Unfortunately I cannot find any discussion associated with v3 or > > > v4 > > > [1]. There were some concerns regarding v1 [2], but from what I can see > > > they all were addressed. > > > > > > [1] > > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/ > > > [2] > > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
On Tue, Oct 22, 2024 at 5:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > > > > > > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote: > > > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> > > > > > wrote: > > > > > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote: > > > > > > > The new option '-qemu-children' makes it so that on `execve` the > > > > > > > child > > > > > > > process will be launch by the same `qemu` executable that is > > > > > > > currently > > > > > > > running along with its current commandline arguments. > > > > > > > > > > > > > > The motivation for the change is to make it so that plugins > > > > > > > running > > > > > > > through `qemu` can continue to run on children. Why not just > > > > > > > `binfmt`?: Plugins can be desirable regardless of > > > > > > > system/architecture > > > > > > > emulation, and can sometimes be useful for elf files that can run > > > > > > > natively. Enabling `binfmt` for all natively runnable elf files > > > > > > > may > > > > > > > not be desirable. > > > > > > > > > > > > Another reason to have this is that one may not have root > > > > > > permissions > > > > > > to configure binfmt-misc. > > > > > > > > > > > +1 > > > > > > > > > > > There was a similar patch posted to the mailing list some years > > > > > > back, > > > > > > which I used to cherry-pick when I needed this. I'm not sure what > > > > > > happened to that discussion though. > > > > > > > > > > Yes(ish): > > > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/ > > > > > > > > Thanks for finding this! Don't we need the shebang handling here as > > > > well? > > > > > > > I don't think so. In this case we aren't making it so execve can point to > > > some arbitrary impl, just that we propagate the current running qemu > > > env. > > > > > > > ping > ping2 pint3 > > > > Laurent, do you per chance know why was it not accepted back > > > > then?Unfortunately I cannot find any discussion associated with v3 or > > > > v4 > > > > [1]. There were some concerns regarding v1 [2], but from what I can see > > > > they all were addressed. > > > > > > > > [1] > > > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/ > > > > [2] > > > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > The new option '-qemu-children' makes it so that on `execve` the child > process will be launch by the same `qemu` executable that is currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > --- > linux-user/main.c | 22 ++++++++++++++++++++++ > linux-user/syscall.c | 20 +++++++++++++++----- > linux-user/user-internals.h | 4 ++++ > 3 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..dfb303a1f2 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char ** qemu_argv; > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > + "", "Run child processes (created with execve) with qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + int i; > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); > + } > + } > + > qemu_set_log_filename_flags(last_log_filename, > last_log_mask | (enable_strace * LOG_STRACE), > &error_fatal); > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 9d5415674d..732ef89054 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > abi_long pathname, abi_long guest_argp, > abi_long guest_envp, int flags, bool is_execveat) > { > - int ret; > + int ret, argp_offset; > char **argp, **envp; > int argc, envc; > abi_ulong gp; > abi_ulong addr; > char **q; > void *p; > + bool through_qemu = !is_execveat && qemu_dup_for_children; > I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts it would be nice to support that as well. > argc = 0; > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > envc++; > } > > - argp = g_new0(char *, argc + 1); > + argp_offset = through_qemu ? qemu_argc : 0; > + argp = g_new0(char *, argc + argp_offset + 1); > envp = g_new0(char *, envc + 1); > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp)) { > goto execve_efault; > } > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } > + else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + > ret = is_execveat > ? safe_execveat(dirfd, exe, argp, envp, flags) > : safe_execve(exe, argp, envp); > @@ -8553,7 +8563,7 @@ execve_efault: > ret = -TARGET_EFAULT; > > execve_end: > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp) || !addr) { > break; > } > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > index 5c7f173ceb..0719e65ff4 100644 > --- a/linux-user/user-internals.h > +++ b/linux-user/user-internals.h > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > extern const char *qemu_uname_release; > extern unsigned long mmap_min_addr; > > +extern bool qemu_dup_for_children; > +extern int qemu_argc; > +extern char ** qemu_argv; > + > typedef struct IOCTLEntry IOCTLEntry; > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > -- > 2.34.1 >
On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > The new option '-qemu-children' makes it so that on `execve` the child > > process will be launch by the same `qemu` executable that is currently > > running along with its current commandline arguments. > > > > The motivation for the change is to make it so that plugins running > > through `qemu` can continue to run on children. Why not just > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > emulation, and can sometimes be useful for elf files that can run > > natively. Enabling `binfmt` for all natively runnable elf files may > > not be desirable. > > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > --- > > linux-user/main.c | 22 ++++++++++++++++++++++ > > linux-user/syscall.c | 20 +++++++++++++++----- > > linux-user/user-internals.h | 4 ++++ > > 3 files changed, 41 insertions(+), 5 deletions(-) > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > index 8143a0d4b0..dfb303a1f2 100644 > > --- a/linux-user/main.c > > +++ b/linux-user/main.c > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > uintptr_t guest_base; > > bool have_guest_base; > > > > +bool qemu_dup_for_children; > > +int qemu_argc; > > +char ** qemu_argv; > > + > > /* > > * Used to implement backwards-compatibility for the `-strace`, and > > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > perf_enable_jitdump(); > > } > > > > +static void handle_arg_qemu_children(const char *arg) > > +{ > > + qemu_dup_for_children = true; > > +} > > + > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > #ifdef CONFIG_PLUGIN > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > "", "Generate a jit-${pid}.dump file for perf"}, > > + {"qemu-children", > > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > > + "", "Run child processes (created with execve) with qemu " > > + "(as instantiated for the parent)"}, > > {NULL, NULL, false, NULL, NULL, NULL} > > }; > > > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > > > optind = parse_args(argc, argv); > > > > + if (qemu_dup_for_children) { > > + int i; > > + qemu_argc = optind; > > + qemu_argv = g_new0(char *, qemu_argc); > > + for (i = 0; i < optind; ++i) { > > + qemu_argv[i] = strdup(argv[i]); > > + } > > + } > > + > > qemu_set_log_filename_flags(last_log_filename, > > last_log_mask | (enable_strace * LOG_STRACE), > > &error_fatal); > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > > index 9d5415674d..732ef89054 100644 > > --- a/linux-user/syscall.c > > +++ b/linux-user/syscall.c > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > abi_long pathname, abi_long guest_argp, > > abi_long guest_envp, int flags, bool is_execveat) > > { > > - int ret; > > + int ret, argp_offset; > > char **argp, **envp; > > int argc, envc; > > abi_ulong gp; > > abi_ulong addr; > > char **q; > > void *p; > > + bool through_qemu = !is_execveat && qemu_dup_for_children; > > > I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts > it would be nice to support that as well. > > > argc = 0; > > > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > envc++; > > } > > > > - argp = g_new0(char *, argc + 1); > > + argp_offset = through_qemu ? qemu_argc : 0; > > + argp = g_new0(char *, argc + argp_offset + 1); > > envp = g_new0(char *, envc + 1); > > > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp)) { > > goto execve_efault; > > } > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > } > > > > const char *exe = p; > > - if (is_proc_myself(p, "exe")) { > > + if (through_qemu) { > > + int i; > > + for (i = 0; i < argp_offset; ++i) { > > + argp[i] = qemu_argv[i]; > > + } > > + exe = qemu_argv[0]; > > + } > > + else if (is_proc_myself(p, "exe")) { > > exe = exec_path; > > } > > + > > ret = is_execveat > > ? safe_execveat(dirfd, exe, argp, envp, flags) > > : safe_execve(exe, argp, envp); > > @@ -8553,7 +8563,7 @@ execve_efault: > > ret = -TARGET_EFAULT; > > > > execve_end: > > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) { > > if (get_user_ual(addr, gp) || !addr) { > > break; > > } > > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > > index 5c7f173ceb..0719e65ff4 100644 > > --- a/linux-user/user-internals.h > > +++ b/linux-user/user-internals.h > > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > > extern const char *qemu_uname_release; > > extern unsigned long mmap_min_addr; > > > > +extern bool qemu_dup_for_children; > > +extern int qemu_argc; > > +extern char ** qemu_argv; > > + > > typedef struct IOCTLEntry IOCTLEntry; > > > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > > -- > > 2.34.1 > > ping
On Tue, Sep 10, 2024 at 3:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > The new option '-qemu-children' makes it so that on `execve` the child > > > process will be launch by the same `qemu` executable that is currently > > > running along with its current commandline arguments. > > > > > > The motivation for the change is to make it so that plugins running > > > through `qemu` can continue to run on children. Why not just > > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > > emulation, and can sometimes be useful for elf files that can run > > > natively. Enabling `binfmt` for all natively runnable elf files may > > > not be desirable. > > > > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > > --- > > > linux-user/main.c | 22 ++++++++++++++++++++++ > > > linux-user/syscall.c | 20 +++++++++++++++----- > > > linux-user/user-internals.h | 4 ++++ > > > 3 files changed, 41 insertions(+), 5 deletions(-) > > > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > > index 8143a0d4b0..dfb303a1f2 100644 > > > --- a/linux-user/main.c > > > +++ b/linux-user/main.c > > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > > uintptr_t guest_base; > > > bool have_guest_base; > > > > > > +bool qemu_dup_for_children; > > > +int qemu_argc; > > > +char ** qemu_argv; > > > + > > > /* > > > * Used to implement backwards-compatibility for the `-strace`, and > > > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > > perf_enable_jitdump(); > > > } > > > > > > +static void handle_arg_qemu_children(const char *arg) > > > +{ > > > + qemu_dup_for_children = true; > > > +} > > > + > > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > > > #ifdef CONFIG_PLUGIN > > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > > "", "Generate a jit-${pid}.dump file for perf"}, > > > + {"qemu-children", > > > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > > > + "", "Run child processes (created with execve) with qemu " > > > + "(as instantiated for the parent)"}, > > > {NULL, NULL, false, NULL, NULL, NULL} > > > }; > > > > > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp) > > > > > > optind = parse_args(argc, argv); > > > > > > + if (qemu_dup_for_children) { > > > + int i; > > > + qemu_argc = optind; > > > + qemu_argv = g_new0(char *, qemu_argc); > > > + for (i = 0; i < optind; ++i) { > > > + qemu_argv[i] = strdup(argv[i]); > > > + } > > > + } > > > + > > > qemu_set_log_filename_flags(last_log_filename, > > > last_log_mask | (enable_strace * LOG_STRACE), > > > &error_fatal); > > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > > > index 9d5415674d..732ef89054 100644 > > > --- a/linux-user/syscall.c > > > +++ b/linux-user/syscall.c > > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > > abi_long pathname, abi_long guest_argp, > > > abi_long guest_envp, int flags, bool is_execveat) > > > { > > > - int ret; > > > + int ret, argp_offset; > > > char **argp, **envp; > > > int argc, envc; > > > abi_ulong gp; > > > abi_ulong addr; > > > char **q; > > > void *p; > > > + bool through_qemu = !is_execveat && qemu_dup_for_children; > > > > > I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts > > it would be nice to support that as well. > > > > > argc = 0; > > > > > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > > envc++; > > > } > > > > > > - argp = g_new0(char *, argc + 1); > > > + argp_offset = through_qemu ? qemu_argc : 0; > > > + argp = g_new0(char *, argc + argp_offset + 1); > > > envp = g_new0(char *, envc + 1); > > > > > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > > > + for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) { > > > if (get_user_ual(addr, gp)) { > > > goto execve_efault; > > > } > > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > > } > > > > > > const char *exe = p; > > > - if (is_proc_myself(p, "exe")) { > > > + if (through_qemu) { > > > + int i; > > > + for (i = 0; i < argp_offset; ++i) { > > > + argp[i] = qemu_argv[i]; > > > + } > > > + exe = qemu_argv[0]; > > > + } > > > + else if (is_proc_myself(p, "exe")) { > > > exe = exec_path; > > > } > > > + > > > ret = is_execveat > > > ? safe_execveat(dirfd, exe, argp, envp, flags) > > > : safe_execve(exe, argp, envp); > > > @@ -8553,7 +8563,7 @@ execve_efault: > > > ret = -TARGET_EFAULT; > > > > > > execve_end: > > > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > > > + for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) { > > > if (get_user_ual(addr, gp) || !addr) { > > > break; > > > } > > > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > > > index 5c7f173ceb..0719e65ff4 100644 > > > --- a/linux-user/user-internals.h > > > +++ b/linux-user/user-internals.h > > > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > > > extern const char *qemu_uname_release; > > > extern unsigned long mmap_min_addr; > > > > > > +extern bool qemu_dup_for_children; > > > +extern int qemu_argc; > > > +extern char ** qemu_argv; > > > + > > > typedef struct IOCTLEntry IOCTLEntry; > > > > > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > > > -- > > > 2.34.1 > > > > > ping ping
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.
The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children. Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
linux-user/main.c | 21 ++++++
linux-user/syscall.c | 21 ++++--
linux-user/user-internals.h | 4 ++
tests/tcg/multiarch/Makefile.target | 8 +++
.../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++
5 files changed, 117 insertions(+), 5 deletions(-)
create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c
diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..5e3d41dc2b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
uintptr_t guest_base;
bool have_guest_base;
+bool qemu_dup_for_children;
+int qemu_argc;
+char **qemu_argv;
+
/*
* Used to implement backwards-compatibility for the `-strace`, and
* QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
perf_enable_jitdump();
}
+static void handle_arg_qemu_children(const char *arg)
+{
+ qemu_dup_for_children = true;
+}
+
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
#ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
"", "Generate a /tmp/perf-${pid}.map file for perf"},
{"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
"", "Generate a jit-${pid}.dump file for perf"},
+ {"qemu-children",
+ "QEMU_CHILDREN", false, handle_arg_qemu_children,
+ "", "Run child processes (created with execve) with qemu "
+ "(as instantiated for the parent)"},
{NULL, NULL, false, NULL, NULL, NULL}
};
@@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
optind = parse_args(argc, argv);
+ if (qemu_dup_for_children) {
+ qemu_argc = optind;
+ qemu_argv = g_new0(char *, qemu_argc);
+ for (i = 0; i < optind; ++i) {
+ qemu_argv[i] = strdup(argv[i]);
+ }
+ }
+
qemu_set_log_filename_flags(last_log_filename,
last_log_mask | (enable_strace * LOG_STRACE),
&error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 59b2080b98..96b105e9ce 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
abi_long pathname, abi_long guest_argp,
abi_long guest_envp, int flags, bool is_execveat)
{
- int ret;
+ int ret, argp_offset;
char **argp, **envp;
int argc, envc;
abi_ulong gp;
abi_ulong addr;
char **q;
void *p;
+ bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
argc = 0;
@@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
envc++;
}
- argp = g_new0(char *, argc + 1);
+ argp_offset = through_qemu ? qemu_argc : 0;
+ argp = g_new0(char *, argc + argp_offset + 1);
envp = g_new0(char *, envc + 1);
- for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset;
+ gp; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp)) {
goto execve_efault;
}
@@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
}
const char *exe = p;
- if (is_proc_myself(p, "exe")) {
+ if (through_qemu) {
+ int i;
+ for (i = 0; i < argp_offset; ++i) {
+ argp[i] = qemu_argv[i];
+ }
+ exe = qemu_argv[0];
+ } else if (is_proc_myself(p, "exe")) {
exe = exec_path;
}
+
ret = is_execveat
? safe_execveat(dirfd, exe, argp, envp, flags)
: safe_execve(exe, argp, envp);
@@ -8644,7 +8654,8 @@ execve_efault:
ret = -TARGET_EFAULT;
execve_end:
- for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset;
+ *q; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp) || !addr) {
break;
}
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 46ffc093f4..ed3ed666a0 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
extern const char *qemu_uname_release;
extern unsigned long mmap_min_addr;
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char **qemu_argv;
+
typedef struct IOCTLEntry IOCTLEntry;
typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 78b83d5575..0e220953e7 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -30,6 +30,14 @@ run-float_%: float_%
$(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref)
+run-linux-execve-qemu-children: linux-execve-qemu-children
+ $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+ $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
+run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children
+ $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+ $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
testthread: LDFLAGS+=-lpthread
threadcount: LDFLAGS+=-lpthread
diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
new file mode 100644
index 0000000000..60d6537666
--- /dev/null
+++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
@@ -0,0 +1,68 @@
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MAX_COMM_SIZE (4096)
+
+int
+main(int argc, char ** argv, char ** envp) {
+ int fd;
+ char next_arg[2];
+ char * buf;
+ ssize_t off;
+ const char * expec_comm;
+ assert(argc == 3 || argc == 4);
+ fd = open("/proc/self/comm", O_RDONLY);
+ assert(fd > 0);
+
+ buf = calloc(MAX_COMM_SIZE + 1, 1);
+ assert(buf != NULL);
+
+ off = 0;
+ for (;;) {
+ ssize_t res = read(fd, buf + off, 1);
+ if (res < 0 && errno != EAGAIN) {
+ perror("Failed to read comm");
+ return -1;
+ }
+ if (res == 0) {
+ break;
+ }
+
+ off += res;
+
+ if (off >= MAX_COMM_SIZE) {
+ fprintf(stderr, "/proc/self/comm too large for test\n");
+ return -1;
+ }
+ }
+ assert(off && buf[off] == '\0' && buf[off - 1] == '\n');
+ buf[off - 1] = '\0';
+ expec_comm = basename(argv[1]);
+ if (argc == 3 && strncmp(buf, expec_comm, strlen(expec_comm))) {
+ fprintf(stderr,
+ "Didn't propagate qemu settings\nComm: '%s'\nExpec: '%s'\n",
+ buf, expec_comm);
+ return -1;
+ }
+ free(buf);
+ next_arg[0] = argv[2][0];
+ next_arg[1] = '\0';
+ if (next_arg[0] == '9') {
+ return 0;
+ }
+ next_arg[0] += 1;
+ char * next_args[] = { argv[0], argv[1], next_arg, NULL };
+ int eres = execve(argv[0], &next_args[0], envp);
+ if (eres != 0) {
+ fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+}
--
2.43.0
On 10/30/24 14:10, Noah Goldstein wrote: > The new option '-qemu-children' makes it so that on `execve` the child > process will be launch by the same `qemu` executable that is currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > --- > linux-user/main.c | 21 ++++++ > linux-user/syscall.c | 21 ++++-- > linux-user/user-internals.h | 4 ++ > tests/tcg/multiarch/Makefile.target | 8 +++ > .../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++ > 5 files changed, 117 insertions(+), 5 deletions(-) > create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..5e3d41dc2b 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char **qemu_argv; > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > + "", "Run child processes (created with execve) with qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); g_strdup. > + bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children; Why is this limited to AT_FDCWD? Why not for execvat too? > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + You still need to handle is_proc_myself, for the guest binary. I wonder if those two cases are related. Do we need to also add an argument so that we can pass the executable to the next qemu via file descriptor? I.e. execvat becomes f = openat() execv(qemu, "-execfd", f) and is_proc_myself uses execfd, which we already have open. r~
On Tue, Nov 5, 2024 at 5:37 AM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 10/30/24 14:10, Noah Goldstein wrote: > > The new option '-qemu-children' makes it so that on `execve` the child > > process will be launch by the same `qemu` executable that is currently > > running along with its current commandline arguments. > > > > The motivation for the change is to make it so that plugins running > > through `qemu` can continue to run on children. Why not just > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > emulation, and can sometimes be useful for elf files that can run > > natively. Enabling `binfmt` for all natively runnable elf files may > > not be desirable. > > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > --- > > linux-user/main.c | 21 ++++++ > > linux-user/syscall.c | 21 ++++-- > > linux-user/user-internals.h | 4 ++ > > tests/tcg/multiarch/Makefile.target | 8 +++ > > .../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++ > > 5 files changed, 117 insertions(+), 5 deletions(-) > > create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > index 8143a0d4b0..5e3d41dc2b 100644 > > --- a/linux-user/main.c > > +++ b/linux-user/main.c > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > uintptr_t guest_base; > > bool have_guest_base; > > > > +bool qemu_dup_for_children; > > +int qemu_argc; > > +char **qemu_argv; > > + > > /* > > * Used to implement backwards-compatibility for the `-strace`, and > > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > perf_enable_jitdump(); > > } > > > > +static void handle_arg_qemu_children(const char *arg) > > +{ > > + qemu_dup_for_children = true; > > +} > > + > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > #ifdef CONFIG_PLUGIN > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > "", "Generate a jit-${pid}.dump file for perf"}, > > + {"qemu-children", > > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > > + "", "Run child processes (created with execve) with qemu " > > + "(as instantiated for the parent)"}, > > {NULL, NULL, false, NULL, NULL, NULL} > > }; > > > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp) > > > > optind = parse_args(argc, argv); > > > > + if (qemu_dup_for_children) { > > + qemu_argc = optind; > > + qemu_argv = g_new0(char *, qemu_argc); > > + for (i = 0; i < optind; ++i) { > > + qemu_argv[i] = strdup(argv[i]); > > g_strdup. ack > > > + bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children; > > Why is this limited to AT_FDCWD? Why not for execvat too? > We could, initially it was because AFAICT qemu doesn't support executing a program relative to another dir, but it would be simple enough to just join the relative program path and path dirfd points to. Want me to add support? > > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > } > > > > const char *exe = p; > > - if (is_proc_myself(p, "exe")) { > > + if (through_qemu) { > > + int i; > > + for (i = 0; i < argp_offset; ++i) { > > + argp[i] = qemu_argv[i]; > > + } > > + exe = qemu_argv[0]; > > + } else if (is_proc_myself(p, "exe")) { > > exe = exec_path; > > } > > + > > You still need to handle is_proc_myself, for the guest binary. > > I wonder if those two cases are related. Do we need to also add an argument so that we > can pass the executable to the next qemu via file descriptor? I.e. execvat becomes > > f = openat() > execv(qemu, "-execfd", f) > > and is_proc_myself uses execfd, which we already have open. > > > r~
On Tue, Nov 5, 2024 at 5:48 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Tue, Nov 5, 2024 at 5:37 AM Richard Henderson > <richard.henderson@linaro.org> wrote: > > > > On 10/30/24 14:10, Noah Goldstein wrote: > > > The new option '-qemu-children' makes it so that on `execve` the child > > > process will be launch by the same `qemu` executable that is currently > > > running along with its current commandline arguments. > > > > > > The motivation for the change is to make it so that plugins running > > > through `qemu` can continue to run on children. Why not just > > > `binfmt`?: Plugins can be desirable regardless of system/architecture > > > emulation, and can sometimes be useful for elf files that can run > > > natively. Enabling `binfmt` for all natively runnable elf files may > > > not be desirable. > > > > > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > > > --- > > > linux-user/main.c | 21 ++++++ > > > linux-user/syscall.c | 21 ++++-- > > > linux-user/user-internals.h | 4 ++ > > > tests/tcg/multiarch/Makefile.target | 8 +++ > > > .../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++ > > > 5 files changed, 117 insertions(+), 5 deletions(-) > > > create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c > > > > > > diff --git a/linux-user/main.c b/linux-user/main.c > > > index 8143a0d4b0..5e3d41dc2b 100644 > > > --- a/linux-user/main.c > > > +++ b/linux-user/main.c > > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > > > uintptr_t guest_base; > > > bool have_guest_base; > > > > > > +bool qemu_dup_for_children; > > > +int qemu_argc; > > > +char **qemu_argv; > > > + > > > /* > > > * Used to implement backwards-compatibility for the `-strace`, and > > > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > > > perf_enable_jitdump(); > > > } > > > > > > +static void handle_arg_qemu_children(const char *arg) > > > +{ > > > + qemu_dup_for_children = true; > > > +} > > > + > > > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > > > > > #ifdef CONFIG_PLUGIN > > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > > > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > > > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > > > "", "Generate a jit-${pid}.dump file for perf"}, > > > + {"qemu-children", > > > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > > > + "", "Run child processes (created with execve) with qemu " > > > + "(as instantiated for the parent)"}, > > > {NULL, NULL, false, NULL, NULL, NULL} > > > }; > > > > > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp) > > > > > > optind = parse_args(argc, argv); > > > > > > + if (qemu_dup_for_children) { > > > + qemu_argc = optind; > > > + qemu_argv = g_new0(char *, qemu_argc); > > > + for (i = 0; i < optind; ++i) { > > > + qemu_argv[i] = strdup(argv[i]); > > > > g_strdup. > ack > > > > > + bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children; > > > > Why is this limited to AT_FDCWD? Why not for execvat too? > > > > We could, initially it was because AFAICT qemu doesn't support executing a > program relative to another dir, but it would be simple enough to just join > the relative program path and path dirfd points to. > > Want me to add support? > > > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > > > } > > > > > > const char *exe = p; > > > - if (is_proc_myself(p, "exe")) { > > > + if (through_qemu) { > > > + int i; > > > + for (i = 0; i < argp_offset; ++i) { > > > + argp[i] = qemu_argv[i]; > > > + } > > > + exe = qemu_argv[0]; > > > + } else if (is_proc_myself(p, "exe")) { > > > exe = exec_path; > > > } > > > + > > > > You still need to handle is_proc_myself, for the guest binary. Would this by handled by basically do: ``` if (is_proc_myself(p, "exe")) { exe = exec_path; if (through_qemu) argp[argp_offset] = exec_path; } ``` Or am I missing something? > > > > I wonder if those two cases are related. Do we need to also add an argument so that we > > can pass the executable to the next qemu via file descriptor? I.e. execvat becomes > > > > f = openat() > > execv(qemu, "-execfd", f) > > > > and is_proc_myself uses execfd, which we already have open. How does passing a fd from one process to another work? > > > > > > r~
On 11/5/24 23:54, Noah Goldstein wrote: >>> You still need to handle is_proc_myself, for the guest binary. > > Would this by handled by basically do: > > ``` > if (is_proc_myself(p, "exe")) { > exe = exec_path; > if (through_qemu) > argp[argp_offset] = exec_path; > } > ``` > Or am I missing something? Something like that, yes. >>> I wonder if those two cases are related. Do we need to also add an argument so that we >>> can pass the executable to the next qemu via file descriptor? I.e. execvat becomes >>> >>> f = openat() >>> execv(qemu, "-execfd", f) >>> >>> and is_proc_myself uses execfd, which we already have open. > > How does passing a fd from one process to another work? As long as the fd is not marked O_CLOEXEC, it stays open in the new process. Providing the number via command-line, or whatever, is sufficient for the new process to know what is going on. I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file descriptor. r~
On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/5/24 23:54, Noah Goldstein wrote: > >>> You still need to handle is_proc_myself, for the guest binary. > > > > Would this by handled by basically do: > > > > ``` > > if (is_proc_myself(p, "exe")) { > > exe = exec_path; > > if (through_qemu) > > argp[argp_offset] = exec_path; > > } > > ``` > > Or am I missing something? > > Something like that, yes. > > >>> I wonder if those two cases are related. Do we need to also add an argument so that we > >>> can pass the executable to the next qemu via file descriptor? I.e. execvat becomes > >>> > >>> f = openat() > >>> execv(qemu, "-execfd", f) > >>> > >>> and is_proc_myself uses execfd, which we already have open. > > > > How does passing a fd from one process to another work? > As long as the fd is not marked O_CLOEXEC, it stays open in the new process. Providing > the number via command-line, or whatever, is sufficient for the new process to know what > is going on. Err I guess I was thinking its a bit weird having an option that is only really applicable if qemu is a child process. I.e the `-execfd` argument is not usable from commandline. > > I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file > descriptor. We could also do something along the lines of: ``` fd = openat(dirfd, exe); char new_exe[PATH_MAX]; char fd_path[PATH_MAX]; sprintf(fd_path, "/proc/self/fd/%d", fd); readlink(fd_path, new_exe, PATH_MAX); exe = new_exe; ``` > > > r~
On 11/6/24 17:03, Noah Goldstein wrote: > On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson > <richard.henderson@linaro.org> wrote: >> >> On 11/5/24 23:54, Noah Goldstein wrote: >>>>> You still need to handle is_proc_myself, for the guest binary. >>> >>> Would this by handled by basically do: >>> >>> ``` >>> if (is_proc_myself(p, "exe")) { >>> exe = exec_path; >>> if (through_qemu) >>> argp[argp_offset] = exec_path; >>> } >>> ``` >>> Or am I missing something? >> >> Something like that, yes. >> >>>>> I wonder if those two cases are related. Do we need to also add an argument so that we >>>>> can pass the executable to the next qemu via file descriptor? I.e. execvat becomes >>>>> >>>>> f = openat() >>>>> execv(qemu, "-execfd", f) >>>>> >>>>> and is_proc_myself uses execfd, which we already have open. >>> >>> How does passing a fd from one process to another work? >> As long as the fd is not marked O_CLOEXEC, it stays open in the new process. Providing >> the number via command-line, or whatever, is sufficient for the new process to know what >> is going on. > > Err I guess I was thinking its a bit weird having an option that is > only really applicable > if qemu is a child process. I.e the `-execfd` argument is not usable > from commandline. qemu-foo -execfd 3 3< /some/file Or perhaps opened via other tooling. >> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file >> descriptor. > > We could also do something along the lines of: > > ``` > fd = openat(dirfd, exe); > char new_exe[PATH_MAX]; > char fd_path[PATH_MAX]; > sprintf(fd_path, "/proc/self/fd/%d", fd); > readlink(fd_path, new_exe, PATH_MAX); Reading the link doesn't always work. Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored. r~
On Wed, Nov 6, 2024 at 11:26 AM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/6/24 17:03, Noah Goldstein wrote: > > On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson > > <richard.henderson@linaro.org> wrote: > >> > >> On 11/5/24 23:54, Noah Goldstein wrote: > >>>>> You still need to handle is_proc_myself, for the guest binary. > >>> > >>> Would this by handled by basically do: > >>> > >>> ``` > >>> if (is_proc_myself(p, "exe")) { > >>> exe = exec_path; > >>> if (through_qemu) > >>> argp[argp_offset] = exec_path; > >>> } > >>> ``` > >>> Or am I missing something? > >> > >> Something like that, yes. > >> > >>>>> I wonder if those two cases are related. Do we need to also add an argument so that we > >>>>> can pass the executable to the next qemu via file descriptor? I.e. execvat becomes > >>>>> > >>>>> f = openat() > >>>>> execv(qemu, "-execfd", f) > >>>>> > >>>>> and is_proc_myself uses execfd, which we already have open. > >>> > >>> How does passing a fd from one process to another work? > >> As long as the fd is not marked O_CLOEXEC, it stays open in the new process. Providing > >> the number via command-line, or whatever, is sufficient for the new process to know what > >> is going on. > > > > Err I guess I was thinking its a bit weird having an option that is > > only really applicable > > if qemu is a child process. I.e the `-execfd` argument is not usable > > from commandline. > > qemu-foo -execfd 3 3< /some/file > > Or perhaps opened via other tooling. > > >> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file > >> descriptor. > > > > We could also do something along the lines of: > > > > ``` > > fd = openat(dirfd, exe); > > char new_exe[PATH_MAX]; > > char fd_path[PATH_MAX]; > > sprintf(fd_path, "/proc/self/fd/%d", fd); > > readlink(fd_path, new_exe, PATH_MAX); > > Reading the link doesn't always work. > Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored. Okay, fair enough, I will get started on adding `-execfd` > > > r~
On Wed, Nov 6, 2024 at 11:53 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Wed, Nov 6, 2024 at 11:26 AM Richard Henderson > <richard.henderson@linaro.org> wrote: > > > > On 11/6/24 17:03, Noah Goldstein wrote: > > > On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson > > > <richard.henderson@linaro.org> wrote: > > >> > > >> On 11/5/24 23:54, Noah Goldstein wrote: > > >>>>> You still need to handle is_proc_myself, for the guest binary. > > >>> > > >>> Would this by handled by basically do: > > >>> > > >>> ``` > > >>> if (is_proc_myself(p, "exe")) { > > >>> exe = exec_path; > > >>> if (through_qemu) > > >>> argp[argp_offset] = exec_path; > > >>> } > > >>> ``` > > >>> Or am I missing something? > > >> > > >> Something like that, yes. > > >> > > >>>>> I wonder if those two cases are related. Do we need to also add an argument so that we > > >>>>> can pass the executable to the next qemu via file descriptor? I.e. execvat becomes > > >>>>> > > >>>>> f = openat() > > >>>>> execv(qemu, "-execfd", f) > > >>>>> > > >>>>> and is_proc_myself uses execfd, which we already have open. > > >>> > > >>> How does passing a fd from one process to another work? > > >> As long as the fd is not marked O_CLOEXEC, it stays open in the new process. Providing > > >> the number via command-line, or whatever, is sufficient for the new process to know what > > >> is going on. > > > > > > Err I guess I was thinking its a bit weird having an option that is > > > only really applicable > > > if qemu is a child process. I.e the `-execfd` argument is not usable > > > from commandline. > > > > qemu-foo -execfd 3 3< /some/file > > > > Or perhaps opened via other tooling. > > > > >> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file > > >> descriptor. > > > > > > We could also do something along the lines of: > > > > > > ``` > > > fd = openat(dirfd, exe); > > > char new_exe[PATH_MAX]; > > > char fd_path[PATH_MAX]; > > > sprintf(fd_path, "/proc/self/fd/%d", fd); > > > readlink(fd_path, new_exe, PATH_MAX); > > > > Reading the link doesn't always work. > > Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored. > > Okay, fair enough, I will get started on adding `-execfd` Question about impl regarding handling of `-execfd` with/without a program name. 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. Do you want to allow both of these? If you want to allow (1), what should we use for `argv[0]`/`exec_path`. The program pass ("ls") or `readlink(<some_fd>)`?
On 11/6/24 18:13, Noah Goldstein wrote: > Question about impl regarding handling of `-execfd` with/without a program name. > > 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. > 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. > > Do you want to allow both of these? If you want to allow (1), what should > we use for `argv[0]`/`exec_path`. The program pass ("ls") or > `readlink(<some_fd>)`? The canonical response is, examine the kernel source. We're not implementing this in a vacuum, you're replicating execveat(2). I suspect the answer is (1), to be compared with syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH); r~
On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/6/24 18:13, Noah Goldstein wrote: > > Question about impl regarding handling of `-execfd` with/without a program name. > > > > 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. > > 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. > > > > Do you want to allow both of these? If you want to allow (1), what should > > we use for `argv[0]`/`exec_path`. The program pass ("ls") or > > `readlink(<some_fd>)`? > > The canonical response is, examine the kernel source. > We're not implementing this in a vacuum, you're replicating execveat(2). > > I suspect the answer is (1), to be compared with > > syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH); Err, I think the reference for '-execfd' is `fexecve`: https://man7.org/linux/man-pages/man3/fexecve.3.html Which doesn't take a path. So I guess we just interpret the "ls" as argv[0] but not as "exec_path". > > > r~
On 11/6/24 21:30, Noah Goldstein wrote: > On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson > <richard.henderson@linaro.org> wrote: >> >> On 11/6/24 18:13, Noah Goldstein wrote: >>> Question about impl regarding handling of `-execfd` with/without a program name. >>> >>> 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. >>> 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. >>> >>> Do you want to allow both of these? If you want to allow (1), what should >>> we use for `argv[0]`/`exec_path`. The program pass ("ls") or >>> `readlink(<some_fd>)`? >> >> The canonical response is, examine the kernel source. >> We're not implementing this in a vacuum, you're replicating execveat(2). >> >> I suspect the answer is (1), to be compared with >> >> syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH); > > Err, I think the reference for '-execfd' is `fexecve`: > https://man7.org/linux/man-pages/man3/fexecve.3.html No, fexecve(3) is a glibc function which (nowadays) uses the execveat(2) syscall that we want to emulate. > Which doesn't take a path... ... corresponding to the "" and AT_EMPTY_PATH above. > So I guess we just interpret the "ls" as argv[0] but not as "exec_path". But your conclusion is correct. r~
On Wed, Nov 6, 2024 at 3:30 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson > <richard.henderson@linaro.org> wrote: > > > > On 11/6/24 18:13, Noah Goldstein wrote: > > > Question about impl regarding handling of `-execfd` with/without a program name. > > > > > > 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. > > > 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. > > > > > > Do you want to allow both of these? If you want to allow (1), what should > > > we use for `argv[0]`/`exec_path`. The program pass ("ls") or > > > `readlink(<some_fd>)`? > > > > The canonical response is, examine the kernel source. > > We're not implementing this in a vacuum, you're replicating execveat(2). > > > > I suspect the answer is (1), to be compared with > > > > syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH); > > Err, I think the reference for '-execfd' is `fexecve`: > https://man7.org/linux/man-pages/man3/fexecve.3.html > > Which doesn't take a path. So I guess we just interpret the "ls" as > argv[0] but not > as "exec_path". One more point, what should the behavior be if we have AT_EXECFD from binfmt-misc? > > > > > > r~
On 11/6/24 23:49, Noah Goldstein wrote: > On Wed, Nov 6, 2024 at 3:30 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: >> >> On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson >> <richard.henderson@linaro.org> wrote: >>> >>> On 11/6/24 18:13, Noah Goldstein wrote: >>>> Question about impl regarding handling of `-execfd` with/without a program name. >>>> >>>> 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`. >>>> 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`. >>>> >>>> Do you want to allow both of these? If you want to allow (1), what should >>>> we use for `argv[0]`/`exec_path`. The program pass ("ls") or >>>> `readlink(<some_fd>)`? >>> >>> The canonical response is, examine the kernel source. >>> We're not implementing this in a vacuum, you're replicating execveat(2). >>> >>> I suspect the answer is (1), to be compared with >>> >>> syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH); >> >> Err, I think the reference for '-execfd' is `fexecve`: >> https://man7.org/linux/man-pages/man3/fexecve.3.html >> >> Which doesn't take a path. So I guess we just interpret the "ls" as >> argv[0] but not >> as "exec_path". > > One more point, what should the behavior be if we have > AT_EXECFD from binfmt-misc? You mean precedence of AT_EXECFD vs the command-line option? Arbitrary, since it would be a usage error to have both. You'd have to do something silly with the binfmt-misc rule for that to happen. Perhaps static int execfd = -1; // option processing // main if (execfd < 0) { errno = 0; execfd = qemu_getauxval(AT_EXECFD); if (errno != 0) { execfd = open(...); } } just because that's a simple change to what's currently present. r~
On Wed, Oct 30, 2024 at 9:10 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > The new option '-qemu-children' makes it so that on `execve` the child > process will be launch by the same `qemu` executable that is currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> > --- > linux-user/main.c | 21 ++++++ > linux-user/syscall.c | 21 ++++-- > linux-user/user-internals.h | 4 ++ > tests/tcg/multiarch/Makefile.target | 8 +++ > .../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++ > 5 files changed, 117 insertions(+), 5 deletions(-) > create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..5e3d41dc2b 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char **qemu_argv; > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > + "", "Run child processes (created with execve) with qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); > + } > + } > + > qemu_set_log_filename_flags(last_log_filename, > last_log_mask | (enable_strace * LOG_STRACE), > &error_fatal); > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 59b2080b98..96b105e9ce 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > abi_long pathname, abi_long guest_argp, > abi_long guest_envp, int flags, bool is_execveat) > { > - int ret; > + int ret, argp_offset; > char **argp, **envp; > int argc, envc; > abi_ulong gp; > abi_ulong addr; > char **q; > void *p; > + bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children; > > argc = 0; > > @@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > envc++; > } > > - argp = g_new0(char *, argc + 1); > + argp_offset = through_qemu ? qemu_argc : 0; > + argp = g_new0(char *, argc + argp_offset + 1); > envp = g_new0(char *, envc + 1); > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; > + gp; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp)) { > goto execve_efault; > } > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + > ret = is_execveat > ? safe_execveat(dirfd, exe, argp, envp, flags) > : safe_execve(exe, argp, envp); > @@ -8644,7 +8654,8 @@ execve_efault: > ret = -TARGET_EFAULT; > > execve_end: > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; > + *q; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp) || !addr) { > break; > } > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > index 46ffc093f4..ed3ed666a0 100644 > --- a/linux-user/user-internals.h > +++ b/linux-user/user-internals.h > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > extern const char *qemu_uname_release; > extern unsigned long mmap_min_addr; > > +extern bool qemu_dup_for_children; > +extern int qemu_argc; > +extern char **qemu_argv; > + > typedef struct IOCTLEntry IOCTLEntry; > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target > index 78b83d5575..0e220953e7 100644 > --- a/tests/tcg/multiarch/Makefile.target > +++ b/tests/tcg/multiarch/Makefile.target > @@ -30,6 +30,14 @@ run-float_%: float_% > $(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref) > > > +run-linux-execve-qemu-children: linux-execve-qemu-children > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0) > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip) > + > +run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0) > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip) > + > testthread: LDFLAGS+=-lpthread > > threadcount: LDFLAGS+=-lpthread > diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c > new file mode 100644 > index 0000000000..60d6537666 > --- /dev/null > +++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c > @@ -0,0 +1,68 @@ > +#include <assert.h> > +#include <errno.h> > +#include <fcntl.h> > +#include <libgen.h> > +#include <malloc.h> > +#include <stdio.h> > +#include <string.h> > +#include <unistd.h> > + > +#define MAX_COMM_SIZE (4096) > + > +int > +main(int argc, char ** argv, char ** envp) { > + int fd; > + char next_arg[2]; > + char * buf; > + ssize_t off; > + const char * expec_comm; > + assert(argc == 3 || argc == 4); > + fd = open("/proc/self/comm", O_RDONLY); > + assert(fd > 0); > + > + buf = calloc(MAX_COMM_SIZE + 1, 1); > + assert(buf != NULL); > + > + off = 0; > + for (;;) { > + ssize_t res = read(fd, buf + off, 1); > + if (res < 0 && errno != EAGAIN) { > + perror("Failed to read comm"); > + return -1; > + } > + if (res == 0) { > + break; > + } > + > + off += res; > + > + if (off >= MAX_COMM_SIZE) { > + fprintf(stderr, "/proc/self/comm too large for test\n"); > + return -1; > + } > + } > + assert(off && buf[off] == '\0' && buf[off - 1] == '\n'); > + buf[off - 1] = '\0'; > + expec_comm = basename(argv[1]); > + if (argc == 3 && strncmp(buf, expec_comm, strlen(expec_comm))) { > + fprintf(stderr, > + "Didn't propagate qemu settings\nComm: '%s'\nExpec: '%s'\n", > + buf, expec_comm); > + return -1; > + } > + free(buf); > + next_arg[0] = argv[2][0]; > + next_arg[1] = '\0'; > + if (next_arg[0] == '9') { > + return 0; > + } > + next_arg[0] += 1; > + char * next_args[] = { argv[0], argv[1], next_arg, NULL }; > + int eres = execve(argv[0], &next_args[0], envp); > + if (eres != 0) { > + fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno, > + strerror(errno)); > + return -1; > + } > + return 0; > +} > -- > 2.43.0 > Added test that tests both old behavior (no propagation of qemu) and new behavior (propagation of qemu + cmdline). Tested on Aarch64 + Linux with: ``` make check-tcg ```
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.
The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children. Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
linux-user/main.c | 21 +++++++++++++++++++++
linux-user/syscall.c | 21 ++++++++++++++++-----
linux-user/user-internals.h | 4 ++++
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..5e3d41dc2b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
uintptr_t guest_base;
bool have_guest_base;
+bool qemu_dup_for_children;
+int qemu_argc;
+char **qemu_argv;
+
/*
* Used to implement backwards-compatibility for the `-strace`, and
* QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
perf_enable_jitdump();
}
+static void handle_arg_qemu_children(const char *arg)
+{
+ qemu_dup_for_children = true;
+}
+
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
#ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
"", "Generate a /tmp/perf-${pid}.map file for perf"},
{"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
"", "Generate a jit-${pid}.dump file for perf"},
+ {"qemu-children",
+ "QEMU_CHILDREN", false, handle_arg_qemu_children,
+ "", "Run child processes (created with execve) with qemu "
+ "(as instantiated for the parent)"},
{NULL, NULL, false, NULL, NULL, NULL}
};
@@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
optind = parse_args(argc, argv);
+ if (qemu_dup_for_children) {
+ qemu_argc = optind;
+ qemu_argv = g_new0(char *, qemu_argc);
+ for (i = 0; i < optind; ++i) {
+ qemu_argv[i] = strdup(argv[i]);
+ }
+ }
+
qemu_set_log_filename_flags(last_log_filename,
last_log_mask | (enable_strace * LOG_STRACE),
&error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a666986189..79019c0db0 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8457,13 +8457,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
abi_long pathname, abi_long guest_argp,
abi_long guest_envp, int flags, bool is_execveat)
{
- int ret;
+ int ret, argp_offset;
char **argp, **envp;
int argc, envc;
abi_ulong gp;
abi_ulong addr;
char **q;
void *p;
+ bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
argc = 0;
@@ -8487,10 +8488,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
envc++;
}
- argp = g_new0(char *, argc + 1);
+ argp_offset = through_qemu ? qemu_argc : 0;
+ argp = g_new0(char *, argc + argp_offset + 1);
envp = g_new0(char *, envc + 1);
- for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset;
+ gp; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp)) {
goto execve_efault;
}
@@ -8535,9 +8538,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
}
const char *exe = p;
- if (is_proc_myself(p, "exe")) {
+ if (through_qemu) {
+ int i;
+ for (i = 0; i < argp_offset; ++i) {
+ argp[i] = qemu_argv[i];
+ }
+ exe = qemu_argv[0];
+ } else if (is_proc_myself(p, "exe")) {
exe = exec_path;
}
+
ret = is_execveat
? safe_execveat(dirfd, exe, argp, envp, flags)
: safe_execve(exe, argp, envp);
@@ -8551,7 +8561,8 @@ execve_efault:
ret = -TARGET_EFAULT;
execve_end:
- for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+ for (gp = guest_argp, q = argp + argp_offset;
+ *q; gp += sizeof(abi_ulong), q++) {
if (get_user_ual(addr, gp) || !addr) {
break;
}
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 5c7f173ceb..a4615eee54 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
extern const char *qemu_uname_release;
extern unsigned long mmap_min_addr;
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char **qemu_argv;
+
typedef struct IOCTLEntry IOCTLEntry;
typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
--
2.43.0
© 2016 - 2024 Red Hat, Inc.