[PATCH v1] linux-user: Add option to run `execve`d programs through QEMU

Noah Goldstein posted 1 patch 2 months, 3 weeks ago
There is a newer version of this series
linux-user/main.c           | 22 ++++++++++++++++++++++
linux-user/syscall.c        | 20 +++++++++++++++-----
linux-user/user-internals.h |  4 ++++
3 files changed, 41 insertions(+), 5 deletions(-)
[PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 months, 3 weeks ago
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.

The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children.  Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
 linux-user/main.c           | 22 ++++++++++++++++++++++
 linux-user/syscall.c        | 20 +++++++++++++++-----
 linux-user/user-internals.h |  4 ++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..dfb303a1f2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
 
+bool qemu_dup_for_children;
+int qemu_argc;
+char ** qemu_argv;
+
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
  * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
     perf_enable_jitdump();
 }
 
+static void handle_arg_qemu_children(const char *arg)
+{
+    qemu_dup_for_children = true;
+}
+
 static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
 
 #ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
      "",           "Generate a /tmp/perf-${pid}.map file for perf"},
     {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
      "",           "Generate a jit-${pid}.dump file for perf"},
+    {"qemu-children",
+                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
+     "",           "Run child processes (created with execve) with qemu "
+                   "(as instantiated for the parent)"},
     {NULL, NULL, false, NULL, NULL, NULL}
 };
 
@@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
 
     optind = parse_args(argc, argv);
 
+    if (qemu_dup_for_children) {
+        int i;
+        qemu_argc = optind;
+        qemu_argv = g_new0(char *, qemu_argc);
+        for (i = 0; i < optind; ++i) {
+            qemu_argv[i] = strdup(argv[i]);
+        }
+    }
+
     qemu_set_log_filename_flags(last_log_filename,
                                 last_log_mask | (enable_strace * LOG_STRACE),
                                 &error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d5415674d..732ef89054 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long pathname, abi_long guest_argp,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
-    int ret;
+    int ret, argp_offset;
     char **argp, **envp;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
     char **q;
     void *p;
+    bool through_qemu = !is_execveat && qemu_dup_for_children;
 
     argc = 0;
 
@@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
         envc++;
     }
 
-    argp = g_new0(char *, argc + 1);
+    argp_offset = through_qemu ? qemu_argc : 0;
+    argp = g_new0(char *, argc + argp_offset + 1);
     envp = g_new0(char *, envc + 1);
 
-    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp)) {
             goto execve_efault;
         }
@@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
     }
 
     const char *exe = p;
-    if (is_proc_myself(p, "exe")) {
+    if (through_qemu) {
+        int i;
+        for (i = 0; i < argp_offset; ++i) {
+            argp[i] = qemu_argv[i];
+        }
+        exe = qemu_argv[0];
+    }
+    else if (is_proc_myself(p, "exe")) {
         exe = exec_path;
     }
+
     ret = is_execveat
         ? safe_execveat(dirfd, exe, argp, envp, flags)
         : safe_execve(exe, argp, envp);
@@ -8553,7 +8563,7 @@ execve_efault:
     ret = -TARGET_EFAULT;
 
 execve_end:
-    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp) || !addr) {
             break;
         }
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 5c7f173ceb..0719e65ff4 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char ** qemu_argv;
+
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
-- 
2.34.1
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Alex Bennée 3 weeks, 4 days ago
Noah Goldstein <goldstein.w.n@gmail.com> writes:

> The new option '-qemu-children' makes it so that on `execve` the child
> process will be launch by the same `qemu` executable that is currently
> running along with its current commandline arguments.
>
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.

Broadly I don't see anything wrong with the patch. However my principle
concern is how test it. Currently nothing in check-tcg ensures this
mechanism works and stresses the argument copying.

>
> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c           | 22 ++++++++++++++++++++++
>  linux-user/syscall.c        | 20 +++++++++++++++-----
>  linux-user/user-internals.h |  4 ++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..dfb303a1f2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>  
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char ** qemu_argv;
> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>  
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>  
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
>  
>      optind = parse_args(argc, argv);
>  
> +    if (qemu_dup_for_children) {
> +        int i;
> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace * LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9d5415674d..732ef89054 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = !is_execveat && qemu_dup_for_children;
>  
>      argc = 0;
>  
> @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>          envc++;
>      }
>  
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>  
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>      }
>  
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    }
> +    else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8553,7 +8563,7 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>  
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> index 5c7f173ceb..0719e65ff4 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>  
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char ** qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>  
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 3 weeks, 4 days ago
On Tue, Oct 29, 2024 at 10:23 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Noah Goldstein <goldstein.w.n@gmail.com> writes:
>
> > The new option '-qemu-children' makes it so that on `execve` the child
> > process will be launch by the same `qemu` executable that is currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
>
> Broadly I don't see anything wrong with the patch. However my principle
> concern is how test it. Currently nothing in check-tcg ensures this
> mechanism works and stresses the argument copying.

Fair enough. I will add some tests.
>
> >
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace * LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> >
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
>
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> The new option '-qemu-children' makes it so that on `execve` the
> child
> process will be launch by the same `qemu` executable that is
> currently
> running along with its current commandline arguments.
> 
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.

Another reason to have this is that one may not have root permissions
to configure binfmt-misc.

There was a similar patch posted to the mailing list some years back,
which I used to cherry-pick when I needed this. I'm not sure what
happened to that discussion though.

> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c           | 22 ++++++++++++++++++++++
>  linux-user/syscall.c        | 20 +++++++++++++++-----
>  linux-user/user-internals.h |  4 ++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..dfb303a1f2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>  
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char ** qemu_argv;

Style: ** belong to the variable name.
There are a couple other issues, please check the output of

git format-patch -1 --stdout | ./scripts/checkpatch.pl -

> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be
> overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>  
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>  
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] =
> {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false,
> handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with
> qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
>  
>      optind = parse_args(argc, argv);
>  
> +    if (qemu_dup_for_children) {
> +        int i;

I get the following build error:

qemu/linux-user/main.c: In function ‘main’:
qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a
previous local [-Werror=shadow=compatible-local]
  746 |         int i;
      |             ^
qemu/linux-user/main.c:699:9: note: shadowed declaration is here
  699 |     int i;
      |         ^

I don't think this variable is needed at all.

> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace *
> LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9d5415674d..732ef89054 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env,
> int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool
> is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = !is_execveat && qemu_dup_for_children;

Wouldn't it be better to check for dirfd == AT_FDCWD?

>      argc = 0;
>  
> @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env,
> int dirfd,
>          envc++;
>      }
>  
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>  
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong),
> q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; gp; gp +=
> sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int
> dirfd,
>      }
>  
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    }
> +    else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8553,7 +8563,7 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>  
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong),
> q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; *q; gp +=
> sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-
> internals.h
> index 5c7f173ceb..0719e65ff4 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>  
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char ** qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>  
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t
> *buf_temp,
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Laurent Vivier 1 month, 3 weeks ago
Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
>> The new option '-qemu-children' makes it so that on `execve` the
>> child
>> process will be launch by the same `qemu` executable that is
>> currently
>> running along with its current commandline arguments.
>>
>> The motivation for the change is to make it so that plugins running
>> through `qemu` can continue to run on children.  Why not just
>> `binfmt`?: Plugins can be desirable regardless of system/architecture
>> emulation, and can sometimes be useful for elf files that can run
>> natively. Enabling `binfmt` for all natively runnable elf files may
>> not be desirable.
> 
> Another reason to have this is that one may not have root permissions
> to configure binfmt-misc.

A little note on that: binfmt_misc is now part of the user namespace (since linux v6.7), so you can 
configure binfmt_misc as a non root user in a given namepace.

There is helper to use it with unshare from util-linux, you can do things like that:

   With 'F' flag, load the interpreter from the initial namespace:

     $ /bin/qemu-m68k-static --version
     qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
     Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
     $ unshare --map-root-user --fork --pid 
--load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu-m68k-static:OCF" 
--root=chroot/m68k/sid
     # QEMU_VERSION= ls
     qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
     Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
     # /qemu-m68k  --version
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers

   Without 'F' flag, from inside the namespace:

     $ unshare --map-root-user --fork --pid 
--load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-m68k:OC" 
--root=chroot/m68k/sid
     # QEMU_VERSION= ls
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers
     # /qemu-m68k  --version
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers

Thanks,
Laurent


Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > The new option '-qemu-children' makes it so that on `execve` the
> > > child
> > > process will be launch by the same `qemu` executable that is
> > > currently
> > > running along with its current commandline arguments.
> > > 
> > > The motivation for the change is to make it so that plugins
> > > running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of
> > > system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files
> > > may
> > > not be desirable.
> > 
> > Another reason to have this is that one may not have root
> > permissions
> > to configure binfmt-misc.
> 
> A little note on that: binfmt_misc is now part of the user namespace
> (since linux v6.7), so you can 
> configure binfmt_misc as a non root user in a given namepace.
> 
> There is helper to use it with unshare from util-linux, you can do
> things like that:
> 
>    With 'F' flag, load the interpreter from the initial namespace:
> 
>      $ /bin/qemu-m68k-static --version
>      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> developers
>      $ unshare --map-root-user --fork --pid 
> --load-interp=":qemu-
> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu
> -m68k-static:OCF" 
> --root=chroot/m68k/sid
>      # QEMU_VERSION= ls
>      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> developers
>      # /qemu-m68k  --version
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
> 
>    Without 'F' flag, from inside the namespace:
> 
>      $ unshare --map-root-user --fork --pid 
> --load-interp=":qemu-
> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-
> m68k:OC" 
> --root=chroot/m68k/sid
>      # QEMU_VERSION= ls
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
>      # /qemu-m68k  --version
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
> 
> Thanks,
> Laurent
> 

Thanks for posting this, I wasn't aware of this feature and it looks
really useful. 

IIUC it also resolves the main problem this patch is dealing with:

  Enabling `binfmt` for all natively runnable elf files may
  not be desirable.
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > child
> > > > process will be launch by the same `qemu` executable that is
> > > > currently
> > > > running along with its current commandline arguments.
> > > >
> > > > The motivation for the change is to make it so that plugins
> > > > running
> > > > through `qemu` can continue to run on children.  Why not just
> > > > `binfmt`?: Plugins can be desirable regardless of
> > > > system/architecture
> > > > emulation, and can sometimes be useful for elf files that can run
> > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > may
> > > > not be desirable.
> > >
> > > Another reason to have this is that one may not have root
> > > permissions
> > > to configure binfmt-misc.
> >
> > A little note on that: binfmt_misc is now part of the user namespace
> > (since linux v6.7), so you can
> > configure binfmt_misc as a non root user in a given namepace.
> >
> > There is helper to use it with unshare from util-linux, you can do
> > things like that:
> >
> >    With 'F' flag, load the interpreter from the initial namespace:
> >
> >      $ /bin/qemu-m68k-static --version
> >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > developers
> >      $ unshare --map-root-user --fork --pid
> > --load-interp=":qemu-
> > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu
> > -m68k-static:OCF"
> > --root=chroot/m68k/sid
> >      # QEMU_VERSION= ls
> >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > developers
> >      # /qemu-m68k  --version
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >
> >    Without 'F' flag, from inside the namespace:
> >
> >      $ unshare --map-root-user --fork --pid
> > --load-interp=":qemu-
> > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-
> > m68k:OC"
> > --root=chroot/m68k/sid
> >      # QEMU_VERSION= ls
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >      # /qemu-m68k  --version
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >
> > Thanks,
> > Laurent
> >
>
> Thanks for posting this, I wasn't aware of this feature and it looks
> really useful.
>
> IIUC it also resolves the main problem this patch is dealing with:

I might misunderstand, but I don't think it does in the sense
that it still might not be desirable to use the same qemu flags
for the entire class of executables.

I.e the original motivating case was wanting to attach
some plugins to a process and its children and AFAICT
binfmt still doesn't give that level of control.
>
>   Enabling `binfmt` for all natively runnable elf files may
>   not be desirable.
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > The new option '-qemu-children' makes it so that on `execve`
> > > > > the
> > > > > child
> > > > > process will be launch by the same `qemu` executable that is
> > > > > currently
> > > > > running along with its current commandline arguments.
> > > > > 
> > > > > The motivation for the change is to make it so that plugins
> > > > > running
> > > > > through `qemu` can continue to run on children.  Why not just
> > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > system/architecture
> > > > > emulation, and can sometimes be useful for elf files that can
> > > > > run
> > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > files
> > > > > may
> > > > > not be desirable.
> > > > 
> > > > Another reason to have this is that one may not have root
> > > > permissions
> > > > to configure binfmt-misc.
> > > 
> > > A little note on that: binfmt_misc is now part of the user
> > > namespace
> > > (since linux v6.7), so you can
> > > configure binfmt_misc as a non root user in a given namepace.
> > > 
> > > There is helper to use it with unshare from util-linux, you can
> > > do
> > > things like that:
> > > 
> > >    With 'F' flag, load the interpreter from the initial
> > > namespace:
> > > 
> > >      $ /bin/qemu-m68k-static --version
> > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > developers
> > >      $ unshare --map-root-user --fork --pid
> > > --load-interp=":qemu-
> > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > 0\\x
> > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > x00\
> > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
> > > qemu
> > > -m68k-static:OCF"
> > > --root=chroot/m68k/sid
> > >      # QEMU_VERSION= ls
> > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > developers
> > >      # /qemu-m68k  --version
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > > 
> > >    Without 'F' flag, from inside the namespace:
> > > 
> > >      $ unshare --map-root-user --fork --pid
> > > --load-interp=":qemu-
> > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > 0\\x
> > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > x00\
> > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
> > > -
> > > m68k:OC"
> > > --root=chroot/m68k/sid
> > >      # QEMU_VERSION= ls
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > >      # /qemu-m68k  --version
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > > 
> > > Thanks,
> > > Laurent
> > > 
> > 
> > Thanks for posting this, I wasn't aware of this feature and it
> > looks
> > really useful.
> > 
> > IIUC it also resolves the main problem this patch is dealing with:
> 
> I might misunderstand, but I don't think it does in the sense
> that it still might not be desirable to use the same qemu flags
> for the entire class of executables.
> 
> I.e the original motivating case was wanting to attach
> some plugins to a process and its children and AFAICT
> binfmt still doesn't give that level of control.

I think if you start a process in a user namespace, which has a
binfmt_misc handler for a certain class of binaries, then this handler
will affect only this process and its children, and not the rest of the
system.

> >   Enabling `binfmt` for all natively runnable elf files may
> >   not be desirable.
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Laurent Vivier 1 month, 3 weeks ago
Le 02/10/2024 à 16:53, Ilya Leoshkevich a écrit :
> On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
>> On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
>> wrote:
>>>
>>> On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
>>>> Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
>>>>> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
>>>>>> The new option '-qemu-children' makes it so that on `execve`
>>>>>> the
>>>>>> child
>>>>>> process will be launch by the same `qemu` executable that is
>>>>>> currently
>>>>>> running along with its current commandline arguments.
>>>>>>
>>>>>> The motivation for the change is to make it so that plugins
>>>>>> running
>>>>>> through `qemu` can continue to run on children.  Why not just
>>>>>> `binfmt`?: Plugins can be desirable regardless of
>>>>>> system/architecture
>>>>>> emulation, and can sometimes be useful for elf files that can
>>>>>> run
>>>>>> natively. Enabling `binfmt` for all natively runnable elf
>>>>>> files
>>>>>> may
>>>>>> not be desirable.
>>>>>
>>>>> Another reason to have this is that one may not have root
>>>>> permissions
>>>>> to configure binfmt-misc.
>>>>
>>>> A little note on that: binfmt_misc is now part of the user
>>>> namespace
>>>> (since linux v6.7), so you can
>>>> configure binfmt_misc as a non root user in a given namepace.
>>>>
>>>> There is helper to use it with unshare from util-linux, you can
>>>> do
>>>> things like that:
>>>>
>>>>     With 'F' flag, load the interpreter from the initial
>>>> namespace:
>>>>
>>>>       $ /bin/qemu-m68k-static --version
>>>>       qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>>>>       Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       $ unshare --map-root-user --fork --pid
>>>> --load-interp=":qemu-
>>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
>>>> 0\\x
>>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
>>>> x00\
>>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
>>>> qemu
>>>> -m68k-static:OCF"
>>>> --root=chroot/m68k/sid
>>>>       # QEMU_VERSION= ls
>>>>       qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>>>>       Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       # /qemu-m68k  --version
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>
>>>>     Without 'F' flag, from inside the namespace:
>>>>
>>>>       $ unshare --map-root-user --fork --pid
>>>> --load-interp=":qemu-
>>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
>>>> 0\\x
>>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
>>>> x00\
>>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
>>>> -
>>>> m68k:OC"
>>>> --root=chroot/m68k/sid
>>>>       # QEMU_VERSION= ls
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       # /qemu-m68k  --version
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>
>>>> Thanks,
>>>> Laurent
>>>>
>>>
>>> Thanks for posting this, I wasn't aware of this feature and it
>>> looks
>>> really useful.
>>>
>>> IIUC it also resolves the main problem this patch is dealing with:
>>
>> I might misunderstand, but I don't think it does in the sense
>> that it still might not be desirable to use the same qemu flags
>> for the entire class of executables.
>>
>> I.e the original motivating case was wanting to attach
>> some plugins to a process and its children and AFAICT
>> binfmt still doesn't give that level of control.
> 
> I think if you start a process in a user namespace, which has a
> binfmt_misc handler for a certain class of binaries, then this handler
> will affect only this process and its children, and not the rest of the
> system.
>

Yes, the binfmt_misc configuration is only available in the given namespace.

Thanks,
Laurent


Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > The new option '-qemu-children' makes it so that on `execve`
> > > > > > the
> > > > > > child
> > > > > > process will be launch by the same `qemu` executable that is
> > > > > > currently
> > > > > > running along with its current commandline arguments.
> > > > > >
> > > > > > The motivation for the change is to make it so that plugins
> > > > > > running
> > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > system/architecture
> > > > > > emulation, and can sometimes be useful for elf files that can
> > > > > > run
> > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > files
> > > > > > may
> > > > > > not be desirable.
> > > > >
> > > > > Another reason to have this is that one may not have root
> > > > > permissions
> > > > > to configure binfmt-misc.
> > > >
> > > > A little note on that: binfmt_misc is now part of the user
> > > > namespace
> > > > (since linux v6.7), so you can
> > > > configure binfmt_misc as a non root user in a given namepace.
> > > >
> > > > There is helper to use it with unshare from util-linux, you can
> > > > do
> > > > things like that:
> > > >
> > > >    With 'F' flag, load the interpreter from the initial
> > > > namespace:
> > > >
> > > >      $ /bin/qemu-m68k-static --version
> > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      $ unshare --map-root-user --fork --pid
> > > > --load-interp=":qemu-
> > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > > 0\\x
> > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > > x00\
> > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
> > > > qemu
> > > > -m68k-static:OCF"
> > > > --root=chroot/m68k/sid
> > > >      # QEMU_VERSION= ls
> > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      # /qemu-m68k  --version
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >
> > > >    Without 'F' flag, from inside the namespace:
> > > >
> > > >      $ unshare --map-root-user --fork --pid
> > > > --load-interp=":qemu-
> > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > > 0\\x
> > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > > x00\
> > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
> > > > -
> > > > m68k:OC"
> > > > --root=chroot/m68k/sid
> > > >      # QEMU_VERSION= ls
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      # /qemu-m68k  --version
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >
> > > > Thanks,
> > > > Laurent
> > > >
> > >
> > > Thanks for posting this, I wasn't aware of this feature and it
> > > looks
> > > really useful.
> > >
> > > IIUC it also resolves the main problem this patch is dealing with:
> >
> > I might misunderstand, but I don't think it does in the sense
> > that it still might not be desirable to use the same qemu flags
> > for the entire class of executables.
> >
> > I.e the original motivating case was wanting to attach
> > some plugins to a process and its children and AFAICT
> > binfmt still doesn't give that level of control.
>
> I think if you start a process in a user namespace, which has a
> binfmt_misc handler for a certain class of binaries, then this handler
> will affect only this process and its children, and not the rest of the
> system.

It won't also affect other binaries in the user namespace?

>
> > >   Enabling `binfmt` for all natively runnable elf files may
> > >   not be desirable.
>
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > <iii@linux.ibm.com>
> > > wrote:
> > > > 
> > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > `execve`
> > > > > > > the
> > > > > > > child
> > > > > > > process will be launch by the same `qemu` executable that
> > > > > > > is
> > > > > > > currently
> > > > > > > running along with its current commandline arguments.
> > > > > > > 
> > > > > > > The motivation for the change is to make it so that
> > > > > > > plugins
> > > > > > > running
> > > > > > > through `qemu` can continue to run on children.  Why not
> > > > > > > just
> > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > system/architecture
> > > > > > > emulation, and can sometimes be useful for elf files that
> > > > > > > can
> > > > > > > run
> > > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > > files
> > > > > > > may
> > > > > > > not be desirable.
> > > > > > 
> > > > > > Another reason to have this is that one may not have root
> > > > > > permissions
> > > > > > to configure binfmt-misc.
> > > > > 
> > > > > A little note on that: binfmt_misc is now part of the user
> > > > > namespace
> > > > > (since linux v6.7), so you can
> > > > > configure binfmt_misc as a non root user in a given namepace.
> > > > > 
> > > > > There is helper to use it with unshare from util-linux, you
> > > > > can
> > > > > do
> > > > > things like that:
> > > > > 
> > > > >    With 'F' flag, load the interpreter from the initial
> > > > > namespace:
> > > > > 
> > > > >      $ /bin/qemu-m68k-static --version
> > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      $ unshare --map-root-user --fork --pid
> > > > > --load-interp=":qemu-
> > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > \\x0
> > > > > 0\\x
> > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > fe\\
> > > > > x00\
> > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > bin/
> > > > > qemu
> > > > > -m68k-static:OCF"
> > > > > --root=chroot/m68k/sid
> > > > >      # QEMU_VERSION= ls
> > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      # /qemu-m68k  --version
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > > 
> > > > >    Without 'F' flag, from inside the namespace:
> > > > > 
> > > > >      $ unshare --map-root-user --fork --pid
> > > > > --load-interp=":qemu-
> > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > \\x0
> > > > > 0\\x
> > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > fe\\
> > > > > x00\
> > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > qemu
> > > > > -
> > > > > m68k:OC"
> > > > > --root=chroot/m68k/sid
> > > > >      # QEMU_VERSION= ls
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      # /qemu-m68k  --version
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > > 
> > > > > Thanks,
> > > > > Laurent
> > > > > 
> > > > 
> > > > Thanks for posting this, I wasn't aware of this feature and it
> > > > looks
> > > > really useful.
> > > > 
> > > > IIUC it also resolves the main problem this patch is dealing
> > > > with:
> > > 
> > > I might misunderstand, but I don't think it does in the sense
> > > that it still might not be desirable to use the same qemu flags
> > > for the entire class of executables.
> > > 
> > > I.e the original motivating case was wanting to attach
> > > some plugins to a process and its children and AFAICT
> > > binfmt still doesn't give that level of control.
> > 
> > I think if you start a process in a user namespace, which has a
> > binfmt_misc handler for a certain class of binaries, then this
> > handler
> > will affect only this process and its children, and not the rest of
> > the
> > system.
> 
> It won't also affect other binaries in the user namespace?

It would, but you should be able to create a user namespace just
for your program. It should also be possible to nest user namespaces.

> > > >   Enabling `binfmt` for all natively runnable elf files may
> > > >   not be desirable.
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > `execve`
> > > > > > > > the
> > > > > > > > child
> > > > > > > > process will be launch by the same `qemu` executable that
> > > > > > > > is
> > > > > > > > currently
> > > > > > > > running along with its current commandline arguments.
> > > > > > > >
> > > > > > > > The motivation for the change is to make it so that
> > > > > > > > plugins
> > > > > > > > running
> > > > > > > > through `qemu` can continue to run on children.  Why not
> > > > > > > > just
> > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > system/architecture
> > > > > > > > emulation, and can sometimes be useful for elf files that
> > > > > > > > can
> > > > > > > > run
> > > > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > > > files
> > > > > > > > may
> > > > > > > > not be desirable.
> > > > > > >
> > > > > > > Another reason to have this is that one may not have root
> > > > > > > permissions
> > > > > > > to configure binfmt-misc.
> > > > > >
> > > > > > A little note on that: binfmt_misc is now part of the user
> > > > > > namespace
> > > > > > (since linux v6.7), so you can
> > > > > > configure binfmt_misc as a non root user in a given namepace.
> > > > > >
> > > > > > There is helper to use it with unshare from util-linux, you
> > > > > > can
> > > > > > do
> > > > > > things like that:
> > > > > >
> > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > namespace:
> > > > > >
> > > > > >      $ /bin/qemu-m68k-static --version
> > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > --load-interp=":qemu-
> > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > > \\x0
> > > > > > 0\\x
> > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > > fe\\
> > > > > > x00\
> > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > > bin/
> > > > > > qemu
> > > > > > -m68k-static:OCF"
> > > > > > --root=chroot/m68k/sid
> > > > > >      # QEMU_VERSION= ls
> > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      # /qemu-m68k  --version
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >
> > > > > >    Without 'F' flag, from inside the namespace:
> > > > > >
> > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > --load-interp=":qemu-
> > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > > \\x0
> > > > > > 0\\x
> > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > > fe\\
> > > > > > x00\
> > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > > qemu
> > > > > > -
> > > > > > m68k:OC"
> > > > > > --root=chroot/m68k/sid
> > > > > >      # QEMU_VERSION= ls
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      # /qemu-m68k  --version
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >
> > > > > > Thanks,
> > > > > > Laurent
> > > > > >
> > > > >
> > > > > Thanks for posting this, I wasn't aware of this feature and it
> > > > > looks
> > > > > really useful.
> > > > >
> > > > > IIUC it also resolves the main problem this patch is dealing
> > > > > with:
> > > >
> > > > I might misunderstand, but I don't think it does in the sense
> > > > that it still might not be desirable to use the same qemu flags
> > > > for the entire class of executables.
> > > >
> > > > I.e the original motivating case was wanting to attach
> > > > some plugins to a process and its children and AFAICT
> > > > binfmt still doesn't give that level of control.
> > >
> > > I think if you start a process in a user namespace, which has a
> > > binfmt_misc handler for a certain class of binaries, then this
> > > handler
> > > will affect only this process and its children, and not the rest of
> > > the
> > > system.
> >
> > It won't also affect other binaries in the user namespace?
>
> It would, but you should be able to create a user namespace just
> for your program. It should also be possible to nest user namespaces.

Okay fair enough. Still pro this patch as an easier means
but guess it loses any necessity.

To be clear, are you rejecting?
>
> > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > >   not be desirable.
>
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich
> > > <iii@linux.ibm.com>
> > > wrote:
> > > > 
> > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > > <iii@linux.ibm.com>
> > > > > wrote:
> > > > > > 
> > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein
> > > > > > > > wrote:
> > > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > > `execve`
> > > > > > > > > the
> > > > > > > > > child
> > > > > > > > > process will be launch by the same `qemu` executable
> > > > > > > > > that
> > > > > > > > > is
> > > > > > > > > currently
> > > > > > > > > running along with its current commandline arguments.
> > > > > > > > > 
> > > > > > > > > The motivation for the change is to make it so that
> > > > > > > > > plugins
> > > > > > > > > running
> > > > > > > > > through `qemu` can continue to run on children.  Why
> > > > > > > > > not
> > > > > > > > > just
> > > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > > system/architecture
> > > > > > > > > emulation, and can sometimes be useful for elf files
> > > > > > > > > that
> > > > > > > > > can
> > > > > > > > > run
> > > > > > > > > natively. Enabling `binfmt` for all natively runnable
> > > > > > > > > elf
> > > > > > > > > files
> > > > > > > > > may
> > > > > > > > > not be desirable.
> > > > > > > > 
> > > > > > > > Another reason to have this is that one may not have
> > > > > > > > root
> > > > > > > > permissions
> > > > > > > > to configure binfmt-misc.
> > > > > > > 
> > > > > > > A little note on that: binfmt_misc is now part of the
> > > > > > > user
> > > > > > > namespace
> > > > > > > (since linux v6.7), so you can
> > > > > > > configure binfmt_misc as a non root user in a given
> > > > > > > namepace.
> > > > > > > 
> > > > > > > There is helper to use it with unshare from util-linux,
> > > > > > > you
> > > > > > > can
> > > > > > > do
> > > > > > > things like that:
> > > > > > > 
> > > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > > namespace:
> > > > > > > 
> > > > > > >      $ /bin/qemu-m68k-static --version
> > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > --load-interp=":qemu-
> > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > \x00
> > > > > > > \\x0
> > > > > > > 0\\x
> > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > f\\x
> > > > > > > fe\\
> > > > > > > x00\
> > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > ff:/
> > > > > > > bin/
> > > > > > > qemu
> > > > > > > -m68k-static:OCF"
> > > > > > > --root=chroot/m68k/sid
> > > > > > >      # QEMU_VERSION= ls
> > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      # /qemu-m68k  --version
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > > 
> > > > > > >    Without 'F' flag, from inside the namespace:
> > > > > > > 
> > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > --load-interp=":qemu-
> > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > \x00
> > > > > > > \\x0
> > > > > > > 0\\x
> > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > f\\x
> > > > > > > fe\\
> > > > > > > x00\
> > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > ff:/
> > > > > > > qemu
> > > > > > > -
> > > > > > > m68k:OC"
> > > > > > > --root=chroot/m68k/sid
> > > > > > >      # QEMU_VERSION= ls
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      # /qemu-m68k  --version
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > > 
> > > > > > > Thanks,
> > > > > > > Laurent
> > > > > > > 
> > > > > > 
> > > > > > Thanks for posting this, I wasn't aware of this feature and
> > > > > > it
> > > > > > looks
> > > > > > really useful.
> > > > > > 
> > > > > > IIUC it also resolves the main problem this patch is
> > > > > > dealing
> > > > > > with:
> > > > > 
> > > > > I might misunderstand, but I don't think it does in the sense
> > > > > that it still might not be desirable to use the same qemu
> > > > > flags
> > > > > for the entire class of executables.
> > > > > 
> > > > > I.e the original motivating case was wanting to attach
> > > > > some plugins to a process and its children and AFAICT
> > > > > binfmt still doesn't give that level of control.
> > > > 
> > > > I think if you start a process in a user namespace, which has a
> > > > binfmt_misc handler for a certain class of binaries, then this
> > > > handler
> > > > will affect only this process and its children, and not the
> > > > rest of
> > > > the
> > > > system.
> > > 
> > > It won't also affect other binaries in the user namespace?
> > 
> > It would, but you should be able to create a user namespace just
> > for your program. It should also be possible to nest user
> > namespaces.
> 
> Okay fair enough. Still pro this patch as an easier means
> but guess it loses any necessity.
> 
> To be clear, are you rejecting?

No, personally I still find it useful, because the systems with old
kernels will be around for quite some time.

> > > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > > >   not be desirable.
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 11:35 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich
> > > > <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > > > <iii@linux.ibm.com>
> > > > > > wrote:
> > > > > > >
> > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein
> > > > > > > > > wrote:
> > > > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > > > `execve`
> > > > > > > > > > the
> > > > > > > > > > child
> > > > > > > > > > process will be launch by the same `qemu` executable
> > > > > > > > > > that
> > > > > > > > > > is
> > > > > > > > > > currently
> > > > > > > > > > running along with its current commandline arguments.
> > > > > > > > > >
> > > > > > > > > > The motivation for the change is to make it so that
> > > > > > > > > > plugins
> > > > > > > > > > running
> > > > > > > > > > through `qemu` can continue to run on children.  Why
> > > > > > > > > > not
> > > > > > > > > > just
> > > > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > > > system/architecture
> > > > > > > > > > emulation, and can sometimes be useful for elf files
> > > > > > > > > > that
> > > > > > > > > > can
> > > > > > > > > > run
> > > > > > > > > > natively. Enabling `binfmt` for all natively runnable
> > > > > > > > > > elf
> > > > > > > > > > files
> > > > > > > > > > may
> > > > > > > > > > not be desirable.
> > > > > > > > >
> > > > > > > > > Another reason to have this is that one may not have
> > > > > > > > > root
> > > > > > > > > permissions
> > > > > > > > > to configure binfmt-misc.
> > > > > > > >
> > > > > > > > A little note on that: binfmt_misc is now part of the
> > > > > > > > user
> > > > > > > > namespace
> > > > > > > > (since linux v6.7), so you can
> > > > > > > > configure binfmt_misc as a non root user in a given
> > > > > > > > namepace.
> > > > > > > >
> > > > > > > > There is helper to use it with unshare from util-linux,
> > > > > > > > you
> > > > > > > > can
> > > > > > > > do
> > > > > > > > things like that:
> > > > > > > >
> > > > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > > > namespace:
> > > > > > > >
> > > > > > > >      $ /bin/qemu-m68k-static --version
> > > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > > --load-interp=":qemu-
> > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > > \x00
> > > > > > > > \\x0
> > > > > > > > 0\\x
> > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > > f\\x
> > > > > > > > fe\\
> > > > > > > > x00\
> > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > > ff:/
> > > > > > > > bin/
> > > > > > > > qemu
> > > > > > > > -m68k-static:OCF"
> > > > > > > > --root=chroot/m68k/sid
> > > > > > > >      # QEMU_VERSION= ls
> > > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      # /qemu-m68k  --version
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >
> > > > > > > >    Without 'F' flag, from inside the namespace:
> > > > > > > >
> > > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > > --load-interp=":qemu-
> > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > > \x00
> > > > > > > > \\x0
> > > > > > > > 0\\x
> > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > > f\\x
> > > > > > > > fe\\
> > > > > > > > x00\
> > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > > ff:/
> > > > > > > > qemu
> > > > > > > > -
> > > > > > > > m68k:OC"
> > > > > > > > --root=chroot/m68k/sid
> > > > > > > >      # QEMU_VERSION= ls
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      # /qemu-m68k  --version
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >
> > > > > > > > Thanks,
> > > > > > > > Laurent
> > > > > > > >
> > > > > > >
> > > > > > > Thanks for posting this, I wasn't aware of this feature and
> > > > > > > it
> > > > > > > looks
> > > > > > > really useful.
> > > > > > >
> > > > > > > IIUC it also resolves the main problem this patch is
> > > > > > > dealing
> > > > > > > with:
> > > > > >
> > > > > > I might misunderstand, but I don't think it does in the sense
> > > > > > that it still might not be desirable to use the same qemu
> > > > > > flags
> > > > > > for the entire class of executables.
> > > > > >
> > > > > > I.e the original motivating case was wanting to attach
> > > > > > some plugins to a process and its children and AFAICT
> > > > > > binfmt still doesn't give that level of control.
> > > > >
> > > > > I think if you start a process in a user namespace, which has a
> > > > > binfmt_misc handler for a certain class of binaries, then this
> > > > > handler
> > > > > will affect only this process and its children, and not the
> > > > > rest of
> > > > > the
> > > > > system.
> > > >
> > > > It won't also affect other binaries in the user namespace?
> > >
> > > It would, but you should be able to create a user namespace just
> > > for your program. It should also be possible to nest user
> > > namespaces.
> >
> > Okay fair enough. Still pro this patch as an easier means
> > but guess it loses any necessity.
> >
> > To be clear, are you rejecting?
>
> No, personally I still find it useful, because the systems with old
> kernels will be around for quite some time.

:)

>
> > > > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > > > >   not be desirable.
>
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > The new option '-qemu-children' makes it so that on `execve` the
> > child
> > process will be launch by the same `qemu` executable that is
> > currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
>
> Another reason to have this is that one may not have root permissions
> to configure binfmt-misc.
>
+1

> There was a similar patch posted to the mailing list some years back,
> which I used to cherry-pick when I needed this. I'm not sure what
> happened to that discussion though.

Yes(ish): https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/

>
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
>
> Style: ** belong to the variable name.
> There are a couple other issues, please check the output of
>
> git format-patch -1 --stdout | ./scripts/checkpatch.pl -
>

Will Do.

> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be
> > overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] =
> > {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false,
> > handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with
> > qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
>
> I get the following build error:
>
> qemu/linux-user/main.c: In function ‘main’:
> qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a
> previous local [-Werror=shadow=compatible-local]
>   746 |         int i;
>       |             ^
> qemu/linux-user/main.c:699:9: note: shadowed declaration is here
>   699 |     int i;
>       |         ^
>
> I don't think this variable is needed at all.
>

Bah sorry.

> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace *
> > LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env,
> > int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool
> > is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
>
> Wouldn't it be better to check for dirfd == AT_FDCWD?

Yeah that works.
Ill fix in V2.
>
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env,
> > int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong),
> > q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp +=
> > sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int
> > dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong),
> > q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp +=
> > sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-
> > internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t
> > *buf_temp,
>
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Ilya Leoshkevich 1 month, 3 weeks ago
On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > The new option '-qemu-children' makes it so that on `execve` the
> > > child
> > > process will be launch by the same `qemu` executable that is
> > > currently
> > > running along with its current commandline arguments.
> > > 
> > > The motivation for the change is to make it so that plugins
> > > running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of
> > > system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files
> > > may
> > > not be desirable.
> > 
> > Another reason to have this is that one may not have root
> > permissions
> > to configure binfmt-misc.
> > 
> +1
> 
> > There was a similar patch posted to the mailing list some years
> > back,
> > which I used to cherry-pick when I needed this. I'm not sure what
> > happened to that discussion though.
> 
> Yes(ish):
> https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/

Thanks for finding this! Don't we need the shebang handling here as
well?

Laurent, do you per chance know why was it not accepted back
then?Unfortunately I cannot find any discussion associated with v3 or
v4
[1]. There were some concerns regarding v1 [2], but from what I can see
they all were addressed.

[1]
https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
[2]
https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > child
> > > > process will be launch by the same `qemu` executable that is
> > > > currently
> > > > running along with its current commandline arguments.
> > > >
> > > > The motivation for the change is to make it so that plugins
> > > > running
> > > > through `qemu` can continue to run on children.  Why not just
> > > > `binfmt`?: Plugins can be desirable regardless of
> > > > system/architecture
> > > > emulation, and can sometimes be useful for elf files that can run
> > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > may
> > > > not be desirable.
> > >
> > > Another reason to have this is that one may not have root
> > > permissions
> > > to configure binfmt-misc.
> > >
> > +1
> >
> > > There was a similar patch posted to the mailing list some years
> > > back,
> > > which I used to cherry-pick when I needed this. I'm not sure what
> > > happened to that discussion though.
> >
> > Yes(ish):
> > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
>
> Thanks for finding this! Don't we need the shebang handling here as
> well?
>
I don't think so. In this case we aren't making it so execve can point to
some arbitrary impl, just that we propagate the current running qemu
env.

> Laurent, do you per chance know why was it not accepted back
> then?Unfortunately I cannot find any discussion associated with v3 or
> v4
> [1]. There were some concerns regarding v1 [2], but from what I can see
> they all were addressed.
>
> [1]
> https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> [2]
> https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 1 week ago
On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> >
> > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > wrote:
> > > >
> > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > child
> > > > > process will be launch by the same `qemu` executable that is
> > > > > currently
> > > > > running along with its current commandline arguments.
> > > > >
> > > > > The motivation for the change is to make it so that plugins
> > > > > running
> > > > > through `qemu` can continue to run on children.  Why not just
> > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > system/architecture
> > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > may
> > > > > not be desirable.
> > > >
> > > > Another reason to have this is that one may not have root
> > > > permissions
> > > > to configure binfmt-misc.
> > > >
> > > +1
> > >
> > > > There was a similar patch posted to the mailing list some years
> > > > back,
> > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > happened to that discussion though.
> > >
> > > Yes(ish):
> > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> >
> > Thanks for finding this! Don't we need the shebang handling here as
> > well?
> >
> I don't think so. In this case we aren't making it so execve can point to
> some arbitrary impl, just that we propagate the current running qemu
> env.
>

ping
> > Laurent, do you per chance know why was it not accepted back
> > then?Unfortunately I cannot find any discussion associated with v3 or
> > v4
> > [1]. There were some concerns regarding v1 [2], but from what I can see
> > they all were addressed.
> >
> > [1]
> > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > [2]
> > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month ago
On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> > >
> > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > > child
> > > > > > process will be launch by the same `qemu` executable that is
> > > > > > currently
> > > > > > running along with its current commandline arguments.
> > > > > >
> > > > > > The motivation for the change is to make it so that plugins
> > > > > > running
> > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > system/architecture
> > > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > > may
> > > > > > not be desirable.
> > > > >
> > > > > Another reason to have this is that one may not have root
> > > > > permissions
> > > > > to configure binfmt-misc.
> > > > >
> > > > +1
> > > >
> > > > > There was a similar patch posted to the mailing list some years
> > > > > back,
> > > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > > happened to that discussion though.
> > > >
> > > > Yes(ish):
> > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> > >
> > > Thanks for finding this! Don't we need the shebang handling here as
> > > well?
> > >
> > I don't think so. In this case we aren't making it so execve can point to
> > some arbitrary impl, just that we propagate the current running qemu
> > env.
> >
>
> ping
ping2
> > > Laurent, do you per chance know why was it not accepted back
> > > then?Unfortunately I cannot find any discussion associated with v3 or
> > > v4
> > > [1]. There were some concerns regarding v1 [2], but from what I can see
> > > they all were addressed.
> > >
> > > [1]
> > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > > [2]
> > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 3 weeks, 4 days ago
On Tue, Oct 22, 2024 at 5:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> > > >
> > > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > > > wrote:
> > > > > >
> > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > > > child
> > > > > > > process will be launch by the same `qemu` executable that is
> > > > > > > currently
> > > > > > > running along with its current commandline arguments.
> > > > > > >
> > > > > > > The motivation for the change is to make it so that plugins
> > > > > > > running
> > > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > system/architecture
> > > > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > > > may
> > > > > > > not be desirable.
> > > > > >
> > > > > > Another reason to have this is that one may not have root
> > > > > > permissions
> > > > > > to configure binfmt-misc.
> > > > > >
> > > > > +1
> > > > >
> > > > > > There was a similar patch posted to the mailing list some years
> > > > > > back,
> > > > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > > > happened to that discussion though.
> > > > >
> > > > > Yes(ish):
> > > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> > > >
> > > > Thanks for finding this! Don't we need the shebang handling here as
> > > > well?
> > > >
> > > I don't think so. In this case we aren't making it so execve can point to
> > > some arbitrary impl, just that we propagate the current running qemu
> > > env.
> > >
> >
> > ping
> ping2
pint3
> > > > Laurent, do you per chance know why was it not accepted back
> > > > then?Unfortunately I cannot find any discussion associated with v3 or
> > > > v4
> > > > [1]. There were some concerns regarding v1 [2], but from what I can see
> > > > they all were addressed.
> > > >
> > > > [1]
> > > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > > > [2]
> > > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 months, 3 weeks ago
On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The new option '-qemu-children' makes it so that on `execve` the child
> process will be launch by the same `qemu` executable that is currently
> running along with its current commandline arguments.
>
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.
>
> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c           | 22 ++++++++++++++++++++++
>  linux-user/syscall.c        | 20 +++++++++++++++-----
>  linux-user/user-internals.h |  4 ++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..dfb303a1f2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char ** qemu_argv;
> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>
> @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
>
>      optind = parse_args(argc, argv);
>
> +    if (qemu_dup_for_children) {
> +        int i;
> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace * LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9d5415674d..732ef89054 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = !is_execveat && qemu_dup_for_children;
>
I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts
it would be nice to support that as well.

>      argc = 0;
>
> @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>          envc++;
>      }
>
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>      }
>
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    }
> +    else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8553,7 +8563,7 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> index 5c7f173ceb..0719e65ff4 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char ** qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> --
> 2.34.1
>
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 months, 2 weeks ago
On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The new option '-qemu-children' makes it so that on `execve` the child
> > process will be launch by the same `qemu` executable that is currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
> >
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace * LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> >
> I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts
> it would be nice to support that as well.
>
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> > --
> > 2.34.1
> >

ping
Re: [PATCH v1] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 months ago
On Tue, Sep 10, 2024 at 3:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > The new option '-qemu-children' makes it so that on `execve` the child
> > > process will be launch by the same `qemu` executable that is currently
> > > running along with its current commandline arguments.
> > >
> > > The motivation for the change is to make it so that plugins running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files may
> > > not be desirable.
> > >
> > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > > ---
> > >  linux-user/main.c           | 22 ++++++++++++++++++++++
> > >  linux-user/syscall.c        | 20 +++++++++++++++-----
> > >  linux-user/user-internals.h |  4 ++++
> > >  3 files changed, 41 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/linux-user/main.c b/linux-user/main.c
> > > index 8143a0d4b0..dfb303a1f2 100644
> > > --- a/linux-user/main.c
> > > +++ b/linux-user/main.c
> > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> > >  uintptr_t guest_base;
> > >  bool have_guest_base;
> > >
> > > +bool qemu_dup_for_children;
> > > +int qemu_argc;
> > > +char ** qemu_argv;
> > > +
> > >  /*
> > >   * Used to implement backwards-compatibility for the `-strace`, and
> > >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> > >      perf_enable_jitdump();
> > >  }
> > >
> > > +static void handle_arg_qemu_children(const char *arg)
> > > +{
> > > +    qemu_dup_for_children = true;
> > > +}
> > > +
> > >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> > >
> > >  #ifdef CONFIG_PLUGIN
> > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> > >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> > >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> > >       "",           "Generate a jit-${pid}.dump file for perf"},
> > > +    {"qemu-children",
> > > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > > +     "",           "Run child processes (created with execve) with qemu "
> > > +                   "(as instantiated for the parent)"},
> > >      {NULL, NULL, false, NULL, NULL, NULL}
> > >  };
> > >
> > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> > >
> > >      optind = parse_args(argc, argv);
> > >
> > > +    if (qemu_dup_for_children) {
> > > +        int i;
> > > +        qemu_argc = optind;
> > > +        qemu_argv = g_new0(char *, qemu_argc);
> > > +        for (i = 0; i < optind; ++i) {
> > > +            qemu_argv[i] = strdup(argv[i]);
> > > +        }
> > > +    }
> > > +
> > >      qemu_set_log_filename_flags(last_log_filename,
> > >                                  last_log_mask | (enable_strace * LOG_STRACE),
> > >                                  &error_fatal);
> > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > > index 9d5415674d..732ef89054 100644
> > > --- a/linux-user/syscall.c
> > > +++ b/linux-user/syscall.c
> > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >                      abi_long pathname, abi_long guest_argp,
> > >                      abi_long guest_envp, int flags, bool is_execveat)
> > >  {
> > > -    int ret;
> > > +    int ret, argp_offset;
> > >      char **argp, **envp;
> > >      int argc, envc;
> > >      abi_ulong gp;
> > >      abi_ulong addr;
> > >      char **q;
> > >      void *p;
> > > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> > >
> > I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts
> > it would be nice to support that as well.
> >
> > >      argc = 0;
> > >
> > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >          envc++;
> > >      }
> > >
> > > -    argp = g_new0(char *, argc + 1);
> > > +    argp_offset = through_qemu ? qemu_argc : 0;
> > > +    argp = g_new0(char *, argc + argp_offset + 1);
> > >      envp = g_new0(char *, envc + 1);
> > >
> > > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> > >          if (get_user_ual(addr, gp)) {
> > >              goto execve_efault;
> > >          }
> > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >      }
> > >
> > >      const char *exe = p;
> > > -    if (is_proc_myself(p, "exe")) {
> > > +    if (through_qemu) {
> > > +        int i;
> > > +        for (i = 0; i < argp_offset; ++i) {
> > > +            argp[i] = qemu_argv[i];
> > > +        }
> > > +        exe = qemu_argv[0];
> > > +    }
> > > +    else if (is_proc_myself(p, "exe")) {
> > >          exe = exec_path;
> > >      }
> > > +
> > >      ret = is_execveat
> > >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> > >          : safe_execve(exe, argp, envp);
> > > @@ -8553,7 +8563,7 @@ execve_efault:
> > >      ret = -TARGET_EFAULT;
> > >
> > >  execve_end:
> > > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> > >          if (get_user_ual(addr, gp) || !addr) {
> > >              break;
> > >          }
> > > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > > index 5c7f173ceb..0719e65ff4 100644
> > > --- a/linux-user/user-internals.h
> > > +++ b/linux-user/user-internals.h
> > > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> > >  extern const char *qemu_uname_release;
> > >  extern unsigned long mmap_min_addr;
> > >
> > > +extern bool qemu_dup_for_children;
> > > +extern int qemu_argc;
> > > +extern char ** qemu_argv;
> > > +
> > >  typedef struct IOCTLEntry IOCTLEntry;
> > >
> > >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> > > --
> > > 2.34.1
> > >
>
> ping
ping
linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 3 weeks, 3 days ago
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.

The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children.  Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
 linux-user/main.c                             | 21 ++++++
 linux-user/syscall.c                          | 21 ++++--
 linux-user/user-internals.h                   |  4 ++
 tests/tcg/multiarch/Makefile.target           |  8 +++
 .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
 5 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c

diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..5e3d41dc2b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
 
+bool qemu_dup_for_children;
+int qemu_argc;
+char **qemu_argv;
+
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
  * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
     perf_enable_jitdump();
 }
 
+static void handle_arg_qemu_children(const char *arg)
+{
+    qemu_dup_for_children = true;
+}
+
 static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
 
 #ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
      "",           "Generate a /tmp/perf-${pid}.map file for perf"},
     {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
      "",           "Generate a jit-${pid}.dump file for perf"},
+    {"qemu-children",
+                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
+     "",           "Run child processes (created with execve) with qemu "
+                   "(as instantiated for the parent)"},
     {NULL, NULL, false, NULL, NULL, NULL}
 };
 
@@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
 
     optind = parse_args(argc, argv);
 
+    if (qemu_dup_for_children) {
+        qemu_argc = optind;
+        qemu_argv = g_new0(char *, qemu_argc);
+        for (i = 0; i < optind; ++i) {
+            qemu_argv[i] = strdup(argv[i]);
+        }
+    }
+
     qemu_set_log_filename_flags(last_log_filename,
                                 last_log_mask | (enable_strace * LOG_STRACE),
                                 &error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 59b2080b98..96b105e9ce 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long pathname, abi_long guest_argp,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
-    int ret;
+    int ret, argp_offset;
     char **argp, **envp;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
     char **q;
     void *p;
+    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
 
     argc = 0;
 
@@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
         envc++;
     }
 
-    argp = g_new0(char *, argc + 1);
+    argp_offset = through_qemu ? qemu_argc : 0;
+    argp = g_new0(char *, argc + argp_offset + 1);
     envp = g_new0(char *, envc + 1);
 
-    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         gp; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp)) {
             goto execve_efault;
         }
@@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
     }
 
     const char *exe = p;
-    if (is_proc_myself(p, "exe")) {
+    if (through_qemu) {
+        int i;
+        for (i = 0; i < argp_offset; ++i) {
+            argp[i] = qemu_argv[i];
+        }
+        exe = qemu_argv[0];
+    } else if (is_proc_myself(p, "exe")) {
         exe = exec_path;
     }
+
     ret = is_execveat
         ? safe_execveat(dirfd, exe, argp, envp, flags)
         : safe_execve(exe, argp, envp);
@@ -8644,7 +8654,8 @@ execve_efault:
     ret = -TARGET_EFAULT;
 
 execve_end:
-    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         *q; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp) || !addr) {
             break;
         }
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 46ffc093f4..ed3ed666a0 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char **qemu_argv;
+
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 78b83d5575..0e220953e7 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -30,6 +30,14 @@ run-float_%: float_%
 	$(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref)
 
 
+run-linux-execve-qemu-children: linux-execve-qemu-children
+	$(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+	$(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
+run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children
+	$(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+	$(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
 testthread: LDFLAGS+=-lpthread
 
 threadcount: LDFLAGS+=-lpthread
diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
new file mode 100644
index 0000000000..60d6537666
--- /dev/null
+++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
@@ -0,0 +1,68 @@
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MAX_COMM_SIZE (4096)
+
+int
+main(int argc, char ** argv, char ** envp) {
+    int          fd;
+    char         next_arg[2];
+    char *       buf;
+    ssize_t      off;
+    const char * expec_comm;
+    assert(argc == 3 || argc == 4);
+    fd = open("/proc/self/comm", O_RDONLY);
+    assert(fd > 0);
+
+    buf = calloc(MAX_COMM_SIZE + 1, 1);
+    assert(buf != NULL);
+
+    off = 0;
+    for (;;) {
+        ssize_t res = read(fd, buf + off, 1);
+        if (res < 0 && errno != EAGAIN) {
+            perror("Failed to read comm");
+            return -1;
+        }
+        if (res == 0) {
+            break;
+        }
+
+        off += res;
+
+        if (off >= MAX_COMM_SIZE) {
+            fprintf(stderr, "/proc/self/comm too large for test\n");
+            return -1;
+        }
+    }
+    assert(off && buf[off] == '\0' && buf[off - 1] == '\n');
+    buf[off - 1] = '\0';
+    expec_comm   = basename(argv[1]);
+    if (argc == 3 && strncmp(buf, expec_comm, strlen(expec_comm))) {
+        fprintf(stderr,
+                "Didn't propagate qemu settings\nComm:  '%s'\nExpec: '%s'\n",
+                buf, expec_comm);
+        return -1;
+    }
+    free(buf);
+    next_arg[0] = argv[2][0];
+    next_arg[1] = '\0';
+    if (next_arg[0] == '9') {
+        return 0;
+    }
+    next_arg[0] += 1;
+    char * next_args[] = { argv[0], argv[1], next_arg, NULL };
+    int    eres        = execve(argv[0], &next_args[0], envp);
+    if (eres != 0) {
+        fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno,
+                strerror(errno));
+        return -1;
+    }
+    return 0;
+}
-- 
2.43.0
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 4 days ago
On 10/30/24 14:10, Noah Goldstein wrote:
> The new option '-qemu-children' makes it so that on `execve` the child
> process will be launch by the same `qemu` executable that is currently
> running along with its current commandline arguments.
> 
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.
> 
> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>   linux-user/main.c                             | 21 ++++++
>   linux-user/syscall.c                          | 21 ++++--
>   linux-user/user-internals.h                   |  4 ++
>   tests/tcg/multiarch/Makefile.target           |  8 +++
>   .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
>   5 files changed, 117 insertions(+), 5 deletions(-)
>   create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..5e3d41dc2b 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>   uintptr_t guest_base;
>   bool have_guest_base;
>   
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char **qemu_argv;
> +
>   /*
>    * Used to implement backwards-compatibility for the `-strace`, and
>    * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>       perf_enable_jitdump();
>   }
>   
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>   static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>   
>   #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
>        "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>       {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>        "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with qemu "
> +                   "(as instantiated for the parent)"},
>       {NULL, NULL, false, NULL, NULL, NULL}
>   };
>   
> @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
>   
>       optind = parse_args(argc, argv);
>   
> +    if (qemu_dup_for_children) {
> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);

g_strdup.

> +    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;

Why is this limited to AT_FDCWD?  Why not for execvat too?

> @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>       }
>   
>       const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    } else if (is_proc_myself(p, "exe")) {
>           exe = exec_path;
>       }
> +

You still need to handle is_proc_myself, for the guest binary.

I wonder if those two cases are related.  Do we need to also add an argument so that we 
can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes

     f = openat()
     execv(qemu, "-execfd", f)

and is_proc_myself uses execfd, which we already have open.


r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 4 days ago
On Tue, Nov 5, 2024 at 5:37 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 10/30/24 14:10, Noah Goldstein wrote:
> > The new option '-qemu-children' makes it so that on `execve` the child
> > process will be launch by the same `qemu` executable that is currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
> >
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >   linux-user/main.c                             | 21 ++++++
> >   linux-user/syscall.c                          | 21 ++++--
> >   linux-user/user-internals.h                   |  4 ++
> >   tests/tcg/multiarch/Makefile.target           |  8 +++
> >   .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
> >   5 files changed, 117 insertions(+), 5 deletions(-)
> >   create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..5e3d41dc2b 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >   uintptr_t guest_base;
> >   bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char **qemu_argv;
> > +
> >   /*
> >    * Used to implement backwards-compatibility for the `-strace`, and
> >    * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >       perf_enable_jitdump();
> >   }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >   static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >   #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> >        "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >       {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >        "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with qemu "
> > +                   "(as instantiated for the parent)"},
> >       {NULL, NULL, false, NULL, NULL, NULL}
> >   };
> >
> > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
> >
> >       optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
>
> g_strdup.
ack
>
> > +    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
>
> Why is this limited to AT_FDCWD?  Why not for execvat too?
>

We could, initially it was because AFAICT qemu doesn't support executing a
program relative to another dir, but it would be simple enough to just join
the relative program path and path dirfd points to.

Want me to add support?
> > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >       }
> >
> >       const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    } else if (is_proc_myself(p, "exe")) {
> >           exe = exec_path;
> >       }
> > +
>
> You still need to handle is_proc_myself, for the guest binary.
>
> I wonder if those two cases are related.  Do we need to also add an argument so that we
> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
>
>      f = openat()
>      execv(qemu, "-execfd", f)
>
> and is_proc_myself uses execfd, which we already have open.
>
>
> r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 4 days ago
On Tue, Nov 5, 2024 at 5:48 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 5:37 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
> >
> > On 10/30/24 14:10, Noah Goldstein wrote:
> > > The new option '-qemu-children' makes it so that on `execve` the child
> > > process will be launch by the same `qemu` executable that is currently
> > > running along with its current commandline arguments.
> > >
> > > The motivation for the change is to make it so that plugins running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files may
> > > not be desirable.
> > >
> > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > > ---
> > >   linux-user/main.c                             | 21 ++++++
> > >   linux-user/syscall.c                          | 21 ++++--
> > >   linux-user/user-internals.h                   |  4 ++
> > >   tests/tcg/multiarch/Makefile.target           |  8 +++
> > >   .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
> > >   5 files changed, 117 insertions(+), 5 deletions(-)
> > >   create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c
> > >
> > > diff --git a/linux-user/main.c b/linux-user/main.c
> > > index 8143a0d4b0..5e3d41dc2b 100644
> > > --- a/linux-user/main.c
> > > +++ b/linux-user/main.c
> > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> > >   uintptr_t guest_base;
> > >   bool have_guest_base;
> > >
> > > +bool qemu_dup_for_children;
> > > +int qemu_argc;
> > > +char **qemu_argv;
> > > +
> > >   /*
> > >    * Used to implement backwards-compatibility for the `-strace`, and
> > >    * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> > >       perf_enable_jitdump();
> > >   }
> > >
> > > +static void handle_arg_qemu_children(const char *arg)
> > > +{
> > > +    qemu_dup_for_children = true;
> > > +}
> > > +
> > >   static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> > >
> > >   #ifdef CONFIG_PLUGIN
> > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> > >        "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> > >       {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> > >        "",           "Generate a jit-${pid}.dump file for perf"},
> > > +    {"qemu-children",
> > > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > > +     "",           "Run child processes (created with execve) with qemu "
> > > +                   "(as instantiated for the parent)"},
> > >       {NULL, NULL, false, NULL, NULL, NULL}
> > >   };
> > >
> > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
> > >
> > >       optind = parse_args(argc, argv);
> > >
> > > +    if (qemu_dup_for_children) {
> > > +        qemu_argc = optind;
> > > +        qemu_argv = g_new0(char *, qemu_argc);
> > > +        for (i = 0; i < optind; ++i) {
> > > +            qemu_argv[i] = strdup(argv[i]);
> >
> > g_strdup.
> ack
> >
> > > +    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
> >
> > Why is this limited to AT_FDCWD?  Why not for execvat too?
> >
>
> We could, initially it was because AFAICT qemu doesn't support executing a
> program relative to another dir, but it would be simple enough to just join
> the relative program path and path dirfd points to.
>
> Want me to add support?
> > > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >       }
> > >
> > >       const char *exe = p;
> > > -    if (is_proc_myself(p, "exe")) {
> > > +    if (through_qemu) {
> > > +        int i;
> > > +        for (i = 0; i < argp_offset; ++i) {
> > > +            argp[i] = qemu_argv[i];
> > > +        }
> > > +        exe = qemu_argv[0];
> > > +    } else if (is_proc_myself(p, "exe")) {
> > >           exe = exec_path;
> > >       }
> > > +
> >
> > You still need to handle is_proc_myself, for the guest binary.

Would this by handled by basically do:

```
if (is_proc_myself(p, "exe")) {
        exe = exec_path;
        if (through_qemu)
            argp[argp_offset] = exec_path;
}
```
Or am I missing something?

> >
> > I wonder if those two cases are related.  Do we need to also add an argument so that we
> > can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
> >
> >      f = openat()
> >      execv(qemu, "-execfd", f)
> >
> > and is_proc_myself uses execfd, which we already have open.

How does passing a fd from one process to another work?

> >
> >
> > r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 3 days ago
On 11/5/24 23:54, Noah Goldstein wrote:
>>> You still need to handle is_proc_myself, for the guest binary.
> 
> Would this by handled by basically do:
> 
> ```
> if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>          if (through_qemu)
>              argp[argp_offset] = exec_path;
> }
> ```
> Or am I missing something?

Something like that, yes.

>>> I wonder if those two cases are related.  Do we need to also add an argument so that we
>>> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
>>>
>>>       f = openat()
>>>       execv(qemu, "-execfd", f)
>>>
>>> and is_proc_myself uses execfd, which we already have open.
> 
> How does passing a fd from one process to another work?
As long as the fd is not marked O_CLOEXEC, it stays open in the new process.  Providing 
the number via command-line, or whatever, is sufficient for the new process to know what 
is going on.

I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file 
descriptor.


r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 3 days ago
On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/5/24 23:54, Noah Goldstein wrote:
> >>> You still need to handle is_proc_myself, for the guest binary.
> >
> > Would this by handled by basically do:
> >
> > ```
> > if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >          if (through_qemu)
> >              argp[argp_offset] = exec_path;
> > }
> > ```
> > Or am I missing something?
>
> Something like that, yes.
>
> >>> I wonder if those two cases are related.  Do we need to also add an argument so that we
> >>> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
> >>>
> >>>       f = openat()
> >>>       execv(qemu, "-execfd", f)
> >>>
> >>> and is_proc_myself uses execfd, which we already have open.
> >
> > How does passing a fd from one process to another work?
> As long as the fd is not marked O_CLOEXEC, it stays open in the new process.  Providing
> the number via command-line, or whatever, is sufficient for the new process to know what
> is going on.

Err I guess I was thinking its a bit weird having an option that is
only really applicable
if qemu is a child process. I.e the `-execfd` argument is not usable
from commandline.

>
> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file
> descriptor.

We could also do something along the lines of:

```
fd = openat(dirfd, exe);
char new_exe[PATH_MAX];
char fd_path[PATH_MAX];
sprintf(fd_path, "/proc/self/fd/%d", fd);
readlink(fd_path, new_exe, PATH_MAX);
exe = new_exe;
```
>
>
> r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 3 days ago
On 11/6/24 17:03, Noah Goldstein wrote:
> On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 11/5/24 23:54, Noah Goldstein wrote:
>>>>> You still need to handle is_proc_myself, for the guest binary.
>>>
>>> Would this by handled by basically do:
>>>
>>> ```
>>> if (is_proc_myself(p, "exe")) {
>>>           exe = exec_path;
>>>           if (through_qemu)
>>>               argp[argp_offset] = exec_path;
>>> }
>>> ```
>>> Or am I missing something?
>>
>> Something like that, yes.
>>
>>>>> I wonder if those two cases are related.  Do we need to also add an argument so that we
>>>>> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
>>>>>
>>>>>        f = openat()
>>>>>        execv(qemu, "-execfd", f)
>>>>>
>>>>> and is_proc_myself uses execfd, which we already have open.
>>>
>>> How does passing a fd from one process to another work?
>> As long as the fd is not marked O_CLOEXEC, it stays open in the new process.  Providing
>> the number via command-line, or whatever, is sufficient for the new process to know what
>> is going on.
> 
> Err I guess I was thinking its a bit weird having an option that is
> only really applicable
> if qemu is a child process. I.e the `-execfd` argument is not usable
> from commandline.

qemu-foo -execfd 3 3< /some/file

Or perhaps opened via other tooling.

>> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file
>> descriptor.
> 
> We could also do something along the lines of:
> 
> ```
> fd = openat(dirfd, exe);
> char new_exe[PATH_MAX];
> char fd_path[PATH_MAX];
> sprintf(fd_path, "/proc/self/fd/%d", fd);
> readlink(fd_path, new_exe, PATH_MAX);

Reading the link doesn't always work.
Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored.


r~

Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 3 days ago
On Wed, Nov 6, 2024 at 11:26 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/6/24 17:03, Noah Goldstein wrote:
> > On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson
> > <richard.henderson@linaro.org> wrote:
> >>
> >> On 11/5/24 23:54, Noah Goldstein wrote:
> >>>>> You still need to handle is_proc_myself, for the guest binary.
> >>>
> >>> Would this by handled by basically do:
> >>>
> >>> ```
> >>> if (is_proc_myself(p, "exe")) {
> >>>           exe = exec_path;
> >>>           if (through_qemu)
> >>>               argp[argp_offset] = exec_path;
> >>> }
> >>> ```
> >>> Or am I missing something?
> >>
> >> Something like that, yes.
> >>
> >>>>> I wonder if those two cases are related.  Do we need to also add an argument so that we
> >>>>> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
> >>>>>
> >>>>>        f = openat()
> >>>>>        execv(qemu, "-execfd", f)
> >>>>>
> >>>>> and is_proc_myself uses execfd, which we already have open.
> >>>
> >>> How does passing a fd from one process to another work?
> >> As long as the fd is not marked O_CLOEXEC, it stays open in the new process.  Providing
> >> the number via command-line, or whatever, is sufficient for the new process to know what
> >> is going on.
> >
> > Err I guess I was thinking its a bit weird having an option that is
> > only really applicable
> > if qemu is a child process. I.e the `-execfd` argument is not usable
> > from commandline.
>
> qemu-foo -execfd 3 3< /some/file
>
> Or perhaps opened via other tooling.
>
> >> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file
> >> descriptor.
> >
> > We could also do something along the lines of:
> >
> > ```
> > fd = openat(dirfd, exe);
> > char new_exe[PATH_MAX];
> > char fd_path[PATH_MAX];
> > sprintf(fd_path, "/proc/self/fd/%d", fd);
> > readlink(fd_path, new_exe, PATH_MAX);
>
> Reading the link doesn't always work.
> Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored.

Okay, fair enough, I will get started on adding `-execfd`
>
>
> r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 3 days ago
On Wed, Nov 6, 2024 at 11:53 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Nov 6, 2024 at 11:26 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
> >
> > On 11/6/24 17:03, Noah Goldstein wrote:
> > > On Wed, Nov 6, 2024 at 3:38 AM Richard Henderson
> > > <richard.henderson@linaro.org> wrote:
> > >>
> > >> On 11/5/24 23:54, Noah Goldstein wrote:
> > >>>>> You still need to handle is_proc_myself, for the guest binary.
> > >>>
> > >>> Would this by handled by basically do:
> > >>>
> > >>> ```
> > >>> if (is_proc_myself(p, "exe")) {
> > >>>           exe = exec_path;
> > >>>           if (through_qemu)
> > >>>               argp[argp_offset] = exec_path;
> > >>> }
> > >>> ```
> > >>> Or am I missing something?
> > >>
> > >> Something like that, yes.
> > >>
> > >>>>> I wonder if those two cases are related.  Do we need to also add an argument so that we
> > >>>>> can pass the executable to the next qemu via file descriptor?  I.e. execvat becomes
> > >>>>>
> > >>>>>        f = openat()
> > >>>>>        execv(qemu, "-execfd", f)
> > >>>>>
> > >>>>> and is_proc_myself uses execfd, which we already have open.
> > >>>
> > >>> How does passing a fd from one process to another work?
> > >> As long as the fd is not marked O_CLOEXEC, it stays open in the new process.  Providing
> > >> the number via command-line, or whatever, is sufficient for the new process to know what
> > >> is going on.
> > >
> > > Err I guess I was thinking its a bit weird having an option that is
> > > only really applicable
> > > if qemu is a child process. I.e the `-execfd` argument is not usable
> > > from commandline.
> >
> > qemu-foo -execfd 3 3< /some/file
> >
> > Or perhaps opened via other tooling.
> >
> > >> I now realize this is necessary for the AT_EMPTY_PATH flag, where we only have the file
> > >> descriptor.
> > >
> > > We could also do something along the lines of:
> > >
> > > ```
> > > fd = openat(dirfd, exe);
> > > char new_exe[PATH_MAX];
> > > char fd_path[PATH_MAX];
> > > sprintf(fd_path, "/proc/self/fd/%d", fd);
> > > readlink(fd_path, new_exe, PATH_MAX);
> >
> > Reading the link doesn't always work.
> > Reading or passing the link means AT_SYMLINK_NOFOLLOW isn't honored.
>
> Okay, fair enough, I will get started on adding `-execfd`

Question about impl regarding handling of `-execfd` with/without a program name.

1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.

Do you want to allow both of these? If you want to allow (1), what should
we use for `argv[0]`/`exec_path`. The program pass ("ls") or
`readlink(<some_fd>)`?
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 3 days ago
On 11/6/24 18:13, Noah Goldstein wrote:
> Question about impl regarding handling of `-execfd` with/without a program name.
> 
> 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
> 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.
> 
> Do you want to allow both of these? If you want to allow (1), what should
> we use for `argv[0]`/`exec_path`. The program pass ("ls") or
> `readlink(<some_fd>)`?

The canonical response is, examine the kernel source.
We're not implementing this in a vacuum, you're replicating execveat(2).

I suspect the answer is (1), to be compared with

     syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH);


r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 3 days ago
On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/6/24 18:13, Noah Goldstein wrote:
> > Question about impl regarding handling of `-execfd` with/without a program name.
> >
> > 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
> > 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.
> >
> > Do you want to allow both of these? If you want to allow (1), what should
> > we use for `argv[0]`/`exec_path`. The program pass ("ls") or
> > `readlink(<some_fd>)`?
>
> The canonical response is, examine the kernel source.
> We're not implementing this in a vacuum, you're replicating execveat(2).
>
> I suspect the answer is (1), to be compared with
>
>      syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH);

Err, I think the reference for '-execfd' is `fexecve`:
https://man7.org/linux/man-pages/man3/fexecve.3.html

Which doesn't take a path. So I guess we just interpret the "ls" as
argv[0] but not
as "exec_path".
>
>
> r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 2 days ago
On 11/6/24 21:30, Noah Goldstein wrote:
> On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 11/6/24 18:13, Noah Goldstein wrote:
>>> Question about impl regarding handling of `-execfd` with/without a program name.
>>>
>>> 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
>>> 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.
>>>
>>> Do you want to allow both of these? If you want to allow (1), what should
>>> we use for `argv[0]`/`exec_path`. The program pass ("ls") or
>>> `readlink(<some_fd>)`?
>>
>> The canonical response is, examine the kernel source.
>> We're not implementing this in a vacuum, you're replicating execveat(2).
>>
>> I suspect the answer is (1), to be compared with
>>
>>       syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH);
> 
> Err, I think the reference for '-execfd' is `fexecve`:
> https://man7.org/linux/man-pages/man3/fexecve.3.html

No, fexecve(3) is a glibc function which (nowadays) uses the execveat(2) syscall that we 
want to emulate.

> Which doesn't take a path...

... corresponding to the "" and AT_EMPTY_PATH above.

> So I guess we just interpret the "ls" as argv[0] but not as "exec_path".

But your conclusion is correct.


r~

Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 2 weeks, 3 days ago
On Wed, Nov 6, 2024 at 3:30 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson
> <richard.henderson@linaro.org> wrote:
> >
> > On 11/6/24 18:13, Noah Goldstein wrote:
> > > Question about impl regarding handling of `-execfd` with/without a program name.
> > >
> > > 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
> > > 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.
> > >
> > > Do you want to allow both of these? If you want to allow (1), what should
> > > we use for `argv[0]`/`exec_path`. The program pass ("ls") or
> > > `readlink(<some_fd>)`?
> >
> > The canonical response is, examine the kernel source.
> > We're not implementing this in a vacuum, you're replicating execveat(2).
> >
> > I suspect the answer is (1), to be compared with
> >
> >      syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH);
>
> Err, I think the reference for '-execfd' is `fexecve`:
> https://man7.org/linux/man-pages/man3/fexecve.3.html
>
> Which doesn't take a path. So I guess we just interpret the "ls" as
> argv[0] but not
> as "exec_path".

One more point, what should the behavior be if we have
AT_EXECFD from binfmt-misc?
> >
> >
> > r~
Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Richard Henderson 2 weeks, 2 days ago
On 11/6/24 23:49, Noah Goldstein wrote:
> On Wed, Nov 6, 2024 at 3:30 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>
>> On Wed, Nov 6, 2024 at 3:10 PM Richard Henderson
>> <richard.henderson@linaro.org> wrote:
>>>
>>> On 11/6/24 18:13, Noah Goldstein wrote:
>>>> Question about impl regarding handling of `-execfd` with/without a program name.
>>>>
>>>> 1) `-execfd` + program name ie: `qemu -execfd <some_fd> ls -a`.
>>>> 2) `-execfd` without program name i.e: `qemu -execfd <some_fd> -a`.
>>>>
>>>> Do you want to allow both of these? If you want to allow (1), what should
>>>> we use for `argv[0]`/`exec_path`. The program pass ("ls") or
>>>> `readlink(<some_fd>)`?
>>>
>>> The canonical response is, examine the kernel source.
>>> We're not implementing this in a vacuum, you're replicating execveat(2).
>>>
>>> I suspect the answer is (1), to be compared with
>>>
>>>       syscall(__NR_execveat, some_fd, "", &["ls", "-a"], env, AT_EMPTY_PATH);
>>
>> Err, I think the reference for '-execfd' is `fexecve`:
>> https://man7.org/linux/man-pages/man3/fexecve.3.html
>>
>> Which doesn't take a path. So I guess we just interpret the "ls" as
>> argv[0] but not
>> as "exec_path".
> 
> One more point, what should the behavior be if we have
> AT_EXECFD from binfmt-misc?

You mean precedence of AT_EXECFD vs the command-line option?

Arbitrary, since it would be a usage error to have both.  You'd have to do something silly 
with the binfmt-misc rule for that to happen.

Perhaps

static int execfd = -1;
// option processing
// main

     if (execfd < 0) {
         errno = 0;
         execfd = qemu_getauxval(AT_EXECFD);
         if (errno != 0) {
             execfd = open(...);
         }
     }

just because that's a simple change to what's currently present.


r~

Re: linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 3 weeks, 3 days ago
On Wed, Oct 30, 2024 at 9:10 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The new option '-qemu-children' makes it so that on `execve` the child
> process will be launch by the same `qemu` executable that is currently
> running along with its current commandline arguments.
>
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.
>
> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c                             | 21 ++++++
>  linux-user/syscall.c                          | 21 ++++--
>  linux-user/user-internals.h                   |  4 ++
>  tests/tcg/multiarch/Makefile.target           |  8 +++
>  .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
>  5 files changed, 117 insertions(+), 5 deletions(-)
>  create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..5e3d41dc2b 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char **qemu_argv;
> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>
> @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
>
>      optind = parse_args(argc, argv);
>
> +    if (qemu_dup_for_children) {
> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace * LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 59b2080b98..96b105e9ce 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
>
>      argc = 0;
>
> @@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>          envc++;
>      }
>
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset;
> +         gp; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>      }
>
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    } else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8644,7 +8654,8 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset;
> +         *q; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> index 46ffc093f4..ed3ed666a0 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char **qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
> index 78b83d5575..0e220953e7 100644
> --- a/tests/tcg/multiarch/Makefile.target
> +++ b/tests/tcg/multiarch/Makefile.target
> @@ -30,6 +30,14 @@ run-float_%: float_%
>         $(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref)
>
>
> +run-linux-execve-qemu-children: linux-execve-qemu-children
> +       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
> +       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
> +
> +run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children
> +       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
> +       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
> +
>  testthread: LDFLAGS+=-lpthread
>
>  threadcount: LDFLAGS+=-lpthread
> diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
> new file mode 100644
> index 0000000000..60d6537666
> --- /dev/null
> +++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
> @@ -0,0 +1,68 @@
> +#include <assert.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <libgen.h>
> +#include <malloc.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +
> +#define MAX_COMM_SIZE (4096)
> +
> +int
> +main(int argc, char ** argv, char ** envp) {
> +    int          fd;
> +    char         next_arg[2];
> +    char *       buf;
> +    ssize_t      off;
> +    const char * expec_comm;
> +    assert(argc == 3 || argc == 4);
> +    fd = open("/proc/self/comm", O_RDONLY);
> +    assert(fd > 0);
> +
> +    buf = calloc(MAX_COMM_SIZE + 1, 1);
> +    assert(buf != NULL);
> +
> +    off = 0;
> +    for (;;) {
> +        ssize_t res = read(fd, buf + off, 1);
> +        if (res < 0 && errno != EAGAIN) {
> +            perror("Failed to read comm");
> +            return -1;
> +        }
> +        if (res == 0) {
> +            break;
> +        }
> +
> +        off += res;
> +
> +        if (off >= MAX_COMM_SIZE) {
> +            fprintf(stderr, "/proc/self/comm too large for test\n");
> +            return -1;
> +        }
> +    }
> +    assert(off && buf[off] == '\0' && buf[off - 1] == '\n');
> +    buf[off - 1] = '\0';
> +    expec_comm   = basename(argv[1]);
> +    if (argc == 3 && strncmp(buf, expec_comm, strlen(expec_comm))) {
> +        fprintf(stderr,
> +                "Didn't propagate qemu settings\nComm:  '%s'\nExpec: '%s'\n",
> +                buf, expec_comm);
> +        return -1;
> +    }
> +    free(buf);
> +    next_arg[0] = argv[2][0];
> +    next_arg[1] = '\0';
> +    if (next_arg[0] == '9') {
> +        return 0;
> +    }
> +    next_arg[0] += 1;
> +    char * next_args[] = { argv[0], argv[1], next_arg, NULL };
> +    int    eres        = execve(argv[0], &next_args[0], envp);
> +    if (eres != 0) {
> +        fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno,
> +                strerror(errno));
> +        return -1;
> +    }
> +    return 0;
> +}
> --
> 2.43.0
>

Added test that tests both old behavior (no propagation of qemu)
and new behavior (propagation of qemu + cmdline).

Tested on Aarch64 + Linux with:
```
make check-tcg
```
[PATCH v2] linux-user: Add option to run `execve`d programs through QEMU
Posted by Noah Goldstein 1 month, 3 weeks ago
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.

The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children.  Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
 linux-user/main.c           | 21 +++++++++++++++++++++
 linux-user/syscall.c        | 21 ++++++++++++++++-----
 linux-user/user-internals.h |  4 ++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..5e3d41dc2b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
 
+bool qemu_dup_for_children;
+int qemu_argc;
+char **qemu_argv;
+
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
  * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
     perf_enable_jitdump();
 }
 
+static void handle_arg_qemu_children(const char *arg)
+{
+    qemu_dup_for_children = true;
+}
+
 static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
 
 #ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
      "",           "Generate a /tmp/perf-${pid}.map file for perf"},
     {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
      "",           "Generate a jit-${pid}.dump file for perf"},
+    {"qemu-children",
+                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
+     "",           "Run child processes (created with execve) with qemu "
+                   "(as instantiated for the parent)"},
     {NULL, NULL, false, NULL, NULL, NULL}
 };
 
@@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
 
     optind = parse_args(argc, argv);
 
+    if (qemu_dup_for_children) {
+        qemu_argc = optind;
+        qemu_argv = g_new0(char *, qemu_argc);
+        for (i = 0; i < optind; ++i) {
+            qemu_argv[i] = strdup(argv[i]);
+        }
+    }
+
     qemu_set_log_filename_flags(last_log_filename,
                                 last_log_mask | (enable_strace * LOG_STRACE),
                                 &error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a666986189..79019c0db0 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8457,13 +8457,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long pathname, abi_long guest_argp,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
-    int ret;
+    int ret, argp_offset;
     char **argp, **envp;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
     char **q;
     void *p;
+    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
 
     argc = 0;
 
@@ -8487,10 +8488,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
         envc++;
     }
 
-    argp = g_new0(char *, argc + 1);
+    argp_offset = through_qemu ? qemu_argc : 0;
+    argp = g_new0(char *, argc + argp_offset + 1);
     envp = g_new0(char *, envc + 1);
 
-    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         gp; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp)) {
             goto execve_efault;
         }
@@ -8535,9 +8538,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
     }
 
     const char *exe = p;
-    if (is_proc_myself(p, "exe")) {
+    if (through_qemu) {
+        int i;
+        for (i = 0; i < argp_offset; ++i) {
+            argp[i] = qemu_argv[i];
+        }
+        exe = qemu_argv[0];
+    } else if (is_proc_myself(p, "exe")) {
         exe = exec_path;
     }
+
     ret = is_execveat
         ? safe_execveat(dirfd, exe, argp, envp, flags)
         : safe_execve(exe, argp, envp);
@@ -8551,7 +8561,8 @@ execve_efault:
     ret = -TARGET_EFAULT;
 
 execve_end:
-    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         *q; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp) || !addr) {
             break;
         }
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 5c7f173ceb..a4615eee54 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char **qemu_argv;
+
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
-- 
2.43.0