This patch adds [,spawn=deny] argument to `-sandbox on' option. It
blacklists fork and execve system calls, avoiding Qemu to spawn new
threads or processes.
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
---
include/sysemu/seccomp.h | 1 +
qemu-options.hx | 9 +++++++--
qemu-seccomp.c | 12 ++++++++++++
vl.c | 16 ++++++++++++++++
4 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 4a9e63c7cd..3ab5fc4f61 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -18,6 +18,7 @@
#define QEMU_SECCOMP_SET_DEFAULT (1 << 0)
#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1)
#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2)
+#define QEMU_SECCOMP_SET_SPAWN (1 << 3)
#include <seccomp.h>
diff --git a/qemu-options.hx b/qemu-options.hx
index 5c1b163fb5..2b04b9f170 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4018,6 +4018,7 @@ ETEXI
DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
"-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
+ " [,spawn=allow|deny]\n" \
" Enable seccomp mode 2 system call filter (default 'off').\n" \
" use 'obsolete' to allow obsolete system calls that are provided\n" \
" by the kernel, but typically no longer used by modern\n" \
@@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
" use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
" its privileges by blacklisting all set*uid|gid system calls.\n" \
" The value 'children' will deny set*uid|gid system calls for\n" \
- " main QEMU process but will allow forks and execves to run unprivileged\n",
+ " main QEMU process but will allow forks and execves to run unprivileged\n" \
+ " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
+ " blacklisting *fork and execve\n",
QEMU_ARCH_ALL)
STEXI
-@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
+@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
@findex -sandbox
Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
disable it. The default is 'off'.
@@ -4037,6 +4040,8 @@ disable it. The default is 'off'.
Enable Obsolete system calls
@item elevateprivileges=@var{string}
Disable set*uid|gid system calls
+@item spawn=@var{string}
+Disable *fork and execve
@end table
ETEXI
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 2bad16cafb..4c169febf8 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
{ SCMP_SYS(setresgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
{ SCMP_SYS(setfsuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
{ SCMP_SYS(setfsgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ /* spawn */
+ { SCMP_SYS(fork), 8, QEMU_SECCOMP_SET_SPAWN },
+ { SCMP_SYS(vfork), 8, QEMU_SECCOMP_SET_SPAWN },
+ { SCMP_SYS(execve), 8, QEMU_SECCOMP_SET_SPAWN },
};
@@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
}
break;
+ case QEMU_SECCOMP_SET_SPAWN:
+ if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
+ break;
+ } else {
+ continue;
+ }
+
+ break;
default:
break;
}
diff --git a/vl.c b/vl.c
index 413cfe8504..0af137da17 100644
--- a/vl.c
+++ b/vl.c
@@ -280,6 +280,10 @@ static QemuOptsList qemu_sandbox_opts = {
.name = "elevateprivileges",
.type = QEMU_OPT_STRING,
},
+ {
+ .name = "spawn",
+ .type = QEMU_OPT_STRING,
+ },
{ /* end of list */ }
},
};
@@ -1081,6 +1085,18 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
}
}
+ value = qemu_opt_get(opts, "spawn");
+ if (value) {
+ if (g_str_equal(value, "deny")) {
+ seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+ } else if (g_str_equal(value, "allow")) {
+ /* default value */
+ } else {
+ error_report("invalid argument for spawn");
+ return -1;
+ }
+ }
+
if (seccomp_start(seccomp_opts) < 0) {
error_report("failed to install seccomp syscall filter "
"in the kernel");
--
2.13.5
On Fri, Sep 08, 2017 at 11:10:26AM +0200, Eduardo Otubo wrote: > This patch adds [,spawn=deny] argument to `-sandbox on' option. It > blacklists fork and execve system calls, avoiding Qemu to spawn new > threads or processes. > > Signed-off-by: Eduardo Otubo <otubo@redhat.com> > --- > include/sysemu/seccomp.h | 1 + > qemu-options.hx | 9 +++++++-- > qemu-seccomp.c | 12 ++++++++++++ > vl.c | 16 ++++++++++++++++ > 4 files changed, 36 insertions(+), 2 deletions(-) Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 08.09.2017 11:10, Eduardo Otubo wrote:
> This patch adds [,spawn=deny] argument to `-sandbox on' option. It
> blacklists fork and execve system calls, avoiding Qemu to spawn new
> threads or processes.
>
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
> include/sysemu/seccomp.h | 1 +
> qemu-options.hx | 9 +++++++--
> qemu-seccomp.c | 12 ++++++++++++
> vl.c | 16 ++++++++++++++++
> 4 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 4a9e63c7cd..3ab5fc4f61 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -18,6 +18,7 @@
> #define QEMU_SECCOMP_SET_DEFAULT (1 << 0)
> #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1)
> #define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2)
> +#define QEMU_SECCOMP_SET_SPAWN (1 << 3)
>
> #include <seccomp.h>
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 5c1b163fb5..2b04b9f170 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4018,6 +4018,7 @@ ETEXI
>
> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
> + " [,spawn=allow|deny]\n" \
> " Enable seccomp mode 2 system call filter (default 'off').\n" \
> " use 'obsolete' to allow obsolete system calls that are provided\n" \
> " by the kernel, but typically no longer used by modern\n" \
> @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
> " its privileges by blacklisting all set*uid|gid system calls.\n" \
> " The value 'children' will deny set*uid|gid system calls for\n" \
> - " main QEMU process but will allow forks and execves to run unprivileged\n",
> + " main QEMU process but will allow forks and execves to run unprivileged\n" \
> + " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
> + " blacklisting *fork and execve\n",
> QEMU_ARCH_ALL)
> STEXI
> -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
> +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
> @findex -sandbox
> Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> disable it. The default is 'off'.
> @@ -4037,6 +4040,8 @@ disable it. The default is 'off'.
> Enable Obsolete system calls
> @item elevateprivileges=@var{string}
> Disable set*uid|gid system calls
> +@item spawn=@var{string}
> +Disable *fork and execve
> @end table
> ETEXI
>
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 2bad16cafb..4c169febf8 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
> { SCMP_SYS(setresgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> { SCMP_SYS(setfsuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> { SCMP_SYS(setfsgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> + /* spawn */
> + { SCMP_SYS(fork), 8, QEMU_SECCOMP_SET_SPAWN },
> + { SCMP_SYS(vfork), 8, QEMU_SECCOMP_SET_SPAWN },
> + { SCMP_SYS(execve), 8, QEMU_SECCOMP_SET_SPAWN },
> };
>
>
> @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
> }
>
> break;
> + case QEMU_SECCOMP_SET_SPAWN:
> + if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
> + break;
> + } else {
> + continue;
> + }
> +
Remove the above empty line?
Anyway, it's somewhat ugly that you need a switch-case statement here at
all. Couldn't you simply check it like this:
if (!(seccomp_opts & blacklist[i].set)) {
continue;
}
?
You then just have to invert the meaning of the
QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
treated in the same way as the others (i.e. use
uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
instead of
uint32_t seccomp_opts = 0x00000;
in vl.c in the second patch).
Thomas
On Fri, Sep 08, 2017 at 11:50:12AM +0200, Thomas Huth wrote:
> On 08.09.2017 11:10, Eduardo Otubo wrote:
> > This patch adds [,spawn=deny] argument to `-sandbox on' option. It
> > blacklists fork and execve system calls, avoiding Qemu to spawn new
> > threads or processes.
> >
> > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > ---
> > include/sysemu/seccomp.h | 1 +
> > qemu-options.hx | 9 +++++++--
> > qemu-seccomp.c | 12 ++++++++++++
> > vl.c | 16 ++++++++++++++++
> > 4 files changed, 36 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > index 4a9e63c7cd..3ab5fc4f61 100644
> > --- a/include/sysemu/seccomp.h
> > +++ b/include/sysemu/seccomp.h
> > @@ -18,6 +18,7 @@
> > #define QEMU_SECCOMP_SET_DEFAULT (1 << 0)
> > #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1)
> > #define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2)
> > +#define QEMU_SECCOMP_SET_SPAWN (1 << 3)
> >
> > #include <seccomp.h>
> >
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 5c1b163fb5..2b04b9f170 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -4018,6 +4018,7 @@ ETEXI
> >
> > DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
> > + " [,spawn=allow|deny]\n" \
> > " Enable seccomp mode 2 system call filter (default 'off').\n" \
> > " use 'obsolete' to allow obsolete system calls that are provided\n" \
> > " by the kernel, but typically no longer used by modern\n" \
> > @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
> > " its privileges by blacklisting all set*uid|gid system calls.\n" \
> > " The value 'children' will deny set*uid|gid system calls for\n" \
> > - " main QEMU process but will allow forks and execves to run unprivileged\n",
> > + " main QEMU process but will allow forks and execves to run unprivileged\n" \
> > + " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
> > + " blacklisting *fork and execve\n",
> > QEMU_ARCH_ALL)
> > STEXI
> > -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
> > +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
> > @findex -sandbox
> > Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> > disable it. The default is 'off'.
> > @@ -4037,6 +4040,8 @@ disable it. The default is 'off'.
> > Enable Obsolete system calls
> > @item elevateprivileges=@var{string}
> > Disable set*uid|gid system calls
> > +@item spawn=@var{string}
> > +Disable *fork and execve
> > @end table
> > ETEXI
> >
> > diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> > index 2bad16cafb..4c169febf8 100644
> > --- a/qemu-seccomp.c
> > +++ b/qemu-seccomp.c
> > @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
> > { SCMP_SYS(setresgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> > { SCMP_SYS(setfsuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> > { SCMP_SYS(setfsgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
> > + /* spawn */
> > + { SCMP_SYS(fork), 8, QEMU_SECCOMP_SET_SPAWN },
> > + { SCMP_SYS(vfork), 8, QEMU_SECCOMP_SET_SPAWN },
> > + { SCMP_SYS(execve), 8, QEMU_SECCOMP_SET_SPAWN },
> > };
> >
> >
> > @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
> > }
> >
> > break;
> > + case QEMU_SECCOMP_SET_SPAWN:
> > + if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
> > + break;
> > + } else {
> > + continue;
> > + }
> > +
>
> Remove the above empty line?
>
> Anyway, it's somewhat ugly that you need a switch-case statement here at
> all. Couldn't you simply check it like this:
>
> if (!(seccomp_opts & blacklist[i].set)) {
> continue;
> }
> ?
>
> You then just have to invert the meaning of the
> QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
> treated in the same way as the others (i.e. use
> uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
> instead of
> uint32_t seccomp_opts = 0x00000;
> in vl.c in the second patch).
That's indeed much better, but perhaps:
uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT | QEMU_SECCOMP_SET_OBSOLETE;
?
--
Eduardo Otubo
Senior Software Engineer @ RedHat
On 08.09.2017 13:15, Eduardo Otubo wrote:
> On Fri, Sep 08, 2017 at 11:50:12AM +0200, Thomas Huth wrote:
>> On 08.09.2017 11:10, Eduardo Otubo wrote:
>>> This patch adds [,spawn=deny] argument to `-sandbox on' option. It
>>> blacklists fork and execve system calls, avoiding Qemu to spawn new
>>> threads or processes.
>>>
>>> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
>>> ---
>>> include/sysemu/seccomp.h | 1 +
>>> qemu-options.hx | 9 +++++++--
>>> qemu-seccomp.c | 12 ++++++++++++
>>> vl.c | 16 ++++++++++++++++
>>> 4 files changed, 36 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
>>> index 4a9e63c7cd..3ab5fc4f61 100644
>>> --- a/include/sysemu/seccomp.h
>>> +++ b/include/sysemu/seccomp.h
>>> @@ -18,6 +18,7 @@
>>> #define QEMU_SECCOMP_SET_DEFAULT (1 << 0)
>>> #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1)
>>> #define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2)
>>> +#define QEMU_SECCOMP_SET_SPAWN (1 << 3)
>>>
>>> #include <seccomp.h>
>>>
>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>> index 5c1b163fb5..2b04b9f170 100644
>>> --- a/qemu-options.hx
>>> +++ b/qemu-options.hx
>>> @@ -4018,6 +4018,7 @@ ETEXI
>>>
>>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>> "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
>>> + " [,spawn=allow|deny]\n" \
>>> " Enable seccomp mode 2 system call filter (default 'off').\n" \
>>> " use 'obsolete' to allow obsolete system calls that are provided\n" \
>>> " by the kernel, but typically no longer used by modern\n" \
>>> @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>> " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
>>> " its privileges by blacklisting all set*uid|gid system calls.\n" \
>>> " The value 'children' will deny set*uid|gid system calls for\n" \
>>> - " main QEMU process but will allow forks and execves to run unprivileged\n",
>>> + " main QEMU process but will allow forks and execves to run unprivileged\n" \
>>> + " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
>>> + " blacklisting *fork and execve\n",
>>> QEMU_ARCH_ALL)
>>> STEXI
>>> -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
>>> +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
>>> @findex -sandbox
>>> Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
>>> disable it. The default is 'off'.
>>> @@ -4037,6 +4040,8 @@ disable it. The default is 'off'.
>>> Enable Obsolete system calls
>>> @item elevateprivileges=@var{string}
>>> Disable set*uid|gid system calls
>>> +@item spawn=@var{string}
>>> +Disable *fork and execve
>>> @end table
>>> ETEXI
>>>
>>> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
>>> index 2bad16cafb..4c169febf8 100644
>>> --- a/qemu-seccomp.c
>>> +++ b/qemu-seccomp.c
>>> @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
>>> { SCMP_SYS(setresgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
>>> { SCMP_SYS(setfsuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
>>> { SCMP_SYS(setfsgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
>>> + /* spawn */
>>> + { SCMP_SYS(fork), 8, QEMU_SECCOMP_SET_SPAWN },
>>> + { SCMP_SYS(vfork), 8, QEMU_SECCOMP_SET_SPAWN },
>>> + { SCMP_SYS(execve), 8, QEMU_SECCOMP_SET_SPAWN },
>>> };
>>>
>>>
>>> @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
>>> }
>>>
>>> break;
>>> + case QEMU_SECCOMP_SET_SPAWN:
>>> + if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
>>> + break;
>>> + } else {
>>> + continue;
>>> + }
>>> +
>>
>> Remove the above empty line?
>>
>> Anyway, it's somewhat ugly that you need a switch-case statement here at
>> all. Couldn't you simply check it like this:
>>
>> if (!(seccomp_opts & blacklist[i].set)) {
>> continue;
>> }
>> ?
>>
>> You then just have to invert the meaning of the
>> QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
>> treated in the same way as the others (i.e. use
>> uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
>> instead of
>> uint32_t seccomp_opts = 0x00000;
>> in vl.c in the second patch).
>
> That's indeed much better, but perhaps:
> uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT | QEMU_SECCOMP_SET_OBSOLETE;
Right, the default set should be excluded by default of course, too! :-)
Thomas
© 2016 - 2026 Red Hat, Inc.