This patch introduces the new argument
[,elevateprivileges=allow|deny|children] to the `-sandbox on' option. It
allows or denies the QEMU process elevating its privileges by blacklisting
all set*uid|gid system calls. The 'children' option will let forks and
execves run unprivileged.
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
---
include/sysemu/seccomp.h | 1 +
qemu-options.hx | 9 ++++++---
qemu-seccomp.c | 29 +++++++++++++++++++++++++++++
vl.c | 22 ++++++++++++++++++++++
4 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 7a7bde246b..e6e78d85ce 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -16,6 +16,7 @@
#define QEMU_SECCOMP_H
#define OBSOLETE 0x0001
+#define PRIVILEGED 0x0010
#include <seccomp.h>
diff --git a/qemu-options.hx b/qemu-options.hx
index 54e492f36a..34d33a812e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4004,17 +4004,20 @@ Old param mode (ARM only).
ETEXI
DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
- "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \
- " obsolete: Allow obsolete system calls\n",
+ "-sandbox on[,obsolete=allow][,elevateprivileges=allow|deny|children] Enable seccomp mode 2 system call filter (default 'off').\n" \
+ " obsolete: Allow obsolete system calls\n"
+ " elevateprivileges: allows or denies Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls. 'children' will deny set*uid|gid system calls for main Qemu process but will allow forks and execves to run unprivileged\n",
QEMU_ARCH_ALL)
STEXI
-@item -sandbox @var{arg}[,obsolete=@var{string}]
+@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
@findex -sandbox
Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
disable it. The default is 'off'.
@table @option
@item obsolete=@var{string}
Enable Obsolete system calls
+@item elevateprivileges=@var{string}
+Disable set*uid|gid system calls
@end table
ETEXI
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index c6a8b28260..6caa513edd 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -31,6 +31,19 @@ struct QemuSeccompSyscall {
uint8_t priority;
};
+static const struct QemuSeccompSyscall privileged_syscalls[] = {
+ { SCMP_SYS(setuid), 255 },
+ { SCMP_SYS(setgid), 255 },
+ { SCMP_SYS(setpgid), 255 },
+ { SCMP_SYS(setsid), 255 },
+ { SCMP_SYS(setreuid), 255 },
+ { SCMP_SYS(setregid), 255 },
+ { SCMP_SYS(setresuid), 255 },
+ { SCMP_SYS(setresgid), 255 },
+ { SCMP_SYS(setfsuid), 255 },
+ { SCMP_SYS(setfsgid), 255 },
+};
+
static const struct QemuSeccompSyscall obsolete[] = {
{ SCMP_SYS(readdir), 255 },
{ SCMP_SYS(_sysctl), 255 },
@@ -110,6 +123,22 @@ int seccomp_start(uint8_t seccomp_opts)
}
}
+ if (seccomp_opts & PRIVILEGED) {
+ for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) {
+ rc = seccomp_rule_add(ctx, SCMP_ACT_KILL,
+ privileged_syscalls[i].num, 0);
+ if (rc < 0) {
+ goto seccomp_return;
+ }
+ rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num,
+ privileged_syscalls[i].priority);
+ if (rc < 0) {
+ goto seccomp_return;
+ }
+ }
+ }
+
+
rc = seccomp_load(ctx);
seccomp_return:
diff --git a/vl.c b/vl.c
index cbe09c94af..800e2b573d 100644
--- a/vl.c
+++ b/vl.c
@@ -29,6 +29,7 @@
#ifdef CONFIG_SECCOMP
#include "sysemu/seccomp.h"
+#include "sys/prctl.h"
#endif
#if defined(CONFIG_VDE)
@@ -275,6 +276,10 @@ static QemuOptsList qemu_sandbox_opts = {
.name = "obsolete",
.type = QEMU_OPT_STRING,
},
+ {
+ .name = "elevateprivileges",
+ .type = QEMU_OPT_STRING,
+ },
{ /* end of list */ }
},
};
@@ -1046,6 +1051,23 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
}
}
+ value = qemu_opt_get(opts, "elevateprivileges");
+ if (value) {
+ if (strcmp(value, "deny") == 0) {
+ seccomp_opts |= PRIVILEGED;
+ }
+ if (strcmp(value, "children") == 0) {
+ seccomp_opts |= PRIVILEGED;
+
+ /* calling prctl directly because we're
+ * not sure if host has CAP_SYS_ADMIN set*/
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+ error_report("failed to set no_new_privs "
+ "aborting");
+ }
+ }
+ }
+
if (seccomp_start(seccomp_opts) < 0) {
error_report("failed to install seccomp syscall filter "
"in the kernel");
--
2.13.3
On Fri, Jul 28, 2017 at 02:10:37PM +0200, Eduardo Otubo wrote:
> This patch introduces the new argument
> [,elevateprivileges=allow|deny|children] to the `-sandbox on'. It allows
> or denies Qemu process to elevate its privileges by blacklisting all
> set*uid|gid system calls. The 'children' option will let forks and
> execves run unprivileged.
>
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
> include/sysemu/seccomp.h | 1 +
> qemu-options.hx | 9 ++++++---
> qemu-seccomp.c | 29 +++++++++++++++++++++++++++++
> vl.c | 22 ++++++++++++++++++++++
> 4 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 7a7bde246b..e6e78d85ce 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -16,6 +16,7 @@
> #define QEMU_SECCOMP_H
>
> #define OBSOLETE 0x0001
> +#define PRIVILEGED 0x0010
Err, this is hex, but you seem to be treating it as a binary
string. It would be better expressed as
#define OBSOLETE (1 << 0)
#define PRIVILEGED (1 << 1)
#define .... (1 << 2)
#define .... (1 << 3)
#define .... (1 << 4)
>
> + value = qemu_opt_get(opts, "elevateprivileges");
> + if (value) {
> + if (strcmp(value, "deny") == 0) {
> + seccomp_opts |= PRIVILEGED;
> + }
> + if (strcmp(value, "children") == 0) {
> + seccomp_opts |= PRIVILEGED;
> +
> + /* calling prctl directly because we're
> + * not sure if host has CAP_SYS_ADMIN set*/
> + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
> + error_report("failed to set no_new_privs "
> + "aborting");
> + }
The prctl() really ought to be done in seccomp_start IMHO.
> + }
Also it should report an error for invalid 'value' strings.
> + }
> +
> if (seccomp_start(seccomp_opts) < 0) {
> error_report("failed to install seccomp syscall filter "
> "in the kernel");
> --
> 2.13.3
>
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 28.07.2017 14:10, Eduardo Otubo wrote:
> This patch introduces the new argument
> [,elevateprivileges=allow|deny|children] to the `-sandbox on'. It allows
> or denies Qemu process to elevate its privileges by blacklisting all
> set*uid|gid system calls. The 'children' option will let forks and
> execves run unprivileged.
>
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
> include/sysemu/seccomp.h | 1 +
> qemu-options.hx | 9 ++++++---
> qemu-seccomp.c | 29 +++++++++++++++++++++++++++++
> vl.c | 22 ++++++++++++++++++++++
> 4 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 7a7bde246b..e6e78d85ce 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -16,6 +16,7 @@
> #define QEMU_SECCOMP_H
>
> #define OBSOLETE 0x0001
> +#define PRIVILEGED 0x0010
>
> #include <seccomp.h>
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 54e492f36a..34d33a812e 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4004,17 +4004,20 @@ Old param mode (ARM only).
> ETEXI
>
> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \
> - " obsolete: Allow obsolete system calls\n",
> + "-sandbox on[,obsolete=allow][,elevateprivileges=allow|deny|children] Enable seccomp mode 2 system call filter (default 'off').\n" \
Most other boolean-like options use "on|off" as possible values ...
maybe it would be nicer to use "on|off" instead of "allow|deny" here, too?
> + " obsolete: Allow obsolete system calls\n"
> + " elevateprivileges: allows or denies Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls. 'children' will deny set*uid|gid system calls for main Qemu process but will allow forks and execves to run unprivileged\n",
The correct spelling is "QEMU", in all capital letters, not "Qemu".
> QEMU_ARCH_ALL)
> STEXI
> -@item -sandbox @var{arg}[,obsolete=@var{string}]
> +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
> @findex -sandbox
> Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> disable it. The default is 'off'.
> @table @option
> @item obsolete=@var{string}
> Enable Obsolete system calls
> +@item elevateprivileges=@var{string}
> +Disable set*uid|gid systema calls
s/systema/system/
Thomas
© 2016 - 2026 Red Hat, Inc.