When using "-seccomp on", the seccomp policy is only applied to the
main thread, the vcpu worker thread and other worker threads created
after seccomp policy is applied; the seccomp policy is not applied to
e.g. the RCU thread because it is created before the seccomp policy is
applied and SECCOMP_FILTER_FLAG_TSYNC isn't used.
This can be verified with
for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done
Seccomp: 2
Seccomp: 0
Seccomp: 0
Seccomp: 2
Seccomp: 2
Seccomp: 2
Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use
seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy
on all threads.
Do it by default if possible, warn if not possible. Add an option to
set the tsync behaviour explicitly.
Note: we can't bump libseccomp to 2.2.0 since it's not available in
Debian oldstable (2.1.0).
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
qemu-seccomp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
qemu-options.hx | 2 ++
2 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index f0c833f3ca..aa23eae970 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -119,6 +119,45 @@ qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
#endif
}
+static bool qemu_seccomp_syscall_check(void)
+{
+ int rc;
+
+ /*
+ * this is an invalid call because the second argument is non-zero, but
+ * depending on the errno value of ENOSYS or EINVAL we can guess if the
+ * seccomp() syscal is supported or not
+ */
+ rc = qemu_seccomp(SECCOMP_SET_MODE_STRICT, 1, NULL);
+ if (rc < 0 && errno == EINVAL) {
+ return true;
+ }
+
+ return false;
+}
+
+static bool qemu_seccomp_get_default_tsync(void)
+{
+ bool tsync = true;
+
+ /* TSYNC support was added with the syscall */
+ if (!qemu_seccomp_syscall_check()) {
+ error_report("The host kernel doesn't support seccomp TSYNC!");
+ tsync = false;
+ }
+
+#if !(SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2)
+ error_report("libseccomp is too old to support TSYNC!");
+ tsync = false;
+#endif
+
+ if (!tsync) {
+ error_report("Only the main thread will be filtered by seccomp!");
+ }
+
+ return tsync;
+}
+
static uint32_t qemu_seccomp_get_kill_action(void)
{
#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \
@@ -136,7 +175,7 @@ static uint32_t qemu_seccomp_get_kill_action(void)
}
-static int seccomp_start(uint32_t seccomp_opts)
+static int seccomp_start(uint32_t seccomp_opts, bool tsync)
{
int rc = 0;
unsigned int i = 0;
@@ -149,6 +188,17 @@ static int seccomp_start(uint32_t seccomp_opts)
goto seccomp_return;
}
+ if (tsync) {
+#if SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2
+ rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1);
+#else
+ rc = -1;
+#endif
+ if (rc != 0) {
+ goto seccomp_return;
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
if (!(seccomp_opts & blacklist[i].set)) {
continue;
@@ -175,6 +225,13 @@ int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
| QEMU_SECCOMP_SET_OBSOLETE;
const char *value = NULL;
+ bool tsync;
+
+ if (qemu_opt_get(opts, "tsync")) {
+ tsync = qemu_opt_get_bool(opts, "tsync", true);
+ } else {
+ tsync = qemu_seccomp_get_default_tsync();
+ }
value = qemu_opt_get(opts, "obsolete");
if (value) {
@@ -236,7 +293,7 @@ int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
}
}
- if (seccomp_start(seccomp_opts) < 0) {
+ if (seccomp_start(seccomp_opts, tsync) < 0) {
error_report("failed to install seccomp syscall filter "
"in the kernel");
return -1;
@@ -271,6 +328,10 @@ static QemuOptsList qemu_sandbox_opts = {
.name = "resourcecontrol",
.type = QEMU_OPT_STRING,
},
+ {
+ .name = "tsync",
+ .type = QEMU_OPT_BOOL,
+ },
{ /* end of list */ }
},
};
diff --git a/qemu-options.hx b/qemu-options.hx
index 5515dfaba5..dafacb60c6 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
Disable *fork and execve
@item resourcecontrol=@var{string}
Disable process affinity and schedular priority
+@item tsync=@var{bool}
+Apply seccomp filter to all threads (default is auto, and will warn if fail)
@end table
ETEXI
--
2.18.0.547.g1d89318c48
On Wed, Aug 22, 2018 at 04:29:56PM +0200, Marc-André Lureau wrote:
> When using "-seccomp on", the seccomp policy is only applied to the
> main thread, the vcpu worker thread and other worker threads created
> after seccomp policy is applied; the seccomp policy is not applied to
> e.g. the RCU thread because it is created before the seccomp policy is
> applied and SECCOMP_FILTER_FLAG_TSYNC isn't used.
>
> This can be verified with
> for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done
> Seccomp: 2
> Seccomp: 0
> Seccomp: 0
> Seccomp: 2
> Seccomp: 2
> Seccomp: 2
>
> Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use
> seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy
> on all threads.
>
> Do it by default if possible, warn if not possible. Add an option to
> set the tsync behaviour explicitly.
>
> Note: we can't bump libseccomp to 2.2.0 since it's not available in
> Debian oldstable (2.1.0).
>
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
> qemu-seccomp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
> qemu-options.hx | 2 ++
> 2 files changed, 65 insertions(+), 2 deletions(-)
>
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index f0c833f3ca..aa23eae970 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -119,6 +119,45 @@ qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
> #endif
> }
>
> +static bool qemu_seccomp_syscall_check(void)
> +{
> + int rc;
> +
> + /*
> + * this is an invalid call because the second argument is non-zero, but
> + * depending on the errno value of ENOSYS or EINVAL we can guess if the
> + * seccomp() syscal is supported or not
> + */
> + rc = qemu_seccomp(SECCOMP_SET_MODE_STRICT, 1, NULL);
> + if (rc < 0 && errno == EINVAL) {
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static bool qemu_seccomp_get_default_tsync(void)
> +{
> + bool tsync = true;
> +
> + /* TSYNC support was added with the syscall */
> + if (!qemu_seccomp_syscall_check()) {
> + error_report("The host kernel doesn't support seccomp TSYNC!");
> + tsync = false;
> + }
> +
> +#if !(SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2)
> + error_report("libseccomp is too old to support TSYNC!");
> + tsync = false;
> +#endif
> +
> + if (!tsync) {
> + error_report("Only the main thread will be filtered by seccomp!");
At this point you might as well not bother using seccomp at all. The
thread that is confined merely needs to scribble something into the
stack of the unconfined thread and now it can do whatever it wants.
IMHO we need to find a way to get the policy to apply to those other
threads.
The RCU thread is tricky as it is spawned from a __constructor__
function, which means it'll be active way before we setup seccomp.
I think we need to figure out a way todo synchronization between
the RCU thread and the seccomp setup code. Could we have a global
variable 'int seccomp_initialized' that we check from the RCU
thread loop - when that toggles to non-zero, the RCU thread can
then call into the seccomp_start() method to activate policy in
its thread. We'd need a synchronous feedback mechansim back to
the main thread, as it must block startup until all the threads
have activated the seccomp filter.
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 5515dfaba5..dafacb60c6 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
> Disable *fork and execve
> @item resourcecontrol=@var{string}
> Disable process affinity and schedular priority
> +@item tsync=@var{bool}
> +Apply seccomp filter to all threads (default is auto, and will warn if fail)
IMHO this should never exist, as setting "tsync" to anything other
than "yes", is akin to just running without any sandbox.
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Wed, Aug 22, 2018 at 5:46 PM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> On Wed, Aug 22, 2018 at 04:29:56PM +0200, Marc-André Lureau wrote:
>> When using "-seccomp on", the seccomp policy is only applied to the
>> main thread, the vcpu worker thread and other worker threads created
>> after seccomp policy is applied; the seccomp policy is not applied to
>> e.g. the RCU thread because it is created before the seccomp policy is
>> applied and SECCOMP_FILTER_FLAG_TSYNC isn't used.
>>
>> This can be verified with
>> for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done
>> Seccomp: 2
>> Seccomp: 0
>> Seccomp: 0
>> Seccomp: 2
>> Seccomp: 2
>> Seccomp: 2
>>
>> Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use
>> seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy
>> on all threads.
>>
>> Do it by default if possible, warn if not possible. Add an option to
>> set the tsync behaviour explicitly.
>>
>> Note: we can't bump libseccomp to 2.2.0 since it's not available in
>> Debian oldstable (2.1.0).
>>
>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>> ---
>> qemu-seccomp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
>> qemu-options.hx | 2 ++
>> 2 files changed, 65 insertions(+), 2 deletions(-)
>>
>> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
>> index f0c833f3ca..aa23eae970 100644
>> --- a/qemu-seccomp.c
>> +++ b/qemu-seccomp.c
>> @@ -119,6 +119,45 @@ qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
>> #endif
>> }
>>
>> +static bool qemu_seccomp_syscall_check(void)
>> +{
>> + int rc;
>> +
>> + /*
>> + * this is an invalid call because the second argument is non-zero, but
>> + * depending on the errno value of ENOSYS or EINVAL we can guess if the
>> + * seccomp() syscal is supported or not
>> + */
>> + rc = qemu_seccomp(SECCOMP_SET_MODE_STRICT, 1, NULL);
>> + if (rc < 0 && errno == EINVAL) {
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +static bool qemu_seccomp_get_default_tsync(void)
>> +{
>> + bool tsync = true;
>> +
>> + /* TSYNC support was added with the syscall */
>> + if (!qemu_seccomp_syscall_check()) {
>> + error_report("The host kernel doesn't support seccomp TSYNC!");
>> + tsync = false;
>> + }
>> +
>> +#if !(SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2)
>> + error_report("libseccomp is too old to support TSYNC!");
>> + tsync = false;
>> +#endif
>> +
>> + if (!tsync) {
>> + error_report("Only the main thread will be filtered by seccomp!");
>
> At this point you might as well not bother using seccomp at all. The
> thread that is confined merely needs to scribble something into the
> stack of the unconfined thread and now it can do whatever it wants.
Actually, that message is incorrect, it should rather be "not all
threads will be filtered" (as described in commit message).
> IMHO we need to find a way to get the policy to apply to those other
> threads.
That's what the patch is about ;)
> The RCU thread is tricky as it is spawned from a __constructor__
> function, which means it'll be active way before we setup seccomp.
>
> I think we need to figure out a way todo synchronization between
> the RCU thread and the seccomp setup code. Could we have a global
> variable 'int seccomp_initialized' that we check from the RCU
> thread loop - when that toggles to non-zero, the RCU thread can
> then call into the seccomp_start() method to activate policy in
> its thread. We'd need a synchronous feedback mechansim back to
> the main thread, as it must block startup until all the threads
> have activated the seccomp filter.
That's a bit like TSYNC, except we do it ourself with RCU thread. But
what about other threads? For examples one that could be created by
external libraries (like mesa)
>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 5515dfaba5..dafacb60c6 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
>> Disable *fork and execve
>> @item resourcecontrol=@var{string}
>> Disable process affinity and schedular priority
>> +@item tsync=@var{bool}
>> +Apply seccomp filter to all threads (default is auto, and will warn if fail)
>
> IMHO this should never exist, as setting "tsync" to anything other
> than "yes", is akin to just running without any sandbox.
Then we should just fail -sandbox on those systems.
>
> Regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Wed, Aug 22, 2018 at 05:58:46PM +0200, Marc-André Lureau wrote:
> On Wed, Aug 22, 2018 at 5:46 PM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> > On Wed, Aug 22, 2018 at 04:29:56PM +0200, Marc-André Lureau wrote:
> >
> > At this point you might as well not bother using seccomp at all. The
> > thread that is confined merely needs to scribble something into the
> > stack of the unconfined thread and now it can do whatever it wants.
>
> Actually, that message is incorrect, it should rather be "not all
> threads will be filtered" (as described in commit message).
>
> > IMHO we need to find a way to get the policy to apply to those other
> > threads.
>
> That's what the patch is about ;)
It only does it in some scenarios, leaving other unfixed. We need
a solution (or choice of multiple solutions) that works all the time
>
> > The RCU thread is tricky as it is spawned from a __constructor__
> > function, which means it'll be active way before we setup seccomp.
> >
> > I think we need to figure out a way todo synchronization between
> > the RCU thread and the seccomp setup code. Could we have a global
> > variable 'int seccomp_initialized' that we check from the RCU
> > thread loop - when that toggles to non-zero, the RCU thread can
> > then call into the seccomp_start() method to activate policy in
> > its thread. We'd need a synchronous feedback mechansim back to
> > the main thread, as it must block startup until all the threads
> > have activated the seccomp filter.
>
> That's a bit like TSYNC, except we do it ourself with RCU thread. But
> what about other threads? For examples one that could be created by
> external libraries (like mesa)
Does mesa create threads from library constructors too, or somewhere
else *before* we do -seccomp setup ?
> >> diff --git a/qemu-options.hx b/qemu-options.hx
> >> index 5515dfaba5..dafacb60c6 100644
> >> --- a/qemu-options.hx
> >> +++ b/qemu-options.hx
> >> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
> >> Disable *fork and execve
> >> @item resourcecontrol=@var{string}
> >> Disable process affinity and schedular priority
> >> +@item tsync=@var{bool}
> >> +Apply seccomp filter to all threads (default is auto, and will warn if fail)
> >
> > IMHO this should never exist, as setting "tsync" to anything other
> > than "yes", is akin to just running without any sandbox.
>
> Then we should just fail -sandbox on those systems.
We would have to make libvirt probe for tsync support too, because it
now unconditionally uses -sandbox for new enough QEMU.
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Hi
On Wed, Aug 22, 2018 at 6:08 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Wed, Aug 22, 2018 at 05:58:46PM +0200, Marc-André Lureau wrote:
> > On Wed, Aug 22, 2018 at 5:46 PM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > On Wed, Aug 22, 2018 at 04:29:56PM +0200, Marc-André Lureau wrote:
> > >
> > > At this point you might as well not bother using seccomp at all. The
> > > thread that is confined merely needs to scribble something into the
> > > stack of the unconfined thread and now it can do whatever it wants.
> >
> > Actually, that message is incorrect, it should rather be "not all
> > threads will be filtered" (as described in commit message).
> >
> > > IMHO we need to find a way to get the policy to apply to those other
> > > threads.
> >
> > That's what the patch is about ;)
>
> It only does it in some scenarios, leaving other unfixed. We need
> a solution (or choice of multiple solutions) that works all the time
>
> >
> > > The RCU thread is tricky as it is spawned from a __constructor__
> > > function, which means it'll be active way before we setup seccomp.
> > >
> > > I think we need to figure out a way todo synchronization between
> > > the RCU thread and the seccomp setup code. Could we have a global
> > > variable 'int seccomp_initialized' that we check from the RCU
> > > thread loop - when that toggles to non-zero, the RCU thread can
> > > then call into the seccomp_start() method to activate policy in
> > > its thread. We'd need a synchronous feedback mechansim back to
> > > the main thread, as it must block startup until all the threads
> > > have activated the seccomp filter.
> >
> > That's a bit like TSYNC, except we do it ourself with RCU thread. But
> > what about other threads? For examples one that could be created by
> > external libraries (like mesa)
>
> Does mesa create threads from library constructors too, or somewhere
> else *before* we do -seccomp setup ?
That was an example, I don't think mesa creates threads before
-seccomp. But what about the other 100 dependencies, or if we
introduce other threads without the seccomp sync by mistake? I think
we are better off using tsync.
>
> > >> diff --git a/qemu-options.hx b/qemu-options.hx
> > >> index 5515dfaba5..dafacb60c6 100644
> > >> --- a/qemu-options.hx
> > >> +++ b/qemu-options.hx
> > >> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
> > >> Disable *fork and execve
> > >> @item resourcecontrol=@var{string}
> > >> Disable process affinity and schedular priority
> > >> +@item tsync=@var{bool}
> > >> +Apply seccomp filter to all threads (default is auto, and will warn if fail)
> > >
> > > IMHO this should never exist, as setting "tsync" to anything other
> > > than "yes", is akin to just running without any sandbox.
> >
> > Then we should just fail -sandbox on those systems.
>
> We would have to make libvirt probe for tsync support too, because it
> now unconditionally uses -sandbox for new enough QEMU.
sigh :( that's where the -sandbox tsync option could have been helpful
keeping the compatibility.
--
Marc-André Lureau
Hi On Wed, Aug 22, 2018 at 6:37 PM Marc-André Lureau <marcandre.lureau@gmail.com> wrote: > > Hi > > On Wed, Aug 22, 2018 at 6:08 PM Daniel P. Berrangé <berrange@redhat.com> wrote: > > We would have to make libvirt probe for tsync support too, because it > > now unconditionally uses -sandbox for new enough QEMU. > > sigh :( that's where the -sandbox tsync option could have been helpful > keeping the compatibility. So what can libvirt do if tsync is not available? -- Marc-André Lureau
On Wed, Aug 22, 2018 at 06:39:52PM +0200, Marc-André Lureau wrote: > Hi > > On Wed, Aug 22, 2018 at 6:37 PM Marc-André Lureau > <marcandre.lureau@gmail.com> wrote: > > > > Hi > > > > On Wed, Aug 22, 2018 at 6:08 PM Daniel P. Berrangé <berrange@redhat.com> wrote: > > > We would have to make libvirt probe for tsync support too, because it > > > now unconditionally uses -sandbox for new enough QEMU. > > > > sigh :( that's where the -sandbox tsync option could have been helpful > > keeping the compatibility. > > So what can libvirt do if tsync is not available? It depends how libvirt is configured. If /etc/libvirt/qemu.conf has seccomp=1, then we'd blindly start QEMU and expect QEMU to fail because -sandbox can't be usefully enforced. If qemu.conf has "seccomp" unset, then we'd simply not use -sandbox flag for any guests. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Wed, Aug 22, 2018 at 06:37:56PM +0200, Marc-André Lureau wrote: > Hi > > On Wed, Aug 22, 2018 at 6:08 PM Daniel P. Berrangé <berrange@redhat.com> wrote: > > > > On Wed, Aug 22, 2018 at 05:58:46PM +0200, Marc-André Lureau wrote: > > > On Wed, Aug 22, 2018 at 5:46 PM, Daniel P. Berrangé <berrange@redhat.com> wrote: > > > > On Wed, Aug 22, 2018 at 04:29:56PM +0200, Marc-André Lureau wrote: > > > > > > > > At this point you might as well not bother using seccomp at all. The > > > > thread that is confined merely needs to scribble something into the > > > > stack of the unconfined thread and now it can do whatever it wants. > > > > > > Actually, that message is incorrect, it should rather be "not all > > > threads will be filtered" (as described in commit message). > > > > > > > IMHO we need to find a way to get the policy to apply to those other > > > > threads. > > > > > > That's what the patch is about ;) > > > > It only does it in some scenarios, leaving other unfixed. We need > > a solution (or choice of multiple solutions) that works all the time > > > > > > > > > The RCU thread is tricky as it is spawned from a __constructor__ > > > > function, which means it'll be active way before we setup seccomp. > > > > > > > > I think we need to figure out a way todo synchronization between > > > > the RCU thread and the seccomp setup code. Could we have a global > > > > variable 'int seccomp_initialized' that we check from the RCU > > > > thread loop - when that toggles to non-zero, the RCU thread can > > > > then call into the seccomp_start() method to activate policy in > > > > its thread. We'd need a synchronous feedback mechansim back to > > > > the main thread, as it must block startup until all the threads > > > > have activated the seccomp filter. > > > > > > That's a bit like TSYNC, except we do it ourself with RCU thread. But > > > what about other threads? For examples one that could be created by > > > external libraries (like mesa) > > > > Does mesa create threads from library constructors too, or somewhere > > else *before* we do -seccomp setup ? > > That was an example, I don't think mesa creates threads before > -seccomp. But what about the other 100 dependencies, or if we > introduce other threads without the seccomp sync by mistake? I think > we are better off using tsync. Yeah we would have to actively check whether any unexpected threads existed or not. > > > > IMHO this should never exist, as setting "tsync" to anything other > > > > than "yes", is akin to just running without any sandbox. > > > > > > Then we should just fail -sandbox on those systems. > > > > We would have to make libvirt probe for tsync support too, because it > > now unconditionally uses -sandbox for new enough QEMU. > > sigh :( that's where the -sandbox tsync option could have been helpful > keeping the compatibility. Probably if a distro knows they have a kernel which doesn't support it, then should just biuld QEMU with seccomp disabled, at which point the -sandbox arg stops being reported and libvirt "does the right thing" IOW, most people probably won't hit the runtime check. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 08/22/2018 10:58 AM, Marc-André Lureau wrote:
>> At this point you might as well not bother using seccomp at all. The
>> thread that is confined merely needs to scribble something into the
>> stack of the unconfined thread and now it can do whatever it wants.
>
> Actually, that message is incorrect, it should rather be "not all
> threads will be filtered" (as described in commit message).
>
>> IMHO we need to find a way to get the policy to apply to those other
>> threads.
>
> That's what the patch is about ;)
In other words, this patch is patching the gaping security hole that
already exists, but...
>>> +++ b/qemu-options.hx
>>> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
>>> Disable *fork and execve
>>> @item resourcecontrol=@var{string}
>>> Disable process affinity and schedular priority
>>> +@item tsync=@var{bool}
>>> +Apply seccomp filter to all threads (default is auto, and will warn if fail)
>>
>> IMHO this should never exist, as setting "tsync" to anything other
>> than "yes", is akin to just running without any sandbox.
>
> Then we should just fail -sandbox on those systems.
...leaving the backdoor open. Yes, we should instead fix things to hard
fail when -sandbox cannot fully protect the process, rather than adding
a tsync=off backdoor to permit execution in spite of the insecurity.
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3266
Virtualization: qemu.org | libvirt.org
Hi
On Wed, Aug 22, 2018 at 6:07 PM, Eric Blake <eblake@redhat.com> wrote:
> On 08/22/2018 10:58 AM, Marc-André Lureau wrote:
>
>>> At this point you might as well not bother using seccomp at all. The
>>> thread that is confined merely needs to scribble something into the
>>> stack of the unconfined thread and now it can do whatever it wants.
>>
>>
>> Actually, that message is incorrect, it should rather be "not all
>> threads will be filtered" (as described in commit message).
>>
>>> IMHO we need to find a way to get the policy to apply to those other
>>> threads.
>>
>>
>> That's what the patch is about ;)
>
>
> In other words, this patch is patching the gaping security hole that already
> exists, but...
>
>>>> +++ b/qemu-options.hx
>>>> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
>>>> Disable *fork and execve
>>>> @item resourcecontrol=@var{string}
>>>> Disable process affinity and schedular priority
>>>> +@item tsync=@var{bool}
>>>> +Apply seccomp filter to all threads (default is auto, and will warn if
>>>> fail)
>>>
>>>
>>> IMHO this should never exist, as setting "tsync" to anything other
>>> than "yes", is akin to just running without any sandbox.
>>
>>
>> Then we should just fail -sandbox on those systems.
>
>
> ...leaving the backdoor open. Yes, we should instead fix things to hard
> fail when -sandbox cannot fully protect the process, rather than adding a
> tsync=off backdoor to permit execution in spite of the insecurity.
Ok, -sandbox will now require libseccomp 2.2.0 (not available in
Debian oldstable - so it will fail at configure time) and kernel >=
3.17 (error during start). If that sounds ok, I'll update the series.
On Wed, Aug 22, 2018 at 06:19:16PM +0200, Marc-André Lureau wrote:
> Hi
>
> On Wed, Aug 22, 2018 at 6:07 PM, Eric Blake <eblake@redhat.com> wrote:
> > On 08/22/2018 10:58 AM, Marc-André Lureau wrote:
> >
> >>> At this point you might as well not bother using seccomp at all. The
> >>> thread that is confined merely needs to scribble something into the
> >>> stack of the unconfined thread and now it can do whatever it wants.
> >>
> >>
> >> Actually, that message is incorrect, it should rather be "not all
> >> threads will be filtered" (as described in commit message).
> >>
> >>> IMHO we need to find a way to get the policy to apply to those other
> >>> threads.
> >>
> >>
> >> That's what the patch is about ;)
> >
> >
> > In other words, this patch is patching the gaping security hole that already
> > exists, but...
> >
> >>>> +++ b/qemu-options.hx
> >>>> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
> >>>> Disable *fork and execve
> >>>> @item resourcecontrol=@var{string}
> >>>> Disable process affinity and schedular priority
> >>>> +@item tsync=@var{bool}
> >>>> +Apply seccomp filter to all threads (default is auto, and will warn if
> >>>> fail)
> >>>
> >>>
> >>> IMHO this should never exist, as setting "tsync" to anything other
> >>> than "yes", is akin to just running without any sandbox.
> >>
> >>
> >> Then we should just fail -sandbox on those systems.
> >
> >
> > ...leaving the backdoor open. Yes, we should instead fix things to hard
> > fail when -sandbox cannot fully protect the process, rather than adding a
> > tsync=off backdoor to permit execution in spite of the insecurity.
>
> Ok, -sandbox will now require libseccomp 2.2.0 (not available in
> Debian oldstable - so it will fail at configure time) and kernel >=
> 3.17 (error during start). If that sounds ok, I'll update the series.
Hmm, that will cause seccomp to be unusable for RHEL-7, prior to
the 7.5 kernel, which has complications wrt libvirt using -sandbox
by default.
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Wed, Aug 22, 2018 at 05:43:36PM +0100, Daniel P. Berrangé wrote:
> On Wed, Aug 22, 2018 at 06:19:16PM +0200, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Aug 22, 2018 at 6:07 PM, Eric Blake <eblake@redhat.com> wrote:
> > > On 08/22/2018 10:58 AM, Marc-André Lureau wrote:
> > >
> > >>> At this point you might as well not bother using seccomp at all. The
> > >>> thread that is confined merely needs to scribble something into the
> > >>> stack of the unconfined thread and now it can do whatever it wants.
> > >>
> > >>
> > >> Actually, that message is incorrect, it should rather be "not all
> > >> threads will be filtered" (as described in commit message).
> > >>
> > >>> IMHO we need to find a way to get the policy to apply to those other
> > >>> threads.
> > >>
> > >>
> > >> That's what the patch is about ;)
> > >
> > >
> > > In other words, this patch is patching the gaping security hole that already
> > > exists, but...
> > >
> > >>>> +++ b/qemu-options.hx
> > >>>> @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls
> > >>>> Disable *fork and execve
> > >>>> @item resourcecontrol=@var{string}
> > >>>> Disable process affinity and schedular priority
> > >>>> +@item tsync=@var{bool}
> > >>>> +Apply seccomp filter to all threads (default is auto, and will warn if
> > >>>> fail)
> > >>>
> > >>>
> > >>> IMHO this should never exist, as setting "tsync" to anything other
> > >>> than "yes", is akin to just running without any sandbox.
> > >>
> > >>
> > >> Then we should just fail -sandbox on those systems.
> > >
> > >
> > > ...leaving the backdoor open. Yes, we should instead fix things to hard
> > > fail when -sandbox cannot fully protect the process, rather than adding a
> > > tsync=off backdoor to permit execution in spite of the insecurity.
> >
> > Ok, -sandbox will now require libseccomp 2.2.0 (not available in
> > Debian oldstable - so it will fail at configure time) and kernel >=
> > 3.17 (error during start). If that sounds ok, I'll update the series.
>
> Hmm, that will cause seccomp to be unusable for RHEL-7, prior to
> the 7.5 kernel, which has complications wrt libvirt using -sandbox
> by default.
None the less, lets just do what you propose here.
Distros without kernel support should explicitly disable seccomp in QEMU
at build time, then libvirt will "do the right thing".
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
© 2016 - 2025 Red Hat, Inc.