[PATCH v2 9/9] qemu: Place helper processes into the same trusted group

Michal Privoznik posted 9 patches 3 years, 7 months ago
There is a newer version of this series
[PATCH v2 9/9] qemu: Place helper processes into the same trusted group
Posted by Michal Privoznik 3 years, 7 months ago
Since the level of trust that QEMU has is the same level of trust
that helper processes have there's no harm in placing all of them
into the same group.

Unfortunately, since these processes are started before QEMU we
can't use brand new virCommand*() APIs (those are used on hotplug
though) and have to use the low level virProcess*() APIs.

Moreover, because there no (kernel) API that would copy cookie
from one process to another WITHOUT modifying the cookie of the
process that's doing the copy, we have to fork() and use
available copy APIs.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_extdevice.c | 120 ++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_extdevice.h |   3 +
 src/qemu/qemu_process.c   |   4 ++
 3 files changed, 127 insertions(+)

diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c
index b8e3c1000a..41368a9cea 100644
--- a/src/qemu/qemu_extdevice.c
+++ b/src/qemu/qemu_extdevice.c
@@ -335,3 +335,123 @@ qemuExtDevicesSetupCgroup(virQEMUDriver *driver,
 
     return 0;
 }
+
+
+static int
+qemuExtDevicesSetupSchedHelper(pid_t ppid G_GNUC_UNUSED,
+                               void *opaque)
+{
+    GSList *pids = opaque;
+    GSList *next;
+    pid_t vmPid;
+
+    /* The first item on the list is special: it's the PID of the
+     * QEMU that has the cookie we want to copy to the rest. */
+    vmPid = GPOINTER_TO_INT(pids->data);
+    if (virProcessSchedCoreShareFrom(vmPid) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to get core group of: %lld"),
+                             (long long) vmPid);
+        return -1;
+    }
+
+    VIR_DEBUG("SCHED_CORE: vmPid = %lld", (long long) vmPid);
+
+    for (next = pids->next; next; next = next->next) {
+        pid_t pid = GPOINTER_TO_INT(next->data);
+
+        VIR_DEBUG("SCHED_CORE: share to %lld", (long long) pid);
+        if (virProcessSchedCoreShareTo(pid) < 0) {
+            virReportSystemError(errno,
+                                 _("Unable to share core group to: %lld"),
+                                 (long long) pid);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+
+int
+qemuExtDevicesSetupSched(virQEMUDriver *driver,
+                         virDomainObj *vm)
+{
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    virDomainDef *def = vm->def;
+    g_autofree char *shortname = NULL;
+    g_autoptr(GSList) pids = NULL;
+    size_t i;
+    pid_t cpid = -1;
+
+    if (cfg->schedCore != QEMU_SCHED_CORE_FULL)
+        return 0;
+
+    shortname = virDomainDefGetShortName(def);
+    if (!shortname)
+        return -1;
+
+    if (qemuDBusGetPID(driver, vm, &cpid) < 0)
+        return -1;
+
+    if (cpid != -1)
+        pids = g_slist_prepend(pids, GINT_TO_POINTER(cpid));
+
+    for (i = 0; i < def->nvideos; i++) {
+        virDomainVideoDef *video = def->videos[i];
+
+        if (video->backend != VIR_DOMAIN_VIDEO_BACKEND_TYPE_VHOSTUSER)
+            continue;
+
+        if (qemuVhostUserGPUGetPid(cfg->stateDir, shortname, video->info.alias, &cpid) < 0)
+            return -1;
+
+        if (cpid != -1)
+            pids = g_slist_prepend(pids, GINT_TO_POINTER(cpid));
+    }
+
+    for (i = 0; i < def->nnets; i++) {
+        virDomainNetDef *net = def->nets[i];
+        qemuSlirp *slirp = QEMU_DOMAIN_NETWORK_PRIVATE(net)->slirp;
+
+        if (slirp && slirp->pid != -1)
+            pids = g_slist_prepend(pids, GINT_TO_POINTER(slirp->pid));
+    }
+
+    for (i = 0; i < def->ntpms; i++) {
+        virDomainTPMDef *tpm = def->tpms[i];
+
+        if (tpm->type != VIR_DOMAIN_TPM_TYPE_EMULATOR)
+            continue;
+
+        if (qemuTPMEmulatorGetPid(cfg->swtpmStateDir, shortname, &cpid) < 0)
+            return -1;
+
+        if (cpid != -1)
+            pids = g_slist_prepend(pids, GINT_TO_POINTER(cpid));
+    }
+
+    for (i = 0; i < def->nfss; i++) {
+        virDomainFSDef *fs = def->fss[i];
+
+        if (fs->sock ||
+            fs->fsdriver != VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS)
+            continue;
+
+        if (qemuVirtioFSGetPid(vm, fs, &cpid) < 0)
+            return -1;
+
+        if (cpid != -1)
+            pids = g_slist_prepend(pids, GINT_TO_POINTER(cpid));
+    }
+
+    /* Exit early if there's nothing to do, to avoid needless fork. */
+    if (!pids)
+        return 0;
+
+    pids = g_slist_prepend(pids, GINT_TO_POINTER(vm->pid));
+
+    /* Unfortunately, there's no better way of copying scheduling
+     * cookies than fork(). */
+    return virProcessRunInFork(qemuExtDevicesSetupSchedHelper, pids);
+}
diff --git a/src/qemu/qemu_extdevice.h b/src/qemu/qemu_extdevice.h
index 43d2a4dfff..02397adc6c 100644
--- a/src/qemu/qemu_extdevice.h
+++ b/src/qemu/qemu_extdevice.h
@@ -59,3 +59,6 @@ bool qemuExtDevicesHasDevice(virDomainDef *def);
 int qemuExtDevicesSetupCgroup(virQEMUDriver *driver,
                               virDomainObj *vm,
                               virCgroup *cgroup);
+
+int qemuExtDevicesSetupSched(virQEMUDriver *driver,
+                             virDomainObj *vm);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 86c058316f..eb8dfb8f11 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -7714,6 +7714,10 @@ qemuProcessLaunch(virConnectPtr conn,
     if (qemuSetupCgroupForExtDevices(vm, driver) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting SCHED_CORE for external devices (if required)");
+    if (qemuExtDevicesSetupSched(driver, vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting up resctrl");
     if (qemuProcessResctrlCreate(driver, vm) < 0)
         goto cleanup;
-- 
2.35.1
Re: [PATCH v2 9/9] qemu: Place helper processes into the same trusted group
Posted by Daniel P. Berrangé 3 years, 7 months ago
On Mon, Jun 27, 2022 at 12:44:41PM +0200, Michal Privoznik wrote:
> Since the level of trust that QEMU has is the same level of trust
> that helper processes have there's no harm in placing all of them
> into the same group.
> 
> Unfortunately, since these processes are started before QEMU we
> can't use brand new virCommand*() APIs (those are used on hotplug
> though) and have to use the low level virProcess*() APIs.
> 
> Moreover, because there no (kernel) API that would copy cookie
> from one process to another WITHOUT modifying the cookie of the
> process that's doing the copy, we have to fork() and use
> available copy APIs.
> 
> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
> ---
>  src/qemu/qemu_extdevice.c | 120 ++++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_extdevice.h |   3 +
>  src/qemu/qemu_process.c   |   4 ++
>  3 files changed, 127 insertions(+)
> 
> diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c
> index b8e3c1000a..41368a9cea 100644
> --- a/src/qemu/qemu_extdevice.c
> +++ b/src/qemu/qemu_extdevice.c
> @@ -335,3 +335,123 @@ qemuExtDevicesSetupCgroup(virQEMUDriver *driver,
>  
>      return 0;
>  }
> +
> +
> +static int
> +qemuExtDevicesSetupSchedHelper(pid_t ppid G_GNUC_UNUSED,
> +                               void *opaque)
> +{
> +    GSList *pids = opaque;
> +    GSList *next;
> +    pid_t vmPid;
> +
> +    /* The first item on the list is special: it's the PID of the
> +     * QEMU that has the cookie we want to copy to the rest. */
> +    vmPid = GPOINTER_TO_INT(pids->data);
> +    if (virProcessSchedCoreShareFrom(vmPid) < 0) {
> +        virReportSystemError(errno,
> +                             _("Unable to get core group of: %lld"),
> +                             (long long) vmPid);
> +        return -1;
> +    }
> +
> +    VIR_DEBUG("SCHED_CORE: vmPid = %lld", (long long) vmPid);
> +
> +    for (next = pids->next; next; next = next->next) {
> +        pid_t pid = GPOINTER_TO_INT(next->data);
> +
> +        VIR_DEBUG("SCHED_CORE: share to %lld", (long long) pid);
> +        if (virProcessSchedCoreShareTo(pid) < 0) {
> +            virReportSystemError(errno,
> +                                 _("Unable to share core group to: %lld"),
> +                                 (long long) pid);
> +            return -1;
> +        }
> +    }

The helper processes can have many threads, but this virProcessSchedCoreShareTo
call only sets scheduling cookie for a single thread.

It would need to use SCOPE_THREAD_GROUP, except even that is not sufficient
as the helper may have fork+exec'd another helper by this point, and our
call will only affect the first process.

IOW, to set core scheduling cookies on the helpers, we need to set them
upfront at the time we spawn the helper.

IOW, during startup, IIUC, we need to fork  a dummy process solely to
call PR_SCHED_CORE_CREATE. Then when forking anything else, whether a
helper, or QEMU itself, we need to pull the cookie from that dummy
process, and then finally kill that dummy process.

If hotplugging a device, we won't need the dummy process, we can pull
the cookie from the running QEMU.


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH v2 9/9] qemu: Place helper processes into the same trusted group
Posted by Michal Prívozník 3 years, 6 months ago
On 7/13/22 18:25, Daniel P. Berrangé wrote:
> On Mon, Jun 27, 2022 at 12:44:41PM +0200, Michal Privoznik wrote:
>> Since the level of trust that QEMU has is the same level of trust
>> that helper processes have there's no harm in placing all of them
>> into the same group.
>>
>> Unfortunately, since these processes are started before QEMU we
>> can't use brand new virCommand*() APIs (those are used on hotplug
>> though) and have to use the low level virProcess*() APIs.
>>
>> Moreover, because there no (kernel) API that would copy cookie
>> from one process to another WITHOUT modifying the cookie of the
>> process that's doing the copy, we have to fork() and use
>> available copy APIs.
>>
>> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
>> ---
>>  src/qemu/qemu_extdevice.c | 120 ++++++++++++++++++++++++++++++++++++++
>>  src/qemu/qemu_extdevice.h |   3 +
>>  src/qemu/qemu_process.c   |   4 ++
>>  3 files changed, 127 insertions(+)
>>
>> diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c
>> index b8e3c1000a..41368a9cea 100644
>> --- a/src/qemu/qemu_extdevice.c
>> +++ b/src/qemu/qemu_extdevice.c
>> @@ -335,3 +335,123 @@ qemuExtDevicesSetupCgroup(virQEMUDriver *driver,
>>  
>>      return 0;
>>  }
>> +
>> +
>> +static int
>> +qemuExtDevicesSetupSchedHelper(pid_t ppid G_GNUC_UNUSED,
>> +                               void *opaque)
>> +{
>> +    GSList *pids = opaque;
>> +    GSList *next;
>> +    pid_t vmPid;
>> +
>> +    /* The first item on the list is special: it's the PID of the
>> +     * QEMU that has the cookie we want to copy to the rest. */
>> +    vmPid = GPOINTER_TO_INT(pids->data);
>> +    if (virProcessSchedCoreShareFrom(vmPid) < 0) {
>> +        virReportSystemError(errno,
>> +                             _("Unable to get core group of: %lld"),
>> +                             (long long) vmPid);
>> +        return -1;
>> +    }
>> +
>> +    VIR_DEBUG("SCHED_CORE: vmPid = %lld", (long long) vmPid);
>> +
>> +    for (next = pids->next; next; next = next->next) {
>> +        pid_t pid = GPOINTER_TO_INT(next->data);
>> +
>> +        VIR_DEBUG("SCHED_CORE: share to %lld", (long long) pid);
>> +        if (virProcessSchedCoreShareTo(pid) < 0) {
>> +            virReportSystemError(errno,
>> +                                 _("Unable to share core group to: %lld"),
>> +                                 (long long) pid);
>> +            return -1;
>> +        }
>> +    }
> 
> The helper processes can have many threads, but this virProcessSchedCoreShareTo
> call only sets scheduling cookie for a single thread.
> 
> It would need to use SCOPE_THREAD_GROUP, except even that is not sufficient
> as the helper may have fork+exec'd another helper by this point, and our
> call will only affect the first process.
> 
> IOW, to set core scheduling cookies on the helpers, we need to set them
> upfront at the time we spawn the helper.
> 
> IOW, during startup, IIUC, we need to fork  a dummy process solely to
> call PR_SCHED_CORE_CREATE. Then when forking anything else, whether a
> helper, or QEMU itself, we need to pull the cookie from that dummy
> process, and then finally kill that dummy process.
> 
> If hotplugging a device, we won't need the dummy process, we can pull
> the cookie from the running QEMU.

Yeah. I've missed this fact. That will need some rework though. Let me
do that in v3.

Michal

Re: [PATCH v2 9/9] qemu: Place helper processes into the same trusted group
Posted by Dario Faggioli 3 years, 6 months ago
On Wed, 2022-07-13 at 17:25 +0100, Daniel P. Berrangé wrote:
> It would need to use SCOPE_THREAD_GROUP, except even that is not
> sufficient
> as the helper may have fork+exec'd another helper by this point, and
> our
> call will only affect the first process.
> 
> IOW, to set core scheduling cookies on the helpers, we need to set
> them
> upfront at the time we spawn the helper.
> 
> IOW, during startup, IIUC, we need to fork  a dummy process solely to
> call PR_SCHED_CORE_CREATE. Then when forking anything else, whether a
> helper, or QEMU itself, we need to pull the cookie from that dummy
> process, and then finally kill that dummy process.
> 
Yes. Not pretty, but if forks are involved, that's the solution, I
think.

An alternative would be to create/pull the cookie in the main process
(the one that forks all the helpers), then do fork all helpers, and
then "clear" the cookie for it.

But I guess that would be even worse...

Regards
-- 
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)