From nobody Mon Feb 9 01:00:56 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) client-ip=209.132.183.28; envelope-from=libvir-list-bounces@redhat.com; helo=mx1.redhat.com; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) smtp.mailfrom=libvir-list-bounces@redhat.com; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1569514894; cv=none; d=zoho.com; s=zohoarc; b=KgV64Ge2LcCc8AocR4XjqCBvX7e3kFfZq2ntyzN7uh0jnZ5GnBg3Hl1jrSGZi84trGcHe+TuZ/GFhiHMGGOq6KUDsRrgfmvngqa5Y2IefKDyoc0eZpRAOQ2MdLyAbirMwCkhelshozR86Zgf1gBtLZs6SxJuXIT/FkRQS20GWIA= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zoho.com; s=zohoarc; t=1569514894; h=Content-Type:Content-Transfer-Encoding:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To:ARC-Authentication-Results; bh=6YWPDjZbg51QWM0a1m7osD/b9hFGUvviciUdCdcfa0A=; b=Wh+gn/qlvr3iB83ui2Mepv4owOQPxE+BKpwbrgWFfgAU+FlDu7VCz0wOZva2/ohd6wd2wCb4pXbhzk4Jcpj8xcynOOE+jLfIpIh821hsr6sPiZv5Vx22u0n4lE/r12UVUrczX9DAmL+vJJ/xLIAjhuol741O8EKch+UelePsWBs= ARC-Authentication-Results: i=1; mx.zoho.com; spf=pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) smtp.mailfrom=libvir-list-bounces@redhat.com; dmarc=pass header.from= (p=none dis=none) header.from= Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by mx.zohomail.com with SMTPS id 156951489403040.21004498943546; Thu, 26 Sep 2019 09:21:34 -0700 (PDT) Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 488A03175296; Thu, 26 Sep 2019 16:21:32 +0000 (UTC) Received: from 
colo-mx.corp.redhat.com (colo-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.20]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 215885D9C3; Thu, 26 Sep 2019 16:21:32 +0000 (UTC) Received: from lists01.pubmisc.prod.ext.phx2.redhat.com (lists01.pubmisc.prod.ext.phx2.redhat.com [10.5.19.33]) by colo-mx.corp.redhat.com (Postfix) with ESMTP id CDFAE1803517; Thu, 26 Sep 2019 16:21:31 +0000 (UTC) Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) by lists01.pubmisc.prod.ext.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id x8QGEiIE003791 for ; Thu, 26 Sep 2019 12:14:44 -0400 Received: by smtp.corp.redhat.com (Postfix) id BCEB05D9C3; Thu, 26 Sep 2019 16:14:44 +0000 (UTC) Received: from moe.brq.redhat.com (unknown [10.43.2.30]) by smtp.corp.redhat.com (Postfix) with ESMTP id 46B105D9D5 for ; Thu, 26 Sep 2019 16:14:40 +0000 (UTC) From: Michal Privoznik To: libvir-list@redhat.com Date: Thu, 26 Sep 2019 18:12:26 +0200 Message-Id: In-Reply-To: References: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.14 X-loop: libvir-list@redhat.com Subject: [libvirt] [PATCH v2 30/39] qemu: Allow NVMe disk in CGroups X-BeenThere: libvir-list@redhat.com X-Mailman-Version: 2.1.12 Precedence: junk List-Id: Development discussions about the libvirt library & tools List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Transfer-Encoding: quoted-printable Sender: libvir-list-bounces@redhat.com Errors-To: libvir-list-bounces@redhat.com X-Scanned-By: MIMEDefang 2.79 on 10.5.11.14 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.49]); Thu, 26 Sep 2019 16:21:33 +0000 (UTC) Content-Type: text/plain; charset="utf-8" If a domain has an NVMe disk configured, then we need to allow it in the devices CGroup so that qemu can access it. There is one caveat though - even if an NVMe disk is read-only, we need the CGroup to allow writes too. 
This is because when opening the device, qemu does a couple of ioctl()-s which are considered writes. Signed-off-by: Michal Privoznik --- src/qemu/qemu_cgroup.c | 101 +++++++++++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 29 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 9684bf3662..b9fb0ebca2 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -119,10 +119,30 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm, virStorageSourcePtr src, bool forceReadonly) { - if (!src->path || !virStorageSourceIsLocalStorage(src)) { - VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", - NULLSTR(src->path), virStorageTypeToString(src->type)); - return 0; + VIR_AUTOFREE(char *) path =3D NULL; + bool readonly =3D src->readonly || forceReadonly; + + if (src->type =3D=3D VIR_STORAGE_TYPE_NVME) { + /* Even though disk is R/O we can't make it so in + * CGroups. QEMU will try to do some ioctl()-s over the + * device and such operations are considered R/W by the + * kernel */ + readonly =3D false; + + if (!(path =3D virPCIDeviceAddressGetIOMMUGroupDev(&src->nvme->pci= Addr))) + return -1; + + if (qemuSetupImagePathCgroup(vm, QEMU_DEV_VFIO, false) < 0) + return -1; + } else { + if (!src->path || !virStorageSourceIsLocalStorage(src)) { + VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", + NULLSTR(src->path), virStorageTypeToString(src->type= )); + return 0; + } + + if (VIR_STRDUP(path, src->path) < 0) + return -1; } =20 if (virStoragePRDefIsManaged(src->pr) && @@ -130,7 +150,7 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm, qemuSetupImagePathCgroup(vm, QEMU_DEVICE_MAPPER_CONTROL_PATH, fals= e) < 0) return -1; =20 - return qemuSetupImagePathCgroup(vm, src->path, src->readonly || forceR= eadonly); + return qemuSetupImagePathCgroup(vm, path, readonly); } =20 =20 @@ -147,7 +167,10 @@ qemuTeardownImageCgroup(virDomainObjPtr vm, virStorageSourcePtr src) { qemuDomainObjPrivatePtr priv =3D 
vm->privateData; + VIR_AUTOFREE(char *path) =3D NULL; int perms =3D VIR_CGROUP_DEVICE_RWM; + bool hasPR =3D false; + bool hasNVMe =3D false; size_t i; int ret; =20 @@ -155,41 +178,61 @@ qemuTeardownImageCgroup(virDomainObjPtr vm, VIR_CGROUP_CONTROLLER_DEVICES)) return 0; =20 - if (!src->path || !virStorageSourceIsLocalStorage(src)) { - VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", - NULLSTR(src->path), virStorageTypeToString(src->type)); - return 0; + for (i =3D 0; i < vm->def->ndisks; i++) { + virStorageSourcePtr diskSrc =3D vm->def->disks[i]->src; + + if (src =3D=3D diskSrc) + continue; + + if (virStoragePRDefIsManaged(diskSrc->pr)) + hasPR =3D true; + + if (virStorageSourceChainHasNVMe(diskSrc)) + hasNVMe =3D true; } =20 - if (virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) { - for (i =3D 0; i < vm->def->ndisks; i++) { - virStorageSourcePtr diskSrc =3D vm->def->disks[i]->src; + if (src->type =3D=3D VIR_STORAGE_TYPE_NVME) { + if (!(path =3D virPCIDeviceAddressGetIOMMUGroupDev(&src->nvme->pci= Addr))) + return -1; =20 - if (src =3D=3D diskSrc) - continue; - - if (virStoragePRDefIsManaged(diskSrc->pr)) - break; - } - - if (i =3D=3D vm->def->ndisks) { - VIR_DEBUG("Disabling device mapper control"); - ret =3D virCgroupDenyDevicePath(priv->cgroup, - QEMU_DEVICE_MAPPER_CONTROL_PATH, - perms, true); + if (!hasNVMe && + !qemuDomainNeedsVFIO(vm->def)) { + ret =3D virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, p= erms, true); virDomainAuditCgroupPath(vm, priv->cgroup, "deny", - QEMU_DEVICE_MAPPER_CONTROL_PATH, + QEMU_DEV_VFIO, virCgroupGetDevicePermsString(perms),= ret); if (ret < 0) - return ret; + return -1; } + } else { + if (!src->path || !virStorageSourceIsLocalStorage(src)) { + VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s", + NULLSTR(src->path), virStorageTypeToString(src->type= )); + return 0; + } + + if (VIR_STRDUP(path, src->path) < 0) + return -1; + } + + if (!hasPR && + virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) { + 
VIR_DEBUG("Disabling device mapper control"); + ret =3D virCgroupDenyDevicePath(priv->cgroup, + QEMU_DEVICE_MAPPER_CONTROL_PATH, + perms, true); + virDomainAuditCgroupPath(vm, priv->cgroup, "deny", + QEMU_DEVICE_MAPPER_CONTROL_PATH, + virCgroupGetDevicePermsString(perms), ret= ); + if (ret < 0) + return ret; } =20 - VIR_DEBUG("Deny path %s", src->path); + VIR_DEBUG("Deny path %s", path); =20 - ret =3D virCgroupDenyDevicePath(priv->cgroup, src->path, perms, true); + ret =3D virCgroupDenyDevicePath(priv->cgroup, path, perms, true); =20 - virDomainAuditCgroupPath(vm, priv->cgroup, "deny", src->path, + virDomainAuditCgroupPath(vm, priv->cgroup, "deny", path, virCgroupGetDevicePermsString(perms), ret); =20 /* If you're looking for a counter part to --=20 2.21.0 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list