From nobody Mon Nov 25 07:35:29 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=movementarian.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1717000165554821.577320709854; Wed, 29 May 2024 09:29:25 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1sCM6v-00019A-PU; Wed, 29 May 2024 12:25:09 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6q-00016D-NJ for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:05 -0400 Received: from ssh.movementarian.org ([139.162.205.133] helo=movementarian.org) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM6l-0006Kq-8F for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:04 -0400 Received: from movement by movementarian.org with local (Exim 4.95) (envelope-from ) id 1sCM6j-006CPl-H9; Wed, 29 May 2024 17:24:57 +0100 From: John Levon To: qemu-devel@nongnu.org Cc: alex.williamson@redhat.com, clg@redhat.com, jag.raman@oracle.com, thanos.makatos@nutanix.com, John Johnson , Elena Ufimtseva , John Levon Subject: [PATCH 15/26] vfio-user: get region info Date: Wed, 29 May 2024 17:23:08 +0100 Message-Id: <20240529162319.1476680-16-levon@movementarian.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20240529162319.1476680-1-levon@movementarian.org> References: <20240529162319.1476680-1-levon@movementarian.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=139.162.205.133; envelope-from=movement@movementarian.org; helo=movementarian.org X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_PASS=-0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZM-MESSAGEID: 1717000167442100005 Content-Type: text/plain; charset="utf-8" From: Jagannathan Raman Add per-region FD to support mmap() of remote device regions Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon --- hw/vfio/ap.c | 2 ++ hw/vfio/ccw.c | 2 ++ hw/vfio/container.c | 7 ++++ hw/vfio/helpers.c | 26 ++++++++++++-- hw/vfio/pci.c | 2 ++ hw/vfio/platform.c | 5 +++ hw/vfio/trace-events | 1 + hw/vfio/user-pci.c | 2 ++ hw/vfio/user-protocol.h | 14 ++++++++ hw/vfio/user.c | 68 +++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio-common.h | 6 +++- 11 files changed, 132 insertions(+), 3 deletions(-) diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 23d700e67a..2736ad03d9 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -162,6 +162,8 @@ static void vfio_ap_realize(DeviceState *dev, Error **e= rrp) return; } =20 + vbasedev->use_regfds =3D false; + if (!vfio_attach_device(vbasedev->name, vbasedev, &address_space_memory, errp)) { goto error; diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index b4139c8aef..34cba01a68 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -587,6 +587,8 @@ static void vfio_ccw_realize(DeviceState *dev, Error **= errp) return; } =20 + vbasedev->use_regfds =3D false; + if (!vfio_attach_device(cdev->mdevid, vbasedev, &address_space_memory, errp)) { goto out_attach_dev_err; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 4ff09e277f..7e1a1a010a 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -895,10 +895,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) int i; =20 for (i =3D 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds !=3D NULL && vbasedev->regfds[i] !=3D -1)= { + close(vbasedev->regfds[i]); + } g_free(vbasedev->regions[i]); } g_free(vbasedev->regions); vbasedev->regions =3D NULL; + if (vbasedev->regfds !=3D NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds =3D NULL; + } } =20 if (!vbasedev->group) { diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 2cd8fbe70c..d0f1db30da 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -363,6 +363,12 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasede= v, VFIORegion *region, region->size =3D info->size; region->fd_offset =3D info->offset; region->nr =3D index; + if (vbasedev->regfds !=3D NULL) { + region->fd =3D vbasedev->regfds[index]; + } else { + region->fd =3D vbasedev->fd; + } + =20 if (region->size) { region->mem =3D g_new0(MemoryRegion, 1); @@ -539,12 +545,16 @@ int vfio_get_region_info(VFIODevice *vbasedev, int in= dex, struct vfio_region_info **info) { size_t argsz =3D sizeof(struct vfio_region_info); + int fd =3D -1; int ret; =20 /* create region cache */ if (vbasedev->regions =3D=3D NULL) { vbasedev->regions =3D g_new0(struct vfio_region_info *, vbasedev->num_regions); + if (vbasedev->use_regfds) { + vbasedev->regfds =3D g_new0(int, vbasedev->num_regions); + } } /* check cache */ if (vbasedev->regions[index] !=3D NULL) { @@ -558,22 +568,33 @@ int vfio_get_region_info(VFIODevice *vbasedev, int in= dex, retry: (*info)->argsz =3D argsz; =20 - ret =3D vbasedev->io->get_region_info(vbasedev, *info); + ret =3D vbasedev->io->get_region_info(vbasedev, *info, &fd); if (ret !=3D 0) { g_free(*info); *info =3D NULL; + if (vbasedev->regfds !=3D NULL) { + vbasedev->regfds[index] =3D -1; + } + return -errno; } =20 if ((*info)->argsz > argsz) { argsz =3D (*info)->argsz; *info =3D g_realloc(*info, argsz); + if (fd !=3D -1) { + close(fd); + fd =3D -1; + } =20 goto retry; } =20 /* fill cache */ vbasedev->regions[index] =3D *info; + if (vbasedev->regfds !=3D NULL) { + vbasedev->regfds[index] =3D fd; + } =20 return 0; } @@ -695,10 +716,11 @@ static int vfio_io_device_feature(VFIODevice *vbasede= v, } =20 static int vfio_io_get_region_info(VFIODevice *vbasedev, - struct vfio_region_info *info) + struct vfio_region_info *info, int *fd) { int ret; =20 + *fd =3D -1; ret =3D ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); =20 return ret < 0 ? -errno : ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 2e334c0c38..50a22b6986 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3054,6 +3054,8 @@ static void vfio_realize(PCIDevice *pdev, Error **err= p) name =3D g_strdup(vbasedev->name); } =20 + vbasedev->use_regfds =3D false; + if (!vfio_attach_device(name, vbasedev, pci_device_iommu_address_space(pdev), errp)) { goto error; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 86ecd97fde..7885ccbb97 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -575,6 +575,11 @@ static void vfio_platform_realize(DeviceState *dev, Er= ror **errp) VFIODevice *vbasedev =3D &vdev->vbasedev; int i; =20 + vbasedev->type =3D VFIO_DEVICE_TYPE_PLATFORM; + vbasedev->dev =3D dev; + vbasedev->ops =3D &vfio_platform_ops; + vbasedev->use_regfds =3D false; + qemu_mutex_init(&vdev->intp_mutex); =20 trace_vfio_platform_realize(vbasedev->sysfsdev ? diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 0f2e338194..3bfb625ee3 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -184,3 +184,4 @@ vfio_user_recv_request(uint16_t cmd) " command 0x%x" vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x" vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " majo= r %d minor %d caps: %s" vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs = %d" +vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) "= index %d flags 0x%x size 0x%"PRIx64 diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c index fe98048aad..3c2d145812 100644 --- a/hw/vfio/user-pci.c +++ b/hw/vfio/user-pci.c @@ -112,6 +112,8 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Erro= r **errp) vbasedev->ops =3D &vfio_user_pci_ops; vbasedev->type =3D VFIO_DEVICE_TYPE_PCI; vbasedev->dev =3D DEVICE(vdev); + vbasedev->io =3D &vfio_dev_io_sock; + vbasedev->use_regfds =3D true; =20 as =3D pci_device_iommu_address_space(pdev); if (!vfio_attach_device_by_iommu_type(TYPE_VFIO_IOMMU_USER, diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 5f9ef1768f..6f70a48905 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -125,4 +125,18 @@ typedef struct { uint32_t num_irqs; } VFIOUserDeviceInfo; =20 +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 645b927f97..e9a89df82a 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -1122,3 +1122,71 @@ int vfio_user_get_info(VFIOUserProxy *proxy, struct = vfio_device_info *info) =20 return 0; } + +static int vfio_user_get_region_info(VFIOUserProxy *proxy, + struct vfio_region_info *info, + VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp =3D NULL; + uint32_t size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -E2BIG; + } + if (fds !=3D NULL && fds->send_fds !=3D 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size =3D info->argsz + sizeof(VFIOUserHdr); + msgp =3D g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz =3D info->argsz; + msgp->index =3D info->index; + + vfio_user_send_wait(proxy, &msgp->hdr, fds, size); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size); + + memcpy(info, &msgp->argsz, info->argsz); + return 0; +} + + +/* + * Socket-based io_ops + */ + +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + int ret; + VFIOUserFDs fds =3D { 0, 1, fd}; + + ret =3D vfio_user_get_region_info(vbasedev->proxy, info, &fds); + if (ret) { + return ret; + } + + if (info->index > vbasedev->num_regions) { + return -EINVAL; + } + /* cap_offset in valid area */ + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && + (info->cap_offset < sizeof(*info) || info->cap_offset > info->args= z)) { + return -EINVAL; + } + + return 0; +} + +VFIODeviceIO vfio_dev_io_sock =3D { + .get_region_info =3D vfio_user_io_get_region_info, +}; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 6bbe0218e9..85c3fec3b9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -57,6 +57,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int fd; /* fd to mmap() region */ } VFIORegion; =20 typedef struct VFIOMigration { @@ -126,6 +127,7 @@ typedef struct VFIODevice { bool ram_block_discard_allowed; OnOffAuto enable_migration; bool migration_events; + bool use_regfds; VFIODeviceOps *ops; VFIODeviceIO *io; unsigned int num_irqs; @@ -140,6 +142,7 @@ typedef struct VFIODevice { IOMMUFDBackend *iommufd; VFIOUserProxy *proxy; struct vfio_region_info **regions; + int *regfds; } VFIODevice; =20 struct VFIODeviceOps { @@ -184,7 +187,7 @@ struct VFIODeviceOps { struct VFIODeviceIO { int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t s= ize, @@ -194,6 +197,7 @@ struct VFIODeviceIO { }; =20 extern VFIODeviceIO vfio_dev_io_ioctl; +extern VFIODeviceIO vfio_dev_io_sock; =20 #endif /* CONFIG_LINUX */ =20 --=20 2.34.1