This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.
Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.
Add helper functions vfio_device_set_fd() and vfio_device_get_name()
to set fd and get device name, they will also be used by other vfio
devices.
There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.
There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v6: simplify CONFIG_IOMMUFD checking code
introduce a helper vfio_device_set_fd
include/hw/vfio/vfio-common.h | 3 +++
hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
hw/vfio/iommufd.c | 12 ++++++----
hw/vfio/pci.c | 28 ++++++++++++----------
4 files changed, 71 insertions(+), 16 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..567e5f7bea 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
hwaddr size);
int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
uint64_t size, ram_addr_t ram_addr);
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..986ef1095a 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@
*/
#include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <sys/ioctl.h>
#include "hw/vfio/vfio-common.h"
@@ -27,6 +28,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "monitor/monitor.h"
/*
* Common VFIO interrupt disable
@@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
return ret;
}
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+ struct stat st;
+
+ if (vbasedev->fd < 0) {
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
+ error_setg_errno(errp, errno, "no such host device");
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ return -errno;
+ }
+ /* User may specify a name, e.g: VFIO platform device */
+ if (!vbasedev->name) {
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+ }
+ } else {
+ if (!vbasedev->iommufd) {
+ error_setg(errp, "Use FD passing only with iommufd backend");
+ return -EINVAL;
+ }
+ /*
+ * Give a name with fd so any function printing out vbasedev->name
+ * will not break.
+ */
+ if (!vbasedev->name) {
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ }
+ }
+
+ return 0;
+}
+
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ int fd = monitor_fd_param(monitor_cur(), str, errp);
+
+ if (fd < 0) {
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
+ return;
+ }
+ vbasedev->fd = fd;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 3eec428162..e08a217057 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
uint32_t ioas_id;
Error *err = NULL;
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
- if (devfd < 0) {
- return devfd;
+ if (vbasedev->fd < 0) {
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+ if (devfd < 0) {
+ return devfd;
+ }
+ vbasedev->fd = devfd;
+ } else {
+ devfd = vbasedev->fd;
}
- vbasedev->fd = devfd;
ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c5984b0598..b23b492cce 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIODevice *vbasedev = &vdev->vbasedev;
char *tmp, *subsys;
Error *err = NULL;
- struct stat st;
int i, ret;
bool is_mdev;
char uuid[UUID_STR_LEN];
char *name;
- if (!vbasedev->sysfsdev) {
+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
error_setg(errp, "No provided host device");
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+#ifdef CONFIG_IOMMUFD
+ "or -device vfio-pci,fd=DEVICE_FD "
+#endif
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
return;
}
@@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vdev->host.slot, vdev->host.function);
}
- if (stat(vbasedev->sysfsdev, &st) < 0) {
- error_setg_errno(errp, errno, "no such host device");
- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ if (vfio_device_get_name(vbasedev, errp)) {
return;
}
-
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
vbasedev->ops = &vfio_pci_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
@@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj)
vdev->host.bus = ~0U;
vdev->host.slot = ~0U;
vdev->host.function = ~0U;
+ vdev->vbasedev.fd = -1;
vdev->nv_gpudirect_clique = 0xFF;
@@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = {
qdev_prop_nv_gpudirect_clique, uint8_t),
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
OFF_AUTOPCIBAR_OFF),
- /*
- * TODO - support passed fds... is this necessary?
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
- */
#ifdef CONFIG_IOMMUFD
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
+}
+#endif
+
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
dc->reset = vfio_pci_reset;
device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
--
2.34.1
Hi Zhenzhong, On 14/11/23 11:09, Zhenzhong Duan wrote: > This gives management tools like libvirt a chance to open the vfio > cdev with privilege and pass FD to qemu. This way qemu never needs > to have privilege to open a VFIO or iommu cdev node. > > Together with the earlier support of pre-opening /dev/iommu device, > now we have full support of passing a vfio device to unprivileged > qemu by management tool. This mode is no more considered for the > legacy backend. So let's remove the "TODO" comment. > > Add helper functions vfio_device_set_fd() and vfio_device_get_name() > to set fd and get device name, they will also be used by other vfio > devices. > > There is no easy way to check if a device is mdev with FD passing, > so fail the x-balloon-allowed check unconditionally in this case. > > There is also no easy way to get BDF as name with FD passing, so > we fake a name by VFIO_FD[fd]. > > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > v6: simplify CONFIG_IOMMUFD checking code > introduce a helper vfio_device_set_fd > > include/hw/vfio/vfio-common.h | 3 +++ > hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++ > hw/vfio/iommufd.c | 12 ++++++---- > hw/vfio/pci.c | 28 ++++++++++++---------- > 4 files changed, 71 insertions(+), 16 deletions(-) > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 3dac5c167e..567e5f7bea 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, > hwaddr size); > int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, > uint64_t size, ram_addr_t ram_addr); > + Please add bare documentation: /* Returns 0 on success, or a negative errno. */ > +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); Functions taking an Error** param should return a boolean, so: /* Return: true on success, else false setting @errp with error. */ > +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); > #endif /* HW_VFIO_VFIO_COMMON_H */ > @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) > > return ret; > } > + > +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) > +{ > + struct stat st; > + > + if (vbasedev->fd < 0) { > + if (stat(vbasedev->sysfsdev, &st) < 0) { > + error_setg_errno(errp, errno, "no such host device"); > + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); > + return -errno; > + } > + /* User may specify a name, e.g: VFIO platform device */ > + if (!vbasedev->name) { > + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); > + } > + } else { > + if (!vbasedev->iommufd) { > + error_setg(errp, "Use FD passing only with iommufd backend"); > + return -EINVAL; > + } > + /* > + * Give a name with fd so any function printing out vbasedev->name > + * will not break. > + */ > + if (!vbasedev->name) { > + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); > + } > + } > + > + return 0; > +} > + > +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) bool vfio_device_set_fd(..., Error **errp) > +{ > + int fd = monitor_fd_param(monitor_cur(), str, errp); > + > + if (fd < 0) { > + error_prepend(errp, "Could not parse remote object fd %s:", str); > + return; return false; > + } > + vbasedev->fd = fd; return true; > +} > diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c > index 3eec428162..e08a217057 100644 > --- a/hw/vfio/iommufd.c > +++ b/hw/vfio/iommufd.c > @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, > uint32_t ioas_id; > Error *err = NULL; > > - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); > - if (devfd < 0) { > - return devfd; > + if (vbasedev->fd < 0) { > + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); > + if (devfd < 0) { > + return devfd; > + } > + vbasedev->fd = devfd; > + } else { > + devfd = vbasedev->fd; > } > - vbasedev->fd = devfd; > > ret = iommufd_cdev_connect_and_bind(vbasedev, errp); > if (ret) { > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index c5984b0598..b23b492cce 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > VFIODevice *vbasedev = &vdev->vbasedev; > char *tmp, *subsys; > Error *err = NULL; > - struct stat st; > int i, ret; > bool is_mdev; > char uuid[UUID_STR_LEN]; > char *name; > > - if (!vbasedev->sysfsdev) { > + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { > if (!(~vdev->host.domain || ~vdev->host.bus || > ~vdev->host.slot || ~vdev->host.function)) { > error_setg(errp, "No provided host device"); > error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " > +#ifdef CONFIG_IOMMUFD > + "or -device vfio-pci,fd=DEVICE_FD " > +#endif > "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); > return; > } > @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > vdev->host.slot, vdev->host.function); > } > > - if (stat(vbasedev->sysfsdev, &st) < 0) { > - error_setg_errno(errp, errno, "no such host device"); > - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); > + if (vfio_device_get_name(vbasedev, errp)) { Clearer as: if (vfio_device_get_name(vbasedev, errp) < 0) { > return; > } Regards, Phil.
On 11/15/23 13:09, Philippe Mathieu-Daudé wrote: > Hi Zhenzhong, > > On 14/11/23 11:09, Zhenzhong Duan wrote: >> This gives management tools like libvirt a chance to open the vfio >> cdev with privilege and pass FD to qemu. This way qemu never needs >> to have privilege to open a VFIO or iommu cdev node. >> >> Together with the earlier support of pre-opening /dev/iommu device, >> now we have full support of passing a vfio device to unprivileged >> qemu by management tool. This mode is no more considered for the >> legacy backend. So let's remove the "TODO" comment. >> >> Add helper functions vfio_device_set_fd() and vfio_device_get_name() >> to set fd and get device name, they will also be used by other vfio >> devices. >> >> There is no easy way to check if a device is mdev with FD passing, >> so fail the x-balloon-allowed check unconditionally in this case. >> >> There is also no easy way to get BDF as name with FD passing, so >> we fake a name by VFIO_FD[fd]. >> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> v6: simplify CONFIG_IOMMUFD checking code >> introduce a helper vfio_device_set_fd >> >> include/hw/vfio/vfio-common.h | 3 +++ >> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++ >> hw/vfio/iommufd.c | 12 ++++++---- >> hw/vfio/pci.c | 28 ++++++++++++---------- >> 4 files changed, 71 insertions(+), 16 deletions(-) >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 3dac5c167e..567e5f7bea 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, >> hwaddr size); >> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, >> uint64_t size, ram_addr_t ram_addr); >> + > > Please add bare documentation: > > /* Returns 0 on success, or a negative errno. */ > >> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); > > Functions taking an Error** param should return a boolean, so: > > /* Return: true on success, else false setting @errp with error. */ > >> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); >> #endif /* HW_VFIO_VFIO_COMMON_H */ > > >> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) >> return ret; >> } >> + >> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) >> +{ >> + struct stat st; >> + >> + if (vbasedev->fd < 0) { >> + if (stat(vbasedev->sysfsdev, &st) < 0) { >> + error_setg_errno(errp, errno, "no such host device"); >> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >> + return -errno; >> + } >> + /* User may specify a name, e.g: VFIO platform device */ >> + if (!vbasedev->name) { >> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >> + } >> + } else { >> + if (!vbasedev->iommufd) { >> + error_setg(errp, "Use FD passing only with iommufd backend"); >> + return -EINVAL; >> + } >> + /* >> + * Give a name with fd so any function printing out vbasedev->name >> + * will not break. >> + */ >> + if (!vbasedev->name) { >> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); >> + } >> + } >> + >> + return 0; >> +} >> + >> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) > > bool vfio_device_set_fd(..., Error **errp) > >> +{ >> + int fd = monitor_fd_param(monitor_cur(), str, errp); >> + >> + if (fd < 0) { >> + error_prepend(errp, "Could not parse remote object fd %s:", str); >> + return; > > return false; > >> + } >> + vbasedev->fd = fd; > > return true; If we had a QOM base device object, vfio_device_set_fd() would be passed directly to object_class_property_add_str() which expects a : void (*set)(Object *, const char *, Error **) I think it is fine to keep as it is. We might have a QOM base device object one day ! Minor anyway. Thanks, C. > >> +} >> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >> index 3eec428162..e08a217057 100644 >> --- a/hw/vfio/iommufd.c >> +++ b/hw/vfio/iommufd.c >> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, >> uint32_t ioas_id; >> Error *err = NULL; >> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >> - if (devfd < 0) { >> - return devfd; >> + if (vbasedev->fd < 0) { >> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >> + if (devfd < 0) { >> + return devfd; >> + } >> + vbasedev->fd = devfd; >> + } else { >> + devfd = vbasedev->fd; >> } >> - vbasedev->fd = devfd; >> ret = iommufd_cdev_connect_and_bind(vbasedev, errp); >> if (ret) { >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index c5984b0598..b23b492cce 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> VFIODevice *vbasedev = &vdev->vbasedev; >> char *tmp, *subsys; >> Error *err = NULL; >> - struct stat st; >> int i, ret; >> bool is_mdev; >> char uuid[UUID_STR_LEN]; >> char *name; >> - if (!vbasedev->sysfsdev) { >> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { >> if (!(~vdev->host.domain || ~vdev->host.bus || >> ~vdev->host.slot || ~vdev->host.function)) { >> error_setg(errp, "No provided host device"); >> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " >> +#ifdef CONFIG_IOMMUFD >> + "or -device vfio-pci,fd=DEVICE_FD " >> +#endif >> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); >> return; >> } >> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> vdev->host.slot, vdev->host.function); >> } >> - if (stat(vbasedev->sysfsdev, &st) < 0) { >> - error_setg_errno(errp, errno, "no such host device"); >> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >> + if (vfio_device_get_name(vbasedev, errp)) { > > Clearer as: > > if (vfio_device_get_name(vbasedev, errp) < 0) { > >> return; >> } > > Regards, > > Phil. >
Hi Philippe, >-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Sent: Wednesday, November 15, 2023 9:05 PM >Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a >file handle > >On 11/15/23 13:09, Philippe Mathieu-Daudé wrote: >> Hi Zhenzhong, >> >> On 14/11/23 11:09, Zhenzhong Duan wrote: >>> This gives management tools like libvirt a chance to open the vfio >>> cdev with privilege and pass FD to qemu. This way qemu never needs >>> to have privilege to open a VFIO or iommu cdev node. >>> >>> Together with the earlier support of pre-opening /dev/iommu device, >>> now we have full support of passing a vfio device to unprivileged >>> qemu by management tool. This mode is no more considered for the >>> legacy backend. So let's remove the "TODO" comment. >>> >>> Add helper functions vfio_device_set_fd() and vfio_device_get_name() >>> to set fd and get device name, they will also be used by other vfio >>> devices. >>> >>> There is no easy way to check if a device is mdev with FD passing, >>> so fail the x-balloon-allowed check unconditionally in this case. >>> >>> There is also no easy way to get BDF as name with FD passing, so >>> we fake a name by VFIO_FD[fd]. >>> >>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>> --- >>> v6: simplify CONFIG_IOMMUFD checking code >>> introduce a helper vfio_device_set_fd >>> >>> include/hw/vfio/vfio-common.h | 3 +++ >>> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++ >>> hw/vfio/iommufd.c | 12 ++++++---- >>> hw/vfio/pci.c | 28 ++++++++++++---------- >>> 4 files changed, 71 insertions(+), 16 deletions(-) >>> >>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >>> index 3dac5c167e..567e5f7bea 100644 >>> --- a/include/hw/vfio/vfio-common.h >>> +++ b/include/hw/vfio/vfio-common.h >>> @@ -251,4 +251,7 @@ int >vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, >>> hwaddr size); >>> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, >>> uint64_t size, ram_addr_t ram_addr); >>> + >> >> Please add bare documentation: >> >> /* Returns 0 on success, or a negative errno. */ >> >>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); Will do, I'd like to wait a few days to collect more suggested changes and RB, Then send all these updates to Cédric in once before he pushes this series to vfio-next. >> >> Functions taking an Error** param should return a boolean, so: >> >> /* Return: true on success, else false setting @errp with error. */ >> >>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); >>> #endif /* HW_VFIO_VFIO_COMMON_H */ >> >> >>> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int >region, uint16_t cap_type) >>> return ret; >>> } >>> + >>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) >>> +{ >>> + struct stat st; >>> + >>> + if (vbasedev->fd < 0) { >>> + if (stat(vbasedev->sysfsdev, &st) < 0) { >>> + error_setg_errno(errp, errno, "no such host device"); >>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >>> + return -errno; >>> + } >>> + /* User may specify a name, e.g: VFIO platform device */ >>> + if (!vbasedev->name) { >>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >>> + } >>> + } else { >>> + if (!vbasedev->iommufd) { >>> + error_setg(errp, "Use FD passing only with iommufd backend"); >>> + return -EINVAL; >>> + } >>> + /* >>> + * Give a name with fd so any function printing out vbasedev->name >>> + * will not break. >>> + */ >>> + if (!vbasedev->name) { >>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); >>> + } >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) >> >> bool vfio_device_set_fd(..., Error **errp) >> >>> +{ >>> + int fd = monitor_fd_param(monitor_cur(), str, errp); >>> + >>> + if (fd < 0) { >>> + error_prepend(errp, "Could not parse remote object fd %s:", str); >>> + return; >> >> return false; >> >>> + } >>> + vbasedev->fd = fd; >> >> return true; > >If we had a QOM base device object, vfio_device_set_fd() would be passed >directly to object_class_property_add_str() which expects a : > > void (*set)(Object *, const char *, Error **) > >I think it is fine to keep as it is. We might have a QOM base device object >one day ! Minor anyway. > >Thanks, > >C. > > >> >>> +} >>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >>> index 3eec428162..e08a217057 100644 >>> --- a/hw/vfio/iommufd.c >>> +++ b/hw/vfio/iommufd.c >>> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, >VFIODevice *vbasedev, >>> uint32_t ioas_id; >>> Error *err = NULL; >>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >>> - if (devfd < 0) { >>> - return devfd; >>> + if (vbasedev->fd < 0) { >>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >>> + if (devfd < 0) { >>> + return devfd; >>> + } >>> + vbasedev->fd = devfd; >>> + } else { >>> + devfd = vbasedev->fd; >>> } >>> - vbasedev->fd = devfd; >>> ret = iommufd_cdev_connect_and_bind(vbasedev, errp); >>> if (ret) { >>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>> index c5984b0598..b23b492cce 100644 >>> --- a/hw/vfio/pci.c >>> +++ b/hw/vfio/pci.c >>> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error >**errp) >>> VFIODevice *vbasedev = &vdev->vbasedev; >>> char *tmp, *subsys; >>> Error *err = NULL; >>> - struct stat st; >>> int i, ret; >>> bool is_mdev; >>> char uuid[UUID_STR_LEN]; >>> char *name; >>> - if (!vbasedev->sysfsdev) { >>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { >>> if (!(~vdev->host.domain || ~vdev->host.bus || >>> ~vdev->host.slot || ~vdev->host.function)) { >>> error_setg(errp, "No provided host device"); >>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " >>> +#ifdef CONFIG_IOMMUFD >>> + "or -device vfio-pci,fd=DEVICE_FD " >>> +#endif >>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); >>> return; >>> } >>> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error >**errp) >>> vdev->host.slot, vdev->host.function); >>> } >>> - if (stat(vbasedev->sysfsdev, &st) < 0) { >>> - error_setg_errno(errp, errno, "no such host device"); >>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >>> + if (vfio_device_get_name(vbasedev, errp)) { >> >> Clearer as: >> >> if (vfio_device_get_name(vbasedev, errp) < 0) { >> >>> return; >>> } Will do. Thanks Zhenzhong
>>> Please add bare documentation: >>> >>> /* Returns 0 on success, or a negative errno. */ >>> >>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); > > Will do, I'd like to wait a few days to collect more suggested changes and RB, > Then send all these updates to Cédric in once before he pushes this series to vfio-next. Yep. Could you respin a v7 with all the comments on v6 ? I will then apply directly on vfio-next. Please wait for Eric to finish looking at the platform part. Thanks, C.
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Sent: Thursday, November 16, 2023 3:25 PM >Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a >file handle > >>>> Please add bare documentation: >>>> >>>> /* Returns 0 on success, or a negative errno. */ >>>> >>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); >> >> Will do, I'd like to wait a few days to collect more suggested changes and RB, >> Then send all these updates to Cédric in once before he pushes this series to >vfio-next. > >Yep. Could you respin a v7 with all the comments on v6 ? I will >then apply directly on vfio-next. Sure. > >Please wait for Eric to finish looking at the platform part. Sure. Thanks Zhenzhong
On 11/14/23 11:09, Zhenzhong Duan wrote: > This gives management tools like libvirt a chance to open the vfio > cdev with privilege and pass FD to qemu. This way qemu never needs > to have privilege to open a VFIO or iommu cdev node. > > Together with the earlier support of pre-opening /dev/iommu device, > now we have full support of passing a vfio device to unprivileged > qemu by management tool. This mode is no more considered for the > legacy backend. So let's remove the "TODO" comment. > > Add helper functions vfio_device_set_fd() and vfio_device_get_name() > to set fd and get device name, they will also be used by other vfio > devices. > > There is no easy way to check if a device is mdev with FD passing, > so fail the x-balloon-allowed check unconditionally in this case. > > There is also no easy way to get BDF as name with FD passing, so > we fake a name by VFIO_FD[fd]. > > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > v6: simplify CONFIG_IOMMUFD checking code > introduce a helper vfio_device_set_fd > > include/hw/vfio/vfio-common.h | 3 +++ > hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++ > hw/vfio/iommufd.c | 12 ++++++---- > hw/vfio/pci.c | 28 ++++++++++++---------- > 4 files changed, 71 insertions(+), 16 deletions(-) > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 3dac5c167e..567e5f7bea 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, > hwaddr size); > int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, > uint64_t size, ram_addr_t ram_addr); > + > +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); > +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); > #endif /* HW_VFIO_VFIO_COMMON_H */ > diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c > index 168847e7c5..986ef1095a 100644 > --- a/hw/vfio/helpers.c > +++ b/hw/vfio/helpers.c > @@ -20,6 +20,7 @@ > */ > > #include "qemu/osdep.h" > +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ Unused now. I removed it. Reviewed-by: Cédric Le Goater <clg@redhat.com> Thanks, C. > #include <sys/ioctl.h> > > #include "hw/vfio/vfio-common.h" > @@ -27,6 +28,7 @@ > #include "trace.h" > #include "qapi/error.h" > #include "qemu/error-report.h" > +#include "monitor/monitor.h" > > /* > * Common VFIO interrupt disable > @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) > > return ret; > } > + > +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) > +{ > + struct stat st; > + > + if (vbasedev->fd < 0) { > + if (stat(vbasedev->sysfsdev, &st) < 0) { > + error_setg_errno(errp, errno, "no such host device"); > + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); > + return -errno; > + } > + /* User may specify a name, e.g: VFIO platform device */ > + if (!vbasedev->name) { > + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); > + } > + } else { > + if (!vbasedev->iommufd) { > + error_setg(errp, "Use FD passing only with iommufd backend"); > + return -EINVAL; > + } > + /* > + * Give a name with fd so any function printing out vbasedev->name > + * will not break. > + */ > + if (!vbasedev->name) { > + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); > + } > + } > + > + return 0; > +} > + > +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) > +{ > + int fd = monitor_fd_param(monitor_cur(), str, errp); > + > + if (fd < 0) { > + error_prepend(errp, "Could not parse remote object fd %s:", str); > + return; > + } > + vbasedev->fd = fd; > +} > diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c > index 3eec428162..e08a217057 100644 > --- a/hw/vfio/iommufd.c > +++ b/hw/vfio/iommufd.c > @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, > uint32_t ioas_id; > Error *err = NULL; > > - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); > - if (devfd < 0) { > - return devfd; > + if (vbasedev->fd < 0) { > + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); > + if (devfd < 0) { > + return devfd; > + } > + vbasedev->fd = devfd; > + } else { > + devfd = vbasedev->fd; > } > - vbasedev->fd = devfd; > > ret = iommufd_cdev_connect_and_bind(vbasedev, errp); > if (ret) { > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index c5984b0598..b23b492cce 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > VFIODevice *vbasedev = &vdev->vbasedev; > char *tmp, *subsys; > Error *err = NULL; > - struct stat st; > int i, ret; > bool is_mdev; > char uuid[UUID_STR_LEN]; > char *name; > > - if (!vbasedev->sysfsdev) { > + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { > if (!(~vdev->host.domain || ~vdev->host.bus || > ~vdev->host.slot || ~vdev->host.function)) { > error_setg(errp, "No provided host device"); > error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " > +#ifdef CONFIG_IOMMUFD > + "or -device vfio-pci,fd=DEVICE_FD " > +#endif > "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); > return; > } > @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > vdev->host.slot, vdev->host.function); > } > > - if (stat(vbasedev->sysfsdev, &st) < 0) { > - error_setg_errno(errp, errno, "no such host device"); > - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); > + if (vfio_device_get_name(vbasedev, errp)) { > return; > } > - > - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); > vbasedev->ops = &vfio_pci_ops; > vbasedev->type = VFIO_DEVICE_TYPE_PCI; > vbasedev->dev = DEVICE(vdev); > @@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj) > vdev->host.bus = ~0U; > vdev->host.slot = ~0U; > vdev->host.function = ~0U; > + vdev->vbasedev.fd = -1; > > vdev->nv_gpudirect_clique = 0xFF; > > @@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = { > qdev_prop_nv_gpudirect_clique, uint8_t), > DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, > OFF_AUTOPCIBAR_OFF), > - /* > - * TODO - support passed fds... is this necessary? > - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), > - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), > - */ > #ifdef CONFIG_IOMMUFD > DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, > TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), > @@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = { > DEFINE_PROP_END_OF_LIST(), > }; > > +#ifdef CONFIG_IOMMUFD > +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) > +{ > + vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); > +} > +#endif > + > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) > { > DeviceClass *dc = DEVICE_CLASS(klass); > @@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) > > dc->reset = vfio_pci_reset; > device_class_set_props(dc, vfio_pci_dev_properties); > +#ifdef CONFIG_IOMMUFD > + object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); > +#endif > dc->desc = "VFIO-based PCI device assignment"; > set_bit(DEVICE_CATEGORY_MISC, dc->categories); > pdc->realize = vfio_realize;
© 2016 - 2024 Red Hat, Inc.