This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.
Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.
Add a helper function vfio_device_get_name() to check fd and get
device name, it will also be used by other vfio devices.
There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.
There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/vfio/vfio-common.h | 1 +
hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++
hw/vfio/iommufd.c | 12 +++++++----
hw/vfio/pci.c | 40 ++++++++++++++++++++++++-----------
4 files changed, 71 insertions(+), 16 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..960a14e8d8 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -238,6 +238,7 @@ struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
#endif
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..d80aa58719 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@
*/
#include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <sys/ioctl.h>
#include "hw/vfio/vfio-common.h"
@@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
return ret;
}
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+ struct stat st;
+
+ if (vbasedev->fd < 0) {
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
+ error_setg_errno(errp, errno, "no such host device");
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ return -errno;
+ }
+ /* User may specify a name, e.g: VFIO platform device */
+ if (!vbasedev->name) {
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+ }
+ }
+#ifdef CONFIG_IOMMUFD
+ else {
+ if (!vbasedev->iommufd) {
+ error_setg(errp, "Use FD passing only with iommufd backend");
+ return -EINVAL;
+ }
+ /*
+ * Give a name with fd so any function printing out vbasedev->name
+ * will not break.
+ */
+ if (!vbasedev->name) {
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ }
+ }
+#endif
+ return 0;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 44dc6848bf..fd30477275 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name, VFIODevice *vbasedev,
uint32_t ioas_id;
Error *err = NULL;
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
- if (devfd < 0) {
- return devfd;
+ if (vbasedev->fd < 0) {
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+ if (devfd < 0) {
+ return devfd;
+ }
+ vbasedev->fd = devfd;
+ } else {
+ devfd = vbasedev->fd;
}
- vbasedev->fd = devfd;
ret = iommufd_connect_and_bind(vbasedev, errp);
if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e9a426200b..f95725ed16 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -44,6 +44,7 @@
#include "migration/blocker.h"
#include "migration/qemu-file.h"
#include "sysemu/iommufd.h"
+#include "monitor/monitor.h"
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
@@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIODevice *vbasedev = &vdev->vbasedev;
char *tmp, *subsys;
Error *err = NULL;
- struct stat st;
int i, ret;
bool is_mdev;
char uuid[UUID_STR_LEN];
char *name;
- if (!vbasedev->sysfsdev) {
+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
error_setg(errp, "No provided host device");
+#ifdef CONFIG_IOMMUFD
+ error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
+ "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
+ "or -device vfio-pci,fd=DEVICE_FD\n");
+#else
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+#endif
return;
}
vbasedev->sysfsdev =
@@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vdev->host.slot, vdev->host.function);
}
- if (stat(vbasedev->sysfsdev, &st) < 0) {
- error_setg_errno(errp, errno, "no such host device");
- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ if (vfio_device_get_name(vbasedev, errp)) {
return;
}
-
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
vbasedev->ops = &vfio_pci_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
@@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
vdev->host.bus = ~0U;
vdev->host.slot = ~0U;
vdev->host.function = ~0U;
+ vdev->vbasedev.fd = -1;
vdev->nv_gpudirect_clique = 0xFF;
@@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
qdev_prop_nv_gpudirect_clique, uint8_t),
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
OFF_AUTOPCIBAR_OFF),
- /*
- * TODO - support passed fds... is this necessary?
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
- */
#ifdef CONFIG_IOMMUFD
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI(obj);
+ int fd = -1;
+
+ fd = monitor_fd_param(monitor_cur(), str, errp);
+ if (fd == -1) {
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
+ return;
+ }
+ vdev->vbasedev.fd = fd;
+}
+#endif
+
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3392,6 +3405,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
dc->reset = vfio_pci_reset;
device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
--
2.34.1
On 11/9/23 12:45, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
>
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
>
> Add a helper function vfio_device_get_name() to check fd and get
> device name, it will also be used by other vfio devices.
>
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
>
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/hw/vfio/vfio-common.h | 1 +
> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++
> hw/vfio/iommufd.c | 12 +++++++----
> hw/vfio/pci.c | 40 ++++++++++++++++++++++++-----------
> 4 files changed, 71 insertions(+), 16 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..960a14e8d8 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
> struct vfio_info_cap_header *
> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> #endif
>
> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 168847e7c5..d80aa58719 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -20,6 +20,7 @@
> */
>
> #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
> #include <sys/ioctl.h>
>
> #include "hw/vfio/vfio-common.h"
> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>
> return ret;
> }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> + struct stat st;
> +
> + if (vbasedev->fd < 0) {
> + if (stat(vbasedev->sysfsdev, &st) < 0) {
> + error_setg_errno(errp, errno, "no such host device");
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + return -errno;
> + }
> + /* User may specify a name, e.g: VFIO platform device */
> + if (!vbasedev->name) {
> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> + }
> + }
> +#ifdef CONFIG_IOMMUFD
> + else {
> + if (!vbasedev->iommufd) {
Can we handle with this case without CONFIG_IOMMUFD, simply by
testing vbasedev->iommufd ?
> + error_setg(errp, "Use FD passing only with iommufd backend");
> + return -EINVAL;
> + }
> + /*
> + * Give a name with fd so any function printing out vbasedev->name
> + * will not break.
> + */
> + if (!vbasedev->name) {
> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> + }
> + }
> +#endif
> + return 0;
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 44dc6848bf..fd30477275 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name, VFIODevice *vbasedev,
> uint32_t ioas_id;
> Error *err = NULL;
>
> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> - if (devfd < 0) {
> - return devfd;
> + if (vbasedev->fd < 0) {
> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> + if (devfd < 0) {
> + return devfd;
> + }
> + vbasedev->fd = devfd;
> + } else {
> + devfd = vbasedev->fd;
> }
> - vbasedev->fd = devfd;
>
> ret = iommufd_connect_and_bind(vbasedev, errp);
> if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e9a426200b..f95725ed16 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -44,6 +44,7 @@
> #include "migration/blocker.h"
> #include "migration/qemu-file.h"
> #include "sysemu/iommufd.h"
> +#include "monitor/monitor.h"
>
> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>
> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> VFIODevice *vbasedev = &vdev->vbasedev;
> char *tmp, *subsys;
> Error *err = NULL;
> - struct stat st;
> int i, ret;
> bool is_mdev;
> char uuid[UUID_STR_LEN];
> char *name;
>
> - if (!vbasedev->sysfsdev) {
> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
> if (!(~vdev->host.domain || ~vdev->host.bus ||
> ~vdev->host.slot || ~vdev->host.function)) {
> error_setg(errp, "No provided host device");
> +#ifdef CONFIG_IOMMUFD
> + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
> + "or -device vfio-pci,fd=DEVICE_FD\n");
> +#else
> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> +#endif
or simply :
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+#ifdef CONFIG_IOMMUFD
+ "or -device vfio-pci,fd=DEVICE_FD "
+#endif
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> return;
> }
> vbasedev->sysfsdev =
> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vdev->host.slot, vdev->host.function);
> }
>
> - if (stat(vbasedev->sysfsdev, &st) < 0) {
> - error_setg_errno(errp, errno, "no such host device");
> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + if (vfio_device_get_name(vbasedev, errp)) {
> return;
> }
> -
> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> vbasedev->ops = &vfio_pci_ops;
> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
> vbasedev->dev = DEVICE(vdev);
> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
> vdev->host.bus = ~0U;
> vdev->host.slot = ~0U;
> vdev->host.function = ~0U;
> + vdev->vbasedev.fd = -1;
We should probably move the all VFIODevice initializations :
vbasedev->ops = &vfio_pci_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
under vfio_instance_init (should be called vfio_pci_instance_init).
This is true for all other VFIO devices. May be not for this series,
it can come later.
>
> vdev->nv_gpudirect_clique = 0xFF;
>
> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
> qdev_prop_nv_gpudirect_clique, uint8_t),
> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
> OFF_AUTOPCIBAR_OFF),
> - /*
> - * TODO - support passed fds... is this necessary?
> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
> - */
> #ifdef CONFIG_IOMMUFD
> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
> DEFINE_PROP_END_OF_LIST(),
> };
>
> +#ifdef CONFIG_IOMMUFD
> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
> +{
> + VFIOPCIDevice *vdev = VFIO_PCI(obj);
> + int fd = -1;
> +
> + fd = monitor_fd_param(monitor_cur(), str, errp);
> + if (fd == -1) {
> + error_prepend(errp, "Could not parse remote object fd %s:", str);
> + return;
> + }
> + vdev->vbasedev.fd = fd;
We could introduce a common helper in hw/vfio/common.c to remove code
duplication :
#ifdef CONFIG_IOMMUFD
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
{
vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
}
#endif
Thanks,
C.
> +}
> +#endif
> +
> static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> {
> DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -3392,6 +3405,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>
> dc->reset = vfio_pci_reset;
> device_class_set_props(dc, vfio_pci_dev_properties);
> +#ifdef CONFIG_IOMMUFD
> + object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
> +#endif
> dc->desc = "VFIO-based PCI device assignment";
> set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> pdc->realize = vfio_realize;
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Friday, November 10, 2023 6:53 PM
>Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/9/23 12:45, Zhenzhong Duan wrote:
>> This gives management tools like libvirt a chance to open the vfio
>> cdev with privilege and pass FD to qemu. This way qemu never needs
>> to have privilege to open a VFIO or iommu cdev node.
>>
>> Together with the earlier support of pre-opening /dev/iommu device,
>> now we have full support of passing a vfio device to unprivileged
>> qemu by management tool. This mode is no more considered for the
>> legacy backend. So let's remove the "TODO" comment.
>>
>> Add a helper function vfio_device_get_name() to check fd and get
>> device name, it will also be used by other vfio devices.
>>
>> There is no easy way to check if a device is mdev with FD passing,
>> so fail the x-balloon-allowed check unconditionally in this case.
>>
>> There is also no easy way to get BDF as name with FD passing, so
>> we fake a name by VFIO_FD[fd].
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/hw/vfio/vfio-common.h | 1 +
>> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++
>> hw/vfio/iommufd.c | 12 +++++++----
>> hw/vfio/pci.c | 40 ++++++++++++++++++++++++-----------
>> 4 files changed, 71 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 3dac5c167e..960a14e8d8 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>> struct vfio_info_cap_header *
>> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>> #endif
>>
>> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 168847e7c5..d80aa58719 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -20,6 +20,7 @@
>> */
>>
>> #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>> #include <sys/ioctl.h>
>>
>> #include "hw/vfio/vfio-common.h"
>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>region, uint16_t cap_type)
>>
>> return ret;
>> }
>> +
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>> +{
>> + struct stat st;
>> +
>> + if (vbasedev->fd < 0) {
>> + if (stat(vbasedev->sysfsdev, &st) < 0) {
>> + error_setg_errno(errp, errno, "no such host device");
>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> + return -errno;
>> + }
>> + /* User may specify a name, e.g: VFIO platform device */
>> + if (!vbasedev->name) {
>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>> + }
>> + }
>> +#ifdef CONFIG_IOMMUFD
>> + else {
>> + if (!vbasedev->iommufd) {
>
>
>Can we handle with this case without CONFIG_IOMMUFD, simply by
>testing vbasedev->iommufd ?
Sure, will do.
>
>> + error_setg(errp, "Use FD passing only with iommufd backend");
>> + return -EINVAL;
>> + }
>> + /*
>> + * Give a name with fd so any function printing out vbasedev->name
>> + * will not break.
>> + */
>> + if (!vbasedev->name) {
>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>> + }
>> + }
>> +#endif
>> + return 0;
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 44dc6848bf..fd30477275 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name,
>VFIODevice *vbasedev,
>> uint32_t ioas_id;
>> Error *err = NULL;
>>
>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> - if (devfd < 0) {
>> - return devfd;
>> + if (vbasedev->fd < 0) {
>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> + if (devfd < 0) {
>> + return devfd;
>> + }
>> + vbasedev->fd = devfd;
>> + } else {
>> + devfd = vbasedev->fd;
>> }
>> - vbasedev->fd = devfd;
>>
>> ret = iommufd_connect_and_bind(vbasedev, errp);
>> if (ret) {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index e9a426200b..f95725ed16 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -44,6 +44,7 @@
>> #include "migration/blocker.h"
>> #include "migration/qemu-file.h"
>> #include "sysemu/iommufd.h"
>> +#include "monitor/monitor.h"
>>
>> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>
>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>> VFIODevice *vbasedev = &vdev->vbasedev;
>> char *tmp, *subsys;
>> Error *err = NULL;
>> - struct stat st;
>> int i, ret;
>> bool is_mdev;
>> char uuid[UUID_STR_LEN];
>> char *name;
>>
>> - if (!vbasedev->sysfsdev) {
>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>> if (!(~vdev->host.domain || ~vdev->host.bus ||
>> ~vdev->host.slot || ~vdev->host.function)) {
>> error_setg(errp, "No provided host device");
>> +#ifdef CONFIG_IOMMUFD
>> + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>> + "or -device vfio-pci,fd=DEVICE_FD\n");
>> +#else
>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>> +#endif
>
>or simply :
>
>
> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> +#ifdef CONFIG_IOMMUFD
> + "or -device vfio-pci,fd=DEVICE_FD "
> +#endif
> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
Good idea, will do.
>
>
>
>> return;
>> }
>> vbasedev->sysfsdev =
>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>> vdev->host.slot, vdev->host.function);
>> }
>>
>> - if (stat(vbasedev->sysfsdev, &st) < 0) {
>> - error_setg_errno(errp, errno, "no such host device");
>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> + if (vfio_device_get_name(vbasedev, errp)) {
>> return;
>> }
>> -
>> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>> vbasedev->ops = &vfio_pci_ops;
>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>> vbasedev->dev = DEVICE(vdev);
>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>> vdev->host.bus = ~0U;
>> vdev->host.slot = ~0U;
>> vdev->host.function = ~0U;
>> + vdev->vbasedev.fd = -1;
>We should probably move the all VFIODevice initializations :
>
> vbasedev->ops = &vfio_pci_ops;
> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
> vbasedev->dev = DEVICE(vdev);
>
>under vfio_instance_init (should be called vfio_pci_instance_init).
>
>This is true for all other VFIO devices. May be not for this series,
>it can come later.
Sure, Let me send a separate series addressing this.
>
>
>>
>> vdev->nv_gpudirect_clique = 0xFF;
>>
>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>> qdev_prop_nv_gpudirect_clique, uint8_t),
>> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>msix_relo,
>> OFF_AUTOPCIBAR_OFF),
>> - /*
>> - * TODO - support passed fds... is this necessary?
>> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
>> - */
>> #ifdef CONFIG_IOMMUFD
>> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>> DEFINE_PROP_END_OF_LIST(),
>> };
>>
>> +#ifdef CONFIG_IOMMUFD
>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>> +{
>> + VFIOPCIDevice *vdev = VFIO_PCI(obj);
>> + int fd = -1;
>> +
>> + fd = monitor_fd_param(monitor_cur(), str, errp);
>> + if (fd == -1) {
>> + error_prepend(errp, "Could not parse remote object fd %s:", str);
>> + return;
>> + }
>> + vdev->vbasedev.fd = fd;
>
>We could introduce a common helper in hw/vfio/common.c to remove code
>duplication :
>
>#ifdef CONFIG_IOMMUFD
>static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>{
> vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>}
>#endif
Good suggestions! Will do.
Thanks
Zhenzhong
On 11/13/23 04:00, Duan, Zhenzhong wrote:
>
>
>> -----Original Message-----
>> From: Cédric Le Goater <clg@redhat.com>
>> Sent: Friday, November 10, 2023 6:53 PM
>> Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>> file handle
>>
>> On 11/9/23 12:45, Zhenzhong Duan wrote:
>>> This gives management tools like libvirt a chance to open the vfio
>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>> to have privilege to open a VFIO or iommu cdev node.
>>>
>>> Together with the earlier support of pre-opening /dev/iommu device,
>>> now we have full support of passing a vfio device to unprivileged
>>> qemu by management tool. This mode is no more considered for the
>>> legacy backend. So let's remove the "TODO" comment.
>>>
>>> Add a helper function vfio_device_get_name() to check fd and get
>>> device name, it will also be used by other vfio devices.
>>>
>>> There is no easy way to check if a device is mdev with FD passing,
>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>
>>> There is also no easy way to get BDF as name with FD passing, so
>>> we fake a name by VFIO_FD[fd].
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> include/hw/vfio/vfio-common.h | 1 +
>>> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++
>>> hw/vfio/iommufd.c | 12 +++++++----
>>> hw/vfio/pci.c | 40 ++++++++++++++++++++++++-----------
>>> 4 files changed, 71 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 3dac5c167e..960a14e8d8 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>>> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>>> struct vfio_info_cap_header *
>>> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>> #endif
>>>
>>> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 168847e7c5..d80aa58719 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -20,6 +20,7 @@
>>> */
>>>
>>> #include "qemu/osdep.h"
>>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>> #include <sys/ioctl.h>
>>>
>>> #include "hw/vfio/vfio-common.h"
>>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>> region, uint16_t cap_type)
>>>
>>> return ret;
>>> }
>>> +
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> + struct stat st;
>>> +
>>> + if (vbasedev->fd < 0) {
>>> + if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> + error_setg_errno(errp, errno, "no such host device");
>>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> + return -errno;
>>> + }
>>> + /* User may specify a name, e.g: VFIO platform device */
>>> + if (!vbasedev->name) {
>>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>> + }
>>> + }
>>> +#ifdef CONFIG_IOMMUFD
>>> + else {
>>> + if (!vbasedev->iommufd) {
>>
>>
>> Can we handle with this case without CONFIG_IOMMUFD, simply by
>> testing vbasedev->iommufd ?
>
> Sure, will do.
>
>>
>>> + error_setg(errp, "Use FD passing only with iommufd backend");
>>> + return -EINVAL;
>>> + }
>>> + /*
>>> + * Give a name with fd so any function printing out vbasedev->name
>>> + * will not break.
>>> + */
>>> + if (!vbasedev->name) {
>>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>> + }
>>> + }
>>> +#endif
>>> + return 0;
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 44dc6848bf..fd30477275 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name,
>> VFIODevice *vbasedev,
>>> uint32_t ioas_id;
>>> Error *err = NULL;
>>>
>>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> - if (devfd < 0) {
>>> - return devfd;
>>> + if (vbasedev->fd < 0) {
>>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> + if (devfd < 0) {
>>> + return devfd;
>>> + }
>>> + vbasedev->fd = devfd;
>>> + } else {
>>> + devfd = vbasedev->fd;
>>> }
>>> - vbasedev->fd = devfd;
>>>
>>> ret = iommufd_connect_and_bind(vbasedev, errp);
>>> if (ret) {
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index e9a426200b..f95725ed16 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -44,6 +44,7 @@
>>> #include "migration/blocker.h"
>>> #include "migration/qemu-file.h"
>>> #include "sysemu/iommufd.h"
>>> +#include "monitor/monitor.h"
>>>
>>> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>>
>>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>> VFIODevice *vbasedev = &vdev->vbasedev;
>>> char *tmp, *subsys;
>>> Error *err = NULL;
>>> - struct stat st;
>>> int i, ret;
>>> bool is_mdev;
>>> char uuid[UUID_STR_LEN];
>>> char *name;
>>>
>>> - if (!vbasedev->sysfsdev) {
>>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>> if (!(~vdev->host.domain || ~vdev->host.bus ||
>>> ~vdev->host.slot || ~vdev->host.function)) {
>>> error_setg(errp, "No provided host device");
>>> +#ifdef CONFIG_IOMMUFD
>>> + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>>> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>>> + "or -device vfio-pci,fd=DEVICE_FD\n");
>>> +#else
>>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>> +#endif
>>
>> or simply :
>>
>>
>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>> +#ifdef CONFIG_IOMMUFD
>> + "or -device vfio-pci,fd=DEVICE_FD "
>> +#endif
>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>
> Good idea, will do.
>
>>
>>
>>
>>> return;
>>> }
>>> vbasedev->sysfsdev =
>>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>> vdev->host.slot, vdev->host.function);
>>> }
>>>
>>> - if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> - error_setg_errno(errp, errno, "no such host device");
>>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> + if (vfio_device_get_name(vbasedev, errp)) {
>>> return;
>>> }
>>> -
>>> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>> vbasedev->ops = &vfio_pci_ops;
>>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>> vbasedev->dev = DEVICE(vdev);
>>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>>> vdev->host.bus = ~0U;
>>> vdev->host.slot = ~0U;
>>> vdev->host.function = ~0U;
>>> + vdev->vbasedev.fd = -1;
>> We should probably move the all VFIODevice initializations :
>>
>> vbasedev->ops = &vfio_pci_ops;
>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>> vbasedev->dev = DEVICE(vdev);
>>
>> under vfio_instance_init (should be called vfio_pci_instance_init).
>>
>> This is true for all other VFIO devices. May be not for this series,
>> it can come later.
>
> Sure, Let me send a separate series addressing this.
>
>>
>>
>>>
>>> vdev->nv_gpudirect_clique = 0xFF;
>>>
>>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>>> qdev_prop_nv_gpudirect_clique, uint8_t),
>>> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>> msix_relo,
>>> OFF_AUTOPCIBAR_OFF),
>>> - /*
>>> - * TODO - support passed fds... is this necessary?
>>> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>>> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
>>> - */
>>> #ifdef CONFIG_IOMMUFD
>>> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>>> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>>> DEFINE_PROP_END_OF_LIST(),
>>> };
>>>
>>> +#ifdef CONFIG_IOMMUFD
>>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>> +{
>>> + VFIOPCIDevice *vdev = VFIO_PCI(obj);
>>> + int fd = -1;
>>> +
>>> + fd = monitor_fd_param(monitor_cur(), str, errp);
>>> + if (fd == -1) {
>>> + error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> + return;
>>> + }
>>> + vdev->vbasedev.fd = fd;
>>
>> We could introduce a common helper in hw/vfio/common.c to remove code
>> duplication :
>>
>> #ifdef CONFIG_IOMMUFD
>> static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>> {
>> vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>> }
>> #endif
>
> Good suggestions! Will do.
The idead is to isolate the iommufd addition in some common extension.
I tried with a QOM interface but it is not really satifying. See
previous email. I am not loosing faith though to find a solution for
this multi inheritance issue with VFIO devices
Thanks,
C.
>
> Thanks
> Zhenzhong
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Monday, November 13, 2023 7:08 PM
>Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/13/23 04:00, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Cédric Le Goater <clg@redhat.com>
>>> Sent: Friday, November 10, 2023 6:53 PM
>>> Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by
>passing a
>>> file handle
>>>
>>> On 11/9/23 12:45, Zhenzhong Duan wrote:
>>>> This gives management tools like libvirt a chance to open the vfio
>>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>>> to have privilege to open a VFIO or iommu cdev node.
>>>>
>>>> Together with the earlier support of pre-opening /dev/iommu device,
>>>> now we have full support of passing a vfio device to unprivileged
>>>> qemu by management tool. This mode is no more considered for the
>>>> legacy backend. So let's remove the "TODO" comment.
>>>>
>>>> Add a helper function vfio_device_get_name() to check fd and get
>>>> device name, it will also be used by other vfio devices.
>>>>
>>>> There is no easy way to check if a device is mdev with FD passing,
>>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>>
>>>> There is also no easy way to get BDF as name with FD passing, so
>>>> we fake a name by VFIO_FD[fd].
>>>>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>> include/hw/vfio/vfio-common.h | 1 +
>>>> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++
>>>> hw/vfio/iommufd.c | 12 +++++++----
>>>> hw/vfio/pci.c | 40 ++++++++++++++++++++++++-----------
>>>> 4 files changed, 71 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>>>> index 3dac5c167e..960a14e8d8 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>>>> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>>>> struct vfio_info_cap_header *
>>>> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>> #endif
>>>>
>>>> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>> index 168847e7c5..d80aa58719 100644
>>>> --- a/hw/vfio/helpers.c
>>>> +++ b/hw/vfio/helpers.c
>>>> @@ -20,6 +20,7 @@
>>>> */
>>>>
>>>> #include "qemu/osdep.h"
>>>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>>> #include <sys/ioctl.h>
>>>>
>>>> #include "hw/vfio/vfio-common.h"
>>>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev,
>int
>>> region, uint16_t cap_type)
>>>>
>>>> return ret;
>>>> }
>>>> +
>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>>> +{
>>>> + struct stat st;
>>>> +
>>>> + if (vbasedev->fd < 0) {
>>>> + if (stat(vbasedev->sysfsdev, &st) < 0) {
>>>> + error_setg_errno(errp, errno, "no such host device");
>>>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>>> + return -errno;
>>>> + }
>>>> + /* User may specify a name, e.g: VFIO platform device */
>>>> + if (!vbasedev->name) {
>>>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>>> + }
>>>> + }
>>>> +#ifdef CONFIG_IOMMUFD
>>>> + else {
>>>> + if (!vbasedev->iommufd) {
>>>
>>>
>>> Can we handle with this case without CONFIG_IOMMUFD, simply by
>>> testing vbasedev->iommufd ?
>>
>> Sure, will do.
>>
>>>
>>>> + error_setg(errp, "Use FD passing only with iommufd backend");
>>>> + return -EINVAL;
>>>> + }
>>>> + /*
>>>> + * Give a name with fd so any function printing out vbasedev->name
>>>> + * will not break.
>>>> + */
>>>> + if (!vbasedev->name) {
>>>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>>> + }
>>>> + }
>>>> +#endif
>>>> + return 0;
>>>> +}
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 44dc6848bf..fd30477275 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char
>*name,
>>> VFIODevice *vbasedev,
>>>> uint32_t ioas_id;
>>>> Error *err = NULL;
>>>>
>>>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>>> - if (devfd < 0) {
>>>> - return devfd;
>>>> + if (vbasedev->fd < 0) {
>>>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>>> + if (devfd < 0) {
>>>> + return devfd;
>>>> + }
>>>> + vbasedev->fd = devfd;
>>>> + } else {
>>>> + devfd = vbasedev->fd;
>>>> }
>>>> - vbasedev->fd = devfd;
>>>>
>>>> ret = iommufd_connect_and_bind(vbasedev, errp);
>>>> if (ret) {
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index e9a426200b..f95725ed16 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -44,6 +44,7 @@
>>>> #include "migration/blocker.h"
>>>> #include "migration/qemu-file.h"
>>>> #include "sysemu/iommufd.h"
>>>> +#include "monitor/monitor.h"
>>>>
>>>> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>>>
>>>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>> VFIODevice *vbasedev = &vdev->vbasedev;
>>>> char *tmp, *subsys;
>>>> Error *err = NULL;
>>>> - struct stat st;
>>>> int i, ret;
>>>> bool is_mdev;
>>>> char uuid[UUID_STR_LEN];
>>>> char *name;
>>>>
>>>> - if (!vbasedev->sysfsdev) {
>>>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>>> if (!(~vdev->host.domain || ~vdev->host.bus ||
>>>> ~vdev->host.slot || ~vdev->host.function)) {
>>>> error_setg(errp, "No provided host device");
>>>> +#ifdef CONFIG_IOMMUFD
>>>> + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>>>> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>>>> + "or -device vfio-pci,fd=DEVICE_FD\n");
>>>> +#else
>>>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>>> +#endif
>>>
>>> or simply :
>>>
>>>
>>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>> +#ifdef CONFIG_IOMMUFD
>>> + "or -device vfio-pci,fd=DEVICE_FD "
>>> +#endif
>>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>
>> Good idea, will do.
>>
>>>
>>>
>>>
>>>> return;
>>>> }
>>>> vbasedev->sysfsdev =
>>>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>> vdev->host.slot, vdev->host.function);
>>>> }
>>>>
>>>> - if (stat(vbasedev->sysfsdev, &st) < 0) {
>>>> - error_setg_errno(errp, errno, "no such host device");
>>>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>>> + if (vfio_device_get_name(vbasedev, errp)) {
>>>> return;
>>>> }
>>>> -
>>>> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>>> vbasedev->ops = &vfio_pci_ops;
>>>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>> vbasedev->dev = DEVICE(vdev);
>>>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>>>> vdev->host.bus = ~0U;
>>>> vdev->host.slot = ~0U;
>>>> vdev->host.function = ~0U;
>>>> + vdev->vbasedev.fd = -1;
>>> We should probably move the all VFIODevice initializations :
>>>
>>> vbasedev->ops = &vfio_pci_ops;
>>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>> vbasedev->dev = DEVICE(vdev);
>>>
>>> under vfio_instance_init (should be called vfio_pci_instance_init).
>>>
>>> This is true for all other VFIO devices. May be not for this series,
>>> it can come later.
>>
>> Sure, Let me send a separate series addressing this.
>>
>>>
>>>
>>>>
>>>> vdev->nv_gpudirect_clique = 0xFF;
>>>>
>>>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>>>> qdev_prop_nv_gpudirect_clique, uint8_t),
>>>> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>>> msix_relo,
>>>> OFF_AUTOPCIBAR_OFF),
>>>> - /*
>>>> - * TODO - support passed fds... is this necessary?
>>>> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>>>> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice,
>vfiogroupfd_name),
>>>> - */
>>>> #ifdef CONFIG_IOMMUFD
>>>> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>>>> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>>>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>>>> DEFINE_PROP_END_OF_LIST(),
>>>> };
>>>>
>>>> +#ifdef CONFIG_IOMMUFD
>>>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>>> +{
>>>> + VFIOPCIDevice *vdev = VFIO_PCI(obj);
>>>> + int fd = -1;
>>>> +
>>>> + fd = monitor_fd_param(monitor_cur(), str, errp);
>>>> + if (fd == -1) {
>>>> + error_prepend(errp, "Could not parse remote object fd %s:", str);
>>>> + return;
>>>> + }
>>>> + vdev->vbasedev.fd = fd;
>>>
>>> We could introduce a common helper in hw/vfio/common.c to remove code
>>> duplication :
>>>
>>> #ifdef CONFIG_IOMMUFD
>>> static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>> {
>>> vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>>> }
>>> #endif
>>
>> Good suggestions! Will do.
>
>
>The idead is to isolate the iommufd addition in some common extension.
>I tried with a QOM interface but it is not really satifying. See
>previous email. I am not loosing faith though to find a solution for
>this multi inheritance issue with VFIO devices
I know, always keep this in mind😊
Thanks
Zhenzhong
© 2016 - 2026 Red Hat, Inc.