This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.
Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.
Add helper functions vfio_device_set_fd() and vfio_device_get_name()
to set fd and get device name, they will also be used by other vfio
devices.
There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.
There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v6: simplify CONFIG_IOMMUFD checking code
introduce a helper vfio_device_set_fd
include/hw/vfio/vfio-common.h | 3 +++
hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
hw/vfio/iommufd.c | 12 ++++++----
hw/vfio/pci.c | 28 ++++++++++++----------
4 files changed, 71 insertions(+), 16 deletions(-)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..567e5f7bea 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
hwaddr size);
int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
uint64_t size, ram_addr_t ram_addr);
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..986ef1095a 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@
*/
#include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <sys/ioctl.h>
#include "hw/vfio/vfio-common.h"
@@ -27,6 +28,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "monitor/monitor.h"
/*
* Common VFIO interrupt disable
@@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
return ret;
}
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+ struct stat st;
+
+ if (vbasedev->fd < 0) {
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
+ error_setg_errno(errp, errno, "no such host device");
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ return -errno;
+ }
+ /* User may specify a name, e.g: VFIO platform device */
+ if (!vbasedev->name) {
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+ }
+ } else {
+ if (!vbasedev->iommufd) {
+ error_setg(errp, "Use FD passing only with iommufd backend");
+ return -EINVAL;
+ }
+ /*
+ * Give a name with fd so any function printing out vbasedev->name
+ * will not break.
+ */
+ if (!vbasedev->name) {
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ }
+ }
+
+ return 0;
+}
+
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ int fd = monitor_fd_param(monitor_cur(), str, errp);
+
+ if (fd < 0) {
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
+ return;
+ }
+ vbasedev->fd = fd;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 3eec428162..e08a217057 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
uint32_t ioas_id;
Error *err = NULL;
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
- if (devfd < 0) {
- return devfd;
+ if (vbasedev->fd < 0) {
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+ if (devfd < 0) {
+ return devfd;
+ }
+ vbasedev->fd = devfd;
+ } else {
+ devfd = vbasedev->fd;
}
- vbasedev->fd = devfd;
ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c5984b0598..b23b492cce 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIODevice *vbasedev = &vdev->vbasedev;
char *tmp, *subsys;
Error *err = NULL;
- struct stat st;
int i, ret;
bool is_mdev;
char uuid[UUID_STR_LEN];
char *name;
- if (!vbasedev->sysfsdev) {
+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
error_setg(errp, "No provided host device");
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+#ifdef CONFIG_IOMMUFD
+ "or -device vfio-pci,fd=DEVICE_FD "
+#endif
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
return;
}
@@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vdev->host.slot, vdev->host.function);
}
- if (stat(vbasedev->sysfsdev, &st) < 0) {
- error_setg_errno(errp, errno, "no such host device");
- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+ if (vfio_device_get_name(vbasedev, errp)) {
return;
}
-
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
vbasedev->ops = &vfio_pci_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
@@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj)
vdev->host.bus = ~0U;
vdev->host.slot = ~0U;
vdev->host.function = ~0U;
+ vdev->vbasedev.fd = -1;
vdev->nv_gpudirect_clique = 0xFF;
@@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = {
qdev_prop_nv_gpudirect_clique, uint8_t),
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
OFF_AUTOPCIBAR_OFF),
- /*
- * TODO - support passed fds... is this necessary?
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
- */
#ifdef CONFIG_IOMMUFD
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
+}
+#endif
+
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
dc->reset = vfio_pci_reset;
device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
--
2.34.1
Hi Zhenzhong,
On 14/11/23 11:09, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
>
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
>
> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
> to set fd and get device name, they will also be used by other vfio
> devices.
>
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
>
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v6: simplify CONFIG_IOMMUFD checking code
> introduce a helper vfio_device_set_fd
>
> include/hw/vfio/vfio-common.h | 3 +++
> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
> hw/vfio/iommufd.c | 12 ++++++----
> hw/vfio/pci.c | 28 ++++++++++++----------
> 4 files changed, 71 insertions(+), 16 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..567e5f7bea 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
> hwaddr size);
> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
> uint64_t size, ram_addr_t ram_addr);
> +
Please add bare documentation:
/* Returns 0 on success, or a negative errno. */
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
Functions taking an Error** param should return a boolean, so:
/* Return: true on success, else false setting @errp with error. */
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
> #endif /* HW_VFIO_VFIO_COMMON_H */
> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>
> return ret;
> }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> + struct stat st;
> +
> + if (vbasedev->fd < 0) {
> + if (stat(vbasedev->sysfsdev, &st) < 0) {
> + error_setg_errno(errp, errno, "no such host device");
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + return -errno;
> + }
> + /* User may specify a name, e.g: VFIO platform device */
> + if (!vbasedev->name) {
> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> + }
> + } else {
> + if (!vbasedev->iommufd) {
> + error_setg(errp, "Use FD passing only with iommufd backend");
> + return -EINVAL;
> + }
> + /*
> + * Give a name with fd so any function printing out vbasedev->name
> + * will not break.
> + */
> + if (!vbasedev->name) {
> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> + }
> + }
> +
> + return 0;
> +}
> +
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
bool vfio_device_set_fd(..., Error **errp)
> +{
> + int fd = monitor_fd_param(monitor_cur(), str, errp);
> +
> + if (fd < 0) {
> + error_prepend(errp, "Could not parse remote object fd %s:", str);
> + return;
return false;
> + }
> + vbasedev->fd = fd;
return true;
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 3eec428162..e08a217057 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
> uint32_t ioas_id;
> Error *err = NULL;
>
> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> - if (devfd < 0) {
> - return devfd;
> + if (vbasedev->fd < 0) {
> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> + if (devfd < 0) {
> + return devfd;
> + }
> + vbasedev->fd = devfd;
> + } else {
> + devfd = vbasedev->fd;
> }
> - vbasedev->fd = devfd;
>
> ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
> if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5984b0598..b23b492cce 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> VFIODevice *vbasedev = &vdev->vbasedev;
> char *tmp, *subsys;
> Error *err = NULL;
> - struct stat st;
> int i, ret;
> bool is_mdev;
> char uuid[UUID_STR_LEN];
> char *name;
>
> - if (!vbasedev->sysfsdev) {
> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
> if (!(~vdev->host.domain || ~vdev->host.bus ||
> ~vdev->host.slot || ~vdev->host.function)) {
> error_setg(errp, "No provided host device");
> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> +#ifdef CONFIG_IOMMUFD
> + "or -device vfio-pci,fd=DEVICE_FD "
> +#endif
> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> return;
> }
> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vdev->host.slot, vdev->host.function);
> }
>
> - if (stat(vbasedev->sysfsdev, &st) < 0) {
> - error_setg_errno(errp, errno, "no such host device");
> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + if (vfio_device_get_name(vbasedev, errp)) {
Clearer as:
if (vfio_device_get_name(vbasedev, errp) < 0) {
> return;
> }
Regards,
Phil.
On 11/15/23 13:09, Philippe Mathieu-Daudé wrote:
> Hi Zhenzhong,
>
> On 14/11/23 11:09, Zhenzhong Duan wrote:
>> This gives management tools like libvirt a chance to open the vfio
>> cdev with privilege and pass FD to qemu. This way qemu never needs
>> to have privilege to open a VFIO or iommu cdev node.
>>
>> Together with the earlier support of pre-opening /dev/iommu device,
>> now we have full support of passing a vfio device to unprivileged
>> qemu by management tool. This mode is no more considered for the
>> legacy backend. So let's remove the "TODO" comment.
>>
>> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
>> to set fd and get device name, they will also be used by other vfio
>> devices.
>>
>> There is no easy way to check if a device is mdev with FD passing,
>> so fail the x-balloon-allowed check unconditionally in this case.
>>
>> There is also no easy way to get BDF as name with FD passing, so
>> we fake a name by VFIO_FD[fd].
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> v6: simplify CONFIG_IOMMUFD checking code
>> introduce a helper vfio_device_set_fd
>>
>> include/hw/vfio/vfio-common.h | 3 +++
>> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
>> hw/vfio/iommufd.c | 12 ++++++----
>> hw/vfio/pci.c | 28 ++++++++++++----------
>> 4 files changed, 71 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 3dac5c167e..567e5f7bea 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>> hwaddr size);
>> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>> uint64_t size, ram_addr_t ram_addr);
>> +
>
> Please add bare documentation:
>
> /* Returns 0 on success, or a negative errno. */
>
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>
> Functions taking an Error** param should return a boolean, so:
>
> /* Return: true on success, else false setting @errp with error. */
>
>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>> #endif /* HW_VFIO_VFIO_COMMON_H */
>
>
>> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>> return ret;
>> }
>> +
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>> +{
>> + struct stat st;
>> +
>> + if (vbasedev->fd < 0) {
>> + if (stat(vbasedev->sysfsdev, &st) < 0) {
>> + error_setg_errno(errp, errno, "no such host device");
>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> + return -errno;
>> + }
>> + /* User may specify a name, e.g: VFIO platform device */
>> + if (!vbasedev->name) {
>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>> + }
>> + } else {
>> + if (!vbasedev->iommufd) {
>> + error_setg(errp, "Use FD passing only with iommufd backend");
>> + return -EINVAL;
>> + }
>> + /*
>> + * Give a name with fd so any function printing out vbasedev->name
>> + * will not break.
>> + */
>> + if (!vbasedev->name) {
>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
>
> bool vfio_device_set_fd(..., Error **errp)
>
>> +{
>> + int fd = monitor_fd_param(monitor_cur(), str, errp);
>> +
>> + if (fd < 0) {
>> + error_prepend(errp, "Could not parse remote object fd %s:", str);
>> + return;
>
> return false;
>
>> + }
>> + vbasedev->fd = fd;
>
> return true;
If we had a QOM base device object, vfio_device_set_fd() would be passed
directly to object_class_property_add_str() which expects a :
void (*set)(Object *, const char *, Error **)
I think it is fine to keep as it is. We might have a QOM base device object
one day ! Minor anyway.
Thanks,
C.
>
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 3eec428162..e08a217057 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>> uint32_t ioas_id;
>> Error *err = NULL;
>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> - if (devfd < 0) {
>> - return devfd;
>> + if (vbasedev->fd < 0) {
>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> + if (devfd < 0) {
>> + return devfd;
>> + }
>> + vbasedev->fd = devfd;
>> + } else {
>> + devfd = vbasedev->fd;
>> }
>> - vbasedev->fd = devfd;
>> ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>> if (ret) {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index c5984b0598..b23b492cce 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>> VFIODevice *vbasedev = &vdev->vbasedev;
>> char *tmp, *subsys;
>> Error *err = NULL;
>> - struct stat st;
>> int i, ret;
>> bool is_mdev;
>> char uuid[UUID_STR_LEN];
>> char *name;
>> - if (!vbasedev->sysfsdev) {
>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>> if (!(~vdev->host.domain || ~vdev->host.bus ||
>> ~vdev->host.slot || ~vdev->host.function)) {
>> error_setg(errp, "No provided host device");
>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>> +#ifdef CONFIG_IOMMUFD
>> + "or -device vfio-pci,fd=DEVICE_FD "
>> +#endif
>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>> return;
>> }
>> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>> vdev->host.slot, vdev->host.function);
>> }
>> - if (stat(vbasedev->sysfsdev, &st) < 0) {
>> - error_setg_errno(errp, errno, "no such host device");
>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> + if (vfio_device_get_name(vbasedev, errp)) {
>
> Clearer as:
>
> if (vfio_device_get_name(vbasedev, errp) < 0) {
>
>> return;
>> }
>
> Regards,
>
> Phil.
>
Hi Philippe,
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Wednesday, November 15, 2023 9:05 PM
>Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/15/23 13:09, Philippe Mathieu-Daudé wrote:
>> Hi Zhenzhong,
>>
>> On 14/11/23 11:09, Zhenzhong Duan wrote:
>>> This gives management tools like libvirt a chance to open the vfio
>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>> to have privilege to open a VFIO or iommu cdev node.
>>>
>>> Together with the earlier support of pre-opening /dev/iommu device,
>>> now we have full support of passing a vfio device to unprivileged
>>> qemu by management tool. This mode is no more considered for the
>>> legacy backend. So let's remove the "TODO" comment.
>>>
>>> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
>>> to set fd and get device name, they will also be used by other vfio
>>> devices.
>>>
>>> There is no easy way to check if a device is mdev with FD passing,
>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>
>>> There is also no easy way to get BDF as name with FD passing, so
>>> we fake a name by VFIO_FD[fd].
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> v6: simplify CONFIG_IOMMUFD checking code
>>> introduce a helper vfio_device_set_fd
>>>
>>> include/hw/vfio/vfio-common.h | 3 +++
>>> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
>>> hw/vfio/iommufd.c | 12 ++++++----
>>> hw/vfio/pci.c | 28 ++++++++++++----------
>>> 4 files changed, 71 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 3dac5c167e..567e5f7bea 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -251,4 +251,7 @@ int
>vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>>> hwaddr size);
>>> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>>> uint64_t size, ram_addr_t ram_addr);
>>> +
>>
>> Please add bare documentation:
>>
>> /* Returns 0 on success, or a negative errno. */
>>
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
Will do, I'd like to wait a few days to collect more suggested changes and RB,
Then send all these updates to Cédric in once before he pushes this series to vfio-next.
>>
>> Functions taking an Error** param should return a boolean, so:
>>
>> /* Return: true on success, else false setting @errp with error. */
>>
>>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>>> #endif /* HW_VFIO_VFIO_COMMON_H */
>>
>>
>>> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>region, uint16_t cap_type)
>>> return ret;
>>> }
>>> +
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> + struct stat st;
>>> +
>>> + if (vbasedev->fd < 0) {
>>> + if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> + error_setg_errno(errp, errno, "no such host device");
>>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> + return -errno;
>>> + }
>>> + /* User may specify a name, e.g: VFIO platform device */
>>> + if (!vbasedev->name) {
>>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>> + }
>>> + } else {
>>> + if (!vbasedev->iommufd) {
>>> + error_setg(errp, "Use FD passing only with iommufd backend");
>>> + return -EINVAL;
>>> + }
>>> + /*
>>> + * Give a name with fd so any function printing out vbasedev->name
>>> + * will not break.
>>> + */
>>> + if (!vbasedev->name) {
>>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>> + }
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
>>
>> bool vfio_device_set_fd(..., Error **errp)
>>
>>> +{
>>> + int fd = monitor_fd_param(monitor_cur(), str, errp);
>>> +
>>> + if (fd < 0) {
>>> + error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> + return;
>>
>> return false;
>>
>>> + }
>>> + vbasedev->fd = fd;
>>
>> return true;
>
>If we had a QOM base device object, vfio_device_set_fd() would be passed
>directly to object_class_property_add_str() which expects a :
>
> void (*set)(Object *, const char *, Error **)
>
>I think it is fine to keep as it is. We might have a QOM base device object
>one day ! Minor anyway.
>
>Thanks,
>
>C.
>
>
>>
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 3eec428162..e08a217057 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name,
>VFIODevice *vbasedev,
>>> uint32_t ioas_id;
>>> Error *err = NULL;
>>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> - if (devfd < 0) {
>>> - return devfd;
>>> + if (vbasedev->fd < 0) {
>>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> + if (devfd < 0) {
>>> + return devfd;
>>> + }
>>> + vbasedev->fd = devfd;
>>> + } else {
>>> + devfd = vbasedev->fd;
>>> }
>>> - vbasedev->fd = devfd;
>>> ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>>> if (ret) {
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index c5984b0598..b23b492cce 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>> VFIODevice *vbasedev = &vdev->vbasedev;
>>> char *tmp, *subsys;
>>> Error *err = NULL;
>>> - struct stat st;
>>> int i, ret;
>>> bool is_mdev;
>>> char uuid[UUID_STR_LEN];
>>> char *name;
>>> - if (!vbasedev->sysfsdev) {
>>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>> if (!(~vdev->host.domain || ~vdev->host.bus ||
>>> ~vdev->host.slot || ~vdev->host.function)) {
>>> error_setg(errp, "No provided host device");
>>> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>> +#ifdef CONFIG_IOMMUFD
>>> + "or -device vfio-pci,fd=DEVICE_FD "
>>> +#endif
>>> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>> return;
>>> }
>>> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>> vdev->host.slot, vdev->host.function);
>>> }
>>> - if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> - error_setg_errno(errp, errno, "no such host device");
>>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> + if (vfio_device_get_name(vbasedev, errp)) {
>>
>> Clearer as:
>>
>> if (vfio_device_get_name(vbasedev, errp) < 0) {
>>
>>> return;
>>> }
Will do.
Thanks
Zhenzhong
>>> Please add bare documentation: >>> >>> /* Returns 0 on success, or a negative errno. */ >>> >>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); > > Will do, I'd like to wait a few days to collect more suggested changes and RB, > Then send all these updates to Cédric in once before he pushes this series to vfio-next. Yep. Could you respin a v7 with all the comments on v6 ? I will then apply directly on vfio-next. Please wait for Eric to finish looking at the platform part. Thanks, C.
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Sent: Thursday, November 16, 2023 3:25 PM >Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a >file handle > >>>> Please add bare documentation: >>>> >>>> /* Returns 0 on success, or a negative errno. */ >>>> >>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); >> >> Will do, I'd like to wait a few days to collect more suggested changes and RB, >> Then send all these updates to Cédric in once before he pushes this series to >vfio-next. > >Yep. Could you respin a v7 with all the comments on v6 ? I will >then apply directly on vfio-next. Sure. > >Please wait for Eric to finish looking at the platform part. Sure. Thanks Zhenzhong
On 11/14/23 11:09, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
>
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
>
> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
> to set fd and get device name, they will also be used by other vfio
> devices.
>
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
>
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v6: simplify CONFIG_IOMMUFD checking code
> introduce a helper vfio_device_set_fd
>
> include/hw/vfio/vfio-common.h | 3 +++
> hw/vfio/helpers.c | 44 +++++++++++++++++++++++++++++++++++
> hw/vfio/iommufd.c | 12 ++++++----
> hw/vfio/pci.c | 28 ++++++++++++----------
> 4 files changed, 71 insertions(+), 16 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..567e5f7bea 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
> hwaddr size);
> int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
> uint64_t size, ram_addr_t ram_addr);
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
> #endif /* HW_VFIO_VFIO_COMMON_H */
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 168847e7c5..986ef1095a 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -20,6 +20,7 @@
> */
>
> #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
Unused now. I removed it.
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Thanks,
C.
> #include <sys/ioctl.h>
>
> #include "hw/vfio/vfio-common.h"
> @@ -27,6 +28,7 @@
> #include "trace.h"
> #include "qapi/error.h"
> #include "qemu/error-report.h"
> +#include "monitor/monitor.h"
>
> /*
> * Common VFIO interrupt disable
> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>
> return ret;
> }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> + struct stat st;
> +
> + if (vbasedev->fd < 0) {
> + if (stat(vbasedev->sysfsdev, &st) < 0) {
> + error_setg_errno(errp, errno, "no such host device");
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + return -errno;
> + }
> + /* User may specify a name, e.g: VFIO platform device */
> + if (!vbasedev->name) {
> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> + }
> + } else {
> + if (!vbasedev->iommufd) {
> + error_setg(errp, "Use FD passing only with iommufd backend");
> + return -EINVAL;
> + }
> + /*
> + * Give a name with fd so any function printing out vbasedev->name
> + * will not break.
> + */
> + if (!vbasedev->name) {
> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> + }
> + }
> +
> + return 0;
> +}
> +
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
> +{
> + int fd = monitor_fd_param(monitor_cur(), str, errp);
> +
> + if (fd < 0) {
> + error_prepend(errp, "Could not parse remote object fd %s:", str);
> + return;
> + }
> + vbasedev->fd = fd;
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 3eec428162..e08a217057 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
> uint32_t ioas_id;
> Error *err = NULL;
>
> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> - if (devfd < 0) {
> - return devfd;
> + if (vbasedev->fd < 0) {
> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> + if (devfd < 0) {
> + return devfd;
> + }
> + vbasedev->fd = devfd;
> + } else {
> + devfd = vbasedev->fd;
> }
> - vbasedev->fd = devfd;
>
> ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
> if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5984b0598..b23b492cce 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> VFIODevice *vbasedev = &vdev->vbasedev;
> char *tmp, *subsys;
> Error *err = NULL;
> - struct stat st;
> int i, ret;
> bool is_mdev;
> char uuid[UUID_STR_LEN];
> char *name;
>
> - if (!vbasedev->sysfsdev) {
> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
> if (!(~vdev->host.domain || ~vdev->host.bus ||
> ~vdev->host.slot || ~vdev->host.function)) {
> error_setg(errp, "No provided host device");
> error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> +#ifdef CONFIG_IOMMUFD
> + "or -device vfio-pci,fd=DEVICE_FD "
> +#endif
> "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> return;
> }
> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vdev->host.slot, vdev->host.function);
> }
>
> - if (stat(vbasedev->sysfsdev, &st) < 0) {
> - error_setg_errno(errp, errno, "no such host device");
> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> + if (vfio_device_get_name(vbasedev, errp)) {
> return;
> }
> -
> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> vbasedev->ops = &vfio_pci_ops;
> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
> vbasedev->dev = DEVICE(vdev);
> @@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj)
> vdev->host.bus = ~0U;
> vdev->host.slot = ~0U;
> vdev->host.function = ~0U;
> + vdev->vbasedev.fd = -1;
>
> vdev->nv_gpudirect_clique = 0xFF;
>
> @@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = {
> qdev_prop_nv_gpudirect_clique, uint8_t),
> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
> OFF_AUTOPCIBAR_OFF),
> - /*
> - * TODO - support passed fds... is this necessary?
> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
> - */
> #ifdef CONFIG_IOMMUFD
> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
> @@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = {
> DEFINE_PROP_END_OF_LIST(),
> };
>
> +#ifdef CONFIG_IOMMUFD
> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
> +{
> + vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
> +}
> +#endif
> +
> static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> {
> DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>
> dc->reset = vfio_pci_reset;
> device_class_set_props(dc, vfio_pci_dev_properties);
> +#ifdef CONFIG_IOMMUFD
> + object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
> +#endif
> dc->desc = "VFIO-based PCI device assignment";
> set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> pdc->realize = vfio_realize;
© 2016 - 2026 Red Hat, Inc.