From: Eric Auger <eric.auger@redhat.com>
We want the VFIO devices to be able to use two different
IOMMU backends, the legacy VFIO one and the new iommufd one.
Introduce vfio_[attach/detach]_device which aim at hiding the
underlying IOMMU backend (IOCTLs, datatypes, ...).
Once vfio_attach_device completes, the device is attached
to a security context and its fd can be used. Conversely,
when vfio_detach_device completes, the device has been
detached from the security context.
In this patch, only the vfio-pci device gets converted to use
the new API. Subsequent patches will handle other devices.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/vfio/container.c | 66 +++++++++++++++++++++++++++++++++++
hw/vfio/pci.c | 50 ++++----------------------
hw/vfio/trace-events | 2 +-
include/hw/vfio/vfio-common.h | 3 ++
4 files changed, 76 insertions(+), 45 deletions(-)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 175cdbbdff..74556da0c7 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1083,3 +1083,69 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
}
return vfio_eeh_container_op(container, op);
}
+
+static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
+{
+ char *tmp, group_path[PATH_MAX], *group_name;
+ int ret, groupid;
+ ssize_t len;
+
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
+
+ if (len <= 0 || len >= sizeof(group_path)) {
+ ret = len < 0 ? -errno : -ENAMETOOLONG;
+ error_setg_errno(errp, -ret, "no iommu_group found");
+ return ret;
+ }
+
+ group_path[len] = 0;
+
+ group_name = basename(group_path);
+ if (sscanf(group_name, "%d", &groupid) != 1) {
+ error_setg_errno(errp, errno, "failed to read %s", group_path);
+ return -errno;
+ }
+ return groupid;
+}
+
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ int groupid = vfio_device_groupid(vbasedev, errp);
+ VFIODevice *vbasedev_iter;
+ VFIOGroup *group;
+ int ret;
+
+ if (groupid < 0) {
+ return groupid;
+ }
+
+ group = vfio_get_group(groupid, as, errp);
+ if (!group) {
+ return -ENOENT;
+ }
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+ error_setg(errp, "device is already attached");
+ vfio_put_group(group);
+ return -EBUSY;
+ }
+ }
+ ret = vfio_get_device(group, name, vbasedev, errp);
+ if (ret) {
+ vfio_put_group(group);
+ }
+
+ return ret;
+}
+
+void vfio_detach_device(VFIODevice *vbasedev)
+{
+ VFIOGroup *group = vbasedev->group;
+
+ vfio_put_base_device(vbasedev);
+ vfio_put_group(group);
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..34f65ecd17 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
static void vfio_put_device(VFIOPCIDevice *vdev)
{
+ vfio_detach_device(&vdev->vbasedev);
+
g_free(vdev->vbasedev.name);
g_free(vdev->msix);
-
- vfio_put_base_device(&vdev->vbasedev);
}
static void vfio_err_notifier_handler(void *opaque)
@@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- VFIODevice *vbasedev_iter;
- VFIOGroup *group;
- char *tmp, *subsys, group_path[PATH_MAX], *group_name;
+ char *tmp, *subsys;
Error *err = NULL;
- ssize_t len;
struct stat st;
- int groupid;
int i, ret;
bool is_mdev;
char uuid[UUID_FMT_LEN];
@@ -3015,38 +3011,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
- tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
- len = readlink(tmp, group_path, sizeof(group_path));
- g_free(tmp);
-
- if (len <= 0 || len >= sizeof(group_path)) {
- error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
- "no iommu_group found");
- goto error;
- }
-
- group_path[len] = 0;
-
- group_name = basename(group_path);
- if (sscanf(group_name, "%d", &groupid) != 1) {
- error_setg_errno(errp, errno, "failed to read %s", group_path);
- goto error;
- }
-
- trace_vfio_realize(vbasedev->name, groupid);
-
- group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
- if (!group) {
- goto error;
- }
-
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
- error_setg(errp, "device is already attached");
- vfio_put_group(group);
- goto error;
- }
- }
+ trace_vfio_realize(vbasedev->name);
/*
* Mediated devices *might* operate compatibly with discarding of RAM, but
@@ -3065,7 +3030,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
if (vbasedev->ram_block_discard_allowed && !is_mdev) {
error_setg(errp, "x-balloon-allowed only potentially compatible "
"with mdev devices");
- vfio_put_group(group);
goto error;
}
@@ -3076,10 +3040,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
name = g_strdup(vbasedev->name);
}
- ret = vfio_get_device(group, name, vbasedev, errp);
+ ret = vfio_attach_device(name, vbasedev,
+ pci_device_iommu_address_space(pdev), errp);
g_free(name);
if (ret) {
- vfio_put_group(group);
goto error;
}
@@ -3318,7 +3282,6 @@ error:
static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI(obj);
- VFIOGroup *group = vdev->vbasedev.group;
vfio_display_finalize(vdev);
vfio_bars_finalize(vdev);
@@ -3332,7 +3295,6 @@ static void vfio_instance_finalize(Object *obj)
* g_free(vdev->igd_opregion);
*/
vfio_put_device(vdev);
- vfio_put_group(group);
}
static void vfio_exitfn(PCIDevice *pdev)
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index ee7509e68e..8016d9f0d2 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
-vfio_realize(const char *name, int group_id) " (%s) group %d"
+vfio_realize(const char *name) " (%s)"
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bb7f9fe9c4..a29dfe7723 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -253,6 +253,9 @@ void vfio_put_group(VFIOGroup *group);
struct vfio_device_info *vfio_get_device_info(int fd);
int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev, Error **errp);
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp);
+void vfio_detach_device(VFIODevice *vbasedev);
extern int vfio_kvm_device_fd;
--
2.34.1
On 8/30/23 12:37, Zhenzhong Duan wrote:
> From: Eric Auger <eric.auger@redhat.com>
>
> We want the VFIO devices to be able to use two different
> IOMMU callbacks, the legacy VFIO one and the new iommufd one.
>
> Introduce vfio_[attach/detach]_device which aim at hiding the
> underlying IOMMU backend (IOCTLs, datatypes, ...).
>
> Once vfio_attach_device completes, the device is attached
> to a security context and its fd can be used. Conversely
> When vfio_detach_device completes, the device has been
> detached to the security context.
>
> In this patch, only the vfio-pci device gets converted to use
> the new API. Subsequent patches will handle other devices.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/vfio/container.c | 66 +++++++++++++++++++++++++++++++++++
> hw/vfio/pci.c | 50 ++++----------------------
> hw/vfio/trace-events | 2 +-
> include/hw/vfio/vfio-common.h | 3 ++
> 4 files changed, 76 insertions(+), 45 deletions(-)
>
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 175cdbbdff..74556da0c7 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -1083,3 +1083,69 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
> }
> return vfio_eeh_container_op(container, op);
> }
> +
> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
> +{
> + char *tmp, group_path[PATH_MAX], *group_name;
> + int ret, groupid;
> + ssize_t len;
> +
> + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> + len = readlink(tmp, group_path, sizeof(group_path));
> + g_free(tmp);
> +
> + if (len <= 0 || len >= sizeof(group_path)) {
> + ret = len < 0 ? -errno : -ENAMETOOLONG;
> + error_setg_errno(errp, -ret, "no iommu_group found");
> + return ret;
> + }
> +
> + group_path[len] = 0;
> +
> + group_name = basename(group_path);
> + if (sscanf(group_name, "%d", &groupid) != 1) {
> + error_setg_errno(errp, errno, "failed to read %s", group_path);
> + return -errno;
> + }
> + return groupid;
> +}
VFIO has 4 other routines reading the iommu_group from sysfs :
vfio_ccw_get_group()
vfio_ap_get_group()
vfio_base_device_init()
sysfs_find_group_file()
which could use this helper. Thanks for introducing it !
> +
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + int groupid = vfio_device_groupid(vbasedev, errp);
> + VFIODevice *vbasedev_iter;
> + VFIOGroup *group;
> + int ret;
> +
> + if (groupid < 0) {
> + return groupid;
> + }
> +
> + group = vfio_get_group(groupid, as, errp);
> + if (!group) {
> + return -ENOENT;
> + }
> +
> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> + error_setg(errp, "device is already attached");
> + vfio_put_group(group);
> + return -EBUSY;
> + }
> + }
> + ret = vfio_get_device(group, name, vbasedev, errp);
> + if (ret) {
> + vfio_put_group(group);
> + }
> +
> + return ret;
> +}
> +
> +void vfio_detach_device(VFIODevice *vbasedev)
> +{
> + VFIOGroup *group = vbasedev->group;
> +
> + vfio_put_base_device(vbasedev);
> + vfio_put_group(group);
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..34f65ecd17 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>
> static void vfio_put_device(VFIOPCIDevice *vdev)
> {
> + vfio_detach_device(&vdev->vbasedev);
> +
> g_free(vdev->vbasedev.name);
> g_free(vdev->msix);
> -
> - vfio_put_base_device(&vdev->vbasedev);
> }
>
> static void vfio_err_notifier_handler(void *opaque)
> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> {
> VFIOPCIDevice *vdev = VFIO_PCI(pdev);
> VFIODevice *vbasedev = &vdev->vbasedev;
> - VFIODevice *vbasedev_iter;
> - VFIOGroup *group;
> - char *tmp, *subsys, group_path[PATH_MAX], *group_name;
> + char *tmp, *subsys;
> Error *err = NULL;
> - ssize_t len;
> struct stat st;
> - int groupid;
> int i, ret;
> bool is_mdev;
> char uuid[UUID_FMT_LEN];
> @@ -3015,38 +3011,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
> vbasedev->dev = DEVICE(vdev);
>
> - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> - len = readlink(tmp, group_path, sizeof(group_path));
> - g_free(tmp);
> -
> - if (len <= 0 || len >= sizeof(group_path)) {
> - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
> - "no iommu_group found");
> - goto error;
> - }
> -
> - group_path[len] = 0;
> -
> - group_name = basename(group_path);
> - if (sscanf(group_name, "%d", &groupid) != 1) {
> - error_setg_errno(errp, errno, "failed to read %s", group_path);
> - goto error;
> - }
> -
> - trace_vfio_realize(vbasedev->name, groupid);
> -
> - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
> - if (!group) {
> - goto error;
> - }
> -
> - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> - error_setg(errp, "device is already attached");
> - vfio_put_group(group);
> - goto error;
> - }
> - }
> + trace_vfio_realize(vbasedev->name);
I would move the trace event after vfio_attach_device() and print out the group.
Or simply add trace events in vfio_detach/attach_device().
This is a general comment on the VFIO PCI routines which do not use a 'vfio_pci'
prefix and I find it confusing, sometimes. Like this call stack :
vfio_put_device()
vfio_detach_device()
vfio_put_base_device()
I think we should rename vfio_put_device() to vfio_pci_put_device(). This is
not for this series.
Thanks,
C.
>
> /*
> * Mediated devices *might* operate compatibly with discarding of RAM, but
> @@ -3065,7 +3030,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> if (vbasedev->ram_block_discard_allowed && !is_mdev) {
> error_setg(errp, "x-balloon-allowed only potentially compatible "
> "with mdev devices");
> - vfio_put_group(group);
> goto error;
> }
>
> @@ -3076,10 +3040,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> name = g_strdup(vbasedev->name);
> }
>
> - ret = vfio_get_device(group, name, vbasedev, errp);
> + ret = vfio_attach_device(name, vbasedev,
> + pci_device_iommu_address_space(pdev), errp);
> g_free(name);
> if (ret) {
> - vfio_put_group(group);
> goto error;
> }
>
> @@ -3318,7 +3282,6 @@ error:
> static void vfio_instance_finalize(Object *obj)
> {
> VFIOPCIDevice *vdev = VFIO_PCI(obj);
> - VFIOGroup *group = vdev->vbasedev.group;
>
> vfio_display_finalize(vdev);
> vfio_bars_finalize(vdev);
> @@ -3332,7 +3295,6 @@ static void vfio_instance_finalize(Object *obj)
> * g_free(vdev->igd_opregion);
> */
> vfio_put_device(vdev);
> - vfio_put_group(group);
> }
>
> static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index ee7509e68e..8016d9f0d2 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
> vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
> vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
> vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
> -vfio_realize(const char *name, int group_id) " (%s) group %d"
> +vfio_realize(const char *name) " (%s)"
> vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
> vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
> vfio_pci_reset(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index bb7f9fe9c4..a29dfe7723 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -253,6 +253,9 @@ void vfio_put_group(VFIOGroup *group);
> struct vfio_device_info *vfio_get_device_info(int fd);
> int vfio_get_device(VFIOGroup *group, const char *name,
> VFIODevice *vbasedev, Error **errp);
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp);
> +void vfio_detach_device(VFIODevice *vbasedev);
>
> extern int vfio_kvm_device_fd;
>
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Thursday, September 21, 2023 5:45 PM
>Subject: Re: [PATCH v1 09/22] vfio/container: Introduce
>vfio_[attach/detach]_device
>
>On 8/30/23 12:37, Zhenzhong Duan wrote:
>> From: Eric Auger <eric.auger@redhat.com>
>>
>> We want the VFIO devices to be able to use two different
>> IOMMU callbacks, the legacy VFIO one and the new iommufd one.
>>
>> Introduce vfio_[attach/detach]_device which aim at hiding the
>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>
>> Once vfio_attach_device completes, the device is attached
>> to a security context and its fd can be used. Conversely
>> When vfio_detach_device completes, the device has been
>> detached to the security context.
>>
>> In this patch, only the vfio-pci device gets converted to use
>> the new API. Subsequent patches will handle other devices.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/vfio/container.c | 66 +++++++++++++++++++++++++++++++++++
>> hw/vfio/pci.c | 50 ++++----------------------
>> hw/vfio/trace-events | 2 +-
>> include/hw/vfio/vfio-common.h | 3 ++
>> 4 files changed, 76 insertions(+), 45 deletions(-)
>>
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 175cdbbdff..74556da0c7 100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -1083,3 +1083,69 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>> }
>> return vfio_eeh_container_op(container, op);
>> }
>> +
>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>> +{
>> + char *tmp, group_path[PATH_MAX], *group_name;
>> + int ret, groupid;
>> + ssize_t len;
>> +
>> + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> + len = readlink(tmp, group_path, sizeof(group_path));
>> + g_free(tmp);
>> +
>> + if (len <= 0 || len >= sizeof(group_path)) {
>> + ret = len < 0 ? -errno : -ENAMETOOLONG;
>> + error_setg_errno(errp, -ret, "no iommu_group found");
>> + return ret;
>> + }
>> +
>> + group_path[len] = 0;
>> +
>> + group_name = basename(group_path);
>> + if (sscanf(group_name, "%d", &groupid) != 1) {
>> + error_setg_errno(errp, errno, "failed to read %s", group_path);
>> + return -errno;
>> + }
>> + return groupid;
>> +}
>
>VFIO has 4 other routines reading the iommu_group from sysfs :
>
> vfio_ccw_get_group()
> vfio_ap_get_group()
> vfio_base_device_init()
> sysfs_find_group_file()
>
>which could use this helper. Thanks for introducing it !
>
>
>
>> +
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> + AddressSpace *as, Error **errp)
>> +{
>> + int groupid = vfio_device_groupid(vbasedev, errp);
>> + VFIODevice *vbasedev_iter;
>> + VFIOGroup *group;
>> + int ret;
>> +
>> + if (groupid < 0) {
>> + return groupid;
>> + }
>> +
>> + group = vfio_get_group(groupid, as, errp);
>> + if (!group) {
>> + return -ENOENT;
>> + }
>> +
>> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> + error_setg(errp, "device is already attached");
>> + vfio_put_group(group);
>> + return -EBUSY;
>> + }
>> + }
>> + ret = vfio_get_device(group, name, vbasedev, errp);
>> + if (ret) {
>> + vfio_put_group(group);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +void vfio_detach_device(VFIODevice *vbasedev)
>> +{
>> + VFIOGroup *group = vbasedev->group;
>> +
>> + vfio_put_base_device(vbasedev);
>> + vfio_put_group(group);
>> +}
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index a205c6b113..34f65ecd17 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>*vdev, Error **errp)
>>
>> static void vfio_put_device(VFIOPCIDevice *vdev)
>> {
>> + vfio_detach_device(&vdev->vbasedev);
>> +
>> g_free(vdev->vbasedev.name);
>> g_free(vdev->msix);
>> -
>> - vfio_put_base_device(&vdev->vbasedev);
>> }
>>
>> static void vfio_err_notifier_handler(void *opaque)
>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>> {
>> VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>> VFIODevice *vbasedev = &vdev->vbasedev;
>> - VFIODevice *vbasedev_iter;
>> - VFIOGroup *group;
>> - char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>> + char *tmp, *subsys;
>> Error *err = NULL;
>> - ssize_t len;
>> struct stat st;
>> - int groupid;
>> int i, ret;
>> bool is_mdev;
>> char uuid[UUID_FMT_LEN];
>> @@ -3015,38 +3011,7 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>> vbasedev->dev = DEVICE(vdev);
>>
>> - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> - len = readlink(tmp, group_path, sizeof(group_path));
>> - g_free(tmp);
>> -
>> - if (len <= 0 || len >= sizeof(group_path)) {
>> - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>> - "no iommu_group found");
>> - goto error;
>> - }
>> -
>> - group_path[len] = 0;
>> -
>> - group_name = basename(group_path);
>> - if (sscanf(group_name, "%d", &groupid) != 1) {
>> - error_setg_errno(errp, errno, "failed to read %s", group_path);
>> - goto error;
>> - }
>> -
>> - trace_vfio_realize(vbasedev->name, groupid);
>> -
>> - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>errp);
>> - if (!group) {
>> - goto error;
>> - }
>> -
>> - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> - error_setg(errp, "device is already attached");
>> - vfio_put_group(group);
>> - goto error;
>> - }
>> - }
>> + trace_vfio_realize(vbasedev->name);
>
>I would move the trace event after vfio_attach_device() and print out the group.
>Or simply add trace events in vfio_detach/attach_device().
>
>This is a general comment on the VFIO PCI routines which do not use a 'vfio_pci'
>prefix and I find it confusing, sometimes. Like this call stack :
>
> vfio_put_device()
> vfio_detach_device()
> vfio_put_base_device()
>
>I think we should rename vfio_put_device() in vfio_pci_put_device(). This is
>not for this series.
Good suggestion! I have been confused by this function too.
I can help with the rename if you have not done it yet.
Thanks
Zhenzhong
Hi Zhenzhong,
In the commit title I would replace vfio/container with vfio/pci to match
the next patches.
On 8/30/23 12:37, Zhenzhong Duan wrote:
> From: Eric Auger <eric.auger@redhat.com>
>
> We want the VFIO devices to be able to use two different
> IOMMU callbacks, the legacy VFIO one and the new iommufd one.
s/callbacks/backends
>
> Introduce vfio_[attach/detach]_device which aim at hiding the
> underlying IOMMU backend (IOCTLs, datatypes, ...).
At the moment only the implementation based on the legacy
container/group exists. Let's use it from the vfio-pci device.
>
> Once vfio_attach_device completes, the device is attached
> to a security context and its fd can be used. Conversely
> When vfio_detach_device completes, the device has been
> detached to the security context.
from the security context
>
> In this patch, only the vfio-pci device gets converted to use
> the new API. Subsequent patches will handle other devices.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/vfio/container.c | 66 +++++++++++++++++++++++++++++++++++
> hw/vfio/pci.c | 50 ++++----------------------
> hw/vfio/trace-events | 2 +-
> include/hw/vfio/vfio-common.h | 3 ++
> 4 files changed, 76 insertions(+), 45 deletions(-)
>
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 175cdbbdff..74556da0c7 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -1083,3 +1083,69 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
> }
> return vfio_eeh_container_op(container, op);
> }
> +
> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
> +{
> + char *tmp, group_path[PATH_MAX], *group_name;
> + int ret, groupid;
> + ssize_t len;
> +
> + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> + len = readlink(tmp, group_path, sizeof(group_path));
> + g_free(tmp);
> +
> + if (len <= 0 || len >= sizeof(group_path)) {
> + ret = len < 0 ? -errno : -ENAMETOOLONG;
> + error_setg_errno(errp, -ret, "no iommu_group found");
> + return ret;
> + }
> +
> + group_path[len] = 0;
> +
> + group_name = basename(group_path);
> + if (sscanf(group_name, "%d", &groupid) != 1) {
> + error_setg_errno(errp, errno, "failed to read %s", group_path);
> + return -errno;
> + }
> + return groupid;
> +}
> +
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + int groupid = vfio_device_groupid(vbasedev, errp);
> + VFIODevice *vbasedev_iter;
> + VFIOGroup *group;
> + int ret;
> +
> + if (groupid < 0) {
> + return groupid;
> + }
> +
> + group = vfio_get_group(groupid, as, errp);
> + if (!group) {
> + return -ENOENT;
> + }
> +
> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> + error_setg(errp, "device is already attached");
> + vfio_put_group(group);
> + return -EBUSY;
> + }
> + }
> + ret = vfio_get_device(group, name, vbasedev, errp);
> + if (ret) {
> + vfio_put_group(group);
> + }
> +
> + return ret;
> +}
> +
> +void vfio_detach_device(VFIODevice *vbasedev)
> +{
> + VFIOGroup *group = vbasedev->group;
> +
> + vfio_put_base_device(vbasedev);
> + vfio_put_group(group);
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..34f65ecd17 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>
> static void vfio_put_device(VFIOPCIDevice *vdev)
> {
> + vfio_detach_device(&vdev->vbasedev);
> +
> g_free(vdev->vbasedev.name);
> g_free(vdev->msix);
> -
> - vfio_put_base_device(&vdev->vbasedev);
> }
>
> static void vfio_err_notifier_handler(void *opaque)
> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> {
> VFIOPCIDevice *vdev = VFIO_PCI(pdev);
> VFIODevice *vbasedev = &vdev->vbasedev;
> - VFIODevice *vbasedev_iter;
> - VFIOGroup *group;
> - char *tmp, *subsys, group_path[PATH_MAX], *group_name;
> + char *tmp, *subsys;
> Error *err = NULL;
> - ssize_t len;
> struct stat st;
> - int groupid;
> int i, ret;
> bool is_mdev;
> char uuid[UUID_FMT_LEN];
> @@ -3015,38 +3011,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vbasedev->type = VFIO_DEVICE_TYPE_PCI;
> vbasedev->dev = DEVICE(vdev);
>
> - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> - len = readlink(tmp, group_path, sizeof(group_path));
> - g_free(tmp);
> -
> - if (len <= 0 || len >= sizeof(group_path)) {
> - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
> - "no iommu_group found");
> - goto error;
> - }
> -
> - group_path[len] = 0;
> -
> - group_name = basename(group_path);
> - if (sscanf(group_name, "%d", &groupid) != 1) {
> - error_setg_errno(errp, errno, "failed to read %s", group_path);
> - goto error;
> - }
> -
> - trace_vfio_realize(vbasedev->name, groupid);
> -
> - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
> - if (!group) {
> - goto error;
> - }
> -
> - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> - error_setg(errp, "device is already attached");
> - vfio_put_group(group);
> - goto error;
> - }
> - }
> + trace_vfio_realize(vbasedev->name);
>
> /*
> * Mediated devices *might* operate compatibly with discarding of RAM, but
> @@ -3065,7 +3030,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> if (vbasedev->ram_block_discard_allowed && !is_mdev) {
> error_setg(errp, "x-balloon-allowed only potentially compatible "
> "with mdev devices");
> - vfio_put_group(group);
> goto error;
> }
>
> @@ -3076,10 +3040,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> name = g_strdup(vbasedev->name);
> }
>
> - ret = vfio_get_device(group, name, vbasedev, errp);
> + ret = vfio_attach_device(name, vbasedev,
> + pci_device_iommu_address_space(pdev), errp);
> g_free(name);
> if (ret) {
> - vfio_put_group(group);
> goto error;
> }
>
> @@ -3318,7 +3282,6 @@ error:
> static void vfio_instance_finalize(Object *obj)
> {
> VFIOPCIDevice *vdev = VFIO_PCI(obj);
> - VFIOGroup *group = vdev->vbasedev.group;
>
> vfio_display_finalize(vdev);
> vfio_bars_finalize(vdev);
> @@ -3332,7 +3295,6 @@ static void vfio_instance_finalize(Object *obj)
> * g_free(vdev->igd_opregion);
> */
> vfio_put_device(vdev);
> - vfio_put_group(group);
> }
>
> static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index ee7509e68e..8016d9f0d2 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
> vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
> vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
> vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
> -vfio_realize(const char *name, int group_id) " (%s) group %d"
> +vfio_realize(const char *name) " (%s)"
I am not sure this tracepoint is still useful without the group id. Some
tracepoints should be backend (BE) specific to remain useful, and should be
called from container.c/iommufd.c instead of from the generic function.
> vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
> vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
> vfio_pci_reset(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index bb7f9fe9c4..a29dfe7723 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -253,6 +253,9 @@ void vfio_put_group(VFIOGroup *group);
> struct vfio_device_info *vfio_get_device_info(int fd);
> int vfio_get_device(VFIOGroup *group, const char *name,
> VFIODevice *vbasedev, Error **errp);
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp);
> +void vfio_detach_device(VFIODevice *vbasedev);
>
> extern int vfio_kvm_device_fd;
>
Thanks
Eric
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Wednesday, September 20, 2023 9:33 PM
>Subject: Re: [PATCH v1 09/22] vfio/container: Introduce
>vfio_[attach/detach]_device
>
>Hi Zhenzhong,
>
>In the commit title I would replace vfio/container by vfio/pci to match
>next patches
Makes sense, will do.
>
>On 8/30/23 12:37, Zhenzhong Duan wrote:
>> From: Eric Auger <eric.auger@redhat.com>
>>
>> We want the VFIO devices to be able to use two different
>> IOMMU callbacks, the legacy VFIO one and the new iommufd one.
>s/callbacks/backends
>>
>> Introduce vfio_[attach/detach]_device which aim at hiding the
>> underlying IOMMU backend (IOCTLs, datatypes, ...).
> ......
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index ee7509e68e..8016d9f0d2 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int
>slot, int function, int
>> vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot
>reset: %s"
>> vfio_populate_device_config(const char *name, unsigned long size, unsigned
>long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx,
>flags: 0x%lx"
>> vfio_populate_device_get_irq_info_failure(const char *errstr)
>"VFIO_DEVICE_GET_IRQ_INFO failure: %s"
>> -vfio_realize(const char *name, int group_id) " (%s) group %d"
>> +vfio_realize(const char *name) " (%s)"
>I am not sure this trace point is useful anymore, without the id. Some
>tracepoints shall be BE specific to keep their usefulness and should be
>called from container.c/iommufd.c instead of in the generic function.
Previously I used this trace event just to hint that vfio realize was starting.
I agree with you that being BE specific could show more useful information.
I'll fix it in v2.
Thanks
Zhenzhong
© 2016 - 2026 Red Hat, Inc.