Add a new callback, iommufd_cdev_pci_hot_reset, to do the iommufd-specific
check and reset operation.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v6: pci_hot_reset returns -errno if it fails
hw/vfio/iommufd.c | 145 +++++++++++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 1 +
2 files changed, 146 insertions(+)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index e5bf528e89..3eec428162 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -24,6 +24,7 @@
#include "sysemu/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "pci.h"
static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
@@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
close(vbasedev->fd);
}
+/*
+ * Look up a VFIO device by its devid.
+ *
+ * Walks the global vfio_device_list, considering only devices whose
+ * container uses vfio_iommufd_ops, and returns the first one whose devid
+ * equals @devid, or NULL if there is none.
+ */
+static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
+{
+    VFIODevice *candidate;
+
+    QLIST_FOREACH(candidate, &vfio_device_list, global_next) {
+        if (candidate->bcontainer->ops == &vfio_iommufd_ops &&
+            candidate->devid == devid) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Resolve one entry of the hot-reset dependency list to the realized VFIO
+ * PCI device that has to be quiesced around the reset.
+ *
+ * Returns NULL when there is nothing to do for the entry: it is @reset_dev
+ * itself, it carries the VFIO_PCI_DEVID_OWNED marker rather than a devid,
+ * or no realized PCI device found by iommufd_cdev_pci_find_by_devid()
+ * matches its devid.
+ */
+static VFIOPCIDevice *
+iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
+                                    VFIODevice *reset_dev)
+{
+    VFIODevice *vbasedev_tmp;
+
+    if (dep_dev->devid == reset_dev->devid ||
+        dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
+        return NULL;
+    }
+
+    vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
+    if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
+        vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
+        return NULL;
+    }
+
+    return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
+}
+
+/*
+ * pci_hot_reset callback for the iommufd backend.
+ *
+ * Queries the hot-reset dependency list of @vbasedev, checks that every
+ * affected device is owned, and issues VFIO_DEVICE_PCI_HOT_RESET with an
+ * empty fd array.
+ *
+ * @single: true  - the reset must affect only @vbasedev; fails with -EINVAL
+ *                  if another realized device found in the dependency list
+ *                  would be affected.
+ *          false - multi-device reset; @vbasedev and every affected realized
+ *                  device get vfio_pci_pre_reset()/vfio_pci_post_reset()
+ *                  around the reset; fails with -EINVAL if no other such
+ *                  device exists.
+ *
+ * Returns 0 on success, -EPERM if the kernel does not flag all affected
+ * devices as owned, -EINVAL as described above, the error returned by
+ * vfio_pci_get_pci_hot_reset_info(), or -errno from the reset ioctl.
+ */
+static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    struct vfio_pci_hot_reset *reset;
+    int ret, i;
+    bool multi = false;
+
+    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+    if (!single) {
+        vfio_pci_pre_reset(vdev);
+    }
+    vdev->vbasedev.needs_reset = false;
+
+    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+    if (ret) {
+        goto out_single;
+    }
+
+    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
+
+    devices = &info->devices[0];
+
+    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
+        /* Only complain when no PM reset is available for this device */
+        if (!vdev->has_pm_reset) {
+            for (i = 0; i < info->count; i++) {
+                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
+                    error_report("vfio: Cannot reset device %s, "
+                                 "depends on device %04x:%02x:%02x.%x "
+                                 "which is not owned.",
+                                 vdev->vbasedev.name, devices[i].segment,
+                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
+                                 PCI_FUNC(devices[i].devfn));
+                }
+            }
+        }
+        ret = -EPERM;
+        goto out_single;
+    }
+
+    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+
+        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
+                                                     devices[i].bus,
+                                                     PCI_SLOT(devices[i].devfn),
+                                                     PCI_FUNC(devices[i].devfn),
+                                                     devices[i].devid);
+
+        /*
+         * If a VFIO cdev device is resettable, all the dependent devices
+         * are either bound to the same iommufd or within the same
+         * iommu_group as one of the iommufd-bound devices.
+         */
+        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
+
+        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], vbasedev);
+        if (!tmp) {
+            continue;
+        }
+
+        if (single) {
+            /* Another device would be hit by a single-device reset */
+            ret = -EINVAL;
+            goto out_single;
+        }
+        vfio_pci_pre_reset(tmp);
+        tmp->vbasedev.needs_reset = false;
+        multi = true;
+    }
+
+    if (!single && !multi) {
+        ret = -EINVAL;
+        goto out_single;
+    }
+
+    /* Use zero length array for hot reset with iommufd backend */
+    reset = g_malloc0(sizeof(*reset));
+    reset->argsz = sizeof(*reset);
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    if (ret) {
+        /* Latch errno before any other call gets a chance to clobber it */
+        ret = -errno;
+    }
+    g_free(reset);
+
+    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+                                    ret ? strerror(-ret) : "Success");
+
+    /* Re-enable INTx on affected devices */
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+
+        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], vbasedev);
+        if (!tmp) {
+            continue;
+        }
+        vfio_pci_post_reset(tmp);
+    }
+out_single:
+    if (!single) {
+        vfio_pci_post_reset(vdev);
+    }
+    g_free(info);
+
+    return ret;
+}
+
const VFIOIOMMUOps vfio_iommufd_ops = {
.dma_map = iommufd_cdev_map,
.dma_unmap = iommufd_cdev_unmap,
.attach_device = iommufd_cdev_attach,
.detach_device = iommufd_cdev_detach,
+ .pci_hot_reset = iommufd_cdev_pci_hot_reset,
};
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 5d3e9e8cee..d838232d5a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name, const char *str, in
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
+iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
--
2.34.1
On 11/14/23 11:09, Zhenzhong Duan wrote:
> Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific
> check and reset operation.
nit: Implement the newly introduced pci_hot_reset callback?
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v6: pci_hot_reset return -errno if fails
>
> hw/vfio/iommufd.c | 145 +++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/trace-events | 1 +
> 2 files changed, 146 insertions(+)
>
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index e5bf528e89..3eec428162 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -24,6 +24,7 @@
> #include "sysemu/reset.h"
> #include "qemu/cutils.h"
> #include "qemu/chardev_open.h"
> +#include "pci.h"
>
> static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
> ram_addr_t size, void *vaddr, bool readonly)
> @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
> close(vbasedev->fd);
> }
>
> +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
> +{
> + VFIODevice *vbasedev_iter;
> +
> + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
> + if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) {
> + continue;
> + }
> + if (devid == vbasedev_iter->devid) {
> + return vbasedev_iter;
> + }
> + }
> + return NULL;
> +}
> +
> +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
> +{
> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> + struct vfio_pci_hot_reset_info *info = NULL;
> + struct vfio_pci_dependent_device *devices;
> + struct vfio_pci_hot_reset *reset;
> + int ret, i;
> + bool multi = false;
> +
> + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
> +
> + if (!single) {
> + vfio_pci_pre_reset(vdev);
> + }
> + vdev->vbasedev.needs_reset = false;
> +
> + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
> +
> + if (ret) {
> + goto out_single;
> + }
> +
> + assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
> +
> + devices = &info->devices[0];
> +
> + if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
> + if (!vdev->has_pm_reset) {
> + for (i = 0; i < info->count; i++) {
> + if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
> + error_report("vfio: Cannot reset device %s, "
> + "depends on device %04x:%02x:%02x.%x "
> + "which is not owned.",
> + vdev->vbasedev.name, devices[i].segment,
> + devices[i].bus, PCI_SLOT(devices[i].devfn),
> + PCI_FUNC(devices[i].devfn));
> + }
> + }
> + }
> + ret = -EPERM;
> + goto out_single;
> + }
> +
> + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
> +
> + for (i = 0; i < info->count; i++) {
> + VFIOPCIDevice *tmp;
> + VFIODevice *vbasedev_iter;
> +
> + trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
> + devices[i].bus,
> + PCI_SLOT(devices[i].devfn),
> + PCI_FUNC(devices[i].devfn),
> + devices[i].devid);
> +
> + /*
> + * If a VFIO cdev device is resettable, all the dependent devices
> + * are either bound to same iommufd or within same iommu_groups as
> + * one of the iommufd bound devices.
> + */
> + assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
> +
> + if (devices[i].devid == vdev->vbasedev.devid ||
> + devices[i].devid == VFIO_PCI_DEVID_OWNED) {
> + continue;
> + }
> +
> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
> + if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> + continue;
> + }
> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> + if (single) {
> + ret = -EINVAL;
> + goto out_single;
> + }
> + vfio_pci_pre_reset(tmp);
> + tmp->vbasedev.needs_reset = false;
> + multi = true;
> + }
> +
> + if (!single && !multi) {
> + ret = -EINVAL;
> + goto out_single;
> + }
> +
> + /* Use zero length array for hot reset with iommufd backend */
> + reset = g_malloc0(sizeof(*reset));
> + reset->argsz = sizeof(*reset);
> +
> + /* Bus reset! */
> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
> + g_free(reset);
> + if (ret) {
> + ret = -errno;
> + }
> +
> + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
> + ret ? strerror(errno) : "Success");
> +
> + /* Re-enable INTx on affected devices */
> + for (i = 0; i < info->count; i++) {
> + VFIOPCIDevice *tmp;
> + VFIODevice *vbasedev_iter;
> +
> + if (devices[i].devid == vdev->vbasedev.devid ||
> + devices[i].devid == VFIO_PCI_DEVID_OWNED) {
> + continue;
> + }
> +
> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
> + if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> + continue;
> + }
> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
nit: I see this block of code is also used above for the pre_reset. It may
be worth introducing a helper? That could be done later, though.
> + vfio_pci_post_reset(tmp);
> + }
> +out_single:
> + if (!single) {
> + vfio_pci_post_reset(vdev);
> + }
> + g_free(info);
> +
> + return ret;
> +}
> +
> const VFIOIOMMUOps vfio_iommufd_ops = {
> .dma_map = iommufd_cdev_map,
> .dma_unmap = iommufd_cdev_unmap,
> .attach_device = iommufd_cdev_attach,
> .detach_device = iommufd_cdev_detach,
> + .pci_hot_reset = iommufd_cdev_pci_hot_reset,
> };
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 5d3e9e8cee..d838232d5a 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name, const char *str, in
> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
> +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
Otherwise looks good to me.
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Eric
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Friday, November 17, 2023 9:54 PM
>Subject: Re: [PATCH v6 09/21] vfio/iommufd: Enable pci hot reset through
>iommufd cdev interface
>
>
>
>On 11/14/23 11:09, Zhenzhong Duan wrote:
>> Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific
>> check and reset operation.
>
>nit: Implement the newly introduced pci_hot_reset callback?
Yes
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> v6: pci_hot_reset return -errno if fails
>>
>> hw/vfio/iommufd.c | 145
>+++++++++++++++++++++++++++++++++++++++++++
>> hw/vfio/trace-events | 1 +
>> 2 files changed, 146 insertions(+)
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index e5bf528e89..3eec428162 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -24,6 +24,7 @@
>> #include "sysemu/reset.h"
>> #include "qemu/cutils.h"
>> #include "qemu/chardev_open.h"
>> +#include "pci.h"
>>
>> static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
>> ram_addr_t size, void *vaddr, bool readonly)
>> @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice
>*vbasedev)
>> close(vbasedev->fd);
>> }
>>
>> +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
>> +{
>> + VFIODevice *vbasedev_iter;
>> +
>> + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
>> + if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) {
>> + continue;
>> + }
>> + if (devid == vbasedev_iter->devid) {
>> + return vbasedev_iter;
>> + }
>> + }
>> + return NULL;
>> +}
>> +
>> +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
>> +{
>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> + struct vfio_pci_hot_reset_info *info = NULL;
>> + struct vfio_pci_dependent_device *devices;
>> + struct vfio_pci_hot_reset *reset;
>> + int ret, i;
>> + bool multi = false;
>> +
>> + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
>> +
>> + if (!single) {
>> + vfio_pci_pre_reset(vdev);
>> + }
>> + vdev->vbasedev.needs_reset = false;
>> +
>> + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
>> +
>> + if (ret) {
>> + goto out_single;
>> + }
>> +
>> + assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
>> +
>> + devices = &info->devices[0];
>> +
>> + if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
>> + if (!vdev->has_pm_reset) {
>> + for (i = 0; i < info->count; i++) {
>> + if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
>> + error_report("vfio: Cannot reset device %s, "
>> + "depends on device %04x:%02x:%02x.%x "
>> + "which is not owned.",
>> + vdev->vbasedev.name, devices[i].segment,
>> + devices[i].bus, PCI_SLOT(devices[i].devfn),
>> + PCI_FUNC(devices[i].devfn));
>> + }
>> + }
>> + }
>> + ret = -EPERM;
>> + goto out_single;
>> + }
>> +
>> + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
>> +
>> + for (i = 0; i < info->count; i++) {
>> + VFIOPCIDevice *tmp;
>> + VFIODevice *vbasedev_iter;
>> +
>> + trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
>> + devices[i].bus,
>> + PCI_SLOT(devices[i].devfn),
>> + PCI_FUNC(devices[i].devfn),
>> + devices[i].devid);
>> +
>> + /*
>> + * If a VFIO cdev device is resettable, all the dependent devices
>> + * are either bound to same iommufd or within same iommu_groups as
>> + * one of the iommufd bound devices.
>> + */
>> + assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
>> +
>> + if (devices[i].devid == vdev->vbasedev.devid ||
>> + devices[i].devid == VFIO_PCI_DEVID_OWNED) {
>> + continue;
>> + }
>> +
>> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
>> + if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
>> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> + continue;
>> + }
>> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> + if (single) {
>> + ret = -EINVAL;
>> + goto out_single;
>> + }
>> + vfio_pci_pre_reset(tmp);
>> + tmp->vbasedev.needs_reset = false;
>> + multi = true;
>> + }
>> +
>> + if (!single && !multi) {
>> + ret = -EINVAL;
>> + goto out_single;
>> + }
>> +
>> + /* Use zero length array for hot reset with iommufd backend */
>> + reset = g_malloc0(sizeof(*reset));
>> + reset->argsz = sizeof(*reset);
>> +
>> + /* Bus reset! */
>> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
>> + g_free(reset);
>> + if (ret) {
>> + ret = -errno;
>> + }
>> +
>> + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
>> + ret ? strerror(errno) : "Success");
>> +
>> + /* Re-enable INTx on affected devices */
>> + for (i = 0; i < info->count; i++) {
>> + VFIOPCIDevice *tmp;
>> + VFIODevice *vbasedev_iter;
>> +
>> + if (devices[i].devid == vdev->vbasedev.devid ||
>> + devices[i].devid == VFIO_PCI_DEVID_OWNED) {
>> + continue;
>> + }
>> +
>> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid);
>> + if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
>> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> + continue;
>> + }
>> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>nit: I see this block of code also is used above for the pre_reset. May
>be interesting to introduce an helper? Could be done later though
Will do in v7.
Thanks
Zhenzhong
>> + vfio_pci_post_reset(tmp);
>> + }
>> +out_single:
>> + if (!single) {
>> + vfio_pci_post_reset(vdev);
>> + }
>> + g_free(info);
>> +
>> + return ret;
>> +}
>> +
>> const VFIOIOMMUOps vfio_iommufd_ops = {
>> .dma_map = iommufd_cdev_map,
>> .dma_unmap = iommufd_cdev_unmap,
>> .attach_device = iommufd_cdev_attach,
>> .detach_device = iommufd_cdev_detach,
>> + .pci_hot_reset = iommufd_cdev_pci_hot_reset,
>> };
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 5d3e9e8cee..d838232d5a 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const
>char *name, const char *str, in
>> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
>> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new
>IOMMUFD container with ioasid=%d"
>> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int
>num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
>> +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int
>function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
>Otherwise looks good to me.
>
>Reviewed-by: Eric Auger <eric.auger@redhat.com>
>
>
>Eric
>
>
© 2016 - 2026 Red Hat, Inc.