Add a new callback, iommufd_cdev_pci_hot_reset, to perform the
iommufd-specific checks and reset operation.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v6: pci_hot_reset returns -errno on failure
hw/vfio/iommufd.c | 145 +++++++++++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 1 +
2 files changed, 146 insertions(+)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index e5bf528e89..3eec428162 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -24,6 +24,7 @@
#include "sysemu/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "pci.h"
static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
@@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
close(vbasedev->fd);
}
+/*
+ * Walk the global VFIO device list looking for a device whose container
+ * uses vfio_iommufd_ops and whose devid equals @devid.
+ *
+ * Returns the first matching VFIODevice, or NULL when nothing matches.
+ */
+static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
+{
+    VFIODevice *candidate;
+
+    QLIST_FOREACH(candidate, &vfio_device_list, global_next) {
+        /* Devices behind any other backend are never considered. */
+        bool uses_iommufd = candidate->bcontainer->ops == &vfio_iommufd_ops;
+
+        if (uses_iommufd && candidate->devid == devid) {
+            return candidate;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Map one entry of the hot-reset dependency list to a realized VFIO PCI
+ * device other than the one being reset.
+ *
+ * Returns NULL when the entry's devid is @reset_dev's own devid or
+ * VFIO_PCI_DEVID_OWNED, or when iommufd_cdev_pci_find_by_devid() finds no
+ * realized device of type VFIO_DEVICE_TYPE_PCI for it.
+ */
+static VFIOPCIDevice *
+iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
+                                    VFIODevice *reset_dev)
+{
+    VFIODevice *vbasedev_tmp;
+
+    if (dep_dev->devid == reset_dev->devid ||
+        dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
+        return NULL;
+    }
+
+    vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
+    if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
+        vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
+        return NULL;
+    }
+
+    return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
+}
+
+/*
+ * pci_hot_reset callback of vfio_iommufd_ops.
+ *
+ * @vbasedev: device to reset; it must be embedded in a VFIOPCIDevice.
+ * @single: when true, the reset is refused if it would affect another
+ *          realized VFIO PCI device; when false, the reset is refused
+ *          unless at least one other such device is affected.
+ *
+ * Returns 0 on success and a non-zero value otherwise: the value returned
+ * by vfio_pci_get_pci_hot_reset_info() if that call fails, -EPERM when the
+ * kernel does not report VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED, -EINVAL when
+ * the set of affected devices does not match @single, or -errno from the
+ * VFIO_DEVICE_PCI_HOT_RESET ioctl.
+ */
+static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    struct vfio_pci_hot_reset *reset;
+    int ret, i;
+    bool multi = false;
+
+    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+    if (!single) {
+        vfio_pci_pre_reset(vdev);
+    }
+    vdev->vbasedev.needs_reset = false;
+
+    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+    if (ret) {
+        goto out_single;
+    }
+
+    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
+
+    devices = &info->devices[0];
+
+    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
+        if (!vdev->has_pm_reset) {
+            for (i = 0; i < info->count; i++) {
+                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
+                    error_report("vfio: Cannot reset device %s, "
+                                 "depends on device %04x:%02x:%02x.%x "
+                                 "which is not owned.",
+                                 vdev->vbasedev.name, devices[i].segment,
+                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
+                                 PCI_FUNC(devices[i].devfn));
+                }
+            }
+        }
+        ret = -EPERM;
+        goto out_single;
+    }
+
+    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+
+        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
+                                                     devices[i].bus,
+                                                     PCI_SLOT(devices[i].devfn),
+                                                     PCI_FUNC(devices[i].devfn),
+                                                     devices[i].devid);
+
+        /*
+         * If a VFIO cdev device is resettable, all the dependent devices
+         * are either bound to same iommufd or within same iommu_groups as
+         * one of the iommufd bound devices.
+         */
+        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
+
+        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i],
+                                                  &vdev->vbasedev);
+        if (!tmp) {
+            continue;
+        }
+
+        /* Another realized device is affected: not a single-device reset. */
+        if (single) {
+            ret = -EINVAL;
+            goto out_single;
+        }
+        vfio_pci_pre_reset(tmp);
+        tmp->vbasedev.needs_reset = false;
+        multi = true;
+    }
+
+    if (!single && !multi) {
+        ret = -EINVAL;
+        goto out_single;
+    }
+
+    /* Use zero length array for hot reset with iommufd backend */
+    reset = g_malloc0(sizeof(*reset));
+    reset->argsz = sizeof(*reset);
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    if (ret) {
+        /* Capture errno before g_free() or tracing can overwrite it. */
+        ret = -errno;
+    }
+    g_free(reset);
+
+    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+                                    ret ? strerror(-ret) : "Success");
+
+    /* Re-enable INTx on affected devices */
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+
+        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i],
+                                                  &vdev->vbasedev);
+        if (!tmp) {
+            continue;
+        }
+        vfio_pci_post_reset(tmp);
+    }
+out_single:
+    if (!single) {
+        vfio_pci_post_reset(vdev);
+    }
+    g_free(info);
+
+    return ret;
+}
+
/* VFIOIOMMUOps table wiring each callback to this file's iommufd_cdev_* function. */
const VFIOIOMMUOps vfio_iommufd_ops = {
.dma_map = iommufd_cdev_map,
.dma_unmap = iommufd_cdev_unmap,
.attach_device = iommufd_cdev_attach,
.detach_device = iommufd_cdev_detach,
+ .pci_hot_reset = iommufd_cdev_pci_hot_reset, /* callback added by this patch */
};
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 5d3e9e8cee..d838232d5a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name, const char *str, in
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
+iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
--
2.34.1
On 11/14/23 11:09, Zhenzhong Duan wrote: > Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific > check and reset operation. nit: Implement the newly introduced pci_hot_reset callback? > > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > v6: pci_hot_reset return -errno if fails > > hw/vfio/iommufd.c | 145 +++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 1 + > 2 files changed, 146 insertions(+) > > diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c > index e5bf528e89..3eec428162 100644 > --- a/hw/vfio/iommufd.c > +++ b/hw/vfio/iommufd.c > @@ -24,6 +24,7 @@ > #include "sysemu/reset.h" > #include "qemu/cutils.h" > #include "qemu/chardev_open.h" > +#include "pci.h" > > static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, > ram_addr_t size, void *vaddr, bool readonly) > @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) > close(vbasedev->fd); > } > > +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) > +{ > + VFIODevice *vbasedev_iter; > + > + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { > + if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { > + continue; > + } > + if (devid == vbasedev_iter->devid) { > + return vbasedev_iter; > + } > + } > + return NULL; > +} > + > +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) > +{ > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + struct vfio_pci_hot_reset *reset; > + int ret, i; > + bool multi = false; > + > + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); > + > + if (!single) { > + vfio_pci_pre_reset(vdev); > + } > + vdev->vbasedev.needs_reset = false; > + > + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); > + > + if (ret) { > + goto out_single; > + } > + > + assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); > + > + devices = &info->devices[0]; > + > + if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { > + if (!vdev->has_pm_reset) { > + for (i = 0; i < info->count; i++) { > + if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { > + error_report("vfio: Cannot reset device %s, " > + "depends on device %04x:%02x:%02x.%x " > + "which is not owned.", > + vdev->vbasedev.name, devices[i].segment, > + devices[i].bus, PCI_SLOT(devices[i].devfn), > + PCI_FUNC(devices[i].devfn)); > + } > + } > + } > + ret = -EPERM; > + goto out_single; > + } > + > + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); > + > + for (i = 0; i < info->count; i++) { > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + > + trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, > + devices[i].bus, > + PCI_SLOT(devices[i].devfn), > + PCI_FUNC(devices[i].devfn), > + devices[i].devid); > + > + /* > + * If a VFIO cdev device is resettable, all the dependent devices > + * are either bound to same iommufd or within same iommu_groups as > + * one of the iommufd bound devices. 
> + */ > + assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); > + > + if (devices[i].devid == vdev->vbasedev.devid || > + devices[i].devid == VFIO_PCI_DEVID_OWNED) { > + continue; > + } > + > + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); > + if (!vbasedev_iter || !vbasedev_iter->dev->realized || > + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (single) { > + ret = -EINVAL; > + goto out_single; > + } > + vfio_pci_pre_reset(tmp); > + tmp->vbasedev.needs_reset = false; > + multi = true; > + } > + > + if (!single && !multi) { > + ret = -EINVAL; > + goto out_single; > + } > + > + /* Use zero length array for hot reset with iommufd backend */ > + reset = g_malloc0(sizeof(*reset)); > + reset->argsz = sizeof(*reset); > + > + /* Bus reset! */ > + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); > + g_free(reset); > + if (ret) { > + ret = -errno; > + } > + > + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, > + ret ? strerror(errno) : "Success"); > + > + /* Re-enable INTx on affected devices */ > + for (i = 0; i < info->count; i++) { > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + > + if (devices[i].devid == vdev->vbasedev.devid || > + devices[i].devid == VFIO_PCI_DEVID_OWNED) { > + continue; > + } > + > + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); > + if (!vbasedev_iter || !vbasedev_iter->dev->realized || > + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); nit: I see this block of code also is used above for the pre_reset. May be interesting to introduce an helper? 
Could be done later though > + vfio_pci_post_reset(tmp); > + } > +out_single: > + if (!single) { > + vfio_pci_post_reset(vdev); > + } > + g_free(info); > + > + return ret; > +} > + > const VFIOIOMMUOps vfio_iommufd_ops = { > .dma_map = iommufd_cdev_map, > .dma_unmap = iommufd_cdev_unmap, > .attach_device = iommufd_cdev_attach, > .detach_device = iommufd_cdev_detach, > + .pci_hot_reset = iommufd_cdev_pci_hot_reset, > }; > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 5d3e9e8cee..d838232d5a 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name, const char *str, in > iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" > iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" > iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" > +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" Otherwise looks good to me. Reviewed-by: Eric Auger <eric.auger@redhat.com> Eric
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Sent: Friday, November 17, 2023 9:54 PM >Subject: Re: [PATCH v6 09/21] vfio/iommufd: Enable pci hot reset through >iommufd cdev interface > > > >On 11/14/23 11:09, Zhenzhong Duan wrote: >> Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific >> check and reset operation. > >nit: Implement the newly introduced pci_hot_reset callback? Yes >> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> v6: pci_hot_reset return -errno if fails >> >> hw/vfio/iommufd.c | 145 >+++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 1 + >> 2 files changed, 146 insertions(+) >> >> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >> index e5bf528e89..3eec428162 100644 >> --- a/hw/vfio/iommufd.c >> +++ b/hw/vfio/iommufd.c >> @@ -24,6 +24,7 @@ >> #include "sysemu/reset.h" >> #include "qemu/cutils.h" >> #include "qemu/chardev_open.h" >> +#include "pci.h" >> >> static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, >> ram_addr_t size, void *vaddr, bool readonly) >> @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice >*vbasedev) >> close(vbasedev->fd); >> } >> >> +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) >> +{ >> + VFIODevice *vbasedev_iter; >> + >> + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { >> + if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { >> + continue; >> + } >> + if (devid == vbasedev_iter->devid) { >> + return vbasedev_iter; >> + } >> + } >> + return NULL; >> +} >> + >> +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) >> +{ >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >> + struct vfio_pci_hot_reset_info *info = NULL; >> + struct vfio_pci_dependent_device *devices; >> + struct vfio_pci_hot_reset *reset; >> + int ret, i; >> + bool multi = false; >> + >> + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); >> + >> + if (!single) { >> + vfio_pci_pre_reset(vdev); >> + } >> + vdev->vbasedev.needs_reset = false; >> + >> + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); >> + >> + if (ret) { >> + goto out_single; >> + } >> + >> + assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); >> + >> + devices = &info->devices[0]; >> + >> + if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { >> + if (!vdev->has_pm_reset) { >> + for (i = 0; i < info->count; i++) { >> + if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { >> + error_report("vfio: Cannot reset device %s, " >> + "depends on device %04x:%02x:%02x.%x " >> + "which is not owned.", >> + vdev->vbasedev.name, devices[i].segment, >> + devices[i].bus, PCI_SLOT(devices[i].devfn), >> + PCI_FUNC(devices[i].devfn)); >> + } >> + } >> + } >> + ret = -EPERM; >> + goto out_single; >> + } >> + >> + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); >> + >> + for (i = 0; i < info->count; i++) { >> + VFIOPCIDevice *tmp; >> + VFIODevice *vbasedev_iter; >> + >> + trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, >> + devices[i].bus, >> + PCI_SLOT(devices[i].devfn), >> + PCI_FUNC(devices[i].devfn), >> + devices[i].devid); >> + >> + /* >> + * If a VFIO cdev device is resettable, all the dependent devices >> + * are either bound to same iommufd or within same iommu_groups as >> + * one of the iommufd bound devices. 
>> + */ >> + assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); >> + >> + if (devices[i].devid == vdev->vbasedev.devid || >> + devices[i].devid == VFIO_PCI_DEVID_OWNED) { >> + continue; >> + } >> + >> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); >> + if (!vbasedev_iter || !vbasedev_iter->dev->realized || >> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); >> + if (single) { >> + ret = -EINVAL; >> + goto out_single; >> + } >> + vfio_pci_pre_reset(tmp); >> + tmp->vbasedev.needs_reset = false; >> + multi = true; >> + } >> + >> + if (!single && !multi) { >> + ret = -EINVAL; >> + goto out_single; >> + } >> + >> + /* Use zero length array for hot reset with iommufd backend */ >> + reset = g_malloc0(sizeof(*reset)); >> + reset->argsz = sizeof(*reset); >> + >> + /* Bus reset! */ >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); >> + g_free(reset); >> + if (ret) { >> + ret = -errno; >> + } >> + >> + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, >> + ret ? strerror(errno) : "Success"); >> + >> + /* Re-enable INTx on affected devices */ >> + for (i = 0; i < info->count; i++) { >> + VFIOPCIDevice *tmp; >> + VFIODevice *vbasedev_iter; >> + >> + if (devices[i].devid == vdev->vbasedev.devid || >> + devices[i].devid == VFIO_PCI_DEVID_OWNED) { >> + continue; >> + } >> + >> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); >> + if (!vbasedev_iter || !vbasedev_iter->dev->realized || >> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); >nit: I see this block of code also is used above for the pre_reset. May >be interesting to introduce an helper? Could be done later though Will do in v7. 
Thanks Zhenzhong >> + vfio_pci_post_reset(tmp); >> + } >> +out_single: >> + if (!single) { >> + vfio_pci_post_reset(vdev); >> + } >> + g_free(info); >> + >> + return ret; >> +} >> + >> const VFIOIOMMUOps vfio_iommufd_ops = { >> .dma_map = iommufd_cdev_map, >> .dma_unmap = iommufd_cdev_unmap, >> .attach_device = iommufd_cdev_attach, >> .detach_device = iommufd_cdev_detach, >> + .pci_hot_reset = iommufd_cdev_pci_hot_reset, >> }; >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 5d3e9e8cee..d838232d5a 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const >char *name, const char *str, in >> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" >> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new >IOMMUFD container with ioasid=%d" >> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int >num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" >> +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int >function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" >Otherwise looks good to me. > >Reviewed-by: Eric Auger <eric.auger@redhat.com> > > >Eric > >
© 2016 - 2024 Red Hat, Inc.