[Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance

Jason Wang posted 1 patch 7 years, 1 month ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/1488877751-13419-1-git-send-email-jasowang@redhat.com
Test checkpatch passed
Test docker passed
hw/pci/pci.c               | 11 ++++++++---
hw/virtio/virtio-pci.c     |  9 +++++++++
hw/virtio/virtio.c         | 19 +++++++++++++++++++
include/hw/pci/pci.h       |  1 +
include/hw/virtio/virtio.h |  1 +
5 files changed, 38 insertions(+), 3 deletions(-)
[Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago
After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
after caching ring translations"), IOMMU was required to be created in
advance. This is because we can only get the correct dma_as after pci
IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
inconvenient for user. This patch releases this by:

- introduce a bus_master_ready method for PCIDeviceClass and trigger
  this during pci_init_bus_master
- implement virtio-pci method and 1) reset the dma_as 2) re-register
  the memory listener to the new dma_as

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from V2:
- delay pci_init_bus_master() after pci device is realized to make
  bus_master_ready a more generic method
---
 hw/pci/pci.c               | 11 ++++++++---
 hw/virtio/virtio-pci.c     |  9 +++++++++
 hw/virtio/virtio.c         | 19 +++++++++++++++++++
 include/hw/pci/pci.h       |  1 +
 include/hw/virtio/virtio.h |  1 +
 5 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 273f1e4..22e6ad9 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
 
     memory_region_init_alias(&pci_dev->bus_master_enable_region,
                              OBJECT(pci_dev), "bus master",
@@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
     address_space_init(&pci_dev->bus_master_as,
                        &pci_dev->bus_master_enable_region, pci_dev->name);
+    if (pc->bus_master_ready) {
+        pc->bus_master_ready(pci_dev);
+    }
 }
 
 static void pcibus_machine_done(Notifier *notifier, void *data)
@@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pci_dev->devfn = devfn;
     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
 
-    if (qdev_hotplug) {
-        pci_init_bus_master(pci_dev);
-    }
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
     pci_config_alloc(pci_dev);
@@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         }
     }
 
+    if (qdev_hotplug) {
+        pci_init_bus_master(pci_dev);
+    }
+
     /* rom loading */
     is_default_rom = false;
     if (pci_dev->romfile == NULL && pc->romfile != NULL) {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index b76f3f6..21cbda2 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1845,6 +1845,14 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
     address_space_destroy(&proxy->modern_as);
 }
 
+static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    virtio_device_reset_dma_as(vdev);
+}
+
 static void virtio_pci_reset(DeviceState *qdev)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
@@ -1904,6 +1912,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
     dc->props = virtio_pci_properties;
     k->realize = virtio_pci_realize;
     k->exit = virtio_pci_exit;
+    k->bus_master_ready = virtio_pci_bus_master_ready;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = PCI_CLASS_OTHERS;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index efce4b3..09f4cf4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2594,6 +2594,25 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
     return virtio_bus_ioeventfd_enabled(vbus);
 }
 
+void virtio_device_reset_dma_as(VirtIODevice *vdev)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusState *bus = VIRTIO_BUS(qbus);
+    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
+    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+
+    if (klass->get_dma_as != NULL && has_iommu) {
+        vdev->dma_as = klass->get_dma_as(qbus->parent);
+    } else {
+        vdev->dma_as = &address_space_memory;
+    }
+
+    memory_listener_unregister(&vdev->listener);
+    memory_listener_register(&vdev->listener, vdev->dma_as);
+}
+
+
 static const TypeInfo virtio_device_info = {
     .name = TYPE_VIRTIO_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 9349acb..e700a58 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -207,6 +207,7 @@ typedef struct PCIDeviceClass {
 
     void (*realize)(PCIDevice *dev, Error **errp);
     int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
+    void (*bus_master_ready)(PCIDevice *dev);
     PCIUnregisterFunc *exit;
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 15efcf2..21fa273 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -283,6 +283,7 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
 int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
 void virtio_device_release_ioeventfd(VirtIODevice *vdev);
 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
+void virtio_device_reset_dma_as(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-- 
2.7.4


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Peter Xu 7 years, 1 month ago
On Tue, Mar 07, 2017 at 05:09:11PM +0800, Jason Wang wrote:
> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> after caching ring translations"), IOMMU was required to be created in
> advance. This is because we can only get the correct dma_as after pci
> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> inconvenient for user. This patch releases this by:
> 
> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>   this during pci_init_bus_master
> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>   the memory listener to the new dma_as
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Reviewed-by: Peter Xu <peterx@redhat.com>

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Marcel Apfelbaum 7 years, 1 month ago
On 03/07/2017 11:09 AM, Jason Wang wrote:
> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> after caching ring translations"), IOMMU was required to be created in
> advance. This is because we can only get the correct dma_as after pci
> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> inconvenient for user. This patch releases this by:
>
> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>   this during pci_init_bus_master
> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>   the memory listener to the new dma_as
>

Hi Jason,

> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from V2:
> - delay pci_init_bus_master() after pci device is realized to make
>   bus_master_ready a more generic method
> ---
>  hw/pci/pci.c               | 11 ++++++++---
>  hw/virtio/virtio-pci.c     |  9 +++++++++
>  hw/virtio/virtio.c         | 19 +++++++++++++++++++
>  include/hw/pci/pci.h       |  1 +
>  include/hw/virtio/virtio.h |  1 +
>  5 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 273f1e4..22e6ad9 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>  static void pci_init_bus_master(PCIDevice *pci_dev)
>  {
>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>
>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>                               OBJECT(pci_dev), "bus master",
> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>      address_space_init(&pci_dev->bus_master_as,
>                         &pci_dev->bus_master_enable_region, pci_dev->name);
> +    if (pc->bus_master_ready) {
> +        pc->bus_master_ready(pci_dev);
> +    }
>  }
>
>  static void pcibus_machine_done(Notifier *notifier, void *data)
> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      pci_dev->devfn = devfn;
>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>
> -    if (qdev_hotplug) {
> -        pci_init_bus_master(pci_dev);
> -    }
>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>      pci_dev->irq_state = 0;
>      pci_config_alloc(pci_dev);
> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          }
>      }
>
> +    if (qdev_hotplug) {
> +        pci_init_bus_master(pci_dev);
> +    }
> +

How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
I suppose you want the code to run after the "realize" function?
If so, what happens if a "realize" function of another device needs the bus_master_as
(at hotplug time)?

Thanks,
Marcel

>      /* rom loading */
>      is_default_rom = false;
>      if (pci_dev->romfile == NULL && pc->romfile != NULL) {
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index b76f3f6..21cbda2 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1845,6 +1845,14 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
>      address_space_destroy(&proxy->modern_as);
>  }
>
> +static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +
> +    virtio_device_reset_dma_as(vdev);
> +}
> +
>  static void virtio_pci_reset(DeviceState *qdev)
>  {
>      VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
> @@ -1904,6 +1912,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
>      dc->props = virtio_pci_properties;
>      k->realize = virtio_pci_realize;
>      k->exit = virtio_pci_exit;
> +    k->bus_master_ready = virtio_pci_bus_master_ready;
>      k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
>      k->revision = VIRTIO_PCI_ABI_VERSION;
>      k->class_id = PCI_CLASS_OTHERS;
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index efce4b3..09f4cf4 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2594,6 +2594,25 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
>      return virtio_bus_ioeventfd_enabled(vbus);
>  }
>
> +void virtio_device_reset_dma_as(VirtIODevice *vdev)
> +{
> +    DeviceState *qdev = DEVICE(vdev);
> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> +    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> +
> +    if (klass->get_dma_as != NULL && has_iommu) {
> +        vdev->dma_as = klass->get_dma_as(qbus->parent);
> +    } else {
> +        vdev->dma_as = &address_space_memory;
> +    }
> +
> +    memory_listener_unregister(&vdev->listener);
> +    memory_listener_register(&vdev->listener, vdev->dma_as);
> +}
> +
> +
>  static const TypeInfo virtio_device_info = {
>      .name = TYPE_VIRTIO_DEVICE,
>      .parent = TYPE_DEVICE,
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 9349acb..e700a58 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -207,6 +207,7 @@ typedef struct PCIDeviceClass {
>
>      void (*realize)(PCIDevice *dev, Error **errp);
>      int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
> +    void (*bus_master_ready)(PCIDevice *dev);
>      PCIUnregisterFunc *exit;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 15efcf2..21fa273 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -283,6 +283,7 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
>  int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
>  void virtio_device_release_ioeventfd(VirtIODevice *vdev);
>  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
> +void virtio_device_reset_dma_as(VirtIODevice *vdev);
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
>  void virtio_queue_host_notifier_read(EventNotifier *n);
>  void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
>


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Peter Xu 7 years, 1 month ago
On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
> On 03/07/2017 11:09 AM, Jason Wang wrote:
> >After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> >after caching ring translations"), IOMMU was required to be created in
> >advance. This is because we can only get the correct dma_as after pci
> >IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> >inconvenient for user. This patch releases this by:
> >
> >- introduce a bus_master_ready method for PCIDeviceClass and trigger
> >  this during pci_init_bus_master
> >- implement virtio-pci method and 1) reset the dma_as 2) re-register
> >  the memory listener to the new dma_as
> >
> 
> Hi Jason,
> 
> >Cc: Paolo Bonzini <pbonzini@redhat.com>
> >Signed-off-by: Jason Wang <jasowang@redhat.com>
> >---
> >Changes from V2:
> >- delay pci_init_bus_master() after pci device is realized to make
> >  bus_master_ready a more generic method
> >---
> > hw/pci/pci.c               | 11 ++++++++---
> > hw/virtio/virtio-pci.c     |  9 +++++++++
> > hw/virtio/virtio.c         | 19 +++++++++++++++++++
> > include/hw/pci/pci.h       |  1 +
> > include/hw/virtio/virtio.h |  1 +
> > 5 files changed, 38 insertions(+), 3 deletions(-)
> >
> >diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >index 273f1e4..22e6ad9 100644
> >--- a/hw/pci/pci.c
> >+++ b/hw/pci/pci.c
> >@@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> > static void pci_init_bus_master(PCIDevice *pci_dev)
> > {
> >     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> >+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >
> >     memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >                              OBJECT(pci_dev), "bus master",
> >@@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >     address_space_init(&pci_dev->bus_master_as,
> >                        &pci_dev->bus_master_enable_region, pci_dev->name);
> >+    if (pc->bus_master_ready) {
> >+        pc->bus_master_ready(pci_dev);
> >+    }
> > }
> >
> > static void pcibus_machine_done(Notifier *notifier, void *data)
> >@@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >     pci_dev->devfn = devfn;
> >     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >
> >-    if (qdev_hotplug) {
> >-        pci_init_bus_master(pci_dev);
> >-    }
> >     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >     pci_dev->irq_state = 0;
> >     pci_config_alloc(pci_dev);
> >@@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >         }
> >     }
> >
> >+    if (qdev_hotplug) {
> >+        pci_init_bus_master(pci_dev);
> >+    }
> >+
> 
> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
> I suppose you want the code to run after the "realize" function?
> If so, what happens if a "realize" function of another device needs the bus_master_as
> (at hotplug time)?

My unmature understanding is that, we can just call
pci_device_iommu_address_space() if device realization really needs
the address space, rather than using bus_master_as.

An example is vfio-pci device. It is using
pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
there may be other reasons behind, e.g., VFIOAddressSpace should be
1:1 mapped to bus master address space, so we may want to make sure
this address space will be the same when different devices are using
the same address space (while bus_master_as is one per device, even if
two devices have the same backend address space, there will be two
distinct bus_master_as).

IIUC, bus_master_as is only used to emulate the master bit in PCI
command register. In that case, that should only be there for the
guest operations, while imho device realization is "emulation code",
which can directly use pci_device_iommu_address_space(). Actually, if
it plays with bus_master_as even if it can, I guess it'll just fail
since that region has not yet been enabled.

Please kindly correct me if I made a mistake...

Thanks,

-- peterx

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago

On 2017年03月08日 10:43, Peter Xu wrote:
> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>   this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>   the memory listener to the new dma_as
>>>
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>   bus_master_ready a more generic method
>>> ---
>>> hw/pci/pci.c               | 11 ++++++++---
>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>> include/hw/pci/pci.h       |  1 +
>>> include/hw/virtio/virtio.h |  1 +
>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>> {
>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                               OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>      address_space_init(&pci_dev->bus_master_as,
>>>                         &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>> }
>>>
>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>      pci_dev->devfn = devfn;
>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>      pci_dev->irq_state = 0;
>>>      pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>          }
>>>      }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?

Yes.

>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?

I'm not sure this is really needed. What kind of device need to check 
hotplug during their realize? (Looks like we don't have such kind of 
device now).

> My unmature understanding is that, we can just call
> pci_device_iommu_address_space() if device realization really needs
> the address space, rather than using bus_master_as.

A little issue is pci_device_iommu_address_space() can be wrong if it 
was called before OMMU was created.

Thanks

>
> An example is vfio-pci device. It is using
> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
> there may be other reasons behind, e.g., VFIOAddressSpace should be
> 1:1 mapped to bus master address space, so we may want to make sure
> this address space will be the same when different devices are using
> the same address space (while bus_master_as is one per device, even if
> two devices have the same backend address space, there will be two
> distinct bus_master_as).
>
> IIUC, bus_master_as is only used to emulate the master bit in PCI
> command register. In that case, that should only be there for the
> guest operations, while imho device realization is "emulation code",
> which can directly use pci_device_iommu_address_space(). Actually, if
> it plays with bus_master_as even if it can, I guess it'll just fail
> since that region has not yet been enabled.
>
> Please kindly correct me if I made a mistake...
>
> Thanks,
>
> -- peterx


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Marcel Apfelbaum 7 years, 1 month ago
On 03/08/2017 05:15 AM, Jason Wang wrote:
>
>
> On 2017年03月08日 10:43, Peter Xu wrote:
>> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>> after caching ring translations"), IOMMU was required to be created in
>>>> advance. This is because we can only get the correct dma_as after pci
>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>> inconvenient for user. This patch releases this by:
>>>>
>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>   this during pci_init_bus_master
>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>   the memory listener to the new dma_as
>>>>
>>> Hi Jason,
>>>
>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes from V2:
>>>> - delay pci_init_bus_master() after pci device is realized to make
>>>>   bus_master_ready a more generic method
>>>> ---
>>>> hw/pci/pci.c               | 11 ++++++++---
>>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>> include/hw/pci/pci.h       |  1 +
>>>> include/hw/virtio/virtio.h |  1 +
>>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 273f1e4..22e6ad9 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>>> {
>>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>>
>>>>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>>                               OBJECT(pci_dev), "bus master",
>>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>>      address_space_init(&pci_dev->bus_master_as,
>>>>                         &pci_dev->bus_master_enable_region, pci_dev->name);
>>>> +    if (pc->bus_master_ready) {
>>>> +        pc->bus_master_ready(pci_dev);
>>>> +    }
>>>> }
>>>>
>>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>>      pci_dev->devfn = devfn;
>>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>>
>>>> -    if (qdev_hotplug) {
>>>> -        pci_init_bus_master(pci_dev);
>>>> -    }
>>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>>      pci_dev->irq_state = 0;
>>>>      pci_config_alloc(pci_dev);
>>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>          }
>>>>      }
>>>>
>>>> +    if (qdev_hotplug) {
>>>> +        pci_init_bus_master(pci_dev);
>>>> +    }
>>>> +
>>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>>> I suppose you want the code to run after the "realize" function?
>
> Yes.
>
>>> If so, what happens if a "realize" function of another device needs the bus_master_as
>>> (at hotplug time)?
>
> I'm not sure this is really needed. What kind of device need to check hotplug during their realize? (Looks like we don't have such kind of device now).

I am sorry I was not clear enough:
If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
event to enable "bus master".
This is why we have
    if (qdev_hotplug)
         pci_init_bus_master(pci_dev);

The code you proposed changes the order, so this call is done *after* realize.

My question was: What if any other device may require the bus_master_as
at realize time (and can be hot-plugged) ?
For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
and caches the bus_master_as.

It is possible the mentioned devices can't be hot-plugged or
are not relevant for PCIe.

I am only saying we should double check when making such a change.

>
>> My unmature understanding is that, we can just call
>> pci_device_iommu_address_space() if device realization really needs
>> the address space, rather than using bus_master_as.
>
> A little issue is pci_device_iommu_address_space() can be wrong if it was called before OMMU was created.
>

At hotplug time this is irrelevant because the iommu has already been created.

Thanks,
Marcel

> Thanks
>
>>
>> An example is vfio-pci device. It is using
>> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
>> there may be other reasons behind, e.g., VFIOAddressSpace should be
>> 1:1 mapped to bus master address space, so we may want to make sure
>> this address space will be the same when different devices are using
>> the same address space (while bus_master_as is one per device, even if
>> two devices have the same backend address space, there will be two
>> distinct bus_master_as).
>>
>> IIUC, bus_master_as is only used to emulate the master bit in PCI
>> command register. In that case, that should only be there for the
>> guest operations, while imho device realization is "emulation code",
>> which can directly use pci_device_iommu_address_space(). Actually, if
>> it plays with bus_master_as even if it can, I guess it'll just fail
>> since that region has not yet been enabled.
>>
>> Please kindly correct me if I made a mistake...
>>
>> Thanks,
>>
>> -- peterx
>


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Peter Xu 7 years, 1 month ago
On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:

[...]

> 
> I am sorry I was not clear enough:
> If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
> event to enable "bus master".
> This is why we have
>    if (qdev_hotplug)
>         pci_init_bus_master(pci_dev);
> 
> The code you proposed changes the order, so this call is done *after* realize.
> 
> My question was: What if any other device may require the bus_master_as
> at realize time (and can be hot-plugged) ?
> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> and caches the bus_master_as.

Oh, I didn't notice that there are other devices that used
bus_master_as during realization. If so... Would this really work even
without hot plug? Considering that bus_master_as won't be inited until
machine done phase?

-- peterx

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Peter Xu 7 years, 1 month ago
On Wed, Mar 08, 2017 at 05:09:45PM +0800, Peter Xu wrote:
> On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:
> 
> [...]
> 
> > 
> > I am sorry I was not clear enough:
> > If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
> > event to enable "bus master".
> > This is why we have
> >    if (qdev_hotplug)
> >         pci_init_bus_master(pci_dev);
> > 
> > The code you proposed changes the order, so this call is done *after* realize.
> > 
> > My question was: What if any other device may require the bus_master_as
> > at realize time (and can be hot-plugged) ?
> > For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> > and caches the bus_master_as.
> 
> Oh, I didn't notice that there are other devices that used
> bus_master_as during realization. If so... Would this really work even
> without hot plug? Considering that bus_master_as won't be inited until
> machine done phase?

Please ignore my question... I think the answer is that these devices
are only caching the pointer of bus_master_as. So it won't really use
the address space before machine_done.

If so, IMHO moving pci_init_bus_master() after device specific
realize() is okay as well then, right?

Thanks,

-- peterx

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Marcel Apfelbaum 7 years, 1 month ago
On 03/08/2017 11:16 AM, Peter Xu wrote:
> On Wed, Mar 08, 2017 at 05:09:45PM +0800, Peter Xu wrote:
>> On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:
>>
>> [...]
>>
>>>
>>> I am sorry I was not clear enough:
>>> If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
>>> event to enable "bus master".
>>> This is why we have
>>>    if (qdev_hotplug)
>>>         pci_init_bus_master(pci_dev);
>>>
>>> The code you proposed changes the order, so this call is done *after* realize.
>>>
>>> My question was: What if any other device may require the bus_master_as
>>> at realize time (and can be hot-plugged) ?
>>> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
>>> and caches the bus_master_as.
>>
>> Oh, I didn't notice that there are other devices that used
>> bus_master_as during realization. If so... Would this really work even
>> without hot plug? Considering that bus_master_as won't be inited until
>> machine done phase?
>
> Please ignore my question... I think the answer is that these devices
> are only caching the pointer of bus_master_as. So it won't really use
> the address space before machine_done.
>
> If so, IMHO moving pci_init_bus_master() after device specific
> realize() is okay as well then, right?
>

It should work, right. But you may want to double-check anyway :)

Thanks,
Marcel

> Thanks,
>
> -- peterx
>


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago

On 2017年03月08日 16:17, Marcel Apfelbaum wrote:
> On 03/08/2017 05:15 AM, Jason Wang wrote:
>>
>>
>> On 2017年03月08日 10:43, Peter Xu wrote:
>>> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>> after caching ring translations"), IOMMU was required to be 
>>>>> created in
>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>> inconvenient for user. This patch releases this by:
>>>>>
>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>   this during pci_init_bus_master
>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>   the memory listener to the new dma_as
>>>>>
>>>> Hi Jason,
>>>>
>>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>> ---
>>>>> Changes from V2:
>>>>> - delay pci_init_bus_master() after pci device is realized to make
>>>>>   bus_master_ready a more generic method
>>>>> ---
>>>>> hw/pci/pci.c               | 11 ++++++++---
>>>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>>> include/hw/pci/pci.h       |  1 +
>>>>> include/hw/virtio/virtio.h |  1 +
>>>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index 273f1e4..22e6ad9 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>> {
>>>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>>>
>>>>> memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>>>                               OBJECT(pci_dev), "bus master",
>>>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>> memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>>>      address_space_init(&pci_dev->bus_master_as,
>>>>> &pci_dev->bus_master_enable_region, pci_dev->name);
>>>>> +    if (pc->bus_master_ready) {
>>>>> +        pc->bus_master_ready(pci_dev);
>>>>> +    }
>>>>> }
>>>>>
>>>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>>>> @@ -995,9 +999,6 @@ static PCIDevice 
>>>>> *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>>>      pci_dev->devfn = devfn;
>>>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>>>
>>>>> -    if (qdev_hotplug) {
>>>>> -        pci_init_bus_master(pci_dev);
>>>>> -    }
>>>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>>>      pci_dev->irq_state = 0;
>>>>>      pci_config_alloc(pci_dev);
>>>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState 
>>>>> *qdev, Error **errp)
>>>>>          }
>>>>>      }
>>>>>
>>>>> +    if (qdev_hotplug) {
>>>>> +        pci_init_bus_master(pci_dev);
>>>>> +    }
>>>>> +
>>>> How does it help if we move qdev_hotplug check outside 
>>>> "do_pci_register_device"?
>>>> I suppose you want the code to run after the "realize" function?
>>
>> Yes.
>>
>>>> If so, what happens if a "realize" function of another device needs 
>>>> the bus_master_as
>>>> (at hotplug time)?
>>
>> I'm not sure this is really needed. What kind of device need to check 
>> hotplug during their realize? (Looks like we don't have such kind of 
>> device now).
>
> I am sorry I was not clear enough:
> If a device is added after the system is up (hotplug), we cannot 
> depend on the "machine_done"
> event to enable "bus master".
> This is why we have
>    if (qdev_hotplug)
>         pci_init_bus_master(pci_dev);
>
> The code you proposed changes the order, so this call is done *after* 
> realize.
>
> My question was: What if any other device may require the bus_master_as
> at realize time (and can be hot-plugged) ?
> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> and caches the bus_master_as.
>
> It is possible the mentioned devices can't be hot-plugged or
> are not relevant for PCIe.
>
> I am only saying we should double check when making such a change.

Can't agree more.

For hcd-ehci/hcd-ohci, simply caching the as should be fine. But if they 
want to more things like virito, they should delay it to bus_master_ready.

>
>>
>>> My unmature understanding is that, we can just call
>>> pci_device_iommu_address_space() if device realization really needs
>>> the address space, rather than using bus_master_as.
>>
>> A little issue is pci_device_iommu_address_space() can be wrong if it 
>> was called before OMMU was created.
>>
>
> At hotplug time this is irrelevant because the iommu has already been 
> created.
>
> Thanks,
> Marcel

Yes.

Thanks

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Marcel Apfelbaum 7 years, 1 month ago
On 03/08/2017 04:43 AM, Peter Xu wrote:
> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>  this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>  the memory listener to the new dma_as
>>>
>>
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>  bus_master_ready a more generic method
>>> ---
>>> hw/pci/pci.c               | 11 ++++++++---
>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>> include/hw/pci/pci.h       |  1 +
>>> include/hw/virtio/virtio.h |  1 +
>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>> {
>>>     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>     memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                              OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>     address_space_init(&pci_dev->bus_master_as,
>>>                        &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>> }
>>>
>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>     pci_dev->devfn = devfn;
>>>     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>     pci_dev->irq_state = 0;
>>>     pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>         }
>>>     }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>>
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?
>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?
>
> My unmature understanding is that, we can just call
> pci_device_iommu_address_space() if device realization really needs
> the address space, rather than using bus_master_as.
>

Yes, each device that support IOMMU needs to call it instead of bus_master_as.
As if it really needs this information at realize time is debatable.
The vfio-pci is a special case and Alex Williamson explain why it is needed
at the "realize" time.


> An example is vfio-pci device. It is using
> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
> there may be other reasons behind, e.g., VFIOAddressSpace should be
> 1:1 mapped to bus master address space, so we may want to make sure
> this address space will be the same when different devices are using
> the same address space (while bus_master_as is one per device, even if
> two devices have the same backend address space, there will be two
> distinct bus_master_as).
>

Understood

> IIUC, bus_master_as is only used to emulate the master bit in PCI
> command register. In that case, that should only be there for the
> guest operations, while imho device realization is "emulation code",
> which can directly use pci_device_iommu_address_space(). Actually, if
> it plays with bus_master_as even if it can, I guess it'll just fail
> since that region has not yet been enabled.
>
> Please kindly correct me if I made a mistake...
>

You are right the device does not need the bus master address space before the
VM is up, this is we the init process of this region is delayed until machine done.


My question was why do you need to move pci_init_bus_master after device realization
at hotplug time and how would influence the other devices.


Thanks,
Marcel

> Thanks,
>
> -- peterx
>


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Igor Mammedov 7 years, 1 month ago
On Tue, 7 Mar 2017 14:47:30 +0200
Marcel Apfelbaum <marcel@redhat.com> wrote:

> On 03/07/2017 11:09 AM, Jason Wang wrote:
> > After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> > after caching ring translations"), IOMMU was required to be created in
> > advance. This is because we can only get the correct dma_as after pci
> > IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> > inconvenient for user. This patch releases this by:
> >
> > - introduce a bus_master_ready method for PCIDeviceClass and trigger
> >   this during pci_init_bus_master
> > - implement virtio-pci method and 1) reset the dma_as 2) re-register
> >   the memory listener to the new dma_as
> >  
> 
> Hi Jason,
> 
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes from V2:
> > - delay pci_init_bus_master() after pci device is realized to make
> >   bus_master_ready a more generic method
> > ---
> >  hw/pci/pci.c               | 11 ++++++++---
> >  hw/virtio/virtio-pci.c     |  9 +++++++++
> >  hw/virtio/virtio.c         | 19 +++++++++++++++++++
> >  include/hw/pci/pci.h       |  1 +
> >  include/hw/virtio/virtio.h |  1 +
> >  5 files changed, 38 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index 273f1e4..22e6ad9 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> >  static void pci_init_bus_master(PCIDevice *pci_dev)
> >  {
> >      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> > +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >
> >      memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >                               OBJECT(pci_dev), "bus master",
> > @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >      address_space_init(&pci_dev->bus_master_as,
> >                         &pci_dev->bus_master_enable_region, pci_dev->name);
> > +    if (pc->bus_master_ready) {
> > +        pc->bus_master_ready(pci_dev);
> > +    }
> >  }
> >
> >  static void pcibus_machine_done(Notifier *notifier, void *data)
> > @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >      pci_dev->devfn = devfn;
> >      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >
> > -    if (qdev_hotplug) {
> > -        pci_init_bus_master(pci_dev);
> > -    }
> >      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >      pci_dev->irq_state = 0;
> >      pci_config_alloc(pci_dev);
> > @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >          }
> >      }
> >
> > +    if (qdev_hotplug) {
> > +        pci_init_bus_master(pci_dev);
> > +    }
> > +  
> 
> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
> I suppose you want the code to run after the "realize" function?
> If so, what happens if a "realize" function of another device needs the bus_master_as
> (at hotplug time)?

Conceptually,
I'm not sure that inherited device class realize
should be aware of uninitialized bus_master,
which belongs to PCI device, nor should it initialize
it by calling pci_init_bus_master() explicitly,
it's parent's class job (PCIDevice).

more close to current code:
if xen-pci-passthrough were hotplugged then it looks like
this hunk could break it:
hw/xen/xen_pt.c:
 memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
would happen with uninitialized bus_master_as
as realize is called before pci_init_bus_master();

So the same question as Marcel's but other way around,
why this hunk "has to" be moved.


> 
> Thanks,
> Marcel
> 
> >      /* rom loading */
> >      is_default_rom = false;
> >      if (pci_dev->romfile == NULL && pc->romfile != NULL) {
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index b76f3f6..21cbda2 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -1845,6 +1845,14 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
> >      address_space_destroy(&proxy->modern_as);
> >  }
> >
> > +static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
> > +{
> > +    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> > +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> > +
> > +    virtio_device_reset_dma_as(vdev);
> > +}
> > +
> >  static void virtio_pci_reset(DeviceState *qdev)
> >  {
> >      VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
> > @@ -1904,6 +1912,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
> >      dc->props = virtio_pci_properties;
> >      k->realize = virtio_pci_realize;
> >      k->exit = virtio_pci_exit;
> > +    k->bus_master_ready = virtio_pci_bus_master_ready;
> >      k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> >      k->revision = VIRTIO_PCI_ABI_VERSION;
> >      k->class_id = PCI_CLASS_OTHERS;
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index efce4b3..09f4cf4 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -2594,6 +2594,25 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
> >      return virtio_bus_ioeventfd_enabled(vbus);
> >  }
> >
> > +void virtio_device_reset_dma_as(VirtIODevice *vdev)
> > +{
> > +    DeviceState *qdev = DEVICE(vdev);
> > +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> > +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> > +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> > +    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> > +
> > +    if (klass->get_dma_as != NULL && has_iommu) {
> > +        vdev->dma_as = klass->get_dma_as(qbus->parent);
> > +    } else {
> > +        vdev->dma_as = &address_space_memory;
> > +    }
> > +
> > +    memory_listener_unregister(&vdev->listener);
> > +    memory_listener_register(&vdev->listener, vdev->dma_as);
> > +}
this mostly subset of virtio_bus_device_plugged(), generalize?

> > +
> > +
> >  static const TypeInfo virtio_device_info = {
> >      .name = TYPE_VIRTIO_DEVICE,
> >      .parent = TYPE_DEVICE,
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 9349acb..e700a58 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -207,6 +207,7 @@ typedef struct PCIDeviceClass {
> >
> >      void (*realize)(PCIDevice *dev, Error **errp);
> >      int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
> > +    void (*bus_master_ready)(PCIDevice *dev);
> >      PCIUnregisterFunc *exit;
> >      PCIConfigReadFunc *config_read;
> >      PCIConfigWriteFunc *config_write;
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 15efcf2..21fa273 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -283,6 +283,7 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
> >  int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
> >  void virtio_device_release_ioeventfd(VirtIODevice *vdev);
> >  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
> > +void virtio_device_reset_dma_as(VirtIODevice *vdev);
> >  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
> >  void virtio_queue_host_notifier_read(EventNotifier *n);
> >  void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
> >  
> 
> 


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago

On 2017年03月09日 00:40, Igor Mammedov wrote:
> On Tue, 7 Mar 2017 14:47:30 +0200
> Marcel Apfelbaum<marcel@redhat.com>  wrote:
>
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>    this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>    the memory listener to the new dma_as
>>>   
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini<pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>    bus_master_ready a more generic method
>>> ---
>>>   hw/pci/pci.c               | 11 ++++++++---
>>>   hw/virtio/virtio-pci.c     |  9 +++++++++
>>>   hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>   include/hw/pci/pci.h       |  1 +
>>>   include/hw/virtio/virtio.h |  1 +
>>>   5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>   static void pci_init_bus_master(PCIDevice *pci_dev)
>>>   {
>>>       AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>       memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                                OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>       memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>       address_space_init(&pci_dev->bus_master_as,
>>>                          &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>>   }
>>>
>>>   static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>       pci_dev->devfn = devfn;
>>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>       pci_dev->irq_state = 0;
>>>       pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>           }
>>>       }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?
>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?
> Conceptually,
> I'm not sure that inherited device class realize
> should be aware of uninitialized bus_master,
> which belongs to PCI device, nor should it initialize
> it by calling pci_init_bus_master() explicitly,
> it's parent's class job (PCIDevice).

Yes, I was trying to propose a workaround for 2.9. I'm sure we will 
refine the ordering in 2.10. And consider we have asked libvirt to 
create IOMMU first, I think I will withdraw the patch.

>
> more close to current code:
> if xen-pci-passthrough were hotplugged then it looks like
> this hunk could break it:
> hw/xen/xen_pt.c:
>   memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
> would happen with uninitialized bus_master_as
> as realize is called before pci_init_bus_master();

Yes, this won't work. This is exactly the same issue as virtio, but this 
will also break if it was created before an IOMMU.

>
> So the same question as Marcel's but other way around,
> why this hunk "has to" be moved.
>
>

Right.

Thanks

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Igor Mammedov 7 years, 1 month ago
On Thu, 9 Mar 2017 10:32:44 +0800
Jason Wang <jasowang@redhat.com> wrote:

> On 2017年03月09日 00:40, Igor Mammedov wrote:
> > On Tue, 7 Mar 2017 14:47:30 +0200
> > Marcel Apfelbaum<marcel@redhat.com>  wrote:
> >  
> >> On 03/07/2017 11:09 AM, Jason Wang wrote:  
> >>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> >>> after caching ring translations"), IOMMU was required to be created in
> >>> advance. This is because we can only get the correct dma_as after pci
> >>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> >>> inconvenient for user. This patch releases this by:
> >>>
> >>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
> >>>    this during pci_init_bus_master
> >>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
> >>>    the memory listener to the new dma_as

Instead of trying to fix up later it's possible to refuse
adding iommu device if other devices has been added before
it with -device/device_add.
That would match current CLI semantics where device that
others depend on should be listed on CLI before that others
are listed.

A bit hackish but something along this lines:

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 9349acb..9d8ecc6 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -460,7 +460,7 @@ void pci_device_deassert_intx(PCIDevice *dev);
 typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index e0732cc..9d9a76b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1160,7 +1160,7 @@ static void amdvi_realize(DeviceState *dev, Error **err)
 
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
-    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    pci_setup_iommu(bus, amdvi_host_dma_iommu, s, err);
     s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
     msi_init(&s->pci.dev, 0, 1, true, false, err);
     amdvi_init(s);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 22d8226..f4f208c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2585,7 +2585,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
                                               g_free, g_free);
     vtd_init(s);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
-    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+    pci_setup_iommu(bus, vtd_host_dma_iommu, dev, errp);
     /* Pseudo address space under root PCI bus. */
     pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
 }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 273f1e4..2d01589 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -317,6 +317,21 @@ static void pci_host_bus_register(DeviceState *host)
     QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next);
 }
 
+static bool pcibus_has_devices(PCIBus *bus)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+        if (bus->devices[i]) {
+            DeviceState *dev = DEVICE(bus->devices[i]);
+            if (dev->opts) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 PCIBus *pci_find_primary_bus(void)
 {
     PCIBus *primary_bus = NULL;
@@ -2534,8 +2549,12 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
     return &address_space_memory;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp)
 {
+    if (pcibus_has_devices(bus)) {
+        error_setg(errp, "IOMMU must be created before any other PCI devices");
+        return;
+    }
     bus->iommu_fn = fn;
     bus->iommu_opaque = opaque;
 }



> >>>     
> >> Hi Jason,
> >>  
> >>> Cc: Paolo Bonzini<pbonzini@redhat.com>
> >>> Signed-off-by: Jason Wang<jasowang@redhat.com>
> >>> ---
> >>> Changes from V2:
> >>> - delay pci_init_bus_master() after pci device is realized to make
> >>>    bus_master_ready a more generic method
> >>> ---
> >>>   hw/pci/pci.c               | 11 ++++++++---
> >>>   hw/virtio/virtio-pci.c     |  9 +++++++++
> >>>   hw/virtio/virtio.c         | 19 +++++++++++++++++++
> >>>   include/hw/pci/pci.h       |  1 +
> >>>   include/hw/virtio/virtio.h |  1 +
> >>>   5 files changed, 38 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>> index 273f1e4..22e6ad9 100644
> >>> --- a/hw/pci/pci.c
> >>> +++ b/hw/pci/pci.c
> >>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> >>>   static void pci_init_bus_master(PCIDevice *pci_dev)
> >>>   {
> >>>       AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> >>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >>>
> >>>       memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >>>                                OBJECT(pci_dev), "bus master",
> >>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >>>       memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >>>       address_space_init(&pci_dev->bus_master_as,
> >>>                          &pci_dev->bus_master_enable_region, pci_dev->name);
> >>> +    if (pc->bus_master_ready) {
> >>> +        pc->bus_master_ready(pci_dev);
> >>> +    }
> >>>   }
> >>>
> >>>   static void pcibus_machine_done(Notifier *notifier, void *data)
> >>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >>>       pci_dev->devfn = devfn;
> >>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >>>
> >>> -    if (qdev_hotplug) {
> >>> -        pci_init_bus_master(pci_dev);
> >>> -    }
> >>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >>>       pci_dev->irq_state = 0;
> >>>       pci_config_alloc(pci_dev);
> >>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >>>           }
> >>>       }
> >>>
> >>> +    if (qdev_hotplug) {
> >>> +        pci_init_bus_master(pci_dev);
> >>> +    }
> >>> +  
> >> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
> >> I suppose you want the code to run after the "realize" function?
> >> If so, what happens if a "realize" function of another device needs the bus_master_as
> >> (at hotplug time)?  
> > Conceptually,
> > I'm not sure that inherited device class realize
> > should be aware of uninitialized bus_master,
> > which belongs to PCI device, nor should it initialize
> > it by calling pci_init_bus_master() explicitly,
> > it's parent's class job (PCIDevice).  
> 
> Yes, I was trying to propose a workaround for 2.9. I'm sure we will 
> refine the ordering in 2.10. And consider we have asked libvirt to 
> create IOMMU first, I think I will withdraw the patch.
> 
> >
> > more close to current code:
> > if xen-pci-passthrough were hotplugged then it looks like
> > this hunk could break it:
> > hw/xen/xen_pt.c:
> >   memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
> > would happen with uninitialized bus_master_as
> > as realize is called before pci_init_bus_master();  
> 
> Yes, this won't work. This is exactly the same issue as virtio, but this 
> will also break if it was created before an IOMMU.
> 
> >
> > So the same question as Marcel's but other way around,
> > why this hunk "has to" be moved.
> >
> >  
> 
> Right.
> 
> Thanks


Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Paolo Bonzini 7 years, 1 month ago

On 09/03/2017 10:28, Igor Mammedov wrote:
> On Thu, 9 Mar 2017 10:32:44 +0800
> Jason Wang <jasowang@redhat.com> wrote:
> 
>> On 2017年03月09日 00:40, Igor Mammedov wrote:
>>> On Tue, 7 Mar 2017 14:47:30 +0200
>>> Marcel Apfelbaum<marcel@redhat.com>  wrote:
>>>  
>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:  
>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>> after caching ring translations"), IOMMU was required to be created in
>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>> inconvenient for user. This patch releases this by:
>>>>>
>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>    this during pci_init_bus_master
>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>    the memory listener to the new dma_as
> 
> Instead of trying to fix up later it's possible to refuse
> adding iommu device if other devices has been added before
> it with -device/device_add.
> That would match current CLI semantics where device that
> others depend on should be listed on CLI before that others
> are listed.

I like this a lot.  Can we add it to "Future incompatible changes" in
the 2.9 changelog?

Paolo

> A bit hackish but something along this lines:
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 9349acb..9d8ecc6 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -460,7 +460,7 @@ void pci_device_deassert_intx(PCIDevice *dev);
>  typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>  
>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
> -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
> +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp);
>  
>  static inline void
>  pci_set_byte(uint8_t *config, uint8_t val)
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index e0732cc..9d9a76b 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -1160,7 +1160,7 @@ static void amdvi_realize(DeviceState *dev, Error **err)
>  
>      sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
> -    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
> +    pci_setup_iommu(bus, amdvi_host_dma_iommu, s, err);
>      s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
>      msi_init(&s->pci.dev, 0, 1, true, false, err);
>      amdvi_init(s);
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 22d8226..f4f208c 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2585,7 +2585,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                                g_free, g_free);
>      vtd_init(s);
>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
> -    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
> +    pci_setup_iommu(bus, vtd_host_dma_iommu, dev, errp);
>      /* Pseudo address space under root PCI bus. */
>      pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
>  }
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 273f1e4..2d01589 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -317,6 +317,21 @@ static void pci_host_bus_register(DeviceState *host)
>      QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next);
>  }
>  
> +static bool pcibus_has_devices(PCIBus *bus)
> +{
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> +        if (bus->devices[i]) {
> +            DeviceState *dev = DEVICE(bus->devices[i]);
> +            if (dev->opts) {
> +                return true;
> +            }
> +        }
> +    }
> +    return false;
> +}
> +
>  PCIBus *pci_find_primary_bus(void)
>  {
>      PCIBus *primary_bus = NULL;
> @@ -2534,8 +2549,12 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>      return &address_space_memory;
>  }
>  
> -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
> +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp)
>  {
> +    if (pcibus_has_devices(bus)) {
> +        error_setg(errp, "IOMMU must be created before any other PCI devices");
> +        return;
> +    }
>      bus->iommu_fn = fn;
>      bus->iommu_opaque = opaque;
>  }
> 
> 
> 
>>>>>     
>>>> Hi Jason,
>>>>  
>>>>> Cc: Paolo Bonzini<pbonzini@redhat.com>
>>>>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>>>>> ---
>>>>> Changes from V2:
>>>>> - delay pci_init_bus_master() after pci device is realized to make
>>>>>    bus_master_ready a more generic method
>>>>> ---
>>>>>   hw/pci/pci.c               | 11 ++++++++---
>>>>>   hw/virtio/virtio-pci.c     |  9 +++++++++
>>>>>   hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>>>   include/hw/pci/pci.h       |  1 +
>>>>>   include/hw/virtio/virtio.h |  1 +
>>>>>   5 files changed, 38 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index 273f1e4..22e6ad9 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>>>   static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>>   {
>>>>>       AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>>>
>>>>>       memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>>>                                OBJECT(pci_dev), "bus master",
>>>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>>       memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>>>       address_space_init(&pci_dev->bus_master_as,
>>>>>                          &pci_dev->bus_master_enable_region, pci_dev->name);
>>>>> +    if (pc->bus_master_ready) {
>>>>> +        pc->bus_master_ready(pci_dev);
>>>>> +    }
>>>>>   }
>>>>>
>>>>>   static void pcibus_machine_done(Notifier *notifier, void *data)
>>>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>>>       pci_dev->devfn = devfn;
>>>>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>>>
>>>>> -    if (qdev_hotplug) {
>>>>> -        pci_init_bus_master(pci_dev);
>>>>> -    }
>>>>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>>>       pci_dev->irq_state = 0;
>>>>>       pci_config_alloc(pci_dev);
>>>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>>           }
>>>>>       }
>>>>>
>>>>> +    if (qdev_hotplug) {
>>>>> +        pci_init_bus_master(pci_dev);
>>>>> +    }
>>>>> +  
>>>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>>>> I suppose you want the code to run after the "realize" function?
>>>> If so, what happens if a "realize" function of another device needs the bus_master_as
>>>> (at hotplug time)?  
>>> Conceptually,
>>> I'm not sure that inherited device class realize
>>> should be aware of uninitialized bus_master,
>>> which belongs to PCI device, nor should it initialize
>>> it by calling pci_init_bus_master() explicitly,
>>> it's parent's class job (PCIDevice).  
>>
>> Yes, I was trying to propose a workaround for 2.9. I'm sure we will 
>> refine the ordering in 2.10. And consider we have asked libvirt to 
>> create IOMMU first, I think I will withdraw the patch.
>>
>>>
>>> more close to current code:
>>> if xen-pci-passthrough were hotplugged then it looks like
>>> this hunk could break it:
>>> hw/xen/xen_pt.c:
>>>   memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
>>> would happen with uninitialized bus_master_as
>>> as realize is called before pci_init_bus_master();  
>>
>> Yes, this won't work. This is exactly the same issue as virtio, but this 
>> will also break if it was created before an IOMMU.
>>
>>>
>>> So the same question as Marcel's but other way around,
>>> why this hunk "has to" be moved.
>>>
>>>  
>>
>> Right.
>>
>> Thanks
> 

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago

On 2017年03月09日 17:28, Igor Mammedov wrote:
> On Thu, 9 Mar 2017 10:32:44 +0800
> Jason Wang<jasowang@redhat.com>  wrote:
>
>> On 2017年03月09日 00:40, Igor Mammedov wrote:
>>> On Tue, 7 Mar 2017 14:47:30 +0200
>>> Marcel Apfelbaum<marcel@redhat.com>   wrote:
>>>   
>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>> after caching ring translations"), IOMMU was required to be created in
>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>> inconvenient for user. This patch releases this by:
>>>>>
>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>     this during pci_init_bus_master
>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>     the memory listener to the new dma_as
> Instead of trying to fix up later it's possible to refuse
> adding iommu device if other devices has been added before
> it with -device/device_add.
> That would match current CLI semantics where device that
> others depend on should be listed on CLI before that others
> are listed.

Yes, but it works by chance in the past for the device that does not 
want bus_master_as in their realize. This change may surprise their users.

Thanks

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Paolo Bonzini 7 years, 1 month ago

On 09/03/2017 10:58, Jason Wang wrote:
> 
> 
> On 2017年03月09日 17:28, Igor Mammedov wrote:
>> On Thu, 9 Mar 2017 10:32:44 +0800
>> Jason Wang<jasowang@redhat.com>  wrote:
>>
>>> On 2017年03月09日 00:40, Igor Mammedov wrote:
>>>> On Tue, 7 Mar 2017 14:47:30 +0200
>>>> Marcel Apfelbaum<marcel@redhat.com>   wrote:
>>>>  
>>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>>> after caching ring translations"), IOMMU was required to be
>>>>>> created in
>>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>>> inconvenient for user. This patch releases this by:
>>>>>>
>>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>>     this during pci_init_bus_master
>>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>>     the memory listener to the new dma_as
>> Instead of trying to fix up later it's possible to refuse
>> adding iommu device if other devices has been added before
>> it with -device/device_add.
>> That would match current CLI semantics where device that
>> others depend on should be listed on CLI before that others
>> are listed.
> 
> Yes, but it works by chance in the past for the device that does not
> want bus_master_as in their realize. This change may surprise their users.

There is another posssibility.  Create the address space at init time
and add a container region instead of the bus_master_enable_region
alias.  Then at machine_done time you create the bus_master_enable_region.

This removes the need for the callbacks and makes the MemoryListener
just work.

Paolo

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Michael S. Tsirkin 7 years, 1 month ago
On Thu, Mar 09, 2017 at 11:05:36AM +0100, Paolo Bonzini wrote:
> 
> 
> On 09/03/2017 10:58, Jason Wang wrote:
> > 
> > 
> > On 2017年03月09日 17:28, Igor Mammedov wrote:
> >> On Thu, 9 Mar 2017 10:32:44 +0800
> >> Jason Wang<jasowang@redhat.com>  wrote:
> >>
> >>> On 2017年03月09日 00:40, Igor Mammedov wrote:
> >>>> On Tue, 7 Mar 2017 14:47:30 +0200
> >>>> Marcel Apfelbaum<marcel@redhat.com>   wrote:
> >>>>  
> >>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
> >>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> >>>>>> after caching ring translations"), IOMMU was required to be
> >>>>>> created in
> >>>>>> advance. This is because we can only get the correct dma_as after pci
> >>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> >>>>>> inconvenient for user. This patch releases this by:
> >>>>>>
> >>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
> >>>>>>     this during pci_init_bus_master
> >>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
> >>>>>>     the memory listener to the new dma_as
> >> Instead of trying to fix up later it's possible to refuse
> >> adding iommu device if other devices has been added before
> >> it with -device/device_add.
> >> That would match current CLI semantics where device that
> >> others depend on should be listed on CLI before that others
> >> are listed.
> > 
> > Yes, but it works by chance in the past for the device that does not
> > want bus_master_as in their realize. This change may surprise their users.
> 
> There is another posssibility.  Create the address space at init time
> and add a container region instead of the bus_master_enable_region
> alias.  Then at machine_done time you create the bus_master_enable_region.
> 
> This removes the need for the callbacks and makes the MemoryListener
> just work.
> 
> Paolo

That's definitely cleaner than a callback, though a bit tricky
so needs a good comment explaining what is going on.
And then I think we can revert
96a8821d21411f10d77ea994af369c6e5c35a2cc, right?

-- 
MST

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Paolo Bonzini 7 years, 1 month ago

On 09/03/2017 16:31, Michael S. Tsirkin wrote:
> On Thu, Mar 09, 2017 at 11:05:36AM +0100, Paolo Bonzini wrote:
>>
>>
>> On 09/03/2017 10:58, Jason Wang wrote:
>>>
>>>
>>> On 2017年03月09日 17:28, Igor Mammedov wrote:
>>>> On Thu, 9 Mar 2017 10:32:44 +0800
>>>> Jason Wang<jasowang@redhat.com>  wrote:
>>>>
>>>>> On 2017年03月09日 00:40, Igor Mammedov wrote:
>>>>>> On Tue, 7 Mar 2017 14:47:30 +0200
>>>>>> Marcel Apfelbaum<marcel@redhat.com>   wrote:
>>>>>>  
>>>>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>>>>> after caching ring translations"), IOMMU was required to be
>>>>>>>> created in
>>>>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>>>>> inconvenient for user. This patch releases this by:
>>>>>>>>
>>>>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>>>>     this during pci_init_bus_master
>>>>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>>>>     the memory listener to the new dma_as
>>>> Instead of trying to fix up later it's possible to refuse
>>>> adding iommu device if other devices has been added before
>>>> it with -device/device_add.
>>>> That would match current CLI semantics where device that
>>>> others depend on should be listed on CLI before that others
>>>> are listed.
>>>
>>> Yes, but it works by chance in the past for the device that does not
>>> want bus_master_as in their realize. This change may surprise their users.
>>
>> There is another posssibility.  Create the address space at init time
>> and add a container region instead of the bus_master_enable_region
>> alias.  Then at machine_done time you create the bus_master_enable_region.
>>
>> This removes the need for the callbacks and makes the MemoryListener
>> just work.
>>
>> Paolo
> 
> That's definitely cleaner than a callback, though a bit tricky
> so needs a good comment explaining what is going on.
> And then I think we can revert
> 96a8821d21411f10d77ea994af369c6e5c35a2cc, right?

Yes. 96a8821d21411f10d77ea994af369c6e5c35a2cc can go then (and this is
how I expected it to work all the time---my bad).

Paolo



Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Posted by Jason Wang 7 years, 1 month ago

On 2017年03月09日 23:32, Paolo Bonzini wrote:
>
> On 09/03/2017 16:31, Michael S. Tsirkin wrote:
>> On Thu, Mar 09, 2017 at 11:05:36AM +0100, Paolo Bonzini wrote:
>>>
>>> On 09/03/2017 10:58, Jason Wang wrote:
>>>>
>>>> On 2017年03月09日 17:28, Igor Mammedov wrote:
>>>>> On Thu, 9 Mar 2017 10:32:44 +0800
>>>>> Jason Wang<jasowang@redhat.com>  wrote:
>>>>>
>>>>>> On 2017年03月09日 00:40, Igor Mammedov wrote:
>>>>>>> On Tue, 7 Mar 2017 14:47:30 +0200
>>>>>>> Marcel Apfelbaum<marcel@redhat.com>   wrote:
>>>>>>>   
>>>>>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>>>>>> after caching ring translations"), IOMMU was required to be
>>>>>>>>> created in
>>>>>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>>>>>> inconvenient for user. This patch releases this by:
>>>>>>>>>
>>>>>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>>>>>      this during pci_init_bus_master
>>>>>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>>>>>      the memory listener to the new dma_as
>>>>> Instead of trying to fix up later it's possible to refuse
>>>>> adding iommu device if other devices has been added before
>>>>> it with -device/device_add.
>>>>> That would match current CLI semantics where device that
>>>>> others depend on should be listed on CLI before that others
>>>>> are listed.
>>>> Yes, but it works by chance in the past for the device that does not
>>>> want bus_master_as in their realize. This change may surprise their users.
>>> There is another posssibility.  Create the address space at init time
>>> and add a container region instead of the bus_master_enable_region
>>> alias.  Then at machine_done time you create the bus_master_enable_region.
>>>
>>> This removes the need for the callbacks and makes the MemoryListener
>>> just work.
>>>
>>> Paolo
>> That's definitely cleaner than a callback, though a bit tricky
>> so needs a good comment explaining what is going on.
>> And then I think we can revert
>> 96a8821d21411f10d77ea994af369c6e5c35a2cc, right?
> Yes. 96a8821d21411f10d77ea994af369c6e5c35a2cc can go then (and this is
> how I expected it to work all the time---my bad).
>
> Paolo
>
>

Will prepare patches for this (probably next week).

Thanks