[PATCH v6 12/19] vfio-user: IOMMU support for remote device

Posted by Jagannathan Raman 3 years, 11 months ago
Assign a separate address space to each device in the remote process.

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 include/hw/remote/iommu.h | 18 +++++++++
 hw/remote/iommu.c         | 78 +++++++++++++++++++++++++++++++++++++++
 MAINTAINERS               |  2 +
 hw/remote/meson.build     |  1 +
 4 files changed, 99 insertions(+)
 create mode 100644 include/hw/remote/iommu.h
 create mode 100644 hw/remote/iommu.c

diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
new file mode 100644
index 0000000000..8f850400f1
--- /dev/null
+++ b/include/hw/remote/iommu.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright © 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOMMU_H
+#define REMOTE_IOMMU_H
+
+#include "hw/pci/pci_bus.h"
+
+void remote_configure_iommu(PCIBus *pci_bus);
+
+void remote_iommu_del_device(PCIDevice *pci_dev);
+
+#endif
diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
new file mode 100644
index 0000000000..50d75cc22d
--- /dev/null
+++ b/hw/remote/iommu.c
@@ -0,0 +1,78 @@
+/**
+ * IOMMU for remote device
+ *
+ * Copyright © 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/iommu.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
+#include "trace.h"
+
+struct RemoteIommuElem {
+    AddressSpace  as;
+    MemoryRegion  mr;
+};
+
+GHashTable *remote_iommu_elem_by_bdf;
+
+#define INT2VOIDP(i) (void *)(uintptr_t)(i)
+
+static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
+                                              void *opaque, int devfn)
+{
+    struct RemoteIommuElem *elem = NULL;
+    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
+
+    if (!remote_iommu_elem_by_bdf) {
+        return &address_space_memory;
+    }
+
+    elem = g_hash_table_lookup(remote_iommu_elem_by_bdf, INT2VOIDP(pci_bdf));
+
+    if (!elem) {
+        g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
+        g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
+
+        elem = g_malloc0(sizeof(struct RemoteIommuElem));
+
+        memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
+        address_space_init(&elem->as, &elem->mr, as_name);
+
+        g_hash_table_insert(remote_iommu_elem_by_bdf, INT2VOIDP(pci_bdf), elem);
+    }
+
+    return &elem->as;
+}
+
+void remote_iommu_del_device(PCIDevice *pci_dev)
+{
+    int pci_bdf;
+
+    if (!remote_iommu_elem_by_bdf || !pci_dev) {
+        return;
+    }
+
+    pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
+
+    g_hash_table_remove(remote_iommu_elem_by_bdf, INT2VOIDP(pci_bdf));
+}
+
+void remote_configure_iommu(PCIBus *pci_bus)
+{
+    if (!remote_iommu_elem_by_bdf) {
+        remote_iommu_elem_by_bdf = g_hash_table_new_full(NULL, NULL,
+                                                         NULL, NULL);
+    }
+
+    pci_setup_iommu(pci_bus, remote_iommu_find_add_as, NULL);
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 751d97852d..f47232c78c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3569,6 +3569,8 @@ F: hw/remote/iohub.c
 F: include/hw/remote/iohub.h
 F: subprojects/libvfio-user
 F: hw/remote/vfio-user-obj.c
+F: hw/remote/iommu.c
+F: include/hw/remote/iommu.h
 
 EBPF:
 M: Jason Wang <jasowang@redhat.com>
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index 534ac5df79..bcef83c8cc 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
+remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
 remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
 
 remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)
-- 
2.20.1


Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Stefan Hajnoczi 3 years, 11 months ago
On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
> +struct RemoteIommuElem {
> +    AddressSpace  as;
> +    MemoryRegion  mr;
> +};
> +
> +GHashTable *remote_iommu_elem_by_bdf;

A mutable global hash table requires synchronization when device
emulation runs in multiple threads.

I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
global. If there is only 1 device per remote PCI bus, then there are no
further synchronization concerns.
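
For example, something along these lines - just a sketch, and the
RemoteIommu struct and its field names below are made up for
illustration, not code from this series:

#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"

/* hypothetical per-bus state passed to pci_setup_iommu() as iommu_opaque */
typedef struct RemoteIommu {
    GHashTable *elem_by_devfn;   /* devfn -> RemoteIommuElem */
    QemuMutex lock;              /* protects elem_by_devfn */
} RemoteIommu;

AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
                                       void *opaque, int devfn);

void remote_configure_iommu(PCIBus *pci_bus)
{
    RemoteIommu *iommu = g_new0(RemoteIommu, 1);

    iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, NULL);
    qemu_mutex_init(&iommu->lock);

    /* the lookup hook reads its table from 'opaque' instead of a global */
    pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu);
}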

> +
> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
> +
> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
> +                                              void *opaque, int devfn)
> +{
> +    struct RemoteIommuElem *elem = NULL;
> +    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
> +
> +    if (!remote_iommu_elem_by_bdf) {
> +        return &address_space_memory;
> +    }

When can this happen? remote_configure_iommu() allocates
remote_iommu_elem_by_bdf so it should always be non-NULL.
Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Jag Raman 3 years, 11 months ago

> On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
>> +struct RemoteIommuElem {
>> +    AddressSpace  as;
>> +    MemoryRegion  mr;
>> +};
>> +
>> +GHashTable *remote_iommu_elem_by_bdf;
> 
> A mutable global hash table requires synchronization when device
> emulation runs in multiple threads.
> 
> I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
> global. If there is only 1 device per remote PCI bus, then there are no
> further synchronization concerns.

OK, will avoid the global. We would need to access the hash table
concurrently since there could be more than one device in the
same bus - so a mutex would be needed here.
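
Roughly like this on the lookup side - a sketch only, reusing
RemoteIommuElem from the patch plus the made-up RemoteIommu/elem_by_devfn
layout suggested above, so not final code:

AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
                                       void *opaque, int devfn)
{
    RemoteIommu *iommu = opaque;          /* from pci_setup_iommu() */
    struct RemoteIommuElem *elem;

    qemu_mutex_lock(&iommu->lock);

    elem = g_hash_table_lookup(iommu->elem_by_devfn, GINT_TO_POINTER(devfn));
    if (!elem) {
        /* names keyed by devfn only, since the table is per bus */
        g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", devfn);
        g_autofree char *as_name = g_strdup_printf("vfu-as-%d", devfn);

        elem = g_new0(struct RemoteIommuElem, 1);
        memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
        address_space_init(&elem->as, &elem->mr, as_name);

        g_hash_table_insert(iommu->elem_by_devfn, GINT_TO_POINTER(devfn),
                            elem);
    }

    qemu_mutex_unlock(&iommu->lock);

    return &elem->as;
}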

> 
>> +
>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>> +
>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
>> +                                              void *opaque, int devfn)
>> +{
>> +    struct RemoteIommuElem *elem = NULL;
>> +    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
>> +
>> +    if (!remote_iommu_elem_by_bdf) {
>> +        return &address_space_memory;
>> +    }
> 
> When can this happen? remote_configure_iommu() allocates
> remote_iommu_elem_by_bdf so it should always be non-NULL.

I think we won’t hit this case. g_hash_table_new_full() would always succeed.

Thank you!
--
Jag

Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Stefan Hajnoczi 3 years, 11 months ago
On Mon, Feb 28, 2022 at 07:54:38PM +0000, Jag Raman wrote:
> 
> 
> > On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> > On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
> >> +struct RemoteIommuElem {
> >> +    AddressSpace  as;
> >> +    MemoryRegion  mr;
> >> +};
> >> +
> >> +GHashTable *remote_iommu_elem_by_bdf;
> > 
> > A mutable global hash table requires synchronization when device
> > emulation runs in multiple threads.
> > 
> > I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
> > global. If there is only 1 device per remote PCI bus, then there are no
> > further synchronization concerns.
> 
> OK, will avoid the global. We would need to access the hash table
> concurrently since there could be more than one device in the
> same bus - so a mutex would be needed here.

I thought the PCIe topology can be set up with a separate bus for each
x-vfio-user-server? I remember something like that in the previous
revision where a root port was instantiated for each x-vfio-user-server.

Stefan
Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Jag Raman 3 years, 11 months ago

> On Mar 2, 2022, at 11:49 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Mon, Feb 28, 2022 at 07:54:38PM +0000, Jag Raman wrote:
>> 
>> 
>>> On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>> 
>>> On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
>>>> +struct RemoteIommuElem {
>>>> +    AddressSpace  as;
>>>> +    MemoryRegion  mr;
>>>> +};
>>>> +
>>>> +GHashTable *remote_iommu_elem_by_bdf;
>>> 
>>> A mutable global hash table requires synchronization when device
>>> emulation runs in multiple threads.
>>> 
>>> I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
>>> global. If there is only 1 device per remote PCI bus, then there are no
>>> further synchronization concerns.
>> 
>> OK, will avoid the global. We would need to access the hash table
>> concurrently since there could be more than one device in the
>> same bus - so a mutex would be needed here.
> 
> I thought the PCIe topology can be set up with a separate bus for each
> x-vfio-user-server? I remember something like that in the previous
> revision where a root port was instantiated for each x-vfio-user-server.

Yes, we could set up the PCIe topology to be that way. But the user could
add more than one device to the same bus, unless the bus type explicitly
limits the number of devices to one (BusClass->max_dev).
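
For completeness, capping a bus at one device would only need a
class_init tweak on a bus subtype. A hypothetical sketch, with a made-up
"vfu-pcie-bus" type name:

#include "qemu/osdep.h"
#include "qemu/module.h"
#include "qom/object.h"
#include "hw/qdev-core.h"
#include "hw/pci/pci_bus.h"

#define TYPE_VFU_PCIE_BUS "vfu-pcie-bus"     /* made-up type name */

static void vfu_pcie_bus_class_init(ObjectClass *klass, void *data)
{
    BusClass *k = BUS_CLASS(klass);

    k->max_dev = 1;     /* qdev core refuses to plug a second device */
}

static const TypeInfo vfu_pcie_bus_info = {
    .name       = TYPE_VFU_PCIE_BUS,
    .parent     = TYPE_PCIE_BUS,
    .class_init = vfu_pcie_bus_class_init,
};

static void vfu_pcie_bus_register_types(void)
{
    type_register_static(&vfu_pcie_bus_info);
}

type_init(vfu_pcie_bus_register_types)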

Thank you!
--
Jag

> 
> Stefan
Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Stefan Hajnoczi 3 years, 11 months ago
On Thu, Mar 03, 2022 at 02:49:53PM +0000, Jag Raman wrote:
> 
> 
> > On Mar 2, 2022, at 11:49 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> > On Mon, Feb 28, 2022 at 07:54:38PM +0000, Jag Raman wrote:
> >> 
> >> 
> >>> On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >>> 
> >>> On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
> >>>> +struct RemoteIommuElem {
> >>>> +    AddressSpace  as;
> >>>> +    MemoryRegion  mr;
> >>>> +};
> >>>> +
> >>>> +GHashTable *remote_iommu_elem_by_bdf;
> >>> 
> >>> A mutable global hash table requires synchronization when device
> >>> emulation runs in multiple threads.
> >>> 
> >>> I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
> >>> global. If there is only 1 device per remote PCI bus, then there are no
> >>> further synchronization concerns.
> >> 
> >> OK, will avoid the global. We would need to access the hash table
> >> concurrently since there could be more than one device in the
> >> same bus - so a mutex would be needed here.
> > 
> > I thought the PCIe topology can be set up with a separate bus for each
> > x-vfio-user-server? I remember something like that in the previous
> > revision where a root port was instantiated for each x-vfio-user-server.
> 
> Yes, we could set up the PCIe topology to be that way. But the user could
> add more than one device to the same bus, unless the bus type explicitly
> limits the number of devices to one (BusClass->max_dev).

Due to how the IOMMU is used to restrict the bus to the vfio-user
client's DMA mappings, it seems like it's necessary to limit the number
of devices to 1 per bus anyway?

Stefan
Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Jag Raman 3 years, 11 months ago

> On Mar 7, 2022, at 4:45 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Thu, Mar 03, 2022 at 02:49:53PM +0000, Jag Raman wrote:
>> 
>> 
>>> On Mar 2, 2022, at 11:49 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>> 
>>> On Mon, Feb 28, 2022 at 07:54:38PM +0000, Jag Raman wrote:
>>>> 
>>>> 
>>>>> On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>>>> 
>>>>> On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
>>>>>> +struct RemoteIommuElem {
>>>>>> +    AddressSpace  as;
>>>>>> +    MemoryRegion  mr;
>>>>>> +};
>>>>>> +
>>>>>> +GHashTable *remote_iommu_elem_by_bdf;
>>>>> 
>>>>> A mutable global hash table requires synchronization when device
>>>>> emulation runs in multiple threads.
>>>>> 
>>>>> I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
>>>>> global. If there is only 1 device per remote PCI bus, then there are no
>>>>> further synchronization concerns.
>>>> 
>>>> OK, will avoid the global. We would need to access the hash table
>>>> concurrently since there could be more than one device in the
>>>> same bus - so a mutex would be needed here.
>>> 
>>> I thought the PCIe topology can be set up with a separate bus for each
>>> x-vfio-user-server? I remember something like that in the previous
>>> revision where a root port was instantiated for each x-vfio-user-server.
>> 
>> Yes, we could set up the PCIe topology to be that way. But the user could
>> add more than one device to the same bus, unless the bus type explicitly
>> limits the number of devices to one (BusClass->max_dev).
> 
> Due to how the IOMMU is used to restrict the bus to the vfio-user
> > client's DMA mappings, it seems like it's necessary to limit the number
> of devices to 1 per bus anyway?

Hi Stefan,

“remote_iommu_elem_by_bdf” has a separate entry for each of the BDF
combinations - it provides a separate DMA address space per device. As
such, we don’t have to limit the number of devices to 1 per bus.
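
To illustrate how that plays out on the DMA path:
pci_device_iommu_address_space() invokes the hook registered with
pci_setup_iommu(), so each device resolves DMA against its own
AddressSpace. The example_dma_read() helper below is made up for this
sketch:

#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "exec/memory.h"

/* made-up helper, just to show where the per-device AddressSpace comes from */
MemTxResult example_dma_read(PCIDevice *pci_dev, hwaddr addr,
                             void *buf, hwaddr len)
{
    /* calls the hook installed by pci_setup_iommu(), i.e.
     * remote_iommu_find_add_as(), so each BDF gets its own space */
    AddressSpace *as = pci_device_iommu_address_space(pci_dev);

    return address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED, buf, len);
}

IIUC the regular pci_dma_read()/pci_dma_write() path layers the device's
bus-master address space on top of the one returned here, so the per-BDF
separation carries over.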

Thank you!
--
Jag

> 
> Stefan

Re: [PATCH v6 12/19] vfio-user: IOMMU support for remote device
Posted by Stefan Hajnoczi 3 years, 11 months ago
On Mon, Mar 07, 2022 at 02:42:49PM +0000, Jag Raman wrote:
> 
> 
> > On Mar 7, 2022, at 4:45 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> > On Thu, Mar 03, 2022 at 02:49:53PM +0000, Jag Raman wrote:
> >> 
> >> 
> >>> On Mar 2, 2022, at 11:49 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >>> 
> >>> On Mon, Feb 28, 2022 at 07:54:38PM +0000, Jag Raman wrote:
> >>>> 
> >>>> 
> >>>>> On Feb 22, 2022, at 5:40 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >>>>> 
> >>>>> On Thu, Feb 17, 2022 at 02:48:59AM -0500, Jagannathan Raman wrote:
> >>>>>> +struct RemoteIommuElem {
> >>>>>> +    AddressSpace  as;
> >>>>>> +    MemoryRegion  mr;
> >>>>>> +};
> >>>>>> +
> >>>>>> +GHashTable *remote_iommu_elem_by_bdf;
> >>>>> 
> >>>>> A mutable global hash table requires synchronization when device
> >>>>> emulation runs in multiple threads.
> >>>>> 
> >>>>> I suggest using pci_setup_iommu()'s iommu_opaque argument to avoid the
> >>>>> global. If there is only 1 device per remote PCI bus, then there are no
> >>>>> further synchronization concerns.
> >>>> 
> >>>> OK, will avoid the global. We would need to access the hash table
> >>>> concurrently since there could be more than one device in the
> >>>> same bus - so a mutex would be needed here.
> >>> 
> >>> I thought the PCIe topology can be set up with a separate bus for each
> >>> x-vfio-user-server? I remember something like that in the previous
> >>> revision where a root port was instantiated for each x-vfio-user-server.
> >> 
> >> Yes, we could set up the PCIe topology to be that way. But the user could
> >> add more than one device to the same bus, unless the bus type explicitly
> >> limits the number of devices to one (BusClass->max_dev).
> > 
> > Due to how the IOMMU is used to restrict the bus to the vfio-user
> > > client's DMA mappings, it seems like it's necessary to limit the number
> > of devices to 1 per bus anyway?
> 
> Hi Stefan,
> 
> “remote_iommu_elem_by_bdf” has a separate entry for each of the BDF
> combinations - it provides a separate DMA address space per device. As
> such, we don’t have to limit the number of devices to 1 per bus.

I see, thanks!

Stefan