Assign separate address space for each device in the remote processes.
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
include/hw/remote/iommu.h | 18 ++++++++
hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
MAINTAINERS | 2 +
hw/remote/meson.build | 1 +
4 files changed, 116 insertions(+)
create mode 100644 include/hw/remote/iommu.h
create mode 100644 hw/remote/iommu.c
diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
new file mode 100644
index 0000000000..8f850400f1
--- /dev/null
+++ b/include/hw/remote/iommu.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright © 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOMMU_H
+#define REMOTE_IOMMU_H
+
+#include "hw/pci/pci_bus.h"
+
+void remote_configure_iommu(PCIBus *pci_bus);
+
+void remote_iommu_del_device(PCIDevice *pci_dev);
+
+#endif
diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
new file mode 100644
index 0000000000..13f329b45d
--- /dev/null
+++ b/hw/remote/iommu.c
@@ -0,0 +1,95 @@
+/**
+ * IOMMU for remote device
+ *
+ * Copyright © 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/iommu.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
+#include "trace.h"
+
+struct RemoteIommuElem {
+ AddressSpace as;
+ MemoryRegion mr;
+};
+
+struct RemoteIommuTable {
+ QemuMutex lock;
+ GHashTable *elem_by_bdf;
+} remote_iommu_table;
+
+#define INT2VOIDP(i) (void *)(uintptr_t)(i)
+
+static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
+ void *opaque, int devfn)
+{
+ struct RemoteIommuTable *iommu_table = opaque;
+ struct RemoteIommuElem *elem = NULL;
+ int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
+
+ elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
+
+ if (!elem) {
+ g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
+ g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
+
+ elem = g_malloc0(sizeof(struct RemoteIommuElem));
+
+ memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
+ address_space_init(&elem->as, &elem->mr, as_name);
+
+ qemu_mutex_lock(&iommu_table->lock);
+ g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
+ qemu_mutex_unlock(&iommu_table->lock);
+ }
+
+ return &elem->as;
+}
+
+static void remote_iommu_del_elem(gpointer data)
+{
+ struct RemoteIommuElem *elem = data;
+
+ g_assert(elem);
+
+ memory_region_unref(&elem->mr);
+ address_space_destroy(&elem->as);
+
+ g_free(elem);
+}
+
+void remote_iommu_del_device(PCIDevice *pci_dev)
+{
+ int pci_bdf;
+
+ if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
+ return;
+ }
+
+ pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
+
+ qemu_mutex_lock(&remote_iommu_table.lock);
+ g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
+ qemu_mutex_unlock(&remote_iommu_table.lock);
+}
+
+void remote_configure_iommu(PCIBus *pci_bus)
+{
+ if (!remote_iommu_table.elem_by_bdf) {
+ remote_iommu_table.elem_by_bdf =
+ g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
+ qemu_mutex_init(&remote_iommu_table.lock);
+ }
+
+ pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index e7b0297a63..21694a9698 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c
F: include/hw/remote/iohub.h
F: subprojects/libvfio-user
F: hw/remote/vfio-user-obj.c
+F: hw/remote/iommu.c
+F: include/hw/remote/iommu.h
EBPF:
M: Jason Wang <jasowang@redhat.com>
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index 534ac5df79..bcef83c8cc 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
+remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)
--
2.20.1
On Fri, 25 Mar 2022 15:19:41 -0400
Jagannathan Raman <jag.raman@oracle.com> wrote:
> Assign separate address space for each device in the remote processes.
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
> include/hw/remote/iommu.h | 18 ++++++++
> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
> MAINTAINERS | 2 +
> hw/remote/meson.build | 1 +
> 4 files changed, 116 insertions(+)
> create mode 100644 include/hw/remote/iommu.h
> create mode 100644 hw/remote/iommu.c
>
> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
> new file mode 100644
> index 0000000000..8f850400f1
> --- /dev/null
> +++ b/include/hw/remote/iommu.h
> @@ -0,0 +1,18 @@
> +/**
> + * Copyright © 2022 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef REMOTE_IOMMU_H
> +#define REMOTE_IOMMU_H
> +
> +#include "hw/pci/pci_bus.h"
> +
> +void remote_configure_iommu(PCIBus *pci_bus);
> +
> +void remote_iommu_del_device(PCIDevice *pci_dev);
> +
> +#endif
> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
> new file mode 100644
> index 0000000000..13f329b45d
> --- /dev/null
> +++ b/hw/remote/iommu.c
> @@ -0,0 +1,95 @@
> +/**
> + * IOMMU for remote device
> + *
> + * Copyright © 2022 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include "hw/remote/iommu.h"
> +#include "hw/pci/pci_bus.h"
> +#include "hw/pci/pci.h"
> +#include "exec/memory.h"
> +#include "exec/address-spaces.h"
> +#include "trace.h"
> +
> +struct RemoteIommuElem {
> + AddressSpace as;
> + MemoryRegion mr;
> +};
> +
> +struct RemoteIommuTable {
> + QemuMutex lock;
> + GHashTable *elem_by_bdf;
> +} remote_iommu_table;
> +
> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
> +
> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
> + void *opaque, int devfn)
> +{
> + struct RemoteIommuTable *iommu_table = opaque;
> + struct RemoteIommuElem *elem = NULL;
> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
> +
> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
> +
> + if (!elem) {
> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
> +
> + elem = g_malloc0(sizeof(struct RemoteIommuElem));
> +
> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
goes here:
memory_region_do_init()
if (!owner) {
owner = container_get(qdev_get_machine(), "/unattached");
}
then
> + address_space_init(&elem->as, &elem->mr, as_name);
> +
> + qemu_mutex_lock(&iommu_table->lock);
> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
> + qemu_mutex_unlock(&iommu_table->lock);
> + }
> +
> + return &elem->as;
> +}
> +
> +static void remote_iommu_del_elem(gpointer data)
> +{
> + struct RemoteIommuElem *elem = data;
> +
> + g_assert(elem);
> +
> + memory_region_unref(&elem->mr);
here we call
object_unref(mr->owner);
leaving dangling pointer in owner '(qdev_get_machine(), "/unattached")'
it doesn't look correct
I thought that memory_region_unref() should be always paired with memory_region_ref()
and looking at memory_region_init(...owner...) history it looks like
owner-less (NULL) regions are not meant to be deleted ever.
> + address_space_destroy(&elem->as);
> +
> + g_free(elem);
> +}
> +
> +void remote_iommu_del_device(PCIDevice *pci_dev)
> +{
> + int pci_bdf;
> +
> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
> + return;
> + }
> +
> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
> +
> + qemu_mutex_lock(&remote_iommu_table.lock);
> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
> + qemu_mutex_unlock(&remote_iommu_table.lock);
> +}
> +
> +void remote_configure_iommu(PCIBus *pci_bus)
> +{
> + if (!remote_iommu_table.elem_by_bdf) {
> + remote_iommu_table.elem_by_bdf =
> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
> + qemu_mutex_init(&remote_iommu_table.lock);
> + }
> +
> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e7b0297a63..21694a9698 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c
> F: include/hw/remote/iohub.h
> F: subprojects/libvfio-user
> F: hw/remote/vfio-user-obj.c
> +F: hw/remote/iommu.c
> +F: include/hw/remote/iommu.h
>
> EBPF:
> M: Jason Wang <jasowang@redhat.com>
> diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> index 534ac5df79..bcef83c8cc 100644
> --- a/hw/remote/meson.build
> +++ b/hw/remote/meson.build
> @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
> +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
>
> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)
> On Apr 13, 2022, at 10:25 AM, Igor Mammedov <imammedo@redhat.com> wrote:
>
> On Fri, 25 Mar 2022 15:19:41 -0400
> Jagannathan Raman <jag.raman@oracle.com> wrote:
>
>> Assign separate address space for each device in the remote processes.
>>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> ---
>> include/hw/remote/iommu.h | 18 ++++++++
>> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
>> MAINTAINERS | 2 +
>> hw/remote/meson.build | 1 +
>> 4 files changed, 116 insertions(+)
>> create mode 100644 include/hw/remote/iommu.h
>> create mode 100644 hw/remote/iommu.c
>>
>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
>> new file mode 100644
>> index 0000000000..8f850400f1
>> --- /dev/null
>> +++ b/include/hw/remote/iommu.h
>> @@ -0,0 +1,18 @@
>> +/**
>> + * Copyright © 2022 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef REMOTE_IOMMU_H
>> +#define REMOTE_IOMMU_H
>> +
>> +#include "hw/pci/pci_bus.h"
>> +
>> +void remote_configure_iommu(PCIBus *pci_bus);
>> +
>> +void remote_iommu_del_device(PCIDevice *pci_dev);
>> +
>> +#endif
>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
>> new file mode 100644
>> index 0000000000..13f329b45d
>> --- /dev/null
>> +++ b/hw/remote/iommu.c
>> @@ -0,0 +1,95 @@
>> +/**
>> + * IOMMU for remote device
>> + *
>> + * Copyright © 2022 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +
>> +#include "hw/remote/iommu.h"
>> +#include "hw/pci/pci_bus.h"
>> +#include "hw/pci/pci.h"
>> +#include "exec/memory.h"
>> +#include "exec/address-spaces.h"
>> +#include "trace.h"
>> +
>> +struct RemoteIommuElem {
>> + AddressSpace as;
>> + MemoryRegion mr;
>> +};
>> +
>> +struct RemoteIommuTable {
>> + QemuMutex lock;
>> + GHashTable *elem_by_bdf;
>> +} remote_iommu_table;
>> +
>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>> +
>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
>> + void *opaque, int devfn)
>> +{
>> + struct RemoteIommuTable *iommu_table = opaque;
>> + struct RemoteIommuElem *elem = NULL;
>> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
>> +
>> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
>> +
>> + if (!elem) {
>> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
>> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
>> +
>> + elem = g_malloc0(sizeof(struct RemoteIommuElem));
>> +
>> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
> goes here:
> memory_region_do_init()
> if (!owner) {
> owner = container_get(qdev_get_machine(), "/unattached");
> }
>
> then
>
>> + address_space_init(&elem->as, &elem->mr, as_name);
>> +
>> + qemu_mutex_lock(&iommu_table->lock);
>> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
>> + qemu_mutex_unlock(&iommu_table->lock);
>> + }
>> +
>> + return &elem->as;
>> +}
>> +
>> +static void remote_iommu_del_elem(gpointer data)
>> +{
>> + struct RemoteIommuElem *elem = data;
>> +
>> + g_assert(elem);
>> +
>> + memory_region_unref(&elem->mr);
>
> here we call
> object_unref(mr->owner);
> leaving dangling pointer in owner '(qdev_get_machine(), "/unattached")'
> it doesn't look correct
>
> I thought that memory_region_unref() should be always paired with memory_region_ref()
>
> and looking at memory_region_init(...owner...) history it looks like
> owner-less (NULL) regions are not meant to be deleted ever.
Hi Igor,
Thanks for the pointers about ref counters for MemoryRegions.
It makes sense - MemoryRegions are not QEMU Objects. So their
owner’s ref counters are used instead. So the expectation is that
when the owner is destroyed, the MemoryRegions initialized by them
also get destroyed simultaneously.
In this case, RemoteIommuElem->mr does not have an owner. Therefore,
we don’t have to manage its ref counter. When RemoteIommuElem is
destroyed, the MemoryRegion should be cleaned up automatically.
--
Jag
>
>> + address_space_destroy(&elem->as);
>> +
>> + g_free(elem);
>> +}
>> +
>> +void remote_iommu_del_device(PCIDevice *pci_dev)
>> +{
>> + int pci_bdf;
>> +
>> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
>> + return;
>> + }
>> +
>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
>> +
>> + qemu_mutex_lock(&remote_iommu_table.lock);
>> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
>> + qemu_mutex_unlock(&remote_iommu_table.lock);
>> +}
>> +
>> +void remote_configure_iommu(PCIBus *pci_bus)
>> +{
>> + if (!remote_iommu_table.elem_by_bdf) {
>> + remote_iommu_table.elem_by_bdf =
>> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
>> + qemu_mutex_init(&remote_iommu_table.lock);
>> + }
>> +
>> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
>> +}
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index e7b0297a63..21694a9698 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c
>> F: include/hw/remote/iohub.h
>> F: subprojects/libvfio-user
>> F: hw/remote/vfio-user-obj.c
>> +F: hw/remote/iommu.c
>> +F: include/hw/remote/iommu.h
>>
>> EBPF:
>> M: Jason Wang <jasowang@redhat.com>
>> diff --git a/hw/remote/meson.build b/hw/remote/meson.build
>> index 534ac5df79..bcef83c8cc 100644
>> --- a/hw/remote/meson.build
>> +++ b/hw/remote/meson.build
>> @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
>> +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
>>
>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)
>
> On Apr 13, 2022, at 2:24 PM, Jag Raman <jag.raman@oracle.com> wrote:
>
>
>
>> On Apr 13, 2022, at 10:25 AM, Igor Mammedov <imammedo@redhat.com> wrote:
>>
>> On Fri, 25 Mar 2022 15:19:41 -0400
>> Jagannathan Raman <jag.raman@oracle.com> wrote:
>>
>>> Assign separate address space for each device in the remote processes.
>>>
>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>> ---
>>> include/hw/remote/iommu.h | 18 ++++++++
>>> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
>>> MAINTAINERS | 2 +
>>> hw/remote/meson.build | 1 +
>>> 4 files changed, 116 insertions(+)
>>> create mode 100644 include/hw/remote/iommu.h
>>> create mode 100644 hw/remote/iommu.c
>>>
>>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
>>> new file mode 100644
>>> index 0000000000..8f850400f1
>>> --- /dev/null
>>> +++ b/include/hw/remote/iommu.h
>>> @@ -0,0 +1,18 @@
>>> +/**
>>> + * Copyright © 2022 Oracle and/or its affiliates.
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#ifndef REMOTE_IOMMU_H
>>> +#define REMOTE_IOMMU_H
>>> +
>>> +#include "hw/pci/pci_bus.h"
>>> +
>>> +void remote_configure_iommu(PCIBus *pci_bus);
>>> +
>>> +void remote_iommu_del_device(PCIDevice *pci_dev);
>>> +
>>> +#endif
>>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
>>> new file mode 100644
>>> index 0000000000..13f329b45d
>>> --- /dev/null
>>> +++ b/hw/remote/iommu.c
>>> @@ -0,0 +1,95 @@
>>> +/**
>>> + * IOMMU for remote device
>>> + *
>>> + * Copyright © 2022 Oracle and/or its affiliates.
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "qemu-common.h"
>>> +
>>> +#include "hw/remote/iommu.h"
>>> +#include "hw/pci/pci_bus.h"
>>> +#include "hw/pci/pci.h"
>>> +#include "exec/memory.h"
>>> +#include "exec/address-spaces.h"
>>> +#include "trace.h"
>>> +
>>> +struct RemoteIommuElem {
>>> + AddressSpace as;
>>> + MemoryRegion mr;
>>> +};
>>> +
>>> +struct RemoteIommuTable {
>>> + QemuMutex lock;
>>> + GHashTable *elem_by_bdf;
>>> +} remote_iommu_table;
>>> +
>>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>>> +
>>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
>>> + void *opaque, int devfn)
>>> +{
>>> + struct RemoteIommuTable *iommu_table = opaque;
>>> + struct RemoteIommuElem *elem = NULL;
>>> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
>>> +
>>> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
>>> +
>>> + if (!elem) {
>>> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
>>> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
>>> +
>>> + elem = g_malloc0(sizeof(struct RemoteIommuElem));
>>> +
>>> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
>> goes here:
>> memory_region_do_init()
>> if (!owner) {
>> owner = container_get(qdev_get_machine(), "/unattached");
>> }
>>
>> then
>>
>>> + address_space_init(&elem->as, &elem->mr, as_name);
>>> +
>>> + qemu_mutex_lock(&iommu_table->lock);
>>> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
>>> + qemu_mutex_unlock(&iommu_table->lock);
>>> + }
>>> +
>>> + return &elem->as;
>>> +}
>>> +
>>> +static void remote_iommu_del_elem(gpointer data)
>>> +{
>>> + struct RemoteIommuElem *elem = data;
>>> +
>>> + g_assert(elem);
>>> +
>>> + memory_region_unref(&elem->mr);
>>
>> here we call
>> object_unref(mr->owner);
>> leaving dangling pointer in owner '(qdev_get_machine(), "/unattached")'
>> it doesn't look correct
>>
>> I thought that memory_region_unref() should be always paired with memory_region_ref()
>>
>> and looking at memory_region_init(...owner...) history it looks like
>> owner-less (NULL) regions are not meant to be deleted ever.
>
> Hi Igor,
>
> Thanks for the pointers about ref counters for MemoryRegions.
>
> It makes sense - MemoryRegions are not QEMU Objects. So their
> owner’s ref counters are used instead. So the expectation is that
> when the owner is destroyed, the MemoryRegions initialized by them
> also get destroyed simultaneously.
Well, MemoryRegions are indeed QEMU objects -
"memory_region_init() -> object_initialize()" initializes the object.
So we should be able to unref the MemoryRegion object directly.
We could make the PCIDevice as the owner of its IOMMU region -
when the device is finalized, its region would be finalized as well.
Given the above, I don’t think we would need a separate delete
function (such as remote_iommu_del_device()). When the device is
unplugged, its MemoryRegion would be finalized automatically.
Thank you!
--
Jag
>
> In this case, RemoteIommuElem->mr does not have an owner. Therefore,
> we don’t have to manage its ref counter. When RemoteIommuElem is
> destroyed, the MemoryRegion should be cleaned up automatically.
>
> --
> Jag
>
>>
>>> + address_space_destroy(&elem->as);
>>> +
>>> + g_free(elem);
>>> +}
>>> +
>>> +void remote_iommu_del_device(PCIDevice *pci_dev)
>>> +{
>>> + int pci_bdf;
>>> +
>>> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
>>> + return;
>>> + }
>>> +
>>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
>>> +
>>> + qemu_mutex_lock(&remote_iommu_table.lock);
>>> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
>>> + qemu_mutex_unlock(&remote_iommu_table.lock);
>>> +}
>>> +
>>> +void remote_configure_iommu(PCIBus *pci_bus)
>>> +{
>>> + if (!remote_iommu_table.elem_by_bdf) {
>>> + remote_iommu_table.elem_by_bdf =
>>> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
>>> + qemu_mutex_init(&remote_iommu_table.lock);
>>> + }
>>> +
>>> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
>>> +}
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index e7b0297a63..21694a9698 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c
>>> F: include/hw/remote/iohub.h
>>> F: subprojects/libvfio-user
>>> F: hw/remote/vfio-user-obj.c
>>> +F: hw/remote/iommu.c
>>> +F: include/hw/remote/iommu.h
>>>
>>> EBPF:
>>> M: Jason Wang <jasowang@redhat.com>
>>> diff --git a/hw/remote/meson.build b/hw/remote/meson.build
>>> index 534ac5df79..bcef83c8cc 100644
>>> --- a/hw/remote/meson.build
>>> +++ b/hw/remote/meson.build
>>> @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
>>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
>>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
>>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
>>> +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
>>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
>>>
>>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)
On Fri, Mar 25, 2022 at 03:19:41PM -0400, Jagannathan Raman wrote:
> Assign separate address space for each device in the remote processes.
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
> include/hw/remote/iommu.h | 18 ++++++++
> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
> MAINTAINERS | 2 +
> hw/remote/meson.build | 1 +
> 4 files changed, 116 insertions(+)
> create mode 100644 include/hw/remote/iommu.h
> create mode 100644 hw/remote/iommu.c
>
> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
> new file mode 100644
> index 0000000000..8f850400f1
> --- /dev/null
> +++ b/include/hw/remote/iommu.h
> @@ -0,0 +1,18 @@
> +/**
> + * Copyright © 2022 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef REMOTE_IOMMU_H
> +#define REMOTE_IOMMU_H
> +
> +#include "hw/pci/pci_bus.h"
> +
> +void remote_configure_iommu(PCIBus *pci_bus);
> +
> +void remote_iommu_del_device(PCIDevice *pci_dev);
> +
> +#endif
> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
> new file mode 100644
> index 0000000000..13f329b45d
> --- /dev/null
> +++ b/hw/remote/iommu.c
> @@ -0,0 +1,95 @@
> +/**
> + * IOMMU for remote device
> + *
> + * Copyright © 2022 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include "hw/remote/iommu.h"
> +#include "hw/pci/pci_bus.h"
> +#include "hw/pci/pci.h"
> +#include "exec/memory.h"
> +#include "exec/address-spaces.h"
> +#include "trace.h"
> +
> +struct RemoteIommuElem {
> + AddressSpace as;
> + MemoryRegion mr;
> +};
> +
> +struct RemoteIommuTable {
> + QemuMutex lock;
> + GHashTable *elem_by_bdf;
> +} remote_iommu_table;
> +
> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
> +
> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
> + void *opaque, int devfn)
> +{
> + struct RemoteIommuTable *iommu_table = opaque;
> + struct RemoteIommuElem *elem = NULL;
> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
> +
> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
Why is a lock needed around g_hash_table_insert() below but no lock is
held around g_hash_table_lookup()?
Insertion isn't atomic because lookup and insert are separate operations
and they are not done under a single lock.
> +
> + if (!elem) {
> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
> +
> + elem = g_malloc0(sizeof(struct RemoteIommuElem));
> +
> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
> + address_space_init(&elem->as, &elem->mr, as_name);
> +
> + qemu_mutex_lock(&iommu_table->lock);
> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
> + qemu_mutex_unlock(&iommu_table->lock);
> + }
> +
> + return &elem->as;
> +}
> +
> +static void remote_iommu_del_elem(gpointer data)
> +{
> + struct RemoteIommuElem *elem = data;
> +
> + g_assert(elem);
> +
> + memory_region_unref(&elem->mr);
> + address_space_destroy(&elem->as);
> +
> + g_free(elem);
> +}
> +
> +void remote_iommu_del_device(PCIDevice *pci_dev)
> +{
> + int pci_bdf;
> +
> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
> + return;
> + }
> +
> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
> +
> + qemu_mutex_lock(&remote_iommu_table.lock);
> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
> + qemu_mutex_unlock(&remote_iommu_table.lock);
> +}
> +
> +void remote_configure_iommu(PCIBus *pci_bus)
> +{
> + if (!remote_iommu_table.elem_by_bdf) {
> + remote_iommu_table.elem_by_bdf =
> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
> + qemu_mutex_init(&remote_iommu_table.lock);
> + }
> +
> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
Why is remote_iommu_table global? It could be per-PCIBus and indexed by
just devfn instead of the full BDF.
> On Mar 29, 2022, at 8:35 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Fri, Mar 25, 2022 at 03:19:41PM -0400, Jagannathan Raman wrote:
>> Assign separate address space for each device in the remote processes.
>>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> ---
>> include/hw/remote/iommu.h | 18 ++++++++
>> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
>> MAINTAINERS | 2 +
>> hw/remote/meson.build | 1 +
>> 4 files changed, 116 insertions(+)
>> create mode 100644 include/hw/remote/iommu.h
>> create mode 100644 hw/remote/iommu.c
>>
>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
>> new file mode 100644
>> index 0000000000..8f850400f1
>> --- /dev/null
>> +++ b/include/hw/remote/iommu.h
>> @@ -0,0 +1,18 @@
>> +/**
>> + * Copyright © 2022 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef REMOTE_IOMMU_H
>> +#define REMOTE_IOMMU_H
>> +
>> +#include "hw/pci/pci_bus.h"
>> +
>> +void remote_configure_iommu(PCIBus *pci_bus);
>> +
>> +void remote_iommu_del_device(PCIDevice *pci_dev);
>> +
>> +#endif
>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
>> new file mode 100644
>> index 0000000000..13f329b45d
>> --- /dev/null
>> +++ b/hw/remote/iommu.c
>> @@ -0,0 +1,95 @@
>> +/**
>> + * IOMMU for remote device
>> + *
>> + * Copyright © 2022 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +
>> +#include "hw/remote/iommu.h"
>> +#include "hw/pci/pci_bus.h"
>> +#include "hw/pci/pci.h"
>> +#include "exec/memory.h"
>> +#include "exec/address-spaces.h"
>> +#include "trace.h"
>> +
>> +struct RemoteIommuElem {
>> + AddressSpace as;
>> + MemoryRegion mr;
>> +};
>> +
>> +struct RemoteIommuTable {
>> + QemuMutex lock;
>> + GHashTable *elem_by_bdf;
>> +} remote_iommu_table;
>> +
>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i)
>> +
>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
>> + void *opaque, int devfn)
>> +{
>> + struct RemoteIommuTable *iommu_table = opaque;
>> + struct RemoteIommuElem *elem = NULL;
>> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn);
>> +
>> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf));
>
> Why is a lock needed around g_hash_table_insert() below but no lock is
> held around g_hash_table_lookup()?
>
> Insertion isn't atomic because lookup and insert are separate operations
> and they are not done under a single lock.
Thanks for the catch! The lock should cover lookup also.
>
>> +
>> + if (!elem) {
>> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf);
>> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf);
>> +
>> + elem = g_malloc0(sizeof(struct RemoteIommuElem));
>> +
>> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX);
>> + address_space_init(&elem->as, &elem->mr, as_name);
>> +
>> + qemu_mutex_lock(&iommu_table->lock);
>> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), elem);
>> + qemu_mutex_unlock(&iommu_table->lock);
>> + }
>> +
>> + return &elem->as;
>> +}
>> +
>> +static void remote_iommu_del_elem(gpointer data)
>> +{
>> + struct RemoteIommuElem *elem = data;
>> +
>> + g_assert(elem);
>> +
>> + memory_region_unref(&elem->mr);
>> + address_space_destroy(&elem->as);
>> +
>> + g_free(elem);
>> +}
>> +
>> +void remote_iommu_del_device(PCIDevice *pci_dev)
>> +{
>> + int pci_bdf;
>> +
>> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) {
>> + return;
>> + }
>> +
>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
>> +
>> + qemu_mutex_lock(&remote_iommu_table.lock);
>> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, INT2VOIDP(pci_bdf));
>> + qemu_mutex_unlock(&remote_iommu_table.lock);
>> +}
>> +
>> +void remote_configure_iommu(PCIBus *pci_bus)
>> +{
>> + if (!remote_iommu_table.elem_by_bdf) {
>> + remote_iommu_table.elem_by_bdf =
>> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem);
>> + qemu_mutex_init(&remote_iommu_table.lock);
>> + }
>> +
>> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, &remote_iommu_table);
>
> Why is remote_iommu_table global? It could be per-PCIBus and indexed by
> just devfn instead of the full BDF.
It’s global because remote_iommu_del_device() needs it for cleanup.
OK, will make it a per bus property.
Thank you!
--
Jag
© 2016 - 2026 Red Hat, Inc.