[PATCH for-10.0 5/7] hw/riscv/virt.c, riscv-iommu-sys.c: add MSIx support

Daniel Henrique Barboza posted 7 patches 2 weeks, 3 days ago
[PATCH for-10.0 5/7] hw/riscv/virt.c, riscv-iommu-sys.c: add MSIx support
Posted by Daniel Henrique Barboza 2 weeks, 3 days ago
MSIx support is added in the RISC-V IOMMU platform device by including
the required MSIx facilities to alow software to properly setup the MSIx
subsystem.

We took inspiration of what is being done in the riscv-iommu-pci device,
mainly msix_init() and msix_notify(), while keeping in mind that
riscv-iommu-sys isn't a true PCI device and we don't need to copy/paste
all the contents of these MSIx functions.

Two extra MSI MemoryRegions were added: 'msix-table' and 'msix-pba'.
They are used to manage r/w of the MSI table and Pending Bit Array (PBA)
respectively. Both are subregions of the main IOMMU memory region,
iommu->regs_mr, initialized during riscv_iommu_realize(), and each one
has their own handlers for MSIx reads and writes.

This is the expected memory map when using this device in the 'virt'
machine:

    0000000003010000-0000000003010fff (prio 0, i/o): riscv-iommu-regs
      0000000003010300-000000000301034f (prio 0, i/o): msix-table
      0000000003010400-0000000003010407 (prio 0, i/o): msix-pba

We're now able to set IGS to RISCV_IOMMU_CAP_IGS_BOTH, and userspace is
free to decide which interrupt model to use.

Enabling MSIx support for this device in the 'virt' machine requires
adding 'msi-parent' in the iommu-sys DT.

Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
---
 hw/riscv/riscv-iommu-sys.c | 116 +++++++++++++++++++++++++++++++++++--
 hw/riscv/trace-events      |   2 +
 hw/riscv/virt.c            |   6 +-
 3 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c
index 4b82046ce9..a0ef67a20b 100644
--- a/hw/riscv/riscv-iommu-sys.c
+++ b/hw/riscv/riscv-iommu-sys.c
@@ -26,11 +26,15 @@
 #include "qemu/host-utils.h"
 #include "qemu/module.h"
 #include "qom/object.h"
+#include "exec/exec-all.h"
+#include "trace.h"
 
 #include "riscv-iommu.h"
 
 #define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333
 
+#define RISCV_IOMMU_PCI_MSIX_VECTORS 5
+
 /* RISC-V IOMMU System Platform Device Emulation */
 
 struct RISCVIOMMUStateSys {
@@ -39,21 +43,123 @@ struct RISCVIOMMUStateSys {
     uint32_t         base_irq;
     DeviceState      *irqchip;
     RISCVIOMMUState  iommu;
+
+    /* Wired int support */
     qemu_irq         irqs[RISCV_IOMMU_INTR_COUNT];
+
+    /* Memory Regions for MSIX table and pending bit entries. */
+    MemoryRegion msix_table_mmio;
+    MemoryRegion msix_pba_mmio;
+    uint8_t *msix_table;
+    uint8_t *msix_pba;
+};
+
+static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
+                                     unsigned size)
+{
+    RISCVIOMMUStateSys *s = opaque;
+
+    g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
+    return pci_get_long(s->msix_table + addr);
+}
+
+static void msix_table_mmio_write(void *opaque, hwaddr addr,
+                                  uint64_t val, unsigned size)
+{
+    RISCVIOMMUStateSys *s = opaque;
+
+    g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
+    pci_set_long(s->msix_table + addr, val);
+}
+
+static const MemoryRegionOps msix_table_mmio_ops = {
+    .read = msix_table_mmio_read,
+    .write = msix_table_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
+                                   unsigned size)
+{
+    RISCVIOMMUStateSys *s = opaque;
+
+    return pci_get_long(s->msix_pba + addr);
+}
+
+static void msix_pba_mmio_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+}
+
+static const MemoryRegionOps msix_pba_mmio_ops = {
+    .read = msix_pba_mmio_read,
+    .write = msix_pba_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .max_access_size = 4,
+    },
 };
 
+static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s,
+                                        uint32_t n_vectors)
+{
+    RISCVIOMMUState *iommu = &s->iommu;
+    uint32_t table_size = table_size = n_vectors * PCI_MSIX_ENTRY_SIZE;
+    uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG;
+    uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8;
+    uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256;
+
+    s->msix_table = g_malloc0(table_size);
+    s->msix_pba = g_malloc0(pba_size);
+
+    memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops,
+                          s, "msix-table", table_size);
+    memory_region_add_subregion(&iommu->regs_mr, table_offset,
+                                &s->msix_table_mmio);
+
+    memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s,
+                          "msix-pba", pba_size);
+    memory_region_add_subregion(&iommu->regs_mr, pba_offset,
+                                &s->msix_pba_mmio);
+}
+
+static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s,
+                                        uint32_t vector)
+{
+    uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
+    uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
+    uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
+    MemTxResult result;
+
+    address_space_stl_le(&address_space_memory, msi_addr,
+                         msi_data, MEMTXATTRS_UNSPECIFIED, &result);
+    trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result);
+}
+
 static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu,
                                       unsigned vector)
 {
     RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu);
     uint32_t fctl =  riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL);
 
-    /* We do not support MSIs yet */
-    if (!(fctl & RISCV_IOMMU_FCTL_WSI)) {
+    if (fctl & RISCV_IOMMU_FCTL_WSI) {
+        qemu_irq_pulse(s->irqs[vector]);
+        trace_riscv_iommu_sys_irq_sent(vector);
         return;
     }
 
-    qemu_irq_pulse(s->irqs[vector]);
+    riscv_iommu_sysdev_send_MSI(s, vector);
 }
 
 static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp)
@@ -82,6 +188,8 @@ static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp)
         irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i);
         sysbus_connect_irq(sysdev, i, irq);
     }
+
+    riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS);
 }
 
 static void riscv_iommu_sys_init(Object *obj)
@@ -93,7 +201,7 @@ static void riscv_iommu_sys_init(Object *obj)
     qdev_alias_all_properties(DEVICE(iommu), obj);
 
     iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS;
-    riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_WSI);
+    riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH);
 }
 
 static Property riscv_iommu_sys_properties[] = {
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
index 0527c56c91..94facbb8b1 100644
--- a/hw/riscv/trace-events
+++ b/hw/riscv/trace-events
@@ -15,3 +15,5 @@ riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0
 riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64
 riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate"
 riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response"
+riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u"
+riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%lx msi_data 0x%x result %u"
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 23d1380b86..281fc65cc6 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1043,6 +1043,7 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
 }
 
 static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
+                                 uint32_t msi_phandle,
                                  uint32_t *iommu_sys_phandle)
 {
     const char comp[] = "riscv,iommu";
@@ -1077,6 +1078,8 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
         iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW,
         iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW);
 
+    qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle);
+
     *iommu_sys_phandle = iommu_phandle;
 }
 
@@ -1117,7 +1120,8 @@ static void finalize_fdt(RISCVVirtState *s)
     create_fdt_virtio(s, virt_memmap, irq_virtio_phandle);
 
     if (virt_is_iommu_sys_enabled(s)) {
-        create_fdt_iommu_sys(s, irq_mmio_phandle, &iommu_sys_phandle);
+        create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle,
+                             &iommu_sys_phandle);
     }
     create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle,
                     iommu_sys_phandle);
-- 
2.45.2
Re: [PATCH for-10.0 5/7] hw/riscv/virt.c, riscv-iommu-sys.c: add MSIx support
Posted by Alistair Francis 4 days, 18 hours ago
On Wed, Nov 6, 2024 at 11:36 PM Daniel Henrique Barboza
<dbarboza@ventanamicro.com> wrote:
>
> MSIx support is added in the RISC-V IOMMU platform device by including
> the required MSIx facilities to alow software to properly setup the MSIx
> subsystem.
>
> We took inspiration of what is being done in the riscv-iommu-pci device,
> mainly msix_init() and msix_notify(), while keeping in mind that
> riscv-iommu-sys isn't a true PCI device and we don't need to copy/paste
> all the contents of these MSIx functions.
>
> Two extra MSI MemoryRegions were added: 'msix-table' and 'msix-pba'.
> They are used to manage r/w of the MSI table and Pending Bit Array (PBA)
> respectively. Both are subregions of the main IOMMU memory region,
> iommu->regs_mr, initialized during riscv_iommu_realize(), and each one
> has their own handlers for MSIx reads and writes.
>
> This is the expected memory map when using this device in the 'virt'
> machine:
>
>     0000000003010000-0000000003010fff (prio 0, i/o): riscv-iommu-regs
>       0000000003010300-000000000301034f (prio 0, i/o): msix-table
>       0000000003010400-0000000003010407 (prio 0, i/o): msix-pba
>
> We're now able to set IGS to RISCV_IOMMU_CAP_IGS_BOTH, and userspace is
> free to decide which interrupt model to use.
>
> Enabling MSIx support for this device in the 'virt' machine requires
> adding 'msi-parent' in the iommu-sys DT.
>
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  hw/riscv/riscv-iommu-sys.c | 116 +++++++++++++++++++++++++++++++++++--
>  hw/riscv/trace-events      |   2 +
>  hw/riscv/virt.c            |   6 +-
>  3 files changed, 119 insertions(+), 5 deletions(-)
>
> diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c
> index 4b82046ce9..a0ef67a20b 100644
> --- a/hw/riscv/riscv-iommu-sys.c
> +++ b/hw/riscv/riscv-iommu-sys.c
> @@ -26,11 +26,15 @@
>  #include "qemu/host-utils.h"
>  #include "qemu/module.h"
>  #include "qom/object.h"
> +#include "exec/exec-all.h"
> +#include "trace.h"
>
>  #include "riscv-iommu.h"
>
>  #define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333
>
> +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5
> +
>  /* RISC-V IOMMU System Platform Device Emulation */
>
>  struct RISCVIOMMUStateSys {
> @@ -39,21 +43,123 @@ struct RISCVIOMMUStateSys {
>      uint32_t         base_irq;
>      DeviceState      *irqchip;
>      RISCVIOMMUState  iommu;
> +
> +    /* Wired int support */
>      qemu_irq         irqs[RISCV_IOMMU_INTR_COUNT];
> +
> +    /* Memory Regions for MSIX table and pending bit entries. */
> +    MemoryRegion msix_table_mmio;
> +    MemoryRegion msix_pba_mmio;
> +    uint8_t *msix_table;
> +    uint8_t *msix_pba;
> +};
> +
> +static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
> +                                     unsigned size)
> +{
> +    RISCVIOMMUStateSys *s = opaque;
> +
> +    g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
> +    return pci_get_long(s->msix_table + addr);
> +}
> +
> +static void msix_table_mmio_write(void *opaque, hwaddr addr,
> +                                  uint64_t val, unsigned size)
> +{
> +    RISCVIOMMUStateSys *s = opaque;
> +
> +    g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
> +    pci_set_long(s->msix_table + addr, val);
> +}
> +
> +static const MemoryRegionOps msix_table_mmio_ops = {
> +    .read = msix_table_mmio_read,
> +    .write = msix_table_mmio_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
> +                                   unsigned size)
> +{
> +    RISCVIOMMUStateSys *s = opaque;
> +
> +    return pci_get_long(s->msix_pba + addr);
> +}
> +
> +static void msix_pba_mmio_write(void *opaque, hwaddr addr,
> +                                uint64_t val, unsigned size)
> +{
> +}
> +
> +static const MemoryRegionOps msix_pba_mmio_ops = {
> +    .read = msix_pba_mmio_read,
> +    .write = msix_pba_mmio_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .max_access_size = 4,
> +    },
>  };
>
> +static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s,
> +                                        uint32_t n_vectors)
> +{
> +    RISCVIOMMUState *iommu = &s->iommu;
> +    uint32_t table_size = table_size = n_vectors * PCI_MSIX_ENTRY_SIZE;
> +    uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG;
> +    uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8;
> +    uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256;
> +
> +    s->msix_table = g_malloc0(table_size);
> +    s->msix_pba = g_malloc0(pba_size);
> +
> +    memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops,
> +                          s, "msix-table", table_size);
> +    memory_region_add_subregion(&iommu->regs_mr, table_offset,
> +                                &s->msix_table_mmio);
> +
> +    memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s,
> +                          "msix-pba", pba_size);
> +    memory_region_add_subregion(&iommu->regs_mr, pba_offset,
> +                                &s->msix_pba_mmio);
> +}
> +
> +static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s,
> +                                        uint32_t vector)
> +{
> +    uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
> +    uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
> +    uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
> +    MemTxResult result;
> +
> +    address_space_stl_le(&address_space_memory, msi_addr,
> +                         msi_data, MEMTXATTRS_UNSPECIFIED, &result);
> +    trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result);
> +}
> +
>  static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu,
>                                        unsigned vector)
>  {
>      RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu);
>      uint32_t fctl =  riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL);
>
> -    /* We do not support MSIs yet */
> -    if (!(fctl & RISCV_IOMMU_FCTL_WSI)) {
> +    if (fctl & RISCV_IOMMU_FCTL_WSI) {
> +        qemu_irq_pulse(s->irqs[vector]);
> +        trace_riscv_iommu_sys_irq_sent(vector);
>          return;
>      }
>
> -    qemu_irq_pulse(s->irqs[vector]);
> +    riscv_iommu_sysdev_send_MSI(s, vector);
>  }
>
>  static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp)
> @@ -82,6 +188,8 @@ static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp)
>          irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i);
>          sysbus_connect_irq(sysdev, i, irq);
>      }
> +
> +    riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS);
>  }
>
>  static void riscv_iommu_sys_init(Object *obj)
> @@ -93,7 +201,7 @@ static void riscv_iommu_sys_init(Object *obj)
>      qdev_alias_all_properties(DEVICE(iommu), obj);
>
>      iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS;
> -    riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_WSI);
> +    riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH);
>  }
>
>  static Property riscv_iommu_sys_properties[] = {
> diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
> index 0527c56c91..94facbb8b1 100644
> --- a/hw/riscv/trace-events
> +++ b/hw/riscv/trace-events
> @@ -15,3 +15,5 @@ riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0
>  riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64
>  riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate"
>  riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response"
> +riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u"
> +riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%lx msi_data 0x%x result %u"
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 23d1380b86..281fc65cc6 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1043,6 +1043,7 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
>  }
>
>  static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
> +                                 uint32_t msi_phandle,
>                                   uint32_t *iommu_sys_phandle)
>  {
>      const char comp[] = "riscv,iommu";
> @@ -1077,6 +1078,8 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
>          iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW,
>          iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW);
>
> +    qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle);
> +
>      *iommu_sys_phandle = iommu_phandle;
>  }
>
> @@ -1117,7 +1120,8 @@ static void finalize_fdt(RISCVVirtState *s)
>      create_fdt_virtio(s, virt_memmap, irq_virtio_phandle);
>
>      if (virt_is_iommu_sys_enabled(s)) {
> -        create_fdt_iommu_sys(s, irq_mmio_phandle, &iommu_sys_phandle);
> +        create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle,
> +                             &iommu_sys_phandle);
>      }
>      create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle,
>                      iommu_sys_phandle);
> --
> 2.45.2
>
>