From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
"Set" function tracks VFIO devices in the hash table. This is useful when
looking up per-device host IOMMU information later on.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
hw/i386/amd_iommu.c | 71 +++++++++++++++++++++++++++++++++++++++++++++
hw/i386/amd_iommu.h | 8 +++++
2 files changed, 79 insertions(+)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 378e0cb55eab..8b146f4d33d2 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -382,6 +382,22 @@ static guint amdvi_uint64_hash(gconstpointer v)
return (guint)*(const uint64_t *)v;
}
+/*
+ * GHashFunc for struct AMDVI_dte_key: the bus pointer truncated to guint
+ * and shifted left by 8 bits, OR-ed with devfn.
+ */
+static guint amdvi_dte_hash(gconstpointer v)
+{
+    const struct AMDVI_dte_key *k = v;
+    guint hash = (guint)(uintptr_t)k->bus;
+
+    hash <<= 8;
+    hash |= k->devfn;
+    return hash;
+}
+
+/* GEqualFunc for struct AMDVI_dte_key: equal when bus and devfn both match. */
+static gboolean amdvi_dte_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct AMDVI_dte_key *lhs = v1;
+    const struct AMDVI_dte_key *rhs = v2;
+
+    if (lhs->bus != rhs->bus) {
+        return FALSE;
+    }
+    return lhs->devfn == rhs->devfn;
+}
+
static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
@@ -2291,8 +2307,60 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &iommu_as[devfn]->as;
}
+/*
+ * PCIIOMMUOps.set_iommu_device callback.
+ *
+ * Records @hiod in s->hiod_hash under the (bus, devfn) key and takes a
+ * reference on it. Fails, setting @errp, when an entry for the same key is
+ * already present or when the host IOMMU hw-info type is neither AMD nor
+ * DEFAULT.
+ *
+ * Returns: true on success, false on failure.
+ */
+static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+                                   HostIOMMUDevice *hiod, Error **errp)
+{
+    AMDVIState *s = opaque;
+    struct AMDVI_dte_key lookup = { .bus = bus, .devfn = devfn };
+    struct AMDVI_dte_key *stored;
+    bool type_ok;
+
+    assert(hiod);
+    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+    if (g_hash_table_lookup(s->hiod_hash, &lookup)) {
+        error_setg(errp, "Host IOMMU device already exist");
+        return false;
+    }
+
+    type_ok = hiod->caps.type == IOMMU_HW_INFO_TYPE_AMD ||
+              hiod->caps.type == IOMMU_HW_INFO_TYPE_DEFAULT;
+    if (!type_ok) {
+        error_setg(errp, "IOMMU hardware is not compatible");
+        return false;
+    }
+
+    /* The table owns a heap copy of the key; the stack copy is lookup-only */
+    stored = g_malloc(sizeof(*stored));
+    stored->bus = bus;
+    stored->devfn = devfn;
+
+    object_ref(hiod);
+    g_hash_table_insert(s->hiod_hash, stored, hiod);
+
+    return true;
+}
+
+/*
+ * PCIIOMMUOps.unset_iommu_device callback.
+ *
+ * Drops the s->hiod_hash entry for (bus, devfn), if any. The table's key and
+ * value destroy functions run as part of the removal.
+ */
+static void amdvi_unset_iommu_device(PCIBus *bus, void *opaque,
+                                     int devfn)
+{
+    AMDVIState *s = opaque;
+    struct AMDVI_dte_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+
+    /*
+     * g_hash_table_remove() does nothing (and returns FALSE) when the key
+     * is absent, so no separate lookup is needed beforehand.
+     */
+    g_hash_table_remove(s->hiod_hash, &key);
+}
+
static const PCIIOMMUOps amdvi_iommu_ops = {
.get_address_space = amdvi_host_dma_iommu,
+ .set_iommu_device = amdvi_set_iommu_device,
+ .unset_iommu_device = amdvi_unset_iommu_device,
};
static const MemoryRegionOps mmio_mem_ops = {
@@ -2510,6 +2578,9 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
amdvi_uint64_equal, g_free, g_free);
+    /*
+     * Keys are heap-allocated struct AMDVI_dte_key (released with g_free).
+     * Values are HostIOMMUDevice objects on which amdvi_set_iommu_device()
+     * took a reference, so removal must drop that reference with
+     * object_unref() rather than g_free() an object the table does not own.
+     */
+    s->hiod_hash = g_hash_table_new_full(amdvi_dte_hash, amdvi_dte_equal,
+                                         g_free, object_unref);
+
/* set up MMIO */
memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
"amdvi-mmio", AMDVI_MMIO_SIZE);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index daf82fc85f96..e6f6902fe06d 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -358,6 +358,11 @@ struct AMDVIPCIState {
uint32_t capab_offset; /* capability offset pointer */
};
+/*
+ * Key type hashed and compared by amdvi_dte_hash()/amdvi_dte_equal():
+ * identifies a device by the bus it sits on and its devfn.
+ */
+struct AMDVI_dte_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
struct AMDVIState {
X86IOMMUState iommu; /* IOMMU bus device */
AMDVIPCIState *pci; /* IOMMU PCI device */
@@ -416,6 +421,9 @@ struct AMDVIState {
/* IOTLB */
GHashTable *iotlb;
+ /* HostIOMMUDevice hash table, keyed by struct AMDVI_dte_key */
+ GHashTable *hiod_hash;
+
/* Interrupt remapping */
bool ga_enabled;
bool xtsup;
--
2.34.1
Extended Feature Register 2 (EFR2) exposes newer IOMMU features such as
NUM_INT_REMAP_SUP. Expose EFR2 to the guest at MMIO offset 0x01A0 and in
the "EFR Register Image 2" field of the ACPI table (previously hard-coded
to 0).
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
hw/i386/acpi-build.c | 4 +++-
hw/i386/amd_iommu-stub.c | 5 +++++
hw/i386/amd_iommu.c | 20 +++++++++++++++++---
hw/i386/amd_iommu.h | 4 ++++
4 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9446a9f862ca..1d4fd064e9a5 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1873,7 +1873,9 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
amdvi_extended_feature_register(s),
8);
/* EFR Register Image 2 */
- build_append_int_noprefix(table_data, 0, 8);
+ build_append_int_noprefix(table_data,
+ amdvi_extended_feature_register2(s),
+ 8);
/* IVHD entries as found above */
g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
diff --git a/hw/i386/amd_iommu-stub.c b/hw/i386/amd_iommu-stub.c
index d62a3732e60f..39b1afc0c751 100644
--- a/hw/i386/amd_iommu-stub.c
+++ b/hw/i386/amd_iommu-stub.c
@@ -24,3 +24,8 @@ uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
return AMDVI_DEFAULT_EXT_FEATURES;
}
+
+/*
+ * Stub variant (amd_iommu-stub.c): always reports the default EFR2 value,
+ * ignoring @s.
+ */
+uint64_t amdvi_extended_feature_register2(AMDVIState *s)
+{
+ return AMDVI_DEFAULT_EXT_FEATURES2;
+}
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 8b146f4d33d2..3221bf5a0303 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -114,6 +114,11 @@ uint64_t amdvi_extended_feature_register(AMDVIState *s)
return feature;
}
+/*
+ * Return the value for Extended Feature Register 2 (EFR2); at this point
+ * it is always AMDVI_DEFAULT_EXT_FEATURES2 and @s is not consulted.
+ */
+uint64_t amdvi_extended_feature_register2(AMDVIState *s)
+{
+ return AMDVI_DEFAULT_EXT_FEATURES2;
+}
+
/* configure MMIO registers at startup/reset */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
uint64_t romask, uint64_t w1cmask)
@@ -123,6 +128,16 @@ static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
stq_le_p(&s->w1cmask[addr], w1cmask);
}
+/*
+ * Write the current EFR and EFR2 values into their MMIO register slots,
+ * together with the masks passed as amdvi_set_quad()'s romask/w1cmask
+ * arguments (w1cmask is 0 for both registers).
+ */
+static void amdvi_refresh_efrs(struct AMDVIState *s)
+{
+    uint64_t value;
+
+    value = amdvi_extended_feature_register(s);
+    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, value, 0xffffffffffffffef, 0);
+
+    value = amdvi_extended_feature_register2(s);
+    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES2, value, 0xffffffffffffffff, 0);
+}
+
static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
return lduw_le_p(&s->mmior[addr]);
@@ -2307,6 +2322,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &iommu_as[devfn]->as;
}
+
static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
HostIOMMUDevice *hiod, Error **errp)
{
@@ -2434,9 +2450,7 @@ static void amdvi_init(AMDVIState *s)
/* reset MMIO */
memset(s->mmior, 0, AMDVI_MMIO_SIZE);
- amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
- amdvi_extended_feature_register(s),
- 0xffffffffffffffef, 0);
+ amdvi_refresh_efrs(s);
amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
}
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index e6f6902fe06d..c8eaf229b50e 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -57,6 +57,7 @@
#define AMDVI_MMIO_EXCL_BASE 0x0020
#define AMDVI_MMIO_EXCL_LIMIT 0x0028
#define AMDVI_MMIO_EXT_FEATURES 0x0030
+#define AMDVI_MMIO_EXT_FEATURES2 0x01A0
#define AMDVI_MMIO_COMMAND_HEAD 0x2000
#define AMDVI_MMIO_COMMAND_TAIL 0x2008
#define AMDVI_MMIO_EVENT_HEAD 0x2010
@@ -229,6 +230,8 @@
AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
+#define AMDVI_DEFAULT_EXT_FEATURES2 (0)
+
/* capabilities header */
#define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \
AMDVI_CAPAB_FLAG_NPCACHE | AMDVI_CAPAB_FLAG_IOTLBSUP \
@@ -433,5 +436,6 @@ struct AMDVIState {
};
uint64_t amdvi_extended_feature_register(AMDVIState *s);
+uint64_t amdvi_extended_feature_register2(AMDVIState *s);
#endif
--
2.34.1
AMD IOMMU supports up to 2048 MSIs for a single device function when the
NUM_INT_REMAP_SUP bit of Extended Feature Register 2 is set to one.
Software can enable this feature by writing one to NUM_INT_REMAP_MODE
in the control register. The MSI address destination mode (DM) bit decides
how many MSI data bits the IOMMU uses to index into the IRT. When DM = 0,
the IOMMU uses bits 8:0 (max 512) for the index; otherwise (DM = 1) it
uses bits 10:0 (max 2048).
This feature can be enabled with the flag `numint2k=on`. For passthrough
devices, the vIOMMU uses the control register provided by the vendor
capabilities to determine whether the host IOMMU has enabled 2048 MSIs. If
the host IOMMU has not enabled it, the guest feature is disabled.
Example command line:
'''
-object iommufd,id=fd0 \
-device amd_iommu,dma-remap=on,numint2k=on \
-device vfio-pci,host=<DEVID>,iommufd=fd0 \
'''
NOTE: In case of legacy VFIO container the guest will always fall back
to 512 MSIs.
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
hw/i386/amd_iommu.c | 74 ++++++++++++++++++++++++++++++++++++++++-----
hw/i386/amd_iommu.h | 12 ++++++++
2 files changed, 79 insertions(+), 7 deletions(-)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 3221bf5a0303..4f62c4ee3671 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -116,7 +116,12 @@ uint64_t amdvi_extended_feature_register(AMDVIState *s)
uint64_t amdvi_extended_feature_register2(AMDVIState *s)
{
- return AMDVI_DEFAULT_EXT_FEATURES2;
+ uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES2;
+ /* Advertise NUM_INT_REMAP_SUP only while num_int_sup_2k is set */
+ if (s->num_int_sup_2k) {
+ feature |= AMDVI_FEATURE_NUM_INT_REMAP_SUP;
+ }
+
+ return feature;
}
/* configure MMIO registers at startup/reset */
@@ -1538,6 +1543,9 @@ static void amdvi_handle_control_write(AMDVIState *s)
AMDVI_MMIO_CONTROL_CMDBUFLEN);
s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
+ s->num_int_enabled = (control >> AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT) &
+ AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK;
+
/* update the flags depending on the control register */
if (s->cmdbuf_enabled) {
amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
@@ -2119,6 +2127,25 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
* (page 5)
*/
delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
+ /*
+ * The MSI address register bit[2] is used to get the destination
+ * mode. A dest_mode of 1 is valid for fixed and arbitrated interrupts,
+ * and when the IOMMU has the 2048-interrupt (2K) mode enabled.
+ */
+ dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
+
+ if (dest_mode &&
+ iommu->num_int_enabled == AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K) {
+
+ trace_amdvi_ir_delivery_mode("2K interrupt mode");
+ ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
+ if (ret < 0) {
+ goto remap_fail;
+ }
+ /* Translate IRQ to MSI messages */
+ x86_iommu_irq_to_msi_message(&irq, translated);
+ goto out;
+ }
switch (delivery_mode) {
case AMDVI_IOAPIC_INT_TYPE_FIXED:
@@ -2159,12 +2186,6 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
goto remap_fail;
}
- /*
- * The MSI address register bit[2] is used to get the destination
- * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
- * only.
- */
- dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
if (dest_mode) {
trace_amdvi_ir_err("invalid dest_mode");
ret = -AMDVI_IR_ERR;
@@ -2322,6 +2343,30 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &iommu_as[devfn]->as;
}
+/*
+ * Re-evaluate 2K-interrupt support against a passthrough device's host
+ * IOMMU information, then rewrite the EFR MMIO registers.
+ *
+ * @s:      vIOMMU state; s->num_int_sup_2k is cleared (with a warning) when
+ *          the host cannot back the feature.
+ * @hwinfo: host AMD IOMMU hw-info, or NULL when none is available (the
+ *          caller passes NULL for non-AMD hw-info types).
+ *
+ * Nothing is checked when 2K support is already off, and @hwinfo is only
+ * dereferenced when it is non-NULL.
+ */
+static void amdvi_refresh_efrs_hwinfo(struct AMDVIState *s,
+                                      struct iommu_hw_info_amd *hwinfo)
+{
+    if (s->num_int_sup_2k) {
+        if (!hwinfo) {
+            warn_report("AMDVI: Disabling 2048 MSI for guest, "
+                        "use IOMMUFD for device passthrough to support it");
+            s->num_int_sup_2k = false;
+        } else {
+            /*
+             * NUM_INT_REMAP_MODE is a multi-bit field: extract it with the
+             * field mask and compare against the 2K encoding, as the
+             * control-register write handler does.
+             */
+            uint64_t host_mode = (hwinfo->control_register >>
+                                  AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT) &
+                                 AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK;
+
+            if (host_mode != AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K) {
+                warn_report("AMDVI: Disabling 2048 MSIs for guest, "
+                            "as host kernel does not support this feature");
+                s->num_int_sup_2k = false;
+            }
+        }
+    }
+
+    amdvi_refresh_efrs(s);
+}
static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
HostIOMMUDevice *hiod, Error **errp)
@@ -2354,6 +2399,20 @@ static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
object_ref(hiod);
g_hash_table_insert(s->hiod_hash, new_key, hiod);
+ if (hiod->caps.type == IOMMU_HW_INFO_TYPE_AMD) {
+ /*
+ * Refresh the MMIO efr registers so that changes are visible to the
+ * guest.
+ */
+ amdvi_refresh_efrs_hwinfo(s, &hiod->caps.vendor_caps.amd);
+ } else {
+ /*
+ * Pass NULL hardware registers when we have non-IOMMUFD
+ * passthrough device
+ */
+ amdvi_refresh_efrs_hwinfo(s, NULL);
+ }
+
return true;
}
@@ -2641,6 +2700,7 @@ static const Property amdvi_properties[] = {
DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
DEFINE_PROP_BOOL("dma-remap", AMDVIState, dma_remap, false),
+ DEFINE_PROP_BOOL("numint2k", AMDVIState, num_int_sup_2k, false),
};
static const VMStateDescription vmstate_amdvi_sysbus = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index c8eaf229b50e..588725fe0c25 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -107,6 +107,9 @@
#define AMDVI_MMIO_CONTROL_COMWAITINTEN (1ULL << 4)
#define AMDVI_MMIO_CONTROL_CMDBUFLEN (1ULL << 12)
#define AMDVI_MMIO_CONTROL_GAEN (1ULL << 17)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK (0x3)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT (43)
+#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K (0x1)
/* MMIO status register bits */
#define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4)
@@ -160,6 +163,7 @@
#define AMDVI_PERM_READ (1 << 0)
#define AMDVI_PERM_WRITE (1 << 1)
+/* EFR */
#define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */
#define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */
#define AMDVI_FEATURE_XT (1ULL << 2) /* x2APIC Support */
@@ -169,6 +173,9 @@
#define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */
#define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */
+/* EFR2 */
+#define AMDVI_FEATURE_NUM_INT_REMAP_SUP (1ULL << 8) /* 2K int support */
+
/* reserved DTE bits */
#define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63))
#define AMDVI_DTE_QUAD1_RESERVED 0
@@ -380,6 +387,8 @@ struct AMDVIState {
bool evtlog_enabled; /* event log enabled */
bool excl_enabled;
+ uint8_t num_int_enabled;
+
hwaddr devtab; /* base address device table */
uint64_t devtab_len; /* device table length */
@@ -433,6 +442,9 @@ struct AMDVIState {
/* DMA address translation */
bool dma_remap;
+
+ /* support for up to 2048 interrupts per device function */
+ bool num_int_sup_2k;
};
uint64_t amdvi_extended_feature_register(AMDVIState *s);
--
2.34.1
© 2016 - 2025 Red Hat, Inc.