Physical AMD IOMMU supports up to 64 bits of DMA address. When a device tries
to read or write from a given DMA address, the IOMMU translates the address
using the page table assigned to that device. Since the IOMMU uses per-device
page tables, the emulated IOMMU should use a cache tag of 68 bits
(64-bit address - 12-bit page alignment + 16-bit device ID).
The current emulated AMD IOMMU uses a GLib hash table to implement the
software IOTLB and uses a 64-bit key to store the IOVA and device ID, which
limits the IOVA to 60 bits. This causes a failure while setting up the device
when the guest is booted with "iommu.forcedac=1".
To solve this problem, define `struct amdvi_iotlb_key`, which uses the 64-bit
IOVA and the 16-bit devid as the key to store and look up IOTLB entries.
Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
hw/i386/amd_iommu.c | 51 ++++++++++++++++++++++++++++-----------------
hw/i386/amd_iommu.h | 5 +++--
2 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 0a4b4d46d885..5106d9cc4036 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -106,6 +106,11 @@ typedef struct amdvi_as_key {
int devfn;
} amdvi_as_key;
+typedef struct amdvi_iotlb_key {
+ uint64_t gfn;
+ uint16_t devid;
+} amdvi_iotlb_key;
+
uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
@@ -377,16 +382,6 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
PCI_STATUS_SIG_TARGET_ABORT);
}
-static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
-{
- return *((const uint64_t *)v1) == *((const uint64_t *)v2);
-}
-
-static guint amdvi_uint64_hash(gconstpointer v)
-{
- return (guint)*(const uint64_t *)v;
-}
-
static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
{
const struct amdvi_as_key *key1 = v1;
@@ -423,11 +418,27 @@ static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
amdvi_find_as_by_devid, &devid);
}
+static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2)
+{
+ const amdvi_iotlb_key *key1 = v1;
+ const amdvi_iotlb_key *key2 = v2;
+
+ return key1->devid == key2->devid && key1->gfn == key2->gfn;
+}
+
+static guint amdvi_iotlb_hash(gconstpointer v)
+{
+ const amdvi_iotlb_key *key = v;
+ /* Use GPA and DEVID to find the bucket */
+ return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K |
+ (key->devid & ~AMDVI_PAGE_MASK_4K));
+}
+
+
static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
- uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
- ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
return g_hash_table_lookup(s->iotlb, &key);
}
@@ -449,8 +460,7 @@ static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
uint64_t devid)
{
- uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
- ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
g_hash_table_remove(s->iotlb, &key);
}
@@ -461,8 +471,10 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
/* don't cache erroneous translations */
if (to_cache.perm != IOMMU_NONE) {
AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
- uint64_t *key = g_new(uint64_t, 1);
- uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+ amdvi_iotlb_key *key = g_new(amdvi_iotlb_key, 1);
+
+ key->gfn = AMDVI_GET_IOTLB_GFN(gpa);
+ key->devid = devid;
trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
PCI_FUNC(devid), gpa, to_cache.translated_addr);
@@ -475,7 +487,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
entry->perms = to_cache.perm;
entry->translated_addr = to_cache.translated_addr;
entry->page_mask = to_cache.addr_mask;
- *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ entry->devid = devid;
+
g_hash_table_replace(s->iotlb, key, entry);
}
}
@@ -2529,8 +2542,8 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
}
}
- s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
- amdvi_uint64_equal, g_free, g_free);
+ s->iotlb = g_hash_table_new_full(amdvi_iotlb_hash,
+ amdvi_iotlb_equal, g_free, g_free);
s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
amdvi_as_equal, g_free, g_free);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 38471b95d153..8089f9472ac4 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -220,8 +220,9 @@
#define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9))
/* IOTLB */
-#define AMDVI_IOTLB_MAX_SIZE 1024
-#define AMDVI_DEVID_SHIFT 36
+#define AMDVI_IOTLB_MAX_SIZE 1024
+#define AMDVI_IOTLB_DEVID_SHIFT 48
+#define AMDVI_GET_IOTLB_GFN(addr) (addr >> AMDVI_PAGE_SHIFT_4K)
/* default extended feature */
#define AMDVI_DEFAULT_EXT_FEATURES \
--
2.34.1
Hi Sairaj,
On 10/8/25 12:43 PM, Sairaj Kodilkar wrote:
> Physical AMD IOMMU supports upto 64 bits of DMA address. When device tries
s/upto/up to/ and "a device"
> to read or write from the given DMA address, IOMMU translates the address
"a given DMA address"
> using page table assigned to that device. Since IOMMU uses per device page
> tables, the emulated IOMMU should use the cache tag of 68 bits
> (64 bit address - 12 bit page alignment + 16 device ID).
>
> Current emulated AMD IOMMU uses GLib hash table to create software iotlb
> and uses 64 bit key to store the IOVA and deviceID, which limits the IOVA
> to 60 bits. This cause failure while setting up the device when guest is
> booted with "iommu.forcedac=1".
>
> To solve this problem, define `struct amdvi_iotlb_key` which uses 64 bit
> IOVA and 16 bit devid as key to store and lookup IOTLB entry.
>
I wouldn't necessarily mention and quote the structure name since that
is an implementation detail and it might change in the future.
Also, the current implementation already combines a 64-bit IOVA
(technically a 52-bit gfn) with a 16-bit devid; the real change in this
patch is in how those same values are shifted to construct a hash
key that avoids truncation as much as possible. So I'd reword the commit
message to highlight that.
> Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU")
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
> ---
> hw/i386/amd_iommu.c | 51 ++++++++++++++++++++++++++++-----------------
> hw/i386/amd_iommu.h | 5 +++--
> 2 files changed, 35 insertions(+), 21 deletions(-)
>
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index 0a4b4d46d885..5106d9cc4036 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -106,6 +106,11 @@ typedef struct amdvi_as_key {
> int devfn;
> } amdvi_as_key;
>
> +typedef struct amdvi_iotlb_key {
> + uint64_t gfn;
> + uint16_t devid;
> +} amdvi_iotlb_key;
> +
> uint64_t amdvi_extended_feature_register(AMDVIState *s)
> {
> uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
> @@ -377,16 +382,6 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
> PCI_STATUS_SIG_TARGET_ABORT);
> }
>
> -static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
> -{
> - return *((const uint64_t *)v1) == *((const uint64_t *)v2);
> -}
> -
> -static guint amdvi_uint64_hash(gconstpointer v)
> -{
> - return (guint)*(const uint64_t *)v;
> -}
> -
> static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
> {
> const struct amdvi_as_key *key1 = v1;
> @@ -423,11 +418,27 @@ static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
> amdvi_find_as_by_devid, &devid);
> }
>
> +static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2)
> +{
> + const amdvi_iotlb_key *key1 = v1;
> + const amdvi_iotlb_key *key2 = v2;
> +
> + return key1->devid == key2->devid && key1->gfn == key2->gfn;
> +}
> +
> +static guint amdvi_iotlb_hash(gconstpointer v)
> +{
> + const amdvi_iotlb_key *key = v;
> + /* Use GPA and DEVID to find the bucket */
> + return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K |
> + (key->devid & ~AMDVI_PAGE_MASK_4K));
> +}
> +
> +
> static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
> uint64_t devid)
> {
> - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
> - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
> + amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
This line initializes the key fields with the opposite of the intended
values. Please use this initialization style instead to prevent these
types of errors, plus it makes the definitions more readable:
- amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
+ amdvi_iotlb_key key = {
+ .gfn = AMDVI_GET_IOTLB_GFN(addr),
+ .devid = devid,
+ };
> return g_hash_table_lookup(s->iotlb, &key);
> }
>
> @@ -449,8 +460,7 @@ static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
> static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
> uint64_t devid)
> {
> - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
> - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
> + amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
Same as above: the key fields are initialized in the wrong order. The same
easy fix applies here: use designated initializers.
> g_hash_table_remove(s->iotlb, &key);
> }
>
> @@ -461,8 +471,10 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
> /* don't cache erroneous translations */
> if (to_cache.perm != IOMMU_NONE) {
> AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
> - uint64_t *key = g_new(uint64_t, 1);
> - uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
> + amdvi_iotlb_key *key = g_new(amdvi_iotlb_key, 1);
> +
> + key->gfn = AMDVI_GET_IOTLB_GFN(gpa);
> + key->devid = devid;
>
> trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
> PCI_FUNC(devid), gpa, to_cache.translated_addr);
> @@ -475,7 +487,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
> entry->perms = to_cache.perm;
> entry->translated_addr = to_cache.translated_addr;
> entry->page_mask = to_cache.addr_mask;
> - *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
> + entry->devid = devid;
> +
> g_hash_table_replace(s->iotlb, key, entry);
> }
> }
> @@ -2529,8 +2542,8 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
> }
> }
>
> - s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
> - amdvi_uint64_equal, g_free, g_free);
> + s->iotlb = g_hash_table_new_full(amdvi_iotlb_hash,
> + amdvi_iotlb_equal, g_free, g_free);
>
> s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
> amdvi_as_equal, g_free, g_free);
> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> index 38471b95d153..8089f9472ac4 100644
> --- a/hw/i386/amd_iommu.h
> +++ b/hw/i386/amd_iommu.h
> @@ -220,8 +220,9 @@
> #define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9))
>
> /* IOTLB */
> -#define AMDVI_IOTLB_MAX_SIZE 1024
> -#define AMDVI_DEVID_SHIFT 36
> +#define AMDVI_IOTLB_MAX_SIZE 1024
> +#define AMDVI_IOTLB_DEVID_SHIFT 48
Remove AMDVI_IOTLB_DEVID_SHIFT since it is not currently used (I assume
it is a leftover from an earlier prototype)...
Thank you,
Alejandro
> +#define AMDVI_GET_IOTLB_GFN(addr) (addr >> AMDVI_PAGE_SHIFT_4K)
>
> /* default extended feature */
> #define AMDVI_DEFAULT_EXT_FEATURES \
On 10/10/2025 6:52 AM, Alejandro Jimenez wrote:
Hi Alejandro,
> Hi Sairaj,
>
> On 10/8/25 12:43 PM, Sairaj Kodilkar wrote:
>> Physical AMD IOMMU supports upto 64 bits of DMA address. When device
>> tries
>
> s/upto/up to/ and "a device"
>
>> to read or write from the given DMA address, IOMMU translates the
>> address
>
> "a given DMA address"
>
>> using page table assigned to that device. Since IOMMU uses per device
>> page
>> tables, the emulated IOMMU should use the cache tag of 68 bits
>> (64 bit address - 12 bit page alignment + 16 device ID).
>>
>> Current emulated AMD IOMMU uses GLib hash table to create software iotlb
>> and uses 64 bit key to store the IOVA and deviceID, which limits the
>> IOVA
>> to 60 bits. This cause failure while setting up the device when guest is
>> booted with "iommu.forcedac=1".
>>
>> To solve this problem, define `struct amdvi_iotlb_key` which uses 64 bit
>> IOVA and 16 bit devid as key to store and lookup IOTLB entry.
>>
>
> I wouldn't necessarily mention and quote the structure name since that
> is an implementation detail and it might change in the future.
>
> Also, the current implementation also combines a 64-bit IOVA
> (technically a 52bit gfn) with a 16-bit devid, the real change in this
> patch is in how those same values are being shifted to construct a
> hash key that avoids truncation as much as possible. So I'd reword the
> commit message to highlight that.
>
I will update the commit message to highlight this.
>> Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU")
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> ---
>> hw/i386/amd_iommu.c | 51 ++++++++++++++++++++++++++++-----------------
>> hw/i386/amd_iommu.h | 5 +++--
>> 2 files changed, 35 insertions(+), 21 deletions(-)
>>
>> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
>> index 0a4b4d46d885..5106d9cc4036 100644
>> --- a/hw/i386/amd_iommu.c
>> +++ b/hw/i386/amd_iommu.c
>> @@ -106,6 +106,11 @@ typedef struct amdvi_as_key {
>> int devfn;
>> } amdvi_as_key;
>> +typedef struct amdvi_iotlb_key {
>> + uint64_t gfn;
>> + uint16_t devid;
>> +} amdvi_iotlb_key;
>> +
>> uint64_t amdvi_extended_feature_register(AMDVIState *s)
>> {
>> uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
>> @@ -377,16 +382,6 @@ static void amdvi_log_pagetab_error(AMDVIState
>> *s, uint16_t devid,
>> PCI_STATUS_SIG_TARGET_ABORT);
>> }
>> -static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer
>> v2)
>> -{
>> - return *((const uint64_t *)v1) == *((const uint64_t *)v2);
>> -}
>> -
>> -static guint amdvi_uint64_hash(gconstpointer v)
>> -{
>> - return (guint)*(const uint64_t *)v;
>> -}
>> -
>> static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
>> {
>> const struct amdvi_as_key *key1 = v1;
>> @@ -423,11 +418,27 @@ static AMDVIAddressSpace
>> *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid)
>> amdvi_find_as_by_devid, &devid);
>> }
>> +static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> + const amdvi_iotlb_key *key1 = v1;
>> + const amdvi_iotlb_key *key2 = v2;
>> +
>> + return key1->devid == key2->devid && key1->gfn == key2->gfn;
>> +}
>> +
>> +static guint amdvi_iotlb_hash(gconstpointer v)
>> +{
>> + const amdvi_iotlb_key *key = v;
>> + /* Use GPA and DEVID to find the bucket */
>> + return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K |
>> + (key->devid & ~AMDVI_PAGE_MASK_4K));
>> +}
>> +
>> +
>> static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
>> uint64_t devid)
>> {
>> - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
>> - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
>> + amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
>
> This line initializes the key fields with the opposite of the intended
> values. Please use this initialization style instead to prevent these
> types of errors, plus it makes the definitions more readable:
Good catch. This was something from my older prototype where fields were
reversed.
Will update it.
>
> - amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
> + amdvi_iotlb_key key = {
> + .gfn = AMDVI_GET_IOTLB_GFN(addr),
> + .devid = devid,
> + };
>
>
>> return g_hash_table_lookup(s->iotlb, &key);
>> }
>> @@ -449,8 +460,7 @@ static gboolean
>> amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
>> static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
>> uint64_t devid)
>> {
>> - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
>> - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
>> + amdvi_iotlb_key key = {devid, AMDVI_GET_IOTLB_GFN(addr)};
>
> Same as above, key fields are initialized in incorrect order. Same
> easy fix by using designated initializers.
>
>> g_hash_table_remove(s->iotlb, &key);
>> }
>> @@ -461,8 +471,10 @@ static void amdvi_update_iotlb(AMDVIState *s,
>> uint16_t devid,
>> /* don't cache erroneous translations */
>> if (to_cache.perm != IOMMU_NONE) {
>> AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
>> - uint64_t *key = g_new(uint64_t, 1);
>> - uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
>> + amdvi_iotlb_key *key = g_new(amdvi_iotlb_key, 1);
>> +
>> + key->gfn = AMDVI_GET_IOTLB_GFN(gpa);
>> + key->devid = devid;
>> trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid),
>> PCI_SLOT(devid),
>> PCI_FUNC(devid), gpa, to_cache.translated_addr);
>> @@ -475,7 +487,8 @@ static void amdvi_update_iotlb(AMDVIState *s,
>> uint16_t devid,
>> entry->perms = to_cache.perm;
>> entry->translated_addr = to_cache.translated_addr;
>> entry->page_mask = to_cache.addr_mask;
>> - *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
>> + entry->devid = devid;
>> +
>> g_hash_table_replace(s->iotlb, key, entry);
>> }
>> }
>> @@ -2529,8 +2542,8 @@ static void amdvi_sysbus_realize(DeviceState
>> *dev, Error **errp)
>> }
>> }
>> - s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
>> - amdvi_uint64_equal, g_free,
>> g_free);
>> + s->iotlb = g_hash_table_new_full(amdvi_iotlb_hash,
>> + amdvi_iotlb_equal, g_free,
>> g_free);
>> s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
>> amdvi_as_equal, g_free, g_free);
>> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
>> index 38471b95d153..8089f9472ac4 100644
>> --- a/hw/i386/amd_iommu.h
>> +++ b/hw/i386/amd_iommu.h
>> @@ -220,8 +220,9 @@
>> #define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) -
>> 12) % 9))
>> /* IOTLB */
>> -#define AMDVI_IOTLB_MAX_SIZE 1024
>> -#define AMDVI_DEVID_SHIFT 36
>> +#define AMDVI_IOTLB_MAX_SIZE 1024
>> +#define AMDVI_IOTLB_DEVID_SHIFT 48
>
> Remove AMDVI_IOTLB_DEVID_SHIFT since it is not currently used (I
> assume it is a left over from earlier prototype)...
Right
Thanks
Sairaj
© 2016 - 2025 Red Hat, Inc.