The interrupt remapping table must be decrypted so that the VMM can access
the memory to emulate interrupt remapping. However, the AMD IOMMU driver
currently allocates the table with kmem_cache, mainly to enforce the
128-byte memory alignment specified in the AMD IOMMU spec.

For SEV guests, memory encryption is done at page granularity. Therefore,
the driver must be modified to allocate the table using page-aligned
memory, which still satisfies the original 128-byte alignment requirement
(PAGE_SIZE is a multiple of 128).

In addition, since the table is set up per device, it can be allocated on
the NUMA node local to the IOMMU to help reduce IRTE access latency.
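
For reference, a minimal sketch of the new allocation path (the
struct amd_iommu_mem fields, ALLOC_MODE_GUEST_MEM_DECRYPT, and
amd_iommu_get_zeroed_mem_node() follow the diff below and are assumed
to be introduced earlier in this series):

    /*
     * Sketch: allocate the IRT page-aligned and decrypted (shared
     * with the VMM), preferring the IOMMU's NUMA node.
     */
    int nid = (iommu && iommu->dev) ? dev_to_node(&iommu->dev->dev)
                                    : NUMA_NO_NODE;

    mem->modes = ALLOC_MODE_GUEST_MEM_DECRYPT;
    mem->order = get_order(get_irq_table_size());
    mem->buf   = amd_iommu_get_zeroed_mem_node(nid, GFP_KERNEL, mem);
    if (!mem->buf)
            return NULL;
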
Suggested-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu_types.h |  3 +-
 drivers/iommu/amd/init.c            | 31 +++++++-----------
 drivers/iommu/amd/iommu.c           | 50 ++++++++++++++++-------------
 3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 8ced34cac1db..980fbb9bae39 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -309,7 +309,6 @@
* AMD IOMMU hardware only support 512 IRTEs despite
* the architectural limitation of 2048 entries.
*/
-#define DTE_INTTAB_ALIGNMENT 128
#define DTE_INTTABLEN_VALUE 9ULL
#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1)
#define DTE_INTTABLEN_MASK (0xfULL << 1)
@@ -497,7 +496,7 @@ struct amd_iommu_mem {
struct irq_remap_table {
raw_spinlock_t lock;
unsigned min_index;
- u32 *table;
+ struct amd_iommu_mem mem;
};
/* Interrupt remapping feature used? */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1b74a31b4337..b3ff89952c7f 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -697,6 +697,17 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se
static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
+ int i;
+ struct irq_remap_table *table;
+
+ for (i = 0; i <= pci_seg->last_bdf; ++i) {
+ table = pci_seg->irq_lookup_table[i];
+ if (table) {
+ amd_iommu_free_mem(&table->mem);
+ kfree(table);
+ }
+ }
+
kmemleak_free(pci_seg->irq_lookup_table);
iommu_free_pages(pci_seg->irq_lookup_table,
get_order(pci_seg->rlookup_table_size));
@@ -2923,9 +2934,6 @@ static struct syscore_ops amd_iommu_syscore_ops = {
static void __init free_iommu_resources(void)
{
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
free_iommu_all();
free_pci_segments();
}
@@ -3026,7 +3034,7 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
- int remap_cache_sz, ret;
+ int ret;
acpi_status status;
if (!amd_iommu_detected)
@@ -3090,21 +3098,6 @@ static int __init early_amd_iommu_init(void)
if (amd_iommu_irq_remap) {
struct amd_iommu_pci_seg *pci_seg;
- /*
- * Interrupt remapping enabled, create kmem_cache for the
- * remapping tables.
- */
- ret = -ENOMEM;
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
- else
- remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
- amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
- remap_cache_sz,
- DTE_INTTAB_ALIGNMENT,
- 0, NULL);
- if (!amd_iommu_irq_cache)
- goto out;
for_each_pci_segment(pci_seg) {
if (alloc_irq_lookup_table(pci_seg))
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4f95c726e139..f98a10b7925b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -73,8 +73,6 @@ struct iommu_cmd {
u32 data[4];
};
-struct kmem_cache *amd_iommu_irq_cache;
-
static void detach_device(struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
@@ -2998,7 +2996,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
dte = dev_table[devid].data[2];
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
- dte |= iommu_virt_to_phys(table->table);
+ dte |= amd_iommu_mem_to_phys(&table->mem);
dte |= DTE_IRQ_REMAP_INTCTL;
dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
@@ -3024,27 +3022,35 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}
-static struct irq_remap_table *__alloc_irq_table(void)
+static size_t get_irq_table_size(void)
+{
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ return MAX_IRQS_PER_TABLE * sizeof(u32);
+
+ return MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
+}
+
+static struct irq_remap_table *__alloc_irq_table(struct amd_iommu *iommu)
{
+ struct amd_iommu_mem *mem;
struct irq_remap_table *table;
+ int order = get_order(get_irq_table_size());
+ int nid = (iommu && iommu->dev) ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
- if (!table->table) {
+ mem = &table->mem;
+ mem->modes = ALLOC_MODE_GUEST_MEM_DECRYPT;
+ mem->order = order;
+ mem->buf = amd_iommu_get_zeroed_mem_node(nid, GFP_KERNEL, mem);
+ if (!mem->buf) {
kfree(table);
return NULL;
}
raw_spin_lock_init(&table->lock);
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
return table;
}
@@ -3101,7 +3107,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table();
+ new_table = __alloc_irq_table(iommu);
if (!new_table)
return NULL;
@@ -3136,7 +3142,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
if (new_table) {
- kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ amd_iommu_free_mem(&new_table->mem);
kfree(new_table);
}
return table;
@@ -3202,7 +3208,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
raw_spin_lock_irqsave(&table->lock, flags);
- entry = (struct irte_ga *)table->table;
+ entry = (struct irte_ga *)table->mem.buf;
entry = &entry[index];
/*
@@ -3244,7 +3250,7 @@ static int modify_irte(struct amd_iommu *iommu,
return -ENOMEM;
raw_spin_lock_irqsave(&table->lock, flags);
- table->table[index] = irte->val;
+ ((u32 *)table->mem.buf)[index] = irte->val;
raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt_and_complete(iommu, devid);
@@ -3358,12 +3364,12 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid
#define IRTE_ALLOCATED (~1U)
static void irte_set_allocated(struct irq_remap_table *table, int index)
{
- table->table[index] = IRTE_ALLOCATED;
+ ((u32 *)table->mem.buf)[index] = IRTE_ALLOCATED;
}
static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];
memset(&irte->lo.val, 0, sizeof(u64));
@@ -3373,7 +3379,7 @@ static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
static bool irte_is_allocated(struct irq_remap_table *table, int index)
{
- union irte *ptr = (union irte *)table->table;
+ union irte *ptr = (union irte *)table->mem.buf;
union irte *irte = &ptr[index];
return irte->val != 0;
@@ -3381,7 +3387,7 @@ static bool irte_is_allocated(struct irq_remap_table *table, int index)
static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];
return irte->hi.fields.vector != 0;
@@ -3389,12 +3395,12 @@ static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
static void irte_clear_allocated(struct irq_remap_table *table, int index)
{
- table->table[index] = 0;
+ ((u32 *)table->mem.buf)[index] = 0;
}
static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];
memset(&irte->lo.val, 0, sizeof(u64));
--
2.34.1