Currently AMD IOMMU driver does not reserve domain ids programmed in the
DTE while reusing the device table inside kdump kernel. This can cause
reallocation of these domain ids for newer domains that are created by
the kdump kernel, which can lead to potential IO_PAGE_FAULTs
Hence reserve these ids inside pdom_ids.
Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
---
drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f2991c11867c..14eb9de33ccb 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
static bool __reuse_device_table(struct amd_iommu *iommu)
{
struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
- u32 lo, hi, old_devtb_size;
+ struct dev_table_entry *old_dev_tbl_entry;
+ u32 lo, hi, old_devtb_size, devid;
phys_addr_t old_devtb_phys;
+ u16 dom_id;
+ bool dte_v;
u64 entry;
+ int ret;
/* Each IOMMU use separate device table with the same size */
lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1173,6 +1177,23 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
return false;
}
+ for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
+ old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
+ dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
+ dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
+
+ if (!dte_v || !dom_id)
+ continue;
+ /*
+ * ID reservation can fail with -ENOSPC when there
+ * are multiple devices present in the same domain,
+ * hence check only for -ENOMEM.
+ */
+ ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_KERNEL);
+ if (ret == -ENOMEM)
+ return false;
+ }
+
return true;
}
--
2.34.1
On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
> Currently AMD IOMMU driver does not reserve domain ids programmed in the
> DTE while reusing the device table inside kdump kernel. This can cause
> reallocation of these domain ids for newer domains that are created by
> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>
> Hence reserve these ids inside pdom_ids.
>
> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
> ---
> drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
This seems OK
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
But the a point of this work was to remove this code:
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 48bca4dc8eb61f..1cd799913cbcd6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
phys_addr_t top_paddr, unsigned int top_level)
{
u16 domid;
- u32 old_domid;
struct dev_table_entry *initial_dte;
struct dev_table_entry new = {};
struct protection_domain *domain = dev_data->domain;
@@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
if (dev_data->ats_enabled)
new.data[1] |= DTE_FLAG_IOTLB;
- old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
new.data[1] |= domid;
/*
@@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
set_dte_gcr3_table(iommu, dev_data, &new);
update_dte256(iommu, dev_data, &new);
-
- /*
- * A kdump kernel might be replacing a domain ID that was copied from
- * the previous kernel--if so, it needs to flush the translation cache
- * entries for the old domain ID that is being overwritten
- */
- if (old_domid) {
- amd_iommu_flush_tlb_domid(iommu, old_domid);
- }
}
/*
Under the reasoning that:
- domids in use by the prior kernel are reserved in the IDA and are
never used by this kernel
- domids in the IDA must be clean
- There is no reason to flush a domid until it is returned to the IDA
- detach_device() calls amd_iommu_domain_flush_all() before the
domain can be freed and the domid returned the IDA which clears the
IOTLB
Please add a patch?
Jason
On 11/24/2025 8:14 PM, Jason Gunthorpe wrote:
> On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
>> Currently AMD IOMMU driver does not reserve domain ids programmed in the
>> DTE while reusing the device table inside kdump kernel. This can cause
>> reallocation of these domain ids for newer domains that are created by
>> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>>
>> Hence reserve these ids inside pdom_ids.
>>
>> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
>> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
>> ---
>> drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
>> 1 file changed, 22 insertions(+), 1 deletion(-)
>
> This seems OK
>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>
> But the a point of this work was to remove this code:
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 48bca4dc8eb61f..1cd799913cbcd6 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> phys_addr_t top_paddr, unsigned int top_level)
> {
> u16 domid;
> - u32 old_domid;
> struct dev_table_entry *initial_dte;
> struct dev_table_entry new = {};
> struct protection_domain *domain = dev_data->domain;
> @@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> if (dev_data->ats_enabled)
> new.data[1] |= DTE_FLAG_IOTLB;
>
> - old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
> new.data[1] |= domid;
>
> /*
> @@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> set_dte_gcr3_table(iommu, dev_data, &new);
>
> update_dte256(iommu, dev_data, &new);
> -
> - /*
> - * A kdump kernel might be replacing a domain ID that was copied from
> - * the previous kernel--if so, it needs to flush the translation cache
> - * entries for the old domain ID that is being overwritten
> - */
> - if (old_domid) {
> - amd_iommu_flush_tlb_domid(iommu, old_domid);
> - }
> }
>
> /*
>
> Under the reasoning that:
> - domids in use by the prior kernel are reserved in the IDA and are
> never used by this kernel
It looks like on non-SNP system in kdump path, if it fails to reserve old DTE
table, it tries to allocate new one! Looking into code again, it may be OK to
fail in that path as well.. so that we will have common flow.
That needs to be fixed before removing old_domid check.
-Vasant
> - domids in the IDA must be clean
> - There is no reason to flush a domid until it is returned to the IDA
> - detach_device() calls amd_iommu_domain_flush_all() before the
> domain can be freed and the domid returned the IDA which clears the
> IOTLB
>
> Please add a patch?
>
> Jason
On Wed, Nov 26, 2025 at 01:51:27PM +0530, Vasant Hegde wrote: > It looks like on non-SNP system in kdump path, if it fails to reserve old DTE > table, it tries to allocate new one! Looking into code again, it may be OK to > fail in that path as well.. so that we will have common flow. If it allocates a new (empty) one and does a full flush of the iommu that would be OK? Jason
On 11/24/2025 8:14 PM, Jason Gunthorpe wrote:
> On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
>> Currently AMD IOMMU driver does not reserve domain ids programmed in the
>> DTE while reusing the device table inside kdump kernel. This can cause
>> reallocation of these domain ids for newer domains that are created by
>> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>>
>> Hence reserve these ids inside pdom_ids.
>>
>> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
>> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
>> ---
>> drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
>> 1 file changed, 22 insertions(+), 1 deletion(-)
> This seems OK
>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>
> But the a point of this work was to remove this code:
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 48bca4dc8eb61f..1cd799913cbcd6 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> phys_addr_t top_paddr, unsigned int top_level)
> {
> u16 domid;
> - u32 old_domid;
> struct dev_table_entry *initial_dte;
> struct dev_table_entry new = {};
> struct protection_domain *domain = dev_data->domain;
> @@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> if (dev_data->ats_enabled)
> new.data[1] |= DTE_FLAG_IOTLB;
>
> - old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
> new.data[1] |= domid;
>
> /*
> @@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
> set_dte_gcr3_table(iommu, dev_data, &new);
>
> update_dte256(iommu, dev_data, &new);
> -
> - /*
> - * A kdump kernel might be replacing a domain ID that was copied from
> - * the previous kernel--if so, it needs to flush the translation cache
> - * entries for the old domain ID that is being overwritten
> - */
> - if (old_domid) {
> - amd_iommu_flush_tlb_domid(iommu, old_domid);
> - }
> }
>
> /*
>
> Under the reasoning that:
> - domids in use by the prior kernel are reserved in the IDA and are
> never used by this kernel
> - domids in the IDA must be clean
> - There is no reason to flush a domid until it is returned to the IDA
> - detach_device() calls amd_iommu_domain_flush_all() before the
> domain can be freed and the domid returned the IDA which clears the
> IOTLB
>
> Please add a patch?
Thanks for the explaination
Will add the patch for it
Thanks
Sairaj
© 2016 - 2025 Red Hat, Inc.