[PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel

Sairaj Kodilkar posted 2 patches 1 week, 3 days ago
[PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel
Posted by Sairaj Kodilkar 1 week, 3 days ago
Currently AMD IOMMU driver does not reserve domain ids programmed in the
DTE while reusing the device table inside kdump kernel. This can cause
reallocation of these domain ids for newer domains that are created by
the kdump kernel, which can lead to potential IO_PAGE_FAULTs

Hence reserve these ids inside pdom_ids.

Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
---
 drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f2991c11867c..14eb9de33ccb 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
 static bool __reuse_device_table(struct amd_iommu *iommu)
 {
 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
-	u32 lo, hi, old_devtb_size;
+	struct dev_table_entry *old_dev_tbl_entry;
+	u32 lo, hi, old_devtb_size, devid;
 	phys_addr_t old_devtb_phys;
+	u16 dom_id;
+	bool dte_v;
 	u64 entry;
+	int ret;
 
 	/* Each IOMMU use separate device table with the same size */
 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1173,6 +1177,23 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
 		return false;
 	}
 
+	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
+		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
+		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
+		dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
+
+		if (!dte_v || !dom_id)
+			continue;
+		/*
+		 * ID reservation can fail with -ENOSPC when there
+		 * are multiple devices present in the same domain,
+		 * hence check only for -ENOMEM.
+		 */
+		ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_KERNEL);
+		if (ret == -ENOMEM)
+			return false;
+	}
+
 	return true;
 }
 
-- 
2.34.1
Re: [PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel
Posted by Jason Gunthorpe 1 week ago
On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
> Currently AMD IOMMU driver does not reserve domain ids programmed in the
> DTE while reusing the device table inside kdump kernel. This can cause
> reallocation of these domain ids for newer domains that are created by
> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
> 
> Hence reserve these ids inside pdom_ids.
> 
> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
> ---
>  drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)

This seems OK

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

But the a point of this work was to remove this code:

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 48bca4dc8eb61f..1cd799913cbcd6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
                          phys_addr_t top_paddr, unsigned int top_level)
 {
        u16 domid;
-       u32 old_domid;
        struct dev_table_entry *initial_dte;
        struct dev_table_entry new = {};
        struct protection_domain *domain = dev_data->domain;
@@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
        if (dev_data->ats_enabled)
                new.data[1] |= DTE_FLAG_IOTLB;
 
-       old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
        new.data[1] |= domid;
 
        /*
@@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
        set_dte_gcr3_table(iommu, dev_data, &new);
 
        update_dte256(iommu, dev_data, &new);
-
-       /*
-        * A kdump kernel might be replacing a domain ID that was copied from
-        * the previous kernel--if so, it needs to flush the translation cache
-        * entries for the old domain ID that is being overwritten
-        */
-       if (old_domid) {
-               amd_iommu_flush_tlb_domid(iommu, old_domid);
-       }
 }
 
 /*

Under the reasoning that:
 - domids in use by the prior kernel are reserved in the IDA and are
   never used by this kernel
 - domids in the IDA must be clean
 - There is no reason to flush a domid until it is returned to the IDA
 - detach_device() calls amd_iommu_domain_flush_all() before the
   domain can be freed and the domid returned the IDA which clears the
   IOTLB

Please add a patch?

Jason
Re: [PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel
Posted by Vasant Hegde 5 days, 17 hours ago

On 11/24/2025 8:14 PM, Jason Gunthorpe wrote:
> On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
>> Currently AMD IOMMU driver does not reserve domain ids programmed in the
>> DTE while reusing the device table inside kdump kernel. This can cause
>> reallocation of these domain ids for newer domains that are created by
>> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>>
>> Hence reserve these ids inside pdom_ids.
>>
>> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
>> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
>> ---
>>  drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
>>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> This seems OK
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> 
> But the a point of this work was to remove this code:
> 
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 48bca4dc8eb61f..1cd799913cbcd6 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>                           phys_addr_t top_paddr, unsigned int top_level)
>  {
>         u16 domid;
> -       u32 old_domid;
>         struct dev_table_entry *initial_dte;
>         struct dev_table_entry new = {};
>         struct protection_domain *domain = dev_data->domain;
> @@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>         if (dev_data->ats_enabled)
>                 new.data[1] |= DTE_FLAG_IOTLB;
>  
> -       old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
>         new.data[1] |= domid;
>  
>         /*
> @@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>         set_dte_gcr3_table(iommu, dev_data, &new);
>  
>         update_dte256(iommu, dev_data, &new);
> -
> -       /*
> -        * A kdump kernel might be replacing a domain ID that was copied from
> -        * the previous kernel--if so, it needs to flush the translation cache
> -        * entries for the old domain ID that is being overwritten
> -        */
> -       if (old_domid) {
> -               amd_iommu_flush_tlb_domid(iommu, old_domid);
> -       }
>  }
>  
>  /*
> 
> Under the reasoning that:
>  - domids in use by the prior kernel are reserved in the IDA and are
>    never used by this kernel

It looks like on non-SNP system in kdump path, if it fails to reserve old DTE
table, it tries to allocate new one! Looking into code again, it may be OK to
fail in that path as well.. so that we will have common flow.

That needs to be fixed before removing old_domid check.


-Vasant

>  - domids in the IDA must be clean
>  - There is no reason to flush a domid until it is returned to the IDA
>  - detach_device() calls amd_iommu_domain_flush_all() before the
>    domain can be freed and the domid returned the IDA which clears the
>    IOTLB
> 
> Please add a patch?
> 
> Jason
Re: [PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel
Posted by Jason Gunthorpe 5 days, 10 hours ago
On Wed, Nov 26, 2025 at 01:51:27PM +0530, Vasant Hegde wrote:
> It looks like on non-SNP system in kdump path, if it fails to reserve old DTE
> table, it tries to allocate new one! Looking into code again, it may be OK to
> fail in that path as well.. so that we will have common flow.

If it allocates a new (empty) one and does a full flush of the iommu
that would be OK?

Jason
Re: [PATCH v2 1/2] amd/iommu: Preserve domain ids inside the kdump kernel
Posted by Sairaj Kodilkar 6 days, 18 hours ago

On 11/24/2025 8:14 PM, Jason Gunthorpe wrote:
> On Fri, Nov 21, 2025 at 02:41:15PM +0530, Sairaj Kodilkar wrote:
>> Currently AMD IOMMU driver does not reserve domain ids programmed in the
>> DTE while reusing the device table inside kdump kernel. This can cause
>> reallocation of these domain ids for newer domains that are created by
>> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>>
>> Hence reserve these ids inside pdom_ids.
>>
>> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> Reported-by: Jason Gunthorpe <jgg@nvidia.com>
>> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
>> ---
>>   drivers/iommu/amd/init.c | 23 ++++++++++++++++++++++-
>>   1 file changed, 22 insertions(+), 1 deletion(-)
> This seems OK
>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>
> But the a point of this work was to remove this code:
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 48bca4dc8eb61f..1cd799913cbcd6 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -2024,7 +2024,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>                            phys_addr_t top_paddr, unsigned int top_level)
>   {
>          u16 domid;
> -       u32 old_domid;
>          struct dev_table_entry *initial_dte;
>          struct dev_table_entry new = {};
>          struct protection_domain *domain = dev_data->domain;
> @@ -2080,7 +2079,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>          if (dev_data->ats_enabled)
>                  new.data[1] |= DTE_FLAG_IOTLB;
>   
> -       old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
>          new.data[1] |= domid;
>   
>          /*
> @@ -2096,15 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>          set_dte_gcr3_table(iommu, dev_data, &new);
>   
>          update_dte256(iommu, dev_data, &new);
> -
> -       /*
> -        * A kdump kernel might be replacing a domain ID that was copied from
> -        * the previous kernel--if so, it needs to flush the translation cache
> -        * entries for the old domain ID that is being overwritten
> -        */
> -       if (old_domid) {
> -               amd_iommu_flush_tlb_domid(iommu, old_domid);
> -       }
>   }
>   
>   /*
>
> Under the reasoning that:
>   - domids in use by the prior kernel are reserved in the IDA and are
>     never used by this kernel
>   - domids in the IDA must be clean
>   - There is no reason to flush a domid until it is returned to the IDA
>   - detach_device() calls amd_iommu_domain_flush_all() before the
>     domain can be freed and the domid returned the IDA which clears the
>     IOTLB
>
> Please add a patch?

Thanks for the explaination
Will add the patch for it

Thanks
Sairaj