Currently AMD IOMMU driver does not reserve domain ids programmed in the
DTE while reusing the device table inside kdump kernel. This can cause
reallocation of these domain ids for newer domains that are created by
the kdump kernel, which can lead to potential IO_PAGE_FAULTs.
Hence reserve these ids inside pdom_ids.
Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
drivers/iommu/amd/init.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f2991c11867c..9375fba1071c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
static bool __reuse_device_table(struct amd_iommu *iommu)
{
struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
- u32 lo, hi, old_devtb_size;
+ struct dev_table_entry *old_dev_tbl_entry;
+ u32 lo, hi, old_devtb_size, devid;
phys_addr_t old_devtb_phys;
+ u16 dom_id;
+ bool dte_v;
u64 entry;
+ int ret;
/* Each IOMMU use separate device table with the same size */
lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
return false;
}
+ for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
+ old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
+ dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
+ dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
+
+ if (!dte_v || !dom_id)
+ continue;
+ /*
+ * ID reseveration can fail with -ENOSPC when there
+ * are multiple devices present in the same domain,
+ * hence check only for -ENOMEM.
+ */
+ ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
+ if (ret == -ENOMEM) {
+ pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
+ return false;
+ }
+ }
+
return true;
}
--
2.34.1
On 11/14/2025 4:14 PM, Sairaj Kodilkar wrote:
> Currently AMD IOMMU driver does not reserve domain ids programmed in the
> DTE while reusing the device table inside kdump kernel. This can cause
> reallocation of these domain ids for newer domains that are created by
> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>
> Hence reserve these ids inside pdom_ids.
>
> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
I forgot to mention this earlier: the issue was reported by Jason while
reviewing the nested series. So:
Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
-Vasant
> ---
> drivers/iommu/amd/init.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index f2991c11867c..9375fba1071c 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
> static bool __reuse_device_table(struct amd_iommu *iommu)
> {
> struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
> - u32 lo, hi, old_devtb_size;
> + struct dev_table_entry *old_dev_tbl_entry;
> + u32 lo, hi, old_devtb_size, devid;
> phys_addr_t old_devtb_phys;
> + u16 dom_id;
> + bool dte_v;
> u64 entry;
> + int ret;
>
> /* Each IOMMU use separate device table with the same size */
> lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
> @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
> return false;
> }
>
> + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
> + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
> + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
> + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
> +
> + if (!dte_v || !dom_id)
> + continue;
> + /*
> + * ID reseveration can fail with -ENOSPC when there
> + * are multiple devices present in the same domain,
> + * hence check only for -ENOMEM.
> + */
> + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
> + if (ret == -ENOMEM) {
> + pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
> + return false;
> + }
> + }
> +
> return true;
> }
>
On Wed, Nov 19, 2025 at 04:20:28PM +0530, Vasant Hegde wrote:
> > diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> > index f2991c11867c..9375fba1071c 100644
> > --- a/drivers/iommu/amd/init.c
> > +++ b/drivers/iommu/amd/init.c
> > @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
> > static bool __reuse_device_table(struct amd_iommu *iommu)
> > {
> > struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
> > - u32 lo, hi, old_devtb_size;
> > + struct dev_table_entry *old_dev_tbl_entry;
> > + u32 lo, hi, old_devtb_size, devid;
> > phys_addr_t old_devtb_phys;
> > + u16 dom_id;
> > + bool dte_v;
> > u64 entry;
> > + int ret;
> >
> > /* Each IOMMU use separate device table with the same size */
> > lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
> > @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
> > return false;
> > }
> >
> > + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
> > + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
> > + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
> > + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
> > +
> > + if (!dte_v || !dom_id)
> > + continue;
> > + /*
> > + * ID reseveration can fail with -ENOSPC when there
> > + * are multiple devices present in the same domain,
> > + * hence check only for -ENOMEM.
> > + */
> > + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
Is it really an atomic context? Why?
> > + if (ret == -ENOMEM) {
> > + pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
> > + return false;
Please don't print on ENOMEM, there is already a print.
I think you should also keep iterating, as other dom_ids may still
fit in already-allocated bitmaps. Though the system is probably toast
if this happens anyhow.
Jason
On 11/19/2025 7:13 PM, Jason Gunthorpe wrote:
> On Wed, Nov 19, 2025 at 04:20:28PM +0530, Vasant Hegde wrote:
>>> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
>>> index f2991c11867c..9375fba1071c 100644
>>> --- a/drivers/iommu/amd/init.c
>>> +++ b/drivers/iommu/amd/init.c
>>> @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
>>> static bool __reuse_device_table(struct amd_iommu *iommu)
>>> {
>>> struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
>>> - u32 lo, hi, old_devtb_size;
>>> + struct dev_table_entry *old_dev_tbl_entry;
>>> + u32 lo, hi, old_devtb_size, devid;
>>> phys_addr_t old_devtb_phys;
>>> + u16 dom_id;
>>> + bool dte_v;
>>> u64 entry;
>>> + int ret;
>>>
>>> /* Each IOMMU use separate device table with the same size */
>>> lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
>>> @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
>>> return false;
>>> }
>>>
>>> + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
>>> + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
>>> + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
>>> + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
>>> +
>>> + if (!dte_v || !dom_id)
>>> + continue;
>>> + /*
>>> + * ID reseveration can fail with -ENOSPC when there
>>> + * are multiple devices present in the same domain,
>>> + * hence check only for -ENOMEM.
>>> + */
>>> + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
>
> Is it really an atomic context? Why?
I had to dig through the git history to figure out why I used GFP_ATOMIC for the ida allocator.
When we moved to the ida allocator we used GFP_ATOMIC because there were paths where the code
was holding a spinlock before calling the ida allocator.
Now that those paths have moved to a mutex lock and the redundant locks have been removed, I
think it's fine to change it to GFP_KERNEL.
-Vasant
On 11/19/2025 7:13 PM, Jason Gunthorpe wrote:
> On Wed, Nov 19, 2025 at 04:20:28PM +0530, Vasant Hegde wrote:
>>> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
>>> index f2991c11867c..9375fba1071c 100644
>>> --- a/drivers/iommu/amd/init.c
>>> +++ b/drivers/iommu/amd/init.c
>>> @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
>>> static bool __reuse_device_table(struct amd_iommu *iommu)
>>> {
>>> struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
>>> - u32 lo, hi, old_devtb_size;
>>> + struct dev_table_entry *old_dev_tbl_entry;
>>> + u32 lo, hi, old_devtb_size, devid;
>>> phys_addr_t old_devtb_phys;
>>> + u16 dom_id;
>>> + bool dte_v;
>>> u64 entry;
>>> + int ret;
>>>
>>> /* Each IOMMU use separate device table with the same size */
>>> lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
>>> @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
>>> return false;
>>> }
>>>
>>> + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
>>> + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
>>> + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
>>> + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
>>> +
>>> + if (!dte_v || !dom_id)
>>> + continue;
>>> + /*
>>> + * ID reseveration can fail with -ENOSPC when there
>>> + * are multiple devices present in the same domain,
>>> + * hence check only for -ENOMEM.
>>> + */
>>> + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
>
> Is it really an atomic context? Why?
>
>>> + if (ret == -ENOMEM) {
>>> + pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
>>> + return false;
>
> Please don't print on ENOMEM, there is already a print.
>
> I think you should also keep iterating as other dom_ids may still be
> fit in already allocated bitmaps. Though the system is probably toast
> if this happens anyhow.
Right. There is no point in continuing if it fails to reserve the domain ID, since
things can go wrong. So it returns an error.
-Vasant
On 11/19/2025 7:13 PM, Jason Gunthorpe wrote:
> On Wed, Nov 19, 2025 at 04:20:28PM +0530, Vasant Hegde wrote:
>>> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
>>> index f2991c11867c..9375fba1071c 100644
>>> --- a/drivers/iommu/amd/init.c
>>> +++ b/drivers/iommu/amd/init.c
>>> @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
>>> static bool __reuse_device_table(struct amd_iommu *iommu)
>>> {
>>> struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
>>> - u32 lo, hi, old_devtb_size;
>>> + struct dev_table_entry *old_dev_tbl_entry;
>>> + u32 lo, hi, old_devtb_size, devid;
>>> phys_addr_t old_devtb_phys;
>>> + u16 dom_id;
>>> + bool dte_v;
>>> u64 entry;
>>> + int ret;
>>>
>>> /* Each IOMMU use separate device table with the same size */
>>> lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
>>> @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
>>> return false;
>>> }
>>>
>>> + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
>>> + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
>>> + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
>>> + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
>>> +
>>> + if (!dte_v || !dom_id)
>>> + continue;
>>> + /*
>>> + * ID reseveration can fail with -ENOSPC when there
>>> + * are multiple devices present in the same domain,
>>> + * hence check only for -ENOMEM.
>>> + */
>>> + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
> Is it really an atomic context? Why?
No, it's not atomic context. My intention was to ensure that the memory
allocation succeeds, but I am not sure if it's the most appropriate choice
for this scenario.
Could you please suggest which GFP flag would be best suited for this use?
>
>>> + if (ret == -ENOMEM) {
>>> + pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
>>> + return false;
> Please don't print on ENOMEM, there is already a print.
>
> I think you should also keep iterating as other dom_ids may still be
> fit in already allocated bitmaps. Though the system is probably toast
> if this happens anyhow.
Sure
Thanks
Sairaj
> Jason
On Thu, Nov 20, 2025 at 10:32:32AM +0530, Sairaj Kodilkar wrote:
> No, its not atomic context. My intension was to ensure that the
> memory allocation succeeds. But I am not sure if it's the most
> appropriate choice for this scenario
This is early boot, it won't fail either way. Use GFP_KERNEL because
it is clearer.
Jason
On 11/20/2025 6:26 PM, Jason Gunthorpe wrote:
> On Thu, Nov 20, 2025 at 10:32:32AM +0530, Sairaj Kodilkar wrote:
>
>> No, its not atomic context. My intension was to ensure that the
>> memory allocation succeeds. But I am not sure if it's the most
>> appropriate choice for this scenario
> This is early boot, it won't fail either way. Use GFP_KERNEL because
> it is clearer
Sure, thanks for the suggestion.
-Sairaj
Hi,
On Fri, Nov 14, 2025 at 04:14:41PM +0530, Sairaj Kodilkar wrote:
> Currently AMD IOMMU driver does not reserve domain ids programmed in the
> DTE while reusing the device table inside kdump kernel. This can cause
> reallocation of these domain ids for newer domains that are created by
> the kdump kernel, which can lead to potential IO_PAGE_FAULTs
>
> Hence reserve these ids inside pdom_ids.
>
> Fixes: 38e5f33ee359 ("iommu/amd: Reuse device table for kdump")
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
> ---
> drivers/iommu/amd/init.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index f2991c11867c..9375fba1071c 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -1136,9 +1136,13 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
> static bool __reuse_device_table(struct amd_iommu *iommu)
> {
> struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
> - u32 lo, hi, old_devtb_size;
> + struct dev_table_entry *old_dev_tbl_entry;
> + u32 lo, hi, old_devtb_size, devid;
> phys_addr_t old_devtb_phys;
> + u16 dom_id;
> + bool dte_v;
> u64 entry;
> + int ret;
>
> /* Each IOMMU use separate device table with the same size */
> lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
> @@ -1173,6 +1177,25 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
> return false;
> }
>
> + for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
> + old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
> + dte_v = old_dev_tbl_entry->data[0] & DTE_FLAG_V;
> + dom_id = old_dev_tbl_entry->data[1] & DEV_DOMID_MASK;
Consider using FIELD_GET().
> +
> + if (!dte_v || !dom_id)
> + continue;
> + /*
> + * ID reseveration can fail with -ENOSPC when there
s/reseveration/reservation
> + * are multiple devices present in the same domain,
> + * hence check only for -ENOMEM.
> + */
> + ret = ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC);
> + if (ret == -ENOMEM) {
> + pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
Can we use a rate-limited print here? In the kdump kernel, pr_err can be a bit noisy.
-Ankit
> + return false;
> + }
> + }
> +
> return true;
> }
>
> --
> 2.34.1
>
© 2016 - 2026 Red Hat, Inc.