In the kdump kernel, the IOMMU operates in deferred_attach mode. In this
mode, info->domain may not yet be assigned by the time the release_device
function is called. This leads to the following crash in the crash kernel:
BUG: kernel NULL pointer dereference, address: 000000000000003c
...
RIP: 0010:do_raw_spin_lock+0xa/0xa0
...
_raw_spin_lock_irqsave+0x1b/0x30
intel_iommu_release_device+0x96/0x170
iommu_deinit_device+0x39/0xf0
__iommu_group_remove_device+0xa0/0xd0
iommu_bus_notifier+0x55/0xb0
notifier_call_chain+0x5a/0xd0
blocking_notifier_call_chain+0x41/0x60
bus_notify+0x34/0x50
device_del+0x269/0x3d0
pci_remove_bus_device+0x77/0x100
p2sb_bar+0xae/0x1d0
...
i801_probe+0x423/0x740
Use the release_domain mechanism to fix it.
Fixes: 586081d3f6b1 ("iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO")
Reported-by: Eric Badger <ebadger@purestorage.com>
Closes: https://lore.kernel.org/r/20240113181713.1817855-1-ebadger@purestorage.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.c | 26 +-------------------------
1 file changed, 1 insertion(+), 25 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 61bb35046ea4..2c04ea90d22f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3798,30 +3798,6 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}
-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
- struct intel_iommu *iommu = info->iommu;
- unsigned long flags;
-
- if (!dev_is_real_dma_subdevice(info->dev)) {
- if (dev_is_pci(info->dev) && sm_supported(iommu))
- intel_pasid_tear_down_entry(iommu, info->dev,
- IOMMU_NO_PASID, false);
-
- iommu_disable_pci_caps(info);
- domain_context_clear(info);
- }
-
- spin_lock_irqsave(&domain->lock, flags);
- list_del(&info->link);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- domain_detach_iommu(domain, iommu);
- info->domain = NULL;
-}
-
/*
* Clear the page table pointer in context or pasid table entries so that
* all DMA requests without PASID from the device are blocked. If the page
@@ -4348,7 +4324,6 @@ static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
@@ -4839,6 +4814,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
+ .release_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
--
2.34.1
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, February 23, 2024 1:13 PM
>
> -static void dmar_remove_one_dev_info(struct device *dev)
> -{
> - struct device_domain_info *info = dev_iommu_priv_get(dev);
> - struct dmar_domain *domain = info->domain;
> - struct intel_iommu *iommu = info->iommu;
> - unsigned long flags;
> -
> - if (!dev_is_real_dma_subdevice(info->dev)) {
> - if (dev_is_pci(info->dev) && sm_supported(iommu))
> - intel_pasid_tear_down_entry(iommu, info->dev,
> - IOMMU_NO_PASID, false);
> -
> - iommu_disable_pci_caps(info);
> - domain_context_clear(info);
> - }
> -
> - spin_lock_irqsave(&domain->lock, flags);
> - list_del(&info->link);
> - spin_unlock_irqrestore(&domain->lock, flags);
> -
> - domain_detach_iommu(domain, iommu);
> - info->domain = NULL;
> -}
> -
What's required here is slightly different from device_block_translation(),
which leaves the context entry uncleared in scalable mode (implying that the
pasid table must remain valid). But in the release path the pasid table will
be freed right afterwards, leading to a use-after-free.
Let's add an explicit domain_context_clear() in intel_iommu_release_device().
With that,
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
On 2/27/24 3:40 PM, Tian, Kevin wrote:
>> From: Lu Baolu<baolu.lu@linux.intel.com>
>> Sent: Friday, February 23, 2024 1:13 PM
>>
>> -static void dmar_remove_one_dev_info(struct device *dev)
>> -{
>> - struct device_domain_info *info = dev_iommu_priv_get(dev);
>> - struct dmar_domain *domain = info->domain;
>> - struct intel_iommu *iommu = info->iommu;
>> - unsigned long flags;
>> -
>> - if (!dev_is_real_dma_subdevice(info->dev)) {
>> - if (dev_is_pci(info->dev) && sm_supported(iommu))
>> - intel_pasid_tear_down_entry(iommu, info->dev,
>> - IOMMU_NO_PASID, false);
>> -
>> - iommu_disable_pci_caps(info);
>> - domain_context_clear(info);
>> - }
>> -
>> - spin_lock_irqsave(&domain->lock, flags);
>> - list_del(&info->link);
>> - spin_unlock_irqrestore(&domain->lock, flags);
>> -
>> - domain_detach_iommu(domain, iommu);
>> - info->domain = NULL;
>> -}
>> -
> what's required here is slightly different from device_block_translation()
> which leaves context entry uncleared in scalable mode (implying the
> pasid table must be valid). but in the release path the pasid table will
> be freed right after then leading to a use-after-free case.
>
> let's add an explicit domain_context_clear() in intel_iommu_release_device().
Nice catch!
How about moving the scalable mode context entry management to the probe and
release paths? Currently, it's part of the domain switch path, which is
really irrelevant.
Best regards,
baolu
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Wednesday, February 28, 2024 9:23 AM
>
> On 2/27/24 3:40 PM, Tian, Kevin wrote:
> >> From: Lu Baolu<baolu.lu@linux.intel.com>
> >> Sent: Friday, February 23, 2024 1:13 PM
> >>
> >> -static void dmar_remove_one_dev_info(struct device *dev)
> >> -{
> >> - struct device_domain_info *info = dev_iommu_priv_get(dev);
> >> - struct dmar_domain *domain = info->domain;
> >> - struct intel_iommu *iommu = info->iommu;
> >> - unsigned long flags;
> >> -
> >> - if (!dev_is_real_dma_subdevice(info->dev)) {
> >> - if (dev_is_pci(info->dev) && sm_supported(iommu))
> >> - intel_pasid_tear_down_entry(iommu, info->dev,
> >> - IOMMU_NO_PASID, false);
> >> -
> >> - iommu_disable_pci_caps(info);
> >> - domain_context_clear(info);
> >> - }
> >> -
> >> - spin_lock_irqsave(&domain->lock, flags);
> >> - list_del(&info->link);
> >> - spin_unlock_irqrestore(&domain->lock, flags);
> >> -
> >> - domain_detach_iommu(domain, iommu);
> >> - info->domain = NULL;
> >> -}
> >> -
> > what's required here is slightly different from device_block_translation()
> > which leaves context entry uncleared in scalable mode (implying the
> > pasid table must be valid). but in the release path the pasid table will
> > be freed right after then leading to a use-after-free case.
> >
> > let's add an explicit domain_context_clear() in
> intel_iommu_release_device().
>
> Nice catch!
>
> How about moving the scalable mode context entry management to probe
> and
> release path? Currently, it's part of domain switch, that's really
> irrelevant.
>
sounds good.
© 2016 - 2026 Red Hat, Inc.