From: Lu Baolu <baolu.lu@linux.intel.com>
This implements the .cache_invalidate_user() callback to support iotlb
flush for nested domains.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Co-developed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++
1 file changed, 116 insertions(+)
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index b5a5563ab32c..c665e2647045 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
 	kfree(to_dmar_domain(domain));
 }
 
+static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
+				     struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, bool ih)
+{
+	u16 did = domain_id_iommu(domain, iommu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	if (!list_empty(&domain->devices))
+		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
+				npages, ih, NULL);
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
+				   unsigned mask, u32 *fault)
+{
+	struct device_domain_info *info;
+	unsigned long flags;
+	u16 sid, qdep;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->ats_enabled)
+			continue;
+		sid = info->bus << 8 | info->devfn;
+		qdep = info->ats_qdep;
+		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+				   qdep, addr, mask, fault);
+		quirk_extra_dev_tlb_flush(info, addr, mask,
+					  IOMMU_NO_PASID, qdep);
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, u32 *error)
+{
+	struct iommu_domain_info *info;
+	unsigned long i;
+	unsigned mask;
+	u32 fault = 0;
+
+	if (npages == U64_MAX)
+		mask = 64 - VTD_PAGE_SHIFT;
+	else
+		mask = ilog2(__roundup_pow_of_two(npages));
+
+	xa_for_each(&domain->iommu_array, i, info) {
+		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
+
+		if (domain->has_iotlb_device)
+			continue;
+
+		nested_flush_dev_iotlb(domain, addr, mask, &fault);
+		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
+			break;
+	}
+
+	if (fault & DMA_FSTS_ICE)
+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
+	if (fault & DMA_FSTS_ITE)
+		*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
+}
+
+static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
+					      struct iommu_user_data_array *array)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct iommu_hwpt_vtd_s1_invalidate inv_entry;
+	u32 processed = 0;
+	int ret = 0;
+	u32 index;
+
+	if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (index = 0; index < array->entry_num; index++) {
+		ret = iommu_copy_struct_from_user_array(&inv_entry, array,
+							IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+							index, inv_error);
+		if (ret)
+			break;
+
+		if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
+		    ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		inv_entry.inv_error = 0;
+		intel_nested_flush_cache(dmar_domain, inv_entry.addr,
+					 inv_entry.npages, &inv_entry.inv_error);
+
+		ret = iommu_respond_struct_to_user_array(array, index,
+							 (void *)&inv_entry,
+							 sizeof(inv_entry));
+		if (ret)
+			break;
+
+		processed++;
+	}
+
+out:
+	array->entry_num = processed;
+	return ret;
+}
+
 static const struct iommu_domain_ops intel_nested_domain_ops = {
 	.attach_dev = intel_nested_attach_dev,
 	.free = intel_nested_domain_free,
+	.cache_invalidate_user = intel_nested_cache_invalidate_user,
 };
 
 struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
--
2.34.1
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: [PATCH v7 9/9] iommu/vt-d: Add iotlb flush for nested domain
>
>From: Lu Baolu <baolu.lu@linux.intel.com>
>
>This implements the .cache_invalidate_user() callback to support iotlb
>flush for nested domains.
>
>Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>Co-developed-by: Yi Liu <yi.l.liu@intel.com>
>Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>---
>drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++
> 1 file changed, 116 insertions(+)
>
>diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
>index b5a5563ab32c..c665e2647045 100644
>--- a/drivers/iommu/intel/nested.c
>+++ b/drivers/iommu/intel/nested.c
>@@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
> kfree(to_dmar_domain(domain));
> }
>
>+static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
>+ struct dmar_domain *domain, u64 addr,
>+ unsigned long npages, bool ih)
>+{
>+ u16 did = domain_id_iommu(domain, iommu);
>+ unsigned long flags;
>+
>+ spin_lock_irqsave(&domain->lock, flags);
>+ if (!list_empty(&domain->devices))
>+ qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
>+ npages, ih, NULL);
Is it optimal to check whether the domain is attached to the iommu before
triggering the flush? Or is the check redundant if intel_nested_flush_cache()
is the only call site?
Thanks
Zhenzhong
>+ spin_unlock_irqrestore(&domain->lock, flags);
>+}
On 2023/12/27 17:27, Duan, Zhenzhong wrote:
>
>
>> +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu,
>> + struct dmar_domain *domain, u64 addr,
>> + unsigned long npages, bool ih)
>> +{
>> + u16 did = domain_id_iommu(domain, iommu);
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&domain->lock, flags);
>> + if (!list_empty(&domain->devices))
>> + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
>> + npages, ih, NULL);
>
> Is it optimal to check if domain attached to iommu before trigger flush?
> Or the check is redundant if intel_nested_flush_cache() is the only call site.
I think it is possible that userspace issues an invalidation on a hwpt which
does not have any device attached, though that would be a pointless thing to
do. So checking whether any device is attached before flushing still makes sense.
--
Regards,
Yi Liu
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Thursday, December 21, 2023 11:40 PM
>
> +
> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
> + unsigned long npages, u32 *error)
> +{
> + struct iommu_domain_info *info;
> + unsigned long i;
> + unsigned mask;
> + u32 fault = 0;
> +
> + if (npages == U64_MAX)
> + mask = 64 - VTD_PAGE_SHIFT;
> + else
> + mask = ilog2(__roundup_pow_of_two(npages));
> +
> + xa_for_each(&domain->iommu_array, i, info) {
> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?
> +
> + if (domain->has_iotlb_device)
> + continue;
> +
> + nested_flush_dev_iotlb(domain, addr, mask, &fault);
> + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
> + break;
here you may add a note that we don't plan to forward the invalidation
queue error (i.e. IQE) to the caller, as it can only be caused by a
driver internal bug.
> +
> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
> + ret = -EINVAL;
> + break;
> + }
> +
why is [non-zero addr, U64_MAX] an error? Is it explicitly stated to
be unsupported by the underlying helpers?
On 2023/12/22 14:57, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Thursday, December 21, 2023 11:40 PM
>>
>> +
>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
>> + unsigned long npages, u32 *error)
>> +{
>> + struct iommu_domain_info *info;
>> + unsigned long i;
>> + unsigned mask;
>> + u32 fault = 0;
>> +
>> + if (npages == U64_MAX)
>> + mask = 64 - VTD_PAGE_SHIFT;
>> + else
>> + mask = ilog2(__roundup_pow_of_two(npages));
>> +
>> + xa_for_each(&domain->iommu_array, i, info) {
>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>
> so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?
yeah... it is. It is named ih in the driver code. But it appears that only
the code below sets ih: when calling iommu_flush_iotlb_psi(), the fifth
parameter (ih) may be true.
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
						mhp->nr_pages - 1);

	switch (val) {
	case MEM_GOING_ONLINE:
		if (iommu_domain_identity_map(si_domain,
					      start_vpfn, last_vpfn)) {
			pr_warn("Failed to build identity map for [%lx-%lx]\n",
				start_vpfn, last_vpfn);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		{
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			LIST_HEAD(freelist);

			domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
						      start_vpfn, mhp->nr_pages,
						      list_empty(&freelist), 0);
			rcu_read_unlock();
			put_pages_list(&freelist);
		}
		break;
	}

	return NOTIFY_OK;
}
>
>> +
>> + if (domain->has_iotlb_device)
>> + continue;
>> +
>> + nested_flush_dev_iotlb(domain, addr, mask, &fault);
>> + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
>> + break;
>
> here you may add a note that we don't plan to forward invalidation
> queue error (i.e. IQE) to the caller as it's caused only by driver
> internal bug.
yes.
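Something along these lines, perhaps (only a sketch of the wording, sitting
right above the fault check in intel_nested_flush_cache()):

		nested_flush_dev_iotlb(domain, addr, mask, &fault);

		/*
		 * An invalidation queue error (IQE) is not forwarded to
		 * the caller since it can only be caused by a driver
		 * internal bug; only ITE/ICE are reported back via the
		 * error field.
		 */
		if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE))
			break;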
>
>> +
>> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
>> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
>> + ret = -EINVAL;
>> + break;
>> + }
>> +
>
> why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to
> be not supported by underlying helpers?
No such limitation from the underlying helpers. But in such a case,
addr + npages * PAGE_SIZE would exceed U64_MAX, which seems a bit
strange. I'm fine with relaxing the check though, since the underlying
helper only checks npages when determining whether the flush is
pasid-selective or not.
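To illustrate the reasoning (the helper name below is hypothetical and just
restates the check in the patch):

	/*
	 * npages == U64_MAX is the "flush everything" encoding, so a
	 * non-zero addr makes no sense there: the implied end of the
	 * range, addr + npages * VTD_PAGE_SIZE, would wrap past U64_MAX.
	 */
	static bool s1_inv_range_is_valid(u64 addr, u64 npages)
	{
		if (npages == U64_MAX)
			return addr == 0;	/* whole address space */
		return IS_ALIGNED(addr, VTD_PAGE_SIZE);
	}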
--
Regards,
Yi Liu
On 2023/12/26 12:51, Yi Liu wrote:
> On 2023/12/22 14:57, Tian, Kevin wrote:
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Sent: Thursday, December 21, 2023 11:40 PM
>>>
>>> +
>>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
>>> + unsigned long npages, u32 *error)
>>> +{
>>> + struct iommu_domain_info *info;
>>> + unsigned long i;
>>> + unsigned mask;
>>> + u32 fault = 0;
>>> +
>>> + if (npages == U64_MAX)
>>> + mask = 64 - VTD_PAGE_SHIFT;
>>> + else
>>> + mask = ilog2(__roundup_pow_of_two(npages));
>>> +
>>> + xa_for_each(&domain->iommu_array, i, info) {
>>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>>
>> so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored?
>
> yeah... it is. It is named as ih in the driver code. But it appears only
> the below code is set ih. When calling iommu_flush_iotlb_psi(), the 5th
> parameter (ih) may be true.
>
I now pass this flag through to intel_nested_flush_cache(), since the
helper accepts an ih parameter.
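A minimal sketch of what that looks like (the exact signature in the next
revision may differ; ih is just the IOMMU_VTD_INV_FLAGS_LEAF hint from the
uAPI flags):

	static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
					     unsigned long npages, bool ih, u32 *error)
	{
		...
		xa_for_each(&domain->iommu_array, i, info)
			nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, ih);
		...
	}

	/* caller side, in intel_nested_cache_invalidate_user() */
	intel_nested_flush_cache(dmar_domain, inv_entry.addr, inv_entry.npages,
				 inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF,
				 &inv_entry.inv_error);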
--
Regards,
Yi Liu
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Tuesday, December 26, 2023 12:52 PM
> >> +
> >> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) ||
> >> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) {
> >> + ret = -EINVAL;
> >> + break;
> >> + }
> >> +
> >
> > why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to
> > be not supported by underlying helpers?
>
> no such limitation by underlying helpers. But in such case, the
> addr+npages*PAGE_SIZE would exceed U64_MAX, this seems a bit
> strange. But I'm fine to relax the check since the underlying helper
> only checks npages when determining paid-selective or not.
>
I overlooked that npages is a count rather than an end address. Let's keep the check.
On 12/21/2023 11:39 PM, Yi Liu wrote:
> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
> + unsigned long npages, u32 *error)
> +{
> + struct iommu_domain_info *info;
> + unsigned long i;
> + unsigned mask;
> + u32 fault = 0;
> +
> + if (npages == U64_MAX)
> + mask = 64 - VTD_PAGE_SHIFT;
> + else
> + mask = ilog2(__roundup_pow_of_two(npages));
> +
> + xa_for_each(&domain->iommu_array, i, info) {
> + nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
> +
> + if (domain->has_iotlb_device)
> + continue;
Shouldn't this be if (!domain->has_iotlb_device)?
> On Dec 22, 2023, at 11:56, Yang, Weijiang <weijiang.yang@intel.com> wrote:
>
> On 12/21/2023 11:39 PM, Yi Liu wrote:
>> + xa_for_each(&domain->iommu_array, i, info) {
>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>> +
>> + if (domain->has_iotlb_device)
>> + continue;
>
> Shouldn't this be if (!domain->has_iotlb_device)?
oops, yes it is.
> From: Yang, Weijiang <weijiang.yang@intel.com>
> Sent: Friday, December 22, 2023 11:56 AM
> > +
> > + xa_for_each(&domain->iommu_array, i, info) {
> > +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
> > +
> > + if (domain->has_iotlb_device)
> > + continue;
>
> Shouldn't this be if (!domain->has_iotlb_device)?
yes, that is wrong.
Actually it's weird to put a domain-level check inside a loop over
domain->iommu_array; that check, along with the devtlb flush, should be
done outside of that loop.
> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
>
>
>>
>> From: Yang, Weijiang <weijiang.yang@intel.com>
>> Sent: Friday, December 22, 2023 11:56 AM
>>> +
>>> + xa_for_each(&domain->iommu_array, i, info) {
>>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>>> +
>>> + if (domain->has_iotlb_device)
>>> + continue;
>>
>> Shouldn't this be if (!domain->has_iotlb_device)?
>
> yes that is wrong.
>
> actually it's weird to put domain check in a loop of domain->iommu_array.
>
> that check along with devtlb flush should be done out of that loop.
Maybe add a bool, set it outside the loop, and check the bool inside the loop.
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Friday, December 22, 2023 3:02 PM
>
>
> > On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
> >
> >
> >>
> >> From: Yang, Weijiang <weijiang.yang@intel.com>
> >> Sent: Friday, December 22, 2023 11:56 AM
> >>> +
> >>> + xa_for_each(&domain->iommu_array, i, info) {
> >>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
> >>> +
> >>> + if (domain->has_iotlb_device)
> >>> + continue;
> >>
> >> Shouldn't this be if (!domain->has_iotlb_device)?
> >
> > yes that is wrong.
> >
> > actually it's weird to put domain check in a loop of domain->iommu_array.
> >
> > that check along with devtlb flush should be done out of that loop.
>
> Maybe adding a bool, set it out of the loop, check the bool in the loop.
the point is that the dev-IOTLB flush doesn't rely on info->iommu:
nested_flush_dev_iotlb(domain, addr, mask, &fault);
so why do it inside the loop over info->iommu?
> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote:
>
>
>>
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Friday, December 22, 2023 3:02 PM
>>
>>
>>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
>>>
>>>
>>>>
>>>> From: Yang, Weijiang <weijiang.yang@intel.com>
>>>> Sent: Friday, December 22, 2023 11:56 AM
>>>>> +
>>>>> + xa_for_each(&domain->iommu_array, i, info) {
>>>>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>>>>> +
>>>>> + if (domain->has_iotlb_device)
>>>>> + continue;
>>>>
>>>> Shouldn't this be if (!domain->has_iotlb_device)?
>>>
>>> yes that is wrong.
>>>
>>> actually it's weird to put domain check in a loop of domain->iommu_array.
>>>
>>> that check along with devtlb flush should be done out of that loop.
>>
>> Maybe adding a bool, set it out of the loop, check the bool in the loop.
>
> the point is that dev iotlb doesn't rely on info->iommu:
>
> nested_flush_dev_iotlb(domain, addr, mask, &fault);
>
> then why do it in the loop of info->iommu?
yes. It should have another device loop instead.
On 2023/12/22 19:59, Liu, Yi L wrote:
>
>> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote:
>>
>>
>>>
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Sent: Friday, December 22, 2023 3:02 PM
>>>
>>>
>>>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote:
>>>>
>>>>
>>>>>
>>>>> From: Yang, Weijiang <weijiang.yang@intel.com>
>>>>> Sent: Friday, December 22, 2023 11:56 AM
>>>>>> +
>>>>>> + xa_for_each(&domain->iommu_array, i, info) {
>>>>>> +		nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);
>>>>>> +
>>>>>> + if (domain->has_iotlb_device)
>>>>>> + continue;
>>>>>
>>>>> Shouldn't this be if (!domain->has_iotlb_device)?
>>>>
>>>> yes that is wrong.
>>>>
>>>> actually it's weird to put domain check in a loop of domain->iommu_array.
>>>>
>>>> that check along with devtlb flush should be done out of that loop.
>>>
>>> Maybe adding a bool, set it out of the loop, check the bool in the loop.
>>
>> the point is that dev iotlb doesn't rely on info->iommu:
>>
>> nested_flush_dev_iotlb(domain, addr, mask, &fault);
>>
>> then why do it in the loop of info->iommu?
>
> yes. It should have another device loop instead.
Let me move the device TLB related code out of the info->iommu loop.
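Roughly, intel_nested_flush_cache() would then look like the sketch below
(not the final code; it also fixes the inverted has_iotlb_device check
pointed out earlier in the thread):

	static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
					     unsigned long npages, u32 *error)
	{
		struct iommu_domain_info *info;
		unsigned long i;
		unsigned mask;
		u32 fault = 0;

		if (npages == U64_MAX)
			mask = 64 - VTD_PAGE_SHIFT;
		else
			mask = ilog2(__roundup_pow_of_two(npages));

		/* Flush the IOTLB on each IOMMU the domain is attached to. */
		xa_for_each(&domain->iommu_array, i, info)
			nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0);

		/*
		 * The device TLB flush walks domain->devices rather than the
		 * per-IOMMU array, so do it once, outside the loop above.
		 */
		if (domain->has_iotlb_device)
			nested_flush_dev_iotlb(domain, addr, mask, &fault);

		if (fault & DMA_FSTS_ICE)
			*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE;
		if (fault & DMA_FSTS_ITE)
			*error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE;
	}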
--
Regards,
Yi Liu