>-----Original Message-----
>From: Baolu Lu <baolu.lu@linux.intel.com>
>Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>
>On 12/16/24 11:38, Zhenzhong Duan wrote:
>> Cache tag invalidation requests for a domain are accumulated until a
>> different iommu unit is found when traversing the cache_tags linked list.
>> But cache tags of the same iommu unit can be scattered across the linked list,
>> which makes batched flush less efficient. E.g., one device backed by iommu0
>> is attached to a domain in between the attachments of two devices backed by iommu1.
>>
>> Group cache tags together for same iommu unit in cache_tag_assign() to
>> maximize the performance of batched flush.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> drivers/iommu/intel/cache.c | 11 ++++++++++-
>> 1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>> index e5b89f728ad3..726052a841e0 100644
>> --- a/drivers/iommu/intel/cache.c
>> +++ b/drivers/iommu/intel/cache.c
>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain *domain,
>u16 did,
>> struct intel_iommu *iommu = info->iommu;
>> struct cache_tag *tag, *temp;
>> unsigned long flags;
>> + struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>> + struct cache_tag, node);
>
>Is this valid for a list head?
Right, it's not valid for a list head, but it's intentional: I just want to
avoid an unnecessary temp2 check. If I don't do it that way, the patch will be:
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
unsigned long flags;
+ struct cache_tag *temp2 = NULL;
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
if (!tag)
@@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
trace_cache_tag_assign(temp);
return 0;
}
+ if (temp->iommu == iommu)
+ temp2 = temp;
}
- list_add_tail(&tag->node, &domain->cache_tags);
+ /*
+ * Link cache tags of the same iommu unit together, so corresponding
+ * flush ops can be batched per iommu unit.
+ */
+ if (temp2)
+ list_add(&tag->node, &temp2->node);
+ else
+ list_add_tail((&tag->node, &domain->cache_tags);
+
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);
Thanks
Zhenzhong
>
>>
>> tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>> if (!tag)
>> @@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>> trace_cache_tag_assign(temp);
>> return 0;
>> }
>> + if (temp->iommu == iommu)
>> + temp2 = temp;
>> }
>> - list_add_tail(&tag->node, &domain->cache_tags);
>> + /*
>> + * Link cache tags of the same iommu unit together, so corresponding
>> + * flush ops can be batched per iommu unit.
>> + */
>> + list_add(&tag->node, &temp2->node);
>> +
>> spin_unlock_irqrestore(&domain->cache_lock, flags);
>> trace_cache_tag_assign(tag);
>>
On 2024/12/18 13:22, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Baolu Lu<baolu.lu@linux.intel.com>
>> Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>>
>> On 12/16/24 11:38, Zhenzhong Duan wrote:
>>> Cache tag invalidation requests for a domain are accumulated until a
>>> different iommu unit is found when traversing the cache_tags linked list.
>>> But cache tags of the same iommu unit can be scattered across the linked list,
>>> which makes batched flush less efficient. E.g., one device backed by iommu0
>>> is attached to a domain in between the attachments of two devices backed by iommu1.
>>>
>>> Group cache tags together for same iommu unit in cache_tag_assign() to
>>> maximize the performance of batched flush.
>>>
>>> Signed-off-by: Zhenzhong Duan<zhenzhong.duan@intel.com>
>>> ---
>>> drivers/iommu/intel/cache.c | 11 ++++++++++-
>>> 1 file changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>>> index e5b89f728ad3..726052a841e0 100644
>>> --- a/drivers/iommu/intel/cache.c
>>> +++ b/drivers/iommu/intel/cache.c
>>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain *domain,
>> u16 did,
>>> struct intel_iommu *iommu = info->iommu;
>>> struct cache_tag *tag, *temp;
>>> unsigned long flags;
>>> + struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>>> + struct cache_tag, node);
>> Is this valid for a list head?
> Right, it's not valid for a list head, but it's intentional: I just want to
> avoid an unnecessary temp2 check. If I don't do it that way, the patch will be:
>
> --- a/drivers/iommu/intel/cache.c
> +++ b/drivers/iommu/intel/cache.c
> @@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
> struct intel_iommu *iommu = info->iommu;
> struct cache_tag *tag, *temp;
> unsigned long flags;
> + struct cache_tag *temp2 = NULL;
>
> tag = kzalloc(sizeof(*tag), GFP_KERNEL);
> if (!tag)
> @@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
> trace_cache_tag_assign(temp);
> return 0;
> }
> + if (temp->iommu == iommu)
> + temp2 = temp;
> }
> - list_add_tail(&tag->node, &domain->cache_tags);
> + /*
> + * Link cache tags of the same iommu unit together, so corresponding
> + * flush ops can be batched per iommu unit.
> + */
> + if (temp2)
> + list_add(&tag->node, &temp2->node);
> + else
> + list_add_tail((&tag->node, &domain->cache_tags);
> +
> spin_unlock_irqrestore(&domain->cache_lock, flags);
> trace_cache_tag_assign(tag);
Perhaps we can make it like this?
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 09694cca8752..cf0cca94d165 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain
*domain, u16 did,
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
+ struct list_head *prev;
unsigned long flags;
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
@@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain
*domain, u16 did,
tag->dev = iommu->iommu.dev;
spin_lock_irqsave(&domain->cache_lock, flags);
+ prev = &domain->cache_tags;
list_for_each_entry(temp, &domain->cache_tags, node) {
if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
temp->users++;
@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain
*domain, u16 did,
trace_cache_tag_assign(temp);
return 0;
}
+ if (temp->iommu == iommu)
+ prev = &temp->node;
}
- list_add_tail(&tag->node, &domain->cache_tags);
+ /*
+ * Link cache tags of the same iommu unit together, so corresponding
+ * flush ops can be batched per iommu unit.
+ */
+ list_add(&tag->node, prev);
+
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);
Thanks,
baolu
>-----Original Message-----
>From: Baolu Lu <baolu.lu@linux.intel.com>
>Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>
>On 2024/12/18 13:22, Duan, Zhenzhong wrote:
>>> -----Original Message-----
>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>> Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit
>together
>>>
>>> On 12/16/24 11:38, Zhenzhong Duan wrote:
>>>> Cache tag invalidation requests for a domain are accumulated until a
>>>> different iommu unit is found when traversing the cache_tags linked list.
>>>> But cache tags of the same iommu unit can be scattered across the linked list,
>>>> which makes batched flush less efficient. E.g., one device backed by iommu0
>>>> is attached to a domain in between the attachments of two devices backed by iommu1.
>>>>
>>>> Group cache tags together for same iommu unit in cache_tag_assign() to
>>>> maximize the performance of batched flush.
>>>>
>>>> Signed-off-by: Zhenzhong Duan<zhenzhong.duan@intel.com>
>>>> ---
>>>> drivers/iommu/intel/cache.c | 11 ++++++++++-
>>>> 1 file changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>>>> index e5b89f728ad3..726052a841e0 100644
>>>> --- a/drivers/iommu/intel/cache.c
>>>> +++ b/drivers/iommu/intel/cache.c
>>>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain
>*domain,
>>> u16 did,
>>>> struct intel_iommu *iommu = info->iommu;
>>>> struct cache_tag *tag, *temp;
>>>> unsigned long flags;
>>>> + struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>>>> + struct cache_tag, node);
>>> Is this valid for a list head?
>> Right, it's not valid for a list head, but it's intentional: I just want to
>> avoid an unnecessary temp2 check. If I don't do it that way, the patch will be:
>>
>> --- a/drivers/iommu/intel/cache.c
>> +++ b/drivers/iommu/intel/cache.c
>> @@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain,
>u16 did,
>> struct intel_iommu *iommu = info->iommu;
>> struct cache_tag *tag, *temp;
>> unsigned long flags;
>> + struct cache_tag *temp2 = NULL;
>>
>> tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>> if (!tag)
>> @@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>> trace_cache_tag_assign(temp);
>> return 0;
>> }
>> + if (temp->iommu == iommu)
>> + temp2 = temp;
>> }
>> - list_add_tail(&tag->node, &domain->cache_tags);
>> + /*
>> + * Link cache tags of the same iommu unit together, so corresponding
>> + * flush ops can be batched per iommu unit.
>> + */
>> + if (temp2)
>> + list_add(&tag->node, &temp2->node);
>> + else
>> + list_add_tail((&tag->node, &domain->cache_tags);
>> +
>> spin_unlock_irqrestore(&domain->cache_lock, flags);
>> trace_cache_tag_assign(tag);
>
>Perhaps we can make it like this?
Good suggestion! Will do.
Thanks
Zhenzhong
>
>diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>index 09694cca8752..cf0cca94d165 100644
>--- a/drivers/iommu/intel/cache.c
>+++ b/drivers/iommu/intel/cache.c
>@@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
> struct device_domain_info *info = dev_iommu_priv_get(dev);
> struct intel_iommu *iommu = info->iommu;
> struct cache_tag *tag, *temp;
>+ struct list_head *prev;
> unsigned long flags;
>
> tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>@@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
> tag->dev = iommu->iommu.dev;
>
> spin_lock_irqsave(&domain->cache_lock, flags);
>+ prev = &domain->cache_tags;
> list_for_each_entry(temp, &domain->cache_tags, node) {
> if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
> temp->users++;
>@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
> trace_cache_tag_assign(temp);
> return 0;
> }
>+ if (temp->iommu == iommu)
>+ prev = &temp->node;
> }
>- list_add_tail(&tag->node, &domain->cache_tags);
>+ /*
>+ * Link cache tags of the same iommu unit together, so corresponding
>+ * flush ops can be batched per iommu unit.
>+ */
>+ list_add(&tag->node, prev);
>+
> spin_unlock_irqrestore(&domain->cache_lock, flags);
> trace_cache_tag_assign(tag);
>
>Thanks,
>baolu
© 2016 - 2026 Red Hat, Inc.