RE: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together

Duan, Zhenzhong posted 1 patch 1 year, 1 month ago
RE: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
Posted by Duan, Zhenzhong 1 year, 1 month ago


>-----Original Message-----
>From: Baolu Lu <baolu.lu@linux.intel.com>
>Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>
>On 12/16/24 11:38, Zhenzhong Duan wrote:
>> Cache tag invalidation requests for a domain are accumulated until a
>> different iommu unit is found when traversing the cache_tags linked list.
>> But cache tags of same iommu unit can be distributed in the linked list,
>> this make batched flush less efficient. E.g., one device backed by iommu0
>> is attached to a domain in between two devices attaching backed by iommu1.
>>
>> Group cache tags together for same iommu unit in cache_tag_assign() to
>> maximize the performance of batched flush.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   drivers/iommu/intel/cache.c | 11 ++++++++++-
>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>> index e5b89f728ad3..726052a841e0 100644
>> --- a/drivers/iommu/intel/cache.c
>> +++ b/drivers/iommu/intel/cache.c
>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain *domain,
>u16 did,
>>   	struct intel_iommu *iommu = info->iommu;
>>   	struct cache_tag *tag, *temp;
>>   	unsigned long flags;
>> +	struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>> +					     struct cache_tag, node);
>
>Is this valid for a list head?

Right, it's not valid for a list head, but it's intentional: I just want to
avoid an unnecessary temp2 check. If I don't do it that way, the patch would be:

--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
        struct intel_iommu *iommu = info->iommu;
        struct cache_tag *tag, *temp;
        unsigned long flags;
+       struct cache_tag *temp2 = NULL;

        tag = kzalloc(sizeof(*tag), GFP_KERNEL);
        if (!tag)
@@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
                        trace_cache_tag_assign(temp);
                        return 0;
                }
+               if (temp->iommu == iommu)
+                       temp2 = temp;
        }
-       list_add_tail(&tag->node, &domain->cache_tags);
+       /*
+        * Link cache tags of same iommu unit together, so corresponding
+        * flush ops can be batched for iommu unit.
+        */
+       if (temp2)
+               list_add(&tag->node, &temp2->node);
+       else
+               list_add_tail((&tag->node, &domain->cache_tags);
+
        spin_unlock_irqrestore(&domain->cache_lock, flags);
        trace_cache_tag_assign(tag);

Thanks
Zhenzhong

>
>>
>>   	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>>   	if (!tag)
>> @@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>>   			trace_cache_tag_assign(temp);
>>   			return 0;
>>   		}
>> +		if (temp->iommu == iommu)
>> +			temp2 = temp;
>>   	}
>> -	list_add_tail(&tag->node, &domain->cache_tags);
>> +	/*
>> +	 * Link cache tags of same iommu unit together, so consponding
>> +	 * flush ops can be batched for iommu unit.
>> +	 */
>> +	list_add(&tag->node, &temp2->node);
>> +
>>   	spin_unlock_irqrestore(&domain->cache_lock, flags);
>>   	trace_cache_tag_assign(tag);
>>

Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
Posted by Baolu Lu 1 year, 1 month ago
On 2024/12/18 13:22, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Baolu Lu<baolu.lu@linux.intel.com>
>> Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>>
>> On 12/16/24 11:38, Zhenzhong Duan wrote:
>>> Cache tag invalidation requests for a domain are accumulated until a
>>> different iommu unit is found when traversing the cache_tags linked list.
>>> But cache tags of same iommu unit can be distributed in the linked list,
>>> this make batched flush less efficient. E.g., one device backed by iommu0
>>> is attached to a domain in between two devices attaching backed by iommu1.
>>>
>>> Group cache tags together for same iommu unit in cache_tag_assign() to
>>> maximize the performance of batched flush.
>>>
>>> Signed-off-by: Zhenzhong Duan<zhenzhong.duan@intel.com>
>>> ---
>>>    drivers/iommu/intel/cache.c | 11 ++++++++++-
>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>>> index e5b89f728ad3..726052a841e0 100644
>>> --- a/drivers/iommu/intel/cache.c
>>> +++ b/drivers/iommu/intel/cache.c
>>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain *domain,
>> u16 did,
>>>    	struct intel_iommu *iommu = info->iommu;
>>>    	struct cache_tag *tag, *temp;
>>>    	unsigned long flags;
>>> +	struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>>> +					     struct cache_tag, node);
>> Is this valid for a list head?
> Yes, it's not valid for list head but it's intentional, just want to
> avoid unnecessary temp2 check. If I don't do that way, patch will be:
> 
> --- a/drivers/iommu/intel/cache.c
> +++ b/drivers/iommu/intel/cache.c
> @@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
>          struct intel_iommu *iommu = info->iommu;
>          struct cache_tag *tag, *temp;
>          unsigned long flags;
> +       struct cache_tag *temp2 = NULL;
> 
>          tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>          if (!tag)
> @@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
>                          trace_cache_tag_assign(temp);
>                          return 0;
>                  }
> +               if (temp->iommu == iommu)
> +                       temp2 = temp;
>          }
> -       list_add_tail(&tag->node, &domain->cache_tags);
> +       /*
> +        * Link cache tags of same iommu unit together, so consponding
> +        * flush ops can be batched for iommu unit.
> +        */
> +       if (temp2)
> +               list_add(&tag->node, &temp2->node);
> +       else
> +               list_add_tail((&tag->node, &domain->cache_tags);
> +
>          spin_unlock_irqrestore(&domain->cache_lock, flags);
>          trace_cache_tag_assign(tag);

Perhaps we can make it like this?

diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 09694cca8752..cf0cca94d165 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain 
*domain, u16 did,
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         struct intel_iommu *iommu = info->iommu;
         struct cache_tag *tag, *temp;
+       struct list_head *prev;
         unsigned long flags;

         tag = kzalloc(sizeof(*tag), GFP_KERNEL);
@@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain 
*domain, u16 did,
                 tag->dev = iommu->iommu.dev;

         spin_lock_irqsave(&domain->cache_lock, flags);
+       prev = &domain->cache_tags;
         list_for_each_entry(temp, &domain->cache_tags, node) {
                 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
                         temp->users++;
@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain 
*domain, u16 did,
                         trace_cache_tag_assign(temp);
                         return 0;
                 }
+               if (temp->iommu == iommu)
+                       prev = &temp->node;
         }
-       list_add_tail(&tag->node, &domain->cache_tags);
+       /*
+        * Link cache tags of same iommu unit together, so corresponding
+        * flush ops can be batched for iommu unit.
+        */
+       list_add(&tag->node, prev);
+
         spin_unlock_irqrestore(&domain->cache_lock, flags);
         trace_cache_tag_assign(tag);

Thanks,
baolu
RE: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
Posted by Duan, Zhenzhong 1 year, 1 month ago

>-----Original Message-----
>From: Baolu Lu <baolu.lu@linux.intel.com>
>Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit together
>
>On 2024/12/18 13:22, Duan, Zhenzhong wrote:
>>> -----Original Message-----
>>> From: Baolu Lu<baolu.lu@linux.intel.com>
>>> Subject: Re: [PATCH] iommu/vt-d: Link cache tags of same iommu unit
>together
>>>
>>> On 12/16/24 11:38, Zhenzhong Duan wrote:
>>>> Cache tag invalidation requests for a domain are accumulated until a
>>>> different iommu unit is found when traversing the cache_tags linked list.
>>>> But cache tags of same iommu unit can be distributed in the linked list,
>>>> this make batched flush less efficient. E.g., one device backed by iommu0
>>>> is attached to a domain in between two devices attaching backed by iommu1.
>>>>
>>>> Group cache tags together for same iommu unit in cache_tag_assign() to
>>>> maximize the performance of batched flush.
>>>>
>>>> Signed-off-by: Zhenzhong Duan<zhenzhong.duan@intel.com>
>>>> ---
>>>>    drivers/iommu/intel/cache.c | 11 ++++++++++-
>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>>>> index e5b89f728ad3..726052a841e0 100644
>>>> --- a/drivers/iommu/intel/cache.c
>>>> +++ b/drivers/iommu/intel/cache.c
>>>> @@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain
>*domain,
>>> u16 did,
>>>>    	struct intel_iommu *iommu = info->iommu;
>>>>    	struct cache_tag *tag, *temp;
>>>>    	unsigned long flags;
>>>> +	struct cache_tag *temp2 = list_entry(&domain->cache_tags,
>>>> +					     struct cache_tag, node);
>>> Is this valid for a list head?
>> Yes, it's not valid for list head but it's intentional, just want to
>> avoid unnecessary temp2 check. If I don't do that way, patch will be:
>>
>> --- a/drivers/iommu/intel/cache.c
>> +++ b/drivers/iommu/intel/cache.c
>> @@ -48,6 +48,7 @@ static int cache_tag_assign(struct dmar_domain *domain,
>u16 did,
>>          struct intel_iommu *iommu = info->iommu;
>>          struct cache_tag *tag, *temp;
>>          unsigned long flags;
>> +       struct cache_tag *temp2 = NULL;
>>
>>          tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>>          if (!tag)
>> @@ -73,8 +74,18 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>>                          trace_cache_tag_assign(temp);
>>                          return 0;
>>                  }
>> +               if (temp->iommu == iommu)
>> +                       temp2 = temp;
>>          }
>> -       list_add_tail(&tag->node, &domain->cache_tags);
>> +       /*
>> +        * Link cache tags of same iommu unit together, so consponding
>> +        * flush ops can be batched for iommu unit.
>> +        */
>> +       if (temp2)
>> +               list_add(&tag->node, &temp2->node);
>> +       else
>> +               list_add_tail((&tag->node, &domain->cache_tags);
>> +
>>          spin_unlock_irqrestore(&domain->cache_lock, flags);
>>          trace_cache_tag_assign(tag);
>
>Perhaps we can make it like this?

Good suggestion! Will do.

Thanks
Zhenzhong

>
>diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>index 09694cca8752..cf0cca94d165 100644
>--- a/drivers/iommu/intel/cache.c
>+++ b/drivers/iommu/intel/cache.c
>@@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>         struct device_domain_info *info = dev_iommu_priv_get(dev);
>         struct intel_iommu *iommu = info->iommu;
>         struct cache_tag *tag, *temp;
>+       struct list_head *prev;
>         unsigned long flags;
>
>         tag = kzalloc(sizeof(*tag), GFP_KERNEL);
>@@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>                 tag->dev = iommu->iommu.dev;
>
>         spin_lock_irqsave(&domain->cache_lock, flags);
>+       prev = &domain->cache_tags;
>         list_for_each_entry(temp, &domain->cache_tags, node) {
>                 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
>                         temp->users++;
>@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain
>*domain, u16 did,
>                         trace_cache_tag_assign(temp);
>                         return 0;
>                 }
>+               if (temp->iommu == iommu)
>+                       prev = &temp->node;
>         }
>-       list_add_tail(&tag->node, &domain->cache_tags);
>+       /*
>+        * Link cache tags of same iommu unit together, so consponding
>+        * flush ops can be batched for iommu unit.
>+        */
>+       list_add(&tag->node, prev);
>+
>         spin_unlock_irqrestore(&domain->cache_lock, flags);
>         trace_cache_tag_assign(tag);
>
>Thanks,
>baolu