[PATCH] iommu/vt-d: Introduce a rb_tree for looking up device

Huang Jiaqing posted 1 patch 2 years, 3 months ago
drivers/iommu/intel/iommu.c | 68 +++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.h |  8 +++++
drivers/iommu/intel/svm.c   | 13 +++----
3 files changed, 81 insertions(+), 8 deletions(-)
[PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by Huang Jiaqing 2 years, 3 months ago
The existing IO page fault handler locates the PCI device by calling
pci_get_domain_bus_and_slot(), which searches the list of all PCI
devices until the desired PCI device is found. This is inefficient
because the algorithm efficiency of searching a list is O(n). In the
critical path of handling an IO page fault, this can cause a significant
performance bottleneck.

To improve the performance of the IO page fault handler, replace
pci_get_domain_bus_and_slot() with a local red-black tree. A red-black
tree is a self-balancing binary search tree, which means that the
average time complexity of searching a red-black tree is O(log(n)). This
is significantly faster than O(n), so it can significantly improve the
performance of the IO page fault handler.

In addition, we can only insert the affected devices (those that have IO
page fault enabled) into the red-black tree. This can further improve
the performance of the IO page fault handler.

Signed-off-by: Huang Jiaqing <jiaqing.huang@intel.com>
---
 drivers/iommu/intel/iommu.c | 68 +++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/iommu.h |  8 +++++
 drivers/iommu/intel/svm.c   | 13 +++----
 3 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5c8c5cdc36cf..fcebb7493d99 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -235,6 +235,65 @@ clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
 	clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
 }
 
+
+struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+	struct device_domain_info *data = NULL;
+	struct rb_node *node;
+
+	down_read(&iommu->iopf_device_sem);
+
+	node = iommu->iopf_device_rbtree.rb_node;
+	while (node) {
+		data = container_of(node, struct device_domain_info, node);
+		s16 result = RB_NODE_CMP(bus, devfn, data->bus, data->devfn);
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			break;
+	}
+	up_read(&iommu->iopf_device_sem);
+
+	return node ? data : NULL;
+}
+
+static int device_rbtree_insert(struct intel_iommu *iommu, struct device_domain_info *data)
+{
+	struct rb_node **new, *parent = NULL;
+
+	down_write(&iommu->iopf_device_sem);
+
+	new = &(iommu->iopf_device_rbtree.rb_node);
+	while (*new) {
+		struct device_domain_info *this = container_of(*new, struct device_domain_info, node);
+		s16 result = RB_NODE_CMP(data->bus, data->devfn, this->bus, this->devfn);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, &iommu->iopf_device_rbtree);
+
+	up_write(&iommu->iopf_device_sem);
+	return 0;
+}
+
+static void device_rbtree_remove(struct intel_iommu *iommu, struct device_domain_info *data)
+{
+	down_write(&iommu->iopf_device_sem);
+	rb_erase(&data->node, &iommu->iopf_device_rbtree);
+	up_write(&iommu->iopf_device_sem);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *	1. This domain creats a static 1:1 mapping to all usable memory.
@@ -3920,6 +3979,9 @@ int __init intel_iommu_init(void)
 			iommu_enable_translation(iommu);
 
 		iommu_disable_protect_mem_regions(iommu);
+
+		iommu->iopf_device_rbtree = RB_ROOT;
+		init_rwsem(&iommu->iopf_device_sem);
 	}
 	up_read(&dmar_global_lock);
 
@@ -4601,6 +4663,11 @@ static int intel_iommu_enable_iopf(struct device *dev)
 	ret = pci_enable_pri(pdev, PRQ_DEPTH);
 	if (ret)
 		goto iopf_unregister_handler;
+
+	ret = device_rbtree_insert(iommu, info);
+	if(ret)
+		goto iopf_unregister_handler;
+
 	info->pri_enabled = 1;
 
 	return 0;
@@ -4620,6 +4687,7 @@ static int intel_iommu_disable_iopf(struct device *dev)
 
 	if (!info->pri_enabled)
 		return -EINVAL;
+	device_rbtree_remove(iommu, info);
 
 	/*
 	 * PCIe spec states that by clearing PRI enable bit, the Page
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1c5e1d88862b..d49c9facb40e 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -360,6 +360,8 @@
 /* PERFINTRSTS_REG */
 #define DMA_PERFINTRSTS_PIS	((u32)1)
 
+#define RB_NODE_CMP(bus1, devfn1, bus2, devfn2) (s16)(PCI_DEVID(bus1, devfn1) - PCI_DEVID(bus2, devfn2))
+
 #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)			\
 do {									\
 	cycles_t start_time = get_cycles();				\
@@ -682,6 +684,9 @@ struct intel_iommu {
 	struct q_inval  *qi;            /* Queued invalidation info */
 	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
 
+	struct rb_root iopf_device_rbtree;
+	struct rw_semaphore iopf_device_sem;
+
 #ifdef CONFIG_IRQ_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
 	struct irq_domain *ir_domain;
@@ -715,6 +720,7 @@ struct device_domain_info {
 	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
 	struct pasid_table *pasid_table; /* pasid table */
+	struct rb_node node; /*device tracking node(lookup by (bus, devfn))*/
 };
 
 static inline void __iommu_flush_cache(
@@ -844,6 +850,8 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg);
 struct iommu_domain *intel_svm_domain_alloc(void);
 void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
+struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu,
+				u8 bus, u8 devfn);
 
 struct intel_svm_dev {
 	struct list_head list;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index e95b339e9cdc..78a10677630c 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -664,7 +664,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	struct intel_iommu *iommu = d;
 	struct page_req_dsc *req;
 	int head, tail, handled;
-	struct pci_dev *pdev;
+	struct device_domain_info *info;
 	u64 address;
 
 	/*
@@ -710,23 +710,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
 			goto prq_advance;
 
-		pdev = pci_get_domain_bus_and_slot(iommu->segment,
-						   PCI_BUS_NUM(req->rid),
-						   req->rid & 0xff);
+		info = device_rbtree_find(iommu, PCI_BUS_NUM(req->rid), req->rid & 0xff);
 		/*
 		 * If prq is to be handled outside iommu driver via receiver of
 		 * the fault notifiers, we skip the page response here.
 		 */
-		if (!pdev)
+		if (!info)
 			goto bad_req;
 
-		if (intel_svm_prq_report(iommu, &pdev->dev, req))
+		if (intel_svm_prq_report(iommu, info->dev, req))
 			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 		else
-			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+			trace_prq_report(iommu, info->dev, req->qw_0, req->qw_1,
 					 req->priv_data[0], req->priv_data[1],
 					 iommu->prq_seq_number++);
-		pci_dev_put(pdev);
 prq_advance:
 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
 	}
-- 
2.31.1
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by Joerg Roedel 2 years, 2 months ago
On Mon, Aug 21, 2023 at 12:16:59AM -0700, Huang Jiaqing wrote:
> The existing IO page fault handler locates the PCI device by calling
> pci_get_domain_bus_and_slot(), which searches the list of all PCI
> devices until the desired PCI device is found. This is inefficient
> because the algorithm efficiency of searching a list is O(n). In the
> critical path of handling an IO page fault, this can cause a significant
> performance bottleneck.

Can you elaborate a little more on the 'significant performance
bottleneck' part? Where do you see this as a problem?

Regards,

	Joerg
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by Huang, Jiaqing 2 years, 2 months ago
On 9/25/2023 4:12 PM, Joerg Roedel wrote:

> On Mon, Aug 21, 2023 at 12:16:59AM -0700, Huang Jiaqing wrote:
>> The existing IO page fault handler locates the PCI device by calling
>> pci_get_domain_bus_and_slot(), which searches the list of all PCI
>> devices until the desired PCI device is found. This is inefficient
>> because the algorithm efficiency of searching a list is O(n). In the
>> critical path of handling an IO page fault, this can cause a significant
>> performance bottleneck.
> Can you elaborate a little more on the 'significant performance
> bottleneck' part? Where do you see this as a problem?
>
> Regards,
>
> 	Joerg
While lots of dsa devices were enabled, parallel dsa_test with large 
transfer size
would be executed ineffciently and cause cpu stuck in 
pci_get_domain_bus_and_slot
by lock competition. The introduced patch could significantly improve 
the speed and
prevent the CPU from getting sutck. It maybe confusing for "significant 
performance
bottleneck" since it didn't benefit all the cases, would rephase it in 
the new patch. Thanks!

BRs,
Jiaqing
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by kernel test robot 2 years, 3 months ago
Hi Huang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.5-rc7]
[also build test WARNING on linus/master]
[cannot apply to joro-iommu/next next-20230823]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Huang-Jiaqing/iommu-vt-d-Introduce-a-rb_tree-for-looking-up-device/20230821-151945
base:   v6.5-rc7
patch link:    https://lore.kernel.org/r/20230821071659.123981-1-jiaqing.huang%40intel.com
patch subject: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20230823/202308231801.6MtvqTmB-lkp@intel.com/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project.git 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce: (https://download.01.org/0day-ci/archive/20230823/202308231801.6MtvqTmB-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308231801.6MtvqTmB-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/iommu/intel/iommu.c:239:28: warning: no previous prototype for function 'device_rbtree_find' [-Wmissing-prototypes]
   struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
                              ^
   drivers/iommu/intel/iommu.c:239:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
   ^
   static 
   1 warning generated.


vim +/device_rbtree_find +239 drivers/iommu/intel/iommu.c

   237	
   238	
 > 239	struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
   240	{
   241		struct device_domain_info *data = NULL;
   242		struct rb_node *node;
   243	
   244		down_read(&iommu->iopf_device_sem);
   245	
   246		node = iommu->iopf_device_rbtree.rb_node;
   247		while (node) {
   248			data = container_of(node, struct device_domain_info, node);
   249			s16 result = RB_NODE_CMP(bus, devfn, data->bus, data->devfn);
   250	
   251			if (result < 0)
   252				node = node->rb_left;
   253			else if (result > 0)
   254				node = node->rb_right;
   255			else
   256				break;
   257		}
   258		up_read(&iommu->iopf_device_sem);
   259	
   260		return node ? data : NULL;
   261	}
   262	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by kernel test robot 2 years, 3 months ago
Hi Huang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.5-rc7]
[also build test WARNING on linus/master]
[cannot apply to joro-iommu/next next-20230822]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Huang-Jiaqing/iommu-vt-d-Introduce-a-rb_tree-for-looking-up-device/20230821-151945
base:   v6.5-rc7
patch link:    https://lore.kernel.org/r/20230821071659.123981-1-jiaqing.huang%40intel.com
patch subject: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230823/202308230922.9PKf5JkV-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230823/202308230922.9PKf5JkV-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308230922.9PKf5JkV-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/iommu/intel/iommu.c:239:28: warning: no previous prototype for 'device_rbtree_find' [-Wmissing-prototypes]
     239 | struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
         |                            ^~~~~~~~~~~~~~~~~~


vim +/device_rbtree_find +239 drivers/iommu/intel/iommu.c

   237	
   238	
 > 239	struct device_domain_info *device_rbtree_find(struct intel_iommu *iommu, u8 bus, u8 devfn)
   240	{
   241		struct device_domain_info *data = NULL;
   242		struct rb_node *node;
   243	
   244		down_read(&iommu->iopf_device_sem);
   245	
   246		node = iommu->iopf_device_rbtree.rb_node;
   247		while (node) {
   248			data = container_of(node, struct device_domain_info, node);
   249			s16 result = RB_NODE_CMP(bus, devfn, data->bus, data->devfn);
   250	
   251			if (result < 0)
   252				node = node->rb_left;
   253			else if (result > 0)
   254				node = node->rb_right;
   255			else
   256				break;
   257		}
   258		up_read(&iommu->iopf_device_sem);
   259	
   260		return node ? data : NULL;
   261	}
   262	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by Jason Gunthorpe 2 years, 3 months ago
On Mon, Aug 21, 2023 at 12:16:59AM -0700, Huang Jiaqing wrote:
> The existing IO page fault handler locates the PCI device by calling
> pci_get_domain_bus_and_slot(), which searches the list of all PCI
> devices until the desired PCI device is found. This is inefficient
> because the algorithm efficiency of searching a list is O(n). In the
> critical path of handling an IO page fault, this can cause a significant
> performance bottleneck.
> 
> To improve the performance of the IO page fault handler, replace
> pci_get_domain_bus_and_slot() with a local red-black tree. A red-black
> tree is a self-balancing binary search tree, which means that the
> average time complexity of searching a red-black tree is O(log(n)). This
> is significantly faster than O(n), so it can significantly improve the
> performance of the IO page fault handler.
> 
> In addition, we can only insert the affected devices (those that have IO
> page fault enabled) into the red-black tree. This can further improve
> the performance of the IO page fault handler.
> 
> Signed-off-by: Huang Jiaqing <jiaqing.huang@intel.com>
> ---
>  drivers/iommu/intel/iommu.c | 68 +++++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel/iommu.h |  8 +++++
>  drivers/iommu/intel/svm.c   | 13 +++----
>  3 files changed, 81 insertions(+), 8 deletions(-)

I feel like this should be a helper library provided by the core
code, doesn't every PRI driver basically need the same thing?

Jason
Re: [PATCH] iommu/vt-d: Introduce a rb_tree for looking up device
Posted by Baolu Lu 2 years, 3 months ago
On 2023/8/22 0:52, Jason Gunthorpe wrote:
> On Mon, Aug 21, 2023 at 12:16:59AM -0700, Huang Jiaqing wrote:
>> The existing IO page fault handler locates the PCI device by calling
>> pci_get_domain_bus_and_slot(), which searches the list of all PCI
>> devices until the desired PCI device is found. This is inefficient
>> because the algorithm efficiency of searching a list is O(n). In the
>> critical path of handling an IO page fault, this can cause a significant
>> performance bottleneck.
>>
>> To improve the performance of the IO page fault handler, replace
>> pci_get_domain_bus_and_slot() with a local red-black tree. A red-black
>> tree is a self-balancing binary search tree, which means that the
>> average time complexity of searching a red-black tree is O(log(n)). This
>> is significantly faster than O(n), so it can significantly improve the
>> performance of the IO page fault handler.
>>
>> In addition, we can only insert the affected devices (those that have IO
>> page fault enabled) into the red-black tree. This can further improve
>> the performance of the IO page fault handler.
>>
>> Signed-off-by: Huang Jiaqing<jiaqing.huang@intel.com>
>> ---
>>   drivers/iommu/intel/iommu.c | 68 +++++++++++++++++++++++++++++++++++++
>>   drivers/iommu/intel/iommu.h |  8 +++++
>>   drivers/iommu/intel/svm.c   | 13 +++----
>>   3 files changed, 81 insertions(+), 8 deletions(-)
> I feel like this should be a helper library provided by the core
> code, doesn't every PRI driver basically need the same thing?

It seems to be. pci_get_domain_bus_and_slot() is also used in the amd
driver. And the risc-v iommu driver under discussion is also proposing
this.

Best regards,
baolu