[PATCH 2/2] PCI: endpoint: pci-epf-vntb: Add MSI doorbell support

Frank Li posted 2 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH 2/2] PCI: endpoint: pci-epf-vntb: Add MSI doorbell support
Posted by Frank Li 1 month, 2 weeks ago
Add MSI doorbell support to reduce latency between PCI host and EP.

Before this change:
  ping 169.254.172.137
  64 bytes from 169.254.172.137: icmp_seq=1 ttl=64 time=0.575 ms
  64 bytes from 169.254.172.137: icmp_seq=2 ttl=64 time=1.80 ms
  64 bytes from 169.254.172.137: icmp_seq=3 ttl=64 time=8.19 ms
  64 bytes from 169.254.172.137: icmp_seq=4 ttl=64 time=2.00 ms

After this change:
  ping 169.254.144.71
  64 bytes from 169.254.144.71: icmp_seq=1 ttl=64 time=0.215 ms
  64 bytes from 169.254.144.71: icmp_seq=2 ttl=64 time=0.456 ms
  64 bytes from 169.254.144.71: icmp_seq=3 ttl=64 time=0.448 ms

Change u64 db to atomic64_t because different doorbells may happen at the
same time.

Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 153 +++++++++++++++++++++++---
 1 file changed, 136 insertions(+), 17 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 83e9ab10f9c4fc2b485d5463faa2172500f12999..1c586205835fe9c7c5352e74819bccb4ece84438 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -36,11 +36,13 @@
  * PCIe Root Port                        PCI EP
  */
 
+#include <linux/atomic.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 
+#include <linux/pci-ep-msi.h>
 #include <linux/pci-epc.h>
 #include <linux/pci-epf.h>
 #include <linux/ntb.h>
@@ -126,12 +128,13 @@ struct epf_ntb {
 	u32 db_count;
 	u32 spad_count;
 	u64 mws_size[MAX_MW];
-	u64 db;
+	atomic64_t db;
 	u32 vbus_number;
 	u16 vntb_pid;
 	u16 vntb_vid;
 
 	bool linkup;
+	bool msi_doorbell;
 	u32 spad_size;
 
 	enum pci_barno epf_ntb_bar[VNTB_BAR_NUM];
@@ -258,9 +261,9 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
 
 	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
 
-	for (i = 1; i < ntb->db_count; i++) {
+	for (i = 1; i < ntb->db_count && !ntb->msi_doorbell; i++) {
 		if (ntb->epf_db[i]) {
-			ntb->db |= 1 << (i - 1);
+			atomic64_or(1 << (i - 1), &ntb->db);
 			ntb_db_event(&ntb->ntb, i);
 			ntb->epf_db[i] = 0;
 		}
@@ -319,7 +322,24 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
 
 reset_handler:
 	queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler,
-			   msecs_to_jiffies(5));
+			   ntb->msi_doorbell ? msecs_to_jiffies(500) : msecs_to_jiffies(5));
+}
+
+static irqreturn_t epf_ntb_doorbell_handler(int irq, void *data)
+{
+	struct epf_ntb *ntb = data;
+	int i = 0;
+
+	for (i = 1; i < ntb->db_count; i++)
+		if (irq == ntb->epf->db_msg[i].virq) {
+			atomic64_or(1 << (i - 1), &ntb->db);
+			ntb_db_event(&ntb->ntb, i);
+		}
+
+	if (irq == ntb->epf->db_msg[0].virq)
+		queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler, 0);
+
+	return IRQ_HANDLED;
 }
 
 /**
@@ -500,6 +520,90 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
 	return 0;
 }
 
+static int epf_ntb_db_bar_init_msi_doorbell(struct epf_ntb *ntb,
+					    struct pci_epf_bar *db_bar,
+					    const struct pci_epc_features *epc_features,
+					    enum pci_barno barno)
+{
+	struct pci_epf *epf = ntb->epf;
+	dma_addr_t low, high;
+	struct msi_msg *msg;
+	size_t sz;
+	int ret;
+	int i;
+
+	ret = pci_epf_alloc_doorbell(epf,  ntb->db_count);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ntb->db_count; i++) {
+		ret = request_irq(epf->db_msg[i].virq, epf_ntb_doorbell_handler,
+				  0, "vntb_db", ntb);
+
+		if (ret) {
+			dev_err(&epf->dev,
+				"Failed to request doorbell IRQ: %d\n",
+				epf->db_msg[i].virq);
+			goto err_request_irq;
+		}
+	}
+
+	msg = &epf->db_msg[0].msg;
+
+	high = 0;
+	low = (u64)msg->address_hi << 32 | msg->address_lo;
+
+	for (i = 0; i < ntb->db_count; i++) {
+		struct msi_msg *msg = &epf->db_msg[i].msg;
+		dma_addr_t addr = (u64)msg->address_hi << 32 | msg->address_lo;
+
+		low = min(low, addr);
+		high = max(high, addr);
+	}
+
+	sz = high - low + sizeof(u32);
+
+	ret = pci_epf_set_inbound_space(epf, sz, barno,
+					epc_features, 0, false, low);
+
+	ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, db_bar);
+	if (ret) {
+		dev_err(&epf->dev, "Doorbell BAR set failed\n");
+		goto err_request_irq;
+	}
+
+	for (i = 0; i < ntb->db_count; i++) {
+		struct msi_msg *msg = &epf->db_msg[i].msg;
+		dma_addr_t addr;
+		size_t offset;
+
+		ret = pci_epf_align_inbound_addr(epf, db_bar->barno,
+				((u64)msg->address_hi << 32) | msg->address_lo,
+				&addr, &offset);
+
+		if (ret) {
+			ntb->msi_doorbell = false;
+			goto err_request_irq;
+		}
+
+		ntb->reg->db_data[i] = msg->data;
+		ntb->reg->db_offset[i] = offset;
+	}
+
+	ntb->reg->db_entry_size = 0;
+
+	ntb->msi_doorbell = true;
+
+	return 0;
+
+err_request_irq:
+	for (i--; i >= 0; i--)
+		free_irq(epf->db_msg[i].virq, ntb);
+
+	pci_epf_free_doorbell(ntb->epf);
+	return ret;
+}
+
 /**
  * epf_ntb_db_bar_init() - Configure Doorbell window BARs
  * @ntb: NTB device that facilitates communication between HOST and VHOST
@@ -520,22 +624,27 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb)
 					    ntb->epf->func_no,
 					    ntb->epf->vfunc_no);
 	barno = ntb->epf_ntb_bar[BAR_DB];
-
-	mw_addr = pci_epf_alloc_space(ntb->epf, size, barno, epc_features, 0);
-	if (!mw_addr) {
-		dev_err(dev, "Failed to allocate OB address\n");
-		return -ENOMEM;
-	}
-
-	ntb->epf_db = mw_addr;
-
 	epf_bar = &ntb->epf->bar[barno];
 
-	ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, epf_bar);
+	ret = epf_ntb_db_bar_init_msi_doorbell(ntb, epf_bar, epc_features, barno);
 	if (ret) {
-		dev_err(dev, "Doorbell BAR set failed\n");
+		/* fall back to polling mode */
+		mw_addr = pci_epf_alloc_space(ntb->epf, size, barno, epc_features, 0);
+		if (!mw_addr) {
+			dev_err(dev, "Failed to allocate OB address\n");
+			return -ENOMEM;
+		}
+
+		ntb->epf_db = mw_addr;
+
+		ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no,
+				      ntb->epf->vfunc_no, epf_bar);
+		if (ret) {
+			dev_err(dev, "Doorbell BAR set failed\n");
 			goto err_alloc_peer_mem;
+		}
 	}
+
 	return ret;
 
 err_alloc_peer_mem:
@@ -554,6 +663,16 @@ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb)
 {
 	enum pci_barno barno;
 
+	if (ntb->msi_doorbell) {
+		int i;
+
+		for (i = 0; i < ntb->db_count; i++)
+			free_irq(ntb->epf->db_msg[i].virq, ntb);
+	}
+
+	if (ntb->epf->db_msg)
+		pci_epf_free_doorbell(ntb->epf);
+
 	barno = ntb->epf_ntb_bar[BAR_DB];
 	pci_epf_free_space(ntb->epf, ntb->epf_db, barno, 0);
 	pci_epc_clear_bar(ntb->epf->epc,
@@ -1268,7 +1387,7 @@ static u64 vntb_epf_db_read(struct ntb_dev *ndev)
 {
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 
-	return ntb->db;
+	return atomic64_read(&ntb->db);
 }
 
 static int vntb_epf_mw_get_align(struct ntb_dev *ndev, int pidx, int idx,
@@ -1308,7 +1427,7 @@ static int vntb_epf_db_clear(struct ntb_dev *ndev, u64 db_bits)
 {
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 
-	ntb->db &= ~db_bits;
+	atomic64_and(~db_bits, &ntb->db);
 	return 0;
 }
 

-- 
2.34.1
Re: [PATCH 2/2] PCI: endpoint: pci-epf-vntb: Add MSI doorbell support
Posted by Manivannan Sadhasivam 1 month ago
On Fri, Aug 15, 2025 at 06:20:54PM GMT, Frank Li wrote:
> Add MSI doorbell support to reduce latency between PCI host and EP.
> 
> Before this change:
>   ping 169.254.172.137
>   64 bytes from 169.254.172.137: icmp_seq=1 ttl=64 time=0.575 ms
>   64 bytes from 169.254.172.137: icmp_seq=2 ttl=64 time=1.80 ms
>   64 bytes from 169.254.172.137: icmp_seq=3 ttl=64 time=8.19 ms
>   64 bytes from 169.254.172.137: icmp_seq=4 ttl=64 time=2.00 ms
> 
> After this change:
>   ping 169.254.144.71
>   64 bytes from 169.254.144.71: icmp_seq=1 ttl=64 time=0.215 ms
>   64 bytes from 169.254.144.71: icmp_seq=2 ttl=64 time=0.456 ms
>   64 bytes from 169.254.144.71: icmp_seq=3 ttl=64 time=0.448 ms
> 
> Change u64 db to atomic64_t because different doorbells may happen at the
> same time.
> 

Is making only the db variable atomic enough?

> Signed-off-by: Frank Li <Frank.Li@nxp.com>
> ---
>  drivers/pci/endpoint/functions/pci-epf-vntb.c | 153 +++++++++++++++++++++++---
>  1 file changed, 136 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> index 83e9ab10f9c4fc2b485d5463faa2172500f12999..1c586205835fe9c7c5352e74819bccb4ece84438 100644
> --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
> +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> @@ -36,11 +36,13 @@
>   * PCIe Root Port                        PCI EP
>   */
>  
> +#include <linux/atomic.h>
>  #include <linux/delay.h>
>  #include <linux/io.h>
>  #include <linux/module.h>
>  #include <linux/slab.h>
>  
> +#include <linux/pci-ep-msi.h>
>  #include <linux/pci-epc.h>
>  #include <linux/pci-epf.h>
>  #include <linux/ntb.h>
> @@ -126,12 +128,13 @@ struct epf_ntb {
>  	u32 db_count;
>  	u32 spad_count;
>  	u64 mws_size[MAX_MW];
> -	u64 db;
> +	atomic64_t db;
>  	u32 vbus_number;
>  	u16 vntb_pid;
>  	u16 vntb_vid;
>  
>  	bool linkup;
> +	bool msi_doorbell;
>  	u32 spad_size;
>  
>  	enum pci_barno epf_ntb_bar[VNTB_BAR_NUM];
> @@ -258,9 +261,9 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
>  
>  	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
>  
> -	for (i = 1; i < ntb->db_count; i++) {
> +	for (i = 1; i < ntb->db_count && !ntb->msi_doorbell; i++) {
>  		if (ntb->epf_db[i]) {
> -			ntb->db |= 1 << (i - 1);
> +			atomic64_or(1 << (i - 1), &ntb->db);
>  			ntb_db_event(&ntb->ntb, i);
>  			ntb->epf_db[i] = 0;
>  		}
> @@ -319,7 +322,24 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
>  
>  reset_handler:
>  	queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler,
> -			   msecs_to_jiffies(5));
> +			   ntb->msi_doorbell ? msecs_to_jiffies(500) : msecs_to_jiffies(5));
> +}
> +
> +static irqreturn_t epf_ntb_doorbell_handler(int irq, void *data)
> +{
> +	struct epf_ntb *ntb = data;
> +	int i = 0;
> +
> +	for (i = 1; i < ntb->db_count; i++)
> +		if (irq == ntb->epf->db_msg[i].virq) {
> +			atomic64_or(1 << (i - 1), &ntb->db);
> +			ntb_db_event(&ntb->ntb, i);
> +		}
> +
> +	if (irq == ntb->epf->db_msg[0].virq)
> +		queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler, 0);
> +
> +	return IRQ_HANDLED;
>  }
>  
>  /**
> @@ -500,6 +520,90 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
>  	return 0;
>  }
>  
> +static int epf_ntb_db_bar_init_msi_doorbell(struct epf_ntb *ntb,
> +					    struct pci_epf_bar *db_bar,
> +					    const struct pci_epc_features *epc_features,
> +					    enum pci_barno barno)
> +{
> +	struct pci_epf *epf = ntb->epf;
> +	dma_addr_t low, high;
> +	struct msi_msg *msg;
> +	size_t sz;
> +	int ret;
> +	int i;
> +
> +	ret = pci_epf_alloc_doorbell(epf,  ntb->db_count);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ntb->db_count; i++) {
> +		ret = request_irq(epf->db_msg[i].virq, epf_ntb_doorbell_handler,
> +				  0, "vntb_db", ntb);
> +
> +		if (ret) {
> +			dev_err(&epf->dev,
> +				"Failed to request doorbell IRQ: %d\n",
> +				epf->db_msg[i].virq);
> +			goto err_request_irq;
> +		}
> +	}
> +
> +	msg = &epf->db_msg[0].msg;
> +
> +	high = 0;
> +	low = (u64)msg->address_hi << 32 | msg->address_lo;

Can you remind me when the 'address_{hi/lo}' pairs are set?

Rest looks OK to me.

- Mani

-- 
மணிவண்ணன் சதாசிவம்
Re: [PATCH 2/2] PCI: endpoint: pci-epf-vntb: Add MSI doorbell support
Posted by Frank Li 3 weeks ago
On Sat, Aug 30, 2025 at 09:14:43PM +0530, Manivannan Sadhasivam wrote:
> On Fri, Aug 15, 2025 at 06:20:54PM GMT, Frank Li wrote:
> > Add MSI doorbell support to reduce latency between PCI host and EP.
> >
> > Before this change:
> >   ping 169.254.172.137
> >   64 bytes from 169.254.172.137: icmp_seq=1 ttl=64 time=0.575 ms
> >   64 bytes from 169.254.172.137: icmp_seq=2 ttl=64 time=1.80 ms
> >   64 bytes from 169.254.172.137: icmp_seq=3 ttl=64 time=8.19 ms
> >   64 bytes from 169.254.172.137: icmp_seq=4 ttl=64 time=2.00 ms
> >
> > After this change:
> >   ping 169.254.144.71
> >   64 bytes from 169.254.144.71: icmp_seq=1 ttl=64 time=0.215 ms
> >   64 bytes from 169.254.144.71: icmp_seq=2 ttl=64 time=0.456 ms
> >   64 bytes from 169.254.144.71: icmp_seq=3 ttl=64 time=0.448 ms
> >
> > Change u64 db to atomic64_t because different doorbells may happen at the
> > same time.
> >
>
> Is making only the db variable atomic enough?

So far, yes; the atomic OR and AND are safe for checking each bit of doorbell[n].

>
> > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > ---
> >  drivers/pci/endpoint/functions/pci-epf-vntb.c | 153 +++++++++++++++++++++++---
> >  1 file changed, 136 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > index 83e9ab10f9c4fc2b485d5463faa2172500f12999..1c586205835fe9c7c5352e74819bccb4ece84438 100644
> > --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > @@ -36,11 +36,13 @@
> >   * PCIe Root Port                        PCI EP
> >   */
> >
> > +#include <linux/atomic.h>
> >  #include <linux/delay.h>
> >  #include <linux/io.h>
> >  #include <linux/module.h>
> >  #include <linux/slab.h>
> >
> > +#include <linux/pci-ep-msi.h>
> >  #include <linux/pci-epc.h>
> >  #include <linux/pci-epf.h>
> >  #include <linux/ntb.h>
> > @@ -126,12 +128,13 @@ struct epf_ntb {
> >  	u32 db_count;
> >  	u32 spad_count;
> >  	u64 mws_size[MAX_MW];
> > -	u64 db;
> > +	atomic64_t db;
> >  	u32 vbus_number;
> >  	u16 vntb_pid;
> >  	u16 vntb_vid;
> >
> >  	bool linkup;
> > +	bool msi_doorbell;
> >  	u32 spad_size;
> >
> >  	enum pci_barno epf_ntb_bar[VNTB_BAR_NUM];
> > @@ -258,9 +261,9 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
> >
> >  	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
> >
> > -	for (i = 1; i < ntb->db_count; i++) {
> > +	for (i = 1; i < ntb->db_count && !ntb->msi_doorbell; i++) {
> >  		if (ntb->epf_db[i]) {
> > -			ntb->db |= 1 << (i - 1);
> > +			atomic64_or(1 << (i - 1), &ntb->db);
> >  			ntb_db_event(&ntb->ntb, i);
> >  			ntb->epf_db[i] = 0;
> >  		}
> > @@ -319,7 +322,24 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
> >
> >  reset_handler:
> >  	queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler,
> > -			   msecs_to_jiffies(5));
> > +			   ntb->msi_doorbell ? msecs_to_jiffies(500) : msecs_to_jiffies(5));
> > +}
> > +
> > +static irqreturn_t epf_ntb_doorbell_handler(int irq, void *data)
> > +{
> > +	struct epf_ntb *ntb = data;
> > +	int i = 0;
> > +
> > +	for (i = 1; i < ntb->db_count; i++)
> > +		if (irq == ntb->epf->db_msg[i].virq) {
> > +			atomic64_or(1 << (i - 1), &ntb->db);
> > +			ntb_db_event(&ntb->ntb, i);
> > +		}
> > +
> > +	if (irq == ntb->epf->db_msg[0].virq)
> > +		queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler, 0);
> > +
> > +	return IRQ_HANDLED;
> >  }
> >
> >  /**
> > @@ -500,6 +520,90 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
> >  	return 0;
> >  }
> >
> > +static int epf_ntb_db_bar_init_msi_doorbell(struct epf_ntb *ntb,
> > +					    struct pci_epf_bar *db_bar,
> > +					    const struct pci_epc_features *epc_features,
> > +					    enum pci_barno barno)
> > +{
> > +	struct pci_epf *epf = ntb->epf;
> > +	dma_addr_t low, high;
> > +	struct msi_msg *msg;
> > +	size_t sz;
> > +	int ret;
> > +	int i;
> > +
> > +	ret = pci_epf_alloc_doorbell(epf,  ntb->db_count);
> > +	if (ret)
> > +		return ret;
> > +
> > +	for (i = 0; i < ntb->db_count; i++) {
> > +		ret = request_irq(epf->db_msg[i].virq, epf_ntb_doorbell_handler,
> > +				  0, "vntb_db", ntb);
> > +
> > +		if (ret) {
> > +			dev_err(&epf->dev,
> > +				"Failed to request doorbell IRQ: %d\n",
> > +				epf->db_msg[i].virq);
> > +			goto err_request_irq;
> > +		}
> > +	}
> > +
> > +	msg = &epf->db_msg[0].msg;
> > +
> > +	high = 0;
> > +	low = (u64)msg->address_hi << 32 | msg->address_lo;
>
> Can you remind me when the 'address_{hi/lo}' pairs are set?

They are set in pci_epf_alloc_doorbell().

Frank
>
> Rest looks OK to me.
>
> - Mani
>
> --
> மணிவண்ணன் சதாசிவம்