[RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode

Add a new transport backend that uses a remote DesignWare eDMA engine
located on the NTB endpoint to move data between host and endpoint.

In this mode:

  - The endpoint exposes a dedicated memory window that contains the
    eDMA register block followed by a small control structure (struct
    ntb_edma_info) and per-channel linked-list (LL) rings.

  - On the endpoint side, ntb_edma_setup_mws() allocates the control
    structure and LL rings in endpoint memory, then programs an inbound
    iATU region so that the host can access them via a peer MW.

  - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
    ntb_edma_info and configures a dw-edma DMA device to use the LL
    rings provided by the endpoint.

  - ntb_transport is extended with a new backend_ops implementation that
    routes TX and RX enqueue/poll operations through the remote eDMA
    rings while keeping the existing shared-memory backend intact.

  - The host signals the endpoint via a dedicated eDMA read channel.
    The 'use_msi' module parameter is ignored when 'use_remote_edma=1'.

The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
module parameter (use_remote_edma). When disabled, the existing
ntb_transport behaviour is unchanged.
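
For example, with NTB_TRANSPORT_EDMA enabled on both sides and
ntb_transport built as a module, the mode is selected at load time on
host and endpoint alike:

  modprobe ntb_transport use_remote_edma=1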

Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
 drivers/ntb/Kconfig                           |   11 +
 drivers/ntb/Makefile                          |    3 +
 drivers/ntb/ntb_edma.c                        |  628 ++++++++
 drivers/ntb/ntb_edma.h                        |  128 ++
 .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
 5 files changed, 2048 insertions(+), 3 deletions(-)
 create mode 100644 drivers/ntb/ntb_edma.c
 create mode 100644 drivers/ntb/ntb_edma.h
 rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)

diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index df16c755b4da..db63f02bb116 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -37,4 +37,15 @@ config NTB_TRANSPORT
 
 	 If unsure, say N.
 
+config NTB_TRANSPORT_EDMA
+	bool "NTB Transport backed by remote eDMA"
+	depends on NTB_TRANSPORT
+	depends on PCI
+	select DMA_ENGINE
+	help
+	  Enable a transport backend that uses a remote DesignWare eDMA engine
+	  exposed through a dedicated NTB memory window. The host uses the
+	  endpoint's eDMA engine to move data in both directions.
+	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
+
 endif # NTB
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 3a6fa181ff99..51f0e1e3aec7 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
 
 ntb-y			:= core.o
 ntb-$(CONFIG_NTB_MSI)	+= msi.o
+
+ntb_transport-y					:= ntb_transport_core.o
+ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
new file mode 100644
index 000000000000..cb35e0d56aa8
--- /dev/null
+++ b/drivers/ntb/ntb_edma.c
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ntb.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/dmaengine.h>
+#include <linux/pci-epc.h>
+#include <linux/dma/edma.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#include "ntb_edma.h"
+
+/*
+ * The interrupt register offsets below are taken from the DesignWare
+ * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
+ * backend currently only supports this layout.
+ */
+#define DMA_WRITE_INT_STATUS_OFF   0x4c
+#define DMA_WRITE_INT_MASK_OFF     0x54
+#define DMA_WRITE_INT_CLEAR_OFF    0x58
+#define DMA_READ_INT_STATUS_OFF    0xa0
+#define DMA_READ_INT_MASK_OFF      0xa8
+#define DMA_READ_INT_CLEAR_OFF     0xac
+
+#define NTB_EDMA_NOTIFY_MAX_QP		64
+
+static unsigned int edma_spi = 417; /* 0x1a1 */
+module_param(edma_spi, uint, 0644);
+MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
+
+static u64 edma_regs_phys = 0xe65d5000;
+module_param(edma_regs_phys, ullong, 0644);
+MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
+
+static unsigned long edma_regs_size = 0x1200;
+module_param(edma_regs_size, ulong, 0644);
+MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
+
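+/*
+ * Per-QP doorbell mailbox. The RC side sets db[qp] in its local copy and
+ * transfers the word to the EP-resident copy over a reserved eDMA read
+ * channel; the EP ISR scans and clears the entries to learn which queue
+ * pairs need servicing.
+ */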
+struct ntb_edma_intr {
+	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
+};
+
+struct ntb_edma_ctx {
+	void *ll_wr_virt[EDMA_WR_CH_NUM];
+	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
+	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
+	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
+
+	struct ntb_edma_intr *intr_ep_virt;
+	dma_addr_t intr_ep_phys;
+	struct ntb_edma_intr *intr_rc_virt;
+	dma_addr_t intr_rc_phys;
+	u32 notify_qp_max;
+
+	bool initialized;
+};
+
+static struct ntb_edma_ctx edma_ctx;
+
+struct ntb_edma_interrupt {
+	int virq;
+	void __iomem *base;
+	ntb_edma_interrupt_cb_t cb;
+	void *data;
+};
+
+static struct ntb_edma_interrupt ntb_edma_intr;
+
+static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
+{
+	struct device_node *np = dev_of_node(dev);
+	struct device_node *parent;
+	struct irq_fwspec fwspec = { 0 };
+	int virq;
+
+	parent = of_irq_find_parent(np);
+	if (!parent)
+		return -ENODEV;
+
+	fwspec.fwnode      = of_fwnode_handle(parent);
+	fwspec.param_count = 3;
+	fwspec.param[0]    = GIC_SPI;
+	fwspec.param[1]    = spi;
+	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
+
+	virq = irq_create_fwspec_mapping(&fwspec);
+	of_node_put(parent);
+	return (virq > 0) ? virq : -EINVAL;
+}
+
+static irqreturn_t ntb_edma_isr(int irq, void *data)
+{
+	struct ntb_edma_interrupt *v = data;
+	u32 mask = BIT(EDMA_RD_CH_NUM);
+	u32 i, val;
+
+	/*
+	 * We do not ack interrupts here but instead we mask all local interrupt
+	 * sources except the read channel used for notification. This reduces
+	 * needless ISR invocations.
+	 *
+	 * In theory we could configure LIE=1/RIE=0 only for the notification
+	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
+	 * require intrusive changes to the dw-edma core.
+	 *
+	 * Note: The host side may have already cleared the read interrupt used
+	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
+	 * way to detect it. As a result, we cannot reliably tell which specific
+	 * channel triggered this interrupt; we consult intr_ep_virt->db[i]
+	 * instead.
+	 */
+	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
+	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
+
+	if (!v->cb || !edma_ctx.intr_ep_virt)
+		return IRQ_HANDLED;
+
+	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
+		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
+		if (!val)
+			continue;
+
+		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
+		v->cb(v->data, i);
+	}
+
+	return IRQ_HANDLED;
+}
+
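+/*
+ * EP side: map the EP-local eDMA SPI to a Linux virq, install the handler
+ * above and ioremap the eDMA register space so the ISR can mask/unmask
+ * the local interrupt sources.
+ */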
+int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
+		       ntb_edma_interrupt_cb_t cb, void *data)
+{
+	struct ntb_edma_interrupt *v = &ntb_edma_intr;
+	int virq = ntb_edma_map_spi_to_virq(epc_dev->parent, edma_spi);
+	int ret;
+
+	if (virq < 0) {
+		dev_err(dev, "failed to get virq (%d)\n", virq);
+		return virq;
+	}
+
+	v->virq = virq;
+	v->cb = cb;
+	v->data = data;
+	if (edma_regs_phys && !v->base)
+		v->base = devm_ioremap(dev, edma_regs_phys, edma_regs_size);
+	if (!v->base) {
+		dev_err(dev, "failed to map local eDMA registers\n");
+		return -ENOMEM;
+	}
+	ret = devm_request_irq(dev, v->virq, ntb_edma_isr, 0, "ntb-edma", v);
+	if (ret)
+		return ret;
+
+	if (v->base) {
+		iowrite32(0x0, v->base + DMA_WRITE_INT_MASK_OFF);
+		iowrite32(0x0, v->base + DMA_READ_INT_MASK_OFF);
+	}
+	return 0;
+}
+
+void ntb_edma_teardown_isr(struct device *dev)
+{
+	struct ntb_edma_interrupt *v = &ntb_edma_intr;
+
+	/* Mask all write/read interrupts so we don't get called again. */
+	if (v->base) {
+		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
+		iowrite32(~0x0, v->base + DMA_READ_INT_MASK_OFF);
+	}
+
+	if (v->virq > 0)
+		devm_free_irq(dev, v->virq, v);
+
+	if (v->base)
+		devm_iounmap(dev, v->base);
+
+	v->virq = 0;
+	v->cb = NULL;
+	v->data = NULL;
+}
+
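+/*
+ * EP side: allocate ntb_edma_info, the per-channel LL rings and the
+ * doorbell mailbox in EP memory, then program the last inbound memory
+ * window so the host sees the eDMA registers, the info page and the LL
+ * rings back to back (see the layout comment in ntb_edma.h).
+ */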
+int ntb_edma_setup_mws(struct ntb_dev *ndev)
+{
+	const size_t info_bytes = PAGE_SIZE;
+	resource_size_t size_max, offset;
+	dma_addr_t intr_phys, info_phys;
+	u32 wr_done = 0, rd_done = 0;
+	struct ntb_edma_intr *intr;
+	struct ntb_edma_info *info;
+	int peer_mw, mw_index, rc;
+	struct iommu_domain *dom;
+	bool reg_mapped = false;
+	size_t ll_bytes, size;
+	struct pci_epc *epc;
+	struct device *dev;
+	unsigned long iova;
+	phys_addr_t phys;
+	u64 need;
+	u32 i;
+
+	/* +1 is for the notification (doorbell) channel */
+	ll_bytes = (EDMA_WR_CH_NUM + EDMA_RD_CH_NUM + 1) * DMA_LLP_MEM_SIZE;
+	need = EDMA_REG_SIZE + info_bytes + ll_bytes;
+
+	epc = ntb_get_pci_epc(ndev);
+	if (!epc)
+		return -ENODEV;
+	dev = epc->dev.parent;
+
+	if (edma_ctx.initialized)
+		return 0;
+
+	info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	memset(info, 0, info_bytes);
+	info->magic = NTB_EDMA_INFO_MAGIC;
+	info->wr_cnt = EDMA_WR_CH_NUM;
+	info->rd_cnt = EDMA_RD_CH_NUM + 1; /* +1 for notification */
+	info->regs_phys = edma_regs_phys;
+	info->ll_stride = DMA_LLP_MEM_SIZE;
+
+	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
+		edma_ctx.ll_wr_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
+							 &edma_ctx.ll_wr_phys[i],
+							 GFP_KERNEL,
+							 DMA_ATTR_FORCE_CONTIGUOUS);
+		if (!edma_ctx.ll_wr_virt[i]) {
+			rc = -ENOMEM;
+			goto err_free_ll;
+		}
+		wr_done++;
+		info->ll_wr_phys[i] = edma_ctx.ll_wr_phys[i];
+	}
+	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
+		edma_ctx.ll_rd_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
+							 &edma_ctx.ll_rd_phys[i],
+							 GFP_KERNEL,
+							 DMA_ATTR_FORCE_CONTIGUOUS);
+		if (!edma_ctx.ll_rd_virt[i]) {
+			rc = -ENOMEM;
+			goto err_free_ll;
+		}
+		rd_done++;
+		info->ll_rd_phys[i] = edma_ctx.ll_rd_phys[i];
+	}
+
+	/* Doorbell mailbox used for host-to-EP notification */
+	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
+	intr = dma_alloc_coherent(dev, sizeof(*intr), &intr_phys, GFP_KERNEL);
+	if (!intr) {
+		rc = -ENOMEM;
+		goto err_free_ll;
+	}
+	memset(intr, 0, sizeof(*intr));
+	edma_ctx.intr_ep_virt = intr;
+	edma_ctx.intr_ep_phys = intr_phys;
+	info->intr_dar_base = intr_phys;
+
+	peer_mw = ntb_peer_mw_count(ndev);
+	if (peer_mw <= 0) {
+		rc = -ENODEV;
+		goto err_free_ll;
+	}
+
+	mw_index = peer_mw - 1; /* last MW */
+
+	rc = ntb_mw_get_align(ndev, 0, mw_index, 0, NULL, &size_max,
+			      &offset);
+	if (rc)
+		goto err_free_ll;
+
+	if (size_max < need) {
+		rc = -ENOSPC;
+		goto err_free_ll;
+	}
+
+	/* Map register space (direct) */
+	dom = iommu_get_domain_for_dev(dev);
+	if (dom) {
+		phys = edma_regs_phys & PAGE_MASK;
+		size = PAGE_ALIGN(EDMA_REG_SIZE + edma_regs_phys - phys);
+		iova = phys;
+
+		rc = iommu_map(dom, iova, phys, size,
+			       IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, GFP_KERNEL);
+		if (rc)
+			dev_err(&ndev->dev, "failed to create direct mapping for eDMA reg space\n");
+		else
+			reg_mapped = true;
+	}
+
+	rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_regs_phys, EDMA_REG_SIZE, offset);
+	if (rc)
+		goto err_unmap_reg;
+
+	offset += EDMA_REG_SIZE;
+
+	/* Map ntb_edma_info */
+	rc = ntb_mw_set_trans(ndev, 0, mw_index, info_phys, info_bytes, offset);
+	if (rc)
+		goto err_clear_trans;
+	offset += info_bytes;
+
+	/* Map LL location */
+	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
+		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_wr_phys[i],
+				      DMA_LLP_MEM_SIZE, offset);
+		if (rc)
+			goto err_clear_trans;
+		offset += DMA_LLP_MEM_SIZE;
+	}
+	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
+		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_rd_phys[i],
+				      DMA_LLP_MEM_SIZE, offset);
+		if (rc)
+			goto err_clear_trans;
+		offset += DMA_LLP_MEM_SIZE;
+	}
+	edma_ctx.initialized = true;
+
+	return 0;
+
+err_clear_trans:
+	/*
+	 * Tear down the NTB translation window used for the eDMA MW.
+	 * There is no sub-range clear API for ntb_mw_set_trans(), so we
+	 * unconditionally drop the whole mapping on error.
+	 */
+	ntb_mw_clear_trans(ndev, 0, mw_index);
+
+err_unmap_reg:
+	if (reg_mapped)
+		iommu_unmap(dom, iova, size);
+err_free_ll:
+	while (rd_done--)
+		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
+			       edma_ctx.ll_rd_virt[rd_done],
+			       edma_ctx.ll_rd_phys[rd_done],
+			       DMA_ATTR_FORCE_CONTIGUOUS);
+	while (wr_done--)
+		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
+			       edma_ctx.ll_wr_virt[wr_done],
+			       edma_ctx.ll_wr_phys[wr_done],
+			       DMA_ATTR_FORCE_CONTIGUOUS);
+	if (edma_ctx.intr_ep_virt)
+		dma_free_coherent(dev, sizeof(struct ntb_edma_intr),
+				  edma_ctx.intr_ep_virt,
+				  edma_ctx.intr_ep_phys);
+	dma_free_coherent(dev, info_bytes, info, info_phys);
+	return rc;
+}
+
+static int ntb_edma_irq_vector(struct device *dev, unsigned int nr)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret, nvec;
+
+	nvec = pci_msi_vec_count(pdev);
+	for (; nr < nvec; nr++) {
+		ret = pci_irq_vector(pdev, nr);
+		if (!irq_has_action(ret))
+			return ret;
+	}
+	return 0;
+}
+
+static const struct dw_edma_plat_ops ntb_edma_ops = {
+	.irq_vector     = ntb_edma_irq_vector,
+};
+
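+/*
+ * RC side: ioremap the last peer memory window, validate the
+ * ntb_edma_info block written by the EP and register a dw-edma chip
+ * whose LL regions point into the rings allocated by the EP.
+ */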
+int ntb_edma_setup_peer(struct ntb_dev *ndev)
+{
+	struct ntb_edma_info *info;
+	unsigned int wr_cnt, rd_cnt;
+	struct dw_edma_chip *chip;
+	void __iomem *edma_virt;
+	phys_addr_t edma_phys;
+	resource_size_t mw_size;
+	u64 off = EDMA_REG_SIZE;
+	int peer_mw, mw_index;
+	unsigned int i;
+	int ret;
+
+	peer_mw = ntb_peer_mw_count(ndev);
+	if (peer_mw <= 0)
+		return -ENODEV;
+
+	mw_index = peer_mw - 1; /* last MW */
+
+	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
+				   &mw_size);
+	if (ret)
+		return ret;
+
+	edma_virt = ioremap(edma_phys, mw_size);
+	if (!edma_virt)
+		return -ENOMEM;
+
+	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip) {
+		ret = -ENOMEM;
+		return ret;
+	}
+
+	chip->dev = &ndev->pdev->dev;
+	chip->nr_irqs = 4;
+	chip->ops = &ntb_edma_ops;
+	chip->flags = 0;
+	chip->reg_base = edma_virt;
+	chip->mf = EDMA_MF_EDMA_UNROLL;
+
+	info = edma_virt + off;
+	if (info->magic != NTB_EDMA_INFO_MAGIC)
+		return -EINVAL;
+	wr_cnt = info->wr_cnt;
+	rd_cnt = info->rd_cnt;
+	chip->ll_wr_cnt = wr_cnt;
+	chip->ll_rd_cnt = rd_cnt;
+	off += PAGE_SIZE;
+
+	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
+	edma_ctx.intr_ep_phys = info->intr_dar_base;
+	if (edma_ctx.intr_ep_phys) {
+		edma_ctx.intr_rc_virt =
+			dma_alloc_coherent(&ndev->pdev->dev,
+					   sizeof(struct ntb_edma_intr),
+					   &edma_ctx.intr_rc_phys,
+					   GFP_KERNEL);
+		if (!edma_ctx.intr_rc_virt)
+			return -ENOMEM;
+		memset(edma_ctx.intr_rc_virt, 0,
+		       sizeof(struct ntb_edma_intr));
+	}
+
+	for (i = 0; i < wr_cnt; i++) {
+		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
+		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
+		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
+		off += DMA_LLP_MEM_SIZE;
+	}
+	for (i = 0; i < rd_cnt; i++) {
+		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
+		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
+		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
+		off += DMA_LLP_MEM_SIZE;
+	}
+
+	if (!pci_dev_msi_enabled(ndev->pdev))
+		return -ENXIO;
+
+	ret = dw_edma_probe(chip);
+	if (ret) {
+		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+struct ntb_edma_filter {
+	struct device *dma_dev;
+	u32 direction;
+};
+
+static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
+{
+	struct ntb_edma_filter *filter = arg;
+	u32 dir = filter->direction;
+	struct dma_slave_caps caps;
+	int ret;
+
+	if (chan->device->dev != filter->dma_dev)
+		return false;
+
+	ret = dma_get_slave_caps(chan, &caps);
+	if (ret < 0)
+		return false;
+
+	return !!(caps.directions & dir);
+}
+
+void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
+{
+	unsigned int i;
+
+	for (i = 0; i < edma->num_wr_chan; i++)
+		dma_release_channel(edma->wr_chan[i]);
+
+	for (i = 0; i < edma->num_rd_chan; i++)
+		dma_release_channel(edma->rd_chan[i]);
+
+	if (edma->intr_chan)
+		dma_release_channel(edma->intr_chan);
+}
+
+int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
+{
+	struct ntb_edma_filter filter;
+	dma_cap_mask_t dma_mask;
+	unsigned int i;
+
+	dma_cap_zero(dma_mask);
+	dma_cap_set(DMA_SLAVE, dma_mask);
+
+	memset(edma, 0, sizeof(*edma));
+	edma->dev = dma_dev;
+
+	filter.dma_dev = dma_dev;
+	filter.direction = BIT(DMA_DEV_TO_MEM);
+	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
+		edma->wr_chan[i] = dma_request_channel(dma_mask,
+						       ntb_edma_filter_fn,
+						       &filter);
+		if (!edma->wr_chan[i])
+			break;
+		edma->num_wr_chan++;
+	}
+
+	filter.direction = BIT(DMA_MEM_TO_DEV);
+	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
+		edma->rd_chan[i] = dma_request_channel(dma_mask,
+						       ntb_edma_filter_fn,
+						       &filter);
+		if (!edma->rd_chan[i])
+			break;
+		edma->num_rd_chan++;
+	}
+
+	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
+					      &filter);
+	if (!edma->intr_chan)
+		dev_warn(dma_dev,
+			 "Remote eDMA notify channel could not be allocated\n");
+
+	if (!edma->num_wr_chan || !edma->num_rd_chan) {
+		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
+		ntb_edma_teardown_chans(edma);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
+				    remote_edma_dir_t dir)
+{
+	unsigned int n, cur, idx;
+	struct dma_chan **chans;
+	atomic_t *cur_chan;
+
+	if (dir == REMOTE_EDMA_WRITE) {
+		n = edma->num_wr_chan;
+		chans = edma->wr_chan;
+		cur_chan = &edma->cur_wr_chan;
+	} else {
+		n = edma->num_rd_chan;
+		chans = edma->rd_chan;
+		cur_chan = &edma->cur_rd_chan;
+	}
+	if (WARN_ON_ONCE(!n))
+		return NULL;
+
+	/* Simple round-robin */
+	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
+	idx = cur % n;
+	return chans[idx];
+}
+
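+/*
+ * RC side: notify the EP that queue pair @qp_num has work. A one-word
+ * doorbell value is copied from the RC-local mailbox into the
+ * EP-resident mailbox over the reserved read channel; the resulting
+ * EP-local eDMA interrupt is handled by ntb_edma_isr().
+ */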
+int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
+{
+	struct dma_async_tx_descriptor *txd;
+	struct dma_slave_config cfg;
+	struct scatterlist sgl;
+	dma_cookie_t cookie;
+	struct device *dev;
+
+	if (!edma || !edma->intr_chan)
+		return -ENXIO;
+
+	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
+		return -EINVAL;
+
+	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
+		return -EINVAL;
+
+	dev = edma->dev;
+	if (!dev)
+		return -ENODEV;
+
+	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
+
+	/* Ensure store is visible before kicking the DMA transfer */
+	wmb();
+
+	sg_init_table(&sgl, 1);
+	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
+	sg_dma_len(&sgl) = sizeof(u32);
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.direction      = DMA_MEM_TO_DEV;
+
+	if (dmaengine_slave_config(edma->intr_chan, &cfg))
+		return -EINVAL;
+
+	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
+				      DMA_MEM_TO_DEV,
+				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!txd)
+		return -ENOSPC;
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		return -ENOSPC;
+
+	dma_async_issue_pending(edma->intr_chan);
+	return 0;
+}
diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
new file mode 100644
index 000000000000..da0451827edb
--- /dev/null
+++ b/drivers/ntb/ntb_edma.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+#ifndef _NTB_EDMA_H_
+#define _NTB_EDMA_H_
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+
+#define EDMA_REG_SIZE		SZ_64K
+#define DMA_LLP_MEM_SIZE	SZ_4K
+#define EDMA_WR_CH_NUM		4
+#define EDMA_RD_CH_NUM		4
+#define NTB_EDMA_MAX_CH		8
+
+#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
+#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
+
+#define NTB_EDMA_RING_ORDER	7
+#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
+#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
+
+typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
+
+/*
+ * REMOTE_EDMA_EP:
+ *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
+ *
+ * REMOTE_EDMA_RC:
+ *   Root Complex controls the endpoint eDMA through the shared MW and
+ *   drives reads/writes on behalf of the host.
+ */
+typedef enum {
+	REMOTE_EDMA_UNKNOWN,
+	REMOTE_EDMA_EP,
+	REMOTE_EDMA_RC,
+} remote_edma_mode_t;
+
+typedef enum {
+	REMOTE_EDMA_WRITE,
+	REMOTE_EDMA_READ,
+} remote_edma_dir_t;
+
+/*
+ * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
+ *
+ *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
+ *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
+ *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
+ *                                RC configures via dw_edma)
+ *
+ * ntb_edma_setup_mws() on EP:
+ *   - allocates ntb_edma_info and LLs in EP memory
+ *   - programs inbound iATU so that RC peer MW[n] points at this block
+ *
+ * ntb_edma_setup_peer() on RC:
+ *   - ioremaps peer MW[n]
+ *   - reads ntb_edma_info
+ *   - sets up dw_edma_chip ll_region_* from that info
+ */
+struct ntb_edma_info {
+	u32 magic;
+	u16 wr_cnt;
+	u16 rd_cnt;
+	u64 regs_phys;
+	u32 ll_stride;
+	u32 rsvd;
+	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
+	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
+
+	u64 intr_dar_base;
+} __packed;
+
+struct ll_dma_addrs {
+	dma_addr_t wr[EDMA_WR_CH_NUM];
+	dma_addr_t rd[EDMA_RD_CH_NUM];
+};
+
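+/*
+ * DMA channels requested from the dw-edma device on the RC side: write
+ * channels move EP -> RC payloads, read channels move RC -> EP payloads,
+ * and one read channel is reserved for doorbell notification.
+ */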
+struct ntb_edma_chans {
+	struct device *dev;
+
+	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
+	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
+	struct dma_chan *intr_chan;
+
+	unsigned int num_wr_chan;
+	unsigned int num_rd_chan;
+	atomic_t cur_wr_chan;
+	atomic_t cur_rd_chan;
+};
+
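+/*
+ * Ring indices are free-running u32 producer/consumer counters; only the
+ * low NTB_EDMA_RING_ORDER bits select a slot. One slot is always left
+ * unused so that a full ring can be distinguished from an empty one.
+ */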
+static __always_inline u32 ntb_edma_ring_idx(u32 v)
+{
+	return v & NTB_EDMA_RING_MASK;
+}
+
+static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
+{
+	if (head >= tail) {
+		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
+		return head - tail;
+	}
+
+	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
+	return U32_MAX - tail + head + 1;
+}
+
+static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
+{
+	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
+}
+
+static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
+{
+	return ntb_edma_ring_free_entry(head, tail) == 0;
+}
+
+int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
+		       ntb_edma_interrupt_cb_t cb, void *data);
+void ntb_edma_teardown_isr(struct device *dev);
+int ntb_edma_setup_mws(struct ntb_dev *ndev);
+int ntb_edma_setup_peer(struct ntb_dev *ndev);
+int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
+struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
+				    remote_edma_dir_t dir);
+void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
+int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
+
+#endif
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
similarity index 65%
rename from drivers/ntb/ntb_transport.c
rename to drivers/ntb/ntb_transport_core.c
index 907db6c93d4d..48d48921978d 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport_core.c
@@ -47,6 +47,9 @@
  * Contact Information:
  * Jon Mason <jon.mason@intel.com>
  */
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
@@ -71,6 +74,8 @@
 #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
 #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
 
+#define NTB_EDMA_MAX_POLL		32
+
 MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
 MODULE_VERSION(NTB_TRANSPORT_VER);
 MODULE_LICENSE("Dual BSD/GPL");
@@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
 MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
 #endif
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+#include "ntb_edma.h"
+static bool use_remote_edma;
+module_param(use_remote_edma, bool, 0644);
+MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
+#endif
+
 static struct dentry *nt_debugfs_dir;
 
 /* Only two-ports NTB devices are supported */
@@ -125,6 +137,14 @@ struct ntb_queue_entry {
 		struct ntb_payload_header __iomem *tx_hdr;
 		struct ntb_payload_header *rx_hdr;
 	};
+
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	dma_addr_t addr;
+
+	/* Used by RC side only */
+	struct scatterlist sgl;
+	struct work_struct dma_work;
+#endif
 };
 
 struct ntb_rx_info {
@@ -202,6 +222,33 @@ struct ntb_transport_qp {
 	int msi_irq;
 	struct ntb_msi_desc msi_desc;
 	struct ntb_msi_desc peer_msi_desc;
+
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	/*
+	 * For ensuring peer notification in non-atomic context.
+	 * ntb_peer_db_set might sleep or schedule.
+	 */
+	struct work_struct db_work;
+
+	/*
+	 * wr: remote eDMA write transfer (EP -> RC direction)
+	 * rd: remote eDMA read transfer (RC -> EP direction)
+	 */
+	u32 wr_cons;
+	u32 rd_cons;
+	u32 wr_prod;
+	u32 rd_prod;
+	u32 wr_issue;
+	u32 rd_issue;
+
+	spinlock_t ep_tx_lock;
+	spinlock_t ep_rx_lock;
+	spinlock_t rc_lock;
+
+	/* Completion work for read/write transfers. */
+	struct work_struct read_work;
+	struct work_struct write_work;
+#endif
 };
 
 struct ntb_transport_mw {
@@ -249,6 +296,13 @@ struct ntb_transport_ctx {
 
 	/* Make sure workq of link event be executed serially */
 	struct mutex link_event_lock;
+
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	remote_edma_mode_t remote_edma_mode;
+	struct device *dma_dev;
+	struct workqueue_struct *wq;
+	struct ntb_edma_chans edma;
+#endif
 };
 
 enum {
@@ -262,6 +316,19 @@ struct ntb_payload_header {
 	unsigned int flags;
 };
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
+static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
+				   unsigned int *mw_count);
+static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
+					  unsigned int qp_num);
+static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
+					    struct ntb_transport_qp *qp);
+static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
+static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
+static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
+#endif /* CONFIG_NTB_TRANSPORT_EDMA */
+
 /*
  * Return the device that should be used for DMA mapping.
  *
@@ -298,7 +365,7 @@ enum {
 	container_of((__drv), struct ntb_transport_client, driver)
 
 #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
-#define NTB_QP_DEF_NUM_ENTRIES	100
+#define NTB_QP_DEF_NUM_ENTRIES	128
 #define NTB_LINK_DOWN_TIMEOUT	10
 
 static void ntb_transport_rxc_db(unsigned long data);
@@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
 	count = ntb_spad_count(nt->ndev);
 	for (i = 0; i < count; i++)
 		ntb_spad_write(nt->ndev, i, 0);
+
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	ntb_edma_teardown_chans(&nt->edma);
+#endif
 }
 
 static void ntb_transport_link_cleanup_work(struct work_struct *work)
@@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
 
 	/* send the local info, in the opposite order of the way we read it */
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	rc = ntb_transport_edma_ep_init(nt);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
+		return;
+	}
+#endif
+
 	if (nt->use_msi) {
 		rc = ntb_msi_setup_mws(ndev);
 		if (rc) {
@@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
 
 	nt->link_is_up = true;
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	rc = ntb_transport_edma_rc_init(nt);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
+		goto out1;
+	}
+#endif
+
 	for (i = 0; i < nt->qp_count; i++) {
 		struct ntb_transport_qp *qp = &nt->qp_vec[i];
 
@@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
 	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
 };
 
+static const struct ntb_transport_backend_ops edma_backend_ops;
+
 static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 {
 	struct ntb_transport_ctx *nt;
@@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 
 	nt->ndev = ndev;
 
-	nt->backend_ops = default_backend_ops;
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	if (use_remote_edma) {
+		rc = ntb_transport_edma_init(nt, &mw_count);
+		if (rc) {
+			nt->mw_count = 0;
+			goto err;
+		}
+		nt->backend_ops = edma_backend_ops;
+
+		/*
+		 * In remote eDMA mode a dedicated eDMA read channel is used
+		 * for host-to-EP notification, so MSI is not needed.
+		 */
+		use_msi = false;
+	} else
+#endif
+		nt->backend_ops = default_backend_ops;
 
 	/*
 	 * If we are using MSI, and have at least one extra memory window,
@@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		rc = ntb_transport_init_queue(nt, i);
 		if (rc)
 			goto err2;
+
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+		ntb_transport_edma_init_queue(nt, i);
+#endif
 	}
 
 	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
@@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	}
 	kfree(nt->mw_vec);
 err:
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	ntb_transport_edma_uninit(nt);
+#endif
 	kfree(nt);
 	return rc;
 }
@@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
 
 	nt->qp_bitmap_free &= ~qp_bit;
 
+	qp->qp_bit = qp_bit;
 	qp->cb_data = data;
 	qp->rx_handler = handlers->rx_handler;
 	qp->tx_handler = handlers->tx_handler;
 	qp->event_handler = handlers->event_handler;
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	ntb_transport_edma_create_queue(nt, qp);
+#endif
+
 	dma_cap_zero(dma_mask);
 	dma_cap_set(DMA_MEMCPY, dma_mask);
 
@@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
 			goto err1;
 
 		entry->qp = qp;
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
+#endif
 		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
 			     &qp->rx_free_q);
 	}
@@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
  */
 void ntb_transport_free_queue(struct ntb_transport_qp *qp)
 {
-	struct pci_dev *pdev;
 	struct ntb_queue_entry *entry;
+	struct pci_dev *pdev;
 	u64 qp_bit;
 
 	if (!qp)
@@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
 	tasklet_kill(&qp->rxc_db_work);
 
 	cancel_delayed_work_sync(&qp->link_work);
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+	cancel_work_sync(&qp->read_work);
+	cancel_work_sync(&qp->write_work);
+#endif
 
 	qp->cb_data = NULL;
 	qp->rx_handler = NULL;
@@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
 }
 EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
 
+#ifdef CONFIG_NTB_TRANSPORT_EDMA
+/*
+ * Remote eDMA mode implementation
+ */
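+/*
+ * One slot of a descriptor ring kept in the shared QP memory window.
+ * 'addr' carries the DMA address of an EP-side buffer; 'data' is a
+ * cookie used locally by each side to track its ntb_queue_entry and is
+ * never transferred to the peer.
+ */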
+struct ntb_edma_desc {
+	u32 len;
+	u32 flags;
+	u64 addr; /* DMA address */
+	u64 data;
+};
+
+struct ntb_edma_ring {
+	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
+	u32 head;
+	u32 tail;
+};
+
+#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
+
+#define __NTB_EDMA_CHECK_INDEX(_i)					\
+({									\
+	unsigned long __i = (unsigned long)(_i);			\
+	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
+		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
+		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
+	__i;								\
+})
+
+#define NTB_EDMA_DESC_I(qp, i, n)					\
+({									\
+	typeof(qp) __qp = (qp);						\
+	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
+	(struct ntb_edma_desc *)					\
+		((char *)(__qp)->rx_buff +				\
+		 (sizeof(struct ntb_edma_ring) * n) +			\
+		 NTB_EDMA_DESC_OFF(__i));				\
+})
+
+#define NTB_EDMA_DESC_O(qp, i, n)					\
+({									\
+	typeof(qp) __qp = (qp);						\
+	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
+	(struct ntb_edma_desc __iomem *)				\
+		((char __iomem *)(__qp)->tx_mw +			\
+		 (sizeof(struct ntb_edma_ring) * n) +			\
+		 NTB_EDMA_DESC_OFF(__i));				\
+})
+
+#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
+				(sizeof(struct ntb_edma_ring) * n) +	\
+				offsetof(struct ntb_edma_ring, head)))
+#define NTB_EDMA_HEAD_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw + \
+				(sizeof(struct ntb_edma_ring) * n) +	\
+				offsetof(struct ntb_edma_ring, head)))
+#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
+				(sizeof(struct ntb_edma_ring) * n) +	\
+				offsetof(struct ntb_edma_ring, tail)))
+#define NTB_EDMA_TAIL_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw + \
+				(sizeof(struct ntb_edma_ring) * n) +	\
+				offsetof(struct ntb_edma_ring, tail)))
+
+/*
+ * Macro naming rule:
+ *   NTB_DESC_RD_EP_I (as an example)
+ *            ^^ ^^ ^
+ *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
+ *            :  `----- Who uses this macro.
+ *            `-------- DESC / HEAD / TAIL
+ *
+ * Read transfers (RC->EP):
+ *
+ *   EP view (outbound, written via NTB):
+ *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
+ *           [ len ][ flags ][ addr ][ data ]
+ *           [ len ][ flags ][ addr ][ data ]
+ *           :
+ *           [ len ][ flags ][ addr ][ data ]
+ *       - head: NTB_HEAD_RD_EP_O(qp)
+ *       - tail: NTB_TAIL_RD_EP_I(qp)
+ *
+ *   RC view (inbound, local mapping):
+ *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
+ *           [ len ][ flags ][ addr ][ data ]
+ *           [ len ][ flags ][ addr ][ data ]
+ *           :
+ *           [ len ][ flags ][ addr ][ data ]
+ *       - head: NTB_HEAD_RD_RC_I(qp)
+ *       - tail: NTB_TAIL_RD_RC_O(qp)
+ *
+ * Write transfers (EP -> RC) are analogous but use
+ * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
+ * and NTB_TAIL_WR_{EP_I,RC_O}().
+ */
+#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
+#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
+#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
+#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
+#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
+#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
+#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
+#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
+
+#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
+#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
+#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
+#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
+
+#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
+#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
+#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
+#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
+
+static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
+{
+	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
+}
+
+static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
+{
+	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
+}
+
+static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
+{
+	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
+}
+
+static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
+{
+	unsigned int head, tail;
+
+	if (ntb_qp_edma_is_ep(qp)) {
+		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
+			/* In this scope, only 'head' might proceed */
+			tail = READ_ONCE(qp->wr_cons);
+			head = READ_ONCE(qp->wr_prod);
+		}
+		return ntb_edma_ring_free_entry(head, tail);
+	}
+
+	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
+		/* In this scope, only 'head' might proceed */
+		tail = READ_ONCE(qp->rd_issue);
+		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
+	}
+	/*
+	 * On RC side, 'used' amount indicates how much EP side
+	 * has refilled, which are available for us to use for TX.
+	 */
+	return ntb_edma_ring_used_entry(head, tail);
+}
+
+static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
+						  struct ntb_transport_qp *qp)
+{
+	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
+	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
+	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
+	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
+	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
+	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
+
+	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
+	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
+	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
+	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
+	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
+	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
+	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
+	seq_putc(s, '\n');
+
+	seq_puts(s, "Using Remote eDMA - Yes\n");
+	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
+}
+
+static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
+{
+	struct ntb_dev *ndev = nt->ndev;
+
+	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
+		ntb_edma_teardown_isr(&ndev->pdev->dev);
+
+	if (nt->wq)
+		destroy_workqueue(nt->wq);
+	nt->wq = NULL;
+}
+
+static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
+				   unsigned int *mw_count)
+{
+	struct ntb_dev *ndev = nt->ndev;
+
+	/*
+	 * We need at least one MW for the transport plus one MW reserved
+	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
+	 */
+	if (*mw_count <= 1) {
+		dev_err(&ndev->dev,
+			"remote eDMA requires at least two MWs (have %u)\n",
+			*mw_count);
+		return -ENODEV;
+	}
+
+	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nt->wq) {
+		ntb_transport_edma_uninit(nt);
+		return -ENOMEM;
+	}
+
+	/* Reserve the last peer MW exclusively for the eDMA window. */
+	*mw_count -= 1;
+
+	return 0;
+}
+
+static void ntb_transport_edma_db_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp =
+			container_of(work, struct ntb_transport_qp, db_work);
+
+	ntb_peer_db_set(qp->ndev, qp->qp_bit);
+}
+
+static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
+{
+	if (ntb_qp_edma_is_rc(qp))
+		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
+			return;
+
+	/*
+	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
+	 * may sleep, delegate the actual doorbell write to a workqueue.
+	 */
+	queue_work(system_highpri_wq, &qp->db_work);
+}
+
+static void ntb_transport_edma_isr(void *data, int qp_num)
+{
+	struct ntb_transport_ctx *nt = data;
+	struct ntb_transport_qp *qp;
+
+	if (qp_num < 0 || qp_num >= nt->qp_count)
+		return;
+
+	qp = &nt->qp_vec[qp_num];
+	if (WARN_ON(!qp))
+		return;
+
+	queue_work(nt->wq, &qp->read_work);
+	queue_work(nt->wq, &qp->write_work);
+}
+
+static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
+{
+	struct ntb_dev *ndev = nt->ndev;
+	struct pci_dev *pdev = ndev->pdev;
+	int rc;
+
+	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
+		return 0;
+
+	rc = ntb_edma_setup_peer(ndev);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
+		return rc;
+	}
+
+	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
+		return rc;
+	}
+
+	nt->remote_edma_mode = REMOTE_EDMA_RC;
+	return 0;
+}
+
+static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
+{
+	struct ntb_dev *ndev = nt->ndev;
+	struct pci_dev *pdev = ndev->pdev;
+	struct pci_epc *epc;
+	int rc;
+
+	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
+		return 0;
+
+	/* Only EP side can return pci_epc */
+	epc = ntb_get_pci_epc(ndev);
+	if (!epc)
+		return 0;
+
+	rc = ntb_edma_setup_mws(ndev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to set up memory window for eDMA: %d\n", rc);
+		return rc;
+	}
+
+	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
+		return rc;
+	}
+
+	nt->remote_edma_mode = REMOTE_EDMA_EP;
+	return 0;
+}
+
+static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
+					  unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_dev *ndev = nt->ndev;
+	struct ntb_queue_entry *entry;
+	struct ntb_transport_mw *mw;
+	unsigned int mw_num, mw_count, qp_count;
+	unsigned int qp_offset, rx_info_offset;
+	unsigned int mw_size, mw_size_per_qp;
+	unsigned int num_qps_mw;
+	size_t edma_total;
+	unsigned int i;
+	int node;
+
+	mw_count = nt->mw_count;
+	qp_count = nt->qp_count;
+
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	if (!mw->virt_addr)
+		return -ENOMEM;
+
+	if (mw_num < qp_count % mw_count)
+		num_qps_mw = qp_count / mw_count + 1;
+	else
+		num_qps_mw = qp_count / mw_count;
+
+	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
+	if (max_mw_size && mw_size > max_mw_size)
+		mw_size = max_mw_size;
+
+	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
+	qp_offset = mw_size_per_qp * (qp_num / mw_count);
+	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
+
+	qp->tx_mw_size = mw_size_per_qp;
+	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
+	if (!qp->tx_mw)
+		return -EINVAL;
+	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
+	if (!qp->tx_mw_phys)
+		return -EINVAL;
+	qp->rx_info = qp->tx_mw + rx_info_offset;
+	qp->rx_buff = mw->virt_addr + qp_offset;
+	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
+
+	/* Due to housekeeping, there must be at least 2 buffs */
+	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
+	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
+
+	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
+	edma_total = 2 * sizeof(struct ntb_edma_ring);
+	if (rx_info_offset < edma_total) {
+		dev_err(&ndev->dev, "eDMA rings need %zuB but only %uB are available\n",
+			edma_total, rx_info_offset);
+		return -EINVAL;
+	}
+	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
+	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
+
+	/*
+	 * Checking to see if we have more entries than the default.
+	 * We should add additional entries if that is the case so we
+	 * can be in sync with the transport frames.
+	 */
+	node = dev_to_node(&ndev->dev);
+	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
+		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
+		if (!entry)
+			return -ENOMEM;
+
+		entry->qp = qp;
+		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
+		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+			     &qp->rx_free_q);
+		qp->rx_alloc_entry++;
+	}
+
+	memset(qp->rx_buff, 0, edma_total);
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+
+	return 0;
+}
+
+static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
+{
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	struct ntb_queue_entry *entry;
+	struct ntb_edma_desc *in;
+	unsigned int len;
+	u32 idx, flags;
+
+	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
+				     qp->rd_cons) == 0)
+		return 0;
+
+	idx = ntb_edma_ring_idx(qp->rd_cons);
+	in = NTB_DESC_RD_EP_I(qp, idx);
+	flags = in->flags;
+	if (!(flags & DESC_DONE_FLAG))
+		return 0;
+
+	in->flags = 0;
+	len = in->len; /* might be smaller than entry->len */
+
+	entry = (struct ntb_queue_entry *)(in->data);
+	if (WARN_ON(!entry))
+		return 0;
+
+	if (flags & LINK_DOWN_FLAG) {
+		ntb_qp_link_down(qp);
+		qp->rd_cons++;
+		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+		return 1;
+	}
+
+	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
+
+	qp->rx_bytes += len;
+	qp->rx_pkts++;
+	qp->rd_cons++;
+
+	if (qp->rx_handler && qp->client_ready)
+		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
+
+	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+	return 1;
+}
+
+static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	struct ntb_edma_desc *in;
+	u32 idx;
+
+	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
+				     qp->wr_cons) == 0)
+		return 0;
+
+	idx = ntb_edma_ring_idx(qp->wr_cons);
+	in = NTB_DESC_WR_EP_I(qp, idx);
+
+	entry = (struct ntb_queue_entry *)(in->data);
+	if (WARN_ON(!entry))
+		return 0;
+
+	qp->wr_cons++;
+
+	if (qp->tx_handler)
+		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
+
+	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
+	return 1;
+}
+
+static void ntb_transport_edma_ep_read_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, read_work);
+	unsigned int i;
+
+	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
+		if (!ntb_transport_edma_ep_read_complete(qp))
+			break;
+	}
+
+	if (ntb_transport_edma_ep_read_complete(qp))
+		queue_work(qp->transport->wq, &qp->read_work);
+}
+
+static void ntb_transport_edma_ep_write_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, write_work);
+	unsigned int i;
+
+	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
+		if (!ntb_transport_edma_ep_write_complete(qp))
+			break;
+	}
+
+	if (ntb_transport_edma_ep_write_complete(qp))
+		queue_work(qp->transport->wq, &qp->write_work);
+}
+
+static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, write_work);
+	struct ntb_queue_entry *entry;
+	struct ntb_edma_desc *in;
+	unsigned int len;
+	void *cb_data;
+	u32 idx;
+
+	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
+					qp->wr_cons) != 0) {
+		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
+		smp_rmb();
+
+		idx = ntb_edma_ring_idx(qp->wr_cons);
+		in = NTB_DESC_WR_RC_I(qp, idx);
+		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
+		if (!entry || !(entry->flags & DESC_DONE_FLAG))
+			break;
+
+		in->data = 0;
+
+		cb_data = entry->cb_data;
+		len = entry->len;
+
+		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
+
+		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
+			ntb_qp_link_down(qp);
+			continue;
+		}
+
+		ntb_transport_edma_notify_peer(qp);
+
+		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+
+		if (qp->rx_handler && qp->client_ready)
+			qp->rx_handler(qp, qp->cb_data, cb_data, len);
+
+		/* stat updates */
+		qp->rx_bytes += len;
+		qp->rx_pkts++;
+	}
+}
+
+static void ntb_transport_edma_rc_write_cb(void *data,
+					   const struct dmaengine_result *res)
+{
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_transport_ctx *nt = qp->transport;
+	enum dmaengine_tx_result dma_err = res->result;
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+
+	switch (dma_err) {
+	case DMA_TRANS_READ_FAILED:
+	case DMA_TRANS_WRITE_FAILED:
+	case DMA_TRANS_ABORTED:
+		entry->errors++;
+		entry->len = -EIO;
+		break;
+	case DMA_TRANS_NOERROR:
+	default:
+		break;
+	}
+	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
+	sg_dma_address(&entry->sgl) = 0;
+
+	entry->flags |= DESC_DONE_FLAG;
+
+	queue_work(nt->wq, &qp->write_work);
+}
+
+static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, read_work);
+	struct ntb_edma_desc *in, __iomem *out;
+	struct ntb_queue_entry *entry;
+	unsigned int len;
+	void *cb_data;
+	u32 idx;
+
+	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
+					qp->rd_cons) != 0) {
+		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
+		smp_rmb();
+
+		idx = ntb_edma_ring_idx(qp->rd_cons);
+		in = NTB_DESC_RD_RC_I(qp, idx);
+		entry = (struct ntb_queue_entry *)in->data;
+		if (!entry || !(entry->flags & DESC_DONE_FLAG))
+			break;
+
+		in->data = 0;
+
+		cb_data = entry->cb_data;
+		len = entry->len;
+
+		out = NTB_DESC_RD_RC_O(qp, idx);
+
+		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
+
+		/*
+		 * No need to add barrier in-between to enforce ordering here.
+		 * The other side proceeds only after both flags and tail are
+		 * updated.
+		 */
+		iowrite32(entry->flags, &out->flags);
+		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
+
+		ntb_transport_edma_notify_peer(qp);
+
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, cb_data, len);
+
+		/* stat updates */
+		qp->tx_bytes += len;
+		qp->tx_pkts++;
+	}
+}
+
+static void ntb_transport_edma_rc_read_cb(void *data,
+					  const struct dmaengine_result *res)
+{
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	enum dmaengine_tx_result dma_err = res->result;
+
+	switch (dma_err) {
+	case DMA_TRANS_READ_FAILED:
+	case DMA_TRANS_WRITE_FAILED:
+	case DMA_TRANS_ABORTED:
+		entry->errors++;
+		entry->len = -EIO;
+		break;
+	case DMA_TRANS_NOERROR:
+	default:
+		break;
+	}
+	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
+	sg_dma_address(&entry->sgl) = 0;
+
+	entry->flags |= DESC_DONE_FLAG;
+
+	queue_work(nt->wq, &qp->read_work);
+}
+
+static int ntb_transport_edma_rc_write_start(struct device *d,
+					     struct dma_chan *chan, size_t len,
+					     dma_addr_t ep_src, void *rc_dst,
+					     struct ntb_queue_entry *entry)
+{
+	struct scatterlist *sgl = &entry->sgl;
+	struct dma_async_tx_descriptor *txd;
+	struct dma_slave_config cfg;
+	dma_cookie_t cookie;
+	int nents, rc;
+
+	if (!d)
+		return -ENODEV;
+
+	if (!chan)
+		return -ENXIO;
+
+	if (WARN_ON(!ep_src || !rc_dst))
+		return -EINVAL;
+
+	if (WARN_ON(sg_dma_address(sgl)))
+		return -EINVAL;
+
+	sg_init_one(sgl, rc_dst, len);
+	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
+	if (nents <= 0)
+		return -EIO;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.src_addr       = ep_src;
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.direction      = DMA_DEV_TO_MEM;
+	rc = dmaengine_slave_config(chan, &cfg);
+	if (rc)
+		goto out_unmap;
+
+	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
+				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!txd) {
+		rc = -EIO;
+		goto out_unmap;
+	}
+
+	txd->callback_result = ntb_transport_edma_rc_write_cb;
+	txd->callback_param = entry;
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie)) {
+		rc = -EIO;
+		goto out_unmap;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+out_unmap:
+	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
+	return rc;
+}
+
+static int ntb_transport_edma_rc_read_start(struct device *d,
+					    struct dma_chan *chan, size_t len,
+					    void *rc_src, dma_addr_t ep_dst,
+					    struct ntb_queue_entry *entry)
+{
+	struct scatterlist *sgl = &entry->sgl;
+	struct dma_async_tx_descriptor *txd;
+	struct dma_slave_config cfg;
+	dma_cookie_t cookie;
+	int nents, rc;
+
+	if (!d)
+		return -ENODEV;
+
+	if (!chan)
+		return -ENXIO;
+
+	if (WARN_ON(!rc_src || !ep_dst))
+		return -EINVAL;
+
+	if (WARN_ON(sg_dma_address(sgl)))
+		return -EINVAL;
+
+	sg_init_one(sgl, rc_src, len);
+	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
+	if (nents <= 0)
+		return -EIO;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.dst_addr       = ep_dst;
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.direction      = DMA_MEM_TO_DEV;
+	rc = dmaengine_slave_config(chan, &cfg);
+	if (rc)
+		goto out_unmap;
+
+	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
+				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!txd) {
+		rc = -EIO;
+		goto out_unmap;
+	}
+
+	txd->callback_result = ntb_transport_edma_rc_read_cb;
+	txd->callback_param = entry;
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie)) {
+		rc = -EIO;
+		goto out_unmap;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+out_unmap:
+	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
+	return rc;
+}
+
+static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
+{
+	struct ntb_queue_entry *entry = container_of(
+				work, struct ntb_queue_entry, dma_work);
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	struct dma_chan *chan;
+	int rc;
+
+	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
+	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
+					       entry->addr, entry->buf, entry);
+	if (rc) {
+		entry->errors++;
+		entry->len = -EIO;
+		entry->flags |= DESC_DONE_FLAG;
+		queue_work(nt->wq, &qp->write_work);
+		return;
+	}
+}
+
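+/*
+ * RC side RX path, run from the rxc_db tasklet: walk the write
+ * descriptors published by the EP, pair each one with a pending RX
+ * buffer and hand it to a workqueue that starts the eDMA write
+ * (EP -> RC) transfer.
+ */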
+static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	int budget = NTB_EDMA_MAX_POLL;
+	struct ntb_queue_entry *entry;
+	struct ntb_edma_desc *in;
+	dma_addr_t ep_src;
+	u32 len, idx;
+
+	while (budget--) {
+		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
+					     qp->wr_issue) == 0)
+			break;
+
+		idx = ntb_edma_ring_idx(qp->wr_issue);
+		in = NTB_DESC_WR_RC_I(qp, idx);
+
+		len = READ_ONCE(in->len);
+		ep_src = (dma_addr_t)READ_ONCE(in->addr);
+
+		/* Prepare 'entry' for write completion */
+		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
+		if (!entry) {
+			qp->rx_err_no_buf++;
+			break;
+		}
+		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
+			entry->flags &= ~DESC_DONE_FLAG;
+		entry->len = len; /* NB. entry->len can be <=0 */
+		entry->addr = ep_src;
+
+		/*
+		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
+		 * so it needs to be set before wr_issue++.
+		 */
+		in->data = (uintptr_t)entry;
+
+		/* Ensure in->data visible before wr_issue++ */
+		smp_wmb();
+
+		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
+
+		if (!len) {
+			entry->flags |= DESC_DONE_FLAG;
+			queue_work(nt->wq, &qp->write_work);
+			continue;
+		}
+
+		if (in->flags & LINK_DOWN_FLAG) {
+			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
+			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
+			queue_work(nt->wq, &qp->write_work);
+			continue;
+		}
+
+		queue_work(nt->wq, &entry->dma_work);
+	}
+
+	/* Budget exhausted: more descriptors may be pending, poll again */
+	if (budget < 0)
+		tasklet_schedule(&qp->rxc_db_work);
+}
+
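+/*
+ * RC side TX path: claim a read descriptor pre-posted by the EP (it
+ * carries the DMA address of an EP RX buffer), publish the transfer
+ * length and start an eDMA read (RC -> EP) transfer.
+ */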
+static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
+					    struct ntb_queue_entry *entry)
+{
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct ntb_edma_desc *in, __iomem *out;
+	unsigned int len = entry->len;
+	struct dma_chan *chan;
+	u32 issue, idx, head;
+	dma_addr_t ep_dst;
+	int rc;
+
+	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
+
+	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
+		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
+		issue = qp->rd_issue;
+		if (ntb_edma_ring_used_entry(head, issue) == 0) {
+			qp->tx_ring_full++;
+			return -ENOSPC;
+		}
+
+		/*
+		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
+		 * so it needs to be set before rd_issue++.
+		 */
+		idx = ntb_edma_ring_idx(issue);
+		in = NTB_DESC_RD_RC_I(qp, idx);
+		in->data = (uintptr_t)entry;
+
+		/* Make in->data visible before rd_issue++ */
+		smp_wmb();
+
+		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
+	}
+
+	/* Publish the final transfer length to the EP side */
+	out = NTB_DESC_RD_RC_O(qp, idx);
+	iowrite32(len, &out->len);
+	ioread32(&out->len);
+
+	if (unlikely(!len)) {
+		entry->flags |= DESC_DONE_FLAG;
+		queue_work(nt->wq, &qp->read_work);
+		return 0;
+	}
+
+	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
+	dma_rmb();
+
+	/* kick remote eDMA read transfer */
+	ep_dst = (dma_addr_t)in->addr;
+	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
+	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
+					      entry->buf, ep_dst, entry);
+	if (rc) {
+		entry->errors++;
+		entry->len = -EIO;
+		entry->flags |= DESC_DONE_FLAG;
+		queue_work(nt->wq, &qp->read_work);
+	}
+	return 0;
+}
+
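+/*
+ * EP side TX path: map the payload for device access and publish a
+ * write descriptor so the RC can start an eDMA write transfer that
+ * moves the data into one of its RX buffers.
+ */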
+static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
+					    struct ntb_queue_entry *entry)
+{
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	struct ntb_edma_desc *in, __iomem *out;
+	unsigned int len = entry->len;
+	dma_addr_t ep_src = 0;
+	u32 idx;
+	int rc;
+
+	if (likely(len)) {
+		ep_src = dma_map_single(dma_dev, entry->buf, len,
+					DMA_TO_DEVICE);
+		rc = dma_mapping_error(dma_dev, ep_src);
+		if (rc)
+			return rc;
+	}
+
+	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
+		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
+			rc = -ENOSPC;
+			qp->tx_ring_full++;
+			goto out_unmap;
+		}
+
+		idx = ntb_edma_ring_idx(qp->wr_prod);
+		in  = NTB_DESC_WR_EP_I(qp, idx);
+		out = NTB_DESC_WR_EP_O(qp, idx);
+
+		WARN_ON(in->flags & DESC_DONE_FLAG);
+		WARN_ON(entry->flags & DESC_DONE_FLAG);
+		in->flags = 0;
+		in->data  = (uintptr_t)entry;
+		entry->addr  = ep_src;
+
+		iowrite32(len,          &out->len);
+		iowrite32(entry->flags, &out->flags);
+		iowrite64(ep_src,       &out->addr);
+		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
+
+		dma_wmb();
+		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
+
+		qp->tx_bytes += len;
+		qp->tx_pkts++;
+	}
+
+	ntb_transport_edma_notify_peer(qp);
+
+	return 0;
+out_unmap:
+	if (likely(len))
+		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
+	return rc;
+}
+
+static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
+					 struct ntb_queue_entry *entry,
+					 void *cb, void *data, unsigned int len,
+					 unsigned int flags)
+{
+	struct device *dma_dev;
+
+	if (entry->addr) {
+		/* Deferred unmap */
+		dma_dev = get_dma_dev(qp->ndev);
+		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
+	}
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = flags;
+	entry->errors = 0;
+	entry->addr = 0;
+
+	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
+
+	if (ntb_qp_edma_is_ep(qp))
+		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
+	else
+		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
+}
+
+static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
+					    struct ntb_queue_entry *entry)
+{
+	struct device *dma_dev = get_dma_dev(qp->ndev);
+	struct ntb_edma_desc *in, __iomem *out;
+	unsigned int len = entry->len;
+	void *data = entry->buf;
+	dma_addr_t ep_dst;
+	u32 idx;
+	int rc;
+
+	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
+	rc = dma_mapping_error(dma_dev, ep_dst);
+	if (rc)
+		return rc;
+
+	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
+		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
+				       READ_ONCE(qp->rd_cons))) {
+			rc = -ENOSPC;
+			goto out_unmap;
+		}
+
+		idx = ntb_edma_ring_idx(qp->rd_prod);
+		in = NTB_DESC_RD_EP_I(qp, idx);
+		out = NTB_DESC_RD_EP_O(qp, idx);
+
+		iowrite32(len, &out->len);
+		iowrite64(ep_dst, &out->addr);
+
+		WARN_ON(in->flags & DESC_DONE_FLAG);
+		in->data = (uintptr_t)entry;
+		entry->addr = ep_dst;
+
+		/* Ensure len/addr are visible before the head update */
+		dma_wmb();
+
+		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
+		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
+	}
+	return 0;
+out_unmap:
+	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
+	return rc;
+}
+
+static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
+					 struct ntb_queue_entry *entry)
+{
+	int rc;
+
+	/* On the RC side, the behaviour is the same as in the default backend */
+	if (ntb_qp_edma_is_ep(qp)) {
+		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
+		if (rc) {
+			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+				     &qp->rx_free_q);
+			return rc;
+		}
+	}
+
+	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
+
+	if (qp->active)
+		tasklet_schedule(&qp->rxc_db_work);
+
+	return 0;
+}
+
+static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+
+	if (ntb_qp_edma_is_rc(qp)) {
+		ntb_transport_edma_rc_poll(qp);
+	} else if (ntb_qp_edma_is_ep(qp)) {
+		/*
+		 * Make sure we poll the rings even if the eDMA interrupt was
+		 * already cleared on the RC side.
+		 */
+		queue_work(nt->wq, &qp->read_work);
+		queue_work(nt->wq, &qp->write_work);
+	} else {
+		/* Unreachable */
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void ntb_transport_edma_read_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, read_work);
+
+	if (ntb_qp_edma_is_rc(qp))
+		ntb_transport_edma_rc_read_complete_work(work);
+	else if (ntb_qp_edma_is_ep(qp))
+		ntb_transport_edma_ep_read_work(work);
+	else
+		/* Unreachable */
+		WARN_ON_ONCE(1);
+}
+
+static void ntb_transport_edma_write_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(
+				work, struct ntb_transport_qp, write_work);
+
+	if (ntb_qp_edma_is_rc(qp))
+		ntb_transport_edma_rc_write_complete_work(work);
+	else if (ntb_qp_edma_is_ep(qp))
+		ntb_transport_edma_ep_write_work(work);
+	else
+		/* Unreachable */
+		WARN_ON_ONCE(1);
+}
+
+static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
+					  unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+
+	qp->wr_cons = 0;
+	qp->rd_cons = 0;
+	qp->wr_prod = 0;
+	qp->rd_prod = 0;
+	qp->wr_issue = 0;
+	qp->rd_issue = 0;
+
+	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
+	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
+	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
+}
+
+static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
+					    struct ntb_transport_qp *qp)
+{
+	spin_lock_init(&qp->ep_tx_lock);
+	spin_lock_init(&qp->ep_rx_lock);
+	spin_lock_init(&qp->rc_lock);
+}
+
+static const struct ntb_transport_backend_ops edma_backend_ops = {
+	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
+	.tx_free_entry = ntb_transport_edma_tx_free_entry,
+	.tx_enqueue = ntb_transport_edma_tx_enqueue,
+	.rx_enqueue = ntb_transport_edma_rx_enqueue,
+	.rx_poll = ntb_transport_edma_rx_poll,
+	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
+};
+#endif /* CONFIG_NTB_TRANSPORT_EDMA */
+
 /**
  * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
  * @qp: NTB transport layer queue to be enabled
-- 
2.48.1
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Dave Jiang 2 months, 1 week ago

On 11/29/25 9:03 AM, Koichiro Den wrote:
> Add a new transport backend that uses a remote DesignWare eDMA engine
> located on the NTB endpoint to move data between host and endpoint.
> 
> In this mode:
> 
>   - The endpoint exposes a dedicated memory window that contains the
>     eDMA register block followed by a small control structure (struct
>     ntb_edma_info) and per-channel linked-list (LL) rings.
> 
>   - On the endpoint side, ntb_edma_setup_mws() allocates the control
>     structure and LL rings in endpoint memory, then programs an inbound
>     iATU region so that the host can access them via a peer MW.
> 
>   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
>     ntb_edma_info and configures a dw-edma DMA device to use the LL
>     rings provided by the endpoint.
> 
>   - ntb_transport is extended with a new backend_ops implementation that
>     routes TX and RX enqueue/poll operations through the remote eDMA
>     rings while keeping the existing shared-memory backend intact.
> 
>   - The host signals the endpoint via a dedicated DMA read channel.
>     'use_msi' module option is ignored when 'use_remote_edma=1'.
> 
> The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
> module parameter (use_remote_edma). When disabled, the existing
> ntb_transport behaviour is unchanged.
> 
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  drivers/ntb/Kconfig                           |   11 +
>  drivers/ntb/Makefile                          |    3 +
>  drivers/ntb/ntb_edma.c                        |  628 ++++++++
>  drivers/ntb/ntb_edma.h                        |  128 ++

I briefly looked over the code. It feels like the EDMA bits should go in drivers/ntb/hw/ rather than drivers/ntb/, given they are pretty specific to the DesignWare hardware. What sits in drivers/ntb should be generic APIs that a different vendor can utilize without having to adapt to DesignWare hardware specifics. So maybe a bit more abstraction is needed?
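
To make that a bit more concrete, one possible shape is a small vendor-neutral registration hook so ntb_transport never references DesignWare types directly. Just a sketch; the registration struct and function names below are made up:

	/* hypothetical, in a generic ntb_transport header */
	struct ntb_transport_dma_backend {
		const char *name;
		const struct ntb_transport_backend_ops *ops;
		/* probe/remove the hardware-specific engine for this device */
		int (*probe)(struct ntb_transport_ctx *nt, struct ntb_dev *ndev);
		void (*remove)(struct ntb_transport_ctx *nt);
	};

	int ntb_transport_register_dma_backend(const struct ntb_transport_dma_backend *be);
	void ntb_transport_unregister_dma_backend(const struct ntb_transport_dma_backend *be);

Then the DesignWare-specific pieces could live under drivers/ntb/hw/ and register themselves, and the core would only walk the list of registered backends at probe time.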

>  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
>  5 files changed, 2048 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/ntb/ntb_edma.c
>  create mode 100644 drivers/ntb/ntb_edma.h
>  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
> 
> diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
> index df16c755b4da..db63f02bb116 100644
> --- a/drivers/ntb/Kconfig
> +++ b/drivers/ntb/Kconfig
> @@ -37,4 +37,15 @@ config NTB_TRANSPORT
>  
>  	 If unsure, say N.
>  
> +config NTB_TRANSPORT_EDMA
> +	bool "NTB Transport backed by remote eDMA"
> +	depends on NTB_TRANSPORT
> +	depends on PCI
> +	select DMA_ENGINE
> +	help
> +	  Enable a transport backend that uses a remote DesignWare eDMA engine
> +	  exposed through a dedicated NTB memory window. The host uses the
> +	  endpoint's eDMA engine to move data in both directions.
> +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
> +
>  endif # NTB
> diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
> index 3a6fa181ff99..51f0e1e3aec7 100644
> --- a/drivers/ntb/Makefile
> +++ b/drivers/ntb/Makefile
> @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
>  
>  ntb-y			:= core.o
>  ntb-$(CONFIG_NTB_MSI)	+= msi.o
> +
> +ntb_transport-y					:= ntb_transport_core.o
> +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
> diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
> new file mode 100644
> index 000000000000..cb35e0d56aa8
> --- /dev/null
> +++ b/drivers/ntb/ntb_edma.c
> @@ -0,0 +1,628 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/pci.h>
> +#include <linux/ntb.h>
> +#include <linux/io.h>
> +#include <linux/iommu.h>
> +#include <linux/dmaengine.h>
> +#include <linux/pci-epc.h>
> +#include <linux/dma/edma.h>
> +#include <linux/irq.h>
> +#include <linux/irqdomain.h>
> +#include <linux/of.h>
> +#include <linux/of_irq.h>
> +#include <dt-bindings/interrupt-controller/arm-gic.h>
> +
> +#include "ntb_edma.h"
> +
> +/*
> + * The interrupt register offsets below are taken from the DesignWare
> + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> + * backend currently only supports this layout.
> + */
> +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> +#define DMA_WRITE_INT_MASK_OFF     0x54
> +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> +#define DMA_READ_INT_STATUS_OFF    0xa0
> +#define DMA_READ_INT_MASK_OFF      0xa8
> +#define DMA_READ_INT_CLEAR_OFF     0xac
> +
> +#define NTB_EDMA_NOTIFY_MAX_QP		64
> +
> +static unsigned int edma_spi = 417; /* 0x1a1 */
> +module_param(edma_spi, uint, 0644);
> +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
> +
> +static u64 edma_regs_phys = 0xe65d5000;
> +module_param(edma_regs_phys, ullong, 0644);
> +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
> +
> +static unsigned long edma_regs_size = 0x1200;
> +module_param(edma_regs_size, ulong, 0644);
> +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
> +
> +struct ntb_edma_intr {
> +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
> +};
> +
> +struct ntb_edma_ctx {
> +	void *ll_wr_virt[EDMA_WR_CH_NUM];
> +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
> +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
> +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
> +
> +	struct ntb_edma_intr *intr_ep_virt;
> +	dma_addr_t intr_ep_phys;
> +	struct ntb_edma_intr *intr_rc_virt;
> +	dma_addr_t intr_rc_phys;
> +	u32 notify_qp_max;
> +
> +	bool initialized;
> +};
> +
> +static struct ntb_edma_ctx edma_ctx;
> +
> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> +
> +struct ntb_edma_interrupt {
> +	int virq;
> +	void __iomem *base;
> +	ntb_edma_interrupt_cb_t cb;
> +	void *data;
> +};
> +
> +static struct ntb_edma_interrupt ntb_edma_intr;
> +
> +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
> +{
> +	struct device_node *np = dev_of_node(dev);
> +	struct device_node *parent;
> +	struct irq_fwspec fwspec = { 0 };
> +	int virq;
> +
> +	parent = of_irq_find_parent(np);
> +	if (!parent)
> +		return -ENODEV;
> +
> +	fwspec.fwnode      = of_fwnode_handle(parent);
> +	fwspec.param_count = 3;
> +	fwspec.param[0]    = GIC_SPI;
> +	fwspec.param[1]    = spi;
> +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
> +
> +	virq = irq_create_fwspec_mapping(&fwspec);
> +	of_node_put(parent);
> +	return (virq > 0) ? virq : -EINVAL;
> +}
> +
> +static irqreturn_t ntb_edma_isr(int irq, void *data)
> +{
> +	struct ntb_edma_interrupt *v = data;
> +	u32 mask = BIT(EDMA_RD_CH_NUM);
> +	u32 i, val;
> +
> +	/*
> +	 * We do not ack interrupts here but instead we mask all local interrupt
> +	 * sources except the read channel used for notification. This reduces
> +	 * needless ISR invocations.
> +	 *
> +	 * In theory we could configure LIE=1/RIE=0 only for the notification
> +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
> +	 * require intrusive changes to the dw-edma core.
> +	 *
> +	 * Note: The host side may have already cleared the read interrupt used
> +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
> +	 * way to detect it. As a result, we cannot reliably tell which specific
> +	 * channel triggered this interrupt. intr_ep_virt->db[i] teaches us
> +	 * instead.
> +	 */
> +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
> +
> +	if (!v->cb || !edma_ctx.intr_ep_virt)
> +		return IRQ_HANDLED;
> +
> +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
> +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
> +		if (!val)
> +			continue;
> +
> +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
> +		v->cb(v->data, i);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> +		       ntb_edma_interrupt_cb_t cb, void *data)
> +{
> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> +	int virq = ntb_edma_map_spi_to_virq(epc_dev->parent, edma_spi);
> +	int ret;
> +
> +	if (virq < 0) {
> +		dev_err(dev, "failed to get virq (%d)\n", virq);
> +		return virq;
> +	}
> +
> +	v->virq = virq;
> +	v->cb = cb;
> +	v->data = data;
> +	if (edma_regs_phys && !v->base)
> +		v->base = devm_ioremap(dev, edma_regs_phys, edma_regs_size);
> +	if (!v->base) {
> +		dev_err(dev, "failed to setup v->base\n");
> +		return -1;
> +	}
> +	ret = devm_request_irq(dev, v->virq, ntb_edma_isr, 0, "ntb-edma", v);
> +	if (ret)
> +		return ret;
> +
> +	if (v->base) {
> +		iowrite32(0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> +		iowrite32(0x0, v->base + DMA_READ_INT_MASK_OFF);
> +	}
> +	return 0;
> +}
> +
> +void ntb_edma_teardown_isr(struct device *dev)
> +{
> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> +
> +	/* Mask all write/read interrupts so we don't get called again. */
> +	if (v->base) {
> +		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> +		iowrite32(~0x0, v->base + DMA_READ_INT_MASK_OFF);
> +	}
> +
> +	if (v->virq > 0)
> +		devm_free_irq(dev, v->virq, v);
> +
> +	if (v->base)
> +		devm_iounmap(dev, v->base);
> +
> +	v->virq = 0;
> +	v->cb = NULL;
> +	v->data = NULL;
> +}
> +
> +int ntb_edma_setup_mws(struct ntb_dev *ndev)
> +{
> +	const size_t info_bytes = PAGE_SIZE;
> +	resource_size_t size_max, offset;
> +	dma_addr_t intr_phys, info_phys;
> +	u32 wr_done = 0, rd_done = 0;
> +	struct ntb_edma_intr *intr;
> +	struct ntb_edma_info *info;
> +	int peer_mw, mw_index, rc;
> +	struct iommu_domain *dom;
> +	bool reg_mapped = false;
> +	size_t ll_bytes, size;
> +	struct pci_epc *epc;
> +	struct device *dev;
> +	unsigned long iova;
> +	phys_addr_t phys;
> +	u64 need;
> +	u32 i;
> +
> +	/* +1 is for interruption */
> +	ll_bytes = (EDMA_WR_CH_NUM + EDMA_RD_CH_NUM + 1) * DMA_LLP_MEM_SIZE;
> +	need = EDMA_REG_SIZE + info_bytes + ll_bytes;
> +
> +	epc = ntb_get_pci_epc(ndev);
> +	if (!epc)
> +		return -ENODEV;
> +	dev = epc->dev.parent;
> +
> +	if (edma_ctx.initialized)
> +		return 0;
> +
> +	info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL);
> +	if (!info)
> +		return -ENOMEM;
> +
> +	memset(info, 0, info_bytes);
> +	info->magic = NTB_EDMA_INFO_MAGIC;
> +	info->wr_cnt = EDMA_WR_CH_NUM;
> +	info->rd_cnt = EDMA_RD_CH_NUM + 1; /* +1 for interruption */
> +	info->regs_phys = edma_regs_phys;
> +	info->ll_stride = DMA_LLP_MEM_SIZE;
> +
> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> +		edma_ctx.ll_wr_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> +							 &edma_ctx.ll_wr_phys[i],
> +							 GFP_KERNEL,
> +							 DMA_ATTR_FORCE_CONTIGUOUS);
> +		if (!edma_ctx.ll_wr_virt[i]) {
> +			rc = -ENOMEM;
> +			goto err_free_ll;
> +		}
> +		wr_done++;
> +		info->ll_wr_phys[i] = edma_ctx.ll_wr_phys[i];
> +	}
> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> +		edma_ctx.ll_rd_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> +							 &edma_ctx.ll_rd_phys[i],
> +							 GFP_KERNEL,
> +							 DMA_ATTR_FORCE_CONTIGUOUS);
> +		if (!edma_ctx.ll_rd_virt[i]) {
> +			rc = -ENOMEM;
> +			goto err_free_ll;
> +		}
> +		rd_done++;
> +		info->ll_rd_phys[i] = edma_ctx.ll_rd_phys[i];
> +	}
> +
> +	/* For interruption */
> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> +	intr = dma_alloc_coherent(dev, sizeof(*intr), &intr_phys, GFP_KERNEL);
> +	if (!intr) {
> +		rc = -ENOMEM;
> +		goto err_free_ll;
> +	}
> +	memset(intr, 0, sizeof(*intr));
> +	edma_ctx.intr_ep_virt = intr;
> +	edma_ctx.intr_ep_phys = intr_phys;
> +	info->intr_dar_base = intr_phys;
> +
> +	peer_mw = ntb_peer_mw_count(ndev);
> +	if (peer_mw <= 0) {
> +		rc = -ENODEV;
> +		goto err_free_ll;
> +	}
> +
> +	mw_index = peer_mw - 1; /* last MW */
> +
> +	rc = ntb_mw_get_align(ndev, 0, mw_index, 0, NULL, &size_max,
> +			      &offset);
> +	if (rc)
> +		goto err_free_ll;
> +
> +	if (size_max < need) {
> +		rc = -ENOSPC;
> +		goto err_free_ll;
> +	}
> +
> +	/* Map register space (direct) */
> +	dom = iommu_get_domain_for_dev(dev);
> +	if (dom) {
> +		phys = edma_regs_phys & PAGE_MASK;
> +		size = PAGE_ALIGN(EDMA_REG_SIZE + edma_regs_phys - phys);
> +		iova = phys;
> +
> +		rc = iommu_map(dom, iova, phys, EDMA_REG_SIZE,
> +			       IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, GFP_KERNEL);
> +		if (rc)
> +			dev_err(&ndev->dev, "failed to create direct mapping for eDMA reg space\n");
> +		reg_mapped = true;
> +	}
> +
> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_regs_phys, EDMA_REG_SIZE, offset);
> +	if (rc)
> +		goto err_unmap_reg;
> +
> +	offset += EDMA_REG_SIZE;
> +
> +	/* Map ntb_edma_info */
> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, info_phys, info_bytes, offset);
> +	if (rc)
> +		goto err_clear_trans;
> +	offset += info_bytes;
> +
> +	/* Map LL location */
> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_wr_phys[i],
> +				      DMA_LLP_MEM_SIZE, offset);
> +		if (rc)
> +			goto err_clear_trans;
> +		offset += DMA_LLP_MEM_SIZE;
> +	}
> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_rd_phys[i],
> +				      DMA_LLP_MEM_SIZE, offset);
> +		if (rc)
> +			goto err_clear_trans;
> +		offset += DMA_LLP_MEM_SIZE;
> +	}
> +	edma_ctx.initialized = true;
> +
> +	return 0;
> +
> +err_clear_trans:
> +	/*
> +	 * Tear down the NTB translation window used for the eDMA MW.
> +	 * There is no sub-range clear API for ntb_mw_set_trans(), so we
> +	 * unconditionally drop the whole mapping on error.
> +	 */
> +	ntb_mw_clear_trans(ndev, 0, mw_index);
> +
> +err_unmap_reg:
> +	if (reg_mapped)
> +		iommu_unmap(dom, iova, size);
> +err_free_ll:
> +	while (rd_done--)
> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> +			       edma_ctx.ll_rd_virt[rd_done],
> +			       edma_ctx.ll_rd_phys[rd_done],
> +			       DMA_ATTR_FORCE_CONTIGUOUS);
> +	while (wr_done--)
> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> +			       edma_ctx.ll_wr_virt[wr_done],
> +			       edma_ctx.ll_wr_phys[wr_done],
> +			       DMA_ATTR_FORCE_CONTIGUOUS);
> +	if (edma_ctx.intr_ep_virt)
> +		dma_free_coherent(dev, sizeof(struct ntb_edma_intr),
> +				  edma_ctx.intr_ep_virt,
> +				  edma_ctx.intr_ep_phys);
> +	dma_free_coherent(dev, info_bytes, info, info_phys);
> +	return rc;
> +}
> +
> +static int ntb_edma_irq_vector(struct device *dev, unsigned int nr)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	int ret, nvec;
> +
> +	nvec = pci_msi_vec_count(pdev);
> +	for (; nr < nvec; nr++) {
> +		ret = pci_irq_vector(pdev, nr);
> +		if (!irq_has_action(ret))
> +			return ret;
> +	}
> +	return 0;
> +}
> +
> +static const struct dw_edma_plat_ops ntb_edma_ops = {
> +	.irq_vector     = ntb_edma_irq_vector,
> +};
> +
> +int ntb_edma_setup_peer(struct ntb_dev *ndev)
> +{
> +	struct ntb_edma_info *info;
> +	unsigned int wr_cnt, rd_cnt;
> +	struct dw_edma_chip *chip;
> +	void __iomem *edma_virt;
> +	phys_addr_t edma_phys;
> +	resource_size_t mw_size;
> +	u64 off = EDMA_REG_SIZE;
> +	int peer_mw, mw_index;
> +	unsigned int i;
> +	int ret;
> +
> +	peer_mw = ntb_peer_mw_count(ndev);
> +	if (peer_mw <= 0)
> +		return -ENODEV;
> +
> +	mw_index = peer_mw - 1; /* last MW */
> +
> +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
> +				   &mw_size);
> +	if (ret)
> +		return -1;
> +
> +	edma_virt = ioremap(edma_phys, mw_size);
> +
> +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
> +	if (!chip) {
> +		ret = -ENOMEM;
> +		return ret;
> +	}
> +
> +	chip->dev = &ndev->pdev->dev;
> +	chip->nr_irqs = 4;
> +	chip->ops = &ntb_edma_ops;
> +	chip->flags = 0;
> +	chip->reg_base = edma_virt;
> +	chip->mf = EDMA_MF_EDMA_UNROLL;
> +
> +	info = edma_virt + off;
> +	if (info->magic != NTB_EDMA_INFO_MAGIC)
> +		return -EINVAL;
> +	wr_cnt = info->wr_cnt;
> +	rd_cnt = info->rd_cnt;
> +	chip->ll_wr_cnt = wr_cnt;
> +	chip->ll_rd_cnt = rd_cnt;
> +	off += PAGE_SIZE;
> +
> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> +	edma_ctx.intr_ep_phys = info->intr_dar_base;
> +	if (edma_ctx.intr_ep_phys) {
> +		edma_ctx.intr_rc_virt =
> +			dma_alloc_coherent(&ndev->pdev->dev,
> +					   sizeof(struct ntb_edma_intr),
> +					   &edma_ctx.intr_rc_phys,
> +					   GFP_KERNEL);
> +		if (!edma_ctx.intr_rc_virt)
> +			return -ENOMEM;
> +		memset(edma_ctx.intr_rc_virt, 0,
> +		       sizeof(struct ntb_edma_intr));
> +	}
> +
> +	for (i = 0; i < wr_cnt; i++) {
> +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
> +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
> +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
> +		off += DMA_LLP_MEM_SIZE;
> +	}
> +	for (i = 0; i < rd_cnt; i++) {
> +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
> +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
> +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
> +		off += DMA_LLP_MEM_SIZE;
> +	}
> +
> +	if (!pci_dev_msi_enabled(ndev->pdev))
> +		return -ENXIO;
> +
> +	ret = dw_edma_probe(chip);
> +	if (ret) {
> +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +struct ntb_edma_filter {
> +	struct device *dma_dev;
> +	u32 direction;
> +};
> +
> +static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
> +{
> +	struct ntb_edma_filter *filter = arg;
> +	u32 dir = filter->direction;
> +	struct dma_slave_caps caps;
> +	int ret;
> +
> +	if (chan->device->dev != filter->dma_dev)
> +		return false;
> +
> +	ret = dma_get_slave_caps(chan, &caps);
> +	if (ret < 0)
> +		return false;
> +
> +	return !!(caps.directions & dir);
> +}
> +
> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < edma->num_wr_chan; i++)
> +		dma_release_channel(edma->wr_chan[i]);
> +
> +	for (i = 0; i < edma->num_rd_chan; i++)
> +		dma_release_channel(edma->rd_chan[i]);
> +
> +	if (edma->intr_chan)
> +		dma_release_channel(edma->intr_chan);
> +}
> +
> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
> +{
> +	struct ntb_edma_filter filter;
> +	dma_cap_mask_t dma_mask;
> +	unsigned int i;
> +
> +	dma_cap_zero(dma_mask);
> +	dma_cap_set(DMA_SLAVE, dma_mask);
> +
> +	memset(edma, 0, sizeof(*edma));
> +	edma->dev = dma_dev;
> +
> +	filter.dma_dev = dma_dev;
> +	filter.direction = BIT(DMA_DEV_TO_MEM);
> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> +		edma->wr_chan[i] = dma_request_channel(dma_mask,
> +						       ntb_edma_filter_fn,
> +						       &filter);
> +		if (!edma->wr_chan[i])
> +			break;
> +		edma->num_wr_chan++;
> +	}
> +
> +	filter.direction = BIT(DMA_MEM_TO_DEV);
> +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
> +		edma->rd_chan[i] = dma_request_channel(dma_mask,
> +						       ntb_edma_filter_fn,
> +						       &filter);
> +		if (!edma->rd_chan[i])
> +			break;
> +		edma->num_rd_chan++;
> +	}
> +
> +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
> +					      &filter);
> +	if (!edma->intr_chan)
> +		dev_warn(dma_dev,
> +			 "Remote eDMA notify channel could not be allocated\n");
> +
> +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
> +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
> +		ntb_edma_teardown_chans(edma);
> +		return -ENODEV;
> +	}
> +	return 0;
> +}
> +
> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> +				    remote_edma_dir_t dir)
> +{
> +	unsigned int n, cur, idx;
> +	struct dma_chan **chans;
> +	atomic_t *cur_chan;
> +
> +	if (dir == REMOTE_EDMA_WRITE) {
> +		n = edma->num_wr_chan;
> +		chans = edma->wr_chan;
> +		cur_chan = &edma->cur_wr_chan;
> +	} else {
> +		n = edma->num_rd_chan;
> +		chans = edma->rd_chan;
> +		cur_chan = &edma->cur_rd_chan;
> +	}
> +	if (WARN_ON_ONCE(!n))
> +		return NULL;
> +
> +	/* Simple round-robin */
> +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
> +	idx = cur % n;
> +	return chans[idx];
> +}
> +
> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
> +{
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	struct scatterlist sgl;
> +	dma_cookie_t cookie;
> +	struct device *dev;
> +
> +	if (!edma || !edma->intr_chan)
> +		return -ENXIO;
> +
> +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
> +		return -EINVAL;
> +
> +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
> +		return -EINVAL;
> +
> +	dev = edma->dev;
> +	if (!dev)
> +		return -ENODEV;
> +
> +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
> +
> +	/* Ensure store is visible before kicking the DMA transfer */
> +	wmb();
> +
> +	sg_init_table(&sgl, 1);
> +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
> +	sg_dma_len(&sgl) = sizeof(u32);
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_MEM_TO_DEV;
> +
> +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
> +		return -EINVAL;
> +
> +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
> +				      DMA_MEM_TO_DEV,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd)
> +		return -ENOSPC;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie))
> +		return -ENOSPC;
> +
> +	dma_async_issue_pending(edma->intr_chan);
> +	return 0;
> +}
> diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
> new file mode 100644
> index 000000000000..da0451827edb
> --- /dev/null
> +++ b/drivers/ntb/ntb_edma.h
> @@ -0,0 +1,128 @@
> +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
> +#ifndef _NTB_EDMA_H_
> +#define _NTB_EDMA_H_
> +
> +#include <linux/completion.h>
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +
> +#define EDMA_REG_SIZE		SZ_64K
> +#define DMA_LLP_MEM_SIZE	SZ_4K
> +#define EDMA_WR_CH_NUM		4
> +#define EDMA_RD_CH_NUM		4
> +#define NTB_EDMA_MAX_CH		8
> +
> +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
> +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
> +
> +#define NTB_EDMA_RING_ORDER	7
> +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
> +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
> +
> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> +
> +/*
> + * REMOTE_EDMA_EP:
> + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
> + *
> + * REMOTE_EDMA_RC:
> + *   Root Complex controls the endpoint eDMA through the shared MW and
> + *   drives reads/writes on behalf of the host.
> + */
> +typedef enum {
> +	REMOTE_EDMA_UNKNOWN,
> +	REMOTE_EDMA_EP,
> +	REMOTE_EDMA_RC,
> +} remote_edma_mode_t;
> +
> +typedef enum {
> +	REMOTE_EDMA_WRITE,
> +	REMOTE_EDMA_READ,
> +} remote_edma_dir_t;
> +
> +/*
> + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
> + *
> + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
> + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
> + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
> + *                                RC configures via dw_edma)
> + *
> + * ntb_edma_setup_mws() on EP:
> + *   - allocates ntb_edma_info and LLs in EP memory
> + *   - programs inbound iATU so that RC peer MW[n] points at this block
> + *
> + * ntb_edma_setup_peer() on RC:
> + *   - ioremaps peer MW[n]
> + *   - reads ntb_edma_info
> + *   - sets up dw_edma_chip ll_region_* from that info
> + */
> +struct ntb_edma_info {
> +	u32 magic;
> +	u16 wr_cnt;
> +	u16 rd_cnt;
> +	u64 regs_phys;
> +	u32 ll_stride;
> +	u32 rsvd;
> +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
> +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
> +
> +	u64 intr_dar_base;
> +} __packed;
> +
> +struct ll_dma_addrs {
> +	dma_addr_t wr[EDMA_WR_CH_NUM];
> +	dma_addr_t rd[EDMA_RD_CH_NUM];
> +};
> +
> +struct ntb_edma_chans {
> +	struct device *dev;
> +
> +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
> +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
> +	struct dma_chan *intr_chan;
> +
> +	unsigned int num_wr_chan;
> +	unsigned int num_rd_chan;
> +	atomic_t cur_wr_chan;
> +	atomic_t cur_rd_chan;
> +};
> +
> +static __always_inline u32 ntb_edma_ring_idx(u32 v)
> +{
> +	return v & NTB_EDMA_RING_MASK;
> +}
> +
> +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
> +{
> +	if (head >= tail) {
> +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
> +		return head - tail;
> +	}
> +
> +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
> +	return U32_MAX - tail + head + 1;
> +}
> +
> +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
> +{
> +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
> +}
> +
> +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
> +{
> +	return ntb_edma_ring_free_entry(head, tail) == 0;
> +}
> +
> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> +		       ntb_edma_interrupt_cb_t cb, void *data);
> +void ntb_edma_teardown_isr(struct device *dev);
> +int ntb_edma_setup_mws(struct ntb_dev *ndev);
> +int ntb_edma_setup_peer(struct ntb_dev *ndev);
> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> +				    remote_edma_dir_t dir);
> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
> +
> +#endif
> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
> similarity index 65%
> rename from drivers/ntb/ntb_transport.c
> rename to drivers/ntb/ntb_transport_core.c
> index 907db6c93d4d..48d48921978d 100644
> --- a/drivers/ntb/ntb_transport.c
> +++ b/drivers/ntb/ntb_transport_core.c
> @@ -47,6 +47,9 @@
>   * Contact Information:
>   * Jon Mason <jon.mason@intel.com>
>   */
> +#include <linux/atomic.h>
> +#include <linux/bug.h>
> +#include <linux/compiler.h>
>  #include <linux/debugfs.h>
>  #include <linux/delay.h>
>  #include <linux/dmaengine.h>
> @@ -71,6 +74,8 @@
>  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
>  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
>  
> +#define NTB_EDMA_MAX_POLL		32
> +
>  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
>  MODULE_VERSION(NTB_TRANSPORT_VER);
>  MODULE_LICENSE("Dual BSD/GPL");
> @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
>  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
>  #endif
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA

This comment applies throughout this patch: ifdefs inside C source files are pretty frowned upon in the kernel. The preferred way is to only have ifdefs in the header files. So please give this a bit more consideration and see if it can be done differently to address this.
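
For reference, the usual way to avoid ifdefs in the .c file is to keep them in the header and provide static inline stubs, so the call sites can stay unconditional. A sketch using a couple of the functions from this patch:

	/* ntb_edma.h */
	#ifdef CONFIG_NTB_TRANSPORT_EDMA
	int ntb_edma_setup_mws(struct ntb_dev *ndev);
	void ntb_edma_teardown_isr(struct device *dev);
	#else
	static inline int ntb_edma_setup_mws(struct ntb_dev *ndev)
	{
		return 0;
	}
	static inline void ntb_edma_teardown_isr(struct device *dev)
	{
	}
	#endif

That way ntb_transport_core.c still compiles (and type-checks) the call sites when the option is off, and the compiler simply drops the dead code.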

> +#include "ntb_edma.h"
> +static bool use_remote_edma;
> +module_param(use_remote_edma, bool, 0644);
> +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
> +#endif
> +
>  static struct dentry *nt_debugfs_dir;
>  
>  /* Only two-ports NTB devices are supported */
> @@ -125,6 +137,14 @@ struct ntb_queue_entry {
>  		struct ntb_payload_header __iomem *tx_hdr;
>  		struct ntb_payload_header *rx_hdr;
>  	};
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	dma_addr_t addr;
> +
> +	/* Used by RC side only */
> +	struct scatterlist sgl;
> +	struct work_struct dma_work;
> +#endif
>  };
>  
>  struct ntb_rx_info {
> @@ -202,6 +222,33 @@ struct ntb_transport_qp {
>  	int msi_irq;
>  	struct ntb_msi_desc msi_desc;
>  	struct ntb_msi_desc peer_msi_desc;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	/*
> +	 * For ensuring peer notification in non-atomic context.
> +	 * ntb_peer_db_set might sleep or schedule.
> +	 */
> +	struct work_struct db_work;
> +
> +	/*
> +	 * wr: remote eDMA write transfer (EP -> RC direction)
> +	 * rd: remote eDMA read transfer (RC -> EP direction)
> +	 */
> +	u32 wr_cons;
> +	u32 rd_cons;
> +	u32 wr_prod;
> +	u32 rd_prod;
> +	u32 wr_issue;
> +	u32 rd_issue;
> +
> +	spinlock_t ep_tx_lock;
> +	spinlock_t ep_rx_lock;
> +	spinlock_t rc_lock;
> +
> +	/* Completion work for read/write transfers. */
> +	struct work_struct read_work;
> +	struct work_struct write_work;
> +#endif

For something like this, maybe it needs its own struct instead of an ifdef chunk. Perhaps 'ntb_rx_info' can serve as a core data struct, with EDMA having an 'ntb_rx_info_edma' that embeds 'ntb_rx_info'.
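
Roughly something like this (just a sketch, the field comments are illustrative):

	struct ntb_rx_info_edma {
		struct ntb_rx_info info;	/* generic part stays as-is */
		/* eDMA-specific ring indices, locks and work items go here */
	};

so the generic code keeps dealing with struct ntb_rx_info and only the EDMA backend knows about the containing structure.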

DJ

>  };
>  
>  struct ntb_transport_mw {
> @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
>  
>  	/* Make sure workq of link event be executed serially */
>  	struct mutex link_event_lock;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	remote_edma_mode_t remote_edma_mode;
> +	struct device *dma_dev;
> +	struct workqueue_struct *wq;
> +	struct ntb_edma_chans edma;
> +#endif
>  };
>  
>  enum {
> @@ -262,6 +316,19 @@ struct ntb_payload_header {
>  	unsigned int flags;
>  };
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> +				   unsigned int *mw_count);
> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num);
> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> +					    struct ntb_transport_qp *qp);
> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> +
>  /*
>   * Return the device that should be used for DMA mapping.
>   *
> @@ -298,7 +365,7 @@ enum {
>  	container_of((__drv), struct ntb_transport_client, driver)
>  
>  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
> -#define NTB_QP_DEF_NUM_ENTRIES	100
> +#define NTB_QP_DEF_NUM_ENTRIES	128
>  #define NTB_LINK_DOWN_TIMEOUT	10
>  
>  static void ntb_transport_rxc_db(unsigned long data);
> @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
>  	count = ntb_spad_count(nt->ndev);
>  	for (i = 0; i < count; i++)
>  		ntb_spad_write(nt->ndev, i, 0);
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_edma_teardown_chans(&nt->edma);
> +#endif
>  }
>  
>  static void ntb_transport_link_cleanup_work(struct work_struct *work)
> @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>  
>  	/* send the local info, in the opposite order of the way we read it */
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	rc = ntb_transport_edma_ep_init(nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
> +		return;
> +	}
> +#endif
> +
>  	if (nt->use_msi) {
>  		rc = ntb_msi_setup_mws(ndev);
>  		if (rc) {
> @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>  
>  	nt->link_is_up = true;
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	rc = ntb_transport_edma_rc_init(nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
> +		goto out1;
> +	}
> +#endif
> +
>  	for (i = 0; i < nt->qp_count; i++) {
>  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
>  
> @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
>  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
>  };
>  
> +static const struct ntb_transport_backend_ops edma_backend_ops;
> +
>  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  {
>  	struct ntb_transport_ctx *nt;
> @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  
>  	nt->ndev = ndev;
>  
> -	nt->backend_ops = default_backend_ops;
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	if (use_remote_edma) {
> +		rc = ntb_transport_edma_init(nt, &mw_count);
> +		if (rc) {
> +			nt->mw_count = 0;
> +			goto err;
> +		}
> +		nt->backend_ops = edma_backend_ops;
> +
> +		/*
> +		 * On remote eDMA mode, we reserve a read channel for Host->EP
> +		 * interruption.
> +		 */
> +		use_msi = false;
> +	} else
> +#endif
> +		nt->backend_ops = default_backend_ops;
>  
>  	/*
>  	 * If we are using MSI, and have at least one extra memory window,
> @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  		rc = ntb_transport_init_queue(nt, i);
>  		if (rc)
>  			goto err2;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +		ntb_transport_edma_init_queue(nt, i);
> +#endif
>  	}
>  
>  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
> @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  	}
>  	kfree(nt->mw_vec);
>  err:
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_transport_edma_uninit(nt);
> +#endif
>  	kfree(nt);
>  	return rc;
>  }
> @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>  
>  	nt->qp_bitmap_free &= ~qp_bit;
>  
> +	qp->qp_bit = qp_bit;
>  	qp->cb_data = data;
>  	qp->rx_handler = handlers->rx_handler;
>  	qp->tx_handler = handlers->tx_handler;
>  	qp->event_handler = handlers->event_handler;
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_transport_edma_create_queue(nt, qp);
> +#endif
> +
>  	dma_cap_zero(dma_mask);
>  	dma_cap_set(DMA_MEMCPY, dma_mask);
>  
> @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>  			goto err1;
>  
>  		entry->qp = qp;
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> +#endif
>  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
>  			     &qp->rx_free_q);
>  	}
> @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
>   */
>  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>  {
> -	struct pci_dev *pdev;
>  	struct ntb_queue_entry *entry;
> +	struct pci_dev *pdev;
>  	u64 qp_bit;
>  
>  	if (!qp)
> @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>  	tasklet_kill(&qp->rxc_db_work);
>  
>  	cancel_delayed_work_sync(&qp->link_work);
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	cancel_work_sync(&qp->read_work);
> +	cancel_work_sync(&qp->write_work);
> +#endif
>  
>  	qp->cb_data = NULL;
>  	qp->rx_handler = NULL;
> @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
>  }
>  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
>  
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +/*
> + * Remote eDMA mode implementation
> + */
> +struct ntb_edma_desc {
> +	u32 len;
> +	u32 flags;
> +	u64 addr; /* DMA address */
> +	u64 data;
> +};
> +
> +struct ntb_edma_ring {
> +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
> +	u32 head;
> +	u32 tail;
> +};
> +
> +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
> +
> +#define __NTB_EDMA_CHECK_INDEX(_i)					\
> +({									\
> +	unsigned long __i = (unsigned long)(_i);			\
> +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
> +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
> +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
> +	__i;								\
> +})
> +
> +#define NTB_EDMA_DESC_I(qp, i, n)					\
> +({									\
> +	typeof(qp) __qp = (qp);						\
> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> +	(struct ntb_edma_desc *)					\
> +		((char *)(__qp)->rx_buff +				\
> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> +		 NTB_EDMA_DESC_OFF(__i));				\
> +})
> +
> +#define NTB_EDMA_DESC_O(qp, i, n)					\
> +({									\
> +	typeof(qp) __qp = (qp);						\
> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> +	(struct ntb_edma_desc __iomem *)				\
> +		((char __iomem *)(__qp)->tx_mw +			\
> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> +		 NTB_EDMA_DESC_OFF(__i));				\
> +})
> +
> +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, head)))
> +#define NTB_EDMA_HEAD_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, head)))
> +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, tail)))
> +#define NTB_EDMA_TAIL_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, tail)))
> +
> +/*
> + * Macro naming rule:
> + *   NTB_DESC_RD_EP_I (as an example)
> + *            ^^ ^^ ^
> + *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
> + *            :  `----- Who uses this macro.
> + *            `-------- DESC / HEAD / TAIL
> + *
> + * Read transfers (RC->EP):
> + *
> + *   EP view (outbound, written via NTB):
> + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
> + *           [ len ][ flags ][ addr ][ data ]
> + *           [ len ][ flags ][ addr ][ data ]
> + *           :
> + *           [ len ][ flags ][ addr ][ data ]
> + *       - head: NTB_HEAD_RD_EP_O(qp)
> + *       - tail: NTB_TAIL_RD_EP_I(qp)
> + *
> + *   RC view (inbound, local mapping):
> + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
> + *           [ len ][ flags ][ addr ][ data ]
> + *           [ len ][ flags ][ addr ][ data ]
> + *           :
> + *           [ len ][ flags ][ addr ][ data ]
> + *       - head: NTB_HEAD_RD_RC_I(qp)
> + *       - tail: NTB_TAIL_RD_RC_O(qp)
> + *
> + * Write transfers (EP -> RC) are analogous but use
> + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
> + * and NTB_TAIL_WR_{EP_I,RC_O}().
> + */
> +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> +
> +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
> +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
> +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
> +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
> +
> +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
> +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
> +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
> +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
> +
> +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
> +{
> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
> +}
> +
> +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
> +{
> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
> +}
> +
> +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
> +{
> +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
> +}
> +
> +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
> +{
> +	unsigned int head, tail;
> +
> +	if (ntb_qp_edma_is_ep(qp)) {
> +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> +			/* In this scope, only 'head' might proceed */
> +			tail = READ_ONCE(qp->wr_cons);
> +			head = READ_ONCE(qp->wr_prod);
> +		}
> +		return ntb_edma_ring_free_entry(head, tail);
> +	}
> +
> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> +		/* In this scope, only 'head' might proceed */
> +		tail = READ_ONCE(qp->rd_issue);
> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> +	}
> +	/*
> +	 * On RC side, 'used' amount indicates how much EP side
> +	 * has refilled, which are available for us to use for TX.
> +	 */
> +	return ntb_edma_ring_used_entry(head, tail);
> +}
> +
> +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
> +						  struct ntb_transport_qp *qp)
> +{
> +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
> +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
> +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
> +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
> +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
> +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
> +
> +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
> +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
> +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
> +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
> +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
> +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
> +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
> +	seq_putc(s, '\n');
> +
> +	seq_puts(s, "Using Remote eDMA - Yes\n");
> +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
> +}
> +
> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +
> +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
> +		ntb_edma_teardown_isr(&ndev->pdev->dev);
> +
> +	if (nt->wq)
> +		destroy_workqueue(nt->wq);
> +	nt->wq = NULL;
> +}
> +
> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> +				   unsigned int *mw_count)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +
> +	/*
> +	 * We need at least one MW for the transport plus one MW reserved
> +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
> +	 */
> +	if (*mw_count <= 1) {
> +		dev_err(&ndev->dev,
> +			"remote eDMA requires at least two MWS (have %u)\n",
> +			*mw_count);
> +		return -ENODEV;
> +	}
> +
> +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
> +	if (!nt->wq) {
> +		ntb_transport_edma_uninit(nt);
> +		return -ENOMEM;
> +	}
> +
> +	/* Reserve the last peer MW exclusively for the eDMA window. */
> +	*mw_count -= 1;
> +
> +	return 0;
> +}
> +
> +static void ntb_transport_edma_db_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp =
> +			container_of(work, struct ntb_transport_qp, db_work);
> +
> +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
> +}
> +
> +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
> +{
> +	if (ntb_qp_edma_is_rc(qp))
> +		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
> +			return;
> +
> +	/*
> +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
> +	 * may sleep, delegate the actual doorbell write to a workqueue.
> +	 */
> +	queue_work(system_highpri_wq, &qp->db_work);
> +}
> +
> +static void ntb_transport_edma_isr(void *data, int qp_num)
> +{
> +	struct ntb_transport_ctx *nt = data;
> +	struct ntb_transport_qp *qp;
> +
> +	if (qp_num < 0 || qp_num >= nt->qp_count)
> +		return;
> +
> +	qp = &nt->qp_vec[qp_num];
> +	if (WARN_ON(!qp))
> +		return;
> +
> +	queue_work(nt->wq, &qp->read_work);
> +	queue_work(nt->wq, &qp->write_work);
> +}
> +
> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct pci_dev *pdev = ndev->pdev;
> +	int rc;
> +
> +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
> +		return 0;
> +
> +	rc = ntb_edma_setup_peer(ndev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
> +		return rc;
> +	}
> +
> +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
> +		return rc;
> +	}
> +
> +	nt->remote_edma_mode = REMOTE_EDMA_RC;
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct pci_dev *pdev = ndev->pdev;
> +	struct pci_epc *epc;
> +	int rc;
> +
> +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
> +		return 0;
> +
> +	/* Only EP side can return pci_epc */
> +	epc = ntb_get_pci_epc(ndev);
> +	if (!epc)
> +		return 0;
> +
> +	rc = ntb_edma_setup_mws(ndev);
> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"Failed to set up memory window for eDMA: %d\n", rc);
> +		return rc;
> +	}
> +
> +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
> +		return rc;
> +	}
> +
> +	nt->remote_edma_mode = REMOTE_EDMA_EP;
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num)
> +{
> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct ntb_queue_entry *entry;
> +	struct ntb_transport_mw *mw;
> +	unsigned int mw_num, mw_count, qp_count;
> +	unsigned int qp_offset, rx_info_offset;
> +	unsigned int mw_size, mw_size_per_qp;
> +	unsigned int num_qps_mw;
> +	size_t edma_total;
> +	unsigned int i;
> +	int node;
> +
> +	mw_count = nt->mw_count;
> +	qp_count = nt->qp_count;
> +
> +	mw_num = QP_TO_MW(nt, qp_num);
> +	mw = &nt->mw_vec[mw_num];
> +
> +	if (!mw->virt_addr)
> +		return -ENOMEM;
> +
> +	if (mw_num < qp_count % mw_count)
> +		num_qps_mw = qp_count / mw_count + 1;
> +	else
> +		num_qps_mw = qp_count / mw_count;
> +
> +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
> +	if (max_mw_size && mw_size > max_mw_size)
> +		mw_size = max_mw_size;
> +
> +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
> +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
> +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
> +
> +	qp->tx_mw_size = mw_size_per_qp;
> +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> +	if (!qp->tx_mw)
> +		return -EINVAL;
> +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
> +	if (!qp->tx_mw_phys)
> +		return -EINVAL;
> +	qp->rx_info = qp->tx_mw + rx_info_offset;
> +	qp->rx_buff = mw->virt_addr + qp_offset;
> +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
> +
> +	/* Due to housekeeping, there must be at least 2 buffs */
> +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> +
> +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
> +	edma_total = 2 * sizeof(struct ntb_edma_ring);
> +	if (rx_info_offset < edma_total) {
> +		dev_err(&ndev->dev, "Ring space requires %luB (>=%uB)\n",
> +			edma_total, rx_info_offset);
> +		return -EINVAL;
> +	}
> +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
> +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
> +
> +	/*
> +	 * Checking to see if we have more entries than the default.
> +	 * We should add additional entries if that is the case so we
> +	 * can be in sync with the transport frames.
> +	 */
> +	node = dev_to_node(&ndev->dev);
> +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
> +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
> +		if (!entry)
> +			return -ENOMEM;
> +
> +		entry->qp = qp;
> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> +			     &qp->rx_free_q);
> +		qp->rx_alloc_entry++;
> +	}
> +
> +	memset(qp->rx_buff, 0, edma_total);
> +
> +	qp->rx_pkts = 0;
> +	qp->tx_pkts = 0;
> +
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	unsigned int len;
> +	u32 idx;
> +
> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
> +				     qp->rd_cons) == 0)
> +		return 0;
> +
> +	idx = ntb_edma_ring_idx(qp->rd_cons);
> +	in = NTB_DESC_RD_EP_I(qp, idx);
> +	if (!(in->flags & DESC_DONE_FLAG))
> +		return 0;
> +
> +	in->flags = 0;
> +	len = in->len; /* might be smaller than entry->len */
> +
> +	entry = (struct ntb_queue_entry *)(in->data);
> +	if (WARN_ON(!entry))
> +		return 0;
> +
> +	if (in->flags & LINK_DOWN_FLAG) {
> +		ntb_qp_link_down(qp);
> +		qp->rd_cons++;
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +		return 1;
> +	}
> +
> +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
> +
> +	qp->rx_bytes += len;
> +	qp->rx_pkts++;
> +	qp->rd_cons++;
> +
> +	if (qp->rx_handler && qp->client_ready)
> +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
> +
> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +	return 1;
> +}
> +
> +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	u32 idx;
> +
> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
> +				     qp->wr_cons) == 0)
> +		return 0;
> +
> +	idx = ntb_edma_ring_idx(qp->wr_cons);
> +	in = NTB_DESC_WR_EP_I(qp, idx);
> +
> +	entry = (struct ntb_queue_entry *)(in->data);
> +	if (WARN_ON(!entry))
> +		return 0;
> +
> +	qp->wr_cons++;
> +
> +	if (qp->tx_handler)
> +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
> +
> +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
> +	return 1;
> +}
> +
> +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +	unsigned int i;
> +
> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> +		if (!ntb_transport_edma_ep_read_complete(qp))
> +			break;
> +	}
> +
> +	if (ntb_transport_edma_ep_read_complete(qp))
> +		queue_work(qp->transport->wq, &qp->read_work);
> +}
> +
> +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +	unsigned int i;
> +
> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> +		if (!ntb_transport_edma_ep_write_complete(qp))
> +			break;
> +	}
> +
> +	if (ntb_transport_edma_ep_write_complete(qp))
> +		queue_work(qp->transport->wq, &qp->write_work);
> +}
> +
> +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	unsigned int len;
> +	void *cb_data;
> +	u32 idx;
> +
> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
> +					qp->wr_cons) != 0) {
> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
> +		smp_rmb();
> +
> +		idx = ntb_edma_ring_idx(qp->wr_cons);
> +		in = NTB_DESC_WR_RC_I(qp, idx);
> +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> +			break;
> +
> +		in->data = 0;
> +
> +		cb_data = entry->cb_data;
> +		len = entry->len;
> +
> +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
> +
> +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
> +			ntb_qp_link_down(qp);
> +			continue;
> +		}
> +
> +		ntb_transport_edma_notify_peer(qp);
> +
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +
> +		if (qp->rx_handler && qp->client_ready)
> +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
> +
> +		/* stat updates */
> +		qp->rx_bytes += len;
> +		qp->rx_pkts++;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_write_cb(void *data,
> +					   const struct dmaengine_result *res)
> +{
> +	struct ntb_queue_entry *entry = data;
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	enum dmaengine_tx_result dma_err = res->result;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +
> +	switch (dma_err) {
> +	case DMA_TRANS_READ_FAILED:
> +	case DMA_TRANS_WRITE_FAILED:
> +	case DMA_TRANS_ABORTED:
> +		entry->errors++;
> +		entry->len = -EIO;
> +		break;
> +	case DMA_TRANS_NOERROR:
> +	default:
> +		break;
> +	}
> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
> +	sg_dma_address(&entry->sgl) = 0;
> +
> +	entry->flags |= DESC_DONE_FLAG;
> +
> +	queue_work(nt->wq, &qp->write_work);
> +}
> +
> +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	struct ntb_queue_entry *entry;
> +	unsigned int len;
> +	void *cb_data;
> +	u32 idx;
> +
> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
> +					qp->rd_cons) != 0) {
> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
> +		smp_rmb();
> +
> +		idx = ntb_edma_ring_idx(qp->rd_cons);
> +		in = NTB_DESC_RD_RC_I(qp, idx);
> +		entry = (struct ntb_queue_entry *)in->data;
> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> +			break;
> +
> +		in->data = 0;
> +
> +		cb_data = entry->cb_data;
> +		len = entry->len;
> +
> +		out = NTB_DESC_RD_RC_O(qp, idx);
> +
> +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
> +
> +		/*
> +		 * No need to add barrier in-between to enforce ordering here.
> +		 * The other side proceeds only after both flags and tail are
> +		 * updated.
> +		 */
> +		iowrite32(entry->flags, &out->flags);
> +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
> +
> +		ntb_transport_edma_notify_peer(qp);
> +
> +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
> +			     &qp->tx_free_q);
> +
> +		if (qp->tx_handler)
> +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
> +
> +		/* stat updates */
> +		qp->tx_bytes += len;
> +		qp->tx_pkts++;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_read_cb(void *data,
> +					  const struct dmaengine_result *res)
> +{
> +	struct ntb_queue_entry *entry = data;
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	enum dmaengine_tx_result dma_err = res->result;
> +
> +	switch (dma_err) {
> +	case DMA_TRANS_READ_FAILED:
> +	case DMA_TRANS_WRITE_FAILED:
> +	case DMA_TRANS_ABORTED:
> +		entry->errors++;
> +		entry->len = -EIO;
> +		break;
> +	case DMA_TRANS_NOERROR:
> +	default:
> +		break;
> +	}
> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
> +	sg_dma_address(&entry->sgl) = 0;
> +
> +	entry->flags |= DESC_DONE_FLAG;
> +
> +	queue_work(nt->wq, &qp->read_work);
> +}
> +
> +static int ntb_transport_edma_rc_write_start(struct device *d,
> +					     struct dma_chan *chan, size_t len,
> +					     dma_addr_t ep_src, void *rc_dst,
> +					     struct ntb_queue_entry *entry)
> +{
> +	struct scatterlist *sgl = &entry->sgl;
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	dma_cookie_t cookie;
> +	int nents, rc;
> +
> +	if (!d)
> +		return -ENODEV;
> +
> +	if (!chan)
> +		return -ENXIO;
> +
> +	if (WARN_ON(!ep_src || !rc_dst))
> +		return -EINVAL;
> +
> +	if (WARN_ON(sg_dma_address(sgl)))
> +		return -EINVAL;
> +
> +	sg_init_one(sgl, rc_dst, len);
> +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
> +	if (nents <= 0)
> +		return -EIO;
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.src_addr       = ep_src;
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_DEV_TO_MEM;
> +	rc = dmaengine_slave_config(chan, &cfg);
> +	if (rc)
> +		goto out_unmap;
> +
> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +
> +	txd->callback_result = ntb_transport_edma_rc_write_cb;
> +	txd->callback_param = entry;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie)) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +	dma_async_issue_pending(chan);
> +	return 0;
> +out_unmap:
> +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_rc_read_start(struct device *d,
> +					    struct dma_chan *chan, size_t len,
> +					    void *rc_src, dma_addr_t ep_dst,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct scatterlist *sgl = &entry->sgl;
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	dma_cookie_t cookie;
> +	int nents, rc;
> +
> +	if (!d)
> +		return -ENODEV;
> +
> +	if (!chan)
> +		return -ENXIO;
> +
> +	if (WARN_ON(!rc_src || !ep_dst))
> +		return -EINVAL;
> +
> +	if (WARN_ON(sg_dma_address(sgl)))
> +		return -EINVAL;
> +
> +	sg_init_one(sgl, rc_src, len);
> +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
> +	if (nents <= 0)
> +		return -EIO;
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.dst_addr       = ep_dst;
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_MEM_TO_DEV;
> +	rc = dmaengine_slave_config(chan, &cfg);
> +	if (rc)
> +		goto out_unmap;
> +
> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +
> +	txd->callback_result = ntb_transport_edma_rc_read_cb;
> +	txd->callback_param = entry;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie)) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +	dma_async_issue_pending(chan);
> +	return 0;
> +out_unmap:
> +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
> +	return rc;
> +}
> +
> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
> +{
> +	struct ntb_queue_entry *entry = container_of(
> +				work, struct ntb_queue_entry, dma_work);
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct dma_chan *chan;
> +	int rc;
> +
> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
> +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
> +					       entry->addr, entry->buf, entry);
> +	if (rc) {
> +		entry->errors++;
> +		entry->len = -EIO;
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->write_work);
> +		return;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	unsigned int budget = NTB_EDMA_MAX_POLL;
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	dma_addr_t ep_src;
> +	u32 len, idx;
> +
> +	/* Use a for-loop so an exhausted budget is detectable below */
> +	for (; budget; budget--) {
> +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
> +					     qp->wr_issue) == 0)
> +			break;
> +
> +		idx = ntb_edma_ring_idx(qp->wr_issue);
> +		in = NTB_DESC_WR_RC_I(qp, idx);
> +
> +		len = READ_ONCE(in->len);
> +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
> +
> +		/* Prepare 'entry' for write completion */
> +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
> +		if (!entry) {
> +			qp->rx_err_no_buf++;
> +			break;
> +		}
> +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
> +			entry->flags &= ~DESC_DONE_FLAG;
> +		entry->len = len; /* NB. entry->len can be <=0 */
> +		entry->addr = ep_src;
> +
> +		/*
> +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
> +		 * so it needs to be set before wr_issue++.
> +		 */
> +		in->data = (uintptr_t)entry;
> +
> +		/* Ensure in->data visible before wr_issue++ */
> +		smp_wmb();
> +
> +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
> +
> +		if (!len) {
> +			entry->flags |= DESC_DONE_FLAG;
> +			queue_work(nt->wq, &qp->write_work);
> +			continue;
> +		}
> +
> +		if (in->flags & LINK_DOWN_FLAG) {
> +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
> +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
> +			queue_work(nt->wq, &qp->write_work);
> +			continue;
> +		}
> +
> +		queue_work(nt->wq, &entry->dma_work);
> +	}
> +
> +	if (!budget)
> +		tasklet_schedule(&qp->rxc_db_work);
> +}
> +
> +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	struct dma_chan *chan;
> +	u32 issue, idx, head;
> +	dma_addr_t ep_dst;
> +	int rc;
> +
> +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
> +
> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> +		issue = qp->rd_issue;
> +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
> +			qp->tx_ring_full++;
> +			return -ENOSPC;
> +		}
> +
> +		/*
> +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
> +		 * so it needs to be set before rd_issue++.
> +		 */
> +		idx = ntb_edma_ring_idx(issue);
> +		in = NTB_DESC_RD_RC_I(qp, idx);
> +		in->data = (uintptr_t)entry;
> +
> +		/* Make in->data visible before rd_issue++ */
> +		smp_wmb();
> +
> +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
> +	}
> +
> +	/* Publish the final transfer length to the EP side */
> +	out = NTB_DESC_RD_RC_O(qp, idx);
> +	iowrite32(len, &out->len);
> +	ioread32(&out->len);
> +
> +	if (unlikely(!len)) {
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->read_work);
> +		return 0;
> +	}
> +
> +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
> +	dma_rmb();
> +
> +	/* kick remote eDMA read transfer */
> +	ep_dst = (dma_addr_t)in->addr;
> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
> +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
> +					      entry->buf, ep_dst, entry);
> +	if (rc) {
> +		entry->errors++;
> +		entry->len = -EIO;
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->read_work);
> +	}
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	dma_addr_t ep_src = 0;
> +	u32 idx;
> +	int rc;
> +
> +	if (likely(len)) {
> +		ep_src = dma_map_single(dma_dev, entry->buf, len,
> +					DMA_TO_DEVICE);
> +		rc = dma_mapping_error(dma_dev, ep_src);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
> +			rc = -ENOSPC;
> +			qp->tx_ring_full++;
> +			goto out_unmap;
> +		}
> +
> +		idx = ntb_edma_ring_idx(qp->wr_prod);
> +		in  = NTB_DESC_WR_EP_I(qp, idx);
> +		out = NTB_DESC_WR_EP_O(qp, idx);
> +
> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> +		WARN_ON(entry->flags & DESC_DONE_FLAG);
> +		in->flags = 0;
> +		in->data  = (uintptr_t)entry;
> +		entry->addr  = ep_src;
> +
> +		iowrite32(len,          &out->len);
> +		iowrite32(entry->flags, &out->flags);
> +		iowrite64(ep_src,       &out->addr);
> +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
> +
> +		dma_wmb();
> +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
> +
> +		qp->tx_bytes += len;
> +		qp->tx_pkts++;
> +	}
> +
> +	ntb_transport_edma_notify_peer(qp);
> +
> +	return 0;
> +out_unmap:
> +	if (likely(len))
> +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
> +					 struct ntb_queue_entry *entry,
> +					 void *cb, void *data, unsigned int len,
> +					 unsigned int flags)
> +{
> +	struct device *dma_dev;
> +
> +	if (entry->addr) {
> +		/* Deferred unmap */
> +		dma_dev = get_dma_dev(qp->ndev);
> +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
> +	}
> +
> +	entry->cb_data = cb;
> +	entry->buf = data;
> +	entry->len = len;
> +	entry->flags = flags;
> +	entry->errors = 0;
> +	entry->addr = 0;
> +
> +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
> +
> +	if (ntb_qp_edma_is_ep(qp))
> +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
> +	else
> +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
> +}
> +
> +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	void *data = entry->buf;
> +	dma_addr_t ep_dst;
> +	u32 idx;
> +	int rc;
> +
> +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
> +	rc = dma_mapping_error(dma_dev, ep_dst);
> +	if (rc)
> +		return rc;
> +
> +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
> +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
> +				       READ_ONCE(qp->rd_cons))) {
> +			rc = -ENOSPC;
> +			goto out_unmap;
> +		}
> +
> +		idx = ntb_edma_ring_idx(qp->rd_prod);
> +		in = NTB_DESC_RD_EP_I(qp, idx);
> +		out = NTB_DESC_RD_EP_O(qp, idx);
> +
> +		iowrite32(len, &out->len);
> +		iowrite64(ep_dst, &out->addr);
> +
> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> +		in->data = (uintptr_t)entry;
> +		entry->addr = ep_dst;
> +
> +		/* Ensure len/addr are visible before the head update */
> +		dma_wmb();
> +
> +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
> +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
> +	}
> +	return 0;
> +out_unmap:
> +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
> +					 struct ntb_queue_entry *entry)
> +{
> +	int rc;
> +
> +	/* The behaviour is the same as the default backend for RC side */
> +	if (ntb_qp_edma_is_ep(qp)) {
> +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
> +		if (rc) {
> +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> +				     &qp->rx_free_q);
> +			return rc;
> +		}
> +	}
> +
> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
> +
> +	if (qp->active)
> +		tasklet_schedule(&qp->rxc_db_work);
> +
> +	return 0;
> +}
> +
> +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_transport_ctx *nt = qp->transport;
> +
> +	if (ntb_qp_edma_is_rc(qp)) {
> +		ntb_transport_edma_rc_poll(qp);
> +	} else if (ntb_qp_edma_is_ep(qp)) {
> +		/*
> +		 * Make sure we poll the rings even if an eDMA interrupt is
> +		 * cleared on the RC side earlier.
> +		 */
> +		queue_work(nt->wq, &qp->read_work);
> +		queue_work(nt->wq, &qp->write_work);
> +	} else {
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +	}
> +}
> +
> +static void ntb_transport_edma_read_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +
> +	if (ntb_qp_edma_is_rc(qp))
> +		ntb_transport_edma_rc_read_complete_work(work);
> +	else if (ntb_qp_edma_is_ep(qp))
> +		ntb_transport_edma_ep_read_work(work);
> +	else
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +}
> +
> +static void ntb_transport_edma_write_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +
> +	if (ntb_qp_edma_is_rc(qp))
> +		ntb_transport_edma_rc_write_complete_work(work);
> +	else if (ntb_qp_edma_is_ep(qp))
> +		ntb_transport_edma_ep_write_work(work);
> +	else
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +}
> +
> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num)
> +{
> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> +
> +	qp->wr_cons = 0;
> +	qp->rd_cons = 0;
> +	qp->wr_prod = 0;
> +	qp->rd_prod = 0;
> +	qp->wr_issue = 0;
> +	qp->rd_issue = 0;
> +
> +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
> +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
> +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
> +}
> +
> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> +					    struct ntb_transport_qp *qp)
> +{
> +	spin_lock_init(&qp->ep_tx_lock);
> +	spin_lock_init(&qp->ep_rx_lock);
> +	spin_lock_init(&qp->rc_lock);
> +}
> +
> +static const struct ntb_transport_backend_ops edma_backend_ops = {
> +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> +	.rx_poll = ntb_transport_edma_rx_poll,
> +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> +};
> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> +
>  /**
>   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
>   * @qp: NTB transport layer queue to be enabled
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months, 1 week ago
On Mon, Dec 01, 2025 at 02:46:41PM -0700, Dave Jiang wrote:
> 
> 
> On 11/29/25 9:03 AM, Koichiro Den wrote:
> > Add a new transport backend that uses a remote DesignWare eDMA engine
> > located on the NTB endpoint to move data between host and endpoint.
> > 
> > In this mode:
> > 
> >   - The endpoint exposes a dedicated memory window that contains the
> >     eDMA register block followed by a small control structure (struct
> >     ntb_edma_info) and per-channel linked-list (LL) rings.
> > 
> >   - On the endpoint side, ntb_edma_setup_mws() allocates the control
> >     structure and LL rings in endpoint memory, then programs an inbound
> >     iATU region so that the host can access them via a peer MW.
> > 
> >   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
> >     ntb_edma_info and configures a dw-edma DMA device to use the LL
> >     rings provided by the endpoint.
> > 
> >   - ntb_transport is extended with a new backend_ops implementation that
> >     routes TX and RX enqueue/poll operations through the remote eDMA
> >     rings while keeping the existing shared-memory backend intact.
> > 
> >   - The host signals the endpoint via a dedicated DMA read channel.
> >     'use_msi' module option is ignored when 'use_remote_edma=1'.
> > 
> > The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
> > module parameter (use_remote_edma). When disabled, the existing
> > ntb_transport behaviour is unchanged.
> > 
> > Signed-off-by: Koichiro Den <den@valinux.co.jp>
> > ---
> >  drivers/ntb/Kconfig                           |   11 +
> >  drivers/ntb/Makefile                          |    3 +
> >  drivers/ntb/ntb_edma.c                        |  628 ++++++++
> >  drivers/ntb/ntb_edma.h                        |  128 ++
> 
> I briefly looked over the code. It feels like the eDMA bits should go in drivers/ntb/hw/ rather than drivers/ntb/, given they are pretty specific to the DesignWare hardware. What sits in drivers/ntb should be generic APIs that a different vendor can utilize without having to adapt to DesignWare hardware specifics. So maybe a bit more abstraction is needed?

That makes sense, I'll reorganize things. Thank you for the suggestion.
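
My current thinking is to move the DesignWare-specific pieces (the register
offsets, the iATU/LL setup and the dw-edma glue in ntb_edma.c) under
drivers/ntb/hw/, and have ntb_transport_core.c talk to them only through a
small vendor-neutral ops table. Very roughly (the struct name, the opaque
priv handle and the exact set of hooks are placeholders at this point; the
hooks simply mirror what ntb_edma.h exports today):

struct ntb_remote_dma_ops {
	int (*setup_mws)(struct ntb_dev *ndev);
	int (*setup_peer)(struct ntb_dev *ndev);
	int (*setup_chans)(struct device *dma_dev, void *priv);
	void (*teardown_chans)(void *priv);
	struct dma_chan *(*pick_chan)(void *priv, bool write_dir);
	int (*notify_peer)(void *priv, int qp_num);
};

The dw-eDMA implementation would then register such an ops table from
drivers/ntb/hw/, so another vendor could plug in its own engine without
touching the transport core.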

> 
> >  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
> >  5 files changed, 2048 insertions(+), 3 deletions(-)
> >  create mode 100644 drivers/ntb/ntb_edma.c
> >  create mode 100644 drivers/ntb/ntb_edma.h
> >  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
> > 
> > diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
> > index df16c755b4da..db63f02bb116 100644
> > --- a/drivers/ntb/Kconfig
> > +++ b/drivers/ntb/Kconfig
> > @@ -37,4 +37,15 @@ config NTB_TRANSPORT
> >  
> >  	 If unsure, say N.
> >  
> > +config NTB_TRANSPORT_EDMA
> > +	bool "NTB Transport backed by remote eDMA"
> > +	depends on NTB_TRANSPORT
> > +	depends on PCI
> > +	select DMA_ENGINE
> > +	help
> > +	  Enable a transport backend that uses a remote DesignWare eDMA engine
> > +	  exposed through a dedicated NTB memory window. The host uses the
> > +	  endpoint's eDMA engine to move data in both directions.
> > +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
> > +
> >  endif # NTB
> > diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
> > index 3a6fa181ff99..51f0e1e3aec7 100644
> > --- a/drivers/ntb/Makefile
> > +++ b/drivers/ntb/Makefile
> > @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
> >  
> >  ntb-y			:= core.o
> >  ntb-$(CONFIG_NTB_MSI)	+= msi.o
> > +
> > +ntb_transport-y					:= ntb_transport_core.o
> > +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
> > diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
> > new file mode 100644
> > index 000000000000..cb35e0d56aa8
> > --- /dev/null
> > +++ b/drivers/ntb/ntb_edma.c
> > @@ -0,0 +1,628 @@
> > +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> > +
> > +#include <linux/module.h>
> > +#include <linux/device.h>
> > +#include <linux/pci.h>
> > +#include <linux/ntb.h>
> > +#include <linux/io.h>
> > +#include <linux/iommu.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/pci-epc.h>
> > +#include <linux/dma/edma.h>
> > +#include <linux/irq.h>
> > +#include <linux/irqdomain.h>
> > +#include <linux/of.h>
> > +#include <linux/of_irq.h>
> > +#include <dt-bindings/interrupt-controller/arm-gic.h>
> > +
> > +#include "ntb_edma.h"
> > +
> > +/*
> > + * The interrupt register offsets below are taken from the DesignWare
> > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > + * backend currently only supports this layout.
> > + */
> > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > +#define DMA_READ_INT_MASK_OFF      0xa8
> > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > +
> > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > +
> > +static unsigned int edma_spi = 417; /* 0x1a1 */
> > +module_param(edma_spi, uint, 0644);
> > +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
> > +
> > +static u64 edma_regs_phys = 0xe65d5000;
> > +module_param(edma_regs_phys, ullong, 0644);
> > +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
> > +
> > +static unsigned long edma_regs_size = 0x1200;
> > +module_param(edma_regs_size, ulong, 0644);
> > +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
> > +
> > +struct ntb_edma_intr {
> > +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
> > +};
> > +
> > +struct ntb_edma_ctx {
> > +	void *ll_wr_virt[EDMA_WR_CH_NUM];
> > +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
> > +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
> > +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
> > +
> > +	struct ntb_edma_intr *intr_ep_virt;
> > +	dma_addr_t intr_ep_phys;
> > +	struct ntb_edma_intr *intr_rc_virt;
> > +	dma_addr_t intr_rc_phys;
> > +	u32 notify_qp_max;
> > +
> > +	bool initialized;
> > +};
> > +
> > +static struct ntb_edma_ctx edma_ctx;
> > +
> > +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> > +
> > +struct ntb_edma_interrupt {
> > +	int virq;
> > +	void __iomem *base;
> > +	ntb_edma_interrupt_cb_t cb;
> > +	void *data;
> > +};
> > +
> > +static struct ntb_edma_interrupt ntb_edma_intr;
> > +
> > +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
> > +{
> > +	struct device_node *np = dev_of_node(dev);
> > +	struct device_node *parent;
> > +	struct irq_fwspec fwspec = { 0 };
> > +	int virq;
> > +
> > +	parent = of_irq_find_parent(np);
> > +	if (!parent)
> > +		return -ENODEV;
> > +
> > +	fwspec.fwnode      = of_fwnode_handle(parent);
> > +	fwspec.param_count = 3;
> > +	fwspec.param[0]    = GIC_SPI;
> > +	fwspec.param[1]    = spi;
> > +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
> > +
> > +	virq = irq_create_fwspec_mapping(&fwspec);
> > +	of_node_put(parent);
> > +	return (virq > 0) ? virq : -EINVAL;
> > +}
> > +
> > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > +{
> > +	struct ntb_edma_interrupt *v = data;
> > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > +	u32 i, val;
> > +
> > +	/*
> > +	 * We do not ack interrupts here but instead we mask all local interrupt
> > +	 * sources except the read channel used for notification. This reduces
> > +	 * needless ISR invocations.
> > +	 *
> > +	 * In theory we could configure LIE=1/RIE=0 only for the notification
> > +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
> > +	 * require intrusive changes to the dw-edma core.
> > +	 *
> > +	 * Note: The host side may have already cleared the read interrupt used
> > +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
> > +	 * way to detect it. As a result, we cannot reliably tell which specific
> > +	 * channel triggered this interrupt; we consult intr_ep_virt->db[i]
> > +	 * instead.
> > +	 */
> > +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> > +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
> > +
> > +	if (!v->cb || !edma_ctx.intr_ep_virt)
> > +		return IRQ_HANDLED;
> > +
> > +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
> > +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
> > +		if (!val)
> > +			continue;
> > +
> > +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
> > +		v->cb(v->data, i);
> > +	}
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> > +		       ntb_edma_interrupt_cb_t cb, void *data)
> > +{
> > +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> > +	int virq = ntb_edma_map_spi_to_virq(epc_dev->parent, edma_spi);
> > +	int ret;
> > +
> > +	if (virq < 0) {
> > +		dev_err(dev, "failed to get virq (%d)\n", virq);
> > +		return virq;
> > +	}
> > +
> > +	v->virq = virq;
> > +	v->cb = cb;
> > +	v->data = data;
> > +	if (edma_regs_phys && !v->base)
> > +		v->base = devm_ioremap(dev, edma_regs_phys, edma_regs_size);
> > +	if (!v->base) {
> > +		dev_err(dev, "failed to setup v->base\n");
> > +		return -ENOMEM;
> > +	}
> > +	ret = devm_request_irq(dev, v->virq, ntb_edma_isr, 0, "ntb-edma", v);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (v->base) {
> > +		iowrite32(0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> > +		iowrite32(0x0, v->base + DMA_READ_INT_MASK_OFF);
> > +	}
> > +	return 0;
> > +}
> > +
> > +void ntb_edma_teardown_isr(struct device *dev)
> > +{
> > +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> > +
> > +	/* Mask all write/read interrupts so we don't get called again. */
> > +	if (v->base) {
> > +		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> > +		iowrite32(~0x0, v->base + DMA_READ_INT_MASK_OFF);
> > +	}
> > +
> > +	if (v->virq > 0)
> > +		devm_free_irq(dev, v->virq, v);
> > +
> > +	if (v->base)
> > +		devm_iounmap(dev, v->base);
> > +
> > +	v->virq = 0;
> > +	v->cb = NULL;
> > +	v->data = NULL;
> > +}
> > +
> > +int ntb_edma_setup_mws(struct ntb_dev *ndev)
> > +{
> > +	const size_t info_bytes = PAGE_SIZE;
> > +	resource_size_t size_max, offset;
> > +	dma_addr_t intr_phys, info_phys;
> > +	u32 wr_done = 0, rd_done = 0;
> > +	struct ntb_edma_intr *intr;
> > +	struct ntb_edma_info *info;
> > +	int peer_mw, mw_index, rc;
> > +	struct iommu_domain *dom;
> > +	bool reg_mapped = false;
> > +	size_t ll_bytes, size;
> > +	struct pci_epc *epc;
> > +	struct device *dev;
> > +	unsigned long iova;
> > +	phys_addr_t phys;
> > +	u64 need;
> > +	u32 i;
> > +
> > +	/* +1 is for the interrupt-notification channel */
> > +	ll_bytes = (EDMA_WR_CH_NUM + EDMA_RD_CH_NUM + 1) * DMA_LLP_MEM_SIZE;
> > +	need = EDMA_REG_SIZE + info_bytes + ll_bytes;
> > +
> > +	epc = ntb_get_pci_epc(ndev);
> > +	if (!epc)
> > +		return -ENODEV;
> > +	dev = epc->dev.parent;
> > +
> > +	if (edma_ctx.initialized)
> > +		return 0;
> > +
> > +	info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL);
> > +	if (!info)
> > +		return -ENOMEM;
> > +
> > +	memset(info, 0, info_bytes);
> > +	info->magic = NTB_EDMA_INFO_MAGIC;
> > +	info->wr_cnt = EDMA_WR_CH_NUM;
> > +	info->rd_cnt = EDMA_RD_CH_NUM + 1; /* +1 for interrupt notification */
> > +	info->regs_phys = edma_regs_phys;
> > +	info->ll_stride = DMA_LLP_MEM_SIZE;
> > +
> > +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> > +		edma_ctx.ll_wr_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> > +							 &edma_ctx.ll_wr_phys[i],
> > +							 GFP_KERNEL,
> > +							 DMA_ATTR_FORCE_CONTIGUOUS);
> > +		if (!edma_ctx.ll_wr_virt[i]) {
> > +			rc = -ENOMEM;
> > +			goto err_free_ll;
> > +		}
> > +		wr_done++;
> > +		info->ll_wr_phys[i] = edma_ctx.ll_wr_phys[i];
> > +	}
> > +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> > +		edma_ctx.ll_rd_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> > +							 &edma_ctx.ll_rd_phys[i],
> > +							 GFP_KERNEL,
> > +							 DMA_ATTR_FORCE_CONTIGUOUS);
> > +		if (!edma_ctx.ll_rd_virt[i]) {
> > +			rc = -ENOMEM;
> > +			goto err_free_ll;
> > +		}
> > +		rd_done++;
> > +		info->ll_rd_phys[i] = edma_ctx.ll_rd_phys[i];
> > +	}
> > +
> > +	/* For interrupt notification */
> > +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> > +	intr = dma_alloc_coherent(dev, sizeof(*intr), &intr_phys, GFP_KERNEL);
> > +	if (!intr) {
> > +		rc = -ENOMEM;
> > +		goto err_free_ll;
> > +	}
> > +	memset(intr, 0, sizeof(*intr));
> > +	edma_ctx.intr_ep_virt = intr;
> > +	edma_ctx.intr_ep_phys = intr_phys;
> > +	info->intr_dar_base = intr_phys;
> > +
> > +	peer_mw = ntb_peer_mw_count(ndev);
> > +	if (peer_mw <= 0) {
> > +		rc = -ENODEV;
> > +		goto err_free_ll;
> > +	}
> > +
> > +	mw_index = peer_mw - 1; /* last MW */
> > +
> > +	rc = ntb_mw_get_align(ndev, 0, mw_index, 0, NULL, &size_max,
> > +			      &offset);
> > +	if (rc)
> > +		goto err_free_ll;
> > +
> > +	if (size_max < need) {
> > +		rc = -ENOSPC;
> > +		goto err_free_ll;
> > +	}
> > +
> > +	/* Map register space (direct) */
> > +	dom = iommu_get_domain_for_dev(dev);
> > +	if (dom) {
> > +		phys = edma_regs_phys & PAGE_MASK;
> > +		size = PAGE_ALIGN(EDMA_REG_SIZE + edma_regs_phys - phys);
> > +		iova = phys;
> > +
> > +		rc = iommu_map(dom, iova, phys, EDMA_REG_SIZE,
> > +			       IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, GFP_KERNEL);
> > +		if (rc)
> > +			dev_err(&ndev->dev, "failed to create direct mapping for eDMA reg space\n");
> > +		reg_mapped = true;
> > +	}
> > +
> > +	rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_regs_phys, EDMA_REG_SIZE, offset);
> > +	if (rc)
> > +		goto err_unmap_reg;
> > +
> > +	offset += EDMA_REG_SIZE;
> > +
> > +	/* Map ntb_edma_info */
> > +	rc = ntb_mw_set_trans(ndev, 0, mw_index, info_phys, info_bytes, offset);
> > +	if (rc)
> > +		goto err_clear_trans;
> > +	offset += info_bytes;
> > +
> > +	/* Map LL location */
> > +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> > +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_wr_phys[i],
> > +				      DMA_LLP_MEM_SIZE, offset);
> > +		if (rc)
> > +			goto err_clear_trans;
> > +		offset += DMA_LLP_MEM_SIZE;
> > +	}
> > +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> > +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_rd_phys[i],
> > +				      DMA_LLP_MEM_SIZE, offset);
> > +		if (rc)
> > +			goto err_clear_trans;
> > +		offset += DMA_LLP_MEM_SIZE;
> > +	}
> > +	edma_ctx.initialized = true;
> > +
> > +	return 0;
> > +
> > +err_clear_trans:
> > +	/*
> > +	 * Tear down the NTB translation window used for the eDMA MW.
> > +	 * There is no sub-range clear API for ntb_mw_set_trans(), so we
> > +	 * unconditionally drop the whole mapping on error.
> > +	 */
> > +	ntb_mw_clear_trans(ndev, 0, mw_index);
> > +
> > +err_unmap_reg:
> > +	if (reg_mapped)
> > +		iommu_unmap(dom, iova, size);
> > +err_free_ll:
> > +	while (rd_done--)
> > +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> > +			       edma_ctx.ll_rd_virt[rd_done],
> > +			       edma_ctx.ll_rd_phys[rd_done],
> > +			       DMA_ATTR_FORCE_CONTIGUOUS);
> > +	while (wr_done--)
> > +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> > +			       edma_ctx.ll_wr_virt[wr_done],
> > +			       edma_ctx.ll_wr_phys[wr_done],
> > +			       DMA_ATTR_FORCE_CONTIGUOUS);
> > +	if (edma_ctx.intr_ep_virt)
> > +		dma_free_coherent(dev, sizeof(struct ntb_edma_intr),
> > +				  edma_ctx.intr_ep_virt,
> > +				  edma_ctx.intr_ep_phys);
> > +	dma_free_coherent(dev, info_bytes, info, info_phys);
> > +	return rc;
> > +}
> > +
> > +static int ntb_edma_irq_vector(struct device *dev, unsigned int nr)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	int ret, nvec;
> > +
> > +	nvec = pci_msi_vec_count(pdev);
> > +	for (; nr < nvec; nr++) {
> > +		ret = pci_irq_vector(pdev, nr);
> > +		if (!irq_has_action(ret))
> > +			return ret;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static const struct dw_edma_plat_ops ntb_edma_ops = {
> > +	.irq_vector     = ntb_edma_irq_vector,
> > +};
> > +
> > +int ntb_edma_setup_peer(struct ntb_dev *ndev)
> > +{
> > +	struct ntb_edma_info *info;
> > +	unsigned int wr_cnt, rd_cnt;
> > +	struct dw_edma_chip *chip;
> > +	void __iomem *edma_virt;
> > +	phys_addr_t edma_phys;
> > +	resource_size_t mw_size;
> > +	u64 off = EDMA_REG_SIZE;
> > +	int peer_mw, mw_index;
> > +	unsigned int i;
> > +	int ret;
> > +
> > +	peer_mw = ntb_peer_mw_count(ndev);
> > +	if (peer_mw <= 0)
> > +		return -ENODEV;
> > +
> > +	mw_index = peer_mw - 1; /* last MW */
> > +
> > +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
> > +				   &mw_size);
> > +	if (ret)
> > +		return ret;
> > +
> > +	edma_virt = ioremap(edma_phys, mw_size);
> > +
> > +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
> > +	if (!chip) {
> > +		ret = -ENOMEM;
> > +		return ret;
> > +	}
> > +
> > +	chip->dev = &ndev->pdev->dev;
> > +	chip->nr_irqs = 4;
> > +	chip->ops = &ntb_edma_ops;
> > +	chip->flags = 0;
> > +	chip->reg_base = edma_virt;
> > +	chip->mf = EDMA_MF_EDMA_UNROLL;
> > +
> > +	info = edma_virt + off;
> > +	if (info->magic != NTB_EDMA_INFO_MAGIC)
> > +		return -EINVAL;
> > +	wr_cnt = info->wr_cnt;
> > +	rd_cnt = info->rd_cnt;
> > +	chip->ll_wr_cnt = wr_cnt;
> > +	chip->ll_rd_cnt = rd_cnt;
> > +	off += PAGE_SIZE;
> > +
> > +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> > +	edma_ctx.intr_ep_phys = info->intr_dar_base;
> > +	if (edma_ctx.intr_ep_phys) {
> > +		edma_ctx.intr_rc_virt =
> > +			dma_alloc_coherent(&ndev->pdev->dev,
> > +					   sizeof(struct ntb_edma_intr),
> > +					   &edma_ctx.intr_rc_phys,
> > +					   GFP_KERNEL);
> > +		if (!edma_ctx.intr_rc_virt)
> > +			return -ENOMEM;
> > +		memset(edma_ctx.intr_rc_virt, 0,
> > +		       sizeof(struct ntb_edma_intr));
> > +	}
> > +
> > +	for (i = 0; i < wr_cnt; i++) {
> > +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
> > +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
> > +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
> > +		off += DMA_LLP_MEM_SIZE;
> > +	}
> > +	for (i = 0; i < rd_cnt; i++) {
> > +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
> > +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
> > +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
> > +		off += DMA_LLP_MEM_SIZE;
> > +	}
> > +
> > +	if (!pci_dev_msi_enabled(ndev->pdev))
> > +		return -ENXIO;
> > +
> > +	ret = dw_edma_probe(chip);
> > +	if (ret) {
> > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +struct ntb_edma_filter {
> > +	struct device *dma_dev;
> > +	u32 direction;
> > +};
> > +
> > +static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
> > +{
> > +	struct ntb_edma_filter *filter = arg;
> > +	u32 dir = filter->direction;
> > +	struct dma_slave_caps caps;
> > +	int ret;
> > +
> > +	if (chan->device->dev != filter->dma_dev)
> > +		return false;
> > +
> > +	ret = dma_get_slave_caps(chan, &caps);
> > +	if (ret < 0)
> > +		return false;
> > +
> > +	return !!(caps.directions & dir);
> > +}
> > +
> > +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < edma->num_wr_chan; i++)
> > +		dma_release_channel(edma->wr_chan[i]);
> > +
> > +	for (i = 0; i < edma->num_rd_chan; i++)
> > +		dma_release_channel(edma->rd_chan[i]);
> > +
> > +	if (edma->intr_chan)
> > +		dma_release_channel(edma->intr_chan);
> > +}
> > +
> > +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
> > +{
> > +	struct ntb_edma_filter filter;
> > +	dma_cap_mask_t dma_mask;
> > +	unsigned int i;
> > +
> > +	dma_cap_zero(dma_mask);
> > +	dma_cap_set(DMA_SLAVE, dma_mask);
> > +
> > +	memset(edma, 0, sizeof(*edma));
> > +	edma->dev = dma_dev;
> > +
> > +	filter.dma_dev = dma_dev;
> > +	filter.direction = BIT(DMA_DEV_TO_MEM);
> > +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> > +		edma->wr_chan[i] = dma_request_channel(dma_mask,
> > +						       ntb_edma_filter_fn,
> > +						       &filter);
> > +		if (!edma->wr_chan[i])
> > +			break;
> > +		edma->num_wr_chan++;
> > +	}
> > +
> > +	filter.direction = BIT(DMA_MEM_TO_DEV);
> > +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
> > +		edma->rd_chan[i] = dma_request_channel(dma_mask,
> > +						       ntb_edma_filter_fn,
> > +						       &filter);
> > +		if (!edma->rd_chan[i])
> > +			break;
> > +		edma->num_rd_chan++;
> > +	}
> > +
> > +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
> > +					      &filter);
> > +	if (!edma->intr_chan)
> > +		dev_warn(dma_dev,
> > +			 "Remote eDMA notify channel could not be allocated\n");
> > +
> > +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
> > +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
> > +		ntb_edma_teardown_chans(edma);
> > +		return -ENODEV;
> > +	}
> > +	return 0;
> > +}
> > +
> > +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> > +				    remote_edma_dir_t dir)
> > +{
> > +	unsigned int n, cur, idx;
> > +	struct dma_chan **chans;
> > +	atomic_t *cur_chan;
> > +
> > +	if (dir == REMOTE_EDMA_WRITE) {
> > +		n = edma->num_wr_chan;
> > +		chans = edma->wr_chan;
> > +		cur_chan = &edma->cur_wr_chan;
> > +	} else {
> > +		n = edma->num_rd_chan;
> > +		chans = edma->rd_chan;
> > +		cur_chan = &edma->cur_rd_chan;
> > +	}
> > +	if (WARN_ON_ONCE(!n))
> > +		return NULL;
> > +
> > +	/* Simple round-robin */
> > +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
> > +	idx = cur % n;
> > +	return chans[idx];
> > +}
> > +
> > +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
> > +{
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	struct scatterlist sgl;
> > +	dma_cookie_t cookie;
> > +	struct device *dev;
> > +
> > +	if (!edma || !edma->intr_chan)
> > +		return -ENXIO;
> > +
> > +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
> > +		return -EINVAL;
> > +
> > +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
> > +		return -EINVAL;
> > +
> > +	dev = edma->dev;
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
> > +
> > +	/* Ensure store is visible before kicking the DMA transfer */
> > +	wmb();
> > +
> > +	sg_init_table(&sgl, 1);
> > +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
> > +	sg_dma_len(&sgl) = sizeof(u32);
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_MEM_TO_DEV;
> > +
> > +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
> > +		return -EINVAL;
> > +
> > +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
> > +				      DMA_MEM_TO_DEV,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd)
> > +		return -ENOSPC;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie))
> > +		return -ENOSPC;
> > +
> > +	dma_async_issue_pending(edma->intr_chan);
> > +	return 0;
> > +}
> > diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
> > new file mode 100644
> > index 000000000000..da0451827edb
> > --- /dev/null
> > +++ b/drivers/ntb/ntb_edma.h
> > @@ -0,0 +1,128 @@
> > +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
> > +#ifndef _NTB_EDMA_H_
> > +#define _NTB_EDMA_H_
> > +
> > +#include <linux/completion.h>
> > +#include <linux/device.h>
> > +#include <linux/interrupt.h>
> > +
> > +#define EDMA_REG_SIZE		SZ_64K
> > +#define DMA_LLP_MEM_SIZE	SZ_4K
> > +#define EDMA_WR_CH_NUM		4
> > +#define EDMA_RD_CH_NUM		4
> > +#define NTB_EDMA_MAX_CH		8
> > +
> > +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
> > +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
> > +
> > +#define NTB_EDMA_RING_ORDER	7
> > +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
> > +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
> > +
> > +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> > +
> > +/*
> > + * REMOTE_EDMA_EP:
> > + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
> > + *
> > + * REMOTE_EDMA_RC:
> > + *   Root Complex controls the endpoint eDMA through the shared MW and
> > + *   drives reads/writes on behalf of the host.
> > + */
> > +typedef enum {
> > +	REMOTE_EDMA_UNKNOWN,
> > +	REMOTE_EDMA_EP,
> > +	REMOTE_EDMA_RC,
> > +} remote_edma_mode_t;
> > +
> > +typedef enum {
> > +	REMOTE_EDMA_WRITE,
> > +	REMOTE_EDMA_READ,
> > +} remote_edma_dir_t;
> > +
> > +/*
> > + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
> > + *
> > + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
> > + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
> > + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
> > + *                                RC configures via dw_edma)
> > + *
> > + * ntb_edma_setup_mws() on EP:
> > + *   - allocates ntb_edma_info and LLs in EP memory
> > + *   - programs inbound iATU so that RC peer MW[n] points at this block
> > + *
> > + * ntb_edma_setup_peer() on RC:
> > + *   - ioremaps peer MW[n]
> > + *   - reads ntb_edma_info
> > + *   - sets up dw_edma_chip ll_region_* from that info
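> > + *
> > + * A worked example, assuming 4K PAGE_SIZE and the sizes defined above
> > + * (EDMA_REG_SIZE = 64K, DMA_LLP_MEM_SIZE = 4K, 4 write + 4+1 read channels):
> > + *
> > + *   0x00000 - 0x0ffff : eDMA register block
> > + *   0x10000 - 0x10fff : struct ntb_edma_info
> > + *   0x11000 - 0x14fff : write-channel LL rings (4 x 4K)
> > + *   0x15000 - 0x19fff : read-channel LL rings (5 x 4K, the last being the
> > + *                       Host->EP notification channel)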
> > + */
> > +struct ntb_edma_info {
> > +	u32 magic;
> > +	u16 wr_cnt;
> > +	u16 rd_cnt;
> > +	u64 regs_phys;
> > +	u32 ll_stride;
> > +	u32 rsvd;
> > +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
> > +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
> > +
> > +	u64 intr_dar_base;
> > +} __packed;
> > +
> > +struct ll_dma_addrs {
> > +	dma_addr_t wr[EDMA_WR_CH_NUM];
> > +	dma_addr_t rd[EDMA_RD_CH_NUM];
> > +};
> > +
> > +struct ntb_edma_chans {
> > +	struct device *dev;
> > +
> > +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
> > +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
> > +	struct dma_chan *intr_chan;
> > +
> > +	unsigned int num_wr_chan;
> > +	unsigned int num_rd_chan;
> > +	atomic_t cur_wr_chan;
> > +	atomic_t cur_rd_chan;
> > +};
> > +
> > +static __always_inline u32 ntb_edma_ring_idx(u32 v)
> > +{
> > +	return v & NTB_EDMA_RING_MASK;
> > +}
> > +
> > +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
> > +{
> > +	if (head >= tail) {
> > +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
> > +		return head - tail;
> > +	}
> > +
> > +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
> > +	return U32_MAX - tail + head + 1;
> > +}
> > +
> > +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
> > +{
> > +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
> > +}
> > +
> > +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
> > +{
> > +	return ntb_edma_ring_free_entry(head, tail) == 0;
> > +}
> > +
> > +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> > +		       ntb_edma_interrupt_cb_t cb, void *data);
> > +void ntb_edma_teardown_isr(struct device *dev);
> > +int ntb_edma_setup_mws(struct ntb_dev *ndev);
> > +int ntb_edma_setup_peer(struct ntb_dev *ndev);
> > +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
> > +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> > +				    remote_edma_dir_t dir);
> > +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
> > +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
> > +
> > +#endif
> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
> > similarity index 65%
> > rename from drivers/ntb/ntb_transport.c
> > rename to drivers/ntb/ntb_transport_core.c
> > index 907db6c93d4d..48d48921978d 100644
> > --- a/drivers/ntb/ntb_transport.c
> > +++ b/drivers/ntb/ntb_transport_core.c
> > @@ -47,6 +47,9 @@
> >   * Contact Information:
> >   * Jon Mason <jon.mason@intel.com>
> >   */
> > +#include <linux/atomic.h>
> > +#include <linux/bug.h>
> > +#include <linux/compiler.h>
> >  #include <linux/debugfs.h>
> >  #include <linux/delay.h>
> >  #include <linux/dmaengine.h>
> > @@ -71,6 +74,8 @@
> >  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
> >  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
> >  
> > +#define NTB_EDMA_MAX_POLL		32
> > +
> >  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
> >  MODULE_VERSION(NTB_TRANSPORT_VER);
> >  MODULE_LICENSE("Dual BSD/GPL");
> > @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
> >  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
> >  #endif
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> 
> This comment applies throughout the patch. Doing ifdefs inside C source files is pretty frowned upon in the kernel. The preferred way is to keep the ifdefs in the header files only. So please give this a bit more consideration and see if it can be done differently to address this.

I agree, there is no good reason to keep those remaining ifdefs at all.
I'll clean it up. Thanks for pointing this out.
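
Concretely, I plan to keep the #ifdef only in ntb_edma.h and provide static
inline stubs for the !CONFIG_NTB_TRANSPORT_EDMA case, so that
ntb_transport_core.c can call the helpers unconditionally and the compiler
discards the dead paths. A rough sketch (only a few of the helpers shown;
the exact stub set will follow however the API ends up):

#ifdef CONFIG_NTB_TRANSPORT_EDMA
int ntb_edma_setup_mws(struct ntb_dev *ndev);
int ntb_edma_setup_peer(struct ntb_dev *ndev);
void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
#else
static inline int ntb_edma_setup_mws(struct ntb_dev *ndev)
{
	return -EOPNOTSUPP;
}

static inline int ntb_edma_setup_peer(struct ntb_dev *ndev)
{
	return -EOPNOTSUPP;
}

static inline void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
{
}
#endif

The remaining call sites would then test use_remote_edma (or
IS_ENABLED(CONFIG_NTB_TRANSPORT_EDMA)) in plain C instead of being wrapped
in preprocessor blocks.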

> 
> > +#include "ntb_edma.h"
> > +static bool use_remote_edma;
> > +module_param(use_remote_edma, bool, 0644);
> > +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
> > +#endif
> > +
> >  static struct dentry *nt_debugfs_dir;
> >  
> >  /* Only two-ports NTB devices are supported */
> > @@ -125,6 +137,14 @@ struct ntb_queue_entry {
> >  		struct ntb_payload_header __iomem *tx_hdr;
> >  		struct ntb_payload_header *rx_hdr;
> >  	};
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	dma_addr_t addr;
> > +
> > +	/* Used by RC side only */
> > +	struct scatterlist sgl;
> > +	struct work_struct dma_work;
> > +#endif
> >  };
> >  
> >  struct ntb_rx_info {
> > @@ -202,6 +222,33 @@ struct ntb_transport_qp {
> >  	int msi_irq;
> >  	struct ntb_msi_desc msi_desc;
> >  	struct ntb_msi_desc peer_msi_desc;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	/*
> > +	 * For ensuring peer notification in non-atomic context.
> > +	 * ntb_peer_db_set might sleep or schedule.
> > +	 */
> > +	struct work_struct db_work;
> > +
> > +	/*
> > +	 * wr: remote eDMA write transfer (EP -> RC direction)
> > +	 * rd: remote eDMA read transfer (RC -> EP direction)
> > +	 */
> > +	u32 wr_cons;
> > +	u32 rd_cons;
> > +	u32 wr_prod;
> > +	u32 rd_prod;
> > +	u32 wr_issue;
> > +	u32 rd_issue;
> > +
> > +	spinlock_t ep_tx_lock;
> > +	spinlock_t ep_rx_lock;
> > +	spinlock_t rc_lock;
> > +
> > +	/* Completion work for read/write transfers. */
> > +	struct work_struct read_work;
> > +	struct work_struct write_work;
> > +#endif
> 
> For something like this, maybe it needs its own struct instead of an ifdef chunk. Perhaps 'ntb_rx_info' can serve as the core data struct, with the eDMA side having an 'ntb_rx_info_edma' that embeds 'ntb_rx_info'.

Thanks again for the suggestion. I'll reorganize things.
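
As a first step I am thinking of pulling the eDMA-only queue state into its
own struct, roughly like the sketch below (the struct and field names are
provisional; the members are just the ones this patch currently adds under
the ifdef), with struct ntb_transport_qp carrying a single pointer to it
that only the eDMA backend allocates in ntb_transport_edma_create_queue():

struct ntb_transport_qp_edma {
	/* Peer notification in non-atomic context */
	struct work_struct db_work;

	/* wr: EP -> RC transfers, rd: RC -> EP transfers */
	u32 wr_cons, rd_cons;
	u32 wr_prod, rd_prod;
	u32 wr_issue, rd_issue;

	spinlock_t ep_tx_lock;
	spinlock_t ep_rx_lock;
	spinlock_t rc_lock;

	/* Completion work for read/write transfers */
	struct work_struct read_work;
	struct work_struct write_work;
};

That keeps the common structs free of backend-specific members without any
#ifdef in the C file.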

Koichiro

> 
> DJ
> 
> >  };
> >  
> >  struct ntb_transport_mw {
> > @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
> >  
> >  	/* Make sure workq of link event be executed serially */
> >  	struct mutex link_event_lock;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	remote_edma_mode_t remote_edma_mode;
> > +	struct device *dma_dev;
> > +	struct workqueue_struct *wq;
> > +	struct ntb_edma_chans edma;
> > +#endif
> >  };
> >  
> >  enum {
> > @@ -262,6 +316,19 @@ struct ntb_payload_header {
> >  	unsigned int flags;
> >  };
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
> > +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> > +				   unsigned int *mw_count);
> > +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num);
> > +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> > +					    struct ntb_transport_qp *qp);
> > +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
> > +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
> > +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
> > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > +
> >  /*
> >   * Return the device that should be used for DMA mapping.
> >   *
> > @@ -298,7 +365,7 @@ enum {
> >  	container_of((__drv), struct ntb_transport_client, driver)
> >  
> >  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
> > -#define NTB_QP_DEF_NUM_ENTRIES	100
> > +#define NTB_QP_DEF_NUM_ENTRIES	128
> >  #define NTB_LINK_DOWN_TIMEOUT	10
> >  
> >  static void ntb_transport_rxc_db(unsigned long data);
> > @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
> >  	count = ntb_spad_count(nt->ndev);
> >  	for (i = 0; i < count; i++)
> >  		ntb_spad_write(nt->ndev, i, 0);
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_edma_teardown_chans(&nt->edma);
> > +#endif
> >  }
> >  
> >  static void ntb_transport_link_cleanup_work(struct work_struct *work)
> > @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >  
> >  	/* send the local info, in the opposite order of the way we read it */
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	rc = ntb_transport_edma_ep_init(nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
> > +		return;
> > +	}
> > +#endif
> > +
> >  	if (nt->use_msi) {
> >  		rc = ntb_msi_setup_mws(ndev);
> >  		if (rc) {
> > @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >  
> >  	nt->link_is_up = true;
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	rc = ntb_transport_edma_rc_init(nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
> > +		goto out1;
> > +	}
> > +#endif
> > +
> >  	for (i = 0; i < nt->qp_count; i++) {
> >  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
> >  
> > @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
> >  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
> >  };
> >  
> > +static const struct ntb_transport_backend_ops edma_backend_ops;
> > +
> >  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  {
> >  	struct ntb_transport_ctx *nt;
> > @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  
> >  	nt->ndev = ndev;
> >  
> > -	nt->backend_ops = default_backend_ops;
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	if (use_remote_edma) {
> > +		rc = ntb_transport_edma_init(nt, &mw_count);
> > +		if (rc) {
> > +			nt->mw_count = 0;
> > +			goto err;
> > +		}
> > +		nt->backend_ops = edma_backend_ops;
> > +
> > +		/*
> > +		 * On remote eDMA mode, we reserve a read channel for Host->EP
> > +		 * interruption.
> > +		 */
> > +		use_msi = false;
> > +	} else
> > +#endif
> > +		nt->backend_ops = default_backend_ops;
> >  
> >  	/*
> >  	 * If we are using MSI, and have at least one extra memory window,
> > @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  		rc = ntb_transport_init_queue(nt, i);
> >  		if (rc)
> >  			goto err2;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +		ntb_transport_edma_init_queue(nt, i);
> > +#endif
> >  	}
> >  
> >  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
> > @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  	}
> >  	kfree(nt->mw_vec);
> >  err:
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_transport_edma_uninit(nt);
> > +#endif
> >  	kfree(nt);
> >  	return rc;
> >  }
> > @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >  
> >  	nt->qp_bitmap_free &= ~qp_bit;
> >  
> > +	qp->qp_bit = qp_bit;
> >  	qp->cb_data = data;
> >  	qp->rx_handler = handlers->rx_handler;
> >  	qp->tx_handler = handlers->tx_handler;
> >  	qp->event_handler = handlers->event_handler;
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_transport_edma_create_queue(nt, qp);
> > +#endif
> > +
> >  	dma_cap_zero(dma_mask);
> >  	dma_cap_set(DMA_MEMCPY, dma_mask);
> >  
> > @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >  			goto err1;
> >  
> >  		entry->qp = qp;
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> > +#endif
> >  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> >  			     &qp->rx_free_q);
> >  	}
> > @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
> >   */
> >  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >  {
> > -	struct pci_dev *pdev;
> >  	struct ntb_queue_entry *entry;
> > +	struct pci_dev *pdev;
> >  	u64 qp_bit;
> >  
> >  	if (!qp)
> > @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >  	tasklet_kill(&qp->rxc_db_work);
> >  
> >  	cancel_delayed_work_sync(&qp->link_work);
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	cancel_work_sync(&qp->read_work);
> > +	cancel_work_sync(&qp->write_work);
> > +#endif
> >  
> >  	qp->cb_data = NULL;
> >  	qp->rx_handler = NULL;
> > @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
> >  }
> >  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
> >  
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +/*
> > + * Remote eDMA mode implementation
> > + */
> > +struct ntb_edma_desc {
> > +	u32 len;
> > +	u32 flags;
> > +	u64 addr; /* DMA address */
> > +	u64 data;
> > +};
> > +
> > +struct ntb_edma_ring {
> > +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
> > +	u32 head;
> > +	u32 tail;
> > +};
> > +
> > +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
> > +
> > +#define __NTB_EDMA_CHECK_INDEX(_i)					\
> > +({									\
> > +	unsigned long __i = (unsigned long)(_i);			\
> > +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
> > +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
> > +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
> > +	__i;								\
> > +})
> > +
> > +#define NTB_EDMA_DESC_I(qp, i, n)					\
> > +({									\
> > +	typeof(qp) __qp = (qp);						\
> > +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> > +	(struct ntb_edma_desc *)					\
> > +		((char *)(__qp)->rx_buff +				\
> > +		 (sizeof(struct ntb_edma_ring) * n) +			\
> > +		 NTB_EDMA_DESC_OFF(__i));				\
> > +})
> > +
> > +#define NTB_EDMA_DESC_O(qp, i, n)					\
> > +({									\
> > +	typeof(qp) __qp = (qp);						\
> > +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> > +	(struct ntb_edma_desc __iomem *)				\
> > +		((char __iomem *)(__qp)->tx_mw +			\
> > +		 (sizeof(struct ntb_edma_ring) * n) +			\
> > +		 NTB_EDMA_DESC_OFF(__i));				\
> > +})
> > +
> > +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, head)))
> > +#define NTB_EDMA_HEAD_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, head)))
> > +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, tail)))
> > +#define NTB_EDMA_TAIL_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, tail)))
> > +
> > +/*
> > + * Macro naming rule:
> > + *   NTB_DESC_RD_EP_I (as an example)
> > + *            ^^ ^^ ^
> > + *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
> > + *            :  `----- Who uses this macro.
> > + *            `-------- DESC / HEAD / TAIL
> > + *
> > + * Read transfers (RC->EP):
> > + *
> > + *   EP view (outbound, written via NTB):
> > + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           :
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *       - head: NTB_HEAD_RD_EP_O(qp)
> > + *       - tail: NTB_TAIL_RD_EP_I(qp)
> > + *
> > + *   RC view (inbound, local mapping):
> > + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           :
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *       - head: NTB_HEAD_RD_RC_I(qp)
> > + *       - tail: NTB_TAIL_RD_RC_O(qp)
> > + *
> > + * Write transfers (EP -> RC) are analogous but use
> > + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
> > + * and NTB_TAIL_WR_{EP_I,RC_O}().
> > + */
> > +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> > +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> > +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> > +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> > +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> > +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> > +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> > +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> > +
> > +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
> > +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
> > +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
> > +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
> > +
> > +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
> > +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
> > +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
> > +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
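> > +
> > +/*
> > + * Illustrative helpers (hypothetical names, not used by the driver) showing
> > + * how the paired accessors above are meant to be used on the read ring:
> > + * the EP publishes a new head through its outbound view after refilling
> > + * RX buffers, and the RC samples that head through its inbound mapping.
> > + * head/tail are free-running u32 counters; only ntb_edma_ring_idx() masks
> > + * them down to a slot index.
> > + */
> > +static inline void ntb_edma_example_publish_rd_head(struct ntb_transport_qp *qp,
> > +						     u32 new_head)
> > +{
> > +	/* EP side: make the refilled descriptors visible before moving head */
> > +	dma_wmb();
> > +	iowrite32(new_head, NTB_HEAD_RD_EP_O(qp));
> > +}
> > +
> > +static inline u32 ntb_edma_example_sample_rd_head(struct ntb_transport_qp *qp)
> > +{
> > +	/* RC side: observe the head the EP last published */
> > +	return READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> > +}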
> > +
> > +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
> > +{
> > +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
> > +}
> > +
> > +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
> > +{
> > +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
> > +}
> > +
> > +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
> > +{
> > +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
> > +}
> > +
> > +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
> > +{
> > +	unsigned int head, tail;
> > +
> > +	if (ntb_qp_edma_is_ep(qp)) {
> > +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> > +			/* wr_cons may still advance here; wr_prod cannot while ep_tx_lock is held */
> > +			tail = READ_ONCE(qp->wr_cons);
> > +			head = READ_ONCE(qp->wr_prod);
> > +		}
> > +		return ntb_edma_ring_free_entry(head, tail);
> > +	}
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> > +		/* Only the EP-updated 'head' can advance while rc_lock is held */
> > +		tail = READ_ONCE(qp->rd_issue);
> > +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> > +	}
> > +	/*
> > +	 * On RC side, 'used' amount indicates how much EP side
> > +	 * has refilled, which are available for us to use for TX.
> > +	 */
> > +	return ntb_edma_ring_used_entry(head, tail);
> > +}
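> > +
> > +/*
> > + * Worked example (illustrative only): on the RC side, if the EP has
> > + * refilled buffers up to head == 10 while the RC has issued reads up to
> > + * rd_issue == 7, ntb_edma_ring_used_entry(10, 7) == 3, i.e. three TX
> > + * slots are currently backed by EP buffers. On the EP side the usual
> > + * producer/consumer free count applies instead.
> > + */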
> > +
> > +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
> > +						  struct ntb_transport_qp *qp)
> > +{
> > +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
> > +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
> > +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
> > +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
> > +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
> > +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
> > +
> > +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
> > +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
> > +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
> > +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
> > +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
> > +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
> > +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
> > +	seq_putc(s, '\n');
> > +
> > +	seq_puts(s, "Using Remote eDMA - Yes\n");
> > +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
> > +}
> > +
> > +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +
> > +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
> > +		ntb_edma_teardown_isr(&ndev->pdev->dev);
> > +
> > +	if (nt->wq)
> > +		destroy_workqueue(nt->wq);
> > +	nt->wq = NULL;
> > +}
> > +
> > +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> > +				   unsigned int *mw_count)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +
> > +	/*
> > +	 * We need at least one MW for the transport plus one MW reserved
> > +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
> > +	 */
> > +	if (*mw_count <= 1) {
> > +		dev_err(&ndev->dev,
> > +			"remote eDMA requires at least two MWs (have %u)\n",
> > +			*mw_count);
> > +		return -ENODEV;
> > +	}
> > +
> > +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
> > +	if (!nt->wq) {
> > +		ntb_transport_edma_uninit(nt);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* Reserve the last peer MW exclusively for the eDMA window. */
> > +	*mw_count -= 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static void ntb_transport_edma_db_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp =
> > +			container_of(work, struct ntb_transport_qp, db_work);
> > +
> > +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
> > +}
> > +
> > +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
> > +{
> > +	if (ntb_qp_edma_is_rc(qp) &&
> > +	    !ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
> > +		return;
> > +
> > +	/*
> > +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
> > +	 * may sleep, delegate the actual doorbell write to a workqueue.
> > +	 */
> > +	queue_work(system_highpri_wq, &qp->db_work);
> > +}
> > +
> > +static void ntb_transport_edma_isr(void *data, int qp_num)
> > +{
> > +	struct ntb_transport_ctx *nt = data;
> > +	struct ntb_transport_qp *qp;
> > +
> > +	if (qp_num < 0 || qp_num >= nt->qp_count)
> > +		return;
> > +
> > +	qp = &nt->qp_vec[qp_num];
> > +	if (WARN_ON(!qp))
> > +		return;
> > +
> > +	queue_work(nt->wq, &qp->read_work);
> > +	queue_work(nt->wq, &qp->write_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct pci_dev *pdev = ndev->pdev;
> > +	int rc;
> > +
> > +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
> > +		return 0;
> > +
> > +	rc = ntb_edma_setup_peer(ndev);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	nt->remote_edma_mode = REMOTE_EDMA_RC;
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct pci_dev *pdev = ndev->pdev;
> > +	struct pci_epc *epc;
> > +	int rc;
> > +
> > +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
> > +		return 0;
> > +
> > +	/* Only EP side can return pci_epc */
> > +	epc = ntb_get_pci_epc(ndev);
> > +	if (!epc)
> > +		return 0;
> > +
> > +	rc = ntb_edma_setup_mws(ndev);
> > +	if (rc) {
> > +		dev_err(&pdev->dev,
> > +			"Failed to set up memory window for eDMA: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	nt->remote_edma_mode = REMOTE_EDMA_EP;
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num)
> > +{
> > +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_transport_mw *mw;
> > +	unsigned int mw_num, mw_count, qp_count;
> > +	unsigned int qp_offset, rx_info_offset;
> > +	unsigned int mw_size, mw_size_per_qp;
> > +	unsigned int num_qps_mw;
> > +	size_t edma_total;
> > +	unsigned int i;
> > +	int node;
> > +
> > +	mw_count = nt->mw_count;
> > +	qp_count = nt->qp_count;
> > +
> > +	mw_num = QP_TO_MW(nt, qp_num);
> > +	mw = &nt->mw_vec[mw_num];
> > +
> > +	if (!mw->virt_addr)
> > +		return -ENOMEM;
> > +
> > +	if (mw_num < qp_count % mw_count)
> > +		num_qps_mw = qp_count / mw_count + 1;
> > +	else
> > +		num_qps_mw = qp_count / mw_count;
> > +
> > +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
> > +	if (max_mw_size && mw_size > max_mw_size)
> > +		mw_size = max_mw_size;
> > +
> > +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
> > +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
> > +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
> > +
> > +	qp->tx_mw_size = mw_size_per_qp;
> > +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> > +	if (!qp->tx_mw)
> > +		return -EINVAL;
> > +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
> > +	if (!qp->tx_mw_phys)
> > +		return -EINVAL;
> > +	qp->rx_info = qp->tx_mw + rx_info_offset;
> > +	qp->rx_buff = mw->virt_addr + qp_offset;
> > +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
> > +
> > +	/* Due to housekeeping, there must be at least 2 buffs */
> > +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> > +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> > +
> > +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
> > +	edma_total = 2 * sizeof(struct ntb_edma_ring);
> > +	if (rx_info_offset < edma_total) {
> > +		dev_err(&ndev->dev,
> > +			"Ring space requires %zuB but only %uB are available\n",
> > +			edma_total, rx_info_offset);
> > +		return -EINVAL;
> > +	}
> > +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
> > +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
> > +
> > +	/*
> > +	 * If the ring needs more entries than are currently allocated,
> > +	 * allocate the difference so the entry pool stays in sync with
> > +	 * the ring size.
> > +	 */
> > +	node = dev_to_node(&ndev->dev);
> > +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
> > +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
> > +		if (!entry)
> > +			return -ENOMEM;
> > +
> > +		entry->qp = qp;
> > +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> > +			     &qp->rx_free_q);
> > +		qp->rx_alloc_entry++;
> > +	}
> > +
> > +	memset(qp->rx_buff, 0, edma_total);
> > +
> > +	qp->rx_pkts = 0;
> > +	qp->tx_pkts = 0;
> > +
> > +	return 0;
> > +}
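> > +
> > +/*
> > + * Resulting per-QP layout in eDMA mode (illustrative, assuming the default
> > + * NTB_EDMA_RING_ENTRIES == 128 and a 24-byte descriptor, i.e. roughly 3 KiB
> > + * per ring):
> > + *
> > + *   rx_buff/tx_mw + 0              : read ring  (descriptors + head/tail)
> > + *   rx_buff/tx_mw + sizeof(ring)   : write ring (descriptors + head/tail)
> > + *   ...                            : unused
> > + *   rx_buff/tx_mw + rx_info_offset : struct ntb_rx_info
> > + *
> > + * The edma_total check above only guarantees that both rings fit in front
> > + * of ntb_rx_info.
> > + */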
> > +
> > +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	unsigned int len;
> > +	u32 flags, idx;
> > +
> > +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
> > +				     qp->rd_cons) == 0)
> > +		return 0;
> > +
> > +	idx = ntb_edma_ring_idx(qp->rd_cons);
> > +	in = NTB_DESC_RD_EP_I(qp, idx);
> > +	flags = in->flags;
> > +	if (!(flags & DESC_DONE_FLAG))
> > +		return 0;
> > +
> > +	/* Clear for reuse; 'flags' keeps the snapshot we still need below */
> > +	in->flags = 0;
> > +	len = in->len; /* might be smaller than entry->len */
> > +
> > +	entry = (struct ntb_queue_entry *)(in->data);
> > +	if (WARN_ON(!entry))
> > +		return 0;
> > +
> > +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
> > +
> > +	if (flags & LINK_DOWN_FLAG) {
> > +		ntb_qp_link_down(qp);
> > +		qp->rd_cons++;
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +		return 1;
> > +	}
> > +
> > +	qp->rx_bytes += len;
> > +	qp->rx_pkts++;
> > +	qp->rd_cons++;
> > +
> > +	if (qp->rx_handler && qp->client_ready)
> > +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
> > +
> > +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +	return 1;
> > +}
> > +
> > +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	u32 idx;
> > +
> > +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
> > +				     qp->wr_cons) == 0)
> > +		return 0;
> > +
> > +	idx = ntb_edma_ring_idx(qp->wr_cons);
> > +	in = NTB_DESC_WR_EP_I(qp, idx);
> > +
> > +	entry = (struct ntb_queue_entry *)(in->data);
> > +	if (WARN_ON(!entry))
> > +		return 0;
> > +
> > +	qp->wr_cons++;
> > +
> > +	if (qp->tx_handler)
> > +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
> > +
> > +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
> > +	return 1;
> > +}
> > +
> > +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> > +		if (!ntb_transport_edma_ep_read_complete(qp))
> > +			break;
> > +	}
> > +
> > +	if (ntb_transport_edma_ep_read_complete(qp))
> > +		queue_work(qp->transport->wq, &qp->read_work);
> > +}
> > +
> > +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> > +		if (!ntb_transport_edma_ep_write_complete(qp))
> > +			break;
> > +	}
> > +
> > +	if (ntb_transport_edma_ep_write_complete(qp))
> > +		queue_work(qp->transport->wq, &qp->write_work);
> > +}
> > +
> > +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	unsigned int len;
> > +	void *cb_data;
> > +	u32 idx;
> > +
> > +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
> > +					qp->wr_cons) != 0) {
> > +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
> > +		smp_rmb();
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_cons);
> > +		in = NTB_DESC_WR_RC_I(qp, idx);
> > +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
> > +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> > +			break;
> > +
> > +		in->data = 0;
> > +
> > +		cb_data = entry->cb_data;
> > +		len = entry->len;
> > +
> > +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
> > +
> > +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
> > +			ntb_qp_link_down(qp);
> > +			continue;
> > +		}
> > +
> > +		ntb_transport_edma_notify_peer(qp);
> > +
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +
> > +		if (qp->rx_handler && qp->client_ready)
> > +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
> > +
> > +		/* stat updates */
> > +		qp->rx_bytes += len;
> > +		qp->rx_pkts++;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_write_cb(void *data,
> > +					   const struct dmaengine_result *res)
> > +{
> > +	struct ntb_queue_entry *entry = data;
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	enum dmaengine_tx_result dma_err = res->result;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +
> > +	switch (dma_err) {
> > +	case DMA_TRANS_READ_FAILED:
> > +	case DMA_TRANS_WRITE_FAILED:
> > +	case DMA_TRANS_ABORTED:
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		break;
> > +	case DMA_TRANS_NOERROR:
> > +	default:
> > +		break;
> > +	}
> > +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
> > +	sg_dma_address(&entry->sgl) = 0;
> > +
> > +	entry->flags |= DESC_DONE_FLAG;
> > +
> > +	queue_work(nt->wq, &qp->write_work);
> > +}
> > +
> > +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	struct ntb_queue_entry *entry;
> > +	unsigned int len;
> > +	void *cb_data;
> > +	u32 idx;
> > +
> > +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
> > +					qp->rd_cons) != 0) {
> > +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
> > +		smp_rmb();
> > +
> > +		idx = ntb_edma_ring_idx(qp->rd_cons);
> > +		in = NTB_DESC_RD_RC_I(qp, idx);
> > +		entry = (struct ntb_queue_entry *)in->data;
> > +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> > +			break;
> > +
> > +		in->data = 0;
> > +
> > +		cb_data = entry->cb_data;
> > +		len = entry->len;
> > +
> > +		out = NTB_DESC_RD_RC_O(qp, idx);
> > +
> > +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
> > +
> > +		/*
> > +		 * No need to add barrier in-between to enforce ordering here.
> > +		 * The other side proceeds only after both flags and tail are
> > +		 * updated.
> > +		 */
> > +		iowrite32(entry->flags, &out->flags);
> > +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
> > +
> > +		ntb_transport_edma_notify_peer(qp);
> > +
> > +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
> > +			     &qp->tx_free_q);
> > +
> > +		if (qp->tx_handler)
> > +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
> > +
> > +		/* stat updates */
> > +		qp->tx_bytes += len;
> > +		qp->tx_pkts++;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_read_cb(void *data,
> > +					  const struct dmaengine_result *res)
> > +{
> > +	struct ntb_queue_entry *entry = data;
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	enum dmaengine_tx_result dma_err = res->result;
> > +
> > +	switch (dma_err) {
> > +	case DMA_TRANS_READ_FAILED:
> > +	case DMA_TRANS_WRITE_FAILED:
> > +	case DMA_TRANS_ABORTED:
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		break;
> > +	case DMA_TRANS_NOERROR:
> > +	default:
> > +		break;
> > +	}
> > +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
> > +	sg_dma_address(&entry->sgl) = 0;
> > +
> > +	entry->flags |= DESC_DONE_FLAG;
> > +
> > +	queue_work(nt->wq, &qp->read_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_write_start(struct device *d,
> > +					     struct dma_chan *chan, size_t len,
> > +					     dma_addr_t ep_src, void *rc_dst,
> > +					     struct ntb_queue_entry *entry)
> > +{
> > +	struct scatterlist *sgl = &entry->sgl;
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	dma_cookie_t cookie;
> > +	int nents, rc;
> > +
> > +	if (!d)
> > +		return -ENODEV;
> > +
> > +	if (!chan)
> > +		return -ENXIO;
> > +
> > +	if (WARN_ON(!ep_src || !rc_dst))
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON(sg_dma_address(sgl)))
> > +		return -EINVAL;
> > +
> > +	sg_init_one(sgl, rc_dst, len);
> > +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
> > +	if (nents <= 0)
> > +		return -EIO;
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.src_addr       = ep_src;
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_DEV_TO_MEM;
> > +	rc = dmaengine_slave_config(chan, &cfg);
> > +	if (rc)
> > +		goto out_unmap;
> > +
> > +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +
> > +	txd->callback_result = ntb_transport_edma_rc_write_cb;
> > +	txd->callback_param = entry;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie)) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +	dma_async_issue_pending(chan);
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_rc_read_start(struct device *d,
> > +					    struct dma_chan *chan, size_t len,
> > +					    void *rc_src, dma_addr_t ep_dst,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct scatterlist *sgl = &entry->sgl;
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	dma_cookie_t cookie;
> > +	int nents, rc;
> > +
> > +	if (!d)
> > +		return -ENODEV;
> > +
> > +	if (!chan)
> > +		return -ENXIO;
> > +
> > +	if (WARN_ON(!rc_src || !ep_dst))
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON(sg_dma_address(sgl)))
> > +		return -EINVAL;
> > +
> > +	sg_init_one(sgl, rc_src, len);
> > +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
> > +	if (nents <= 0)
> > +		return -EIO;
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.dst_addr       = ep_dst;
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_MEM_TO_DEV;
> > +	rc = dmaengine_slave_config(chan, &cfg);
> > +	if (rc)
> > +		goto out_unmap;
> > +
> > +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +
> > +	txd->callback_result = ntb_transport_edma_rc_read_cb;
> > +	txd->callback_param = entry;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie)) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +	dma_async_issue_pending(chan);
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
> > +{
> > +	struct ntb_queue_entry *entry = container_of(
> > +				work, struct ntb_queue_entry, dma_work);
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct dma_chan *chan;
> > +	int rc;
> > +
> > +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
> > +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
> > +					       entry->addr, entry->buf, entry);
> > +	if (rc) {
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->write_work);
> > +		return;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	unsigned int budget = NTB_EDMA_MAX_POLL;
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	dma_addr_t ep_src;
> > +	u32 len, idx;
> > +
> > +	for (; budget; budget--) {
> > +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
> > +					     qp->wr_issue) == 0)
> > +			break;
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_issue);
> > +		in = NTB_DESC_WR_RC_I(qp, idx);
> > +
> > +		len = READ_ONCE(in->len);
> > +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
> > +
> > +		/* Prepare 'entry' for write completion */
> > +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
> > +		if (!entry) {
> > +			qp->rx_err_no_buf++;
> > +			break;
> > +		}
> > +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
> > +			entry->flags &= ~DESC_DONE_FLAG;
> > +		entry->len = len; /* NB. entry->len can be <=0 */
> > +		entry->addr = ep_src;
> > +
> > +		/*
> > +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
> > +		 * so it needs to be set before wr_issue++.
> > +		 */
> > +		in->data = (uintptr_t)entry;
> > +
> > +		/* Ensure in->data visible before wr_issue++ */
> > +		smp_wmb();
> > +
> > +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
> > +
> > +		if (!len) {
> > +			entry->flags |= DESC_DONE_FLAG;
> > +			queue_work(nt->wq, &qp->write_work);
> > +			continue;
> > +		}
> > +
> > +		if (in->flags & LINK_DOWN_FLAG) {
> > +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
> > +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
> > +			queue_work(nt->wq, &qp->write_work);
> > +			continue;
> > +		}
> > +
> > +		queue_work(nt->wq, &entry->dma_work);
> > +	}
> > +
> > +	if (!budget)
> > +		tasklet_schedule(&qp->rxc_db_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	struct dma_chan *chan;
> > +	u32 issue, idx, head;
> > +	dma_addr_t ep_dst;
> > +	int rc;
> > +
> > +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> > +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> > +		issue = qp->rd_issue;
> > +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
> > +			qp->tx_ring_full++;
> > +			return -ENOSPC;
> > +		}
> > +
> > +		/*
> > +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
> > +		 * so it needs to be set before rd_issue++.
> > +		 */
> > +		idx = ntb_edma_ring_idx(issue);
> > +		in = NTB_DESC_RD_RC_I(qp, idx);
> > +		in->data = (uintptr_t)entry;
> > +
> > +		/* Make in->data visible before rd_issue++ */
> > +		smp_wmb();
> > +
> > +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
> > +	}
> > +
> > +	/* Publish the final transfer length to the EP side */
> > +	out = NTB_DESC_RD_RC_O(qp, idx);
> > +	iowrite32(len, &out->len);
> > +	ioread32(&out->len);
> > +
> > +	if (unlikely(!len)) {
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->read_work);
> > +		return 0;
> > +	}
> > +
> > +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
> > +	dma_rmb();
> > +
> > +	/* kick remote eDMA read transfer */
> > +	ep_dst = (dma_addr_t)in->addr;
> > +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
> > +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
> > +					      entry->buf, ep_dst, entry);
> > +	if (rc) {
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->read_work);
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	dma_addr_t ep_src = 0;
> > +	u32 idx;
> > +	int rc;
> > +
> > +	if (likely(len)) {
> > +		ep_src = dma_map_single(dma_dev, entry->buf, len,
> > +					DMA_TO_DEVICE);
> > +		rc = dma_mapping_error(dma_dev, ep_src);
> > +		if (rc)
> > +			return rc;
> > +	}
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> > +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
> > +			rc = -ENOSPC;
> > +			qp->tx_ring_full++;
> > +			goto out_unmap;
> > +		}
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_prod);
> > +		in  = NTB_DESC_WR_EP_I(qp, idx);
> > +		out = NTB_DESC_WR_EP_O(qp, idx);
> > +
> > +		WARN_ON(in->flags & DESC_DONE_FLAG);
> > +		WARN_ON(entry->flags & DESC_DONE_FLAG);
> > +		in->flags = 0;
> > +		in->data  = (uintptr_t)entry;
> > +		entry->addr  = ep_src;
> > +
> > +		iowrite32(len,          &out->len);
> > +		iowrite32(entry->flags, &out->flags);
> > +		iowrite64(ep_src,       &out->addr);
> > +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
> > +
> > +		dma_wmb();
> > +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
> > +
> > +		qp->tx_bytes += len;
> > +		qp->tx_pkts++;
> > +	}
> > +
> > +	ntb_transport_edma_notify_peer(qp);
> > +
> > +	return 0;
> > +out_unmap:
> > +	if (likely(len))
> > +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
> > +					 struct ntb_queue_entry *entry,
> > +					 void *cb, void *data, unsigned int len,
> > +					 unsigned int flags)
> > +{
> > +	struct device *dma_dev;
> > +
> > +	if (entry->addr) {
> > +		/* Deferred unmap */
> > +		dma_dev = get_dma_dev(qp->ndev);
> > +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
> > +	}
> > +
> > +	entry->cb_data = cb;
> > +	entry->buf = data;
> > +	entry->len = len;
> > +	entry->flags = flags;
> > +	entry->errors = 0;
> > +	entry->addr = 0;
> > +
> > +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
> > +
> > +	if (ntb_qp_edma_is_ep(qp))
> > +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
> > +	else
> > +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
> > +}
> > +
> > +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	void *data = entry->buf;
> > +	dma_addr_t ep_dst;
> > +	u32 idx;
> > +	int rc;
> > +
> > +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
> > +	rc = dma_mapping_error(dma_dev, ep_dst);
> > +	if (rc)
> > +		return rc;
> > +
> > +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
> > +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
> > +				       READ_ONCE(qp->rd_cons))) {
> > +			rc = -ENOSPC;
> > +			goto out_unmap;
> > +		}
> > +
> > +		idx = ntb_edma_ring_idx(qp->rd_prod);
> > +		in = NTB_DESC_RD_EP_I(qp, idx);
> > +		out = NTB_DESC_RD_EP_O(qp, idx);
> > +
> > +		iowrite32(len, &out->len);
> > +		iowrite64(ep_dst, &out->addr);
> > +
> > +		WARN_ON(in->flags & DESC_DONE_FLAG);
> > +		in->data = (uintptr_t)entry;
> > +		entry->addr = ep_dst;
> > +
> > +		/* Ensure len/addr are visible before the head update */
> > +		dma_wmb();
> > +
> > +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
> > +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
> > +	}
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
> > +					 struct ntb_queue_entry *entry)
> > +{
> > +	int rc;
> > +
> > +	/* The behaviour is the same as the default backend for RC side */
> > +	if (ntb_qp_edma_is_ep(qp)) {
> > +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
> > +		if (rc) {
> > +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> > +				     &qp->rx_free_q);
> > +			return rc;
> > +		}
> > +	}
> > +
> > +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
> > +
> > +	if (qp->active)
> > +		tasklet_schedule(&qp->rxc_db_work);
> > +
> > +	return 0;
> > +}
> > +
> > +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +
> > +	if (ntb_qp_edma_is_rc(qp)) {
> > +		ntb_transport_edma_rc_poll(qp);
> > +	} else if (ntb_qp_edma_is_ep(qp)) {
> > +		/*
> > +		 * Make sure we poll the rings even if an eDMA interrupt is
> > +		 * cleared on the RC side earlier.
> > +		 */
> > +		queue_work(nt->wq, &qp->read_work);
> > +		queue_work(nt->wq, &qp->write_work);
> > +	} else {
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_read_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		ntb_transport_edma_rc_read_complete_work(work);
> > +	else if (ntb_qp_edma_is_ep(qp))
> > +		ntb_transport_edma_ep_read_work(work);
> > +	else
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +}
> > +
> > +static void ntb_transport_edma_write_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		ntb_transport_edma_rc_write_complete_work(work);
> > +	else if (ntb_qp_edma_is_ep(qp))
> > +		ntb_transport_edma_ep_write_work(work);
> > +	else
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +}
> > +
> > +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num)
> > +{
> > +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> > +
> > +	qp->wr_cons = 0;
> > +	qp->rd_cons = 0;
> > +	qp->wr_prod = 0;
> > +	qp->rd_prod = 0;
> > +	qp->wr_issue = 0;
> > +	qp->rd_issue = 0;
> > +
> > +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
> > +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
> > +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
> > +}
> > +
> > +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> > +					    struct ntb_transport_qp *qp)
> > +{
> > +	spin_lock_init(&qp->ep_tx_lock);
> > +	spin_lock_init(&qp->ep_rx_lock);
> > +	spin_lock_init(&qp->rc_lock);
> > +}
> > +
> > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > +	.rx_poll = ntb_transport_edma_rx_poll,
> > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > +};
> > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
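> > +
> > +/*
> > + * Note that client usage is identical for both backends; only how the
> > + * transport moves the bytes differs. A minimal client sketch (hypothetical
> > + * handlers/buffers, same exported calls ntb_netdev already uses):
> > + *
> > + *	qp = ntb_transport_create_queue(priv, client_dev, &handlers);
> > + *	ntb_transport_rx_enqueue(qp, rx_ctx, rx_buf, buf_len);
> > + *	ntb_transport_link_up(qp);
> > + *	...
> > + *	ntb_transport_tx_enqueue(qp, tx_ctx, tx_buf, tx_len);
> > + */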
> > +
> >  /**
> >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> >   * @qp: NTB transport layer queue to be enabled
>
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Dave Jiang 2 months, 1 week ago

On 12/1/25 11:59 PM, Koichiro Den wrote:
> On Mon, Dec 01, 2025 at 02:46:41PM -0700, Dave Jiang wrote:
>>
>>
>> On 11/29/25 9:03 AM, Koichiro Den wrote:
>>> Add a new transport backend that uses a remote DesignWare eDMA engine
>>> located on the NTB endpoint to move data between host and endpoint.
>>>
>>> In this mode:
>>>
>>>   - The endpoint exposes a dedicated memory window that contains the
>>>     eDMA register block followed by a small control structure (struct
>>>     ntb_edma_info) and per-channel linked-list (LL) rings.
>>>
>>>   - On the endpoint side, ntb_edma_setup_mws() allocates the control
>>>     structure and LL rings in endpoint memory, then programs an inbound
>>>     iATU region so that the host can access them via a peer MW.
>>>
>>>   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
>>>     ntb_edma_info and configures a dw-edma DMA device to use the LL
>>>     rings provided by the endpoint.
>>>
>>>   - ntb_transport is extended with a new backend_ops implementation that
>>>     routes TX and RX enqueue/poll operations through the remote eDMA
>>>     rings while keeping the existing shared-memory backend intact.
>>>
>>>   - The host signals the endpoint via a dedicated DMA read channel.
>>>     'use_msi' module option is ignored when 'use_remote_edma=1'.
>>>
>>> The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
>>> module parameter (use_remote_edma). When disabled, the existing
>>> ntb_transport behaviour is unchanged.
>>>
>>> Signed-off-by: Koichiro Den <den@valinux.co.jp>
>>> ---
>>>  drivers/ntb/Kconfig                           |   11 +
>>>  drivers/ntb/Makefile                          |    3 +
>>>  drivers/ntb/ntb_edma.c                        |  628 ++++++++
>>>  drivers/ntb/ntb_edma.h                        |  128 ++
>>
>> I briefly looked over the code. It feels like the eDMA bits should go in drivers/ntb/hw/ rather than drivers/ntb/, given they are pretty specific to the DesignWare hardware. What sits in drivers/ntb should be generic APIs that a different vendor can use without having to adapt to DesignWare hardware specifics. So maybe a bit more abstraction is needed?
> 
> That makes sense, I'll reorganize things. Thank you for the suggestion.

Also, since a new transport is being introduced, please update Documentation/driver-api/ntb.rst. While the current documentation doesn't provide adequate coverage of the ntb_transport APIs, hopefully the new transport can do better going forward. :) Thank you!

DJ

> 
>>
>>>  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
>>>  5 files changed, 2048 insertions(+), 3 deletions(-)
>>>  create mode 100644 drivers/ntb/ntb_edma.c
>>>  create mode 100644 drivers/ntb/ntb_edma.h
>>>  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
>>>
>>> diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
>>> index df16c755b4da..db63f02bb116 100644
>>> --- a/drivers/ntb/Kconfig
>>> +++ b/drivers/ntb/Kconfig
>>> @@ -37,4 +37,15 @@ config NTB_TRANSPORT
>>>  
>>>  	 If unsure, say N.
>>>  
>>> +config NTB_TRANSPORT_EDMA
>>> +	bool "NTB Transport backed by remote eDMA"
>>> +	depends on NTB_TRANSPORT
>>> +	depends on PCI
>>> +	select DMA_ENGINE
>>> +	help
>>> +	  Enable a transport backend that uses a remote DesignWare eDMA engine
>>> +	  exposed through a dedicated NTB memory window. The host uses the
>>> +	  endpoint's eDMA engine to move data in both directions.
>>> +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
>>> +
>>>  endif # NTB
>>> diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
>>> index 3a6fa181ff99..51f0e1e3aec7 100644
>>> --- a/drivers/ntb/Makefile
>>> +++ b/drivers/ntb/Makefile
>>> @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
>>>  
>>>  ntb-y			:= core.o
>>>  ntb-$(CONFIG_NTB_MSI)	+= msi.o
>>> +
>>> +ntb_transport-y					:= ntb_transport_core.o
>>> +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
>>> diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
>>> new file mode 100644
>>> index 000000000000..cb35e0d56aa8
>>> --- /dev/null
>>> +++ b/drivers/ntb/ntb_edma.c
>>> @@ -0,0 +1,628 @@
>>> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
>>> +
>>> +#include <linux/module.h>
>>> +#include <linux/device.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/ntb.h>
>>> +#include <linux/io.h>
>>> +#include <linux/iommu.h>
>>> +#include <linux/dmaengine.h>
>>> +#include <linux/pci-epc.h>
>>> +#include <linux/dma/edma.h>
>>> +#include <linux/irq.h>
>>> +#include <linux/irqdomain.h>
>>> +#include <linux/of.h>
>>> +#include <linux/of_irq.h>
>>> +#include <dt-bindings/interrupt-controller/arm-gic.h>
>>> +
>>> +#include "ntb_edma.h"
>>> +
>>> +/*
>>> + * The interrupt register offsets below are taken from the DesignWare
>>> + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
>>> + * backend currently only supports this layout.
>>> + */
>>> +#define DMA_WRITE_INT_STATUS_OFF   0x4c
>>> +#define DMA_WRITE_INT_MASK_OFF     0x54
>>> +#define DMA_WRITE_INT_CLEAR_OFF    0x58
>>> +#define DMA_READ_INT_STATUS_OFF    0xa0
>>> +#define DMA_READ_INT_MASK_OFF      0xa8
>>> +#define DMA_READ_INT_CLEAR_OFF     0xac
>>> +
>>> +#define NTB_EDMA_NOTIFY_MAX_QP		64
>>> +
>>> +static unsigned int edma_spi = 417; /* 0x1a1 */
>>> +module_param(edma_spi, uint, 0644);
>>> +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
>>> +
>>> +static u64 edma_regs_phys = 0xe65d5000;
>>> +module_param(edma_regs_phys, ullong, 0644);
>>> +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
>>> +
>>> +static unsigned long edma_regs_size = 0x1200;
>>> +module_param(edma_regs_size, ulong, 0644);
>>> +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
>>> +
>>> +struct ntb_edma_intr {
>>> +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
>>> +};
>>> +
>>> +struct ntb_edma_ctx {
>>> +	void *ll_wr_virt[EDMA_WR_CH_NUM];
>>> +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
>>> +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
>>> +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
>>> +
>>> +	struct ntb_edma_intr *intr_ep_virt;
>>> +	dma_addr_t intr_ep_phys;
>>> +	struct ntb_edma_intr *intr_rc_virt;
>>> +	dma_addr_t intr_rc_phys;
>>> +	u32 notify_qp_max;
>>> +
>>> +	bool initialized;
>>> +};
>>> +
>>> +static struct ntb_edma_ctx edma_ctx;
>>> +
>>> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
>>> +
>>> +struct ntb_edma_interrupt {
>>> +	int virq;
>>> +	void __iomem *base;
>>> +	ntb_edma_interrupt_cb_t cb;
>>> +	void *data;
>>> +};
>>> +
>>> +static struct ntb_edma_interrupt ntb_edma_intr;
>>> +
>>> +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
>>> +{
>>> +	struct device_node *np = dev_of_node(dev);
>>> +	struct device_node *parent;
>>> +	struct irq_fwspec fwspec = { 0 };
>>> +	int virq;
>>> +
>>> +	parent = of_irq_find_parent(np);
>>> +	if (!parent)
>>> +		return -ENODEV;
>>> +
>>> +	fwspec.fwnode      = of_fwnode_handle(parent);
>>> +	fwspec.param_count = 3;
>>> +	fwspec.param[0]    = GIC_SPI;
>>> +	fwspec.param[1]    = spi;
>>> +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
>>> +
>>> +	virq = irq_create_fwspec_mapping(&fwspec);
>>> +	of_node_put(parent);
>>> +	return (virq > 0) ? virq : -EINVAL;
>>> +}
>>> +
>>> +static irqreturn_t ntb_edma_isr(int irq, void *data)
>>> +{
>>> +	struct ntb_edma_interrupt *v = data;
>>> +	u32 mask = BIT(EDMA_RD_CH_NUM);
>>> +	u32 i, val;
>>> +
>>> +	/*
>>> +	 * We do not ack interrupts here but instead we mask all local interrupt
>>> +	 * sources except the read channel used for notification. This reduces
>>> +	 * needless ISR invocations.
>>> +	 *
>>> +	 * In theory we could configure LIE=1/RIE=0 only for the notification
>>> +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
>>> +	 * require intrusive changes to the dw-edma core.
>>> +	 *
>>> +	 * Note: The host side may have already cleared the read interrupt used
>>> +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
>>> +	 * way to detect it. As a result, we cannot reliably tell which specific
>>> +	 * channel triggered this interrupt; we scan intr_ep_virt->db[i] below
>>> +	 * to find the queue pairs that need servicing.
>>> +	 */
>>> +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
>>> +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
>>> +
>>> +	if (!v->cb || !edma_ctx.intr_ep_virt)
>>> +		return IRQ_HANDLED;
>>> +
>>> +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
>>> +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
>>> +		if (!val)
>>> +			continue;
>>> +
>>> +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
>>> +		v->cb(v->data, i);
>>> +	}
>>> +
>>> +	return IRQ_HANDLED;
>>> +}
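>>> +
>>> +/*
>>> + * Worked example of the mask writes above: with EDMA_RD_CH_NUM == 4 the
>>> + * notification transfer uses read channel index 4 (the extra "+1" channel),
>>> + * so mask == BIT(4). Writing ~mask to DMA_READ_INT_MASK_OFF masks read
>>> + * channels 0-3 but leaves the notification channel unmasked, while ~0x0
>>> + * masks every write channel.
>>> + */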
>>> +
>>> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
>>> +		       ntb_edma_interrupt_cb_t cb, void *data)
>>> +{
>>> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
>>> +	int virq = ntb_edma_map_spi_to_virq(epc_dev->parent, edma_spi);
>>> +	int ret;
>>> +
>>> +	if (virq < 0) {
>>> +		dev_err(dev, "failed to get virq (%d)\n", virq);
>>> +		return virq;
>>> +	}
>>> +
>>> +	v->virq = virq;
>>> +	v->cb = cb;
>>> +	v->data = data;
>>> +	if (edma_regs_phys && !v->base)
>>> +		v->base = devm_ioremap(dev, edma_regs_phys, edma_regs_size);
>>> +	if (!v->base) {
>>> +		dev_err(dev, "failed to map local eDMA registers\n");
>>> +		return -ENOMEM;
>>> +	}
>>> +	ret = devm_request_irq(dev, v->virq, ntb_edma_isr, 0, "ntb-edma", v);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	if (v->base) {
>>> +		iowrite32(0x0, v->base + DMA_WRITE_INT_MASK_OFF);
>>> +		iowrite32(0x0, v->base + DMA_READ_INT_MASK_OFF);
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +void ntb_edma_teardown_isr(struct device *dev)
>>> +{
>>> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
>>> +
>>> +	/* Mask all write/read interrupts so we don't get called again. */
>>> +	if (v->base) {
>>> +		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
>>> +		iowrite32(~0x0, v->base + DMA_READ_INT_MASK_OFF);
>>> +	}
>>> +
>>> +	if (v->virq > 0)
>>> +		devm_free_irq(dev, v->virq, v);
>>> +
>>> +	if (v->base)
>>> +		devm_iounmap(dev, v->base);
>>> +
>>> +	v->virq = 0;
>>> +	v->cb = NULL;
>>> +	v->data = NULL;
>>> +}
>>> +
>>> +int ntb_edma_setup_mws(struct ntb_dev *ndev)
>>> +{
>>> +	const size_t info_bytes = PAGE_SIZE;
>>> +	resource_size_t size_max, offset;
>>> +	dma_addr_t intr_phys, info_phys;
>>> +	u32 wr_done = 0, rd_done = 0;
>>> +	struct ntb_edma_intr *intr;
>>> +	struct ntb_edma_info *info;
>>> +	int peer_mw, mw_index, rc;
>>> +	struct iommu_domain *dom;
>>> +	bool reg_mapped = false;
>>> +	size_t ll_bytes, size;
>>> +	struct pci_epc *epc;
>>> +	struct device *dev;
>>> +	unsigned long iova;
>>> +	phys_addr_t phys;
>>> +	u64 need;
>>> +	u32 i;
>>> +
>>> +	/* the extra +1 is the read channel reserved for notification */
>>> +	ll_bytes = (EDMA_WR_CH_NUM + EDMA_RD_CH_NUM + 1) * DMA_LLP_MEM_SIZE;
>>> +	need = EDMA_REG_SIZE + info_bytes + ll_bytes;
>>> +
>>> +	epc = ntb_get_pci_epc(ndev);
>>> +	if (!epc)
>>> +		return -ENODEV;
>>> +	dev = epc->dev.parent;
>>> +
>>> +	if (edma_ctx.initialized)
>>> +		return 0;
>>> +
>>> +	info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL);
>>> +	if (!info)
>>> +		return -ENOMEM;
>>> +
>>> +	memset(info, 0, info_bytes);
>>> +	info->magic = NTB_EDMA_INFO_MAGIC;
>>> +	info->wr_cnt = EDMA_WR_CH_NUM;
>>> +	info->rd_cnt = EDMA_RD_CH_NUM + 1; /* +1 for the notification channel */
>>> +	info->regs_phys = edma_regs_phys;
>>> +	info->ll_stride = DMA_LLP_MEM_SIZE;
>>> +
>>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
>>> +		edma_ctx.ll_wr_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
>>> +							 &edma_ctx.ll_wr_phys[i],
>>> +							 GFP_KERNEL,
>>> +							 DMA_ATTR_FORCE_CONTIGUOUS);
>>> +		if (!edma_ctx.ll_wr_virt[i]) {
>>> +			rc = -ENOMEM;
>>> +			goto err_free_ll;
>>> +		}
>>> +		wr_done++;
>>> +		info->ll_wr_phys[i] = edma_ctx.ll_wr_phys[i];
>>> +	}
>>> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
>>> +		edma_ctx.ll_rd_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
>>> +							 &edma_ctx.ll_rd_phys[i],
>>> +							 GFP_KERNEL,
>>> +							 DMA_ATTR_FORCE_CONTIGUOUS);
>>> +		if (!edma_ctx.ll_rd_virt[i]) {
>>> +			rc = -ENOMEM;
>>> +			goto err_free_ll;
>>> +		}
>>> +		rd_done++;
>>> +		info->ll_rd_phys[i] = edma_ctx.ll_rd_phys[i];
>>> +	}
>>> +
>>> +	/* Doorbell array used for peer notification */
>>> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
>>> +	intr = dma_alloc_coherent(dev, sizeof(*intr), &intr_phys, GFP_KERNEL);
>>> +	if (!intr) {
>>> +		rc = -ENOMEM;
>>> +		goto err_free_ll;
>>> +	}
>>> +	memset(intr, 0, sizeof(*intr));
>>> +	edma_ctx.intr_ep_virt = intr;
>>> +	edma_ctx.intr_ep_phys = intr_phys;
>>> +	info->intr_dar_base = intr_phys;
>>> +
>>> +	peer_mw = ntb_peer_mw_count(ndev);
>>> +	if (peer_mw <= 0) {
>>> +		rc = -ENODEV;
>>> +		goto err_free_ll;
>>> +	}
>>> +
>>> +	mw_index = peer_mw - 1; /* last MW */
>>> +
>>> +	rc = ntb_mw_get_align(ndev, 0, mw_index, 0, NULL, &size_max,
>>> +			      &offset);
>>> +	if (rc)
>>> +		goto err_free_ll;
>>> +
>>> +	if (size_max < need) {
>>> +		rc = -ENOSPC;
>>> +		goto err_free_ll;
>>> +	}
>>> +
>>> +	/* Map register space (direct) */
>>> +	dom = iommu_get_domain_for_dev(dev);
>>> +	if (dom) {
>>> +		phys = edma_regs_phys & PAGE_MASK;
>>> +		size = PAGE_ALIGN(EDMA_REG_SIZE + edma_regs_phys - phys);
>>> +		iova = phys;
>>> +
>>> +		rc = iommu_map(dom, iova, phys, size,
>>> +			       IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, GFP_KERNEL);
>>> +		if (rc)
>>> +			dev_err(&ndev->dev, "failed to create direct mapping for eDMA reg space\n");
>>> +		else
>>> +			reg_mapped = true;
>>> +	}
>>> +
>>> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_regs_phys, EDMA_REG_SIZE, offset);
>>> +	if (rc)
>>> +		goto err_unmap_reg;
>>> +
>>> +	offset += EDMA_REG_SIZE;
>>> +
>>> +	/* Map ntb_edma_info */
>>> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, info_phys, info_bytes, offset);
>>> +	if (rc)
>>> +		goto err_clear_trans;
>>> +	offset += info_bytes;
>>> +
>>> +	/* Map LL location */
>>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
>>> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_wr_phys[i],
>>> +				      DMA_LLP_MEM_SIZE, offset);
>>> +		if (rc)
>>> +			goto err_clear_trans;
>>> +		offset += DMA_LLP_MEM_SIZE;
>>> +	}
>>> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
>>> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_rd_phys[i],
>>> +				      DMA_LLP_MEM_SIZE, offset);
>>> +		if (rc)
>>> +			goto err_clear_trans;
>>> +		offset += DMA_LLP_MEM_SIZE;
>>> +	}
>>> +	edma_ctx.initialized = true;
>>> +
>>> +	return 0;
>>> +
>>> +err_clear_trans:
>>> +	/*
>>> +	 * Tear down the NTB translation window used for the eDMA MW.
>>> +	 * There is no sub-range clear API for ntb_mw_set_trans(), so we
>>> +	 * unconditionally drop the whole mapping on error.
>>> +	 */
>>> +	ntb_mw_clear_trans(ndev, 0, mw_index);
>>> +
>>> +err_unmap_reg:
>>> +	if (reg_mapped)
>>> +		iommu_unmap(dom, iova, size);
>>> +err_free_ll:
>>> +	while (rd_done--)
>>> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
>>> +			       edma_ctx.ll_rd_virt[rd_done],
>>> +			       edma_ctx.ll_rd_phys[rd_done],
>>> +			       DMA_ATTR_FORCE_CONTIGUOUS);
>>> +	while (wr_done--)
>>> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
>>> +			       edma_ctx.ll_wr_virt[wr_done],
>>> +			       edma_ctx.ll_wr_phys[wr_done],
>>> +			       DMA_ATTR_FORCE_CONTIGUOUS);
>>> +	if (edma_ctx.intr_ep_virt)
>>> +		dma_free_coherent(dev, sizeof(struct ntb_edma_intr),
>>> +				  edma_ctx.intr_ep_virt,
>>> +				  edma_ctx.intr_ep_phys);
>>> +	dma_free_coherent(dev, info_bytes, info, info_phys);
>>> +	return rc;
>>> +}
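>>> +
>>> +/*
>>> + * Size requirement, worked through with the defaults (assuming 4 KiB
>>> + * pages):
>>> + *
>>> + *   need = EDMA_REG_SIZE + PAGE_SIZE + (4 + 4 + 1) * DMA_LLP_MEM_SIZE
>>> + *        = 64 KiB + 4 KiB + 36 KiB = 104 KiB
>>> + *
>>> + * so the last peer MW must be at least 104 KiB for the register block,
>>> + * the ntb_edma_info page and the nine LL rings to fit.
>>> + */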
>>> +
>>> +static int ntb_edma_irq_vector(struct device *dev, unsigned int nr)
>>> +{
>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>> +	int ret, nvec;
>>> +
>>> +	nvec = pci_msi_vec_count(pdev);
>>> +	for (; nr < nvec; nr++) {
>>> +		ret = pci_irq_vector(pdev, nr);
>>> +		if (!irq_has_action(ret))
>>> +			return ret;
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +static const struct dw_edma_plat_ops ntb_edma_ops = {
>>> +	.irq_vector     = ntb_edma_irq_vector,
>>> +};
>>> +
>>> +int ntb_edma_setup_peer(struct ntb_dev *ndev)
>>> +{
>>> +	struct ntb_edma_info *info;
>>> +	unsigned int wr_cnt, rd_cnt;
>>> +	struct dw_edma_chip *chip;
>>> +	void __iomem *edma_virt;
>>> +	phys_addr_t edma_phys;
>>> +	resource_size_t mw_size;
>>> +	u64 off = EDMA_REG_SIZE;
>>> +	int peer_mw, mw_index;
>>> +	unsigned int i;
>>> +	int ret;
>>> +
>>> +	peer_mw = ntb_peer_mw_count(ndev);
>>> +	if (peer_mw <= 0)
>>> +		return -ENODEV;
>>> +
>>> +	mw_index = peer_mw - 1; /* last MW */
>>> +
>>> +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
>>> +				   &mw_size);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	edma_virt = ioremap(edma_phys, mw_size);
>>> +	if (!edma_virt)
>>> +		return -ENOMEM;
>>> +
>>> +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
>>> +	if (!chip) {
>>> +		ret = -ENOMEM;
>>> +		return ret;
>>> +	}
>>> +
>>> +	chip->dev = &ndev->pdev->dev;
>>> +	chip->nr_irqs = 4;
>>> +	chip->ops = &ntb_edma_ops;
>>> +	chip->flags = 0;
>>> +	chip->reg_base = edma_virt;
>>> +	chip->mf = EDMA_MF_EDMA_UNROLL;
>>> +
>>> +	info = edma_virt + off;
>>> +	if (info->magic != NTB_EDMA_INFO_MAGIC)
>>> +		return -EINVAL;
>>> +	wr_cnt = info->wr_cnt;
>>> +	rd_cnt = info->rd_cnt;
>>> +	chip->ll_wr_cnt = wr_cnt;
>>> +	chip->ll_rd_cnt = rd_cnt;
>>> +	off += PAGE_SIZE;
>>> +
>>> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
>>> +	edma_ctx.intr_ep_phys = info->intr_dar_base;
>>> +	if (edma_ctx.intr_ep_phys) {
>>> +		edma_ctx.intr_rc_virt =
>>> +			dma_alloc_coherent(&ndev->pdev->dev,
>>> +					   sizeof(struct ntb_edma_intr),
>>> +					   &edma_ctx.intr_rc_phys,
>>> +					   GFP_KERNEL);
>>> +		if (!edma_ctx.intr_rc_virt)
>>> +			return -ENOMEM;
>>> +		memset(edma_ctx.intr_rc_virt, 0,
>>> +		       sizeof(struct ntb_edma_intr));
>>> +	}
>>> +
>>> +	for (i = 0; i < wr_cnt; i++) {
>>> +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
>>> +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
>>> +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
>>> +		off += DMA_LLP_MEM_SIZE;
>>> +	}
>>> +	for (i = 0; i < rd_cnt; i++) {
>>> +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
>>> +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
>>> +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
>>> +		off += DMA_LLP_MEM_SIZE;
>>> +	}
>>> +
>>> +	if (!pci_dev_msi_enabled(ndev->pdev))
>>> +		return -ENXIO;
>>> +
>>> +	ret = dw_edma_probe(chip);
>>> +	if (ret) {
>>> +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
>>> +		return ret;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +struct ntb_edma_filter {
>>> +	struct device *dma_dev;
>>> +	u32 direction;
>>> +};
>>> +
>>> +static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
>>> +{
>>> +	struct ntb_edma_filter *filter = arg;
>>> +	u32 dir = filter->direction;
>>> +	struct dma_slave_caps caps;
>>> +	int ret;
>>> +
>>> +	if (chan->device->dev != filter->dma_dev)
>>> +		return false;
>>> +
>>> +	ret = dma_get_slave_caps(chan, &caps);
>>> +	if (ret < 0)
>>> +		return false;
>>> +
>>> +	return !!(caps.directions & dir);
>>> +}
>>> +
>>> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
>>> +{
>>> +	unsigned int i;
>>> +
>>> +	for (i = 0; i < edma->num_wr_chan; i++)
>>> +		dma_release_channel(edma->wr_chan[i]);
>>> +
>>> +	for (i = 0; i < edma->num_rd_chan; i++)
>>> +		dma_release_channel(edma->rd_chan[i]);
>>> +
>>> +	if (edma->intr_chan)
>>> +		dma_release_channel(edma->intr_chan);
>>> +}
>>> +
>>> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
>>> +{
>>> +	struct ntb_edma_filter filter;
>>> +	dma_cap_mask_t dma_mask;
>>> +	unsigned int i;
>>> +
>>> +	dma_cap_zero(dma_mask);
>>> +	dma_cap_set(DMA_SLAVE, dma_mask);
>>> +
>>> +	memset(edma, 0, sizeof(*edma));
>>> +	edma->dev = dma_dev;
>>> +
>>> +	filter.dma_dev = dma_dev;
>>> +	filter.direction = BIT(DMA_DEV_TO_MEM);
>>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
>>> +		edma->wr_chan[i] = dma_request_channel(dma_mask,
>>> +						       ntb_edma_filter_fn,
>>> +						       &filter);
>>> +		if (!edma->wr_chan[i])
>>> +			break;
>>> +		edma->num_wr_chan++;
>>> +	}
>>> +
>>> +	filter.direction = BIT(DMA_MEM_TO_DEV);
>>> +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
>>> +		edma->rd_chan[i] = dma_request_channel(dma_mask,
>>> +						       ntb_edma_filter_fn,
>>> +						       &filter);
>>> +		if (!edma->rd_chan[i])
>>> +			break;
>>> +		edma->num_rd_chan++;
>>> +	}
>>> +
>>> +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
>>> +					      &filter);
>>> +	if (!edma->intr_chan)
>>> +		dev_warn(dma_dev,
>>> +			 "Remote eDMA notify channel could not be allocated\n");
>>> +
>>> +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
>>> +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
>>> +		ntb_edma_teardown_chans(edma);
>>> +		return -ENODEV;
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
>>> +				    remote_edma_dir_t dir)
>>> +{
>>> +	unsigned int n, cur, idx;
>>> +	struct dma_chan **chans;
>>> +	atomic_t *cur_chan;
>>> +
>>> +	if (dir == REMOTE_EDMA_WRITE) {
>>> +		n = edma->num_wr_chan;
>>> +		chans = edma->wr_chan;
>>> +		cur_chan = &edma->cur_wr_chan;
>>> +	} else {
>>> +		n = edma->num_rd_chan;
>>> +		chans = edma->rd_chan;
>>> +		cur_chan = &edma->cur_rd_chan;
>>> +	}
>>> +	if (WARN_ON_ONCE(!n))
>>> +		return NULL;
>>> +
>>> +	/* Simple round-robin */
>>> +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
>>> +	idx = cur % n;
>>> +	return chans[idx];
>>> +}
>>> +
>>> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
>>> +{
>>> +	struct dma_async_tx_descriptor *txd;
>>> +	struct dma_slave_config cfg;
>>> +	struct scatterlist sgl;
>>> +	dma_cookie_t cookie;
>>> +	struct device *dev;
>>> +
>>> +	if (!edma || !edma->intr_chan)
>>> +		return -ENXIO;
>>> +
>>> +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
>>> +		return -EINVAL;
>>> +
>>> +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
>>> +		return -EINVAL;
>>> +
>>> +	dev = edma->dev;
>>> +	if (!dev)
>>> +		return -ENODEV;
>>> +
>>> +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
>>> +
>>> +	/* Ensure store is visible before kicking the DMA transfer */
>>> +	wmb();
>>> +
>>> +	sg_init_table(&sgl, 1);
>>> +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
>>> +	sg_dma_len(&sgl) = sizeof(u32);
>>> +
>>> +	memset(&cfg, 0, sizeof(cfg));
>>> +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
>>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.direction      = DMA_MEM_TO_DEV;
>>> +
>>> +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
>>> +		return -EINVAL;
>>> +
>>> +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
>>> +				      DMA_MEM_TO_DEV,
>>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
>>> +	if (!txd)
>>> +		return -ENOSPC;
>>> +
>>> +	cookie = dmaengine_submit(txd);
>>> +	if (dma_submit_error(cookie))
>>> +		return -ENOSPC;
>>> +
>>> +	dma_async_issue_pending(edma->intr_chan);
>>> +	return 0;
>>> +}
>>> diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
>>> new file mode 100644
>>> index 000000000000..da0451827edb
>>> --- /dev/null
>>> +++ b/drivers/ntb/ntb_edma.h
>>> @@ -0,0 +1,128 @@
>>> +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
>>> +#ifndef _NTB_EDMA_H_
>>> +#define _NTB_EDMA_H_
>>> +
>>> +#include <linux/completion.h>
>>> +#include <linux/device.h>
>>> +#include <linux/interrupt.h>
>>> +
>>> +#define EDMA_REG_SIZE		SZ_64K
>>> +#define DMA_LLP_MEM_SIZE	SZ_4K
>>> +#define EDMA_WR_CH_NUM		4
>>> +#define EDMA_RD_CH_NUM		4
>>> +#define NTB_EDMA_MAX_CH		8
>>> +
>>> +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
>>> +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
>>> +
>>> +#define NTB_EDMA_RING_ORDER	7
>>> +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
>>> +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
>>> +
>>> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
>>> +
>>> +/*
>>> + * REMOTE_EDMA_EP:
>>> + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
>>> + *
>>> + * REMOTE_EDMA_RC:
>>> + *   Root Complex controls the endpoint eDMA through the shared MW and
>>> + *   drives reads/writes on behalf of the host.
>>> + */
>>> +typedef enum {
>>> +	REMOTE_EDMA_UNKNOWN,
>>> +	REMOTE_EDMA_EP,
>>> +	REMOTE_EDMA_RC,
>>> +} remote_edma_mode_t;
>>> +
>>> +typedef enum {
>>> +	REMOTE_EDMA_WRITE,
>>> +	REMOTE_EDMA_READ,
>>> +} remote_edma_dir_t;
>>> +
>>> +/*
>>> + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
>>> + *
>>> + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
>>> + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
>>> + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
>>> + *                                RC configures via dw_edma)
>>> + *
>>> + * ntb_edma_setup_mws() on EP:
>>> + *   - allocates ntb_edma_info and LLs in EP memory
>>> + *   - programs inbound iATU so that RC peer MW[n] points at this block
>>> + *
>>> + * ntb_edma_setup_peer() on RC:
>>> + *   - ioremaps peer MW[n]
>>> + *   - reads ntb_edma_info
>>> + *   - sets up dw_edma_chip ll_region_* from that info
>>> + */
>>> +struct ntb_edma_info {
>>> +	u32 magic;
>>> +	u16 wr_cnt;
>>> +	u16 rd_cnt;
>>> +	u64 regs_phys;
>>> +	u32 ll_stride;
>>> +	u32 rsvd;
>>> +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
>>> +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
>>> +
>>> +	u64 intr_dar_base;
>>> +} __packed;
>>> +
>>> +struct ll_dma_addrs {
>>> +	dma_addr_t wr[EDMA_WR_CH_NUM];
>>> +	dma_addr_t rd[EDMA_RD_CH_NUM];
>>> +};
>>> +
>>> +struct ntb_edma_chans {
>>> +	struct device *dev;
>>> +
>>> +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
>>> +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
>>> +	struct dma_chan *intr_chan;
>>> +
>>> +	unsigned int num_wr_chan;
>>> +	unsigned int num_rd_chan;
>>> +	atomic_t cur_wr_chan;
>>> +	atomic_t cur_rd_chan;
>>> +};
>>> +
>>> +static __always_inline u32 ntb_edma_ring_idx(u32 v)
>>> +{
>>> +	return v & NTB_EDMA_RING_MASK;
>>> +}
>>> +
>>> +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
>>> +{
>>> +	if (head >= tail) {
>>> +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
>>> +		return head - tail;
>>> +	}
>>> +
>>> +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
>>> +	return U32_MAX - tail + head + 1;
>>> +}
>>> +
>>> +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
>>> +{
>>> +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
>>> +}
>>> +
>>> +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
>>> +{
>>> +	return ntb_edma_ring_free_entry(head, tail) == 0;
>>> +}
>>> +
>>> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
>>> +		       ntb_edma_interrupt_cb_t cb, void *data);
>>> +void ntb_edma_teardown_isr(struct device *dev);
>>> +int ntb_edma_setup_mws(struct ntb_dev *ndev);
>>> +int ntb_edma_setup_peer(struct ntb_dev *ndev);
>>> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
>>> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
>>> +				    remote_edma_dir_t dir);
>>> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
>>> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
>>> +
>>> +#endif
>>> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
>>> similarity index 65%
>>> rename from drivers/ntb/ntb_transport.c
>>> rename to drivers/ntb/ntb_transport_core.c
>>> index 907db6c93d4d..48d48921978d 100644
>>> --- a/drivers/ntb/ntb_transport.c
>>> +++ b/drivers/ntb/ntb_transport_core.c
>>> @@ -47,6 +47,9 @@
>>>   * Contact Information:
>>>   * Jon Mason <jon.mason@intel.com>
>>>   */
>>> +#include <linux/atomic.h>
>>> +#include <linux/bug.h>
>>> +#include <linux/compiler.h>
>>>  #include <linux/debugfs.h>
>>>  #include <linux/delay.h>
>>>  #include <linux/dmaengine.h>
>>> @@ -71,6 +74,8 @@
>>>  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
>>>  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
>>>  
>>> +#define NTB_EDMA_MAX_POLL		32
>>> +
>>>  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
>>>  MODULE_VERSION(NTB_TRANSPORT_VER);
>>>  MODULE_LICENSE("Dual BSD/GPL");
>>> @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
>>>  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
>>>  #endif
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>
>> This comment applies throughout this patch. Doing ifdefs inside C source is pretty frowned upon in the kernel. The preferred way is to only have ifdefs in the header files. So please give this a bit more consideration and see if it can be done differently to address this.
> 
> I agree, there is no good reason to keep those remaining ifdefs at all.
> I'll clean it up. Thanks for pointing this out.
> 
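For example, something along these lines (just a rough sketch, not
necessarily the final shape) would keep ntb_transport_core.c completely
free of ifdefs by letting ntb_edma.h provide inline stubs when the
option is off:

	#ifdef CONFIG_NTB_TRANSPORT_EDMA
	int ntb_edma_setup_peer(struct ntb_dev *ndev);
	void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
	#else
	static inline int ntb_edma_setup_peer(struct ntb_dev *ndev)
	{
		return -EOPNOTSUPP;
	}
	static inline void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
	{
	}
	#endif
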
>>
>>> +#include "ntb_edma.h"
>>> +static bool use_remote_edma;
>>> +module_param(use_remote_edma, bool, 0644);
>>> +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
>>> +#endif
>>> +
>>>  static struct dentry *nt_debugfs_dir;
>>>  
>>>  /* Only two-ports NTB devices are supported */
>>> @@ -125,6 +137,14 @@ struct ntb_queue_entry {
>>>  		struct ntb_payload_header __iomem *tx_hdr;
>>>  		struct ntb_payload_header *rx_hdr;
>>>  	};
>>> +
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	dma_addr_t addr;
>>> +
>>> +	/* Used by RC side only */
>>> +	struct scatterlist sgl;
>>> +	struct work_struct dma_work;
>>> +#endif
>>>  };
>>>  
>>>  struct ntb_rx_info {
>>> @@ -202,6 +222,33 @@ struct ntb_transport_qp {
>>>  	int msi_irq;
>>>  	struct ntb_msi_desc msi_desc;
>>>  	struct ntb_msi_desc peer_msi_desc;
>>> +
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	/*
>>> +	 * For ensuring peer notification in non-atomic context.
>>> +	 * ntb_peer_db_set might sleep or schedule.
>>> +	 */
>>> +	struct work_struct db_work;
>>> +
>>> +	/*
>>> +	 * wr: remote eDMA write transfer (EP -> RC direction)
>>> +	 * rd: remote eDMA read transfer (RC -> EP direction)
>>> +	 */
>>> +	u32 wr_cons;
>>> +	u32 rd_cons;
>>> +	u32 wr_prod;
>>> +	u32 rd_prod;
>>> +	u32 wr_issue;
>>> +	u32 rd_issue;
>>> +
>>> +	spinlock_t ep_tx_lock;
>>> +	spinlock_t ep_rx_lock;
>>> +	spinlock_t rc_lock;
>>> +
>>> +	/* Completion work for read/write transfers. */
>>> +	struct work_struct read_work;
>>> +	struct work_struct write_work;
>>> +#endif
>>
>> For something like this, maybe it needs its own struct instead of an ifdef chunk. Perhaps 'ntb_rx_info' can serve as a core data struct, with EDMA having an 'ntb_rx_info_edma' that embeds 'ntb_rx_info'.
> 
> Thanks again for the suggestion. I'll reorganize things.
> 
> Koichiro
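
To illustrate the kind of thing I mean (a sketch only, names are
placeholders): collect the eDMA-only qp state into its own struct so
that struct ntb_transport_qp itself needs no ifdef block:

	struct ntb_edma_qp_state {
		struct work_struct db_work;
		u32 wr_cons, rd_cons;
		u32 wr_prod, rd_prod;
		u32 wr_issue, rd_issue;
		spinlock_t ep_tx_lock, ep_rx_lock, rc_lock;
		struct work_struct read_work, write_work;
	};

and have struct ntb_transport_qp either embed it or carry a pointer
that is only populated when the eDMA backend is selected.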
> 
>>
>> DJ
>>
>>>  };
>>>  
>>>  struct ntb_transport_mw {
>>> @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
>>>  
>>>  	/* Make sure workq of link event be executed serially */
>>>  	struct mutex link_event_lock;
>>> +
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	remote_edma_mode_t remote_edma_mode;
>>> +	struct device *dma_dev;
>>> +	struct workqueue_struct *wq;
>>> +	struct ntb_edma_chans edma;
>>> +#endif
>>>  };
>>>  
>>>  enum {
>>> @@ -262,6 +316,19 @@ struct ntb_payload_header {
>>>  	unsigned int flags;
>>>  };
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
>>> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
>>> +				   unsigned int *mw_count);
>>> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
>>> +					  unsigned int qp_num);
>>> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
>>> +					    struct ntb_transport_qp *qp);
>>> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
>>> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
>>> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
>>> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
>>> +
>>>  /*
>>>   * Return the device that should be used for DMA mapping.
>>>   *
>>> @@ -298,7 +365,7 @@ enum {
>>>  	container_of((__drv), struct ntb_transport_client, driver)
>>>  
>>>  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
>>> -#define NTB_QP_DEF_NUM_ENTRIES	100
>>> +#define NTB_QP_DEF_NUM_ENTRIES	128
>>>  #define NTB_LINK_DOWN_TIMEOUT	10
>>>  
>>>  static void ntb_transport_rxc_db(unsigned long data);
>>> @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
>>>  	count = ntb_spad_count(nt->ndev);
>>>  	for (i = 0; i < count; i++)
>>>  		ntb_spad_write(nt->ndev, i, 0);
>>> +
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	ntb_edma_teardown_chans(&nt->edma);
>>> +#endif
>>>  }
>>>  
>>>  static void ntb_transport_link_cleanup_work(struct work_struct *work)
>>> @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>>>  
>>>  	/* send the local info, in the opposite order of the way we read it */
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	rc = ntb_transport_edma_ep_init(nt);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
>>> +		return;
>>> +	}
>>> +#endif
>>> +
>>>  	if (nt->use_msi) {
>>>  		rc = ntb_msi_setup_mws(ndev);
>>>  		if (rc) {
>>> @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>>>  
>>>  	nt->link_is_up = true;
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	rc = ntb_transport_edma_rc_init(nt);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
>>> +		goto out1;
>>> +	}
>>> +#endif
>>> +
>>>  	for (i = 0; i < nt->qp_count; i++) {
>>>  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
>>>  
>>> @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
>>>  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
>>>  };
>>>  
>>> +static const struct ntb_transport_backend_ops edma_backend_ops;
>>> +
>>>  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>>>  {
>>>  	struct ntb_transport_ctx *nt;
>>> @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>>>  
>>>  	nt->ndev = ndev;
>>>  
>>> -	nt->backend_ops = default_backend_ops;
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	if (use_remote_edma) {
>>> +		rc = ntb_transport_edma_init(nt, &mw_count);
>>> +		if (rc) {
>>> +			nt->mw_count = 0;
>>> +			goto err;
>>> +		}
>>> +		nt->backend_ops = edma_backend_ops;
>>> +
>>> +		/*
>>> +		 * In remote eDMA mode, we reserve a read channel for Host->EP
>>> +		 * notification, so use_msi is ignored.
>>> +		 */
>>> +		use_msi = false;
>>> +	} else
>>> +#endif
>>> +		nt->backend_ops = default_backend_ops;
>>>  
>>>  	/*
>>>  	 * If we are using MSI, and have at least one extra memory window,
>>> @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>>>  		rc = ntb_transport_init_queue(nt, i);
>>>  		if (rc)
>>>  			goto err2;
>>> +
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +		ntb_transport_edma_init_queue(nt, i);
>>> +#endif
>>>  	}
>>>  
>>>  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
>>> @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>>>  	}
>>>  	kfree(nt->mw_vec);
>>>  err:
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	ntb_transport_edma_uninit(nt);
>>> +#endif
>>>  	kfree(nt);
>>>  	return rc;
>>>  }
>>> @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>>>  
>>>  	nt->qp_bitmap_free &= ~qp_bit;
>>>  
>>> +	qp->qp_bit = qp_bit;
>>>  	qp->cb_data = data;
>>>  	qp->rx_handler = handlers->rx_handler;
>>>  	qp->tx_handler = handlers->tx_handler;
>>>  	qp->event_handler = handlers->event_handler;
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	ntb_transport_edma_create_queue(nt, qp);
>>> +#endif
>>> +
>>>  	dma_cap_zero(dma_mask);
>>>  	dma_cap_set(DMA_MEMCPY, dma_mask);
>>>  
>>> @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>>>  			goto err1;
>>>  
>>>  		entry->qp = qp;
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
>>> +#endif
>>>  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
>>>  			     &qp->rx_free_q);
>>>  	}
>>> @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
>>>   */
>>>  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>>>  {
>>> -	struct pci_dev *pdev;
>>>  	struct ntb_queue_entry *entry;
>>> +	struct pci_dev *pdev;
>>>  	u64 qp_bit;
>>>  
>>>  	if (!qp)
>>> @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>>>  	tasklet_kill(&qp->rxc_db_work);
>>>  
>>>  	cancel_delayed_work_sync(&qp->link_work);
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +	cancel_work_sync(&qp->read_work);
>>> +	cancel_work_sync(&qp->write_work);
>>> +#endif
>>>  
>>>  	qp->cb_data = NULL;
>>>  	qp->rx_handler = NULL;
>>> @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
>>>  }
>>>  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
>>>  
>>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
>>> +/*
>>> + * Remote eDMA mode implementation
>>> + */
>>> +struct ntb_edma_desc {
>>> +	u32 len;
>>> +	u32 flags;
>>> +	u64 addr; /* DMA address */
>>> +	u64 data;
>>> +};
>>> +
>>> +struct ntb_edma_ring {
>>> +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
>>> +	u32 head;
>>> +	u32 tail;
>>> +};
>>> +
>>> +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
>>> +
>>> +#define __NTB_EDMA_CHECK_INDEX(_i)					\
>>> +({									\
>>> +	unsigned long __i = (unsigned long)(_i);			\
>>> +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
>>> +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
>>> +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
>>> +	__i;								\
>>> +})
>>> +
>>> +#define NTB_EDMA_DESC_I(qp, i, n)					\
>>> +({									\
>>> +	typeof(qp) __qp = (qp);						\
>>> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
>>> +	(struct ntb_edma_desc *)					\
>>> +		((char *)(__qp)->rx_buff +				\
>>> +		 (sizeof(struct ntb_edma_ring) * n) +			\
>>> +		 NTB_EDMA_DESC_OFF(__i));				\
>>> +})
>>> +
>>> +#define NTB_EDMA_DESC_O(qp, i, n)					\
>>> +({									\
>>> +	typeof(qp) __qp = (qp);						\
>>> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
>>> +	(struct ntb_edma_desc __iomem *)				\
>>> +		((char __iomem *)(__qp)->tx_mw +			\
>>> +		 (sizeof(struct ntb_edma_ring) * n) +			\
>>> +		 NTB_EDMA_DESC_OFF(__i));				\
>>> +})
>>> +
>>> +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
>>> +				(sizeof(struct ntb_edma_ring) * n) +	\
>>> +				offsetof(struct ntb_edma_ring, head)))
>>> +#define NTB_EDMA_HEAD_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw +	\
>>> +				(sizeof(struct ntb_edma_ring) * n) +	\
>>> +				offsetof(struct ntb_edma_ring, head)))
>>> +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
>>> +				(sizeof(struct ntb_edma_ring) * n) +	\
>>> +				offsetof(struct ntb_edma_ring, tail)))
>>> +#define NTB_EDMA_TAIL_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw +	\
>>> +				(sizeof(struct ntb_edma_ring) * n) +	\
>>> +				offsetof(struct ntb_edma_ring, tail)))
>>> +
>>> +/*
>>> + * Macro naming rule:
>>> + *   NTB_DESC_RD_EP_I (as an example)
>>> + *       ^^^^ ^^ ^^ ^
>>> + *       :    :  :  `-- I(n) = local rx_buff view, O(ut) = peer view via tx_mw.
>>> + *       :    :  `----- Who uses this macro (EP or RC).
>>> + *       :    `-------- Transfer direction: RD (RC->EP) or WR (EP->RC).
>>> + *       `------------- DESC / HEAD / TAIL
>>> + *
>>> + * Read transfers (RC->EP):
>>> + *
>>> + *   EP view (outbound, written via NTB):
>>> + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *           :
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *       - head: NTB_HEAD_RD_EP_O(qp)
>>> + *       - tail: NTB_TAIL_RD_EP_I(qp)
>>> + *
>>> + *   RC view (inbound, local mapping):
>>> + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *           :
>>> + *           [ len ][ flags ][ addr ][ data ]
>>> + *       - head: NTB_HEAD_RD_RC_I(qp)
>>> + *       - tail: NTB_TAIL_RD_RC_O(qp)
>>> + *
>>> + * Write transfers (EP -> RC) are analogous but use
>>> + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
>>> + * and NTB_TAIL_WR_{EP_I,RC_O}().
>>> + */
>>> +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
>>> +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
>>> +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
>>> +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
>>> +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
>>> +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
>>> +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
>>> +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
>>> +
>>> +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
>>> +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
>>> +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
>>> +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
>>> +
>>> +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
>>> +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
>>> +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
>>> +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
>>> +
>>> +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
>>> +{
>>> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
>>> +}
>>> +
>>> +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
>>> +{
>>> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
>>> +}
>>> +
>>> +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
>>> +{
>>> +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
>>> +}
>>> +
>>> +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
>>> +{
>>> +	unsigned int head, tail;
>>> +
>>> +	if (ntb_qp_edma_is_ep(qp)) {
>>> +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
>>> +			/* In this scope, only 'head' might proceed */
>>> +			tail = READ_ONCE(qp->wr_cons);
>>> +			head = READ_ONCE(qp->wr_prod);
>>> +		}
>>> +		return ntb_edma_ring_free_entry(head, tail);
>>> +	}
>>> +
>>> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
>>> +		/* In this scope, only 'head' might proceed */
>>> +		tail = READ_ONCE(qp->rd_issue);
>>> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
>>> +	}
>>> +	/*
>>> +	 * On RC side, 'used' amount indicates how much EP side
>>> +	 * has refilled, which are available for us to use for TX.
>>> +	 */
>>> +	return ntb_edma_ring_used_entry(head, tail);
>>> +}
>>> +
>>> +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
>>> +						  struct ntb_transport_qp *qp)
>>> +{
>>> +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
>>> +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
>>> +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
>>> +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
>>> +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
>>> +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
>>> +
>>> +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
>>> +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
>>> +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
>>> +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
>>> +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
>>> +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
>>> +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
>>> +	seq_putc(s, '\n');
>>> +
>>> +	seq_puts(s, "Using Remote eDMA - Yes\n");
>>> +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
>>> +}
>>> +
>>> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
>>> +{
>>> +	struct ntb_dev *ndev = nt->ndev;
>>> +
>>> +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
>>> +		ntb_edma_teardown_isr(&ndev->pdev->dev);
>>> +
>>> +	if (nt->wq)
>>> +		destroy_workqueue(nt->wq);
>>> +	nt->wq = NULL;
>>> +}
>>> +
>>> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
>>> +				   unsigned int *mw_count)
>>> +{
>>> +	struct ntb_dev *ndev = nt->ndev;
>>> +
>>> +	/*
>>> +	 * We need at least one MW for the transport plus one MW reserved
>>> +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
>>> +	 */
>>> +	if (*mw_count <= 1) {
>>> +		dev_err(&ndev->dev,
>>> +			"remote eDMA requires at least two MWs (have %u)\n",
>>> +			*mw_count);
>>> +		return -ENODEV;
>>> +	}
>>> +
>>> +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
>>> +	if (!nt->wq) {
>>> +		ntb_transport_edma_uninit(nt);
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	/* Reserve the last peer MW exclusively for the eDMA window. */
>>> +	*mw_count -= 1;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void ntb_transport_edma_db_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp =
>>> +			container_of(work, struct ntb_transport_qp, db_work);
>>> +
>>> +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
>>> +}
>>> +
>>> +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
>>> +{
>>> +	if (ntb_qp_edma_is_rc(qp))
>>> +		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
>>> +			return;
>>> +
>>> +	/*
>>> +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
>>> +	 * may sleep, delegate the actual doorbell write to a workqueue.
>>> +	 */
>>> +	queue_work(system_highpri_wq, &qp->db_work);
>>> +}
>>> +
>>> +static void ntb_transport_edma_isr(void *data, int qp_num)
>>> +{
>>> +	struct ntb_transport_ctx *nt = data;
>>> +	struct ntb_transport_qp *qp;
>>> +
>>> +	if (qp_num < 0 || qp_num >= nt->qp_count)
>>> +		return;
>>> +
>>> +	qp = &nt->qp_vec[qp_num];
>>> +	if (WARN_ON(!qp))
>>> +		return;
>>> +
>>> +	queue_work(nt->wq, &qp->read_work);
>>> +	queue_work(nt->wq, &qp->write_work);
>>> +}
>>> +
>>> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
>>> +{
>>> +	struct ntb_dev *ndev = nt->ndev;
>>> +	struct pci_dev *pdev = ndev->pdev;
>>> +	int rc;
>>> +
>>> +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
>>> +		return 0;
>>> +
>>> +	rc = ntb_edma_setup_peer(ndev);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
>>> +		return rc;
>>> +	}
>>> +
>>> +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
>>> +		return rc;
>>> +	}
>>> +
>>> +	nt->remote_edma_mode = REMOTE_EDMA_RC;
>>> +	return 0;
>>> +}
>>> +
>>> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
>>> +{
>>> +	struct ntb_dev *ndev = nt->ndev;
>>> +	struct pci_dev *pdev = ndev->pdev;
>>> +	struct pci_epc *epc;
>>> +	int rc;
>>> +
>>> +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
>>> +		return 0;
>>> +
>>> +	/* Only EP side can return pci_epc */
>>> +	epc = ntb_get_pci_epc(ndev);
>>> +	if (!epc)
>>> +		return 0;
>>> +
>>> +	rc = ntb_edma_setup_mws(ndev);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev,
>>> +			"Failed to set up memory window for eDMA: %d\n", rc);
>>> +		return rc;
>>> +	}
>>> +
>>> +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
>>> +	if (rc) {
>>> +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
>>> +		return rc;
>>> +	}
>>> +
>>> +	nt->remote_edma_mode = REMOTE_EDMA_EP;
>>> +	return 0;
>>> +}
>>> +
>>> +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
>>> +					  unsigned int qp_num)
>>> +{
>>> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
>>> +	struct ntb_dev *ndev = nt->ndev;
>>> +	struct ntb_queue_entry *entry;
>>> +	struct ntb_transport_mw *mw;
>>> +	unsigned int mw_num, mw_count, qp_count;
>>> +	unsigned int qp_offset, rx_info_offset;
>>> +	unsigned int mw_size, mw_size_per_qp;
>>> +	unsigned int num_qps_mw;
>>> +	size_t edma_total;
>>> +	unsigned int i;
>>> +	int node;
>>> +
>>> +	mw_count = nt->mw_count;
>>> +	qp_count = nt->qp_count;
>>> +
>>> +	mw_num = QP_TO_MW(nt, qp_num);
>>> +	mw = &nt->mw_vec[mw_num];
>>> +
>>> +	if (!mw->virt_addr)
>>> +		return -ENOMEM;
>>> +
>>> +	if (mw_num < qp_count % mw_count)
>>> +		num_qps_mw = qp_count / mw_count + 1;
>>> +	else
>>> +		num_qps_mw = qp_count / mw_count;
>>> +
>>> +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
>>> +	if (max_mw_size && mw_size > max_mw_size)
>>> +		mw_size = max_mw_size;
>>> +
>>> +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
>>> +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
>>> +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
>>> +
>>> +	qp->tx_mw_size = mw_size_per_qp;
>>> +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
>>> +	if (!qp->tx_mw)
>>> +		return -EINVAL;
>>> +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
>>> +	if (!qp->tx_mw_phys)
>>> +		return -EINVAL;
>>> +	qp->rx_info = qp->tx_mw + rx_info_offset;
>>> +	qp->rx_buff = mw->virt_addr + qp_offset;
>>> +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
>>> +
>>> +	/* Due to housekeeping, there must be at least 2 buffs */
>>> +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
>>> +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
>>> +
>>> +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
>>> +	edma_total = 2 * sizeof(struct ntb_edma_ring);
>>> +	if (rx_info_offset < edma_total) {
>>> +		dev_err(&ndev->dev, "Ring space requires %zuB but only %uB available\n",
>>> +			edma_total, rx_info_offset);
>>> +		return -EINVAL;
>>> +	}
>>> +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
>>> +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
>>> +
>>> +	/*
>>> +	 * Checking to see if we have more entries than the default.
>>> +	 * We should add additional entries if that is the case so we
>>> +	 * can be in sync with the transport frames.
>>> +	 */
>>> +	node = dev_to_node(&ndev->dev);
>>> +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
>>> +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
>>> +		if (!entry)
>>> +			return -ENOMEM;
>>> +
>>> +		entry->qp = qp;
>>> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
>>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
>>> +			     &qp->rx_free_q);
>>> +		qp->rx_alloc_entry++;
>>> +	}
>>> +
>>> +	memset(qp->rx_buff, 0, edma_total);
>>> +
>>> +	qp->rx_pkts = 0;
>>> +	qp->tx_pkts = 0;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
>>> +{
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	struct ntb_queue_entry *entry;
>>> +	struct ntb_edma_desc *in;
>>> +	unsigned int len;
>>> +	u32 idx, flags;
>>> +
>>> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
>>> +				     qp->rd_cons) == 0)
>>> +		return 0;
>>> +
>>> +	idx = ntb_edma_ring_idx(qp->rd_cons);
>>> +	in = NTB_DESC_RD_EP_I(qp, idx);
>>> +	if (!(in->flags & DESC_DONE_FLAG))
>>> +		return 0;
>>> +
>>> +	flags = in->flags;
>>> +	in->flags = 0;
>>> +	len = in->len; /* might be smaller than entry->len */
>>> +
>>> +	entry = (struct ntb_queue_entry *)(in->data);
>>> +	if (WARN_ON(!entry))
>>> +		return 0;
>>> +
>>> +	if (flags & LINK_DOWN_FLAG) {
>>> +		ntb_qp_link_down(qp);
>>> +		qp->rd_cons++;
>>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
>>> +		return 1;
>>> +	}
>>> +
>>> +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
>>> +
>>> +	qp->rx_bytes += len;
>>> +	qp->rx_pkts++;
>>> +	qp->rd_cons++;
>>> +
>>> +	if (qp->rx_handler && qp->client_ready)
>>> +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
>>> +
>>> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
>>> +	return 1;
>>> +}
>>> +
>>> +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
>>> +{
>>> +	struct ntb_queue_entry *entry;
>>> +	struct ntb_edma_desc *in;
>>> +	u32 idx;
>>> +
>>> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
>>> +				     qp->wr_cons) == 0)
>>> +		return 0;
>>> +
>>> +	idx = ntb_edma_ring_idx(qp->wr_cons);
>>> +	in = NTB_DESC_WR_EP_I(qp, idx);
>>> +
>>> +	entry = (struct ntb_queue_entry *)(in->data);
>>> +	if (WARN_ON(!entry))
>>> +		return 0;
>>> +
>>> +	qp->wr_cons++;
>>> +
>>> +	if (qp->tx_handler)
>>> +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
>>> +
>>> +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
>>> +	return 1;
>>> +}
>>> +
>>> +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, read_work);
>>> +	unsigned int i;
>>> +
>>> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
>>> +		if (!ntb_transport_edma_ep_read_complete(qp))
>>> +			break;
>>> +	}
>>> +
>>> +	if (ntb_transport_edma_ep_read_complete(qp))
>>> +		queue_work(qp->transport->wq, &qp->read_work);
>>> +}
>>> +
>>> +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, write_work);
>>> +	unsigned int i;
>>> +
>>> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
>>> +		if (!ntb_transport_edma_ep_write_complete(qp))
>>> +			break;
>>> +	}
>>> +
>>> +	if (ntb_transport_edma_ep_write_complete(qp))
>>> +		queue_work(qp->transport->wq, &qp->write_work);
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, write_work);
>>> +	struct ntb_queue_entry *entry;
>>> +	struct ntb_edma_desc *in;
>>> +	unsigned int len;
>>> +	void *cb_data;
>>> +	u32 idx;
>>> +
>>> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
>>> +					qp->wr_cons) != 0) {
>>> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
>>> +		smp_rmb();
>>> +
>>> +		idx = ntb_edma_ring_idx(qp->wr_cons);
>>> +		in = NTB_DESC_WR_RC_I(qp, idx);
>>> +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
>>> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
>>> +			break;
>>> +
>>> +		in->data = 0;
>>> +
>>> +		cb_data = entry->cb_data;
>>> +		len = entry->len;
>>> +
>>> +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
>>> +
>>> +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
>>> +			ntb_qp_link_down(qp);
>>> +			continue;
>>> +		}
>>> +
>>> +		ntb_transport_edma_notify_peer(qp);
>>> +
>>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
>>> +
>>> +		if (qp->rx_handler && qp->client_ready)
>>> +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
>>> +
>>> +		/* stat updates */
>>> +		qp->rx_bytes += len;
>>> +		qp->rx_pkts++;
>>> +	}
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_write_cb(void *data,
>>> +					   const struct dmaengine_result *res)
>>> +{
>>> +	struct ntb_queue_entry *entry = data;
>>> +	struct ntb_transport_qp *qp = entry->qp;
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +	enum dmaengine_tx_result dma_err = res->result;
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +
>>> +	switch (dma_err) {
>>> +	case DMA_TRANS_READ_FAILED:
>>> +	case DMA_TRANS_WRITE_FAILED:
>>> +	case DMA_TRANS_ABORTED:
>>> +		entry->errors++;
>>> +		entry->len = -EIO;
>>> +		break;
>>> +	case DMA_TRANS_NOERROR:
>>> +	default:
>>> +		break;
>>> +	}
>>> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
>>> +	sg_dma_address(&entry->sgl) = 0;
>>> +
>>> +	entry->flags |= DESC_DONE_FLAG;
>>> +
>>> +	queue_work(nt->wq, &qp->write_work);
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, read_work);
>>> +	struct ntb_edma_desc *in, __iomem *out;
>>> +	struct ntb_queue_entry *entry;
>>> +	unsigned int len;
>>> +	void *cb_data;
>>> +	u32 idx;
>>> +
>>> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
>>> +					qp->rd_cons) != 0) {
>>> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
>>> +		smp_rmb();
>>> +
>>> +		idx = ntb_edma_ring_idx(qp->rd_cons);
>>> +		in = NTB_DESC_RD_RC_I(qp, idx);
>>> +		entry = (struct ntb_queue_entry *)in->data;
>>> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
>>> +			break;
>>> +
>>> +		in->data = 0;
>>> +
>>> +		cb_data = entry->cb_data;
>>> +		len = entry->len;
>>> +
>>> +		out = NTB_DESC_RD_RC_O(qp, idx);
>>> +
>>> +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
>>> +
>>> +		/*
>>> +		 * No need to add barrier in-between to enforce ordering here.
>>> +		 * The other side proceeds only after both flags and tail are
>>> +		 * updated.
>>> +		 */
>>> +		iowrite32(entry->flags, &out->flags);
>>> +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
>>> +
>>> +		ntb_transport_edma_notify_peer(qp);
>>> +
>>> +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
>>> +			     &qp->tx_free_q);
>>> +
>>> +		if (qp->tx_handler)
>>> +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
>>> +
>>> +		/* stat updates */
>>> +		qp->tx_bytes += len;
>>> +		qp->tx_pkts++;
>>> +	}
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_read_cb(void *data,
>>> +					  const struct dmaengine_result *res)
>>> +{
>>> +	struct ntb_queue_entry *entry = data;
>>> +	struct ntb_transport_qp *qp = entry->qp;
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	enum dmaengine_tx_result dma_err = res->result;
>>> +
>>> +	switch (dma_err) {
>>> +	case DMA_TRANS_READ_FAILED:
>>> +	case DMA_TRANS_WRITE_FAILED:
>>> +	case DMA_TRANS_ABORTED:
>>> +		entry->errors++;
>>> +		entry->len = -EIO;
>>> +		break;
>>> +	case DMA_TRANS_NOERROR:
>>> +	default:
>>> +		break;
>>> +	}
>>> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
>>> +	sg_dma_address(&entry->sgl) = 0;
>>> +
>>> +	entry->flags |= DESC_DONE_FLAG;
>>> +
>>> +	queue_work(nt->wq, &qp->read_work);
>>> +}
>>> +
>>> +static int ntb_transport_edma_rc_write_start(struct device *d,
>>> +					     struct dma_chan *chan, size_t len,
>>> +					     dma_addr_t ep_src, void *rc_dst,
>>> +					     struct ntb_queue_entry *entry)
>>> +{
>>> +	struct scatterlist *sgl = &entry->sgl;
>>> +	struct dma_async_tx_descriptor *txd;
>>> +	struct dma_slave_config cfg;
>>> +	dma_cookie_t cookie;
>>> +	int nents, rc;
>>> +
>>> +	if (!d)
>>> +		return -ENODEV;
>>> +
>>> +	if (!chan)
>>> +		return -ENXIO;
>>> +
>>> +	if (WARN_ON(!ep_src || !rc_dst))
>>> +		return -EINVAL;
>>> +
>>> +	if (WARN_ON(sg_dma_address(sgl)))
>>> +		return -EINVAL;
>>> +
>>> +	sg_init_one(sgl, rc_dst, len);
>>> +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
>>> +	if (nents <= 0)
>>> +		return -EIO;
>>> +
>>> +	memset(&cfg, 0, sizeof(cfg));
>>> +	cfg.src_addr       = ep_src;
>>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.direction      = DMA_DEV_TO_MEM;
>>> +	rc = dmaengine_slave_config(chan, &cfg);
>>> +	if (rc)
>>> +		goto out_unmap;
>>> +
>>> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
>>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
>>> +	if (!txd) {
>>> +		rc = -EIO;
>>> +		goto out_unmap;
>>> +	}
>>> +
>>> +	txd->callback_result = ntb_transport_edma_rc_write_cb;
>>> +	txd->callback_param = entry;
>>> +
>>> +	cookie = dmaengine_submit(txd);
>>> +	if (dma_submit_error(cookie)) {
>>> +		rc = -EIO;
>>> +		goto out_unmap;
>>> +	}
>>> +	dma_async_issue_pending(chan);
>>> +	return 0;
>>> +out_unmap:
>>> +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
>>> +	return rc;
>>> +}
>>> +
>>> +static int ntb_transport_edma_rc_read_start(struct device *d,
>>> +					    struct dma_chan *chan, size_t len,
>>> +					    void *rc_src, dma_addr_t ep_dst,
>>> +					    struct ntb_queue_entry *entry)
>>> +{
>>> +	struct scatterlist *sgl = &entry->sgl;
>>> +	struct dma_async_tx_descriptor *txd;
>>> +	struct dma_slave_config cfg;
>>> +	dma_cookie_t cookie;
>>> +	int nents, rc;
>>> +
>>> +	if (!d)
>>> +		return -ENODEV;
>>> +
>>> +	if (!chan)
>>> +		return -ENXIO;
>>> +
>>> +	if (WARN_ON(!rc_src || !ep_dst))
>>> +		return -EINVAL;
>>> +
>>> +	if (WARN_ON(sg_dma_address(sgl)))
>>> +		return -EINVAL;
>>> +
>>> +	sg_init_one(sgl, rc_src, len);
>>> +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
>>> +	if (nents <= 0)
>>> +		return -EIO;
>>> +
>>> +	memset(&cfg, 0, sizeof(cfg));
>>> +	cfg.dst_addr       = ep_dst;
>>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>>> +	cfg.direction      = DMA_MEM_TO_DEV;
>>> +	rc = dmaengine_slave_config(chan, &cfg);
>>> +	if (rc)
>>> +		goto out_unmap;
>>> +
>>> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
>>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
>>> +	if (!txd) {
>>> +		rc = -EIO;
>>> +		goto out_unmap;
>>> +	}
>>> +
>>> +	txd->callback_result = ntb_transport_edma_rc_read_cb;
>>> +	txd->callback_param = entry;
>>> +
>>> +	cookie = dmaengine_submit(txd);
>>> +	if (dma_submit_error(cookie)) {
>>> +		rc = -EIO;
>>> +		goto out_unmap;
>>> +	}
>>> +	dma_async_issue_pending(chan);
>>> +	return 0;
>>> +out_unmap:
>>> +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
>>> +	return rc;
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_queue_entry *entry = container_of(
>>> +				work, struct ntb_queue_entry, dma_work);
>>> +	struct ntb_transport_qp *qp = entry->qp;
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	struct dma_chan *chan;
>>> +	int rc;
>>> +
>>> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
>>> +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
>>> +					       entry->addr, entry->buf, entry);
>>> +	if (rc) {
>>> +		entry->errors++;
>>> +		entry->len = -EIO;
>>> +		entry->flags |= DESC_DONE_FLAG;
>>> +		queue_work(nt->wq, &qp->write_work);
>>> +		return;
>>> +	}
>>> +}
>>> +
>>> +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
>>> +{
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +	unsigned int budget = NTB_EDMA_MAX_POLL;
>>> +	struct ntb_queue_entry *entry;
>>> +	struct ntb_edma_desc *in;
>>> +	dma_addr_t ep_src;
>>> +	u32 len, idx;
>>> +
>>> +	while (budget) {
>>> +		budget--;
>>> +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
>>> +					     qp->wr_issue) == 0)
>>> +			break;
>>> +
>>> +		idx = ntb_edma_ring_idx(qp->wr_issue);
>>> +		in = NTB_DESC_WR_RC_I(qp, idx);
>>> +
>>> +		len = READ_ONCE(in->len);
>>> +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
>>> +
>>> +		/* Prepare 'entry' for write completion */
>>> +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
>>> +		if (!entry) {
>>> +			qp->rx_err_no_buf++;
>>> +			break;
>>> +		}
>>> +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
>>> +			entry->flags &= ~DESC_DONE_FLAG;
>>> +		entry->len = len; /* NB. entry->len can be <=0 */
>>> +		entry->addr = ep_src;
>>> +
>>> +		/*
>>> +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
>>> +		 * so it needs to be set before wr_issue++.
>>> +		 */
>>> +		in->data = (uintptr_t)entry;
>>> +
>>> +		/* Ensure in->data visible before wr_issue++ */
>>> +		smp_wmb();
>>> +
>>> +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
>>> +
>>> +		if (!len) {
>>> +			entry->flags |= DESC_DONE_FLAG;
>>> +			queue_work(nt->wq, &qp->write_work);
>>> +			continue;
>>> +		}
>>> +
>>> +		if (in->flags & LINK_DOWN_FLAG) {
>>> +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
>>> +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
>>> +			queue_work(nt->wq, &qp->write_work);
>>> +			continue;
>>> +		}
>>> +
>>> +		queue_work(nt->wq, &entry->dma_work);
>>> +	}
>>> +
>>> +	if (!budget)
>>> +		tasklet_schedule(&qp->rxc_db_work);
>>> +}
>>> +
>>> +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
>>> +					    struct ntb_queue_entry *entry)
>>> +{
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +	struct ntb_edma_desc *in, __iomem *out;
>>> +	unsigned int len = entry->len;
>>> +	struct dma_chan *chan;
>>> +	u32 issue, idx, head;
>>> +	dma_addr_t ep_dst;
>>> +	int rc;
>>> +
>>> +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
>>> +
>>> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
>>> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
>>> +		issue = qp->rd_issue;
>>> +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
>>> +			qp->tx_ring_full++;
>>> +			return -ENOSPC;
>>> +		}
>>> +
>>> +		/*
>>> +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
>>> +		 * so it needs to be set before rd_issue++.
>>> +		 */
>>> +		idx = ntb_edma_ring_idx(issue);
>>> +		in = NTB_DESC_RD_RC_I(qp, idx);
>>> +		in->data = (uintptr_t)entry;
>>> +
>>> +		/* Make in->data visible before rd_issue++ */
>>> +		smp_wmb();
>>> +
>>> +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
>>> +	}
>>> +
>>> +	/* Publish the final transfer length to the EP side */
>>> +	out = NTB_DESC_RD_RC_O(qp, idx);
>>> +	iowrite32(len, &out->len);
>>> +	ioread32(&out->len);
>>> +
>>> +	if (unlikely(!len)) {
>>> +		entry->flags |= DESC_DONE_FLAG;
>>> +		queue_work(nt->wq, &qp->read_work);
>>> +		return 0;
>>> +	}
>>> +
>>> +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
>>> +	dma_rmb();
>>> +
>>> +	/* kick remote eDMA read transfer */
>>> +	ep_dst = (dma_addr_t)in->addr;
>>> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
>>> +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
>>> +					      entry->buf, ep_dst, entry);
>>> +	if (rc) {
>>> +		entry->errors++;
>>> +		entry->len = -EIO;
>>> +		entry->flags |= DESC_DONE_FLAG;
>>> +		queue_work(nt->wq, &qp->read_work);
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
>>> +					    struct ntb_queue_entry *entry)
>>> +{
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	struct ntb_edma_desc *in, __iomem *out;
>>> +	unsigned int len = entry->len;
>>> +	dma_addr_t ep_src = 0;
>>> +	u32 idx;
>>> +	int rc;
>>> +
>>> +	if (likely(len)) {
>>> +		ep_src = dma_map_single(dma_dev, entry->buf, len,
>>> +					DMA_TO_DEVICE);
>>> +		rc = dma_mapping_error(dma_dev, ep_src);
>>> +		if (rc)
>>> +			return rc;
>>> +	}
>>> +
>>> +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
>>> +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
>>> +			rc = -ENOSPC;
>>> +			qp->tx_ring_full++;
>>> +			goto out_unmap;
>>> +		}
>>> +
>>> +		idx = ntb_edma_ring_idx(qp->wr_prod);
>>> +		in  = NTB_DESC_WR_EP_I(qp, idx);
>>> +		out = NTB_DESC_WR_EP_O(qp, idx);
>>> +
>>> +		WARN_ON(in->flags & DESC_DONE_FLAG);
>>> +		WARN_ON(entry->flags & DESC_DONE_FLAG);
>>> +		in->flags = 0;
>>> +		in->data  = (uintptr_t)entry;
>>> +		entry->addr  = ep_src;
>>> +
>>> +		iowrite32(len,          &out->len);
>>> +		iowrite32(entry->flags, &out->flags);
>>> +		iowrite64(ep_src,       &out->addr);
>>> +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
>>> +
>>> +		dma_wmb();
>>> +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
>>> +
>>> +		qp->tx_bytes += len;
>>> +		qp->tx_pkts++;
>>> +	}
>>> +
>>> +	ntb_transport_edma_notify_peer(qp);
>>> +
>>> +	return 0;
>>> +out_unmap:
>>> +	if (likely(len))
>>> +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
>>> +	return rc;
>>> +}
>>> +
>>> +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
>>> +					 struct ntb_queue_entry *entry,
>>> +					 void *cb, void *data, unsigned int len,
>>> +					 unsigned int flags)
>>> +{
>>> +	struct device *dma_dev;
>>> +
>>> +	if (entry->addr) {
>>> +		/* Deferred unmap */
>>> +		dma_dev = get_dma_dev(qp->ndev);
>>> +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
>>> +	}
>>> +
>>> +	entry->cb_data = cb;
>>> +	entry->buf = data;
>>> +	entry->len = len;
>>> +	entry->flags = flags;
>>> +	entry->errors = 0;
>>> +	entry->addr = 0;
>>> +
>>> +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
>>> +
>>> +	if (ntb_qp_edma_is_ep(qp))
>>> +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
>>> +	else
>>> +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
>>> +}
>>> +
>>> +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
>>> +					    struct ntb_queue_entry *entry)
>>> +{
>>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
>>> +	struct ntb_edma_desc *in, __iomem *out;
>>> +	unsigned int len = entry->len;
>>> +	void *data = entry->buf;
>>> +	dma_addr_t ep_dst;
>>> +	u32 idx;
>>> +	int rc;
>>> +
>>> +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
>>> +	rc = dma_mapping_error(dma_dev, ep_dst);
>>> +	if (rc)
>>> +		return rc;
>>> +
>>> +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
>>> +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
>>> +				       READ_ONCE(qp->rd_cons))) {
>>> +			rc = -ENOSPC;
>>> +			goto out_unmap;
>>> +		}
>>> +
>>> +		idx = ntb_edma_ring_idx(qp->rd_prod);
>>> +		in = NTB_DESC_RD_EP_I(qp, idx);
>>> +		out = NTB_DESC_RD_EP_O(qp, idx);
>>> +
>>> +		iowrite32(len, &out->len);
>>> +		iowrite64(ep_dst, &out->addr);
>>> +
>>> +		WARN_ON(in->flags & DESC_DONE_FLAG);
>>> +		in->data = (uintptr_t)entry;
>>> +		entry->addr = ep_dst;
>>> +
>>> +		/* Ensure len/addr are visible before the head update */
>>> +		dma_wmb();
>>> +
>>> +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
>>> +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
>>> +	}
>>> +	return 0;
>>> +out_unmap:
>>> +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
>>> +	return rc;
>>> +}
>>> +
>>> +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
>>> +					 struct ntb_queue_entry *entry)
>>> +{
>>> +	int rc;
>>> +
>>> +	/* The behaviour is the same as the default backend for RC side */
>>> +	if (ntb_qp_edma_is_ep(qp)) {
>>> +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
>>> +		if (rc) {
>>> +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
>>> +				     &qp->rx_free_q);
>>> +			return rc;
>>> +		}
>>> +	}
>>> +
>>> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
>>> +
>>> +	if (qp->active)
>>> +		tasklet_schedule(&qp->rxc_db_work);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
>>> +{
>>> +	struct ntb_transport_ctx *nt = qp->transport;
>>> +
>>> +	if (ntb_qp_edma_is_rc(qp))
>>> +		ntb_transport_edma_rc_poll(qp);
>>> +	else if (ntb_qp_edma_is_ep(qp)) {
>>> +		/*
>>> +		 * Make sure we poll the rings even if an eDMA interrupt is
>>> +		 * cleared on the RC side earlier.
>>> +		 */
>>> +		queue_work(nt->wq, &qp->read_work);
>>> +		queue_work(nt->wq, &qp->write_work);
>>> +	} else
>>> +		/* Unreachable */
>>> +		WARN_ON_ONCE(1);
>>> +}
>>> +
>>> +static void ntb_transport_edma_read_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, read_work);
>>> +
>>> +	if (ntb_qp_edma_is_rc(qp))
>>> +		ntb_transport_edma_rc_read_complete_work(work);
>>> +	else if (ntb_qp_edma_is_ep(qp))
>>> +		ntb_transport_edma_ep_read_work(work);
>>> +	else
>>> +		/* Unreachable */
>>> +		WARN_ON_ONCE(1);
>>> +}
>>> +
>>> +static void ntb_transport_edma_write_work(struct work_struct *work)
>>> +{
>>> +	struct ntb_transport_qp *qp = container_of(
>>> +				work, struct ntb_transport_qp, write_work);
>>> +
>>> +	if (ntb_qp_edma_is_rc(qp))
>>> +		ntb_transport_edma_rc_write_complete_work(work);
>>> +	else if (ntb_qp_edma_is_ep(qp))
>>> +		ntb_transport_edma_ep_write_work(work);
>>> +	else
>>> +		/* Unreachable */
>>> +		WARN_ON_ONCE(1);
>>> +}
>>> +
>>> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
>>> +					  unsigned int qp_num)
>>> +{
>>> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
>>> +
>>> +	qp->wr_cons = 0;
>>> +	qp->rd_cons = 0;
>>> +	qp->wr_prod = 0;
>>> +	qp->rd_prod = 0;
>>> +	qp->wr_issue = 0;
>>> +	qp->rd_issue = 0;
>>> +
>>> +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
>>> +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
>>> +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
>>> +}
>>> +
>>> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
>>> +					    struct ntb_transport_qp *qp)
>>> +{
>>> +	spin_lock_init(&qp->ep_tx_lock);
>>> +	spin_lock_init(&qp->ep_rx_lock);
>>> +	spin_lock_init(&qp->rc_lock);
>>> +}
>>> +
>>> +static const struct ntb_transport_backend_ops edma_backend_ops = {
>>> +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
>>> +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
>>> +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
>>> +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
>>> +	.rx_poll = ntb_transport_edma_rx_poll,
>>> +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
>>> +};
>>> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
>>> +
>>>  /**
>>>   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
>>>   * @qp: NTB transport layer queue to be enabled
>>
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months, 1 week ago
On Tue, Dec 02, 2025 at 07:53:59AM -0700, Dave Jiang wrote:
> 
> 
> On 12/1/25 11:59 PM, Koichiro Den wrote:
> > On Mon, Dec 01, 2025 at 02:46:41PM -0700, Dave Jiang wrote:
> >>
> >>
> >> On 11/29/25 9:03 AM, Koichiro Den wrote:
> >>> Add a new transport backend that uses a remote DesignWare eDMA engine
> >>> located on the NTB endpoint to move data between host and endpoint.
> >>>
> >>> In this mode:
> >>>
> >>>   - The endpoint exposes a dedicated memory window that contains the
> >>>     eDMA register block followed by a small control structure (struct
> >>>     ntb_edma_info) and per-channel linked-list (LL) rings.
> >>>
> >>>   - On the endpoint side, ntb_edma_setup_mws() allocates the control
> >>>     structure and LL rings in endpoint memory, then programs an inbound
> >>>     iATU region so that the host can access them via a peer MW.
> >>>
> >>>   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
> >>>     ntb_edma_info and configures a dw-edma DMA device to use the LL
> >>>     rings provided by the endpoint.
> >>>
> >>>   - ntb_transport is extended with a new backend_ops implementation that
> >>>     routes TX and RX enqueue/poll operations through the remote eDMA
> >>>     rings while keeping the existing shared-memory backend intact.
> >>>
> >>>   - The host signals the endpoint via a dedicated DMA read channel.
> >>>     'use_msi' module option is ignored when 'use_remote_edma=1'.
> >>>
> >>> The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
> >>> module parameter (use_remote_edma). When disabled, the existing
> >>> ntb_transport behaviour is unchanged.
> >>>
> >>> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> >>> ---
> >>>  drivers/ntb/Kconfig                           |   11 +
> >>>  drivers/ntb/Makefile                          |    3 +
> >>>  drivers/ntb/ntb_edma.c                        |  628 ++++++++
> >>>  drivers/ntb/ntb_edma.h                        |  128 ++
> >>
> >> I briefly looked over the code. It feels like the EDMA bits should go in drivers/ntb/hw/ rather than drivers/ntb/ given they're pretty specific to the DesignWare hardware. What sits in drivers/ntb should be generic APIs that a different vendor can utilize without having to adapt to DesignWare hardware specifics. So maybe a bit more abstraction is needed?
> > 
> > That makes sense, I'll reorganize things. Thank you for the suggestion.
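
To make that a bit more concrete, the direction I'm leaning towards (a
rough sketch only, names are placeholders and nothing is settled yet) is
a small vendor-neutral DMA backend interface that stays in drivers/ntb,
with the DesignWare-specific parts moved under drivers/ntb/hw/:

	/* hypothetical, for illustration only */
	struct ntb_transport_dma_backend_ops {
		int (*setup)(struct ntb_dev *ndev, void **priv);
		void (*teardown)(void *priv);
		struct dma_chan *(*pick_chan)(void *priv, bool to_peer);
		int (*notify_peer)(void *priv, int qp_num);
	};

so that ntb_transport_core.c only talks to this interface and other
vendors can plug in their own engines.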
> 
> Also, since a new transport is being introduced, please update Documentation/driver-api/ntb.rst. While the current documentation doesn't provide adequate API documentation for the ntb_transport APIs, hopefully the new transport can do better going forward. :) Thank you!

Thanks for the reminder. I'll update ntb.rst in the next revision (perhaps
as part of a split-out series).

Thank you for the review,
Koichiro

> 
> DJ
> 
> > 
> >>
> >>>  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
> >>>  5 files changed, 2048 insertions(+), 3 deletions(-)
> >>>  create mode 100644 drivers/ntb/ntb_edma.c
> >>>  create mode 100644 drivers/ntb/ntb_edma.h
> >>>  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
> >>>
> >>> diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
> >>> index df16c755b4da..db63f02bb116 100644
> >>> --- a/drivers/ntb/Kconfig
> >>> +++ b/drivers/ntb/Kconfig
> >>> @@ -37,4 +37,15 @@ config NTB_TRANSPORT
> >>>  
> >>>  	 If unsure, say N.
> >>>  
> >>> +config NTB_TRANSPORT_EDMA
> >>> +	bool "NTB Transport backed by remote eDMA"
> >>> +	depends on NTB_TRANSPORT
> >>> +	depends on PCI
> >>> +	select DMA_ENGINE
> >>> +	help
> >>> +	  Enable a transport backend that uses a remote DesignWare eDMA engine
> >>> +	  exposed through a dedicated NTB memory window. The host uses the
> >>> +	  endpoint's eDMA engine to move data in both directions.
> >>> +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
> >>> +
> >>>  endif # NTB
> >>> diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
> >>> index 3a6fa181ff99..51f0e1e3aec7 100644
> >>> --- a/drivers/ntb/Makefile
> >>> +++ b/drivers/ntb/Makefile
> >>> @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
> >>>  
> >>>  ntb-y			:= core.o
> >>>  ntb-$(CONFIG_NTB_MSI)	+= msi.o
> >>> +
> >>> +ntb_transport-y					:= ntb_transport_core.o
> >>> +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
> >>> diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
> >>> new file mode 100644
> >>> index 000000000000..cb35e0d56aa8
> >>> --- /dev/null
> >>> +++ b/drivers/ntb/ntb_edma.c
> >>> @@ -0,0 +1,628 @@
> >>> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> >>> +
> >>> +#include <linux/module.h>
> >>> +#include <linux/device.h>
> >>> +#include <linux/pci.h>
> >>> +#include <linux/ntb.h>
> >>> +#include <linux/io.h>
> >>> +#include <linux/iommu.h>
> >>> +#include <linux/dmaengine.h>
> >>> +#include <linux/pci-epc.h>
> >>> +#include <linux/dma/edma.h>
> >>> +#include <linux/irq.h>
> >>> +#include <linux/irqdomain.h>
> >>> +#include <linux/of.h>
> >>> +#include <linux/of_irq.h>
> >>> +#include <dt-bindings/interrupt-controller/arm-gic.h>
> >>> +
> >>> +#include "ntb_edma.h"
> >>> +
> >>> +/*
> >>> + * The interrupt register offsets below are taken from the DesignWare
> >>> + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> >>> + * backend currently only supports this layout.
> >>> + */
> >>> +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> >>> +#define DMA_WRITE_INT_MASK_OFF     0x54
> >>> +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> >>> +#define DMA_READ_INT_STATUS_OFF    0xa0
> >>> +#define DMA_READ_INT_MASK_OFF      0xa8
> >>> +#define DMA_READ_INT_CLEAR_OFF     0xac
> >>> +
> >>> +#define NTB_EDMA_NOTIFY_MAX_QP		64
> >>> +
> >>> +static unsigned int edma_spi = 417; /* 0x1a1 */
> >>> +module_param(edma_spi, uint, 0644);
> >>> +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
> >>> +
> >>> +static u64 edma_regs_phys = 0xe65d5000;
> >>> +module_param(edma_regs_phys, ullong, 0644);
> >>> +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
> >>> +
> >>> +static unsigned long edma_regs_size = 0x1200;
> >>> +module_param(edma_regs_size, ulong, 0644);
> >>> +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
> >>> +
> >>> +struct ntb_edma_intr {
> >>> +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
> >>> +};
> >>> +
> >>> +struct ntb_edma_ctx {
> >>> +	void *ll_wr_virt[EDMA_WR_CH_NUM];
> >>> +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
> >>> +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
> >>> +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
> >>> +
> >>> +	struct ntb_edma_intr *intr_ep_virt;
> >>> +	dma_addr_t intr_ep_phys;
> >>> +	struct ntb_edma_intr *intr_rc_virt;
> >>> +	dma_addr_t intr_rc_phys;
> >>> +	u32 notify_qp_max;
> >>> +
> >>> +	bool initialized;
> >>> +};
> >>> +
> >>> +static struct ntb_edma_ctx edma_ctx;
> >>> +
> >>> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> >>> +
> >>> +struct ntb_edma_interrupt {
> >>> +	int virq;
> >>> +	void __iomem *base;
> >>> +	ntb_edma_interrupt_cb_t cb;
> >>> +	void *data;
> >>> +};
> >>> +
> >>> +static struct ntb_edma_interrupt ntb_edma_intr;
> >>> +
> >>> +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
> >>> +{
> >>> +	struct device_node *np = dev_of_node(dev);
> >>> +	struct device_node *parent;
> >>> +	struct irq_fwspec fwspec = { 0 };
> >>> +	int virq;
> >>> +
> >>> +	parent = of_irq_find_parent(np);
> >>> +	if (!parent)
> >>> +		return -ENODEV;
> >>> +
> >>> +	fwspec.fwnode      = of_fwnode_handle(parent);
> >>> +	fwspec.param_count = 3;
> >>> +	fwspec.param[0]    = GIC_SPI;
> >>> +	fwspec.param[1]    = spi;
> >>> +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
> >>> +
> >>> +	virq = irq_create_fwspec_mapping(&fwspec);
> >>> +	of_node_put(parent);
> >>> +	return (virq > 0) ? virq : -EINVAL;
> >>> +}
> >>> +
> >>> +static irqreturn_t ntb_edma_isr(int irq, void *data)
> >>> +{
> >>> +	struct ntb_edma_interrupt *v = data;
> >>> +	u32 mask = BIT(EDMA_RD_CH_NUM);
> >>> +	u32 i, val;
> >>> +
> >>> +	/*
> >>> +	 * We do not ack interrupts here but instead we mask all local interrupt
> >>> +	 * sources except the read channel used for notification. This reduces
> >>> +	 * needless ISR invocations.
> >>> +	 *
> >>> +	 * In theory we could configure LIE=1/RIE=0 only for the notification
> >>> +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
> >>> +	 * require intrusive changes to the dw-edma core.
> >>> +	 *
> >>> +	 * Note: The host side may have already cleared the read interrupt used
> >>> +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
> >>> +	 * way to detect it. As a result, we cannot reliably tell which specific
> >>> +	 * channel triggered this interrupt, so we scan intr_ep_virt->db[]
> >>> +	 * below to find the queues that need servicing.
> >>> +	 */
> >>> +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> >>> +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
> >>> +
> >>> +	if (!v->cb || !edma_ctx.intr_ep_virt)
> >>> +		return IRQ_HANDLED;
> >>> +
> >>> +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
> >>> +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
> >>> +		if (!val)
> >>> +			continue;
> >>> +
> >>> +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
> >>> +		v->cb(v->data, i);
> >>> +	}
> >>> +
> >>> +	return IRQ_HANDLED;
> >>> +}
> >>> +
> >>> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> >>> +		       ntb_edma_interrupt_cb_t cb, void *data)
> >>> +{
> >>> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> >>> +	int virq = ntb_edma_map_spi_to_virq(epc_dev->parent, edma_spi);
> >>> +	int ret;
> >>> +
> >>> +	if (virq < 0) {
> >>> +		dev_err(dev, "failed to get virq (%d)\n", virq);
> >>> +		return virq;
> >>> +	}
> >>> +
> >>> +	v->virq = virq;
> >>> +	v->cb = cb;
> >>> +	v->data = data;
> >>> +	if (edma_regs_phys && !v->base)
> >>> +		v->base = devm_ioremap(dev, edma_regs_phys, edma_regs_size);
> >>> +	if (!v->base) {
> >>> +		dev_err(dev, "failed to setup v->base\n");
> >>> +		return -ENOMEM;
> >>> +	}
> >>> +	ret = devm_request_irq(dev, v->virq, ntb_edma_isr, 0, "ntb-edma", v);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	if (v->base) {
> >>> +		iowrite32(0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> >>> +		iowrite32(0x0, v->base + DMA_READ_INT_MASK_OFF);
> >>> +	}
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +void ntb_edma_teardown_isr(struct device *dev)
> >>> +{
> >>> +	struct ntb_edma_interrupt *v = &ntb_edma_intr;
> >>> +
> >>> +	/* Mask all write/read interrupts so we don't get called again. */
> >>> +	if (v->base) {
> >>> +		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> >>> +		iowrite32(~0x0, v->base + DMA_READ_INT_MASK_OFF);
> >>> +	}
> >>> +
> >>> +	if (v->virq > 0)
> >>> +		devm_free_irq(dev, v->virq, v);
> >>> +
> >>> +	if (v->base)
> >>> +		devm_iounmap(dev, v->base);
> >>> +
> >>> +	v->virq = 0;
> >>> +	v->cb = NULL;
> >>> +	v->data = NULL;
> >>> +}
> >>> +
> >>> +int ntb_edma_setup_mws(struct ntb_dev *ndev)
> >>> +{
> >>> +	const size_t info_bytes = PAGE_SIZE;
> >>> +	resource_size_t size_max, offset;
> >>> +	dma_addr_t intr_phys, info_phys;
> >>> +	u32 wr_done = 0, rd_done = 0;
> >>> +	struct ntb_edma_intr *intr;
> >>> +	struct ntb_edma_info *info;
> >>> +	int peer_mw, mw_index, rc;
> >>> +	struct iommu_domain *dom;
> >>> +	bool reg_mapped = false;
> >>> +	size_t ll_bytes, size;
> >>> +	struct pci_epc *epc;
> >>> +	struct device *dev;
> >>> +	unsigned long iova;
> >>> +	phys_addr_t phys;
> >>> +	u64 need;
> >>> +	u32 i;
> >>> +
> >>> +	/* +1 is for the notification (interrupt) channel */
> >>> +	ll_bytes = (EDMA_WR_CH_NUM + EDMA_RD_CH_NUM + 1) * DMA_LLP_MEM_SIZE;
> >>> +	need = EDMA_REG_SIZE + info_bytes + ll_bytes;
> >>> +
> >>> +	epc = ntb_get_pci_epc(ndev);
> >>> +	if (!epc)
> >>> +		return -ENODEV;
> >>> +	dev = epc->dev.parent;
> >>> +
> >>> +	if (edma_ctx.initialized)
> >>> +		return 0;
> >>> +
> >>> +	info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL);
> >>> +	if (!info)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	memset(info, 0, info_bytes);
> >>> +	info->magic = NTB_EDMA_INFO_MAGIC;
> >>> +	info->wr_cnt = EDMA_WR_CH_NUM;
> >>> +	info->rd_cnt = EDMA_RD_CH_NUM + 1; /* +1 for the notification channel */
> >>> +	info->regs_phys = edma_regs_phys;
> >>> +	info->ll_stride = DMA_LLP_MEM_SIZE;
> >>> +
> >>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> >>> +		edma_ctx.ll_wr_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> >>> +							 &edma_ctx.ll_wr_phys[i],
> >>> +							 GFP_KERNEL,
> >>> +							 DMA_ATTR_FORCE_CONTIGUOUS);
> >>> +		if (!edma_ctx.ll_wr_virt[i]) {
> >>> +			rc = -ENOMEM;
> >>> +			goto err_free_ll;
> >>> +		}
> >>> +		wr_done++;
> >>> +		info->ll_wr_phys[i] = edma_ctx.ll_wr_phys[i];
> >>> +	}
> >>> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> >>> +		edma_ctx.ll_rd_virt[i] = dma_alloc_attrs(dev, DMA_LLP_MEM_SIZE,
> >>> +							 &edma_ctx.ll_rd_phys[i],
> >>> +							 GFP_KERNEL,
> >>> +							 DMA_ATTR_FORCE_CONTIGUOUS);
> >>> +		if (!edma_ctx.ll_rd_virt[i]) {
> >>> +			rc = -ENOMEM;
> >>> +			goto err_free_ll;
> >>> +		}
> >>> +		rd_done++;
> >>> +		info->ll_rd_phys[i] = edma_ctx.ll_rd_phys[i];
> >>> +	}
> >>> +
> >>> +	/* For peer notification */
> >>> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> >>> +	intr = dma_alloc_coherent(dev, sizeof(*intr), &intr_phys, GFP_KERNEL);
> >>> +	if (!intr) {
> >>> +		rc = -ENOMEM;
> >>> +		goto err_free_ll;
> >>> +	}
> >>> +	memset(intr, 0, sizeof(*intr));
> >>> +	edma_ctx.intr_ep_virt = intr;
> >>> +	edma_ctx.intr_ep_phys = intr_phys;
> >>> +	info->intr_dar_base = intr_phys;
> >>> +
> >>> +	peer_mw = ntb_peer_mw_count(ndev);
> >>> +	if (peer_mw <= 0) {
> >>> +		rc = -ENODEV;
> >>> +		goto err_free_ll;
> >>> +	}
> >>> +
> >>> +	mw_index = peer_mw - 1; /* last MW */
> >>> +
> >>> +	rc = ntb_mw_get_align(ndev, 0, mw_index, 0, NULL, &size_max,
> >>> +			      &offset);
> >>> +	if (rc)
> >>> +		goto err_free_ll;
> >>> +
> >>> +	if (size_max < need) {
> >>> +		rc = -ENOSPC;
> >>> +		goto err_free_ll;
> >>> +	}
> >>> +
> >>> +	/* Map register space (direct) */
> >>> +	dom = iommu_get_domain_for_dev(dev);
> >>> +	if (dom) {
> >>> +		phys = edma_regs_phys & PAGE_MASK;
> >>> +		size = PAGE_ALIGN(EDMA_REG_SIZE + edma_regs_phys - phys);
> >>> +		iova = phys;
> >>> +
> >>> +		rc = iommu_map(dom, iova, phys, size,
> >>> +			       IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, GFP_KERNEL);
> >>> +		if (rc)
> >>> +			dev_err(&ndev->dev, "failed to create direct mapping for eDMA reg space\n");
> >>> +		else
> >>> +			reg_mapped = true;
> >>> +	}
> >>> +
> >>> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_regs_phys, EDMA_REG_SIZE, offset);
> >>> +	if (rc)
> >>> +		goto err_unmap_reg;
> >>> +
> >>> +	offset += EDMA_REG_SIZE;
> >>> +
> >>> +	/* Map ntb_edma_info */
> >>> +	rc = ntb_mw_set_trans(ndev, 0, mw_index, info_phys, info_bytes, offset);
> >>> +	if (rc)
> >>> +		goto err_clear_trans;
> >>> +	offset += info_bytes;
> >>> +
> >>> +	/* Map LL location */
> >>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> >>> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_wr_phys[i],
> >>> +				      DMA_LLP_MEM_SIZE, offset);
> >>> +		if (rc)
> >>> +			goto err_clear_trans;
> >>> +		offset += DMA_LLP_MEM_SIZE;
> >>> +	}
> >>> +	for (i = 0; i < EDMA_RD_CH_NUM + 1; i++) {
> >>> +		rc = ntb_mw_set_trans(ndev, 0, mw_index, edma_ctx.ll_rd_phys[i],
> >>> +				      DMA_LLP_MEM_SIZE, offset);
> >>> +		if (rc)
> >>> +			goto err_clear_trans;
> >>> +		offset += DMA_LLP_MEM_SIZE;
> >>> +	}
> >>> +	edma_ctx.initialized = true;
> >>> +
> >>> +	return 0;
> >>> +
> >>> +err_clear_trans:
> >>> +	/*
> >>> +	 * Tear down the NTB translation window used for the eDMA MW.
> >>> +	 * There is no sub-range clear API for ntb_mw_set_trans(), so we
> >>> +	 * unconditionally drop the whole mapping on error.
> >>> +	 */
> >>> +	ntb_mw_clear_trans(ndev, 0, mw_index);
> >>> +
> >>> +err_unmap_reg:
> >>> +	if (reg_mapped)
> >>> +		iommu_unmap(dom, iova, size);
> >>> +err_free_ll:
> >>> +	while (rd_done--)
> >>> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> >>> +			       edma_ctx.ll_rd_virt[rd_done],
> >>> +			       edma_ctx.ll_rd_phys[rd_done],
> >>> +			       DMA_ATTR_FORCE_CONTIGUOUS);
> >>> +	while (wr_done--)
> >>> +		dma_free_attrs(dev, DMA_LLP_MEM_SIZE,
> >>> +			       edma_ctx.ll_wr_virt[wr_done],
> >>> +			       edma_ctx.ll_wr_phys[wr_done],
> >>> +			       DMA_ATTR_FORCE_CONTIGUOUS);
> >>> +	if (edma_ctx.intr_ep_virt)
> >>> +		dma_free_coherent(dev, sizeof(struct ntb_edma_intr),
> >>> +				  edma_ctx.intr_ep_virt,
> >>> +				  edma_ctx.intr_ep_phys);
> >>> +	dma_free_coherent(dev, info_bytes, info, info_phys);
> >>> +	return rc;
> >>> +}
> >>> +
> >>> +static int ntb_edma_irq_vector(struct device *dev, unsigned int nr)
> >>> +{
> >>> +	struct pci_dev *pdev = to_pci_dev(dev);
> >>> +	int ret, nvec;
> >>> +
> >>> +	nvec = pci_msi_vec_count(pdev);
> >>> +	for (; nr < nvec; nr++) {
> >>> +		ret = pci_irq_vector(pdev, nr);
> >>> +		if (!irq_has_action(ret))
> >>> +			return ret;
> >>> +	}
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static const struct dw_edma_plat_ops ntb_edma_ops = {
> >>> +	.irq_vector     = ntb_edma_irq_vector,
> >>> +};
> >>> +
> >>> +int ntb_edma_setup_peer(struct ntb_dev *ndev)
> >>> +{
> >>> +	struct ntb_edma_info *info;
> >>> +	unsigned int wr_cnt, rd_cnt;
> >>> +	struct dw_edma_chip *chip;
> >>> +	void __iomem *edma_virt;
> >>> +	phys_addr_t edma_phys;
> >>> +	resource_size_t mw_size;
> >>> +	u64 off = EDMA_REG_SIZE;
> >>> +	int peer_mw, mw_index;
> >>> +	unsigned int i;
> >>> +	int ret;
> >>> +
> >>> +	peer_mw = ntb_peer_mw_count(ndev);
> >>> +	if (peer_mw <= 0)
> >>> +		return -ENODEV;
> >>> +
> >>> +	mw_index = peer_mw - 1; /* last MW */
> >>> +
> >>> +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
> >>> +				   &mw_size);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	edma_virt = ioremap(edma_phys, mw_size);
> >>> +	if (!edma_virt)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
> >>> +	if (!chip) {
> >>> +		ret = -ENOMEM;
> >>> +		return ret;
> >>> +	}
> >>> +
> >>> +	chip->dev = &ndev->pdev->dev;
> >>> +	chip->nr_irqs = 4;
> >>> +	chip->ops = &ntb_edma_ops;
> >>> +	chip->flags = 0;
> >>> +	chip->reg_base = edma_virt;
> >>> +	chip->mf = EDMA_MF_EDMA_UNROLL;
> >>> +
> >>> +	info = edma_virt + off;
> >>> +	if (info->magic != NTB_EDMA_INFO_MAGIC)
> >>> +		return -EINVAL;
> >>> +	wr_cnt = info->wr_cnt;
> >>> +	rd_cnt = info->rd_cnt;
> >>> +	chip->ll_wr_cnt = wr_cnt;
> >>> +	chip->ll_rd_cnt = rd_cnt;
> >>> +	off += PAGE_SIZE;
> >>> +
> >>> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> >>> +	edma_ctx.intr_ep_phys = info->intr_dar_base;
> >>> +	if (edma_ctx.intr_ep_phys) {
> >>> +		edma_ctx.intr_rc_virt =
> >>> +			dma_alloc_coherent(&ndev->pdev->dev,
> >>> +					   sizeof(struct ntb_edma_intr),
> >>> +					   &edma_ctx.intr_rc_phys,
> >>> +					   GFP_KERNEL);
> >>> +		if (!edma_ctx.intr_rc_virt)
> >>> +			return -ENOMEM;
> >>> +		memset(edma_ctx.intr_rc_virt, 0,
> >>> +		       sizeof(struct ntb_edma_intr));
> >>> +	}
> >>> +
> >>> +	for (i = 0; i < wr_cnt; i++) {
> >>> +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
> >>> +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
> >>> +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
> >>> +		off += DMA_LLP_MEM_SIZE;
> >>> +	}
> >>> +	for (i = 0; i < rd_cnt; i++) {
> >>> +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
> >>> +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
> >>> +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
> >>> +		off += DMA_LLP_MEM_SIZE;
> >>> +	}
> >>> +
> >>> +	if (!pci_dev_msi_enabled(ndev->pdev))
> >>> +		return -ENXIO;
> >>> +
> >>> +	ret = dw_edma_probe(chip);
> >>> +	if (ret) {
> >>> +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> >>> +	u32 flags, idx;
> >>> +	}
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +struct ntb_edma_filter {
> >>> +	struct device *dma_dev;
> >>> +	u32 direction;
> >>> +};
> >>> +
> >>> +	flags = in->flags;
> >>> +	in->flags = 0;
> >>> +{
> >>> +	struct ntb_edma_filter *filter = arg;
> >>> +	u32 dir = filter->direction;
> >>> +	struct dma_slave_caps caps;
> >>> +	int ret;
> >>> +
> >>> +	if (flags & LINK_DOWN_FLAG) {
> >>> +		return false;
> >>> +
> >>> +	ret = dma_get_slave_caps(chan, &caps);
> >>> +	if (ret < 0)
> >>> +		return false;
> >>> +
> >>> +	return !!(caps.directions & dir);
> >>> +}
> >>> +
> >>> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
> >>> +{
> >>> +	unsigned int i;
> >>> +
> >>> +	for (i = 0; i < edma->num_wr_chan; i++)
> >>> +		dma_release_channel(edma->wr_chan[i]);
> >>> +
> >>> +	for (i = 0; i < edma->num_rd_chan; i++)
> >>> +		dma_release_channel(edma->rd_chan[i]);
> >>> +
> >>> +	if (edma->intr_chan)
> >>> +		dma_release_channel(edma->intr_chan);
> >>> +}
> >>> +
> >>> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
> >>> +{
> >>> +	struct ntb_edma_filter filter;
> >>> +	dma_cap_mask_t dma_mask;
> >>> +	unsigned int i;
> >>> +
> >>> +	dma_cap_zero(dma_mask);
> >>> +	dma_cap_set(DMA_SLAVE, dma_mask);
> >>> +
> >>> +	memset(edma, 0, sizeof(*edma));
> >>> +	edma->dev = dma_dev;
> >>> +
> >>> +	filter.dma_dev = dma_dev;
> >>> +	filter.direction = BIT(DMA_DEV_TO_MEM);
> >>> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> >>> +		edma->wr_chan[i] = dma_request_channel(dma_mask,
> >>> +						       ntb_edma_filter_fn,
> >>> +						       &filter);
> >>> +		if (!edma->wr_chan[i])
> >>> +			break;
> >>> +		edma->num_wr_chan++;
> >>> +	}
> >>> +
> >>> +	filter.direction = BIT(DMA_MEM_TO_DEV);
> >>> +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
> >>> +		edma->rd_chan[i] = dma_request_channel(dma_mask,
> >>> +						       ntb_edma_filter_fn,
> >>> +						       &filter);
> >>> +		if (!edma->rd_chan[i])
> >>> +			break;
> >>> +		edma->num_rd_chan++;
> >>> +	}
> >>> +
> >>> +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
> >>> +					      &filter);
> >>> +	if (!edma->intr_chan)
> >>> +		dev_warn(dma_dev,
> >>> +			 "Remote eDMA notify channel could not be allocated\n");
> >>> +
> >>> +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
> >>> +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
> >>> +		ntb_edma_teardown_chans(edma);
> >>> +		return -ENODEV;
> >>> +	}
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> >>> +				    remote_edma_dir_t dir)
> >>> +{
> >>> +	unsigned int n, cur, idx;
> >>> +	struct dma_chan **chans;
> >>> +	atomic_t *cur_chan;
> >>> +
> >>> +	if (dir == REMOTE_EDMA_WRITE) {
> >>> +		n = edma->num_wr_chan;
> >>> +		chans = edma->wr_chan;
> >>> +		cur_chan = &edma->cur_wr_chan;
> >>> +	} else {
> >>> +		n = edma->num_rd_chan;
> >>> +		chans = edma->rd_chan;
> >>> +		cur_chan = &edma->cur_rd_chan;
> >>> +	}
> >>> +	if (WARN_ON_ONCE(!n))
> >>> +		return NULL;
> >>> +
> >>> +	/* Simple round-robin */
> >>> +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
> >>> +	idx = cur % n;
> >>> +	return chans[idx];
> >>> +}
> >>> +
> >>> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
> >>> +{
> >>> +	struct dma_async_tx_descriptor *txd;
> >>> +	struct dma_slave_config cfg;
> >>> +	struct scatterlist sgl;
> >>> +	dma_cookie_t cookie;
> >>> +	struct device *dev;
> >>> +
> >>> +	if (!edma || !edma->intr_chan)
> >>> +		return -ENXIO;
> >>> +
> >>> +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
> >>> +		return -EINVAL;
> >>> +
> >>> +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
> >>> +		return -EINVAL;
> >>> +
> >>> +	dev = edma->dev;
> >>> +	if (!dev)
> >>> +		return -ENODEV;
> >>> +
> >>> +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
> >>> +
> >>> +	/* Ensure store is visible before kicking the DMA transfer */
> >>> +	wmb();
> >>> +
> >>> +	sg_init_table(&sgl, 1);
> >>> +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
> >>> +	sg_dma_len(&sgl) = sizeof(u32);
> >>> +
> >>> +	memset(&cfg, 0, sizeof(cfg));
> >>> +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
> >>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.direction      = DMA_MEM_TO_DEV;
> >>> +
> >>> +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
> >>> +		return -EINVAL;
> >>> +
> >>> +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
> >>> +				      DMA_MEM_TO_DEV,
> >>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> >>> +	if (!txd)
> >>> +		return -ENOSPC;
> >>> +
> >>> +	cookie = dmaengine_submit(txd);
> >>> +	if (dma_submit_error(cookie))
> >>> +		return -ENOSPC;
> >>> +
> >>> +	dma_async_issue_pending(edma->intr_chan);
> >>> +	return 0;
> >>> +}
> >>> diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
> >>> new file mode 100644
> >>> index 000000000000..da0451827edb
> >>> --- /dev/null
> >>> +++ b/drivers/ntb/ntb_edma.h
> >>> @@ -0,0 +1,128 @@
> >>> +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
> >>> +#ifndef _NTB_EDMA_H_
> >>> +#define _NTB_EDMA_H_
> >>> +
> >>> +#include <linux/completion.h>
> >>> +#include <linux/device.h>
> >>> +#include <linux/interrupt.h>
> >>> +
> >>> +#define EDMA_REG_SIZE		SZ_64K
> >>> +#define DMA_LLP_MEM_SIZE	SZ_4K
> >>> +#define EDMA_WR_CH_NUM		4
> >>> +#define EDMA_RD_CH_NUM		4
> >>> +#define NTB_EDMA_MAX_CH		8
> >>> +
> >>> +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
> >>> +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
> >>> +
> >>> +#define NTB_EDMA_RING_ORDER	7
> >>> +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
> >>> +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
> >>> +
> >>> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> >>> +
> >>> +/*
> >>> + * REMOTE_EDMA_EP:
> >>> + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
> >>> + *
> >>> + * REMOTE_EDMA_RC:
> >>> + *   Root Complex controls the endpoint eDMA through the shared MW and
> >>> + *   drives reads/writes on behalf of the host.
> >>> + */
> >>> +typedef enum {
> >>> +	REMOTE_EDMA_UNKNOWN,
> >>> +	REMOTE_EDMA_EP,
> >>> +	REMOTE_EDMA_RC,
> >>> +} remote_edma_mode_t;
> >>> +
> >>> +typedef enum {
> >>> +	REMOTE_EDMA_WRITE,
> >>> +	REMOTE_EDMA_READ,
> >>> +} remote_edma_dir_t;
> >>> +
> >>> +/*
> >>> + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
> >>> + *
> >>> + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
> >>> + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
> >>> + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
> >>> + *                                RC configures via dw_edma)
> >>> + *
> >>> + * ntb_edma_setup_mws() on EP:
> >>> + *   - allocates ntb_edma_info and LLs in EP memory
> >>> + *   - programs inbound iATU so that RC peer MW[n] points at this block
> >>> + *
> >>> + * ntb_edma_setup_peer() on RC:
> >>> + *   - ioremaps peer MW[n]
> >>> + *   - reads ntb_edma_info
> >>> + *   - sets up dw_edma_chip ll_region_* from that info
> >>> + */
> >>> +struct ntb_edma_info {
> >>> +	u32 magic;
> >>> +	u16 wr_cnt;
> >>> +	u16 rd_cnt;
> >>> +	u64 regs_phys;
> >>> +	u32 ll_stride;
> >>> +	u32 rsvd;
> >>> +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
> >>> +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
> >>> +
> >>> +	u64 intr_dar_base;
> >>> +} __packed;
> >>> +
> >>> +struct ll_dma_addrs {
> >>> +	dma_addr_t wr[EDMA_WR_CH_NUM];
> >>> +	dma_addr_t rd[EDMA_RD_CH_NUM];
> >>> +};
> >>> +
> >>> +struct ntb_edma_chans {
> >>> +	struct device *dev;
> >>> +
> >>> +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
> >>> +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
> >>> +	struct dma_chan *intr_chan;
> >>> +
> >>> +	unsigned int num_wr_chan;
> >>> +	unsigned int num_rd_chan;
> >>> +	atomic_t cur_wr_chan;
> >>> +	atomic_t cur_rd_chan;
> >>> +};
> >>> +
> >>> +static __always_inline u32 ntb_edma_ring_idx(u32 v)
> >>> +{
> >>> +	return v & NTB_EDMA_RING_MASK;
> >>> +}
> >>> +
> >>> +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
> >>> +{
> >>> +	if (head >= tail) {
> >>> +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
> >>> +		return head - tail;
> >>> +	}
> >>> +
> >>> +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
> >>> +	return U32_MAX - tail + head + 1;
> >>> +}
> >>> +
> >>> +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
> >>> +{
> >>> +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
> >>> +}
> >>> +
> >>> +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
> >>> +{
> >>> +	return ntb_edma_ring_free_entry(head, tail) == 0;
> >>> +}
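
To make the index convention above easier to follow: these are free-running u32
producer/consumer counters that are only masked when used as an array index,
with one slot always left empty. A minimal sketch of the intended usage, as I
read it (not part of the patch):

	/* Sketch only: how the helpers above are meant to be used. */
	static __maybe_unused void ntb_edma_ring_usage_sketch(void)
	{
		u32 head = 0, tail = 0;	/* free-running; never masked directly */

		/* Producer: fill desc[ntb_edma_ring_idx(head)], then publish. */
		if (!ntb_edma_ring_full(head, tail))
			head++;		/* wraps naturally at U32_MAX */

		/* Consumer: drain desc[ntb_edma_ring_idx(tail)] while entries are used. */
		while (ntb_edma_ring_used_entry(head, tail))
			tail++;
	}
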
> >>> +
> >>> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> >>> +		       ntb_edma_interrupt_cb_t cb, void *data);
> >>> +void ntb_edma_teardown_isr(struct device *dev);
> >>> +int ntb_edma_setup_mws(struct ntb_dev *ndev);
> >>> +int ntb_edma_setup_peer(struct ntb_dev *ndev);
> >>> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
> >>> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> >>> +				    remote_edma_dir_t dir);
> >>> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
> >>> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
> >>> +
> >>> +#endif
> >>> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
> >>> similarity index 65%
> >>> rename from drivers/ntb/ntb_transport.c
> >>> rename to drivers/ntb/ntb_transport_core.c
> >>> index 907db6c93d4d..48d48921978d 100644
> >>> --- a/drivers/ntb/ntb_transport.c
> >>> +++ b/drivers/ntb/ntb_transport_core.c
> >>> @@ -47,6 +47,9 @@
> >>>   * Contact Information:
> >>>   * Jon Mason <jon.mason@intel.com>
> >>>   */
> >>> +#include <linux/atomic.h>
> >>> +#include <linux/bug.h>
> >>> +#include <linux/compiler.h>
> >>>  #include <linux/debugfs.h>
> >>>  #include <linux/delay.h>
> >>>  #include <linux/dmaengine.h>
> >>> @@ -71,6 +74,8 @@
> >>>  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
> >>>  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
> >>>  
> >>> +#define NTB_EDMA_MAX_POLL		32
> >>> +
> >>>  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
> >>>  MODULE_VERSION(NTB_TRANSPORT_VER);
> >>>  MODULE_LICENSE("Dual BSD/GPL");
> >>> @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
> >>>  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
> >>>  #endif
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>
> >> This comment applies throughout this patch. Doing ifdefs inside C source is pretty frowned upon in the kernel. The preferred way is to only have ifdefs in the header files. So please give this a bit more consideration and see if it can be done differently to address this.
> > 
> > I agree, there is no good reason to keep those remaining ifdefs at all.
> > I'll clean it up. Thanks for pointing this out.
> > 
> >>
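
On the ifdef point, in case a concrete shape helps: the usual pattern would be
to include ntb_edma.h unconditionally and keep the ifdef inside the header,
providing no-op stubs when CONFIG_NTB_TRANSPORT_EDMA is off. A rough sketch,
using two functions from this patch as examples (the real set would cover all
of the ntb_edma_* / ntb_transport_edma_* entry points):

	/* In ntb_edma.h (sketch): */
	#ifdef CONFIG_NTB_TRANSPORT_EDMA
	int ntb_edma_setup_mws(struct ntb_dev *ndev);
	void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
	#else
	static inline int ntb_edma_setup_mws(struct ntb_dev *ndev)
	{
		return -EOPNOTSUPP;
	}
	static inline void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
	{
	}
	#endif

ntb_transport_core.c could then call these unconditionally (with the existing
use_remote_edma checks deciding whether they do anything), and the remaining
ifdefs around data members could be handled as DJ suggests below with a
separate eDMA-specific struct.
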
> >>> +#include "ntb_edma.h"
> >>> +static bool use_remote_edma;
> >>> +module_param(use_remote_edma, bool, 0644);
> >>> +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
> >>> +#endif
> >>> +
> >>>  static struct dentry *nt_debugfs_dir;
> >>>  
> >>>  /* Only two-ports NTB devices are supported */
> >>> @@ -125,6 +137,14 @@ struct ntb_queue_entry {
> >>>  		struct ntb_payload_header __iomem *tx_hdr;
> >>>  		struct ntb_payload_header *rx_hdr;
> >>>  	};
> >>> +
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	dma_addr_t addr;
> >>> +
> >>> +	/* Used by RC side only */
> >>> +	struct scatterlist sgl;
> >>> +	struct work_struct dma_work;
> >>> +#endif
> >>>  };
> >>>  
> >>>  struct ntb_rx_info {
> >>> @@ -202,6 +222,33 @@ struct ntb_transport_qp {
> >>>  	int msi_irq;
> >>>  	struct ntb_msi_desc msi_desc;
> >>>  	struct ntb_msi_desc peer_msi_desc;
> >>> +
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	/*
> >>> +	 * For ensuring peer notification in non-atomic context.
> >>> +	 * ntb_peer_db_set might sleep or schedule.
> >>> +	 */
> >>> +	struct work_struct db_work;
> >>> +
> >>> +	/*
> >>> +	 * wr: remote eDMA write transfer (EP -> RC direction)
> >>> +	 * rd: remote eDMA read transfer (RC -> EP direction)
> >>> +	 */
> >>> +	u32 wr_cons;
> >>> +	u32 rd_cons;
> >>> +	u32 wr_prod;
> >>> +	u32 rd_prod;
> >>> +	u32 wr_issue;
> >>> +	u32 rd_issue;
> >>> +
> >>> +	spinlock_t ep_tx_lock;
> >>> +	spinlock_t ep_rx_lock;
> >>> +	spinlock_t rc_lock;
> >>> +
> >>> +	/* Completion work for read/write transfers. */
> >>> +	struct work_struct read_work;
> >>> +	struct work_struct write_work;
> >>> +#endif
> >>
> >> For something like this, maybe it needs its own struct instead of an ifdef chunk. Perhaps 'ntb_rx_info' can serve as a core data struct, with eDMA defining an 'ntb_rx_info_edma' that embeds 'ntb_rx_info'.
> > 
> > Thanks again for the suggestion. I'll reorganize things.
> > 
> > Koichiro
> > 
> >>
> >> DJ
> >>
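
To make that suggestion concrete for the qp fields quoted above, one possible
shape (all names hypothetical, just for illustration) would be to collect the
eDMA-only state into its own struct and hang it off the qp, so the core struct
stays ifdef-free:

	/* Sketch only; field names copied from the chunk above. */
	struct ntb_transport_qp_edma {
		struct work_struct db_work;
		u32 wr_cons, rd_cons;
		u32 wr_prod, rd_prod;
		u32 wr_issue, rd_issue;
		spinlock_t ep_tx_lock;
		spinlock_t ep_rx_lock;
		spinlock_t rc_lock;
		struct work_struct read_work;
		struct work_struct write_work;
	};

	struct ntb_transport_qp {
		/* ... existing fields ... */
		struct ntb_transport_qp_edma *edma;	/* NULL unless remote eDMA */
	};

The same idea would presumably apply to the other ifdef chunks (mode, workqueue
and channels in ntb_transport_ctx; addr/sgl/dma_work in ntb_queue_entry).
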
> >>>  };
> >>>  
> >>>  struct ntb_transport_mw {
> >>> @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
> >>>  
> >>>  	/* Make sure workq of link event be executed serially */
> >>>  	struct mutex link_event_lock;
> >>> +
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	remote_edma_mode_t remote_edma_mode;
> >>> +	struct device *dma_dev;
> >>> +	struct workqueue_struct *wq;
> >>> +	struct ntb_edma_chans edma;
> >>> +#endif
> >>>  };
> >>>  
> >>>  enum {
> >>> @@ -262,6 +316,19 @@ struct ntb_payload_header {
> >>>  	unsigned int flags;
> >>>  };
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
> >>> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> >>> +				   unsigned int *mw_count);
> >>> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> >>> +					  unsigned int qp_num);
> >>> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> >>> +					    struct ntb_transport_qp *qp);
> >>> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
> >>> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
> >>> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
> >>> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> >>> +
> >>>  /*
> >>>   * Return the device that should be used for DMA mapping.
> >>>   *
> >>> @@ -298,7 +365,7 @@ enum {
> >>>  	container_of((__drv), struct ntb_transport_client, driver)
> >>>  
> >>>  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
> >>> -#define NTB_QP_DEF_NUM_ENTRIES	100
> >>> +#define NTB_QP_DEF_NUM_ENTRIES	128
> >>>  #define NTB_LINK_DOWN_TIMEOUT	10
> >>>  
> >>>  static void ntb_transport_rxc_db(unsigned long data);
> >>> @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
> >>>  	count = ntb_spad_count(nt->ndev);
> >>>  	for (i = 0; i < count; i++)
> >>>  		ntb_spad_write(nt->ndev, i, 0);
> >>> +
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	ntb_edma_teardown_chans(&nt->edma);
> >>> +#endif
> >>>  }
> >>>  
> >>>  static void ntb_transport_link_cleanup_work(struct work_struct *work)
> >>> @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >>>  
> >>>  	/* send the local info, in the opposite order of the way we read it */
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	rc = ntb_transport_edma_ep_init(nt);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
> >>> +		return;
> >>> +	}
> >>> +#endif
> >>> +
> >>>  	if (nt->use_msi) {
> >>>  		rc = ntb_msi_setup_mws(ndev);
> >>>  		if (rc) {
> >>> @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >>>  
> >>>  	nt->link_is_up = true;
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	rc = ntb_transport_edma_rc_init(nt);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
> >>> +		goto out1;
> >>> +	}
> >>> +#endif
> >>> +
> >>>  	for (i = 0; i < nt->qp_count; i++) {
> >>>  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
> >>>  
> >>> @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
> >>>  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
> >>>  };
> >>>  
> >>> +static const struct ntb_transport_backend_ops edma_backend_ops;
> >>> +
> >>>  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >>>  {
> >>>  	struct ntb_transport_ctx *nt;
> >>> @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >>>  
> >>>  	nt->ndev = ndev;
> >>>  
> >>> -	nt->backend_ops = default_backend_ops;
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	if (use_remote_edma) {
> >>> +		rc = ntb_transport_edma_init(nt, &mw_count);
> >>> +		if (rc) {
> >>> +			nt->mw_count = 0;
> >>> +			goto err;
> >>> +		}
> >>> +		nt->backend_ops = edma_backend_ops;
> >>> +
> >>> +		/*
> >>> +		 * In remote eDMA mode, we reserve a read channel for
> >>> +		 * host-to-EP notification, so MSI is not used.
> >>> +		 */
> >>> +		use_msi = false;
> >>> +	} else
> >>> +#endif
> >>> +		nt->backend_ops = default_backend_ops;
> >>>  
> >>>  	/*
> >>>  	 * If we are using MSI, and have at least one extra memory window,
> >>> @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >>>  		rc = ntb_transport_init_queue(nt, i);
> >>>  		if (rc)
> >>>  			goto err2;
> >>> +
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +		ntb_transport_edma_init_queue(nt, i);
> >>> +#endif
> >>>  	}
> >>>  
> >>>  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
> >>> @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >>>  	}
> >>>  	kfree(nt->mw_vec);
> >>>  err:
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	ntb_transport_edma_uninit(nt);
> >>> +#endif
> >>>  	kfree(nt);
> >>>  	return rc;
> >>>  }
> >>> @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >>>  
> >>>  	nt->qp_bitmap_free &= ~qp_bit;
> >>>  
> >>> +	qp->qp_bit = qp_bit;
> >>>  	qp->cb_data = data;
> >>>  	qp->rx_handler = handlers->rx_handler;
> >>>  	qp->tx_handler = handlers->tx_handler;
> >>>  	qp->event_handler = handlers->event_handler;
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	ntb_transport_edma_create_queue(nt, qp);
> >>> +#endif
> >>> +
> >>>  	dma_cap_zero(dma_mask);
> >>>  	dma_cap_set(DMA_MEMCPY, dma_mask);
> >>>  
> >>> @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >>>  			goto err1;
> >>>  
> >>>  		entry->qp = qp;
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> >>> +#endif
> >>>  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> >>>  			     &qp->rx_free_q);
> >>>  	}
> >>> @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
> >>>   */
> >>>  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >>>  {
> >>> -	struct pci_dev *pdev;
> >>>  	struct ntb_queue_entry *entry;
> >>> +	struct pci_dev *pdev;
> >>>  	u64 qp_bit;
> >>>  
> >>>  	if (!qp)
> >>> @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >>>  	tasklet_kill(&qp->rxc_db_work);
> >>>  
> >>>  	cancel_delayed_work_sync(&qp->link_work);
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +	cancel_work_sync(&qp->read_work);
> >>> +	cancel_work_sync(&qp->write_work);
> >>> +#endif
> >>>  
> >>>  	qp->cb_data = NULL;
> >>>  	qp->rx_handler = NULL;
> >>> @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
> >>>  
> >>> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> >>> +/*
> >>> + * Remote eDMA mode implementation
> >>> + */
> >>> +struct ntb_edma_desc {
> >>> +	u32 len;
> >>> +	u32 flags;
> >>> +	u64 addr; /* DMA address */
> >>> +	u64 data;
> >>> +};
> >>> +
> >>> +struct ntb_edma_ring {
> >>> +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
> >>> +	u32 head;
> >>> +	u32 tail;
> >>> +};
> >>> +
> >>> +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
> >>> +
> >>> +#define __NTB_EDMA_CHECK_INDEX(_i)					\
> >>> +({									\
> >>> +	unsigned long __i = (unsigned long)(_i);			\
> >>> +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
> >>> +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
> >>> +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
> >>> +	__i;								\
> >>> +})
> >>> +
> >>> +#define NTB_EDMA_DESC_I(qp, i, n)					\
> >>> +({									\
> >>> +	typeof(qp) __qp = (qp);						\
> >>> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> >>> +	(struct ntb_edma_desc *)					\
> >>> +		((char *)(__qp)->rx_buff +				\
> >>> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> >>> +		 NTB_EDMA_DESC_OFF(__i));				\
> >>> +})
> >>> +
> >>> +#define NTB_EDMA_DESC_O(qp, i, n)					\
> >>> +({									\
> >>> +	typeof(qp) __qp = (qp);						\
> >>> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> >>> +	(struct ntb_edma_desc __iomem *)				\
> >>> +		((char __iomem *)(__qp)->tx_mw +			\
> >>> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> >>> +		 NTB_EDMA_DESC_OFF(__i));				\
> >>> +})
> >>> +
> >>> +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> >>> +				(sizeof(struct ntb_edma_ring) * n) +	\
> >>> +				offsetof(struct ntb_edma_ring, head)))
> >>> +#define NTB_EDMA_HEAD_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw + \
> >>> +				(sizeof(struct ntb_edma_ring) * n) +	\
> >>> +				offsetof(struct ntb_edma_ring, head)))
> >>> +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> >>> +				(sizeof(struct ntb_edma_ring) * n) +	\
> >>> +				offsetof(struct ntb_edma_ring, tail)))
> >>> +#define NTB_EDMA_TAIL_O(qp, n) ((u32 __iomem *)((char __iomem *)qp->tx_mw + \
> >>> +				(sizeof(struct ntb_edma_ring) * n) +	\
> >>> +				offsetof(struct ntb_edma_ring, tail)))
> >>> +
> >>> +/*
> >>> + * Macro naming rule:
> >>> + *   NTB_DESC_RD_EP_I (as an example)
> >>> + *            ^^ ^^ ^
> >>> + *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
> >>> + *            :  `----- Who uses this macro.
> >>> + *            `-------- DESC / HEAD / TAIL
> >>> + *
> >>> + * Read transfers (RC->EP):
> >>> + *
> >>> + *   EP view (outbound, written via NTB):
> >>> + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *           :
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *       - head: NTB_HEAD_RD_EP_O(qp)
> >>> + *       - tail: NTB_TAIL_RD_EP_I(qp)
> >>> + *
> >>> + *   RC view (inbound, local mapping):
> >>> + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *           :
> >>> + *           [ len ][ flags ][ addr ][ data ]
> >>> + *       - head: NTB_HEAD_RD_RC_I(qp)
> >>> + *       - tail: NTB_TAIL_RD_RC_O(qp)
> >>> + *
> >>> + * Write transfers (EP -> RC) are analogous but use
> >>> + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
> >>> + * and NTB_TAIL_WR_{EP_I,RC_O}().
> >>> + */
> >>> +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> >>> +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> >>> +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> >>> +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> >>> +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> >>> +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> >>> +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> >>> +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> >>> +
> >>> +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
> >>> +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
> >>> +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
> >>> +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
> >>> +
> >>> +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
> >>> +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
> >>> +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
> >>> +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
> >>> +
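
In other words (as I read the macros above), each qp's MW area starts with two
struct ntb_edma_ring instances back to back, and the _I variants address the
local rx_buff copy while the _O variants address the peer's copy through
tx_mw:

	offset 0                            : ntb_edma_ring[0]  (RD ring: desc[], head, tail)
	offset sizeof(struct ntb_edma_ring) : ntb_edma_ring[1]  (WR ring: desc[], head, tail)
	...                                 : rest of the per-qp buffer, rx_info at the end

which matches the 2 * sizeof(struct ntb_edma_ring) reservation in
ntb_transport_edma_setup_qp_mw() further down.
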
> >>> +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
> >>> +}
> >>> +
> >>> +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
> >>> +}
> >>> +
> >>> +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
> >>> +}
> >>> +
> >>> +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	unsigned int head, tail;
> >>> +
> >>> +	if (ntb_qp_edma_is_ep(qp)) {
> >>> +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> >>> +			/* In this scope, only 'head' might proceed */
> >>> +			tail = READ_ONCE(qp->wr_cons);
> >>> +			head = READ_ONCE(qp->wr_prod);
> >>> +		}
> >>> +		return ntb_edma_ring_free_entry(head, tail);
> >>> +	}
> >>> +
> >>> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> >>> +		/* In this scope, only 'head' might proceed */
> >>> +		tail = READ_ONCE(qp->rd_issue);
> >>> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> >>> +	}
> >>> +	/*
> >>> +	 * On RC side, 'used' amount indicates how much EP side
> >>> +	 * has refilled, which are available for us to use for TX.
> >>> +	 */
> >>> +	return ntb_edma_ring_used_entry(head, tail);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
> >>> +						  struct ntb_transport_qp *qp)
> >>> +{
> >>> +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
> >>> +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
> >>> +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
> >>> +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
> >>> +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
> >>> +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
> >>> +
> >>> +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
> >>> +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
> >>> +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
> >>> +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
> >>> +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
> >>> +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
> >>> +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
> >>> +	seq_putc(s, '\n');
> >>> +
> >>> +	seq_puts(s, "Using Remote eDMA - Yes\n");
> >>> +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
> >>> +{
> >>> +	struct ntb_dev *ndev = nt->ndev;
> >>> +
> >>> +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
> >>> +		ntb_edma_teardown_isr(&ndev->pdev->dev);
> >>> +
> >>> +	if (nt->wq)
> >>> +		destroy_workqueue(nt->wq);
> >>> +	nt->wq = NULL;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> >>> +				   unsigned int *mw_count)
> >>> +{
> >>> +	struct ntb_dev *ndev = nt->ndev;
> >>> +
> >>> +	/*
> >>> +	 * We need at least one MW for the transport plus one MW reserved
> >>> +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
> >>> +	 */
> >>> +	if (*mw_count <= 1) {
> >>> +		dev_err(&ndev->dev,
> >>> +			"remote eDMA requires at least two MWs (have %u)\n",
> >>> +			*mw_count);
> >>> +		return -ENODEV;
> >>> +	}
> >>> +
> >>> +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
> >>> +	if (!nt->wq) {
> >>> +		ntb_transport_edma_uninit(nt);
> >>> +		return -ENOMEM;
> >>> +	}
> >>> +
> >>> +	/* Reserve the last peer MW exclusively for the eDMA window. */
> >>> +	*mw_count -= 1;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_db_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp =
> >>> +			container_of(work, struct ntb_transport_qp, db_work);
> >>> +
> >>> +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	if (ntb_qp_edma_is_rc(qp)) {
> >>> +		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
> >>> +			return;
> >>> +	}
> >>> +
> >>> +	/*
> >>> +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
> >>> +	 * may sleep, delegate the actual doorbell write to a workqueue.
> >>> +	 */
> >>> +	queue_work(system_highpri_wq, &qp->db_work);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_isr(void *data, int qp_num)
> >>> +{
> >>> +	struct ntb_transport_ctx *nt = data;
> >>> +	struct ntb_transport_qp *qp;
> >>> +
> >>> +	if (qp_num < 0 || qp_num >= nt->qp_count)
> >>> +		return;
> >>> +
> >>> +	qp = &nt->qp_vec[qp_num];
> >>> +	if (WARN_ON(!qp))
> >>> +		return;
> >>> +
> >>> +	queue_work(nt->wq, &qp->read_work);
> >>> +	queue_work(nt->wq, &qp->write_work);
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
> >>> +{
> >>> +	struct ntb_dev *ndev = nt->ndev;
> >>> +	struct pci_dev *pdev = ndev->pdev;
> >>> +	int rc;
> >>> +
> >>> +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
> >>> +		return 0;
> >>> +
> >>> +	rc = ntb_edma_setup_peer(ndev);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
> >>> +		return rc;
> >>> +	}
> >>> +
> >>> +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
> >>> +		return rc;
> >>> +	}
> >>> +
> >>> +	nt->remote_edma_mode = REMOTE_EDMA_RC;
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
> >>> +{
> >>> +	struct ntb_dev *ndev = nt->ndev;
> >>> +	struct pci_dev *pdev = ndev->pdev;
> >>> +	struct pci_epc *epc;
> >>> +	int rc;
> >>> +
> >>> +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
> >>> +		return 0;
> >>> +
> >>> +	/* Only EP side can return pci_epc */
> >>> +	epc = ntb_get_pci_epc(ndev);
> >>> +	if (!epc)
> >>> +		return 0;
> >>> +
> >>> +	rc = ntb_edma_setup_mws(ndev);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev,
> >>> +			"Failed to set up memory window for eDMA: %d\n", rc);
> >>> +		return rc;
> >>> +	}
> >>> +
> >>> +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
> >>> +	if (rc) {
> >>> +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
> >>> +		return rc;
> >>> +	}
> >>> +
> >>> +	nt->remote_edma_mode = REMOTE_EDMA_EP;
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
> >>> +					  unsigned int qp_num)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> >>> +	struct ntb_dev *ndev = nt->ndev;
> >>> +	struct ntb_queue_entry *entry;
> >>> +	struct ntb_transport_mw *mw;
> >>> +	unsigned int mw_num, mw_count, qp_count;
> >>> +	unsigned int qp_offset, rx_info_offset;
> >>> +	unsigned int mw_size, mw_size_per_qp;
> >>> +	unsigned int num_qps_mw;
> >>> +	size_t edma_total;
> >>> +	unsigned int i;
> >>> +	int node;
> >>> +
> >>> +	mw_count = nt->mw_count;
> >>> +	qp_count = nt->qp_count;
> >>> +
> >>> +	mw_num = QP_TO_MW(nt, qp_num);
> >>> +	mw = &nt->mw_vec[mw_num];
> >>> +
> >>> +	if (!mw->virt_addr)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	if (mw_num < qp_count % mw_count)
> >>> +		num_qps_mw = qp_count / mw_count + 1;
> >>> +	else
> >>> +		num_qps_mw = qp_count / mw_count;
> >>> +
> >>> +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
> >>> +	if (max_mw_size && mw_size > max_mw_size)
> >>> +		mw_size = max_mw_size;
> >>> +
> >>> +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
> >>> +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
> >>> +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
> >>> +
> >>> +	qp->tx_mw_size = mw_size_per_qp;
> >>> +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> >>> +	if (!qp->tx_mw)
> >>> +		return -EINVAL;
> >>> +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
> >>> +	if (!qp->tx_mw_phys)
> >>> +		return -EINVAL;
> >>> +	qp->rx_info = qp->tx_mw + rx_info_offset;
> >>> +	qp->rx_buff = mw->virt_addr + qp_offset;
> >>> +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
> >>> +
> >>> +	/* Due to housekeeping, there must be at least 2 buffs */
> >>> +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> >>> +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> >>> +
> >>> +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
> >>> +	edma_total = 2 * sizeof(struct ntb_edma_ring);
> >>> +	if (rx_info_offset < edma_total) {
> >>> +		dev_err(&ndev->dev, "Ring space requires %zuB, only %uB available\n",
> >>> +			edma_total, rx_info_offset);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
> >>> +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
> >>> +
> >>> +	/*
> >>> +	 * Checking to see if we have more entries than the default.
> >>> +	 * We should add additional entries if that is the case so we
> >>> +	 * can be in sync with the transport frames.
> >>> +	 */
> >>> +	node = dev_to_node(&ndev->dev);
> >>> +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
> >>> +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
> >>> +		if (!entry)
> >>> +			return -ENOMEM;
> >>> +
> >>> +		entry->qp = qp;
> >>> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> >>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> >>> +			     &qp->rx_free_q);
> >>> +		qp->rx_alloc_entry++;
> >>> +	}
> >>> +
> >>> +	memset(qp->rx_buff, 0, edma_total);
> >>> +
> >>> +	qp->rx_pkts = 0;
> >>> +	qp->tx_pkts = 0;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	struct ntb_queue_entry *entry;
> >>> +	struct ntb_edma_desc *in;
> >>> +	unsigned int len;
> >>> +	u32 idx;
> >>> +
> >>> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
> >>> +				     qp->rd_cons) == 0)
> >>> +		return 0;
> >>> +
> >>> +	idx = ntb_edma_ring_idx(qp->rd_cons);
> >>> +	in = NTB_DESC_RD_EP_I(qp, idx);
> >>> +	if (!(in->flags & DESC_DONE_FLAG))
> >>> +		return 0;
> >>> +
> >>> +	in->flags = 0;
> >>> +	len = in->len; /* might be smaller than entry->len */
> >>> +
> >>> +	entry = (struct ntb_queue_entry *)(in->data);
> >>> +	if (WARN_ON(!entry))
> >>> +		return 0;
> >>> +
> >>> +	if (in->flags & LINK_DOWN_FLAG) {
> >>> +		ntb_qp_link_down(qp);
> >>> +		qp->rd_cons++;
> >>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> >>> +		return 1;
> >>> +	}
> >>> +
> >>> +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
> >>> +
> >>> +	qp->rx_bytes += len;
> >>> +	qp->rx_pkts++;
> >>> +	qp->rd_cons++;
> >>> +
> >>> +	if (qp->rx_handler && qp->client_ready)
> >>> +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
> >>> +
> >>> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	struct ntb_queue_entry *entry;
> >>> +	struct ntb_edma_desc *in;
> >>> +	u32 idx;
> >>> +
> >>> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
> >>> +				     qp->wr_cons) == 0)
> >>> +		return 0;
> >>> +
> >>> +	idx = ntb_edma_ring_idx(qp->wr_cons);
> >>> +	in = NTB_DESC_WR_EP_I(qp, idx);
> >>> +
> >>> +	entry = (struct ntb_queue_entry *)(in->data);
> >>> +	if (WARN_ON(!entry))
> >>> +		return 0;
> >>> +
> >>> +	qp->wr_cons++;
> >>> +
> >>> +	if (qp->tx_handler)
> >>> +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
> >>> +
> >>> +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, read_work);
> >>> +	unsigned int i;
> >>> +
> >>> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> >>> +		if (!ntb_transport_edma_ep_read_complete(qp))
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (ntb_transport_edma_ep_read_complete(qp))
> >>> +		queue_work(qp->transport->wq, &qp->read_work);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, write_work);
> >>> +	unsigned int i;
> >>> +
> >>> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> >>> +		if (!ntb_transport_edma_ep_write_complete(qp))
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (ntb_transport_edma_ep_write_complete(qp))
> >>> +		queue_work(qp->transport->wq, &qp->write_work);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, write_work);
> >>> +	struct ntb_queue_entry *entry;
> >>> +	struct ntb_edma_desc *in;
> >>> +	unsigned int len;
> >>> +	void *cb_data;
> >>> +	u32 idx;
> >>> +
> >>> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
> >>> +					qp->wr_cons) != 0) {
> >>> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
> >>> +		smp_rmb();
> >>> +
> >>> +		idx = ntb_edma_ring_idx(qp->wr_cons);
> >>> +		in = NTB_DESC_WR_RC_I(qp, idx);
> >>> +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
> >>> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> >>> +			break;
> >>> +
> >>> +		in->data = 0;
> >>> +
> >>> +		cb_data = entry->cb_data;
> >>> +		len = entry->len;
> >>> +
> >>> +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
> >>> +
> >>> +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
> >>> +			ntb_qp_link_down(qp);
> >>> +			continue;
> >>> +		}
> >>> +
> >>> +		ntb_transport_edma_notify_peer(qp);
> >>> +
> >>> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> >>> +
> >>> +		if (qp->rx_handler && qp->client_ready)
> >>> +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
> >>> +
> >>> +		/* stat updates */
> >>> +		qp->rx_bytes += len;
> >>> +		qp->rx_pkts++;
> >>> +	}
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_write_cb(void *data,
> >>> +					   const struct dmaengine_result *res)
> >>> +{
> >>> +	struct ntb_queue_entry *entry = data;
> >>> +	struct ntb_transport_qp *qp = entry->qp;
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +	enum dmaengine_tx_result dma_err = res->result;
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +
> >>> +	switch (dma_err) {
> >>> +	case DMA_TRANS_READ_FAILED:
> >>> +	case DMA_TRANS_WRITE_FAILED:
> >>> +	case DMA_TRANS_ABORTED:
> >>> +		entry->errors++;
> >>> +		entry->len = -EIO;
> >>> +		break;
> >>> +	case DMA_TRANS_NOERROR:
> >>> +	default:
> >>> +		break;
> >>> +	}
> >>> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
> >>> +	sg_dma_address(&entry->sgl) = 0;
> >>> +
> >>> +	entry->flags |= DESC_DONE_FLAG;
> >>> +
> >>> +	queue_work(nt->wq, &qp->write_work);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, read_work);
> >>> +	struct ntb_edma_desc *in, __iomem *out;
> >>> +	struct ntb_queue_entry *entry;
> >>> +	unsigned int len;
> >>> +	void *cb_data;
> >>> +	u32 idx;
> >>> +
> >>> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
> >>> +					qp->rd_cons) != 0) {
> >>> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
> >>> +		smp_rmb();
> >>> +
> >>> +		idx = ntb_edma_ring_idx(qp->rd_cons);
> >>> +		in = NTB_DESC_RD_RC_I(qp, idx);
> >>> +		entry = (struct ntb_queue_entry *)in->data;
> >>> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> >>> +			break;
> >>> +
> >>> +		in->data = 0;
> >>> +
> >>> +		cb_data = entry->cb_data;
> >>> +		len = entry->len;
> >>> +
> >>> +		out = NTB_DESC_RD_RC_O(qp, idx);
> >>> +
> >>> +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
> >>> +
> >>> +		/*
> >>> +		 * No need to add barrier in-between to enforce ordering here.
> >>> +		 * The other side proceeds only after both flags and tail are
> >>> +		 * updated.
> >>> +		 */
> >>> +		iowrite32(entry->flags, &out->flags);
> >>> +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
> >>> +
> >>> +		ntb_transport_edma_notify_peer(qp);
> >>> +
> >>> +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
> >>> +			     &qp->tx_free_q);
> >>> +
> >>> +		if (qp->tx_handler)
> >>> +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
> >>> +
> >>> +		/* stat updates */
> >>> +		qp->tx_bytes += len;
> >>> +		qp->tx_pkts++;
> >>> +	}
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_read_cb(void *data,
> >>> +					  const struct dmaengine_result *res)
> >>> +{
> >>> +	struct ntb_queue_entry *entry = data;
> >>> +	struct ntb_transport_qp *qp = entry->qp;
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	enum dmaengine_tx_result dma_err = res->result;
> >>> +
> >>> +	switch (dma_err) {
> >>> +	case DMA_TRANS_READ_FAILED:
> >>> +	case DMA_TRANS_WRITE_FAILED:
> >>> +	case DMA_TRANS_ABORTED:
> >>> +		entry->errors++;
> >>> +		entry->len = -EIO;
> >>> +		break;
> >>> +	case DMA_TRANS_NOERROR:
> >>> +	default:
> >>> +		break;
> >>> +	}
> >>> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
> >>> +	sg_dma_address(&entry->sgl) = 0;
> >>> +
> >>> +	entry->flags |= DESC_DONE_FLAG;
> >>> +
> >>> +	queue_work(nt->wq, &qp->read_work);
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_rc_write_start(struct device *d,
> >>> +					     struct dma_chan *chan, size_t len,
> >>> +					     dma_addr_t ep_src, void *rc_dst,
> >>> +					     struct ntb_queue_entry *entry)
> >>> +{
> >>> +	struct scatterlist *sgl = &entry->sgl;
> >>> +	struct dma_async_tx_descriptor *txd;
> >>> +	struct dma_slave_config cfg;
> >>> +	dma_cookie_t cookie;
> >>> +	int nents, rc;
> >>> +
> >>> +	if (!d)
> >>> +		return -ENODEV;
> >>> +
> >>> +	if (!chan)
> >>> +		return -ENXIO;
> >>> +
> >>> +	if (WARN_ON(!ep_src || !rc_dst))
> >>> +		return -EINVAL;
> >>> +
> >>> +	if (WARN_ON(sg_dma_address(sgl)))
> >>> +		return -EINVAL;
> >>> +
> >>> +	sg_init_one(sgl, rc_dst, len);
> >>> +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
> >>> +	if (nents <= 0)
> >>> +		return -EIO;
> >>> +
> >>> +	memset(&cfg, 0, sizeof(cfg));
> >>> +	cfg.src_addr       = ep_src;
> >>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.direction      = DMA_DEV_TO_MEM;
> >>> +	rc = dmaengine_slave_config(chan, &cfg);
> >>> +	if (rc)
> >>> +		goto out_unmap;
> >>> +
> >>> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
> >>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> >>> +	if (!txd) {
> >>> +		rc = -EIO;
> >>> +		goto out_unmap;
> >>> +	}
> >>> +
> >>> +	txd->callback_result = ntb_transport_edma_rc_write_cb;
> >>> +	txd->callback_param = entry;
> >>> +
> >>> +	cookie = dmaengine_submit(txd);
> >>> +	if (dma_submit_error(cookie)) {
> >>> +		rc = -EIO;
> >>> +		goto out_unmap;
> >>> +	}
> >>> +	dma_async_issue_pending(chan);
> >>> +	return 0;
> >>> +out_unmap:
> >>> +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
> >>> +	return rc;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_rc_read_start(struct device *d,
> >>> +					    struct dma_chan *chan, size_t len,
> >>> +					    void *rc_src, dma_addr_t ep_dst,
> >>> +					    struct ntb_queue_entry *entry)
> >>> +{
> >>> +	struct scatterlist *sgl = &entry->sgl;
> >>> +	struct dma_async_tx_descriptor *txd;
> >>> +	struct dma_slave_config cfg;
> >>> +	dma_cookie_t cookie;
> >>> +	int nents, rc;
> >>> +
> >>> +	if (!d)
> >>> +		return -ENODEV;
> >>> +
> >>> +	if (!chan)
> >>> +		return -ENXIO;
> >>> +
> >>> +	if (WARN_ON(!rc_src || !ep_dst))
> >>> +		return -EINVAL;
> >>> +
> >>> +	if (WARN_ON(sg_dma_address(sgl)))
> >>> +		return -EINVAL;
> >>> +
> >>> +	sg_init_one(sgl, rc_src, len);
> >>> +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
> >>> +	if (nents <= 0)
> >>> +		return -EIO;
> >>> +
> >>> +	memset(&cfg, 0, sizeof(cfg));
> >>> +	cfg.dst_addr       = ep_dst;
> >>> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> >>> +	cfg.direction      = DMA_MEM_TO_DEV;
> >>> +	rc = dmaengine_slave_config(chan, &cfg);
> >>> +	if (rc)
> >>> +		goto out_unmap;
> >>> +
> >>> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
> >>> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> >>> +	if (!txd) {
> >>> +		rc = -EIO;
> >>> +		goto out_unmap;
> >>> +	}
> >>> +
> >>> +	txd->callback_result = ntb_transport_edma_rc_read_cb;
> >>> +	txd->callback_param = entry;
> >>> +
> >>> +	cookie = dmaengine_submit(txd);
> >>> +	if (dma_submit_error(cookie)) {
> >>> +		rc = -EIO;
> >>> +		goto out_unmap;
> >>> +	}
> >>> +	dma_async_issue_pending(chan);
> >>> +	return 0;
> >>> +out_unmap:
> >>> +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
> >>> +	return rc;
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_queue_entry *entry = container_of(
> >>> +				work, struct ntb_queue_entry, dma_work);
> >>> +	struct ntb_transport_qp *qp = entry->qp;
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	struct dma_chan *chan;
> >>> +	int rc;
> >>> +
> >>> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
> >>> +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
> >>> +					       entry->addr, entry->buf, entry);
> >>> +	if (rc) {
> >>> +		entry->errors++;
> >>> +		entry->len = -EIO;
> >>> +		entry->flags |= DESC_DONE_FLAG;
> >>> +		queue_work(nt->wq, &qp->write_work);
> >>> +		return;
> >>> +	}
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +	unsigned int budget = NTB_EDMA_MAX_POLL;
> >>> +	struct ntb_queue_entry *entry;
> >>> +	struct ntb_edma_desc *in;
> >>> +	dma_addr_t ep_src;
> >>> +	u32 len, idx;
> >>> +
> >>> +	while (budget--) {
> >>> +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
> >>> +					     qp->wr_issue) == 0)
> >>> +			break;
> >>> +
> >>> +		idx = ntb_edma_ring_idx(qp->wr_issue);
> >>> +		in = NTB_DESC_WR_RC_I(qp, idx);
> >>> +
> >>> +		len = READ_ONCE(in->len);
> >>> +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
> >>> +
> >>> +		/* Prepare 'entry' for write completion */
> >>> +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
> >>> +		if (!entry) {
> >>> +			qp->rx_err_no_buf++;
> >>> +			break;
> >>> +		}
> >>> +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
> >>> +			entry->flags &= ~DESC_DONE_FLAG;
> >>> +		entry->len = len; /* NB. entry->len can be <=0 */
> >>> +		entry->addr = ep_src;
> >>> +
> >>> +		/*
> >>> +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
> >>> +		 * so it needs to be set before wr_issue++.
> >>> +		 */
> >>> +		in->data = (uintptr_t)entry;
> >>> +
> >>> +		/* Ensure in->data visible before wr_issue++ */
> >>> +		smp_wmb();
> >>> +
> >>> +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
> >>> +
> >>> +		if (!len) {
> >>> +			entry->flags |= DESC_DONE_FLAG;
> >>> +			queue_work(nt->wq, &qp->write_work);
> >>> +			continue;
> >>> +		}
> >>> +
> >>> +		if (in->flags & LINK_DOWN_FLAG) {
> >>> +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
> >>> +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
> >>> +			queue_work(nt->wq, &qp->write_work);
> >>> +			continue;
> >>> +		}
> >>> +
> >>> +		queue_work(nt->wq, &entry->dma_work);
> >>> +	}
> >>> +
> >>> +	if (!budget)
> >>> +		tasklet_schedule(&qp->rxc_db_work);
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
> >>> +					    struct ntb_queue_entry *entry)
> >>> +{
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +	struct ntb_edma_desc *in, __iomem *out;
> >>> +	unsigned int len = entry->len;
> >>> +	struct dma_chan *chan;
> >>> +	u32 issue, idx, head;
> >>> +	dma_addr_t ep_dst;
> >>> +	int rc;
> >>> +
> >>> +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
> >>> +
> >>> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> >>> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> >>> +		issue = qp->rd_issue;
> >>> +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
> >>> +			qp->tx_ring_full++;
> >>> +			return -ENOSPC;
> >>> +		}
> >>> +
> >>> +		/*
> >>> +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
> >>> +		 * so it needs to be set before rd_issue++.
> >>> +		 */
> >>> +		idx = ntb_edma_ring_idx(issue);
> >>> +		in = NTB_DESC_RD_RC_I(qp, idx);
> >>> +		in->data = (uintptr_t)entry;
> >>> +
> >>> +		/* Make in->data visible before rd_issue++ */
> >>> +		smp_wmb();
> >>> +
> >>> +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
> >>> +	}
> >>> +
> >>> +	/* Publish the final transfer length to the EP side */
> >>> +	out = NTB_DESC_RD_RC_O(qp, idx);
> >>> +	iowrite32(len, &out->len);
> >>> +	ioread32(&out->len);
> >>> +
> >>> +	if (unlikely(!len)) {
> >>> +		entry->flags |= DESC_DONE_FLAG;
> >>> +		queue_work(nt->wq, &qp->read_work);
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
> >>> +	dma_rmb();
> >>> +
> >>> +	/* kick remote eDMA read transfer */
> >>> +	ep_dst = (dma_addr_t)in->addr;
> >>> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
> >>> +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
> >>> +					      entry->buf, ep_dst, entry);
> >>> +	if (rc) {
> >>> +		entry->errors++;
> >>> +		entry->len = -EIO;
> >>> +		entry->flags |= DESC_DONE_FLAG;
> >>> +		queue_work(nt->wq, &qp->read_work);
> >>> +	}
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
> >>> +					    struct ntb_queue_entry *entry)
> >>> +{
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	struct ntb_edma_desc *in, __iomem *out;
> >>> +	unsigned int len = entry->len;
> >>> +	dma_addr_t ep_src = 0;
> >>> +	u32 idx;
> >>> +	int rc;
> >>> +
> >>> +	if (likely(len)) {
> >>> +		ep_src = dma_map_single(dma_dev, entry->buf, len,
> >>> +					DMA_TO_DEVICE);
> >>> +		rc = dma_mapping_error(dma_dev, ep_src);
> >>> +		if (rc)
> >>> +			return rc;
> >>> +	}
> >>> +
> >>> +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> >>> +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
> >>> +			rc = -ENOSPC;
> >>> +			qp->tx_ring_full++;
> >>> +			goto out_unmap;
> >>> +		}
> >>> +
> >>> +		idx = ntb_edma_ring_idx(qp->wr_prod);
> >>> +		in  = NTB_DESC_WR_EP_I(qp, idx);
> >>> +		out = NTB_DESC_WR_EP_O(qp, idx);
> >>> +
> >>> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> >>> +		WARN_ON(entry->flags & DESC_DONE_FLAG);
> >>> +		in->flags = 0;
> >>> +		in->data  = (uintptr_t)entry;
> >>> +		entry->addr  = ep_src;
> >>> +
> >>> +		iowrite32(len,          &out->len);
> >>> +		iowrite32(entry->flags, &out->flags);
> >>> +		iowrite64(ep_src,       &out->addr);
> >>> +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
> >>> +
> >>> +		dma_wmb();
> >>> +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
> >>> +
> >>> +		qp->tx_bytes += len;
> >>> +		qp->tx_pkts++;
> >>> +	}
> >>> +
> >>> +	ntb_transport_edma_notify_peer(qp);
> >>> +
> >>> +	return 0;
> >>> +out_unmap:
> >>> +	if (likely(len))
> >>> +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
> >>> +	return rc;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
> >>> +					 struct ntb_queue_entry *entry,
> >>> +					 void *cb, void *data, unsigned int len,
> >>> +					 unsigned int flags)
> >>> +{
> >>> +	struct device *dma_dev;
> >>> +
> >>> +	if (entry->addr) {
> >>> +		/* Deferred unmap */
> >>> +		dma_dev = get_dma_dev(qp->ndev);
> >>> +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
> >>> +	}
> >>> +
> >>> +	entry->cb_data = cb;
> >>> +	entry->buf = data;
> >>> +	entry->len = len;
> >>> +	entry->flags = flags;
> >>> +	entry->errors = 0;
> >>> +	entry->addr = 0;
> >>> +
> >>> +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
> >>> +
> >>> +	if (ntb_qp_edma_is_ep(qp))
> >>> +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
> >>> +	else
> >>> +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
> >>> +					    struct ntb_queue_entry *entry)
> >>> +{
> >>> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> >>> +	struct ntb_edma_desc *in, __iomem *out;
> >>> +	unsigned int len = entry->len;
> >>> +	void *data = entry->buf;
> >>> +	dma_addr_t ep_dst;
> >>> +	u32 idx;
> >>> +	int rc;
> >>> +
> >>> +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
> >>> +	rc = dma_mapping_error(dma_dev, ep_dst);
> >>> +	if (rc)
> >>> +		return rc;
> >>> +
> >>> +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
> >>> +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
> >>> +				       READ_ONCE(qp->rd_cons))) {
> >>> +			rc = -ENOSPC;
> >>> +			goto out_unmap;
> >>> +		}
> >>> +
> >>> +		idx = ntb_edma_ring_idx(qp->rd_prod);
> >>> +		in = NTB_DESC_RD_EP_I(qp, idx);
> >>> +		out = NTB_DESC_RD_EP_O(qp, idx);
> >>> +
> >>> +		iowrite32(len, &out->len);
> >>> +		iowrite64(ep_dst, &out->addr);
> >>> +
> >>> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> >>> +		in->data = (uintptr_t)entry;
> >>> +		entry->addr = ep_dst;
> >>> +
> >>> +		/* Ensure len/addr are visible before the head update */
> >>> +		dma_wmb();
> >>> +
> >>> +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
> >>> +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
> >>> +	}
> >>> +	return 0;
> >>> +out_unmap:
> >>> +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
> >>> +	return rc;
> >>> +}
> >>> +
> >>> +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
> >>> +					 struct ntb_queue_entry *entry)
> >>> +{
> >>> +	int rc;
> >>> +
> >>> +	/* The behaviour is the same as the default backend for RC side */
> >>> +	if (ntb_qp_edma_is_ep(qp)) {
> >>> +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
> >>> +		if (rc) {
> >>> +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> >>> +				     &qp->rx_free_q);
> >>> +			return rc;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
> >>> +
> >>> +	if (qp->active)
> >>> +		tasklet_schedule(&qp->rxc_db_work);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
> >>> +{
> >>> +	struct ntb_transport_ctx *nt = qp->transport;
> >>> +
> >>> +	if (ntb_qp_edma_is_rc(qp))
> >>> +		ntb_transport_edma_rc_poll(qp);
> >>> +	else if (ntb_qp_edma_is_ep(qp)) {
> >>> +		/*
> >>> +		 * Make sure we poll the rings even if an eDMA interrupt is
> >>> +		 * cleared on the RC side earlier.
> >>> +		 */
> >>> +		queue_work(nt->wq, &qp->read_work);
> >>> +		queue_work(nt->wq, &qp->write_work);
> >>> +	} else
> >>> +		/* Unreachable */
> >>> +		WARN_ON_ONCE(1);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_read_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, read_work);
> >>> +
> >>> +	if (ntb_qp_edma_is_rc(qp))
> >>> +		ntb_transport_edma_rc_read_complete_work(work);
> >>> +	else if (ntb_qp_edma_is_ep(qp))
> >>> +		ntb_transport_edma_ep_read_work(work);
> >>> +	else
> >>> +		/* Unreachable */
> >>> +		WARN_ON_ONCE(1);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_write_work(struct work_struct *work)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = container_of(
> >>> +				work, struct ntb_transport_qp, write_work);
> >>> +
> >>> +	if (ntb_qp_edma_is_rc(qp))
> >>> +		ntb_transport_edma_rc_write_complete_work(work);
> >>> +	else if (ntb_qp_edma_is_ep(qp))
> >>> +		ntb_transport_edma_ep_write_work(work);
> >>> +	else
> >>> +		/* Unreachable */
> >>> +		WARN_ON_ONCE(1);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> >>> +					  unsigned int qp_num)
> >>> +{
> >>> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> >>> +
> >>> +	qp->wr_cons = 0;
> >>> +	qp->rd_cons = 0;
> >>> +	qp->wr_prod = 0;
> >>> +	qp->rd_prod = 0;
> >>> +	qp->wr_issue = 0;
> >>> +	qp->rd_issue = 0;
> >>> +
> >>> +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
> >>> +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
> >>> +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
> >>> +}
> >>> +
> >>> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> >>> +					    struct ntb_transport_qp *qp)
> >>> +{
> >>> +	spin_lock_init(&qp->ep_tx_lock);
> >>> +	spin_lock_init(&qp->ep_rx_lock);
> >>> +	spin_lock_init(&qp->rc_lock);
> >>> +}
> >>> +
> >>> +static const struct ntb_transport_backend_ops edma_backend_ops = {
> >>> +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> >>> +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> >>> +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> >>> +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> >>> +	.rx_poll = ntb_transport_edma_rx_poll,
> >>> +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> >>> +};
> >>> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> >>> +
> >>>  /**
> >>>   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> >>>   * @qp: NTB transport layer queue to be enabled
> >>
>
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Frank Li 2 months, 1 week ago
On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> Add a new transport backend that uses a remote DesignWare eDMA engine
> located on the NTB endpoint to move data between host and endpoint.
>
> In this mode:
>
>   - The endpoint exposes a dedicated memory window that contains the
>     eDMA register block followed by a small control structure (struct
>     ntb_edma_info) and per-channel linked-list (LL) rings.
>
>   - On the endpoint side, ntb_edma_setup_mws() allocates the control
>     structure and LL rings in endpoint memory, then programs an inbound
>     iATU region so that the host can access them via a peer MW.
>
>   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
>     ntb_edma_info and configures a dw-edma DMA device to use the LL
>     rings provided by the endpoint.
>
>   - ntb_transport is extended with a new backend_ops implementation that
>     routes TX and RX enqueue/poll operations through the remote eDMA
>     rings while keeping the existing shared-memory backend intact.
>
>   - The host signals the endpoint via a dedicated DMA read channel.
>     'use_msi' module option is ignored when 'use_remote_edma=1'.
>
> The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
> module parameter (use_remote_edma). When disabled, the existing
> ntb_transport behaviour is unchanged.
>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  drivers/ntb/Kconfig                           |   11 +
>  drivers/ntb/Makefile                          |    3 +
>  drivers/ntb/ntb_edma.c                        |  628 ++++++++
>  drivers/ntb/ntb_edma.h                        |  128 ++
>  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
>  5 files changed, 2048 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/ntb/ntb_edma.c
>  create mode 100644 drivers/ntb/ntb_edma.h
>  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
>
> diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
> index df16c755b4da..db63f02bb116 100644
> --- a/drivers/ntb/Kconfig
> +++ b/drivers/ntb/Kconfig
> @@ -37,4 +37,15 @@ config NTB_TRANSPORT
>
>  	 If unsure, say N.
>
> +config NTB_TRANSPORT_EDMA
> +	bool "NTB Transport backed by remote eDMA"
> +	depends on NTB_TRANSPORT
> +	depends on PCI
> +	select DMA_ENGINE
> +	help
> +	  Enable a transport backend that uses a remote DesignWare eDMA engine
> +	  exposed through a dedicated NTB memory window. The host uses the
> +	  endpoint's eDMA engine to move data in both directions.
> +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
> +
>  endif # NTB
> diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
> index 3a6fa181ff99..51f0e1e3aec7 100644
> --- a/drivers/ntb/Makefile
> +++ b/drivers/ntb/Makefile
> @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
>
>  ntb-y			:= core.o
>  ntb-$(CONFIG_NTB_MSI)	+= msi.o
> +
> +ntb_transport-y					:= ntb_transport_core.o
> +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
> diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
> new file mode 100644
> index 000000000000..cb35e0d56aa8
> --- /dev/null
> +++ b/drivers/ntb/ntb_edma.c
> @@ -0,0 +1,628 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/pci.h>
> +#include <linux/ntb.h>
> +#include <linux/io.h>
> +#include <linux/iommu.h>
> +#include <linux/dmaengine.h>
> +#include <linux/pci-epc.h>
> +#include <linux/dma/edma.h>
> +#include <linux/irq.h>
> +#include <linux/irqdomain.h>
> +#include <linux/of.h>
> +#include <linux/of_irq.h>
> +#include <dt-bindings/interrupt-controller/arm-gic.h>
> +
> +#include "ntb_edma.h"
> +
> +/*
> + * The interrupt register offsets below are taken from the DesignWare
> + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> + * backend currently only supports this layout.
> + */
> +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> +#define DMA_WRITE_INT_MASK_OFF     0x54
> +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> +#define DMA_READ_INT_STATUS_OFF    0xa0
> +#define DMA_READ_INT_MASK_OFF      0xa8
> +#define DMA_READ_INT_CLEAR_OFF     0xac

Not sure why you need to access the eDMA registers directly, since the eDMA
driver is already exported as a dmaengine driver.
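
For example, a consumer can get at the channels without ever mapping the
register block; a minimal sketch (my_edma_filter and filter_args are
illustrative, not from this patch):

	dma_cap_mask_t mask;
	struct dma_chan *chan;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

	/* match a channel of the already-probed eDMA dmaengine device */
	chan = dma_request_channel(mask, my_edma_filter, &filter_args);
	if (!chan)
		return -ENODEV;

All of the DMA_{WRITE,READ}_INT_* masking/acking then stays inside the
dw-edma driver; the consumer only deals with descriptors and completion
callbacks.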

> +
> +#define NTB_EDMA_NOTIFY_MAX_QP		64
> +
> +static unsigned int edma_spi = 417; /* 0x1a1 */
> +module_param(edma_spi, uint, 0644);
> +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
> +
> +static u64 edma_regs_phys = 0xe65d5000;
> +module_param(edma_regs_phys, ullong, 0644);
> +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
> +
> +static unsigned long edma_regs_size = 0x1200;
> +module_param(edma_regs_size, ulong, 0644);
> +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
> +
> +struct ntb_edma_intr {
> +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
> +};
> +
> +struct ntb_edma_ctx {
> +	void *ll_wr_virt[EDMA_WR_CH_NUM];
> +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
> +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
> +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
> +
> +	struct ntb_edma_intr *intr_ep_virt;
> +	dma_addr_t intr_ep_phys;
> +	struct ntb_edma_intr *intr_rc_virt;
> +	dma_addr_t intr_rc_phys;
> +	u32 notify_qp_max;
> +
> +	bool initialized;
> +};
> +
> +static struct ntb_edma_ctx edma_ctx;
> +
> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> +
> +struct ntb_edma_interrupt {
> +	int virq;
> +	void __iomem *base;
> +	ntb_edma_interrupt_cb_t cb;
> +	void *data;
> +};
> +
> +static struct ntb_edma_interrupt ntb_edma_intr;
> +
> +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
> +{
> +	struct device_node *np = dev_of_node(dev);
> +	struct device_node *parent;
> +	struct irq_fwspec fwspec = { 0 };
> +	int virq;
> +
> +	parent = of_irq_find_parent(np);
> +	if (!parent)
> +		return -ENODEV;
> +
> +	fwspec.fwnode      = of_fwnode_handle(parent);
> +	fwspec.param_count = 3;
> +	fwspec.param[0]    = GIC_SPI;
> +	fwspec.param[1]    = spi;
> +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
> +
> +	virq = irq_create_fwspec_mapping(&fwspec);
> +	of_node_put(parent);
> +	return (virq > 0) ? virq : -EINVAL;
> +}
> +
> +static irqreturn_t ntb_edma_isr(int irq, void *data)
> +{

Not sure why dw_edma_interrupt_write()/read() would not work for your case.
You should be able to just register a completion callback with the dmaengine.
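
E.g. something along these lines (just a sketch; it assumes the EP side gets
its channels from a locally probed dw-edma dmaengine instance, and 'my_ctx'
with its fields is illustrative):

static void my_done_cb(void *param, const struct dmaengine_result *res)
{
	struct my_ctx *ctx = param;	/* illustrative consumer context */

	if (res->result != DMA_TRANS_NOERROR)
		ctx->errors++;

	/*
	 * The dw-edma core has already cleared the channel interrupt and
	 * completed the descriptor before this callback runs, so no extra
	 * SPI handler or INT_MASK/INT_CLEAR writes are needed here.
	 */
	queue_work(ctx->wq, &ctx->work);
}

The callback is attached via txd->callback_result/callback_param at prep
time, exactly as the RC-side code in this patch already does.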

> +	struct ntb_edma_interrupt *v = data;
> +	u32 mask = BIT(EDMA_RD_CH_NUM);
> +	u32 i, val;
> +
> +	/*
> +	 * We do not ack interrupts here but instead we mask all local interrupt
> +	 * sources except the read channel used for notification. This reduces
> +	 * needless ISR invocations.
> +	 *
> +	 * In theory we could configure LIE=1/RIE=0 only for the notification
> +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
> +	 * require intrusive changes to the dw-edma core.
> +	 *
> +	 * Note: The host side may have already cleared the read interrupt used
> +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
> +	 * way to detect it. As a result, we cannot reliably tell which specific
> +	 * channel triggered this interrupt. intr_ep_virt->db[i] teaches us
> +	 * instead.
> +	 */
> +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
> +
> +	if (!v->cb || !edma_ctx.intr_ep_virt)
> +		return IRQ_HANDLED;
> +
> +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
> +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
> +		if (!val)
> +			continue;
> +
> +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
> +		v->cb(v->data, i);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
...
> +
> +int ntb_edma_setup_peer(struct ntb_dev *ndev)
> +{
> +	struct ntb_edma_info *info;
> +	unsigned int wr_cnt, rd_cnt;
> +	struct dw_edma_chip *chip;
> +	void __iomem *edma_virt;
> +	phys_addr_t edma_phys;
> +	resource_size_t mw_size;
> +	u64 off = EDMA_REG_SIZE;
> +	int peer_mw, mw_index;
> +	unsigned int i;
> +	int ret;
> +
> +	peer_mw = ntb_peer_mw_count(ndev);
> +	if (peer_mw <= 0)
> +		return -ENODEV;
> +
> +	mw_index = peer_mw - 1; /* last MW */
> +
> +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
> +				   &mw_size);
> +	if (ret)
> +		return -1;
> +
> +	edma_virt = ioremap(edma_phys, mw_size);
> +
> +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
> +	if (!chip) {
> +		ret = -ENOMEM;
> +		return ret;
> +	}
> +
> +	chip->dev = &ndev->pdev->dev;
> +	chip->nr_irqs = 4;
> +	chip->ops = &ntb_edma_ops;
> +	chip->flags = 0;
> +	chip->reg_base = edma_virt;
> +	chip->mf = EDMA_MF_EDMA_UNROLL;
> +
> +	info = edma_virt + off;
> +	if (info->magic != NTB_EDMA_INFO_MAGIC)
> +		return -EINVAL;
> +	wr_cnt = info->wr_cnt;
> +	rd_cnt = info->rd_cnt;
> +	chip->ll_wr_cnt = wr_cnt;
> +	chip->ll_rd_cnt = rd_cnt;
> +	off += PAGE_SIZE;
> +
> +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> +	edma_ctx.intr_ep_phys = info->intr_dar_base;
> +	if (edma_ctx.intr_ep_phys) {
> +		edma_ctx.intr_rc_virt =
> +			dma_alloc_coherent(&ndev->pdev->dev,
> +					   sizeof(struct ntb_edma_intr),
> +					   &edma_ctx.intr_rc_phys,
> +					   GFP_KERNEL);
> +		if (!edma_ctx.intr_rc_virt)
> +			return -ENOMEM;
> +		memset(edma_ctx.intr_rc_virt, 0,
> +		       sizeof(struct ntb_edma_intr));
> +	}
> +
> +	for (i = 0; i < wr_cnt; i++) {
> +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
> +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
> +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
> +		off += DMA_LLP_MEM_SIZE;
> +	}
> +	for (i = 0; i < rd_cnt; i++) {
> +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
> +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
> +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
> +		off += DMA_LLP_MEM_SIZE;
> +	}
> +
> +	if (!pci_dev_msi_enabled(ndev->pdev))
> +		return -ENXIO;
> +
> +	ret = dw_edma_probe(chip);

I think dw_edma_probe() should be called from ntb_hw_epf.c, which would then
provide the DMA engine support.

On the EP side, the default DWC controller driver presumably already sets up
the eDMA engine, so with the correct filter function you should be able to
get a dma chan.
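
On the RC side that would look roughly like the sketch below (function name
is illustrative; the ll_region_wr/rd setup would still be filled in from
ntb_edma_info as the patch does today):

static int ntb_epf_edma_probe(struct ntb_dev *ndev, void __iomem *regs)
{
	struct dw_edma_chip *chip;

	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;

	chip->dev      = &ndev->pdev->dev;
	chip->mf       = EDMA_MF_EDMA_UNROLL;
	chip->reg_base = regs;
	chip->nr_irqs  = 4;
	chip->ops      = &ntb_edma_ops;	/* same ops struct the patch defines */

	/* fill chip->ll_region_wr[]/ll_region_rd[] from ntb_edma_info here */

	return dw_edma_probe(chip);
}

ntb_transport would then only need to request channels with a filter that
matches that device, much like ntb_edma_setup_chans() already does.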

Frank

> +	if (ret) {
> +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +struct ntb_edma_filter {
> +	struct device *dma_dev;
> +	u32 direction;
> +};
> +
> +static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
> +{
> +	struct ntb_edma_filter *filter = arg;
> +	u32 dir = filter->direction;
> +	struct dma_slave_caps caps;
> +	int ret;
> +
> +	if (chan->device->dev != filter->dma_dev)
> +		return false;
> +
> +	ret = dma_get_slave_caps(chan, &caps);
> +	if (ret < 0)
> +		return false;
> +
> +	return !!(caps.directions & dir);
> +}
> +
> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < edma->num_wr_chan; i++)
> +		dma_release_channel(edma->wr_chan[i]);
> +
> +	for (i = 0; i < edma->num_rd_chan; i++)
> +		dma_release_channel(edma->rd_chan[i]);
> +
> +	if (edma->intr_chan)
> +		dma_release_channel(edma->intr_chan);
> +}
> +
> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
> +{
> +	struct ntb_edma_filter filter;
> +	dma_cap_mask_t dma_mask;
> +	unsigned int i;
> +
> +	dma_cap_zero(dma_mask);
> +	dma_cap_set(DMA_SLAVE, dma_mask);
> +
> +	memset(edma, 0, sizeof(*edma));
> +	edma->dev = dma_dev;
> +
> +	filter.dma_dev = dma_dev;
> +	filter.direction = BIT(DMA_DEV_TO_MEM);
> +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> +		edma->wr_chan[i] = dma_request_channel(dma_mask,
> +						       ntb_edma_filter_fn,
> +						       &filter);
> +		if (!edma->wr_chan[i])
> +			break;
> +		edma->num_wr_chan++;
> +	}
> +
> +	filter.direction = BIT(DMA_MEM_TO_DEV);
> +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
> +		edma->rd_chan[i] = dma_request_channel(dma_mask,
> +						       ntb_edma_filter_fn,
> +						       &filter);
> +		if (!edma->rd_chan[i])
> +			break;
> +		edma->num_rd_chan++;
> +	}
> +
> +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
> +					      &filter);
> +	if (!edma->intr_chan)
> +		dev_warn(dma_dev,
> +			 "Remote eDMA notify channel could not be allocated\n");
> +
> +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
> +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
> +		ntb_edma_teardown_chans(edma);
> +		return -ENODEV;
> +	}
> +	return 0;
> +}
> +
> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> +				    remote_edma_dir_t dir)
> +{
> +	unsigned int n, cur, idx;
> +	struct dma_chan **chans;
> +	atomic_t *cur_chan;
> +
> +	if (dir == REMOTE_EDMA_WRITE) {
> +		n = edma->num_wr_chan;
> +		chans = edma->wr_chan;
> +		cur_chan = &edma->cur_wr_chan;
> +	} else {
> +		n = edma->num_rd_chan;
> +		chans = edma->rd_chan;
> +		cur_chan = &edma->cur_rd_chan;
> +	}
> +	if (WARN_ON_ONCE(!n))
> +		return NULL;
> +
> +	/* Simple round-robin */
> +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
> +	idx = cur % n;
> +	return chans[idx];
> +}
> +
> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
> +{
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	struct scatterlist sgl;
> +	dma_cookie_t cookie;
> +	struct device *dev;
> +
> +	if (!edma || !edma->intr_chan)
> +		return -ENXIO;
> +
> +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
> +		return -EINVAL;
> +
> +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
> +		return -EINVAL;
> +
> +	dev = edma->dev;
> +	if (!dev)
> +		return -ENODEV;
> +
> +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
> +
> +	/* Ensure store is visible before kicking the DMA transfer */
> +	wmb();
> +
> +	sg_init_table(&sgl, 1);
> +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
> +	sg_dma_len(&sgl) = sizeof(u32);
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_MEM_TO_DEV;
> +
> +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
> +		return -EINVAL;
> +
> +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
> +				      DMA_MEM_TO_DEV,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd)
> +		return -ENOSPC;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie))
> +		return -ENOSPC;
> +
> +	dma_async_issue_pending(edma->intr_chan);
> +	return 0;
> +}
> diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
> new file mode 100644
> index 000000000000..da0451827edb
> --- /dev/null
> +++ b/drivers/ntb/ntb_edma.h
> @@ -0,0 +1,128 @@
> +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
> +#ifndef _NTB_EDMA_H_
> +#define _NTB_EDMA_H_
> +
> +#include <linux/completion.h>
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +
> +#define EDMA_REG_SIZE		SZ_64K
> +#define DMA_LLP_MEM_SIZE	SZ_4K
> +#define EDMA_WR_CH_NUM		4
> +#define EDMA_RD_CH_NUM		4
> +#define NTB_EDMA_MAX_CH		8
> +
> +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
> +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
> +
> +#define NTB_EDMA_RING_ORDER	7
> +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
> +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
> +
> +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> +
> +/*
> + * REMOTE_EDMA_EP:
> + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
> + *
> + * REMOTE_EDMA_RC:
> + *   Root Complex controls the endpoint eDMA through the shared MW and
> + *   drives reads/writes on behalf of the host.
> + */
> +typedef enum {
> +	REMOTE_EDMA_UNKNOWN,
> +	REMOTE_EDMA_EP,
> +	REMOTE_EDMA_RC,
> +} remote_edma_mode_t;
> +
> +typedef enum {
> +	REMOTE_EDMA_WRITE,
> +	REMOTE_EDMA_READ,
> +} remote_edma_dir_t;
> +
> +/*
> + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
> + *
> + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
> + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
> + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
> + *                                RC configures via dw_edma)
> + *
> + * ntb_edma_setup_mws() on EP:
> + *   - allocates ntb_edma_info and LLs in EP memory
> + *   - programs inbound iATU so that RC peer MW[n] points at this block
> + *
> + * ntb_edma_setup_peer() on RC:
> + *   - ioremaps peer MW[n]
> + *   - reads ntb_edma_info
> + *   - sets up dw_edma_chip ll_region_* from that info
> + */
> +struct ntb_edma_info {
> +	u32 magic;
> +	u16 wr_cnt;
> +	u16 rd_cnt;
> +	u64 regs_phys;
> +	u32 ll_stride;
> +	u32 rsvd;
> +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
> +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
> +
> +	u64 intr_dar_base;
> +} __packed;
> +
> +struct ll_dma_addrs {
> +	dma_addr_t wr[EDMA_WR_CH_NUM];
> +	dma_addr_t rd[EDMA_RD_CH_NUM];
> +};
> +
> +struct ntb_edma_chans {
> +	struct device *dev;
> +
> +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
> +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
> +	struct dma_chan *intr_chan;
> +
> +	unsigned int num_wr_chan;
> +	unsigned int num_rd_chan;
> +	atomic_t cur_wr_chan;
> +	atomic_t cur_rd_chan;
> +};
> +
> +static __always_inline u32 ntb_edma_ring_idx(u32 v)
> +{
> +	return v & NTB_EDMA_RING_MASK;
> +}
> +
> +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
> +{
> +	if (head >= tail) {
> +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
> +		return head - tail;
> +	}
> +
> +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
> +	return U32_MAX - tail + head + 1;
> +}
> +
> +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
> +{
> +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
> +}
> +
> +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
> +{
> +	return ntb_edma_ring_free_entry(head, tail) == 0;
> +}
> +
> +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> +		       ntb_edma_interrupt_cb_t cb, void *data);
> +void ntb_edma_teardown_isr(struct device *dev);
> +int ntb_edma_setup_mws(struct ntb_dev *ndev);
> +int ntb_edma_setup_peer(struct ntb_dev *ndev);
> +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
> +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> +				    remote_edma_dir_t dir);
> +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
> +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
> +
> +#endif
> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
> similarity index 65%
> rename from drivers/ntb/ntb_transport.c
> rename to drivers/ntb/ntb_transport_core.c
> index 907db6c93d4d..48d48921978d 100644
> --- a/drivers/ntb/ntb_transport.c
> +++ b/drivers/ntb/ntb_transport_core.c
> @@ -47,6 +47,9 @@
>   * Contact Information:
>   * Jon Mason <jon.mason@intel.com>
>   */
> +#include <linux/atomic.h>
> +#include <linux/bug.h>
> +#include <linux/compiler.h>
>  #include <linux/debugfs.h>
>  #include <linux/delay.h>
>  #include <linux/dmaengine.h>
> @@ -71,6 +74,8 @@
>  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
>  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
>
> +#define NTB_EDMA_MAX_POLL		32
> +
>  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
>  MODULE_VERSION(NTB_TRANSPORT_VER);
>  MODULE_LICENSE("Dual BSD/GPL");
> @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
>  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
>  #endif
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +#include "ntb_edma.h"
> +static bool use_remote_edma;
> +module_param(use_remote_edma, bool, 0644);
> +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
> +#endif
> +
>  static struct dentry *nt_debugfs_dir;
>
>  /* Only two-ports NTB devices are supported */
> @@ -125,6 +137,14 @@ struct ntb_queue_entry {
>  		struct ntb_payload_header __iomem *tx_hdr;
>  		struct ntb_payload_header *rx_hdr;
>  	};
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	dma_addr_t addr;
> +
> +	/* Used by RC side only */
> +	struct scatterlist sgl;
> +	struct work_struct dma_work;
> +#endif
>  };
>
>  struct ntb_rx_info {
> @@ -202,6 +222,33 @@ struct ntb_transport_qp {
>  	int msi_irq;
>  	struct ntb_msi_desc msi_desc;
>  	struct ntb_msi_desc peer_msi_desc;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	/*
> +	 * For ensuring peer notification in non-atomic context.
> +	 * ntb_peer_db_set might sleep or schedule.
> +	 */
> +	struct work_struct db_work;
> +
> +	/*
> +	 * wr: remote eDMA write transfer (EP -> RC direction)
> +	 * rd: remote eDMA read transfer (RC -> EP direction)
> +	 */
> +	u32 wr_cons;
> +	u32 rd_cons;
> +	u32 wr_prod;
> +	u32 rd_prod;
> +	u32 wr_issue;
> +	u32 rd_issue;
> +
> +	spinlock_t ep_tx_lock;
> +	spinlock_t ep_rx_lock;
> +	spinlock_t rc_lock;
> +
> +	/* Completion work for read/write transfers. */
> +	struct work_struct read_work;
> +	struct work_struct write_work;
> +#endif
>  };
>
>  struct ntb_transport_mw {
> @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
>
>  	/* Make sure workq of link event be executed serially */
>  	struct mutex link_event_lock;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	remote_edma_mode_t remote_edma_mode;
> +	struct device *dma_dev;
> +	struct workqueue_struct *wq;
> +	struct ntb_edma_chans edma;
> +#endif
>  };
>
>  enum {
> @@ -262,6 +316,19 @@ struct ntb_payload_header {
>  	unsigned int flags;
>  };
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> +				   unsigned int *mw_count);
> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num);
> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> +					    struct ntb_transport_qp *qp);
> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> +
>  /*
>   * Return the device that should be used for DMA mapping.
>   *
> @@ -298,7 +365,7 @@ enum {
>  	container_of((__drv), struct ntb_transport_client, driver)
>
>  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
> -#define NTB_QP_DEF_NUM_ENTRIES	100
> +#define NTB_QP_DEF_NUM_ENTRIES	128
>  #define NTB_LINK_DOWN_TIMEOUT	10
>
>  static void ntb_transport_rxc_db(unsigned long data);
> @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
>  	count = ntb_spad_count(nt->ndev);
>  	for (i = 0; i < count; i++)
>  		ntb_spad_write(nt->ndev, i, 0);
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_edma_teardown_chans(&nt->edma);
> +#endif
>  }
>
>  static void ntb_transport_link_cleanup_work(struct work_struct *work)
> @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>
>  	/* send the local info, in the opposite order of the way we read it */
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	rc = ntb_transport_edma_ep_init(nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
> +		return;
> +	}
> +#endif
> +
>  	if (nt->use_msi) {
>  		rc = ntb_msi_setup_mws(ndev);
>  		if (rc) {
> @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
>
>  	nt->link_is_up = true;
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	rc = ntb_transport_edma_rc_init(nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
> +		goto out1;
> +	}
> +#endif
> +
>  	for (i = 0; i < nt->qp_count; i++) {
>  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
>
> @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
>  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
>  };
>
> +static const struct ntb_transport_backend_ops edma_backend_ops;
> +
>  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  {
>  	struct ntb_transport_ctx *nt;
> @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>
>  	nt->ndev = ndev;
>
> -	nt->backend_ops = default_backend_ops;
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	if (use_remote_edma) {
> +		rc = ntb_transport_edma_init(nt, &mw_count);
> +		if (rc) {
> +			nt->mw_count = 0;
> +			goto err;
> +		}
> +		nt->backend_ops = edma_backend_ops;
> +
> +		/*
> +		 * On remote eDMA mode, we reserve a read channel for Host->EP
> +		 * interruption.
> +		 */
> +		use_msi = false;
> +	} else
> +#endif
> +		nt->backend_ops = default_backend_ops;
>
>  	/*
>  	 * If we are using MSI, and have at least one extra memory window,
> @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  		rc = ntb_transport_init_queue(nt, i);
>  		if (rc)
>  			goto err2;
> +
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +		ntb_transport_edma_init_queue(nt, i);
> +#endif
>  	}
>
>  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
> @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
>  	}
>  	kfree(nt->mw_vec);
>  err:
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_transport_edma_uninit(nt);
> +#endif
>  	kfree(nt);
>  	return rc;
>  }
> @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>
>  	nt->qp_bitmap_free &= ~qp_bit;
>
> +	qp->qp_bit = qp_bit;
>  	qp->cb_data = data;
>  	qp->rx_handler = handlers->rx_handler;
>  	qp->tx_handler = handlers->tx_handler;
>  	qp->event_handler = handlers->event_handler;
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	ntb_transport_edma_create_queue(nt, qp);
> +#endif
> +
>  	dma_cap_zero(dma_mask);
>  	dma_cap_set(DMA_MEMCPY, dma_mask);
>
> @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
>  			goto err1;
>
>  		entry->qp = qp;
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> +#endif
>  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
>  			     &qp->rx_free_q);
>  	}
> @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
>   */
>  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>  {
> -	struct pci_dev *pdev;
>  	struct ntb_queue_entry *entry;
> +	struct pci_dev *pdev;
>  	u64 qp_bit;
>
>  	if (!qp)
> @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
>  	tasklet_kill(&qp->rxc_db_work);
>
>  	cancel_delayed_work_sync(&qp->link_work);
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +	cancel_work_sync(&qp->read_work);
> +	cancel_work_sync(&qp->write_work);
> +#endif
>
>  	qp->cb_data = NULL;
>  	qp->rx_handler = NULL;
> @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
>  }
>  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
>
> +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> +/*
> + * Remote eDMA mode implementation
> + */
> +struct ntb_edma_desc {
> +	u32 len;
> +	u32 flags;
> +	u64 addr; /* DMA address */
> +	u64 data;
> +};
> +
> +struct ntb_edma_ring {
> +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
> +	u32 head;
> +	u32 tail;
> +};
> +
> +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
> +
> +#define __NTB_EDMA_CHECK_INDEX(_i)					\
> +({									\
> +	unsigned long __i = (unsigned long)(_i);			\
> +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
> +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
> +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
> +	__i;								\
> +})
> +
> +#define NTB_EDMA_DESC_I(qp, i, n)					\
> +({									\
> +	typeof(qp) __qp = (qp);						\
> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> +	(struct ntb_edma_desc *)					\
> +		((char *)(__qp)->rx_buff +				\
> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> +		 NTB_EDMA_DESC_OFF(__i));				\
> +})
> +
> +#define NTB_EDMA_DESC_O(qp, i, n)					\
> +({									\
> +	typeof(qp) __qp = (qp);						\
> +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> +	(struct ntb_edma_desc __iomem *)				\
> +		((char __iomem *)(__qp)->tx_mw +			\
> +		 (sizeof(struct ntb_edma_ring) * n) +			\
> +		 NTB_EDMA_DESC_OFF(__i));				\
> +})
> +
> +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, head)))
> +#define NTB_EDMA_HEAD_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, head)))
> +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, tail)))
> +#define NTB_EDMA_TAIL_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> +				(sizeof(struct ntb_edma_ring) * n) +	\
> +				offsetof(struct ntb_edma_ring, tail)))
> +
> +/*
> + * Macro naming rule:
> + *   NTB_DESC_RD_EP_I (as an example)
> + *            ^^ ^^ ^
> + *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
> + *            :  `----- Who uses this macro.
> + *            `-------- DESC / HEAD / TAIL
> + *
> + * Read transfers (RC->EP):
> + *
> + *   EP view (outbound, written via NTB):
> + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
> + *           [ len ][ flags ][ addr ][ data ]
> + *           [ len ][ flags ][ addr ][ data ]
> + *           :
> + *           [ len ][ flags ][ addr ][ data ]
> + *       - head: NTB_HEAD_RD_EP_O(qp)
> + *       - tail: NTB_TAIL_RD_EP_I(qp)
> + *
> + *   RC view (inbound, local mapping):
> + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
> + *           [ len ][ flags ][ addr ][ data ]
> + *           [ len ][ flags ][ addr ][ data ]
> + *           :
> + *           [ len ][ flags ][ addr ][ data ]
> + *       - head: NTB_HEAD_RD_RC_I(qp)
> + *       - tail: NTB_TAIL_RD_RC_O(qp)
> + *
> + * Write transfers (EP -> RC) are analogous but use
> + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
> + * and NTB_TAIL_WR_{EP_I,RC_O}().
> + */
> +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> +
> +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
> +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
> +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
> +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
> +
> +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
> +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
> +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
> +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
> +
> +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
> +{
> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
> +}
> +
> +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
> +{
> +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
> +}
> +
> +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
> +{
> +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
> +}
> +
> +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
> +{
> +	unsigned int head, tail;
> +
> +	if (ntb_qp_edma_is_ep(qp)) {
> +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> +			/* In this scope, only 'head' might proceed */
> +			tail = READ_ONCE(qp->wr_cons);
> +			head = READ_ONCE(qp->wr_prod);
> +		}
> +		return ntb_edma_ring_free_entry(head, tail);
> +	}
> +
> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> +		/* In this scope, only 'head' might proceed */
> +		tail = READ_ONCE(qp->rd_issue);
> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> +	}
> +	/*
> +	 * On RC side, 'used' amount indicates how much EP side
> +	 * has refilled, which are available for us to use for TX.
> +	 */
> +	return ntb_edma_ring_used_entry(head, tail);
> +}
> +
> +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
> +						  struct ntb_transport_qp *qp)
> +{
> +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
> +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
> +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
> +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
> +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
> +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
> +
> +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
> +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
> +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
> +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
> +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
> +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
> +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
> +	seq_putc(s, '\n');
> +
> +	seq_puts(s, "Using Remote eDMA - Yes\n");
> +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
> +}
> +
> +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +
> +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
> +		ntb_edma_teardown_isr(&ndev->pdev->dev);
> +
> +	if (nt->wq)
> +		destroy_workqueue(nt->wq);
> +	nt->wq = NULL;
> +}
> +
> +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> +				   unsigned int *mw_count)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +
> +	/*
> +	 * We need at least one MW for the transport plus one MW reserved
> +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
> +	 */
> +	if (*mw_count <= 1) {
> +		dev_err(&ndev->dev,
> +			"remote eDMA requires at least two MWS (have %u)\n",
> +			*mw_count);
> +		return -ENODEV;
> +	}
> +
> +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
> +	if (!nt->wq) {
> +		ntb_transport_edma_uninit(nt);
> +		return -ENOMEM;
> +	}
> +
> +	/* Reserve the last peer MW exclusively for the eDMA window. */
> +	*mw_count -= 1;
> +
> +	return 0;
> +}
> +
> +static void ntb_transport_edma_db_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp =
> +			container_of(work, struct ntb_transport_qp, db_work);
> +
> +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
> +}
> +
> +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
> +{
> +	if (ntb_qp_edma_is_rc(qp))
> +		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
> +			return;
> +
> +	/*
> +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
> +	 * may sleep, delegate the actual doorbell write to a workqueue.
> +	 */
> +	queue_work(system_highpri_wq, &qp->db_work);
> +}
> +
> +static void ntb_transport_edma_isr(void *data, int qp_num)
> +{
> +	struct ntb_transport_ctx *nt = data;
> +	struct ntb_transport_qp *qp;
> +
> +	if (qp_num < 0 || qp_num >= nt->qp_count)
> +		return;
> +
> +	qp = &nt->qp_vec[qp_num];
> +	if (WARN_ON(!qp))
> +		return;
> +
> +	queue_work(nt->wq, &qp->read_work);
> +	queue_work(nt->wq, &qp->write_work);
> +}
> +
> +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct pci_dev *pdev = ndev->pdev;
> +	int rc;
> +
> +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
> +		return 0;
> +
> +	rc = ntb_edma_setup_peer(ndev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
> +		return rc;
> +	}
> +
> +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
> +		return rc;
> +	}
> +
> +	nt->remote_edma_mode = REMOTE_EDMA_RC;
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
> +{
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct pci_dev *pdev = ndev->pdev;
> +	struct pci_epc *epc;
> +	int rc;
> +
> +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
> +		return 0;
> +
> +	/* Only EP side can return pci_epc */
> +	epc = ntb_get_pci_epc(ndev);
> +	if (!epc)
> +		return 0;
> +
> +	rc = ntb_edma_setup_mws(ndev);
> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"Failed to set up memory window for eDMA: %d\n", rc);
> +		return rc;
> +	}
> +
> +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
> +		return rc;
> +	}
> +
> +	nt->remote_edma_mode = REMOTE_EDMA_EP;
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num)
> +{
> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> +	struct ntb_dev *ndev = nt->ndev;
> +	struct ntb_queue_entry *entry;
> +	struct ntb_transport_mw *mw;
> +	unsigned int mw_num, mw_count, qp_count;
> +	unsigned int qp_offset, rx_info_offset;
> +	unsigned int mw_size, mw_size_per_qp;
> +	unsigned int num_qps_mw;
> +	size_t edma_total;
> +	unsigned int i;
> +	int node;
> +
> +	mw_count = nt->mw_count;
> +	qp_count = nt->qp_count;
> +
> +	mw_num = QP_TO_MW(nt, qp_num);
> +	mw = &nt->mw_vec[mw_num];
> +
> +	if (!mw->virt_addr)
> +		return -ENOMEM;
> +
> +	if (mw_num < qp_count % mw_count)
> +		num_qps_mw = qp_count / mw_count + 1;
> +	else
> +		num_qps_mw = qp_count / mw_count;
> +
> +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
> +	if (max_mw_size && mw_size > max_mw_size)
> +		mw_size = max_mw_size;
> +
> +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
> +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
> +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
> +
> +	qp->tx_mw_size = mw_size_per_qp;
> +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> +	if (!qp->tx_mw)
> +		return -EINVAL;
> +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
> +	if (!qp->tx_mw_phys)
> +		return -EINVAL;
> +	qp->rx_info = qp->tx_mw + rx_info_offset;
> +	qp->rx_buff = mw->virt_addr + qp_offset;
> +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
> +
> +	/* Due to housekeeping, there must be at least 2 buffs */
> +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> +
> +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
> +	edma_total = 2 * sizeof(struct ntb_edma_ring);
> +	if (rx_info_offset < edma_total) {
> +		dev_err(&ndev->dev, "Ring space requires %luB (>=%uB)\n",
> +			edma_total, rx_info_offset);
> +		return -EINVAL;
> +	}
> +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
> +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
> +
> +	/*
> +	 * Checking to see if we have more entries than the default.
> +	 * We should add additional entries if that is the case so we
> +	 * can be in sync with the transport frames.
> +	 */
> +	node = dev_to_node(&ndev->dev);
> +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
> +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
> +		if (!entry)
> +			return -ENOMEM;
> +
> +		entry->qp = qp;
> +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> +			     &qp->rx_free_q);
> +		qp->rx_alloc_entry++;
> +	}
> +
> +	memset(qp->rx_buff, 0, edma_total);
> +
> +	qp->rx_pkts = 0;
> +	qp->tx_pkts = 0;
> +
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	unsigned int len;
> +	u32 idx;
> +
> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
> +				     qp->rd_cons) == 0)
> +		return 0;
> +
> +	idx = ntb_edma_ring_idx(qp->rd_cons);
> +	in = NTB_DESC_RD_EP_I(qp, idx);
> +	if (!(in->flags & DESC_DONE_FLAG))
> +		return 0;
> +
> +	in->flags = 0;
> +	len = in->len; /* might be smaller than entry->len */
> +
> +	entry = (struct ntb_queue_entry *)(in->data);
> +	if (WARN_ON(!entry))
> +		return 0;
> +
> +	if (in->flags & LINK_DOWN_FLAG) {
> +		ntb_qp_link_down(qp);
> +		qp->rd_cons++;
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +		return 1;
> +	}
> +
> +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
> +
> +	qp->rx_bytes += len;
> +	qp->rx_pkts++;
> +	qp->rd_cons++;
> +
> +	if (qp->rx_handler && qp->client_ready)
> +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
> +
> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +	return 1;
> +}
> +
> +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	u32 idx;
> +
> +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
> +				     qp->wr_cons) == 0)
> +		return 0;
> +
> +	idx = ntb_edma_ring_idx(qp->wr_cons);
> +	in = NTB_DESC_WR_EP_I(qp, idx);
> +
> +	entry = (struct ntb_queue_entry *)(in->data);
> +	if (WARN_ON(!entry))
> +		return 0;
> +
> +	qp->wr_cons++;
> +
> +	if (qp->tx_handler)
> +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
> +
> +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
> +	return 1;
> +}
> +
> +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +	unsigned int i;
> +
> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> +		if (!ntb_transport_edma_ep_read_complete(qp))
> +			break;
> +	}
> +
> +	if (ntb_transport_edma_ep_read_complete(qp))
> +		queue_work(qp->transport->wq, &qp->read_work);
> +}
> +
> +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +	unsigned int i;
> +
> +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> +		if (!ntb_transport_edma_ep_write_complete(qp))
> +			break;
> +	}
> +
> +	if (ntb_transport_edma_ep_write_complete(qp))
> +		queue_work(qp->transport->wq, &qp->write_work);
> +}
> +
> +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	unsigned int len;
> +	void *cb_data;
> +	u32 idx;
> +
> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
> +					qp->wr_cons) != 0) {
> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
> +		smp_rmb();
> +
> +		idx = ntb_edma_ring_idx(qp->wr_cons);
> +		in = NTB_DESC_WR_RC_I(qp, idx);
> +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> +			break;
> +
> +		in->data = 0;
> +
> +		cb_data = entry->cb_data;
> +		len = entry->len;
> +
> +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
> +
> +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
> +			ntb_qp_link_down(qp);
> +			continue;
> +		}
> +
> +		ntb_transport_edma_notify_peer(qp);
> +
> +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> +
> +		if (qp->rx_handler && qp->client_ready)
> +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
> +
> +		/* stat updates */
> +		qp->rx_bytes += len;
> +		qp->rx_pkts++;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_write_cb(void *data,
> +					   const struct dmaengine_result *res)
> +{
> +	struct ntb_queue_entry *entry = data;
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	enum dmaengine_tx_result dma_err = res->result;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +
> +	switch (dma_err) {
> +	case DMA_TRANS_READ_FAILED:
> +	case DMA_TRANS_WRITE_FAILED:
> +	case DMA_TRANS_ABORTED:
> +		entry->errors++;
> +		entry->len = -EIO;
> +		break;
> +	case DMA_TRANS_NOERROR:
> +	default:
> +		break;
> +	}
> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
> +	sg_dma_address(&entry->sgl) = 0;
> +
> +	entry->flags |= DESC_DONE_FLAG;
> +
> +	queue_work(nt->wq, &qp->write_work);
> +}
> +
> +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	struct ntb_queue_entry *entry;
> +	unsigned int len;
> +	void *cb_data;
> +	u32 idx;
> +
> +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
> +					qp->rd_cons) != 0) {
> +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
> +		smp_rmb();
> +
> +		idx = ntb_edma_ring_idx(qp->rd_cons);
> +		in = NTB_DESC_RD_RC_I(qp, idx);
> +		entry = (struct ntb_queue_entry *)in->data;
> +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> +			break;
> +
> +		in->data = 0;
> +
> +		cb_data = entry->cb_data;
> +		len = entry->len;
> +
> +		out = NTB_DESC_RD_RC_O(qp, idx);
> +
> +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
> +
> +		/*
> +		 * No need to add barrier in-between to enforce ordering here.
> +		 * The other side proceeds only after both flags and tail are
> +		 * updated.
> +		 */
> +		iowrite32(entry->flags, &out->flags);
> +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
> +
> +		ntb_transport_edma_notify_peer(qp);
> +
> +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
> +			     &qp->tx_free_q);
> +
> +		if (qp->tx_handler)
> +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
> +
> +		/* stat updates */
> +		qp->tx_bytes += len;
> +		qp->tx_pkts++;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_read_cb(void *data,
> +					  const struct dmaengine_result *res)
> +{
> +	struct ntb_queue_entry *entry = data;
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	enum dmaengine_tx_result dma_err = res->result;
> +
> +	switch (dma_err) {
> +	case DMA_TRANS_READ_FAILED:
> +	case DMA_TRANS_WRITE_FAILED:
> +	case DMA_TRANS_ABORTED:
> +		entry->errors++;
> +		entry->len = -EIO;
> +		break;
> +	case DMA_TRANS_NOERROR:
> +	default:
> +		break;
> +	}
> +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
> +	sg_dma_address(&entry->sgl) = 0;
> +
> +	entry->flags |= DESC_DONE_FLAG;
> +
> +	queue_work(nt->wq, &qp->read_work);
> +}
> +
> +static int ntb_transport_edma_rc_write_start(struct device *d,
> +					     struct dma_chan *chan, size_t len,
> +					     dma_addr_t ep_src, void *rc_dst,
> +					     struct ntb_queue_entry *entry)
> +{
> +	struct scatterlist *sgl = &entry->sgl;
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	dma_cookie_t cookie;
> +	int nents, rc;
> +
> +	if (!d)
> +		return -ENODEV;
> +
> +	if (!chan)
> +		return -ENXIO;
> +
> +	if (WARN_ON(!ep_src || !rc_dst))
> +		return -EINVAL;
> +
> +	if (WARN_ON(sg_dma_address(sgl)))
> +		return -EINVAL;
> +
> +	sg_init_one(sgl, rc_dst, len);
> +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
> +	if (nents <= 0)
> +		return -EIO;
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.src_addr       = ep_src;
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_DEV_TO_MEM;
> +	rc = dmaengine_slave_config(chan, &cfg);
> +	if (rc)
> +		goto out_unmap;
> +
> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +
> +	txd->callback_result = ntb_transport_edma_rc_write_cb;
> +	txd->callback_param = entry;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie)) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +	dma_async_issue_pending(chan);
> +	return 0;
> +out_unmap:
> +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_rc_read_start(struct device *d,
> +					    struct dma_chan *chan, size_t len,
> +					    void *rc_src, dma_addr_t ep_dst,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct scatterlist *sgl = &entry->sgl;
> +	struct dma_async_tx_descriptor *txd;
> +	struct dma_slave_config cfg;
> +	dma_cookie_t cookie;
> +	int nents, rc;
> +
> +	if (!d)
> +		return -ENODEV;
> +
> +	if (!chan)
> +		return -ENXIO;
> +
> +	if (WARN_ON(!rc_src || !ep_dst))
> +		return -EINVAL;
> +
> +	if (WARN_ON(sg_dma_address(sgl)))
> +		return -EINVAL;
> +
> +	sg_init_one(sgl, rc_src, len);
> +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
> +	if (nents <= 0)
> +		return -EIO;
> +
> +	memset(&cfg, 0, sizeof(cfg));
> +	cfg.dst_addr       = ep_dst;
> +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +	cfg.direction      = DMA_MEM_TO_DEV;
> +	rc = dmaengine_slave_config(chan, &cfg);
> +	if (rc)
> +		goto out_unmap;
> +
> +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
> +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> +	if (!txd) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +
> +	txd->callback_result = ntb_transport_edma_rc_read_cb;
> +	txd->callback_param = entry;
> +
> +	cookie = dmaengine_submit(txd);
> +	if (dma_submit_error(cookie)) {
> +		rc = -EIO;
> +		goto out_unmap;
> +	}
> +	dma_async_issue_pending(chan);
> +	return 0;
> +out_unmap:
> +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
> +	return rc;
> +}
> +
> +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
> +{
> +	struct ntb_queue_entry *entry = container_of(
> +				work, struct ntb_queue_entry, dma_work);
> +	struct ntb_transport_qp *qp = entry->qp;
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct dma_chan *chan;
> +	int rc;
> +
> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
> +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
> +					       entry->addr, entry->buf, entry);
> +	if (rc) {
> +		entry->errors++;
> +		entry->len = -EIO;
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->write_work);
> +		return;
> +	}
> +}
> +
> +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	unsigned int budget = NTB_EDMA_MAX_POLL;
> +	struct ntb_queue_entry *entry;
> +	struct ntb_edma_desc *in;
> +	dma_addr_t ep_src;
> +	u32 len, idx;
> +
> +	/* Decrement inside the loop so '!budget' below means the budget ran out */
> +	while (budget) {
> +		budget--;
> +
> +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
> +					     qp->wr_issue) == 0)
> +			break;
> +
> +		idx = ntb_edma_ring_idx(qp->wr_issue);
> +		in = NTB_DESC_WR_RC_I(qp, idx);
> +
> +		len = READ_ONCE(in->len);
> +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
> +
> +		/* Prepare 'entry' for write completion */
> +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
> +		if (!entry) {
> +			qp->rx_err_no_buf++;
> +			break;
> +		}
> +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
> +			entry->flags &= ~DESC_DONE_FLAG;
> +		entry->len = len; /* NB. entry->len can be <=0 */
> +		entry->addr = ep_src;
> +
> +		/*
> +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
> +		 * so it needs to be set before wr_issue++.
> +		 */
> +		in->data = (uintptr_t)entry;
> +
> +		/* Ensure in->data visible before wr_issue++ */
> +		smp_wmb();
> +
> +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
> +
> +		if (!len) {
> +			entry->flags |= DESC_DONE_FLAG;
> +			queue_work(nt->wq, &qp->write_work);
> +			continue;
> +		}
> +
> +		if (in->flags & LINK_DOWN_FLAG) {
> +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
> +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
> +			queue_work(nt->wq, &qp->write_work);
> +			continue;
> +		}
> +
> +		queue_work(nt->wq, &entry->dma_work);
> +	}
> +
> +	if (!budget)
> +		tasklet_schedule(&qp->rxc_db_work);
> +}
> +
> +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_transport_ctx *nt = qp->transport;
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	struct dma_chan *chan;
> +	u32 issue, idx, head;
> +	dma_addr_t ep_dst;
> +	int rc;
> +
> +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
> +
> +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> +		issue = qp->rd_issue;
> +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
> +			qp->tx_ring_full++;
> +			return -ENOSPC;
> +		}
> +
> +		/*
> +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
> +		 * so it needs to be set before rd_issue++.
> +		 */
> +		idx = ntb_edma_ring_idx(issue);
> +		in = NTB_DESC_RD_RC_I(qp, idx);
> +		in->data = (uintptr_t)entry;
> +
> +		/* Make in->data visible before rd_issue++ */
> +		smp_wmb();
> +
> +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
> +	}
> +
> +	/* Publish the final transfer length to the EP side */
> +	out = NTB_DESC_RD_RC_O(qp, idx);
> +	iowrite32(len, &out->len);
> +	ioread32(&out->len);
> +
> +	if (unlikely(!len)) {
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->read_work);
> +		return 0;
> +	}
> +
> +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
> +	dma_rmb();
> +
> +	/* kick remote eDMA read transfer */
> +	ep_dst = (dma_addr_t)in->addr;
> +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
> +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
> +					      entry->buf, ep_dst, entry);
> +	if (rc) {
> +		entry->errors++;
> +		entry->len = -EIO;
> +		entry->flags |= DESC_DONE_FLAG;
> +		queue_work(nt->wq, &qp->read_work);
> +	}
> +	return 0;
> +}
> +
> +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	dma_addr_t ep_src = 0;
> +	u32 idx;
> +	int rc;
> +
> +	if (likely(len)) {
> +		ep_src = dma_map_single(dma_dev, entry->buf, len,
> +					DMA_TO_DEVICE);
> +		rc = dma_mapping_error(dma_dev, ep_src);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
> +			rc = -ENOSPC;
> +			qp->tx_ring_full++;
> +			goto out_unmap;
> +		}
> +
> +		idx = ntb_edma_ring_idx(qp->wr_prod);
> +		in  = NTB_DESC_WR_EP_I(qp, idx);
> +		out = NTB_DESC_WR_EP_O(qp, idx);
> +
> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> +		WARN_ON(entry->flags & DESC_DONE_FLAG);
> +		in->flags = 0;
> +		in->data  = (uintptr_t)entry;
> +		entry->addr  = ep_src;
> +
> +		iowrite32(len,          &out->len);
> +		iowrite32(entry->flags, &out->flags);
> +		iowrite64(ep_src,       &out->addr);
> +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
> +
> +		dma_wmb();
> +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
> +
> +		qp->tx_bytes += len;
> +		qp->tx_pkts++;
> +	}
> +
> +	ntb_transport_edma_notify_peer(qp);
> +
> +	return 0;
> +out_unmap:
> +	if (likely(len))
> +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
> +					 struct ntb_queue_entry *entry,
> +					 void *cb, void *data, unsigned int len,
> +					 unsigned int flags)
> +{
> +	struct device *dma_dev;
> +
> +	if (entry->addr) {
> +		/* Deferred unmap */
> +		dma_dev = get_dma_dev(qp->ndev);
> +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
> +	}
> +
> +	entry->cb_data = cb;
> +	entry->buf = data;
> +	entry->len = len;
> +	entry->flags = flags;
> +	entry->errors = 0;
> +	entry->addr = 0;
> +
> +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
> +
> +	if (ntb_qp_edma_is_ep(qp))
> +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
> +	else
> +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
> +}
> +
> +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
> +					    struct ntb_queue_entry *entry)
> +{
> +	struct device *dma_dev = get_dma_dev(qp->ndev);
> +	struct ntb_edma_desc *in, __iomem *out;
> +	unsigned int len = entry->len;
> +	void *data = entry->buf;
> +	dma_addr_t ep_dst;
> +	u32 idx;
> +	int rc;
> +
> +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
> +	rc = dma_mapping_error(dma_dev, ep_dst);
> +	if (rc)
> +		return rc;
> +
> +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
> +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
> +				       READ_ONCE(qp->rd_cons))) {
> +			rc = -ENOSPC;
> +			goto out_unmap;
> +		}
> +
> +		idx = ntb_edma_ring_idx(qp->rd_prod);
> +		in = NTB_DESC_RD_EP_I(qp, idx);
> +		out = NTB_DESC_RD_EP_O(qp, idx);
> +
> +		iowrite32(len, &out->len);
> +		iowrite64(ep_dst, &out->addr);
> +
> +		WARN_ON(in->flags & DESC_DONE_FLAG);
> +		in->data = (uintptr_t)entry;
> +		entry->addr = ep_dst;
> +
> +		/* Ensure len/addr are visible before the head update */
> +		dma_wmb();
> +
> +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
> +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
> +	}
> +	return 0;
> +out_unmap:
> +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
> +	return rc;
> +}
> +
> +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
> +					 struct ntb_queue_entry *entry)
> +{
> +	int rc;
> +
> +	/* The behaviour is the same as the default backend for RC side */
> +	if (ntb_qp_edma_is_ep(qp)) {
> +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
> +		if (rc) {
> +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> +				     &qp->rx_free_q);
> +			return rc;
> +		}
> +	}
> +
> +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
> +
> +	if (qp->active)
> +		tasklet_schedule(&qp->rxc_db_work);
> +
> +	return 0;
> +}
> +
> +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
> +{
> +	struct ntb_transport_ctx *nt = qp->transport;
> +
> +	if (ntb_qp_edma_is_rc(qp))
> +		ntb_transport_edma_rc_poll(qp);
> +	else if (ntb_qp_edma_is_ep(qp)) {
> +		/*
> +		 * Make sure we poll the rings even if an eDMA interrupt is
> +		 * cleared on the RC side earlier.
> +		 */
> +		queue_work(nt->wq, &qp->read_work);
> +		queue_work(nt->wq, &qp->write_work);
> +	} else
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +}
> +
> +static void ntb_transport_edma_read_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, read_work);
> +
> +	if (ntb_qp_edma_is_rc(qp))
> +		ntb_transport_edma_rc_read_complete_work(work);
> +	else if (ntb_qp_edma_is_ep(qp))
> +		ntb_transport_edma_ep_read_work(work);
> +	else
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +}
> +
> +static void ntb_transport_edma_write_work(struct work_struct *work)
> +{
> +	struct ntb_transport_qp *qp = container_of(
> +				work, struct ntb_transport_qp, write_work);
> +
> +	if (ntb_qp_edma_is_rc(qp))
> +		ntb_transport_edma_rc_write_complete_work(work);
> +	else if (ntb_qp_edma_is_ep(qp))
> +		ntb_transport_edma_ep_write_work(work);
> +	else
> +		/* Unreachable */
> +		WARN_ON_ONCE(1);
> +}
> +
> +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> +					  unsigned int qp_num)
> +{
> +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> +
> +	qp->wr_cons = 0;
> +	qp->rd_cons = 0;
> +	qp->wr_prod = 0;
> +	qp->rd_prod = 0;
> +	qp->wr_issue = 0;
> +	qp->rd_issue = 0;
> +
> +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
> +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
> +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
> +}
> +
> +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> +					    struct ntb_transport_qp *qp)
> +{
> +	spin_lock_init(&qp->ep_tx_lock);
> +	spin_lock_init(&qp->ep_rx_lock);
> +	spin_lock_init(&qp->rc_lock);
> +}
> +
> +static const struct ntb_transport_backend_ops edma_backend_ops = {
> +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> +	.rx_poll = ntb_transport_edma_rx_poll,
> +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> +};
> +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> +
>  /**
>   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
>   * @qp: NTB transport layer queue to be enabled
> --
> 2.48.1
>
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months, 1 week ago
On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > Add a new transport backend that uses a remote DesignWare eDMA engine
> > located on the NTB endpoint to move data between host and endpoint.
> >
> > In this mode:
> >
> >   - The endpoint exposes a dedicated memory window that contains the
> >     eDMA register block followed by a small control structure (struct
> >     ntb_edma_info) and per-channel linked-list (LL) rings.
> >
> >   - On the endpoint side, ntb_edma_setup_mws() allocates the control
> >     structure and LL rings in endpoint memory, then programs an inbound
> >     iATU region so that the host can access them via a peer MW.
> >
> >   - On the host side, ntb_edma_setup_peer() ioremaps the peer MW, reads
> >     ntb_edma_info and configures a dw-edma DMA device to use the LL
> >     rings provided by the endpoint.
> >
> >   - ntb_transport is extended with a new backend_ops implementation that
> >     routes TX and RX enqueue/poll operations through the remote eDMA
> >     rings while keeping the existing shared-memory backend intact.
> >
> >   - The host signals the endpoint via a dedicated DMA read channel.
> >     'use_msi' module option is ignored when 'use_remote_edma=1'.
> >
> > The new mode is guarded by a Kconfig option (NTB_TRANSPORT_EDMA) and a
> > module parameter (use_remote_edma). When disabled, the existing
> > ntb_transport behaviour is unchanged.
> >
> > Signed-off-by: Koichiro Den <den@valinux.co.jp>
> > ---
> >  drivers/ntb/Kconfig                           |   11 +
> >  drivers/ntb/Makefile                          |    3 +
> >  drivers/ntb/ntb_edma.c                        |  628 ++++++++
> >  drivers/ntb/ntb_edma.h                        |  128 ++
> >  .../{ntb_transport.c => ntb_transport_core.c} | 1281 ++++++++++++++++-
> >  5 files changed, 2048 insertions(+), 3 deletions(-)
> >  create mode 100644 drivers/ntb/ntb_edma.c
> >  create mode 100644 drivers/ntb/ntb_edma.h
> >  rename drivers/ntb/{ntb_transport.c => ntb_transport_core.c} (65%)
> >
> > diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
> > index df16c755b4da..db63f02bb116 100644
> > --- a/drivers/ntb/Kconfig
> > +++ b/drivers/ntb/Kconfig
> > @@ -37,4 +37,15 @@ config NTB_TRANSPORT
> >
> >  	 If unsure, say N.
> >
> > +config NTB_TRANSPORT_EDMA
> > +	bool "NTB Transport backed by remote eDMA"
> > +	depends on NTB_TRANSPORT
> > +	depends on PCI
> > +	select DMA_ENGINE
> > +	help
> > +	  Enable a transport backend that uses a remote DesignWare eDMA engine
> > +	  exposed through a dedicated NTB memory window. The host uses the
> > +	  endpoint's eDMA engine to move data in both directions.
> > +	  Say Y here if you intend to use the 'use_remote_edma' module parameter.
> > +
> >  endif # NTB
> > diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
> > index 3a6fa181ff99..51f0e1e3aec7 100644
> > --- a/drivers/ntb/Makefile
> > +++ b/drivers/ntb/Makefile
> > @@ -4,3 +4,6 @@ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
> >
> >  ntb-y			:= core.o
> >  ntb-$(CONFIG_NTB_MSI)	+= msi.o
> > +
> > +ntb_transport-y					:= ntb_transport_core.o
> > +ntb_transport-$(CONFIG_NTB_TRANSPORT_EDMA)	+= ntb_edma.o
> > diff --git a/drivers/ntb/ntb_edma.c b/drivers/ntb/ntb_edma.c
> > new file mode 100644
> > index 000000000000..cb35e0d56aa8
> > --- /dev/null
> > +++ b/drivers/ntb/ntb_edma.c
> > @@ -0,0 +1,628 @@
> > +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> > +
> > +#include <linux/module.h>
> > +#include <linux/device.h>
> > +#include <linux/pci.h>
> > +#include <linux/ntb.h>
> > +#include <linux/io.h>
> > +#include <linux/iommu.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/pci-epc.h>
> > +#include <linux/dma/edma.h>
> > +#include <linux/irq.h>
> > +#include <linux/irqdomain.h>
> > +#include <linux/of.h>
> > +#include <linux/of_irq.h>
> > +#include <dt-bindings/interrupt-controller/arm-gic.h>
> > +
> > +#include "ntb_edma.h"
> > +
> > +/*
> > + * The interrupt register offsets below are taken from the DesignWare
> > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > + * backend currently only supports this layout.
> > + */
> > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > +#define DMA_READ_INT_MASK_OFF      0xa8
> > +#define DMA_READ_INT_CLEAR_OFF     0xac
> 
> Not sure why you need to access the eDMA registers directly, because the
> eDMA driver is already exported as a dmaengine driver.

These are intended for EP use. In my current design I intentionally don't
use the standard dw-edma dmaengine driver on the EP side.
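
To make that concrete, the EP-side handling boils down to roughly the
following (a condensed sketch of ntb_edma_isr() above; the register
offsets and symbols are the ones already defined in this patch, the
wrapper name is only for illustration, and the guard/error paths are
omitted):

	static irqreturn_t ep_isr_sketch(int irq, void *data)
	{
		struct ntb_edma_interrupt *v = data;
		u32 notify = BIT(EDMA_RD_CH_NUM); /* notification read channel */
		u32 i;

		/*
		 * Mask every local interrupt source except the notification
		 * read channel; the RC owns the usual status/clear handling
		 * through its dw-edma dmaengine instance.
		 */
		iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
		iowrite32(~notify, v->base + DMA_READ_INT_MASK_OFF);

		/* Per-QP doorbells that the RC wrote via a small eDMA transfer */
		for (i = 0; i < edma_ctx.notify_qp_max; i++) {
			if (READ_ONCE(edma_ctx.intr_ep_virt->db[i])) {
				WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
				v->cb(v->data, i);
			}
		}

		return IRQ_HANDLED;
	}

On the EP this only needs the raw register block for masking; descriptors
are never submitted from the EP side, which is why the dmaengine driver is
not bound there in this design.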

> 
> > +
> > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > +
> > +static unsigned int edma_spi = 417; /* 0x1a1 */
> > +module_param(edma_spi, uint, 0644);
> > +MODULE_PARM_DESC(edma_spi, "SPI number used by remote eDMA interrupt (EP local)");
> > +
> > +static u64 edma_regs_phys = 0xe65d5000;
> > +module_param(edma_regs_phys, ullong, 0644);
> > +MODULE_PARM_DESC(edma_regs_phys, "Physical base address of local eDMA registers (EP)");
> > +
> > +static unsigned long edma_regs_size = 0x1200;
> > +module_param(edma_regs_size, ulong, 0644);
> > +MODULE_PARM_DESC(edma_regs_size, "Size of the local eDMA register space (EP)");
> > +
> > +struct ntb_edma_intr {
> > +	u32 db[NTB_EDMA_NOTIFY_MAX_QP];
> > +};
> > +
> > +struct ntb_edma_ctx {
> > +	void *ll_wr_virt[EDMA_WR_CH_NUM];
> > +	dma_addr_t ll_wr_phys[EDMA_WR_CH_NUM];
> > +	void *ll_rd_virt[EDMA_RD_CH_NUM + 1];
> > +	dma_addr_t ll_rd_phys[EDMA_RD_CH_NUM + 1];
> > +
> > +	struct ntb_edma_intr *intr_ep_virt;
> > +	dma_addr_t intr_ep_phys;
> > +	struct ntb_edma_intr *intr_rc_virt;
> > +	dma_addr_t intr_rc_phys;
> > +	u32 notify_qp_max;
> > +
> > +	bool initialized;
> > +};
> > +
> > +static struct ntb_edma_ctx edma_ctx;
> > +
> > +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> > +
> > +struct ntb_edma_interrupt {
> > +	int virq;
> > +	void __iomem *base;
> > +	ntb_edma_interrupt_cb_t cb;
> > +	void *data;
> > +};
> > +
> > +static struct ntb_edma_interrupt ntb_edma_intr;
> > +
> > +static int ntb_edma_map_spi_to_virq(struct device *dev, unsigned int spi)
> > +{
> > +	struct device_node *np = dev_of_node(dev);
> > +	struct device_node *parent;
> > +	struct irq_fwspec fwspec = { 0 };
> > +	int virq;
> > +
> > +	parent = of_irq_find_parent(np);
> > +	if (!parent)
> > +		return -ENODEV;
> > +
> > +	fwspec.fwnode      = of_fwnode_handle(parent);
> > +	fwspec.param_count = 3;
> > +	fwspec.param[0]    = GIC_SPI;
> > +	fwspec.param[1]    = spi;
> > +	fwspec.param[2]    = IRQ_TYPE_LEVEL_HIGH;
> > +
> > +	virq = irq_create_fwspec_mapping(&fwspec);
> > +	of_node_put(parent);
> > +	return (virq > 0) ? virq : -EINVAL;
> > +}
> > +
> > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > +{
> 
> Not sure why dw_edma_interrupt_write/read() doesn't work for your case. I
> suppose you could just register a callback with the dmaengine.

If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
callbacks handle int_status/int_clear, I think we could hit races. One side
might clear a status bit before the other side has a chance to see it and
invoke its callback. Please correct me if I'm missing something here.

To avoid that, in my current implementation, the RC side handles the
status/int_clear registers in the usual way, and the EP side only tries to
suppress needless edma_int as much as possible.
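
For reference, the RC->EP notification path used in this mode (in place of
the peer doorbell, which remains the fallback) is essentially the following
(condensed from ntb_edma_notify_peer() in this patch; the wrapper name is
only for illustration, and the slave-config bus widths plus all error
handling are omitted):

	/*
	 * RC side: "ring the doorbell" for qp_num by DMA-ing a single u32
	 * into the EP-local db[] array over the reserved read channel. The
	 * resulting eDMA interrupt is the only source left unmasked on the
	 * EP.
	 */
	static void rc_notify_sketch(struct ntb_edma_chans *edma, int qp_num)
	{
		struct dma_async_tx_descriptor *txd;
		struct dma_slave_config cfg = {};
		struct scatterlist sgl;

		WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
		wmb(); /* publish the flag before kicking the transfer */

		sg_init_table(&sgl, 1);
		sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
		sg_dma_len(&sgl) = sizeof(u32);

		cfg.dst_addr  = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
		cfg.direction = DMA_MEM_TO_DEV;
		dmaengine_slave_config(edma->intr_chan, &cfg);

		txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
					      DMA_MEM_TO_DEV,
					      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
		dmaengine_submit(txd);
		dma_async_issue_pending(edma->intr_chan);
	}

Because the status bit for that channel may already have been cleared by
the RC, the EP's ISR identifies the signalled QP from the db[] array rather
than from the interrupt status registers.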

That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
That would require some changes to the dw-edma core, though.

> 
> > +	struct ntb_edma_interrupt *v = data;
> > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > +	u32 i, val;
> > +
> > +	/*
> > +	 * We do not ack interrupts here but instead we mask all local interrupt
> > +	 * sources except the read channel used for notification. This reduces
> > +	 * needless ISR invocations.
> > +	 *
> > +	 * In theory we could configure LIE=1/RIE=0 only for the notification
> > +	 * transfer (keeping all other channels at LIE=1/RIE=1), but that would
> > +	 * require intrusive changes to the dw-edma core.
> > +	 *
> > +	 * Note: The host side may have already cleared the read interrupt used
> > +	 * for notification, so reading DMA_READ_INT_CLEAR_OFF is not a reliable
> > +	 * way to detect it. As a result, we cannot reliably tell which specific
> > +	 * channel triggered this interrupt. intr_ep_virt->db[i] teaches us
> > +	 * channel triggered this interrupt; intr_ep_virt->db[i] tells us
> > +	 * instead.
> > +	iowrite32(~0x0, v->base + DMA_WRITE_INT_MASK_OFF);
> > +	iowrite32(~mask, v->base + DMA_READ_INT_MASK_OFF);
> > +
> > +	if (!v->cb || !edma_ctx.intr_ep_virt)
> > +		return IRQ_HANDLED;
> > +
> > +	for (i = 0; i < edma_ctx.notify_qp_max; i++) {
> > +		val = READ_ONCE(edma_ctx.intr_ep_virt->db[i]);
> > +		if (!val)
> > +			continue;
> > +
> > +		WRITE_ONCE(edma_ctx.intr_ep_virt->db[i], 0);
> > +		v->cb(v->data, i);
> > +	}
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> ...
> > +
> > +int ntb_edma_setup_peer(struct ntb_dev *ndev)
> > +{
> > +	struct ntb_edma_info *info;
> > +	unsigned int wr_cnt, rd_cnt;
> > +	struct dw_edma_chip *chip;
> > +	void __iomem *edma_virt;
> > +	phys_addr_t edma_phys;
> > +	resource_size_t mw_size;
> > +	u64 off = EDMA_REG_SIZE;
> > +	int peer_mw, mw_index;
> > +	unsigned int i;
> > +	int ret;
> > +
> > +	peer_mw = ntb_peer_mw_count(ndev);
> > +	if (peer_mw <= 0)
> > +		return -ENODEV;
> > +
> > +	mw_index = peer_mw - 1; /* last MW */
> > +
> > +	ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys,
> > +				   &mw_size);
> > +	if (ret)
> > +		return -1;
> > +
> > +	edma_virt = ioremap(edma_phys, mw_size);
> > +
> > +	chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL);
> > +	if (!chip) {
> > +		ret = -ENOMEM;
> > +		return ret;
> > +	}
> > +
> > +	chip->dev = &ndev->pdev->dev;
> > +	chip->nr_irqs = 4;
> > +	chip->ops = &ntb_edma_ops;
> > +	chip->flags = 0;
> > +	chip->reg_base = edma_virt;
> > +	chip->mf = EDMA_MF_EDMA_UNROLL;
> > +
> > +	info = edma_virt + off;
> > +	if (info->magic != NTB_EDMA_INFO_MAGIC)
> > +		return -EINVAL;
> > +	wr_cnt = info->wr_cnt;
> > +	rd_cnt = info->rd_cnt;
> > +	chip->ll_wr_cnt = wr_cnt;
> > +	chip->ll_rd_cnt = rd_cnt;
> > +	off += PAGE_SIZE;
> > +
> > +	edma_ctx.notify_qp_max = NTB_EDMA_NOTIFY_MAX_QP;
> > +	edma_ctx.intr_ep_phys = info->intr_dar_base;
> > +	if (edma_ctx.intr_ep_phys) {
> > +		edma_ctx.intr_rc_virt =
> > +			dma_alloc_coherent(&ndev->pdev->dev,
> > +					   sizeof(struct ntb_edma_intr),
> > +					   &edma_ctx.intr_rc_phys,
> > +					   GFP_KERNEL);
> > +		if (!edma_ctx.intr_rc_virt)
> > +			return -ENOMEM;
> > +		memset(edma_ctx.intr_rc_virt, 0,
> > +		       sizeof(struct ntb_edma_intr));
> > +	}
> > +
> > +	for (i = 0; i < wr_cnt; i++) {
> > +		chip->ll_region_wr[i].vaddr.io = edma_virt + off;
> > +		chip->ll_region_wr[i].paddr = info->ll_wr_phys[i];
> > +		chip->ll_region_wr[i].sz = DMA_LLP_MEM_SIZE;
> > +		off += DMA_LLP_MEM_SIZE;
> > +	}
> > +	for (i = 0; i < rd_cnt; i++) {
> > +		chip->ll_region_rd[i].vaddr.io = edma_virt + off;
> > +		chip->ll_region_rd[i].paddr = info->ll_rd_phys[i];
> > +		chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE;
> > +		off += DMA_LLP_MEM_SIZE;
> > +	}
> > +
> > +	if (!pci_dev_msi_enabled(ndev->pdev))
> > +		return -ENXIO;
> > +
> > +	ret = dw_edma_probe(chip);
> 
> I think dw_edma_probe() should be in ntb_hw_epf.c, which provides DMA
> engine support.
>
> On the EP side, I suppose the default DWC controller driver has already set
> up the eDMA engine, so with the correct filter function you should get a
> DMA channel.

I intentionally hid the eDMA node from the EP side in the .dts patch in
[RFC PATCH v2 26/27], so that only the RC side manages the eDMA (remotely)
and the potential race condition I mentioned above is avoided.

Thanks for reviewing,
Koichiro

> 
> Frank
> 
> > +	if (ret) {
> > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +struct ntb_edma_filter {
> > +	struct device *dma_dev;
> > +	u32 direction;
> > +};
> > +
> > +static bool ntb_edma_filter_fn(struct dma_chan *chan, void *arg)
> > +{
> > +	struct ntb_edma_filter *filter = arg;
> > +	u32 dir = filter->direction;
> > +	struct dma_slave_caps caps;
> > +	int ret;
> > +
> > +	if (chan->device->dev != filter->dma_dev)
> > +		return false;
> > +
> > +	ret = dma_get_slave_caps(chan, &caps);
> > +	if (ret < 0)
> > +		return false;
> > +
> > +	return !!(caps.directions & dir);
> > +}
> > +
> > +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma)
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < edma->num_wr_chan; i++)
> > +		dma_release_channel(edma->wr_chan[i]);
> > +
> > +	for (i = 0; i < edma->num_rd_chan; i++)
> > +		dma_release_channel(edma->rd_chan[i]);
> > +
> > +	if (edma->intr_chan)
> > +		dma_release_channel(edma->intr_chan);
> > +}
> > +
> > +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma)
> > +{
> > +	struct ntb_edma_filter filter;
> > +	dma_cap_mask_t dma_mask;
> > +	unsigned int i;
> > +
> > +	dma_cap_zero(dma_mask);
> > +	dma_cap_set(DMA_SLAVE, dma_mask);
> > +
> > +	memset(edma, 0, sizeof(*edma));
> > +	edma->dev = dma_dev;
> > +
> > +	filter.dma_dev = dma_dev;
> > +	filter.direction = BIT(DMA_DEV_TO_MEM);
> > +	for (i = 0; i < EDMA_WR_CH_NUM; i++) {
> > +		edma->wr_chan[i] = dma_request_channel(dma_mask,
> > +						       ntb_edma_filter_fn,
> > +						       &filter);
> > +		if (!edma->wr_chan[i])
> > +			break;
> > +		edma->num_wr_chan++;
> > +	}
> > +
> > +	filter.direction = BIT(DMA_MEM_TO_DEV);
> > +	for (i = 0; i < EDMA_RD_CH_NUM; i++) {
> > +		edma->rd_chan[i] = dma_request_channel(dma_mask,
> > +						       ntb_edma_filter_fn,
> > +						       &filter);
> > +		if (!edma->rd_chan[i])
> > +			break;
> > +		edma->num_rd_chan++;
> > +	}
> > +
> > +	edma->intr_chan = dma_request_channel(dma_mask, ntb_edma_filter_fn,
> > +					      &filter);
> > +	if (!edma->intr_chan)
> > +		dev_warn(dma_dev,
> > +			 "Remote eDMA notify channel could not be allocated\n");
> > +
> > +	if (!edma->num_wr_chan || !edma->num_rd_chan) {
> > +		dev_warn(dma_dev, "Remote eDMA channels failed to initialize\n");
> > +		ntb_edma_teardown_chans(edma);
> > +		return -ENODEV;
> > +	}
> > +	return 0;
> > +}
> > +
> > +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> > +				    remote_edma_dir_t dir)
> > +{
> > +	unsigned int n, cur, idx;
> > +	struct dma_chan **chans;
> > +	atomic_t *cur_chan;
> > +
> > +	if (dir == REMOTE_EDMA_WRITE) {
> > +		n = edma->num_wr_chan;
> > +		chans = edma->wr_chan;
> > +		cur_chan = &edma->cur_wr_chan;
> > +	} else {
> > +		n = edma->num_rd_chan;
> > +		chans = edma->rd_chan;
> > +		cur_chan = &edma->cur_rd_chan;
> > +	}
> > +	if (WARN_ON_ONCE(!n))
> > +		return NULL;
> > +
> > +	/* Simple round-robin */
> > +	cur = (unsigned int)atomic_inc_return(cur_chan) - 1;
> > +	idx = cur % n;
> > +	return chans[idx];
> > +}
> > +
> > +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num)
> > +{
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	struct scatterlist sgl;
> > +	dma_cookie_t cookie;
> > +	struct device *dev;
> > +
> > +	if (!edma || !edma->intr_chan)
> > +		return -ENXIO;
> > +
> > +	if (qp_num < 0 || qp_num >= edma_ctx.notify_qp_max)
> > +		return -EINVAL;
> > +
> > +	if (!edma_ctx.intr_rc_virt || !edma_ctx.intr_ep_phys)
> > +		return -EINVAL;
> > +
> > +	dev = edma->dev;
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	WRITE_ONCE(edma_ctx.intr_rc_virt->db[qp_num], 1);
> > +
> > +	/* Ensure store is visible before kicking the DMA transfer */
> > +	wmb();
> > +
> > +	sg_init_table(&sgl, 1);
> > +	sg_dma_address(&sgl) = edma_ctx.intr_rc_phys + qp_num * sizeof(u32);
> > +	sg_dma_len(&sgl) = sizeof(u32);
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.dst_addr       = edma_ctx.intr_ep_phys + qp_num * sizeof(u32);
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_MEM_TO_DEV;
> > +
> > +	if (dmaengine_slave_config(edma->intr_chan, &cfg))
> > +		return -EINVAL;
> > +
> > +	txd = dmaengine_prep_slave_sg(edma->intr_chan, &sgl, 1,
> > +				      DMA_MEM_TO_DEV,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd)
> > +		return -ENOSPC;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie))
> > +		return -ENOSPC;
> > +
> > +	dma_async_issue_pending(edma->intr_chan);
> > +	return 0;
> > +}
> > diff --git a/drivers/ntb/ntb_edma.h b/drivers/ntb/ntb_edma.h
> > new file mode 100644
> > index 000000000000..da0451827edb
> > --- /dev/null
> > +++ b/drivers/ntb/ntb_edma.h
> > @@ -0,0 +1,128 @@
> > +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
> > +#ifndef _NTB_EDMA_H_
> > +#define _NTB_EDMA_H_
> > +
> > +#include <linux/completion.h>
> > +#include <linux/device.h>
> > +#include <linux/interrupt.h>
> > +
> > +#define EDMA_REG_SIZE		SZ_64K
> > +#define DMA_LLP_MEM_SIZE	SZ_4K
> > +#define EDMA_WR_CH_NUM		4
> > +#define EDMA_RD_CH_NUM		4
> > +#define NTB_EDMA_MAX_CH		8
> > +
> > +#define NTB_EDMA_INFO_MAGIC	0x45444D41 /* "EDMA" */
> > +#define NTB_EDMA_INFO_OFF	EDMA_REG_SIZE
> > +
> > +#define NTB_EDMA_RING_ORDER	7
> > +#define NTB_EDMA_RING_ENTRIES	(1U << NTB_EDMA_RING_ORDER)
> > +#define NTB_EDMA_RING_MASK	(NTB_EDMA_RING_ENTRIES - 1)
> > +
> > +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num);
> > +
> > +/*
> > + * REMOTE_EDMA_EP:
> > + *   Endpoint owns the eDMA engine and pushes descriptors into a shared MW.
> > + *
> > + * REMOTE_EDMA_RC:
> > + *   Root Complex controls the endpoint eDMA through the shared MW and
> > + *   drives reads/writes on behalf of the host.
> > + */
> > +typedef enum {
> > +	REMOTE_EDMA_UNKNOWN,
> > +	REMOTE_EDMA_EP,
> > +	REMOTE_EDMA_RC,
> > +} remote_edma_mode_t;
> > +
> > +typedef enum {
> > +	REMOTE_EDMA_WRITE,
> > +	REMOTE_EDMA_READ,
> > +} remote_edma_dir_t;
> > +
> > +/*
> > + * Layout of remote eDMA MW (EP local address space, RC sees via peer MW):
> > + *
> > + *  0 .. EDMA_REG_SIZE-1        : DesignWare eDMA registers
> > + *  EDMA_REG_SIZE .. +PAGE_SIZE : struct ntb_edma_info (EP writes, RC reads)
> > + *  +PAGE_SIZE ..               : LL ring buffers (EP allocates phys addresses,
> > + *                                RC configures via dw_edma)
> > + *
> > + * ntb_edma_setup_mws() on EP:
> > + *   - allocates ntb_edma_info and LLs in EP memory
> > + *   - programs inbound iATU so that RC peer MW[n] points at this block
> > + *
> > + * ntb_edma_setup_peer() on RC:
> > + *   - ioremaps peer MW[n]
> > + *   - reads ntb_edma_info
> > + *   - sets up dw_edma_chip ll_region_* from that info
> > + */
> > +struct ntb_edma_info {
> > +	u32 magic;
> > +	u16 wr_cnt;
> > +	u16 rd_cnt;
> > +	u64 regs_phys;
> > +	u32 ll_stride;
> > +	u32 rsvd;
> > +	u64 ll_wr_phys[NTB_EDMA_MAX_CH];
> > +	u64 ll_rd_phys[NTB_EDMA_MAX_CH];
> > +
> > +	u64 intr_dar_base;
> > +} __packed;
> > +
> > +struct ll_dma_addrs {
> > +	dma_addr_t wr[EDMA_WR_CH_NUM];
> > +	dma_addr_t rd[EDMA_RD_CH_NUM];
> > +};
> > +
> > +struct ntb_edma_chans {
> > +	struct device *dev;
> > +
> > +	struct dma_chan *wr_chan[EDMA_WR_CH_NUM];
> > +	struct dma_chan *rd_chan[EDMA_RD_CH_NUM];
> > +	struct dma_chan *intr_chan;
> > +
> > +	unsigned int num_wr_chan;
> > +	unsigned int num_rd_chan;
> > +	atomic_t cur_wr_chan;
> > +	atomic_t cur_rd_chan;
> > +};
> > +
> > +static __always_inline u32 ntb_edma_ring_idx(u32 v)
> > +{
> > +	return v & NTB_EDMA_RING_MASK;
> > +}
> > +
> > +static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
> > +{
> > +	if (head >= tail) {
> > +		WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
> > +		return head - tail;
> > +	}
> > +
> > +	WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
> > +	return U32_MAX - tail + head + 1;
> > +}
> > +
> > +static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
> > +{
> > +	return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
> > +}
> > +
> > +static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
> > +{
> > +	return ntb_edma_ring_free_entry(head, tail) == 0;
> > +}
> > +
> > +int ntb_edma_setup_isr(struct device *dev, struct device *epc_dev,
> > +		       ntb_edma_interrupt_cb_t cb, void *data);
> > +void ntb_edma_teardown_isr(struct device *dev);
> > +int ntb_edma_setup_mws(struct ntb_dev *ndev);
> > +int ntb_edma_setup_peer(struct ntb_dev *ndev);
> > +int ntb_edma_setup_chans(struct device *dma_dev, struct ntb_edma_chans *edma);
> > +struct dma_chan *ntb_edma_pick_chan(struct ntb_edma_chans *edma,
> > +				    remote_edma_dir_t dir);
> > +void ntb_edma_teardown_chans(struct ntb_edma_chans *edma);
> > +int ntb_edma_notify_peer(struct ntb_edma_chans *edma, int qp_num);
> > +
> > +#endif
> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport_core.c
> > similarity index 65%
> > rename from drivers/ntb/ntb_transport.c
> > rename to drivers/ntb/ntb_transport_core.c
> > index 907db6c93d4d..48d48921978d 100644
> > --- a/drivers/ntb/ntb_transport.c
> > +++ b/drivers/ntb/ntb_transport_core.c
> > @@ -47,6 +47,9 @@
> >   * Contact Information:
> >   * Jon Mason <jon.mason@intel.com>
> >   */
> > +#include <linux/atomic.h>
> > +#include <linux/bug.h>
> > +#include <linux/compiler.h>
> >  #include <linux/debugfs.h>
> >  #include <linux/delay.h>
> >  #include <linux/dmaengine.h>
> > @@ -71,6 +74,8 @@
> >  #define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
> >  #define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
> >
> > +#define NTB_EDMA_MAX_POLL		32
> > +
> >  MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
> >  MODULE_VERSION(NTB_TRANSPORT_VER);
> >  MODULE_LICENSE("Dual BSD/GPL");
> > @@ -102,6 +107,13 @@ module_param(use_msi, bool, 0644);
> >  MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
> >  #endif
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +#include "ntb_edma.h"
> > +static bool use_remote_edma;
> > +module_param(use_remote_edma, bool, 0644);
> > +MODULE_PARM_DESC(use_remote_edma, "Use remote eDMA mode (when enabled, use_msi is ignored)");
> > +#endif
> > +
> >  static struct dentry *nt_debugfs_dir;
> >
> >  /* Only two-ports NTB devices are supported */
> > @@ -125,6 +137,14 @@ struct ntb_queue_entry {
> >  		struct ntb_payload_header __iomem *tx_hdr;
> >  		struct ntb_payload_header *rx_hdr;
> >  	};
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	dma_addr_t addr;
> > +
> > +	/* Used by RC side only */
> > +	struct scatterlist sgl;
> > +	struct work_struct dma_work;
> > +#endif
> >  };
> >
> >  struct ntb_rx_info {
> > @@ -202,6 +222,33 @@ struct ntb_transport_qp {
> >  	int msi_irq;
> >  	struct ntb_msi_desc msi_desc;
> >  	struct ntb_msi_desc peer_msi_desc;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	/*
> > +	 * For ensuring peer notification in non-atomic context.
> > +	 * ntb_peer_db_set might sleep or schedule.
> > +	 */
> > +	struct work_struct db_work;
> > +
> > +	/*
> > +	 * wr: remote eDMA write transfer (EP -> RC direction)
> > +	 * rd: remote eDMA read transfer (RC -> EP direction)
> > +	 */
> > +	u32 wr_cons;
> > +	u32 rd_cons;
> > +	u32 wr_prod;
> > +	u32 rd_prod;
> > +	u32 wr_issue;
> > +	u32 rd_issue;
> > +
> > +	spinlock_t ep_tx_lock;
> > +	spinlock_t ep_rx_lock;
> > +	spinlock_t rc_lock;
> > +
> > +	/* Completion work for read/write transfers. */
> > +	struct work_struct read_work;
> > +	struct work_struct write_work;
> > +#endif
> >  };
> >
> >  struct ntb_transport_mw {
> > @@ -249,6 +296,13 @@ struct ntb_transport_ctx {
> >
> >  	/* Make sure workq of link event be executed serially */
> >  	struct mutex link_event_lock;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	remote_edma_mode_t remote_edma_mode;
> > +	struct device *dma_dev;
> > +	struct workqueue_struct *wq;
> > +	struct ntb_edma_chans edma;
> > +#endif
> >  };
> >
> >  enum {
> > @@ -262,6 +316,19 @@ struct ntb_payload_header {
> >  	unsigned int flags;
> >  };
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt);
> > +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> > +				   unsigned int *mw_count);
> > +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num);
> > +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> > +					    struct ntb_transport_qp *qp);
> > +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt);
> > +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt);
> > +static void ntb_transport_edma_rc_dma_work(struct work_struct *work);
> > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > +
> >  /*
> >   * Return the device that should be used for DMA mapping.
> >   *
> > @@ -298,7 +365,7 @@ enum {
> >  	container_of((__drv), struct ntb_transport_client, driver)
> >
> >  #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
> > -#define NTB_QP_DEF_NUM_ENTRIES	100
> > +#define NTB_QP_DEF_NUM_ENTRIES	128
> >  #define NTB_LINK_DOWN_TIMEOUT	10
> >
> >  static void ntb_transport_rxc_db(unsigned long data);
> > @@ -1015,6 +1082,10 @@ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
> >  	count = ntb_spad_count(nt->ndev);
> >  	for (i = 0; i < count; i++)
> >  		ntb_spad_write(nt->ndev, i, 0);
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_edma_teardown_chans(&nt->edma);
> > +#endif
> >  }
> >
> >  static void ntb_transport_link_cleanup_work(struct work_struct *work)
> > @@ -1051,6 +1122,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >
> >  	/* send the local info, in the opposite order of the way we read it */
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	rc = ntb_transport_edma_ep_init(nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
> > +		return;
> > +	}
> > +#endif
> > +
> >  	if (nt->use_msi) {
> >  		rc = ntb_msi_setup_mws(ndev);
> >  		if (rc) {
> > @@ -1132,6 +1211,14 @@ static void ntb_transport_link_work(struct work_struct *work)
> >
> >  	nt->link_is_up = true;
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	rc = ntb_transport_edma_rc_init(nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
> > +		goto out1;
> > +	}
> > +#endif
> > +
> >  	for (i = 0; i < nt->qp_count; i++) {
> >  		struct ntb_transport_qp *qp = &nt->qp_vec[i];
> >
> > @@ -1277,6 +1364,8 @@ static const struct ntb_transport_backend_ops default_backend_ops = {
> >  	.debugfs_stats_show = ntb_transport_default_debugfs_stats_show,
> >  };
> >
> > +static const struct ntb_transport_backend_ops edma_backend_ops;
> > +
> >  static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  {
> >  	struct ntb_transport_ctx *nt;
> > @@ -1311,7 +1400,23 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >
> >  	nt->ndev = ndev;
> >
> > -	nt->backend_ops = default_backend_ops;
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	if (use_remote_edma) {
> > +		rc = ntb_transport_edma_init(nt, &mw_count);
> > +		if (rc) {
> > +			nt->mw_count = 0;
> > +			goto err;
> > +		}
> > +		nt->backend_ops = edma_backend_ops;
> > +
> > +		/*
> > +		 * In remote eDMA mode, we reserve a read channel for Host->EP
> > +		 * notification.
> > +		 */
> > +		use_msi = false;
> > +	} else
> > +#endif
> > +		nt->backend_ops = default_backend_ops;
> >
> >  	/*
> >  	 * If we are using MSI, and have at least one extra memory window,
> > @@ -1402,6 +1507,10 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  		rc = ntb_transport_init_queue(nt, i);
> >  		if (rc)
> >  			goto err2;
> > +
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +		ntb_transport_edma_init_queue(nt, i);
> > +#endif
> >  	}
> >
> >  	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
> > @@ -1433,6 +1542,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
> >  	}
> >  	kfree(nt->mw_vec);
> >  err:
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_transport_edma_uninit(nt);
> > +#endif
> >  	kfree(nt);
> >  	return rc;
> >  }
> > @@ -2055,11 +2167,16 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >
> >  	nt->qp_bitmap_free &= ~qp_bit;
> >
> > +	qp->qp_bit = qp_bit;
> >  	qp->cb_data = data;
> >  	qp->rx_handler = handlers->rx_handler;
> >  	qp->tx_handler = handlers->tx_handler;
> >  	qp->event_handler = handlers->event_handler;
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	ntb_transport_edma_create_queue(nt, qp);
> > +#endif
> > +
> >  	dma_cap_zero(dma_mask);
> >  	dma_cap_set(DMA_MEMCPY, dma_mask);
> >
> > @@ -2105,6 +2222,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
> >  			goto err1;
> >
> >  		entry->qp = qp;
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> > +#endif
> >  		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> >  			     &qp->rx_free_q);
> >  	}
> > @@ -2156,8 +2276,8 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
> >   */
> >  void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >  {
> > -	struct pci_dev *pdev;
> >  	struct ntb_queue_entry *entry;
> > +	struct pci_dev *pdev;
> >  	u64 qp_bit;
> >
> >  	if (!qp)
> > @@ -2208,6 +2328,10 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
> >  	tasklet_kill(&qp->rxc_db_work);
> >
> >  	cancel_delayed_work_sync(&qp->link_work);
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +	cancel_work_sync(&qp->read_work);
> > +	cancel_work_sync(&qp->write_work);
> > +#endif
> >
> >  	qp->cb_data = NULL;
> >  	qp->rx_handler = NULL;
> > @@ -2346,6 +2470,1157 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
> >  }
> >  EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
> >
> > +#ifdef CONFIG_NTB_TRANSPORT_EDMA
> > +/*
> > + * Remote eDMA mode implementation
> > + */
> > +struct ntb_edma_desc {
> > +	u32 len;
> > +	u32 flags;
> > +	u64 addr; /* DMA address */
> > +	u64 data;
> > +};
> > +
> > +struct ntb_edma_ring {
> > +	struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
> > +	u32 head;
> > +	u32 tail;
> > +};
> > +
> > +#define NTB_EDMA_DESC_OFF(i)	((size_t)(i) * sizeof(struct ntb_edma_desc))
> > +
> > +#define __NTB_EDMA_CHECK_INDEX(_i)					\
> > +({									\
> > +	unsigned long __i = (unsigned long)(_i);			\
> > +	WARN_ONCE(__i >= (unsigned long)NTB_EDMA_RING_ENTRIES,		\
> > +		  "ntb_edma: index i=%lu >= ring_entries=%lu\n",	\
> > +		  __i, (unsigned long)NTB_EDMA_RING_ENTRIES);		\
> > +	__i;								\
> > +})
> > +
> > +#define NTB_EDMA_DESC_I(qp, i, n)					\
> > +({									\
> > +	typeof(qp) __qp = (qp);						\
> > +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> > +	(struct ntb_edma_desc *)					\
> > +		((char *)(__qp)->rx_buff +				\
> > +		 (sizeof(struct ntb_edma_ring) * n) +			\
> > +		 NTB_EDMA_DESC_OFF(__i));				\
> > +})
> > +
> > +#define NTB_EDMA_DESC_O(qp, i, n)					\
> > +({									\
> > +	typeof(qp) __qp = (qp);						\
> > +	unsigned long __i = __NTB_EDMA_CHECK_INDEX(i);			\
> > +	(struct ntb_edma_desc __iomem *)				\
> > +		((char __iomem *)(__qp)->tx_mw +			\
> > +		 (sizeof(struct ntb_edma_ring) * n) +			\
> > +		 NTB_EDMA_DESC_OFF(__i));				\
> > +})
> > +
> > +#define NTB_EDMA_HEAD_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, head)))
> > +#define NTB_EDMA_HEAD_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, head)))
> > +#define NTB_EDMA_TAIL_I(qp, n) ((u32 *)((char *)qp->rx_buff +		\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, tail)))
> > +#define NTB_EDMA_TAIL_O(qp, n) ((u32 *)((char __iomem *)qp->tx_mw +	\
> > +				(sizeof(struct ntb_edma_ring) * n) +	\
> > +				offsetof(struct ntb_edma_ring, tail)))
> > +
> > +/*
> > + * Macro naming rule:
> > + *   NTB_DESC_RD_EP_I (as an example)
> > + *            ^^ ^^ ^
> > + *            :  :  `-- I(n) or O(ut). In = Read, Out = Write.
> > + *            :  `----- Who uses this macro.
> > + *            `-------- DESC / HEAD / TAIL
> > + *
> > + * Read transfers (RC->EP):
> > + *
> > + *   EP view (outbound, written via NTB):
> > + *       - descs: NTB_DESC_RD_EP_O(qp, i) / NTB_DESC_RD_EP_I(qp, i)
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           :
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *       - head: NTB_HEAD_RD_EP_O(qp)
> > + *       - tail: NTB_TAIL_RD_EP_I(qp)
> > + *
> > + *   RC view (inbound, local mapping):
> > + *       - descs: NTB_DESC_RD_RC_I(qp, i) / NTB_DESC_RD_RC_O(qp, i)
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *           :
> > + *           [ len ][ flags ][ addr ][ data ]
> > + *       - head: NTB_HEAD_RD_RC_I(qp)
> > + *       - tail: NTB_TAIL_RD_RC_O(qp)
> > + *
> > + * Write transfers (EP -> RC) are analogous but use
> > + * NTB_DESC_WR_{EP_O,RC_I}(), NTB_HEAD_WR_{EP_O,RC_I}(),
> > + * and NTB_TAIL_WR_{EP_I,RC_O}().
> > + */
> > +#define NTB_DESC_RD_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> > +#define NTB_DESC_RD_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> > +#define NTB_DESC_WR_EP_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> > +#define NTB_DESC_WR_EP_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> > +#define NTB_DESC_RD_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 0)
> > +#define NTB_DESC_RD_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 0)
> > +#define NTB_DESC_WR_RC_I(qp, i)	NTB_EDMA_DESC_I(qp, i, 1)
> > +#define NTB_DESC_WR_RC_O(qp, i)	NTB_EDMA_DESC_O(qp, i, 1)
> > +
> > +#define NTB_HEAD_RD_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 0)
> > +#define NTB_HEAD_WR_EP_O(qp)	NTB_EDMA_HEAD_O(qp, 1)
> > +#define NTB_HEAD_RD_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 0)
> > +#define NTB_HEAD_WR_RC_I(qp)	NTB_EDMA_HEAD_I(qp, 1)
> > +
> > +#define NTB_TAIL_RD_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 0)
> > +#define NTB_TAIL_WR_EP_I(qp)	NTB_EDMA_TAIL_I(qp, 1)
> > +#define NTB_TAIL_RD_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 0)
> > +#define NTB_TAIL_WR_RC_O(qp)	NTB_EDMA_TAIL_O(qp, 1)
> > +
> > +static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
> > +{
> > +	return qp->transport->remote_edma_mode == REMOTE_EDMA_RC;
> > +}
> > +
> > +static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
> > +{
> > +	return qp->transport->remote_edma_mode == REMOTE_EDMA_EP;
> > +}
> > +
> > +static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
> > +{
> > +	return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
> > +}
> > +
> > +static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
> > +{
> > +	unsigned int head, tail;
> > +
> > +	if (ntb_qp_edma_is_ep(qp)) {
> > +		scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> > +			/* In this scope, only 'head' might proceed */
> > +			tail = READ_ONCE(qp->wr_cons);
> > +			head = READ_ONCE(qp->wr_prod);
> > +		}
> > +		return ntb_edma_ring_free_entry(head, tail);
> > +	}
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> > +		/* In this scope, only 'head' might proceed */
> > +		tail = READ_ONCE(qp->rd_issue);
> > +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> > +	}
> > +	/*
> > +	 * On the RC side, the 'used' amount indicates how much the EP side
> > +	 * has refilled, which is available for us to use for TX.
> > +	 */
> > +	return ntb_edma_ring_used_entry(head, tail);
> > +}
> > +
> > +static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
> > +						  struct ntb_transport_qp *qp)
> > +{
> > +	seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
> > +	seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
> > +	seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
> > +	seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
> > +	seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
> > +	seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
> > +
> > +	seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
> > +	seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
> > +	seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
> > +	seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
> > +	seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
> > +	seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
> > +	seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
> > +	seq_putc(s, '\n');
> > +
> > +	seq_puts(s, "Using Remote eDMA - Yes\n");
> > +	seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
> > +}
> > +
> > +static void ntb_transport_edma_uninit(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +
> > +	if (nt->remote_edma_mode == REMOTE_EDMA_EP && ndev && ndev->pdev)
> > +		ntb_edma_teardown_isr(&ndev->pdev->dev);
> > +
> > +	if (nt->wq)
> > +		destroy_workqueue(nt->wq);
> > +	nt->wq = NULL;
> > +}
> > +
> > +static int ntb_transport_edma_init(struct ntb_transport_ctx *nt,
> > +				   unsigned int *mw_count)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +
> > +	/*
> > +	 * We need at least one MW for the transport plus one MW reserved
> > +	 * for the remote eDMA window (see ntb_edma_setup_mws/peer).
> > +	 */
> > +	if (*mw_count <= 1) {
> > +		dev_err(&ndev->dev,
> > +			"remote eDMA requires at least two MWs (have %u)\n",
> > +			*mw_count);
> > +		return -ENODEV;
> > +	}
> > +
> > +	nt->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
> > +	if (!nt->wq) {
> > +		ntb_transport_edma_uninit(nt);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* Reserve the last peer MW exclusively for the eDMA window. */
> > +	*mw_count -= 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static void ntb_transport_edma_db_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp =
> > +			container_of(work, struct ntb_transport_qp, db_work);
> > +
> > +	ntb_peer_db_set(qp->ndev, qp->qp_bit);
> > +}
> > +
> > +static void ntb_transport_edma_notify_peer(struct ntb_transport_qp *qp)
> > +{
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		if (!ntb_edma_notify_peer(&qp->transport->edma, qp->qp_num))
> > +			return;
> > +
> > +	/*
> > +	 * Called from contexts that may be atomic. Since ntb_peer_db_set()
> > +	 * may sleep, delegate the actual doorbell write to a workqueue.
> > +	 */
> > +	queue_work(system_highpri_wq, &qp->db_work);
> > +}
> > +
> > +static void ntb_transport_edma_isr(void *data, int qp_num)
> > +{
> > +	struct ntb_transport_ctx *nt = data;
> > +	struct ntb_transport_qp *qp;
> > +
> > +	if (qp_num < 0 || qp_num >= nt->qp_count)
> > +		return;
> > +
> > +	qp = &nt->qp_vec[qp_num];
> > +	if (WARN_ON(!qp))
> > +		return;
> > +
> > +	queue_work(nt->wq, &qp->read_work);
> > +	queue_work(nt->wq, &qp->write_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct pci_dev *pdev = ndev->pdev;
> > +	int rc;
> > +
> > +	if (!use_remote_edma || nt->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
> > +		return 0;
> > +
> > +	rc = ntb_edma_setup_peer(ndev);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	rc = ntb_edma_setup_chans(get_dma_dev(ndev), &nt->edma);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	nt->remote_edma_mode = REMOTE_EDMA_RC;
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
> > +{
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct pci_dev *pdev = ndev->pdev;
> > +	struct pci_epc *epc;
> > +	int rc;
> > +
> > +	if (!use_remote_edma || nt->remote_edma_mode == REMOTE_EDMA_EP)
> > +		return 0;
> > +
> > +	/* Only EP side can return pci_epc */
> > +	epc = ntb_get_pci_epc(ndev);
> > +	if (!epc)
> > +		return 0;
> > +
> > +	rc = ntb_edma_setup_mws(ndev);
> > +	if (rc) {
> > +		dev_err(&pdev->dev,
> > +			"Failed to set up memory window for eDMA: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	rc = ntb_edma_setup_isr(&pdev->dev, &epc->dev, ntb_transport_edma_isr, nt);
> > +	if (rc) {
> > +		dev_err(&pdev->dev, "Failed to setup eDMA ISR (%d)\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	nt->remote_edma_mode = REMOTE_EDMA_EP;
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num)
> > +{
> > +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> > +	struct ntb_dev *ndev = nt->ndev;
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_transport_mw *mw;
> > +	unsigned int mw_num, mw_count, qp_count;
> > +	unsigned int qp_offset, rx_info_offset;
> > +	unsigned int mw_size, mw_size_per_qp;
> > +	unsigned int num_qps_mw;
> > +	size_t edma_total;
> > +	unsigned int i;
> > +	int node;
> > +
> > +	mw_count = nt->mw_count;
> > +	qp_count = nt->qp_count;
> > +
> > +	mw_num = QP_TO_MW(nt, qp_num);
> > +	mw = &nt->mw_vec[mw_num];
> > +
> > +	if (!mw->virt_addr)
> > +		return -ENOMEM;
> > +
> > +	if (mw_num < qp_count % mw_count)
> > +		num_qps_mw = qp_count / mw_count + 1;
> > +	else
> > +		num_qps_mw = qp_count / mw_count;
> > +
> > +	mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
> > +	if (max_mw_size && mw_size > max_mw_size)
> > +		mw_size = max_mw_size;
> > +
> > +	mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
> > +	qp_offset = mw_size_per_qp * (qp_num / mw_count);
> > +	rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
> > +
> > +	qp->tx_mw_size = mw_size_per_qp;
> > +	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> > +	if (!qp->tx_mw)
> > +		return -EINVAL;
> > +	qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
> > +	if (!qp->tx_mw_phys)
> > +		return -EINVAL;
> > +	qp->rx_info = qp->tx_mw + rx_info_offset;
> > +	qp->rx_buff = mw->virt_addr + qp_offset;
> > +	qp->remote_rx_info = qp->rx_buff + rx_info_offset;
> > +
> > +	/* Due to housekeeping, there must be at least 2 buffs */
> > +	qp->tx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> > +	qp->rx_max_frame = min(transport_mtu, mw_size_per_qp / 2);
> > +
> > +	/* In eDMA mode, decouple from MW sizing and force ring-sized entries */
> > +	edma_total = 2 * sizeof(struct ntb_edma_ring);
> > +	if (rx_info_offset < edma_total) {
> > +		dev_err(&ndev->dev, "Ring space requires %zuB, only %uB available\n",
> > +			edma_total, rx_info_offset);
> > +		return -EINVAL;
> > +	}
> > +	qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
> > +	qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
> > +
> > +	/*
> > +	 * Allocate any additional rx entries needed so that the number of
> > +	 * allocated entries matches the ring size (rx_max_entry).
> > +	 */
> > +	node = dev_to_node(&ndev->dev);
> > +	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
> > +		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
> > +		if (!entry)
> > +			return -ENOMEM;
> > +
> > +		entry->qp = qp;
> > +		INIT_WORK(&entry->dma_work, ntb_transport_edma_rc_dma_work);
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> > +			     &qp->rx_free_q);
> > +		qp->rx_alloc_entry++;
> > +	}
> > +
> > +	memset(qp->rx_buff, 0, edma_total);
> > +
> > +	qp->rx_pkts = 0;
> > +	qp->tx_pkts = 0;
> > +
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_ep_read_complete(struct ntb_transport_qp *qp)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	unsigned int len;
> > +	u32 flags, idx;
> > +
> > +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RD_EP_I(qp)),
> > +				     qp->rd_cons) == 0)
> > +		return 0;
> > +
> > +	idx = ntb_edma_ring_idx(qp->rd_cons);
> > +	in = NTB_DESC_RD_EP_I(qp, idx);
> > +	if (!(in->flags & DESC_DONE_FLAG))
> > +		return 0;
> > +
> > +	flags = in->flags;
> > +	in->flags = 0;
> > +	len = in->len; /* might be smaller than entry->len */
> > +
> > +	entry = (struct ntb_queue_entry *)(in->data);
> > +	if (WARN_ON(!entry))
> > +		return 0;
> > +
> > +	if (flags & LINK_DOWN_FLAG) {
> > +		ntb_qp_link_down(qp);
> > +		qp->rd_cons++;
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +		return 1;
> > +	}
> > +
> > +	dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_FROM_DEVICE);
> > +
> > +	qp->rx_bytes += len;
> > +	qp->rx_pkts++;
> > +	qp->rd_cons++;
> > +
> > +	if (qp->rx_handler && qp->client_ready)
> > +		qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
> > +
> > +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +	return 1;
> > +}
> > +
> > +static int ntb_transport_edma_ep_write_complete(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	u32 idx;
> > +
> > +	if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_WR_EP_I(qp)),
> > +				     qp->wr_cons) == 0)
> > +		return 0;
> > +
> > +	idx = ntb_edma_ring_idx(qp->wr_cons);
> > +	in = NTB_DESC_WR_EP_I(qp, idx);
> > +
> > +	entry = (struct ntb_queue_entry *)(in->data);
> > +	if (WARN_ON(!entry))
> > +		return 0;
> > +
> > +	qp->wr_cons++;
> > +
> > +	if (qp->tx_handler)
> > +		qp->tx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
> > +
> > +	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
> > +	return 1;
> > +}
> > +
> > +static void ntb_transport_edma_ep_read_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> > +		if (!ntb_transport_edma_ep_read_complete(qp))
> > +			break;
> > +	}
> > +
> > +	if (ntb_transport_edma_ep_read_complete(qp))
> > +		queue_work(qp->transport->wq, &qp->read_work);
> > +}
> > +
> > +static void ntb_transport_edma_ep_write_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
> > +		if (!ntb_transport_edma_ep_write_complete(qp))
> > +			break;
> > +	}
> > +
> > +	if (ntb_transport_edma_ep_write_complete(qp))
> > +		queue_work(qp->transport->wq, &qp->write_work);
> > +}
> > +
> > +static void ntb_transport_edma_rc_write_complete_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	unsigned int len;
> > +	void *cb_data;
> > +	u32 idx;
> > +
> > +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->wr_issue),
> > +					qp->wr_cons) != 0) {
> > +		/* Paired with smp_wmb() in ntb_transport_edma_rc_poll() */
> > +		smp_rmb();
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_cons);
> > +		in = NTB_DESC_WR_RC_I(qp, idx);
> > +		entry = (struct ntb_queue_entry *)READ_ONCE(in->data);
> > +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> > +			break;
> > +
> > +		in->data = 0;
> > +
> > +		cb_data = entry->cb_data;
> > +		len = entry->len;
> > +
> > +		iowrite32(++qp->wr_cons, NTB_TAIL_WR_RC_O(qp));
> > +
> > +		if (unlikely(entry->flags & LINK_DOWN_FLAG)) {
> > +			ntb_qp_link_down(qp);
> > +			continue;
> > +		}
> > +
> > +		ntb_transport_edma_notify_peer(qp);
> > +
> > +		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
> > +
> > +		if (qp->rx_handler && qp->client_ready)
> > +			qp->rx_handler(qp, qp->cb_data, cb_data, len);
> > +
> > +		/* stat updates */
> > +		qp->rx_bytes += len;
> > +		qp->rx_pkts++;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_write_cb(void *data,
> > +					   const struct dmaengine_result *res)
> > +{
> > +	struct ntb_queue_entry *entry = data;
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	enum dmaengine_tx_result dma_err = res->result;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +
> > +	switch (dma_err) {
> > +	case DMA_TRANS_READ_FAILED:
> > +	case DMA_TRANS_WRITE_FAILED:
> > +	case DMA_TRANS_ABORTED:
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		break;
> > +	case DMA_TRANS_NOERROR:
> > +	default:
> > +		break;
> > +	}
> > +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_FROM_DEVICE);
> > +	sg_dma_address(&entry->sgl) = 0;
> > +
> > +	entry->flags |= DESC_DONE_FLAG;
> > +
> > +	queue_work(nt->wq, &qp->write_work);
> > +}
> > +
> > +static void ntb_transport_edma_rc_read_complete_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	struct ntb_queue_entry *entry;
> > +	unsigned int len;
> > +	void *cb_data;
> > +	u32 idx;
> > +
> > +	while (ntb_edma_ring_used_entry(READ_ONCE(qp->rd_issue),
> > +					qp->rd_cons) != 0) {
> > +		/* Paired with smp_wmb() in ntb_transport_edma_rc_tx_enqueue() */
> > +		smp_rmb();
> > +
> > +		idx = ntb_edma_ring_idx(qp->rd_cons);
> > +		in = NTB_DESC_RD_RC_I(qp, idx);
> > +		entry = (struct ntb_queue_entry *)in->data;
> > +		if (!entry || !(entry->flags & DESC_DONE_FLAG))
> > +			break;
> > +
> > +		in->data = 0;
> > +
> > +		cb_data = entry->cb_data;
> > +		len = entry->len;
> > +
> > +		out = NTB_DESC_RD_RC_O(qp, idx);
> > +
> > +		WRITE_ONCE(qp->rd_cons, qp->rd_cons + 1);
> > +
> > +		/*
> > +		 * No need to add barrier in-between to enforce ordering here.
> > +		 * The other side proceeds only after both flags and tail are
> > +		 * updated.
> > +		 */
> > +		iowrite32(entry->flags, &out->flags);
> > +		iowrite32(qp->rd_cons, NTB_TAIL_RD_RC_O(qp));
> > +
> > +		ntb_transport_edma_notify_peer(qp);
> > +
> > +		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
> > +			     &qp->tx_free_q);
> > +
> > +		if (qp->tx_handler)
> > +			qp->tx_handler(qp, qp->cb_data, cb_data, len);
> > +
> > +		/* stat updates */
> > +		qp->tx_bytes += len;
> > +		qp->tx_pkts++;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_read_cb(void *data,
> > +					  const struct dmaengine_result *res)
> > +{
> > +	struct ntb_queue_entry *entry = data;
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	enum dmaengine_tx_result dma_err = res->result;
> > +
> > +	switch (dma_err) {
> > +	case DMA_TRANS_READ_FAILED:
> > +	case DMA_TRANS_WRITE_FAILED:
> > +	case DMA_TRANS_ABORTED:
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		break;
> > +	case DMA_TRANS_NOERROR:
> > +	default:
> > +		break;
> > +	}
> > +	dma_unmap_sg(dma_dev, &entry->sgl, 1, DMA_TO_DEVICE);
> > +	sg_dma_address(&entry->sgl) = 0;
> > +
> > +	entry->flags |= DESC_DONE_FLAG;
> > +
> > +	queue_work(nt->wq, &qp->read_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_write_start(struct device *d,
> > +					     struct dma_chan *chan, size_t len,
> > +					     dma_addr_t ep_src, void *rc_dst,
> > +					     struct ntb_queue_entry *entry)
> > +{
> > +	struct scatterlist *sgl = &entry->sgl;
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	dma_cookie_t cookie;
> > +	int nents, rc;
> > +
> > +	if (!d)
> > +		return -ENODEV;
> > +
> > +	if (!chan)
> > +		return -ENXIO;
> > +
> > +	if (WARN_ON(!ep_src || !rc_dst))
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON(sg_dma_address(sgl)))
> > +		return -EINVAL;
> > +
> > +	sg_init_one(sgl, rc_dst, len);
> > +	nents = dma_map_sg(d, sgl, 1, DMA_FROM_DEVICE);
> > +	if (nents <= 0)
> > +		return -EIO;
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.src_addr       = ep_src;
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_DEV_TO_MEM;
> > +	rc = dmaengine_slave_config(chan, &cfg);
> > +	if (rc)
> > +		goto out_unmap;
> > +
> > +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +
> > +	txd->callback_result = ntb_transport_edma_rc_write_cb;
> > +	txd->callback_param = entry;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie)) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +	dma_async_issue_pending(chan);
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_sg(d, sgl, 1, DMA_FROM_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_rc_read_start(struct device *d,
> > +					    struct dma_chan *chan, size_t len,
> > +					    void *rc_src, dma_addr_t ep_dst,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct scatterlist *sgl = &entry->sgl;
> > +	struct dma_async_tx_descriptor *txd;
> > +	struct dma_slave_config cfg;
> > +	dma_cookie_t cookie;
> > +	int nents, rc;
> > +
> > +	if (!d)
> > +		return -ENODEV;
> > +
> > +	if (!chan)
> > +		return -ENXIO;
> > +
> > +	if (WARN_ON(!rc_src || !ep_dst))
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON(sg_dma_address(sgl)))
> > +		return -EINVAL;
> > +
> > +	sg_init_one(sgl, rc_src, len);
> > +	nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
> > +	if (nents <= 0)
> > +		return -EIO;
> > +
> > +	memset(&cfg, 0, sizeof(cfg));
> > +	cfg.dst_addr       = ep_dst;
> > +	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +	cfg.direction      = DMA_MEM_TO_DEV;
> > +	rc = dmaengine_slave_config(chan, &cfg);
> > +	if (rc)
> > +		goto out_unmap;
> > +
> > +	txd = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
> > +				      DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
> > +	if (!txd) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +
> > +	txd->callback_result = ntb_transport_edma_rc_read_cb;
> > +	txd->callback_param = entry;
> > +
> > +	cookie = dmaengine_submit(txd);
> > +	if (dma_submit_error(cookie)) {
> > +		rc = -EIO;
> > +		goto out_unmap;
> > +	}
> > +	dma_async_issue_pending(chan);
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static void ntb_transport_edma_rc_dma_work(struct work_struct *work)
> > +{
> > +	struct ntb_queue_entry *entry = container_of(
> > +				work, struct ntb_queue_entry, dma_work);
> > +	struct ntb_transport_qp *qp = entry->qp;
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct dma_chan *chan;
> > +	int rc;
> > +
> > +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_WRITE);
> > +	rc = ntb_transport_edma_rc_write_start(dma_dev, chan, entry->len,
> > +					       entry->addr, entry->buf, entry);
> > +	if (rc) {
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->write_work);
> > +		return;
> > +	}
> > +}
> > +
> > +static void ntb_transport_edma_rc_poll(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	int budget = NTB_EDMA_MAX_POLL;
> > +	struct ntb_queue_entry *entry;
> > +	struct ntb_edma_desc *in;
> > +	dma_addr_t ep_src;
> > +	u32 len, idx;
> > +
> > +	while (budget--) {
> > +		if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_HEAD_WR_RC_I(qp)),
> > +					     qp->wr_issue) == 0)
> > +			break;
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_issue);
> > +		in = NTB_DESC_WR_RC_I(qp, idx);
> > +
> > +		len = READ_ONCE(in->len);
> > +		ep_src = (dma_addr_t)READ_ONCE(in->addr);
> > +
> > +		/* Prepare 'entry' for write completion */
> > +		entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
> > +		if (!entry) {
> > +			qp->rx_err_no_buf++;
> > +			break;
> > +		}
> > +		if (WARN_ON(entry->flags & DESC_DONE_FLAG))
> > +			entry->flags &= ~DESC_DONE_FLAG;
> > +		entry->len = len; /* NB. entry->len can be <=0 */
> > +		entry->addr = ep_src;
> > +
> > +		/*
> > +		 * ntb_transport_edma_rc_write_complete_work() checks entry->flags
> > +		 * so it needs to be set before wr_issue++.
> > +		 */
> > +		in->data = (uintptr_t)entry;
> > +
> > +		/* Ensure in->data visible before wr_issue++ */
> > +		smp_wmb();
> > +
> > +		WRITE_ONCE(qp->wr_issue, qp->wr_issue + 1);
> > +
> > +		if (!len) {
> > +			entry->flags |= DESC_DONE_FLAG;
> > +			queue_work(nt->wq, &qp->write_work);
> > +			continue;
> > +		}
> > +
> > +		if (in->flags & LINK_DOWN_FLAG) {
> > +			dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
> > +			entry->flags |= DESC_DONE_FLAG | LINK_DOWN_FLAG;
> > +			queue_work(nt->wq, &qp->write_work);
> > +			continue;
> > +		}
> > +
> > +		queue_work(nt->wq, &entry->dma_work);
> > +	}
> > +
> > +	/* Budget fully consumed; there may be more work, so poll again */
> > +	if (budget < 0)
> > +		tasklet_schedule(&qp->rxc_db_work);
> > +}
> > +
> > +static int ntb_transport_edma_rc_tx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	struct dma_chan *chan;
> > +	u32 issue, idx, head;
> > +	dma_addr_t ep_dst;
> > +	int rc;
> > +
> > +	WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->rc_lock) {
> > +		head = READ_ONCE(*NTB_HEAD_RD_RC_I(qp));
> > +		issue = qp->rd_issue;
> > +		if (ntb_edma_ring_used_entry(head, issue) == 0) {
> > +			qp->tx_ring_full++;
> > +			return -ENOSPC;
> > +		}
> > +
> > +		/*
> > +		 * ntb_transport_edma_rc_read_complete_work() checks entry->flags
> > +		 * so it needs to be set before rd_issue++.
> > +		 */
> > +		idx = ntb_edma_ring_idx(issue);
> > +		in = NTB_DESC_RD_RC_I(qp, idx);
> > +		in->data = (uintptr_t)entry;
> > +
> > +		/* Make in->data visible before rd_issue++ */
> > +		smp_wmb();
> > +
> > +		WRITE_ONCE(qp->rd_issue, qp->rd_issue + 1);
> > +	}
> > +
> > +	/* Publish the final transfer length to the EP side */
> > +	out = NTB_DESC_RD_RC_O(qp, idx);
> > +	iowrite32(len, &out->len);
> > +	ioread32(&out->len);
> > +
> > +	if (unlikely(!len)) {
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->read_work);
> > +		return 0;
> > +	}
> > +
> > +	/* Paired with dma_wmb() in ntb_transport_edma_ep_rx_enqueue() */
> > +	dma_rmb();
> > +
> > +	/* kick remote eDMA read transfer */
> > +	ep_dst = (dma_addr_t)in->addr;
> > +	chan = ntb_edma_pick_chan(&nt->edma, REMOTE_EDMA_READ);
> > +	rc = ntb_transport_edma_rc_read_start(dma_dev, chan, len,
> > +					      entry->buf, ep_dst, entry);
> > +	if (rc) {
> > +		entry->errors++;
> > +		entry->len = -EIO;
> > +		entry->flags |= DESC_DONE_FLAG;
> > +		queue_work(nt->wq, &qp->read_work);
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int ntb_transport_edma_ep_tx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	dma_addr_t ep_src = 0;
> > +	u32 idx;
> > +	int rc;
> > +
> > +	if (likely(len)) {
> > +		ep_src = dma_map_single(dma_dev, entry->buf, len,
> > +					DMA_TO_DEVICE);
> > +		rc = dma_mapping_error(dma_dev, ep_src);
> > +		if (rc)
> > +			return rc;
> > +	}
> > +
> > +	scoped_guard(spinlock_irqsave, &qp->ep_tx_lock) {
> > +		if (ntb_edma_ring_full(qp->wr_prod, qp->wr_cons)) {
> > +			rc = -ENOSPC;
> > +			qp->tx_ring_full++;
> > +			goto out_unmap;
> > +		}
> > +
> > +		idx = ntb_edma_ring_idx(qp->wr_prod);
> > +		in  = NTB_DESC_WR_EP_I(qp, idx);
> > +		out = NTB_DESC_WR_EP_O(qp, idx);
> > +
> > +		WARN_ON(in->flags & DESC_DONE_FLAG);
> > +		WARN_ON(entry->flags & DESC_DONE_FLAG);
> > +		in->flags = 0;
> > +		in->data  = (uintptr_t)entry;
> > +		entry->addr  = ep_src;
> > +
> > +		iowrite32(len,          &out->len);
> > +		iowrite32(entry->flags, &out->flags);
> > +		iowrite64(ep_src,       &out->addr);
> > +		WRITE_ONCE(qp->wr_prod, qp->wr_prod + 1);
> > +
> > +		dma_wmb();
> > +		iowrite32(qp->wr_prod, NTB_HEAD_WR_EP_O(qp));
> > +
> > +		qp->tx_bytes += len;
> > +		qp->tx_pkts++;
> > +	}
> > +
> > +	ntb_transport_edma_notify_peer(qp);
> > +
> > +	return 0;
> > +out_unmap:
> > +	if (likely(len))
> > +		dma_unmap_single(dma_dev, ep_src, len, DMA_TO_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
> > +					 struct ntb_queue_entry *entry,
> > +					 void *cb, void *data, unsigned int len,
> > +					 unsigned int flags)
> > +{
> > +	struct device *dma_dev;
> > +
> > +	if (entry->addr) {
> > +		/* Deferred unmap */
> > +		dma_dev = get_dma_dev(qp->ndev);
> > +		dma_unmap_single(dma_dev, entry->addr, entry->len, DMA_TO_DEVICE);
> > +	}
> > +
> > +	entry->cb_data = cb;
> > +	entry->buf = data;
> > +	entry->len = len;
> > +	entry->flags = flags;
> > +	entry->errors = 0;
> > +	entry->addr = 0;
> > +
> > +	WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
> > +
> > +	if (ntb_qp_edma_is_ep(qp))
> > +		return ntb_transport_edma_ep_tx_enqueue(qp, entry);
> > +	else
> > +		return ntb_transport_edma_rc_tx_enqueue(qp, entry);
> > +}
> > +
> > +static int ntb_transport_edma_ep_rx_enqueue(struct ntb_transport_qp *qp,
> > +					    struct ntb_queue_entry *entry)
> > +{
> > +	struct device *dma_dev = get_dma_dev(qp->ndev);
> > +	struct ntb_edma_desc *in, __iomem *out;
> > +	unsigned int len = entry->len;
> > +	void *data = entry->buf;
> > +	dma_addr_t ep_dst;
> > +	u32 idx;
> > +	int rc;
> > +
> > +	ep_dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
> > +	rc = dma_mapping_error(dma_dev, ep_dst);
> > +	if (rc)
> > +		return rc;
> > +
> > +	scoped_guard(spinlock_bh, &qp->ep_rx_lock) {
> > +		if (ntb_edma_ring_full(READ_ONCE(qp->rd_prod),
> > +				       READ_ONCE(qp->rd_cons))) {
> > +			rc = -ENOSPC;
> > +			goto out_unmap;
> > +		}
> > +
> > +		idx = ntb_edma_ring_idx(qp->rd_prod);
> > +		in = NTB_DESC_RD_EP_I(qp, idx);
> > +		out = NTB_DESC_RD_EP_O(qp, idx);
> > +
> > +		iowrite32(len, &out->len);
> > +		iowrite64(ep_dst, &out->addr);
> > +
> > +		WARN_ON(in->flags & DESC_DONE_FLAG);
> > +		in->data = (uintptr_t)entry;
> > +		entry->addr = ep_dst;
> > +
> > +		/* Ensure len/addr are visible before the head update */
> > +		dma_wmb();
> > +
> > +		WRITE_ONCE(qp->rd_prod, qp->rd_prod + 1);
> > +		iowrite32(qp->rd_prod, NTB_HEAD_RD_EP_O(qp));
> > +	}
> > +	return 0;
> > +out_unmap:
> > +	dma_unmap_single(dma_dev, ep_dst, len, DMA_FROM_DEVICE);
> > +	return rc;
> > +}
> > +
> > +static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
> > +					 struct ntb_queue_entry *entry)
> > +{
> > +	int rc;
> > +
> > +	/* The behaviour is the same as the default backend for RC side */
> > +	if (ntb_qp_edma_is_ep(qp)) {
> > +		rc = ntb_transport_edma_ep_rx_enqueue(qp, entry);
> > +		if (rc) {
> > +			ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
> > +				     &qp->rx_free_q);
> > +			return rc;
> > +		}
> > +	}
> > +
> > +	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
> > +
> > +	if (qp->active)
> > +		tasklet_schedule(&qp->rxc_db_work);
> > +
> > +	return 0;
> > +}
> > +
> > +static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
> > +{
> > +	struct ntb_transport_ctx *nt = qp->transport;
> > +
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		ntb_transport_edma_rc_poll(qp);
> > +	else if (ntb_qp_edma_is_ep(qp)) {
> > +		/*
> > +		 * Make sure we poll the rings even if an eDMA interrupt is
> > +		 * cleared on the RC side earlier.
> > +		 */
> > +		queue_work(nt->wq, &qp->read_work);
> > +		queue_work(nt->wq, &qp->write_work);
> > +	} else
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +}
> > +
> > +static void ntb_transport_edma_read_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, read_work);
> > +
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		ntb_transport_edma_rc_read_complete_work(work);
> > +	else if (ntb_qp_edma_is_ep(qp))
> > +		ntb_transport_edma_ep_read_work(work);
> > +	else
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +}
> > +
> > +static void ntb_transport_edma_write_work(struct work_struct *work)
> > +{
> > +	struct ntb_transport_qp *qp = container_of(
> > +				work, struct ntb_transport_qp, write_work);
> > +
> > +	if (ntb_qp_edma_is_rc(qp))
> > +		ntb_transport_edma_rc_write_complete_work(work);
> > +	else if (ntb_qp_edma_is_ep(qp))
> > +		ntb_transport_edma_ep_write_work(work);
> > +	else
> > +		/* Unreachable */
> > +		WARN_ON_ONCE(1);
> > +}
> > +
> > +static void ntb_transport_edma_init_queue(struct ntb_transport_ctx *nt,
> > +					  unsigned int qp_num)
> > +{
> > +	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
> > +
> > +	qp->wr_cons = 0;
> > +	qp->rd_cons = 0;
> > +	qp->wr_prod = 0;
> > +	qp->rd_prod = 0;
> > +	qp->wr_issue = 0;
> > +	qp->rd_issue = 0;
> > +
> > +	INIT_WORK(&qp->db_work, ntb_transport_edma_db_work);
> > +	INIT_WORK(&qp->read_work, ntb_transport_edma_read_work);
> > +	INIT_WORK(&qp->write_work, ntb_transport_edma_write_work);
> > +}
> > +
> > +static void ntb_transport_edma_create_queue(struct ntb_transport_ctx *nt,
> > +					    struct ntb_transport_qp *qp)
> > +{
> > +	spin_lock_init(&qp->ep_tx_lock);
> > +	spin_lock_init(&qp->ep_rx_lock);
> > +	spin_lock_init(&qp->rc_lock);
> > +}
> > +
> > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > +	.rx_poll = ntb_transport_edma_rx_poll,
> > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > +};
> > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > +
> >  /**
> >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> >   * @qp: NTB transport layer queue to be enabled
> > --
> > 2.48.1
> >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Frank Li 2 months, 1 week ago
On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > located on the NTB endpoint to move data between host and endpoint.
> > >
...
> > > +#include "ntb_edma.h"
> > > +
> > > +/*
> > > + * The interrupt register offsets below are taken from the DesignWare
> > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > + * backend currently only supports this layout.
> > > + */
> > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> >
> > Not sure why need access EDMA register because EMDA driver already export
> > as dmaengine driver.
>
> These are intended for EP use. In my current design I intentionally don't
> use the standard dw-edma dmaengine driver on the EP side.

why not?

>
> >
> > > +
> > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > +
...
> > > +
> > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > +	of_node_put(parent);
> > > +	return (virq > 0) ? virq : -EINVAL;
> > > +}
> > > +
> > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > +{
> >
> > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > just register callback for dmeengine.
>
> If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> callbacks handle int_status/int_clear, I think we could hit races. One side
> might clear a status bit before the other side has a chance to see it and
> invoke its callback. Please correct me if I'm missing something here.

You should use a different channel?

>
> To avoid that, in my current implementation, the RC side handles the
> status/int_clear registers in the usual way, and the EP side only tries to
> suppress needless edma_int as much as possible.
>
> That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> That would require some changes on dw-edma core.

If dw-edma works as a remote DMA engine, RIE should be enabled, like
dw-edma-pcie.c does, but no one has actually used it recently.

Using eDMA as a doorbell would be a new use case, and I think it is quite useful.

> >
> > > +	struct ntb_edma_interrupt *v = data;
> > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > +	u32 i, val;
> > > +
...
> > > +	ret = dw_edma_probe(chip);
> >
> > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > dma engine support.
> >
> > EP side, suppose default dwc controller driver already setup edma engine,
> > so use correct filter function, you should get dma chan.
>
> I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> so that RC side only manages eDMA remotely and avoids the potential race
> condition I mentioned above.

Improve the eDMA core to support some DMA channels working locally and
some remotely.

Frank
>
> Thanks for reviewing,
> Koichiro
>
> >
> > Frank
> >
> > > +	if (ret) {
> > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > +		return ret;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
...

> > > +{
> > > +	spin_lock_init(&qp->ep_tx_lock);
> > > +	spin_lock_init(&qp->ep_rx_lock);
> > > +	spin_lock_init(&qp->rc_lock);
> > > +}
> > > +
> > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > +};
> > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > +
> > >  /**
> > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > >   * @qp: NTB transport layer queue to be enabled
> > > --
> > > 2.48.1
> > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months, 1 week ago
On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > located on the NTB endpoint to move data between host and endpoint.
> > > >
> ...
> > > > +#include "ntb_edma.h"
> > > > +
> > > > +/*
> > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > + * backend currently only supports this layout.
> > > > + */
> > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > >
> > > Not sure why need access EDMA register because EMDA driver already export
> > > as dmaengine driver.
> >
> > These are intended for EP use. In my current design I intentionally don't
> > use the standard dw-edma dmaengine driver on the EP side.
> 
> why not?

Conceptually I agree that using the standard dw-edma driver on both sides
would be attractive for future extensibility and maintainability. However,
there are a couple of concerns for me, some of which might be alleviated by
your suggestion below, and some which are more generic safety concerns that
I tried to outline in my replies to your other comments.

> 
> >
> > >
> > > > +
> > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > +
> ...
> > > > +
> > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > +	of_node_put(parent);
> > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > +}
> > > > +
> > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > +{
> > >
> > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > just register callback for dmeengine.
> >
> > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > callbacks handle int_status/int_clear, I think we could hit races. One side
> > might clear a status bit before the other side has a chance to see it and
> > invoke its callback. Please correct me if I'm missing something here.
> 
> You should use difference channel?

Do you mean something like this:
- on EP side, dw_edma_probe() only set up a dedicated channel for notification,
- on RC side, do not set up that particular channel via dw_edma_channel_setup(),
  but do other remaining channels for DMA transfers.

Also, is it generically safe to have dw_edma_probe() executed from both ends on
the same eDMA instance, as long as the channels are carefully partitioned
between them?

> 
> >
> > To avoid that, in my current implementation, the RC side handles the
> > status/int_clear registers in the usual way, and the EP side only tries to
> > suppress needless edma_int as much as possible.
> >
> > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > That would require some changes on dw-edma core.
> 
> If dw-edma work as remote DMA, which should enable RIE. like
> dw-edma-pcie.c, but not one actually use it recently.
> 
> Use EDMA as doorbell should be new case and I think it is quite useful.
> 
> > >
> > > > +	struct ntb_edma_interrupt *v = data;
> > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > +	u32 i, val;
> > > > +
> ...
> > > > +	ret = dw_edma_probe(chip);
> > >
> > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > dma engine support.
> > >
> > > EP side, suppose default dwc controller driver already setup edma engine,
> > > so use correct filter function, you should get dma chan.
> >
> > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > so that RC side only manages eDMA remotely and avoids the potential race
> > condition I mentioned above.
> 
> Improve eDMA core to suppport some dma channel work at local, some for
> remote.

Right. First, I experimented a bit more with different LIE/RIE settings and
ended up with the following observations:

* LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
  DMA transfer channels, the RC side never received any interrupt. The databook
  (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
  "If you want a remote interrupt and not a local interrupt then: Set LIE and
  RIE [...]", so I think this behaviour is expected.
* LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
  design, where the RC issues the DMA transfer for the notification via
  ntb_edma_notify_peer(). With RIE=0, the RC never calls
  dw_edma_core_handle_int() for that channel, which means that internal state
  such as dw_edma_chan.status is never managed correctly.
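
For context, the RC-side notification submission on that dedicated read
channel is conceptually just a tiny slave transfer through the regular
dmaengine API, roughly like the sketch below (simplified; 'notify_src' and
'ep_notify_dst' are placeholder addresses for a pre-mapped 4-byte notify
slot, and the real ntb_edma_notify_peer() differs in detail):

static int notify_qp_via_edma(struct dma_chan *chan, dma_addr_t notify_src,
			      phys_addr_t ep_notify_dst)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_slave_config cfg = {
		.dst_addr       = ep_notify_dst,
		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.direction      = DMA_MEM_TO_DEV,
	};
	dma_cookie_t cookie;

	if (dmaengine_slave_config(chan, &cfg))
		return -EIO;

	/* 4-byte transfer whose completion raises the EP-local interrupt */
	txd = dmaengine_prep_slave_single(chan, notify_src, sizeof(u32),
					  DMA_MEM_TO_DEV, DMA_CTRL_ACK);
	if (!txd)
		return -EIO;

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		return -EIO;

	dma_async_issue_pending(chan);
	return 0;
}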

> 
> Frank
> >
> > Thanks for reviewing,
> > Koichiro
> >
> > >
> > > Frank
> > >
> > > > +	if (ret) {
> > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> ...
> 
> > > > +{
> > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > +	spin_lock_init(&qp->rc_lock);
> > > > +}
> > > > +
> > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > +};
> > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > +
> > > >  /**
> > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > >   * @qp: NTB transport layer queue to be enabled
> > > > --
> > > > 2.48.1
> > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Frank Li 2 months, 1 week ago
On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > >
> > ...
> > > > > +#include "ntb_edma.h"
> > > > > +
> > > > > +/*
> > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > + * backend currently only supports this layout.
> > > > > + */
> > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > >
> > > > Not sure why need access EDMA register because EMDA driver already export
> > > > as dmaengine driver.
> > >
> > > These are intended for EP use. In my current design I intentionally don't
> > > use the standard dw-edma dmaengine driver on the EP side.
> >
> > why not?
>
> Conceptually I agree that using the standard dw-edma driver on both sides
> would be attractive for future extensibility and maintainability. However,
> there are a couple of concerns for me, some of which might be alleviated by
> your suggestion below, and some which are more generic safety concerns that
> I tried to outline in my replies to your other comments.
>
> >
> > >
> > > >
> > > > > +
> > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > +
> > ...
> > > > > +
> > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > +	of_node_put(parent);
> > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > +}
> > > > > +
> > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > +{
> > > >
> > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > just register callback for dmeengine.
> > >
> > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > might clear a status bit before the other side has a chance to see it and
> > > invoke its callback. Please correct me if I'm missing something here.
> >
> > You should use difference channel?
>
> Do you mean something like this:
> - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
>   but do other remaining channels for DMA transfers.

Yes, it may be simpler overall. Of course, this will waste a channel.

>
> Also, is it generically safe to have dw_edma_probe() executed from both ends on
> the same eDMA instance, as long as the channels are carefully partitioned
> between them?

Channel register MMIO space is separated per channel. Some channel registers
are shared within one 32-bit register.

But the critical one, the interrupt status register, is W1C, so writing only
BIT(channel) is safe.

IRQ enable/disable needs careful handling.

Or you can defer all actual DMA transfers to the EP side and append an MSI
write as the last item of the link to notify the RC side that the DMA is
done (actually, RIE should do the same thing).
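
Roughly, at the dmaengine level (not the exact dw-edma LL format, just the
idea; 'msi_dma_addr' is a DMA-mapped copy of the MSI message data and
'msi_target' the host's MSI address, both obtained from the EP-side MSI
setup and only placeholders here):

static int ep_append_msi_notify(struct dma_chan *chan,
				dma_addr_t msi_dma_addr,
				phys_addr_t msi_target)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_slave_config cfg = {
		.dst_addr       = msi_target,
		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.direction      = DMA_MEM_TO_DEV,
	};

	/*
	 * Queue a trailing 4-byte write to the host's MSI address on the
	 * same channel as the data transfer. Descriptors complete in order,
	 * so the MSI lands only after the data transfer is done.
	 */
	if (dmaengine_slave_config(chan, &cfg))
		return -EIO;

	txd = dmaengine_prep_slave_single(chan, msi_dma_addr, sizeof(u32),
					  DMA_MEM_TO_DEV, DMA_CTRL_ACK);
	if (!txd)
		return -EIO;
	if (dma_submit_error(dmaengine_submit(txd)))
		return -EIO;

	dma_async_issue_pending(chan);
	return 0;
}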

>
> >
> > >
> > > To avoid that, in my current implementation, the RC side handles the
> > > status/int_clear registers in the usual way, and the EP side only tries to
> > > suppress needless edma_int as much as possible.
> > >
> > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > That would require some changes on dw-edma core.
> >
> > If dw-edma work as remote DMA, which should enable RIE. like
> > dw-edma-pcie.c, but not one actually use it recently.
> >
> > Use EDMA as doorbell should be new case and I think it is quite useful.
> >
> > > >
> > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > +	u32 i, val;
> > > > > +
> > ...
> > > > > +	ret = dw_edma_probe(chip);
> > > >
> > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > dma engine support.
> > > >
> > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > so use correct filter function, you should get dma chan.
> > >
> > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > so that RC side only manages eDMA remotely and avoids the potential race
> > > condition I mentioned above.
> >
> > Improve eDMA core to suppport some dma channel work at local, some for
> > remote.
>
> Right, Firstly I experimented a bit more with different LIE/RIE settings and
> ended up with the following observations:
>
> * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
>   DMA transfer channels, the RC side never received any interrupt. The databook
>   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
>   "If you want a remote interrupt and not a local interrupt then: Set LIE and
>   RIE [...]", so I think this behaviour is expected.

Actually, you can append an MSI write as the last item of the DMA descriptor
link, so it will not depend on the eDMA's IRQ at all.

> * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
>   design, where the RC issues the DMA transfer for the notification via
>   ntb_edma_notify_peer(). With RIE=0, the RC never calls
>   dw_edma_core_handle_int() for that channel, which means that internal state
>   such as dw_edma_chan.status is never managed correctly.

If you append an MSI write to the DMA link, you needn't check the status
register; just check the current LL position to know which descriptors are
already done.

Or you can also enable LIE and disable the related IRQ line (without
registering an irq handler), so the local IRQ will be ignored by the GIC and
you can safely handle it on the RC side.

Frank
>
> >
> > Frank
> > >
> > > Thanks for reviewing,
> > > Koichiro
> > >
> > > >
> > > > Frank
> > > >
> > > > > +	if (ret) {
> > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > +		return ret;
> > > > > +	}
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > ...
> >
> > > > > +{
> > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > +}
> > > > > +
> > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > +};
> > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > +
> > > > >  /**
> > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > --
> > > > > 2.48.1
> > > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months, 1 week ago
On Wed, Dec 03, 2025 at 11:14:43AM -0500, Frank Li wrote:
> On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> > On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > > >
> > > ...
> > > > > > +#include "ntb_edma.h"
> > > > > > +
> > > > > > +/*
> > > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > > + * backend currently only supports this layout.
> > > > > > + */
> > > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > > >
> > > > > Not sure why need access EDMA register because EMDA driver already export
> > > > > as dmaengine driver.
> > > >
> > > > These are intended for EP use. In my current design I intentionally don't
> > > > use the standard dw-edma dmaengine driver on the EP side.
> > >
> > > why not?
> >
> > Conceptually I agree that using the standard dw-edma driver on both sides
> > would be attractive for future extensibility and maintainability. However,
> > there are a couple of concerns for me, some of which might be alleviated by
> > your suggestion below, and some which are more generic safety concerns that
> > I tried to outline in my replies to your other comments.
> >
> > >
> > > >
> > > > >
> > > > > > +
> > > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > > +
> > > ...
> > > > > > +
> > > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > > +	of_node_put(parent);
> > > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > > +}
> > > > > > +
> > > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > > +{
> > > > >
> > > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > > just register callback for dmeengine.
> > > >
> > > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > > might clear a status bit before the other side has a chance to see it and
> > > > invoke its callback. Please correct me if I'm missing something here.
> > >
> > > You should use difference channel?
> >
> > Do you mean something like this:
> > - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> > - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
> >   but do other remaining channels for DMA transfers.
> 
> Yes, it may be simple overall. Of course this will waste a channel.

So, on the EP side I see two possible approaches:

(a) Hide "dma" [1] as in [RFC PATCH v2 26/27] and call dw_edma_probe() with
    hand-crafted settings (chip->ll_rd_cnt = 1, chip->ll_wr_cnt = 0); a rough
    sketch follows below.
(b) Or, teach this special-purpose policy (i.e. configuring only a single
    notification channel) to the SoC glue driver's dw_pcie_ep_init_registers(),
    for example via Kconfig. I don't think DT is a good place to describe
    such a policy.

There is also another option, which does not necessarily require running
dw_edma_probe() by ourselves:

(c) Leave the default initialization by the SoC glue as-is, and override the
    per-channel role via some new dw-edma interface, with the guarantee
    that all channels except the notification channel remain unused on its
    side afterwards. In this model, the EP side builds the LL locations
    for data transfers and the RC configures all channels, but it sets up
    the notification channel in a special manner.

[1] https://github.com/jonmason/ntb/blob/68113d260674/Documentation/devicetree/bindings/pci/snps%2Cdw-pcie-ep.yaml#L83
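
For (a), the hand-crafted probe would boil down to something like this
(illustrative only; chip->ops and the LL region for that single notification
channel still have to be provided by the caller, and the error/interrupt
wiring is omitted):

static int ntb_edma_ep_probe_notify_only(struct device *dev,
					 void __iomem *regs)
{
	struct dw_edma_chip *chip;

	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;

	chip->dev = dev;
	chip->reg_base = regs;
	chip->mf = EDMA_MF_EDMA_UNROLL;
	chip->nr_irqs = 1;
	chip->ll_wr_cnt = 0;	/* no write channels owned by the EP */
	chip->ll_rd_cnt = 1;	/* single read channel, used for notification */
	/* chip->ll_region_rd[0] must describe that channel's LL ring */

	return dw_edma_probe(chip);
}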

> 
> >
> > Also, is it generically safe to have dw_edma_probe() executed from both ends on
> > the same eDMA instance, as long as the channels are carefully partitioned
> > between them?
> 
> Channel register MMIO space is sperated. Some channel register shared
> into one 32bit register.
> 
> But the critical one, interrupt status is w1c. So only write BIT(channel)
> is safe.
> 
> Need careful handle irq enable/disable.

Yeah, I agree it is unavoidable in this model.

> 
> Or you can defer all actual DMA transfer to EP side, you can append
> MSI write at last item of link to notify RC side about DMA done. (actually
> RIE should do the same thing)
> 
> >
> > >
> > > >
> > > > To avoid that, in my current implementation, the RC side handles the
> > > > status/int_clear registers in the usual way, and the EP side only tries to
> > > > suppress needless edma_int as much as possible.
> > > >
> > > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > > That would require some changes on dw-edma core.
> > >
> > > If dw-edma work as remote DMA, which should enable RIE. like
> > > dw-edma-pcie.c, but not one actually use it recently.
> > >
> > > Use EDMA as doorbell should be new case and I think it is quite useful.
> > >
> > > > >
> > > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > > +	u32 i, val;
> > > > > > +
> > > ...
> > > > > > +	ret = dw_edma_probe(chip);
> > > > >
> > > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > > dma engine support.
> > > > >
> > > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > > so use correct filter function, you should get dma chan.
> > > >
> > > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > > so that RC side only manages eDMA remotely and avoids the potential race
> > > > condition I mentioned above.
> > >
> > > Improve eDMA core to suppport some dma channel work at local, some for
> > > remote.
> >
> > Right, Firstly I experimented a bit more with different LIE/RIE settings and
> > ended up with the following observations:
> >
> > * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
> >   DMA transfer channels, the RC side never received any interrupt. The databook
> >   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
> >   "If you want a remote interrupt and not a local interrupt then: Set LIE and
> >   RIE [...]", so I think this behaviour is expected.
> 
> Actually, you can append MSI write at last one of DMA descriptor link. So
> it will not depend on eDMA's IRQ at all.

For RC->EP interrupts on R-Car S4 in EP mode, using ITS_TRANSLATER as the
IB iATU target did not appear to work in practice. Indeed that was the
motivation for the RFC v1 series [2]. I have not tried using ITS_TRANSLATER
as the eDMA read transfer DAR.

But in any case, simply masking the local interrupt is sufficient here. I
mainly wanted to point out that my naive idea of LIE=0/RIE=1 is not
implementable with this hardware. This whole LIE/RIE topic is a bit
off-track, sorry for the noise.

[2] For the record, RFC v2 is conceptually orthogonal and introduces a
    broader concept, i.e. the remote eDMA model, but I reused many of the
    preparatory commits from v1, which is why this is RFC v2 rather than a
    separate series.

> 
> > * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
> >   design, where the RC issues the DMA transfer for the notification via
> >   ntb_edma_notify_peer(). With RIE=0, the RC never calls
> >   dw_edma_core_handle_int() for that channel, which means that internal state
> >   such as dw_edma_chan.status is never managed correctly.
> 
> If you append on MSI write at DMA link, you needn't check status register,
> just check current LL pos to know which descrptor already done.
> 
> Or you also enable LIE and disable related IRQ line(without register
> irq handler), so Local IRQ will be ignore by GIC, you can safe handle at
> RC side.

What I was worried about here is that, with RIE=0 the current dw-edma
handling of struct dw_edma_chan::status field (not status register) would
not run for that channel, which could affect subsequent tx submissions. But
your suggestion also makes sense, thank you.

--8<--

So anyway, the key point seems to be that we should avoid such hard-coded register
handling in [RFC PATCH v2 20/27] and rely only on the standard dw-edma
interfaces (possibly with some extensions to the dw-edma core). From your
feedback, I feel this is the essential direction.

From that perspective, I'm leaning toward (b) (which I wrote above in a
reply comment) with a Kconfig guard, i.e. in dw_pcie_ep_init_registers(),
if IS_ENABLED(CONFIG_DW_REMOTE_EDMA) we only configure the notification
channel. In practice, a DT-based variant of (b) (for example a new property
such as "dma-notification-channel = <N>;" and making
dw_pcie_ep_init_registers() honour it) would be very handy for users, but I
suspect putting this kind of policy into DT is not acceptable.

Assuming careful handling, (c) might actually be the simplest approach. I
may need to add a small hook for the notification channel in
dw_edma_done_interrupt(), via a new API such as
dw_edma_chan_register_notify().
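
Purely as a strawman, the hook I have in mind would be shaped roughly like
this (neither the function nor the callback type exists in dw-edma today):

typedef void (*dw_edma_notify_fn)(void *data);

int dw_edma_chan_register_notify(struct dma_chan *dchan,
				 dw_edma_notify_fn fn, void *data);

/*
 * dw_edma_done_interrupt() would invoke the callback for the registered
 * notification channel instead of the usual vchan completion handling,
 * and the EP side of ntb_edma would register something like:
 */
static void ntb_edma_notify_cb(void *data)
{
	/*
	 * 'data' would carry the transport context; figure out which qp was
	 * signalled (e.g. from the notify slot) and queue its read/write
	 * work, much like ntb_edma_isr() does today.
	 */
}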

Thank you for your time and review,
Koichiro

> 
> Frank
> >
> > >
> > > Frank
> > > >
> > > > Thanks for reviewing,
> > > > Koichiro
> > > >
> > > > >
> > > > > Frank
> > > > >
> > > > > > +	if (ret) {
> > > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > > +		return ret;
> > > > > > +	}
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > ...
> > >
> > > > > > +{
> > > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > > +}
> > > > > > +
> > > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > > +};
> > > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > > +
> > > > > >  /**
> > > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > > --
> > > > > > 2.48.1
> > > > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Frank Li 2 months, 1 week ago
On Fri, Dec 05, 2025 at 12:42:03AM +0900, Koichiro Den wrote:
> On Wed, Dec 03, 2025 at 11:14:43AM -0500, Frank Li wrote:
> > On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> > > On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > > > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > > > >
> > > > ...
> > > > > > > +#include "ntb_edma.h"
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > > > + * backend currently only supports this layout.
> > > > > > > + */
> > > > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > > > >
> > > > > > Not sure why need access EDMA register because EMDA driver already export
> > > > > > as dmaengine driver.
> > > > >
> > > > > These are intended for EP use. In my current design I intentionally don't
> > > > > use the standard dw-edma dmaengine driver on the EP side.
> > > >
> > > > why not?
> > >
> > > Conceptually I agree that using the standard dw-edma driver on both sides
> > > would be attractive for future extensibility and maintainability. However,
> > > there are a couple of concerns for me, some of which might be alleviated by
> > > your suggestion below, and some which are more generic safety concerns that
> > > I tried to outline in my replies to your other comments.
> > >
> > > >
> > > > >
> > > > > >
> > > > > > > +
> > > > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > > > +
> > > > ...
> > > > > > > +
> > > > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > > > +	of_node_put(parent);
> > > > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > > > +{
> > > > > >
> > > > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > > > just register callback for dmeengine.
> > > > >
> > > > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > > > might clear a status bit before the other side has a chance to see it and
> > > > > invoke its callback. Please correct me if I'm missing something here.
> > > >
> > > > You should use difference channel?
> > >
> > > Do you mean something like this:
> > > - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> > > - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
> > >   but do other remaining channels for DMA transfers.
> >
> > Yes, it may be simple overall. Of course this will waste a channel.
>
> So, on the EP side I see two possible approaches:
>
> (a) Hide "dma" [1] as in [RFC PATCH v2 26/27] and call dw_edma_probe() with
>     hand-crafted settings (chip->ll_rd_cnt = 1, chip->ll_wr_cnt = 0).
> (b) Or, teach this special-purpose policy (i.e. configuring only a single
>     notification channel) to the SoC glue driver's dw_pcie_ep_init_registers(),
>     for example via Kconfig. I don't think DT is a good place to describe
>     such a policy.
>
> There is also another option, which do not necessarily run dw_edma_probe()
> by ourselves:
>
> (c) Leave the default initialization by the SoC glue as-is, and override the
>     per-channel role via some new dw-edma interface, with the guarantee
>     that all channels except the notification channel remain unused on its
>     side afterwards. In this model, the EP side builds the LL locations
>     for data transfers and the RC configures all channels, but it sets up
>     the notification channel in a special manner.
>
> [1] https://github.com/jonmason/ntb/blob/68113d260674/Documentation/devicetree/bindings/pci/snps%2Cdw-pcie-ep.yaml#L83
>
> >
> > >
> > > Also, is it generically safe to have dw_edma_probe() executed from both ends on
> > > the same eDMA instance, as long as the channels are carefully partitioned
> > > between them?
> >
> > Channel register MMIO space is sperated. Some channel register shared
> > into one 32bit register.
> >
> > But the critical one, interrupt status is w1c. So only write BIT(channel)
> > is safe.
> >
> > Need careful handle irq enable/disable.
>
> Yeah, I agree it is unavoidable in this model.
>
> >
> > Or you can defer all actual DMA transfer to EP side, you can append
> > MSI write at last item of link to notify RC side about DMA done. (actually
> > RIE should do the same thing)
> >
> > >
> > > >
> > > > >
> > > > > To avoid that, in my current implementation, the RC side handles the
> > > > > status/int_clear registers in the usual way, and the EP side only tries to
> > > > > suppress needless edma_int as much as possible.
> > > > >
> > > > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > > > That would require some changes on dw-edma core.
> > > >
> > > > If dw-edma work as remote DMA, which should enable RIE. like
> > > > dw-edma-pcie.c, but not one actually use it recently.
> > > >
> > > > Use EDMA as doorbell should be new case and I think it is quite useful.
> > > >
> > > > > >
> > > > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > > > +	u32 i, val;
> > > > > > > +
> > > > ...
> > > > > > > +	ret = dw_edma_probe(chip);
> > > > > >
> > > > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > > > dma engine support.
> > > > > >
> > > > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > > > so use correct filter function, you should get dma chan.
> > > > >
> > > > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > > > so that RC side only manages eDMA remotely and avoids the potential race
> > > > > condition I mentioned above.
> > > >
> > > > Improve eDMA core to suppport some dma channel work at local, some for
> > > > remote.
> > >
> > > Right, Firstly I experimented a bit more with different LIE/RIE settings and
> > > ended up with the following observations:
> > >
> > > * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
> > >   DMA transfer channels, the RC side never received any interrupt. The databook
> > >   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
> > >   "If you want a remote interrupt and not a local interrupt then: Set LIE and
> > >   RIE [...]", so I think this behaviour is expected.
> >
> > Actually, you can append MSI write at last one of DMA descriptor link. So
> > it will not depend on eDMA's IRQ at all.
>
> For RC->EP interrupts on R-Car S4 in EP mode, using ITS_TRANSLATER as the
> IB iATU target did not appear to work in practice. Indeed that was the
> motivation for the RFC v1 series [2]. I have not tried using ITS_TRANSLATER
> as the eDMA read transfer DAR.
>
> But in any case, simply masking the local interrupt is sufficient here. I
> mainly wanted to point out that my naive idea of LIE=0/RIE=1 is not
> implementable with this hardware. This whole LIE/RIE topic is a bit
> off-track, sorry for the noise.
>
> [2] For the record, RFC v2 is conceptually orthogonal and introduces a
>     broader concept ie. remote eDMA model, but I reused many of the
>     preparatory commits from v1, which is why this is RFC v2 rather than a
>     separate series.
>
> >
> > > * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
> > >   design, where the RC issues the DMA transfer for the notification via
> > >   ntb_edma_notify_peer(). With RIE=0, the RC never calls
> > >   dw_edma_core_handle_int() for that channel, which means that internal state
> > >   such as dw_edma_chan.status is never managed correctly.
> >
> > If you append on MSI write at DMA link, you needn't check status register,
> > just check current LL pos to know which descrptor already done.
> >
> > Or you also enable LIE and disable related IRQ line(without register
> > irq handler), so Local IRQ will be ignore by GIC, you can safe handle at
> > RC side.
>
> What I was worried about here is that, with RIE=0 the current dw-edma
> handling of struct dw_edma_chan::status field (not status register) would
> not run for that channel, which could affect subsequent tx submissions. But
> your suggestion also makes sense, thank you.
>
> --8<--
>
> So anyway the key point seems that we should avoid such hard-coded register
> handling in [RFC PATCH v2 20/27] and rely only on the standard dw-edma
> interfaces (possibly with some extensions to the dw-edma core). From your
> feedback, I feel this is the essential direction.
>
> From that perspective, I'm leaning toward (b) (which I wrote above in a
> reply comment) with a Kconfig guard, i.e. in dw_pcie_ep_init_registers(),
> if IS_ENABLED(CONFIG_DW_REMOTE_EDMA) we only configure the notification
> channel. In practice, a DT-based variant of (b) (for example a new property
> such as "dma-notification-channel = <N>;" and making
> dw_pcie_ep_init_registers() honour it) would be very handy for users, but I
> suspect putting this kind of policy into DT is not acceptable.
>
> Assuming careful handling, (c) might actually be the simplest approach. I
> may need to add a small hook for the notification channel in
> dw_edma_done_interrupt(), via a new API such as
> dw_edma_chan_register_notify().

Let me reply to everything here, covering the overall design.

The eDMA can actually access all memory on both the EP and RC sides,
regardless of the PCI mapping windows. NTB is defined to expose only part
of each system's memory, so one memcpy is needed anyway. Although NTB
cannot take full advantage of the eDMA, it is still the easiest path for
now. I have a draft idea that (most likely) does not touch the NTB core
code.

EP side                          RC side
             1:  Control bar
             2:  Doorbell bar
             3:  MW1

MW1 is a fixed-size array of [ntb_payload_header + data] entries. The
current NTB transport builds the queue in system memory and transfers the
data (read/write) to this array.

Use the eDMA on one side only, RC or EP; take the EP as the example.

In 1 (the control bar), reserve a memory region, call it B.

In the ntb_hw_epf.c driver, create a simple 'fake' DMA memcpy driver that
just implements device_prep_dma_memcpy(). It only puts the src/dest/size
info into memory region B and then pushes a doorbell.

On the EP side, a workqueue fetches the info from B and sends it to the
eDMA queue to do the actual transfer. After the EP's DMA finishes, it marks
the entry done in B and then raises an MSI IRQ, which triggers the 'fake'
DMA memcpy driver's completion.
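
A minimal sketch of what one request slot in region B and the RC-side
posting could look like; the layout, field names and the helper below are
assumptions for illustration only:

/*
 * Illustrative layout of one request slot in the reserved control-bar
 * region "B", plus how the RC-side 'fake' memcpy driver could post it.
 */
struct ntb_fake_memcpy_req {
        __le64 src;     /* source as seen by the submitter, e.g. 0xRC_1000 */
        __le64 dst;     /* nominal destination, e.g. the MW1 offset (0xPCI_1000) */
        __le32 len;     /* transfer length in bytes */
        __le32 flags;   /* e.g. BIT(0) = valid, BIT(1) = done (written by the EP) */
};

/* Called from the RC's device_prep_dma_memcpy()/issue_pending() path. */
static void ntb_fake_memcpy_post(void __iomem *slot, dma_addr_t src,
                                 dma_addr_t dst, size_t len)
{
        struct ntb_fake_memcpy_req req = {
                .src   = cpu_to_le64(src),
                .dst   = cpu_to_le64(dst),
                .len   = cpu_to_le32(len),
                .flags = cpu_to_le32(BIT(0)),
        };

        memcpy_toio(slot, &req, sizeof(req));
        /* ...then push the doorbell so the EP-side workqueue picks it up. */
}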

Further, 3 (MW1) does not even need to exist at all, because neither side
accesses it directly.

For example, consider the RC TX / EP RX case:

The RC's ntb_async_tx_submit() uses device_prep_dma_memcpy() to copy
user-space memory (0xRC_1000 to 0xPCI_1000, size 0x1000) and puts the
request into the shared BAR0 position:

            0xRC_1000 -> 0xPCI_1000, size 0x1000

On the EP side, there is an RX request from ntb_async_rx_submit(), from
0xPCI_1000 to 0xEP_8000, size 0x20000.

So set up an eDMA transfer from 0xRC_1000 -> 0xEP_8000, size 0x1000. After
it completes, mark both sides done, then trigger the related callback
functions.
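
The EP-side workqueue would then pair the posted request with its own
pending RX submission and program a single eDMA transfer directly between
the two system memories. A rough sketch, reusing the illustrative
ntb_fake_memcpy_req above and glossing over the exact read/write-channel
selection in dw-edma:

/* Run from the EP workqueue kicked by the doorbell; names are illustrative. */
static int ntb_edma_pair_and_submit(struct dma_chan *chan,
                                    struct ntb_fake_memcpy_req *req,
                                    dma_addr_t ep_rx_buf, size_t ep_rx_len)
{
        size_t len = min_t(size_t, le32_to_cpu(req->len), ep_rx_len);
        struct dma_slave_config cfg = {
                /* remote (RC) address from the posted request, e.g. 0xRC_1000 */
                .src_addr = le64_to_cpu(req->src),
        };
        struct dma_async_tx_descriptor *txd;
        int ret;

        ret = dmaengine_slave_config(chan, &cfg);
        if (ret)
                return ret;

        /* One transfer: 0xRC_1000 -> 0xEP_8000, size 0x1000; MW1 never touched. */
        txd = dmaengine_prep_slave_single(chan, ep_rx_buf, len,
                                          DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
        if (!txd)
                return -ENOMEM;

        dmaengine_submit(txd);
        dma_async_issue_pending(chan);
        /* On completion: mark the slot done in B and raise an MSI to the RC. */
        return 0;
}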

You can see that 0xPCI_1000 is not used at all. Actually, 0xPCI_1000 is a
troublemaker: the RC's and EP's PCI address spaces are not necessarily the
same as their CPU address spaces, and the PCI controller may perform
address translation.

Frank
>
> Thank you for your time and review,
> Koichiro
>
> >
> > Frank
> > >
> > > >
> > > > Frank
> > > > >
> > > > > Thanks for reviewing,
> > > > > Koichiro
> > > > >
> > > > > >
> > > > > > Frank
> > > > > >
> > > > > > > +	if (ret) {
> > > > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > > > +		return ret;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > ...
> > > >
> > > > > > > +{
> > > > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > > > +};
> > > > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > > > +
> > > > > > >  /**
> > > > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > > > --
> > > > > > > 2.48.1
> > > > > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 2 months ago
On Thu, Dec 04, 2025 at 03:16:25PM -0500, Frank Li wrote:
> On Fri, Dec 05, 2025 at 12:42:03AM +0900, Koichiro Den wrote:
> > On Wed, Dec 03, 2025 at 11:14:43AM -0500, Frank Li wrote:
> > > On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> > > > On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > > > > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > > > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > > > > >
> > > > > ...
> > > > > > > > +#include "ntb_edma.h"
> > > > > > > > +
> > > > > > > > +/*
> > > > > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > > > > + * backend currently only supports this layout.
> > > > > > > > + */
> > > > > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > > > > >
> > > > > > > Not sure why need access EDMA register because EMDA driver already export
> > > > > > > as dmaengine driver.
> > > > > >
> > > > > > These are intended for EP use. In my current design I intentionally don't
> > > > > > use the standard dw-edma dmaengine driver on the EP side.
> > > > >
> > > > > why not?
> > > >
> > > > Conceptually I agree that using the standard dw-edma driver on both sides
> > > > would be attractive for future extensibility and maintainability. However,
> > > > there are a couple of concerns for me, some of which might be alleviated by
> > > > your suggestion below, and some which are more generic safety concerns that
> > > > I tried to outline in my replies to your other comments.
> > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > > +
> > > > > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > > > > +
> > > > > ...
> > > > > > > > +
> > > > > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > > > > +	of_node_put(parent);
> > > > > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > > > > +{
> > > > > > >
> > > > > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > > > > just register callback for dmeengine.
> > > > > >
> > > > > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > > > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > > > > might clear a status bit before the other side has a chance to see it and
> > > > > > invoke its callback. Please correct me if I'm missing something here.
> > > > >
> > > > > You should use difference channel?
> > > >
> > > > Do you mean something like this:
> > > > - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> > > > - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
> > > >   but do other remaining channels for DMA transfers.
> > >
> > > Yes, it may be simple overall. Of course this will waste a channel.
> >
> > So, on the EP side I see two possible approaches:
> >
> > (a) Hide "dma" [1] as in [RFC PATCH v2 26/27] and call dw_edma_probe() with
> >     hand-crafted settings (chip->ll_rd_cnt = 1, chip->ll_wr_cnt = 0).
> > (b) Or, teach this special-purpose policy (i.e. configuring only a single
> >     notification channel) to the SoC glue driver's dw_pcie_ep_init_registers(),
> >     for example via Kconfig. I don't think DT is a good place to describe
> >     such a policy.
> >
> > There is also another option, which do not necessarily run dw_edma_probe()
> > by ourselves:
> >
> > (c) Leave the default initialization by the SoC glue as-is, and override the
> >     per-channel role via some new dw-edma interface, with the guarantee
> >     that all channels except the notification channel remain unused on its
> >     side afterwards. In this model, the EP side builds the LL locations
> >     for data transfers and the RC configures all channels, but it sets up
> >     the notification channel in a special manner.
> >
> > [1] https://github.com/jonmason/ntb/blob/68113d260674/Documentation/devicetree/bindings/pci/snps%2Cdw-pcie-ep.yaml#L83
> >
> > >
> > > >
> > > > Also, is it generically safe to have dw_edma_probe() executed from both ends on
> > > > the same eDMA instance, as long as the channels are carefully partitioned
> > > > between them?
> > >
> > > Channel register MMIO space is sperated. Some channel register shared
> > > into one 32bit register.
> > >
> > > But the critical one, interrupt status is w1c. So only write BIT(channel)
> > > is safe.
> > >
> > > Need careful handle irq enable/disable.
> >
> > Yeah, I agree it is unavoidable in this model.
> >
> > >
> > > Or you can defer all actual DMA transfer to EP side, you can append
> > > MSI write at last item of link to notify RC side about DMA done. (actually
> > > RIE should do the same thing)
> > >
> > > >
> > > > >
> > > > > >
> > > > > > To avoid that, in my current implementation, the RC side handles the
> > > > > > status/int_clear registers in the usual way, and the EP side only tries to
> > > > > > suppress needless edma_int as much as possible.
> > > > > >
> > > > > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > > > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > > > > That would require some changes on dw-edma core.
> > > > >
> > > > > If dw-edma work as remote DMA, which should enable RIE. like
> > > > > dw-edma-pcie.c, but not one actually use it recently.
> > > > >
> > > > > Use EDMA as doorbell should be new case and I think it is quite useful.
> > > > >
> > > > > > >
> > > > > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > > > > +	u32 i, val;
> > > > > > > > +
> > > > > ...
> > > > > > > > +	ret = dw_edma_probe(chip);
> > > > > > >
> > > > > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > > > > dma engine support.
> > > > > > >
> > > > > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > > > > so use correct filter function, you should get dma chan.
> > > > > >
> > > > > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > > > > so that RC side only manages eDMA remotely and avoids the potential race
> > > > > > condition I mentioned above.
> > > > >
> > > > > Improve eDMA core to suppport some dma channel work at local, some for
> > > > > remote.
> > > >
> > > > Right, Firstly I experimented a bit more with different LIE/RIE settings and
> > > > ended up with the following observations:
> > > >
> > > > * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
> > > >   DMA transfer channels, the RC side never received any interrupt. The databook
> > > >   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
> > > >   "If you want a remote interrupt and not a local interrupt then: Set LIE and
> > > >   RIE [...]", so I think this behaviour is expected.
> > >
> > > Actually, you can append MSI write at last one of DMA descriptor link. So
> > > it will not depend on eDMA's IRQ at all.
> >
> > For RC->EP interrupts on R-Car S4 in EP mode, using ITS_TRANSLATER as the
> > IB iATU target did not appear to work in practice. Indeed that was the
> > motivation for the RFC v1 series [2]. I have not tried using ITS_TRANSLATER
> > as the eDMA read transfer DAR.
> >
> > But in any case, simply masking the local interrupt is sufficient here. I
> > mainly wanted to point out that my naive idea of LIE=0/RIE=1 is not
> > implementable with this hardware. This whole LIE/RIE topic is a bit
> > off-track, sorry for the noise.
> >
> > [2] For the record, RFC v2 is conceptually orthogonal and introduces a
> >     broader concept ie. remote eDMA model, but I reused many of the
> >     preparatory commits from v1, which is why this is RFC v2 rather than a
> >     separate series.
> >
> > >
> > > > * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
> > > >   design, where the RC issues the DMA transfer for the notification via
> > > >   ntb_edma_notify_peer(). With RIE=0, the RC never calls
> > > >   dw_edma_core_handle_int() for that channel, which means that internal state
> > > >   such as dw_edma_chan.status is never managed correctly.
> > >
> > > If you append on MSI write at DMA link, you needn't check status register,
> > > just check current LL pos to know which descrptor already done.
> > >
> > > Or you also enable LIE and disable related IRQ line(without register
> > > irq handler), so Local IRQ will be ignore by GIC, you can safe handle at
> > > RC side.
> >
> > What I was worried about here is that, with RIE=0 the current dw-edma
> > handling of struct dw_edma_chan::status field (not status register) would
> > not run for that channel, which could affect subsequent tx submissions. But
> > your suggestion also makes sense, thank you.
> >
> > --8<--
> >
> > So anyway the key point seems that we should avoid such hard-coded register
> > handling in [RFC PATCH v2 20/27] and rely only on the standard dw-edma
> > interfaces (possibly with some extensions to the dw-edma core). From your
> > feedback, I feel this is the essential direction.
> >
> > From that perspective, I'm leaning toward (b) (which I wrote above in a
> > reply comment) with a Kconfig guard, i.e. in dw_pcie_ep_init_registers(),
> > if IS_ENABLED(CONFIG_DW_REMOTE_EDMA) we only configure the notification
> > channel. In practice, a DT-based variant of (b) (for example a new property
> > such as "dma-notification-channel = <N>;" and making
> > dw_pcie_ep_init_registers() honour it) would be very handy for users, but I
> > suspect putting this kind of policy into DT is not acceptable.
> >
> > Assuming careful handling, (c) might actually be the simplest approach. I
> > may need to add a small hook for the notification channel in
> > dw_edma_done_interrupt(), via a new API such as
> > dw_edma_chan_register_notify().
> 
> I reply everything here for overall design
> 
> EDMA actually can access all memory at both EP and RC side regardless PCI
> map windows. NTB defination is that only access part of both system memory,
> so anyway need once memcpy. Although NTB can't take 100% eDMA advantage, it
> is still easiest path now. I have a draft idea without touch NTB core code
> (most likley).
> 
> EP side                          RC side
>              1:  Control bar
>              2:  Doorbell bar
>              3:  WM1
> 
> MW1 is fixed sized array [ntb_payload_header + data]. Current NTB built
> queue in system memory, transfer data (RW) to this array.
> 
> Use EDMA only one side, RC/EP. use EP as example.
> 
> In 1 (control bar, resever memory space, which call B)
> 
> In ntb_hw_epf.c driver, create a simple 'fake' DMA memcpy driver, which
> just implement device_prep_dma_memcpy(). That just put src\dest\size info
> to memory space B, then push doorbell.
> 
> in EP side's a workqueue, fetch info from B, the send to EDMA queue to
> do actual transfer, after EP DMA finish, mark done at B, then raise msi irq,
> 'fake' DMA memcpy driver will be triggered.
> 
> Futher, 3 WM1 is not necessary existed at all, because both side don't
> access it directly.
> 
> For example:
> 
> case RC TX, EP RX
> 
> RC ntb_async_tx_submit() use device_prep_dma_memcpy() copy user space
> memory (0xRC_1000 to PCI_1000, size 0x1000), put into share bar0 position
> 
>             0xRC_1000 -> 0xPCI_1000 0x1000
> 
> EP side, there RX request ntb_async_rx_submit(),  from 0xPCI_1000 to
> 0xEP_8000 size 0x20000.
> 
> so setup eDMA transfer form 0xRC_1000 -> 0xEP_8000 size 1000. After complete
> mark both side done, then trigger related callback functions.
> 
> You can see 0xPCI_1000 is not used at all. Actually 0xPCI_1000 is trouble
> maker,  RC and EP system PCI space is not necesary the same as CPU space,
> PCI controller may do address convert.

Thanks for the detailed explanation.

Just to clarify, regarding your comments about the number of memcpy
operations and not using the 0xPCI_1000 window for the data path, I think
RFC v2 is already similar to what you're describing.

To me it seems the key differences in your proposal are mainly two-fold:
(1) the layering, and (2) local eDMA use rather than remote.

For (1), instead of adding more eDMA-specific handling to the ntb_transport
layer, your approach would keep changes to ntb_transport minimal and
encapsulate the eDMA usage inside the "fake DMA memcpy driver" as much as
possible. In that design, would the MW1 layout change? Leaving the existing
layout as-is would waste space (which is why RFC v2 introduced a new
layout).

Also, one point I'm still unsure about is the opposite direction (i.e.
EP->RC). In that case, do you also expect the EP to trigger its local eDMA
engine? If so, then, similarly to the RC->EP direction in RFC v2, the EP
would need to know the RC-side receive buffer address (e.g. 0xRC_1000) in
advance.

You also mentioned that you already have a draft. Are you planning to post
it as a patch series? If not, I can of course try to implement/prototype
this approach based on your suggestion.

Please let me know if the above understanding does not match what you had
in mind.

Thank you,
Koichiro


> 
> Frank
> >
> > Thank you for your time and review,
> > Koichiro
> >
> > >
> > > Frank
> > > >
> > > > >
> > > > > Frank
> > > > > >
> > > > > > Thanks for reviewing,
> > > > > > Koichiro
> > > > > >
> > > > > > >
> > > > > > > Frank
> > > > > > >
> > > > > > > > +	if (ret) {
> > > > > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > > > > +		return ret;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > ...
> > > > >
> > > > > > > > +{
> > > > > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > > > > +};
> > > > > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > > > > +
> > > > > > > >  /**
> > > > > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > > > > --
> > > > > > > > 2.48.1
> > > > > > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Frank Li 2 months ago
On Fri, Dec 05, 2025 at 12:04:24PM +0900, Koichiro Den wrote:
> On Thu, Dec 04, 2025 at 03:16:25PM -0500, Frank Li wrote:
> > On Fri, Dec 05, 2025 at 12:42:03AM +0900, Koichiro Den wrote:
> > > On Wed, Dec 03, 2025 at 11:14:43AM -0500, Frank Li wrote:
> > > > On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> > > > > On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > > > > > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > > > > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > > > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > > > > > >
> > > > > > ...
> > > > > > > > > +#include "ntb_edma.h"
> > > > > > > > > +
> > > > > > > > > +/*
> > > > > > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > > > > > + * backend currently only supports this layout.
> > > > > > > > > + */
> > > > > > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > > > > > >
> > > > > > > > Not sure why need access EDMA register because EMDA driver already export
> > > > > > > > as dmaengine driver.
> > > > > > >
> > > > > > > These are intended for EP use. In my current design I intentionally don't
> > > > > > > use the standard dw-edma dmaengine driver on the EP side.
> > > > > >
> > > > > > why not?
> > > > >
> > > > > Conceptually I agree that using the standard dw-edma driver on both sides
> > > > > would be attractive for future extensibility and maintainability. However,
> > > > > there are a couple of concerns for me, some of which might be alleviated by
> > > > > your suggestion below, and some which are more generic safety concerns that
> > > > > I tried to outline in my replies to your other comments.
> > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > +
> > > > > > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > > > > > +
> > > > > > ...
> > > > > > > > > +
> > > > > > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > > > > > +	of_node_put(parent);
> > > > > > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > > > > > +{
> > > > > > > >
> > > > > > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > > > > > just register callback for dmeengine.
> > > > > > >
> > > > > > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > > > > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > > > > > might clear a status bit before the other side has a chance to see it and
> > > > > > > invoke its callback. Please correct me if I'm missing something here.
> > > > > >
> > > > > > You should use difference channel?
> > > > >
> > > > > Do you mean something like this:
> > > > > - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> > > > > - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
> > > > >   but do other remaining channels for DMA transfers.
> > > >
> > > > Yes, it may be simple overall. Of course this will waste a channel.
> > >
> > > So, on the EP side I see two possible approaches:
> > >
> > > (a) Hide "dma" [1] as in [RFC PATCH v2 26/27] and call dw_edma_probe() with
> > >     hand-crafted settings (chip->ll_rd_cnt = 1, chip->ll_wr_cnt = 0).
> > > (b) Or, teach this special-purpose policy (i.e. configuring only a single
> > >     notification channel) to the SoC glue driver's dw_pcie_ep_init_registers(),
> > >     for example via Kconfig. I don't think DT is a good place to describe
> > >     such a policy.
> > >
> > > There is also another option, which do not necessarily run dw_edma_probe()
> > > by ourselves:
> > >
> > > (c) Leave the default initialization by the SoC glue as-is, and override the
> > >     per-channel role via some new dw-edma interface, with the guarantee
> > >     that all channels except the notification channel remain unused on its
> > >     side afterwards. In this model, the EP side builds the LL locations
> > >     for data transfers and the RC configures all channels, but it sets up
> > >     the notification channel in a special manner.
> > >
> > > [1] https://github.com/jonmason/ntb/blob/68113d260674/Documentation/devicetree/bindings/pci/snps%2Cdw-pcie-ep.yaml#L83
> > >
> > > >
> > > > >
> > > > > Also, is it generically safe to have dw_edma_probe() executed from both ends on
> > > > > the same eDMA instance, as long as the channels are carefully partitioned
> > > > > between them?
> > > >
> > > > Channel register MMIO space is sperated. Some channel register shared
> > > > into one 32bit register.
> > > >
> > > > But the critical one, interrupt status is w1c. So only write BIT(channel)
> > > > is safe.
> > > >
> > > > Need careful handle irq enable/disable.
> > >
> > > Yeah, I agree it is unavoidable in this model.
> > >
> > > >
> > > > Or you can defer all actual DMA transfer to EP side, you can append
> > > > MSI write at last item of link to notify RC side about DMA done. (actually
> > > > RIE should do the same thing)
> > > >
> > > > >
> > > > > >
> > > > > > >
> > > > > > > To avoid that, in my current implementation, the RC side handles the
> > > > > > > status/int_clear registers in the usual way, and the EP side only tries to
> > > > > > > suppress needless edma_int as much as possible.
> > > > > > >
> > > > > > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > > > > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > > > > > That would require some changes on dw-edma core.
> > > > > >
> > > > > > If dw-edma work as remote DMA, which should enable RIE. like
> > > > > > dw-edma-pcie.c, but not one actually use it recently.
> > > > > >
> > > > > > Use EDMA as doorbell should be new case and I think it is quite useful.
> > > > > >
> > > > > > > >
> > > > > > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > > > > > +	u32 i, val;
> > > > > > > > > +
> > > > > > ...
> > > > > > > > > +	ret = dw_edma_probe(chip);
> > > > > > > >
> > > > > > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > > > > > dma engine support.
> > > > > > > >
> > > > > > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > > > > > so use correct filter function, you should get dma chan.
> > > > > > >
> > > > > > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > > > > > so that RC side only manages eDMA remotely and avoids the potential race
> > > > > > > condition I mentioned above.
> > > > > >
> > > > > > Improve eDMA core to suppport some dma channel work at local, some for
> > > > > > remote.
> > > > >
> > > > > Right, Firstly I experimented a bit more with different LIE/RIE settings and
> > > > > ended up with the following observations:
> > > > >
> > > > > * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
> > > > >   DMA transfer channels, the RC side never received any interrupt. The databook
> > > > >   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
> > > > >   "If you want a remote interrupt and not a local interrupt then: Set LIE and
> > > > >   RIE [...]", so I think this behaviour is expected.
> > > >
> > > > Actually, you can append MSI write at last one of DMA descriptor link. So
> > > > it will not depend on eDMA's IRQ at all.
> > >
> > > For RC->EP interrupts on R-Car S4 in EP mode, using ITS_TRANSLATER as the
> > > IB iATU target did not appear to work in practice. Indeed that was the
> > > motivation for the RFC v1 series [2]. I have not tried using ITS_TRANSLATER
> > > as the eDMA read transfer DAR.
> > >
> > > But in any case, simply masking the local interrupt is sufficient here. I
> > > mainly wanted to point out that my naive idea of LIE=0/RIE=1 is not
> > > implementable with this hardware. This whole LIE/RIE topic is a bit
> > > off-track, sorry for the noise.
> > >
> > > [2] For the record, RFC v2 is conceptually orthogonal and introduces a
> > >     broader concept ie. remote eDMA model, but I reused many of the
> > >     preparatory commits from v1, which is why this is RFC v2 rather than a
> > >     separate series.
> > >
> > > >
> > > > > * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
> > > > >   design, where the RC issues the DMA transfer for the notification via
> > > > >   ntb_edma_notify_peer(). With RIE=0, the RC never calls
> > > > >   dw_edma_core_handle_int() for that channel, which means that internal state
> > > > >   such as dw_edma_chan.status is never managed correctly.
> > > >
> > > > If you append on MSI write at DMA link, you needn't check status register,
> > > > just check current LL pos to know which descrptor already done.
> > > >
> > > > Or you also enable LIE and disable related IRQ line(without register
> > > > irq handler), so Local IRQ will be ignore by GIC, you can safe handle at
> > > > RC side.
> > >
> > > What I was worried about here is that, with RIE=0 the current dw-edma
> > > handling of struct dw_edma_chan::status field (not status register) would
> > > not run for that channel, which could affect subsequent tx submissions. But
> > > your suggestion also makes sense, thank you.
> > >
> > > --8<--
> > >
> > > So anyway the key point seems that we should avoid such hard-coded register
> > > handling in [RFC PATCH v2 20/27] and rely only on the standard dw-edma
> > > interfaces (possibly with some extensions to the dw-edma core). From your
> > > feedback, I feel this is the essential direction.
> > >
> > > From that perspective, I'm leaning toward (b) (which I wrote above in a
> > > reply comment) with a Kconfig guard, i.e. in dw_pcie_ep_init_registers(),
> > > if IS_ENABLED(CONFIG_DW_REMOTE_EDMA) we only configure the notification
> > > channel. In practice, a DT-based variant of (b) (for example a new property
> > > such as "dma-notification-channel = <N>;" and making
> > > dw_pcie_ep_init_registers() honour it) would be very handy for users, but I
> > > suspect putting this kind of policy into DT is not acceptable.
> > >
> > > Assuming careful handling, (c) might actually be the simplest approach. I
> > > may need to add a small hook for the notification channel in
> > > dw_edma_done_interrupt(), via a new API such as
> > > dw_edma_chan_register_notify().
> >
> > I reply everything here for overall design
> >
> > EDMA actually can access all memory at both EP and RC side regardless PCI
> > map windows. NTB defination is that only access part of both system memory,
> > so anyway need once memcpy. Although NTB can't take 100% eDMA advantage, it
> > is still easiest path now. I have a draft idea without touch NTB core code
> > (most likley).
> >
> > EP side                          RC side
> >              1:  Control bar
> >              2:  Doorbell bar
> >              3:  WM1
> >
> > MW1 is fixed sized array [ntb_payload_header + data]. Current NTB built
> > queue in system memory, transfer data (RW) to this array.
> >
> > Use EDMA only one side, RC/EP. use EP as example.
> >
> > In 1 (control bar, resever memory space, which call B)
> >
> > In ntb_hw_epf.c driver, create a simple 'fake' DMA memcpy driver, which
> > just implement device_prep_dma_memcpy(). That just put src\dest\size info
> > to memory space B, then push doorbell.
> >
> > in EP side's a workqueue, fetch info from B, the send to EDMA queue to
> > do actual transfer, after EP DMA finish, mark done at B, then raise msi irq,
> > 'fake' DMA memcpy driver will be triggered.
> >
> > Futher, 3 WM1 is not necessary existed at all, because both side don't
> > access it directly.
> >
> > For example:
> >
> > case RC TX, EP RX
> >
> > RC ntb_async_tx_submit() use device_prep_dma_memcpy() copy user space
> > memory (0xRC_1000 to PCI_1000, size 0x1000), put into share bar0 position
> >
> >             0xRC_1000 -> 0xPCI_1000 0x1000
> >
> > EP side, there RX request ntb_async_rx_submit(),  from 0xPCI_1000 to
> > 0xEP_8000 size 0x20000.
> >
> > so setup eDMA transfer form 0xRC_1000 -> 0xEP_8000 size 1000. After complete
> > mark both side done, then trigger related callback functions.
> >
> > You can see 0xPCI_1000 is not used at all. Actually 0xPCI_1000 is trouble
> > maker,  RC and EP system PCI space is not necesary the same as CPU space,
> > PCI controller may do address convert.
>
> Thanks for the detailed explanation.
>
> Just to clarify, regarding your comments about the number of memcpy
> operations and not using the 0xPCI_1000 window for data path, I think RFC
> v2 is already similar to what you're describing.
>
> To me it seems the key differences in your proposal are mainly two-fold:
> (1) the layering, and (2) local eDMA use rather than remote.

There is not a big difference between remote and local DMA. My main point
is that using just one side is enough. If the eDMA is handled remotely, the
EP side needs the virtual memcpy and the RC side handles the actual
transfer.

I used the EP as the example just because some of the read/write logic is
reversed between EP and RC: the RC's write is the EP's read.

>
> For (1), instead of adding more eDMA-specific handling into ntb_transport
> layer, your approach would keep changes to ntb_transport minimal and
> encapsulate the eDMA usage inside the "fake DMA memcpy driver" as much as
> possible. In that design, would the MW1 layout change? Leaving the existing
> layout as-is would waste the space (so RFC v2 had introduced a new layout).

It is fine if the NTB maintainer agrees to it.

>
> Also, one point I'm still unsure about is the opposite direction (ie.
> EP->RC). In that case, do you also expect the EP to trigger its local eDMA
> engine? If yes, then, similar to the RC->EP direction in RFC v2, the EP
> would need to know the RC-side receive buffer address (e.g. 0xRC_1000) in
> advance.

The 'fake DMA memcpy driver' already puts 0xRC_1000 into a shared memory
location.

>
> You also mentioned that you already have some draft. Are you planning to
> post that as a patch series? If not, I can of course try to
> implement/prototype this approach based on your suggestion.

Sorry, I have not actually worked on NTB with eDMA before. My work is based
on the RDMA framework. Ideally, RDMA can do user-space (EP) to user-space
(RC) data transfer with zero copy.

But I think NTB is also a good path, since RDMA is overly complex.

Frank

>
> Please let me know if the above understanding does not match what you had
> in mind.
>
> Thank you,
> Koichiro
>
>
> >
> > Frank
> > >
> > > Thank you for your time and review,
> > > Koichiro
> > >
> > > >
> > > > Frank
> > > > >
> > > > > >
> > > > > > Frank
> > > > > > >
> > > > > > > Thanks for reviewing,
> > > > > > > Koichiro
> > > > > > >
> > > > > > > >
> > > > > > > > Frank
> > > > > > > >
> > > > > > > > > +	if (ret) {
> > > > > > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > > > > > +		return ret;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	return 0;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > ...
> > > > > >
> > > > > > > > > +{
> > > > > > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > > > > > +};
> > > > > > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > > > > > +
> > > > > > > > >  /**
> > > > > > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > > > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > > > > > --
> > > > > > > > > 2.48.1
> > > > > > > > >
Re: [RFC PATCH v2 20/27] NTB: ntb_transport: Introduce remote eDMA backed transport mode
Posted by Koichiro Den 1 month, 3 weeks ago
On Fri, Dec 05, 2025 at 10:06:30AM -0500, Frank Li wrote:
> On Fri, Dec 05, 2025 at 12:04:24PM +0900, Koichiro Den wrote:
> > On Thu, Dec 04, 2025 at 03:16:25PM -0500, Frank Li wrote:
> > > On Fri, Dec 05, 2025 at 12:42:03AM +0900, Koichiro Den wrote:
> > > > On Wed, Dec 03, 2025 at 11:14:43AM -0500, Frank Li wrote:
> > > > > On Wed, Dec 03, 2025 at 05:53:03PM +0900, Koichiro Den wrote:
> > > > > > On Tue, Dec 02, 2025 at 10:42:29AM -0500, Frank Li wrote:
> > > > > > > On Tue, Dec 02, 2025 at 03:43:10PM +0900, Koichiro Den wrote:
> > > > > > > > On Mon, Dec 01, 2025 at 04:41:05PM -0500, Frank Li wrote:
> > > > > > > > > On Sun, Nov 30, 2025 at 01:03:58AM +0900, Koichiro Den wrote:
> > > > > > > > > > Add a new transport backend that uses a remote DesignWare eDMA engine
> > > > > > > > > > located on the NTB endpoint to move data between host and endpoint.
> > > > > > > > > >
> > > > > > > ...
> > > > > > > > > > +#include "ntb_edma.h"
> > > > > > > > > > +
> > > > > > > > > > +/*
> > > > > > > > > > + * The interrupt register offsets below are taken from the DesignWare
> > > > > > > > > > + * eDMA "unrolled" register map (EDMA_MF_EDMA_UNROLL). The remote eDMA
> > > > > > > > > > + * backend currently only supports this layout.
> > > > > > > > > > + */
> > > > > > > > > > +#define DMA_WRITE_INT_STATUS_OFF   0x4c
> > > > > > > > > > +#define DMA_WRITE_INT_MASK_OFF     0x54
> > > > > > > > > > +#define DMA_WRITE_INT_CLEAR_OFF    0x58
> > > > > > > > > > +#define DMA_READ_INT_STATUS_OFF    0xa0
> > > > > > > > > > +#define DMA_READ_INT_MASK_OFF      0xa8
> > > > > > > > > > +#define DMA_READ_INT_CLEAR_OFF     0xac
> > > > > > > > >
> > > > > > > > > Not sure why need access EDMA register because EMDA driver already export
> > > > > > > > > as dmaengine driver.
> > > > > > > >
> > > > > > > > These are intended for EP use. In my current design I intentionally don't
> > > > > > > > use the standard dw-edma dmaengine driver on the EP side.
> > > > > > >
> > > > > > > why not?
> > > > > >
> > > > > > Conceptually I agree that using the standard dw-edma driver on both sides
> > > > > > would be attractive for future extensibility and maintainability. However,
> > > > > > there are a couple of concerns for me, some of which might be alleviated by
> > > > > > your suggestion below, and some which are more generic safety concerns that
> > > > > > I tried to outline in my replies to your other comments.
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +
> > > > > > > > > > +#define NTB_EDMA_NOTIFY_MAX_QP		64
> > > > > > > > > > +
> > > > > > > ...
> > > > > > > > > > +
> > > > > > > > > > +	virq = irq_create_fwspec_mapping(&fwspec);
> > > > > > > > > > +	of_node_put(parent);
> > > > > > > > > > +	return (virq > 0) ? virq : -EINVAL;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static irqreturn_t ntb_edma_isr(int irq, void *data)
> > > > > > > > > > +{
> > > > > > > > >
> > > > > > > > > Not sue why dw_edma_interrupt_write/read() does work for your case. Suppose
> > > > > > > > > just register callback for dmeengine.
> > > > > > > >
> > > > > > > > If we ran dw_edma_probe() on both the EP and RC sides and let the dmaengine
> > > > > > > > callbacks handle int_status/int_clear, I think we could hit races. One side
> > > > > > > > might clear a status bit before the other side has a chance to see it and
> > > > > > > > invoke its callback. Please correct me if I'm missing something here.
> > > > > > >
> > > > > > > You should use difference channel?
> > > > > >
> > > > > > Do you mean something like this:
> > > > > > - on EP side, dw_edma_probe() only set up a dedicated channel for notification,
> > > > > > - on RC side, do not set up that particular channel via dw_edma_channel_setup(),
> > > > > >   but do other remaining channels for DMA transfers.
> > > > >
> > > > > Yes, it may be simple overall. Of course this will waste a channel.
> > > >
> > > > So, on the EP side I see two possible approaches:
> > > >
> > > > (a) Hide "dma" [1] as in [RFC PATCH v2 26/27] and call dw_edma_probe() with
> > > >     hand-crafted settings (chip->ll_rd_cnt = 1, chip->ll_wr_cnt = 0).
> > > > (b) Or, teach this special-purpose policy (i.e. configuring only a single
> > > >     notification channel) to the SoC glue driver's dw_pcie_ep_init_registers(),
> > > >     for example via Kconfig. I don't think DT is a good place to describe
> > > >     such a policy.
> > > >
> > > > There is also another option, which do not necessarily run dw_edma_probe()
> > > > by ourselves:
> > > >
> > > > (c) Leave the default initialization by the SoC glue as-is, and override the
> > > >     per-channel role via some new dw-edma interface, with the guarantee
> > > >     that all channels except the notification channel remain unused on its
> > > >     side afterwards. In this model, the EP side builds the LL locations
> > > >     for data transfers and the RC configures all channels, but it sets up
> > > >     the notification channel in a special manner.
> > > >
> > > > [1] https://github.com/jonmason/ntb/blob/68113d260674/Documentation/devicetree/bindings/pci/snps%2Cdw-pcie-ep.yaml#L83
> > > >
> > > > >
> > > > > >
> > > > > > Also, is it generically safe to have dw_edma_probe() executed from both ends on
> > > > > > the same eDMA instance, as long as the channels are carefully partitioned
> > > > > > between them?
> > > > >
> > > > > Channel register MMIO space is sperated. Some channel register shared
> > > > > into one 32bit register.
> > > > >
> > > > > But the critical one, interrupt status is w1c. So only write BIT(channel)
> > > > > is safe.
> > > > >
> > > > > Need careful handle irq enable/disable.
> > > >
> > > > Yeah, I agree it is unavoidable in this model.
> > > >
> > > > >
> > > > > Or you can defer all actual DMA transfer to EP side, you can append
> > > > > MSI write at last item of link to notify RC side about DMA done. (actually
> > > > > RIE should do the same thing)
> > > > >
> > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > To avoid that, in my current implementation, the RC side handles the
> > > > > > > > status/int_clear registers in the usual way, and the EP side only tries to
> > > > > > > > suppress needless edma_int as much as possible.
> > > > > > > >
> > > > > > > > That said, I'm now wondering if it would be better to set LIE=0/RIE=1 for
> > > > > > > > the DMA transfer channels and LIE=1/RIE=0 for the notification channel.
> > > > > > > > That would require some changes on dw-edma core.
> > > > > > >
> > > > > > > If dw-edma work as remote DMA, which should enable RIE. like
> > > > > > > dw-edma-pcie.c, but not one actually use it recently.
> > > > > > >
> > > > > > > Use EDMA as doorbell should be new case and I think it is quite useful.
> > > > > > >
> > > > > > > > >
> > > > > > > > > > +	struct ntb_edma_interrupt *v = data;
> > > > > > > > > > +	u32 mask = BIT(EDMA_RD_CH_NUM);
> > > > > > > > > > +	u32 i, val;
> > > > > > > > > > +
> > > > > > > ...
> > > > > > > > > > +	ret = dw_edma_probe(chip);
> > > > > > > > >
> > > > > > > > > I think dw_edma_probe() should be in ntb_hw_epf.c, which provide DMA
> > > > > > > > > dma engine support.
> > > > > > > > >
> > > > > > > > > EP side, suppose default dwc controller driver already setup edma engine,
> > > > > > > > > so use correct filter function, you should get dma chan.
> > > > > > > >
> > > > > > > > I intentionally hid edma for EP side in .dts patch in [RFC PATCH v2 26/27]
> > > > > > > > so that RC side only manages eDMA remotely and avoids the potential race
> > > > > > > > condition I mentioned above.
> > > > > > >
> > > > > > > Improve eDMA core to suppport some dma channel work at local, some for
> > > > > > > remote.
> > > > > >
> > > > > > Right, Firstly I experimented a bit more with different LIE/RIE settings and
> > > > > > ended up with the following observations:
> > > > > >
> > > > > > * LIE=0/RIE=1 does not seem to work at the hardware level. When I tried this for
> > > > > >   DMA transfer channels, the RC side never received any interrupt. The databook
> > > > > >   (5.40a, 8.2.2 "Interrupts and Error Handling") has a hint that says
> > > > > >   "If you want a remote interrupt and not a local interrupt then: Set LIE and
> > > > > >   RIE [...]", so I think this behaviour is expected.
> > > > >
> > > > > Actually, you can append MSI write at last one of DMA descriptor link. So
> > > > > it will not depend on eDMA's IRQ at all.
> > > >
> > > > For RC->EP interrupts on R-Car S4 in EP mode, using ITS_TRANSLATER as the
> > > > IB iATU target did not appear to work in practice. Indeed that was the
> > > > motivation for the RFC v1 series [2]. I have not tried using ITS_TRANSLATER
> > > > as the eDMA read transfer DAR.
> > > >
> > > > But in any case, simply masking the local interrupt is sufficient here. I
> > > > mainly wanted to point out that my naive idea of LIE=0/RIE=1 is not
> > > > implementable with this hardware. This whole LIE/RIE topic is a bit
> > > > off-track, sorry for the noise.
> > > >
> > > > [2] For the record, RFC v2 is conceptually orthogonal and introduces a
> > > >     broader concept ie. remote eDMA model, but I reused many of the
> > > >     preparatory commits from v1, which is why this is RFC v2 rather than a
> > > >     separate series.
> > > >
> > > > >
> > > > > > * LIE=1/RIE=0 does work at the hardware level, but is problematic for my current
> > > > > >   design, where the RC issues the DMA transfer for the notification via
> > > > > >   ntb_edma_notify_peer(). With RIE=0, the RC never calls
> > > > > >   dw_edma_core_handle_int() for that channel, which means that internal state
> > > > > >   such as dw_edma_chan.status is never managed correctly.
> > > > >
> > > > > If you append on MSI write at DMA link, you needn't check status register,
> > > > > just check current LL pos to know which descrptor already done.
> > > > >
> > > > > Or you also enable LIE and disable related IRQ line(without register
> > > > > irq handler), so Local IRQ will be ignore by GIC, you can safe handle at
> > > > > RC side.
> > > >
> > > > What I was worried about here is that, with RIE=0 the current dw-edma
> > > > handling of struct dw_edma_chan::status field (not status register) would
> > > > not run for that channel, which could affect subsequent tx submissions. But
> > > > your suggestion also makes sense, thank you.
> > > >
> > > > --8<--
> > > >
> > > > So anyway the key point seems that we should avoid such hard-coded register
> > > > handling in [RFC PATCH v2 20/27] and rely only on the standard dw-edma
> > > > interfaces (possibly with some extensions to the dw-edma core). From your
> > > > feedback, I feel this is the essential direction.
> > > >
> > > > From that perspective, I'm leaning toward (b) (which I wrote above in a
> > > > reply comment) with a Kconfig guard, i.e. in dw_pcie_ep_init_registers(),
> > > > if IS_ENABLED(CONFIG_DW_REMOTE_EDMA) we only configure the notification
> > > > channel. In practice, a DT-based variant of (b) (for example a new property
> > > > such as "dma-notification-channel = <N>;" and making
> > > > dw_pcie_ep_init_registers() honour it) would be very handy for users, but I
> > > > suspect putting this kind of policy into DT is not acceptable.
> > > >
> > > > Assuming careful handling, (c) might actually be the simplest approach. I
> > > > may need to add a small hook for the notification channel in
> > > > dw_edma_done_interrupt(), via a new API such as
> > > > dw_edma_chan_register_notify().
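> > > >
> > > > Something along these lines, purely as a sketch (none of this API exists
> > > > today):
> > > >
> > > > 	/*
> > > > 	 * Hypothetical: let a client observe completions on the notification
> > > > 	 * channel without going through a dmaengine descriptor callback.
> > > > 	 */
> > > > 	int dw_edma_chan_register_notify(struct dma_chan *dchan,
> > > > 					 void (*notify)(void *data), void *data);
> > > >
> > > > 	/* and in dw_edma_done_interrupt(), after the usual bookkeeping: */
> > > > 	if (chan->notify)
> > > > 		chan->notify(chan->notify_data);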
> > >
> > > I'll reply to everything here to cover the overall design.
> > >
> > > The eDMA can actually access all memory on both the EP and RC sides,
> > > regardless of the PCI mapping windows. NTB, by definition, only accesses
> > > part of each system's memory, so one memcpy is needed anyway. Although NTB
> > > can't take full advantage of the eDMA, it is still the easiest path for now.
> > > I have a draft idea that (most likely) doesn't touch the NTB core code.
> > >
> > > EP side                          RC side
> > >              1:  Control bar
> > >              2:  Doorbell bar
> > >              3:  MW1
> > >
> > > MW1 is a fixed-size array of [ntb_payload_header + data]. The current NTB
> > > builds its queues in system memory and transfers data (R/W) into this array.
> > >
> > > Use the eDMA on only one side, RC or EP; let's use the EP as an example.
> > >
> > > In 1 (the control bar), reserve a memory space; call it B.
> > >
> > > In the ntb_hw_epf.c driver, create a simple 'fake' DMA memcpy driver, which
> > > just implements device_prep_dma_memcpy(). It only puts the src/dst/size info
> > > into memory space B, then pushes the doorbell.
> > >
> > > On the EP side, a workqueue fetches the info from B and sends it to the eDMA
> > > queue to do the actual transfer. After the EP DMA finishes, it marks the
> > > entry done in B and raises an MSI, which triggers the 'fake' DMA memcpy
> > > driver's completion.
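> > >
> > > A rough skeleton of that 'fake' memcpy driver (every name below is made up
> > > for illustration; only the device_prep_dma_memcpy() signature is the real
> > > dmaengine one):
> > >
> > > 	/* One request record in the shared control area B. */
> > > 	struct fake_dma_req {
> > > 		__le64 src;	/* source bus address */
> > > 		__le64 dst;	/* destination bus address */
> > > 		__le32 len;
> > > 		__le32 status;	/* 0 = pending, 1 = done by the EP */
> > > 	};
> > >
> > > 	static struct dma_async_tx_descriptor *
> > > 	fake_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
> > > 			     dma_addr_t src, size_t len, unsigned long flags)
> > > 	{
> > > 		struct fake_dma_chan *fc = to_fake_dma_chan(chan);
> > > 		struct fake_dma_req __iomem *req = fake_next_req_slot(fc);
> > >
> > > 		/*
> > > 		 * Publish the request in shared area B; the EP workqueue picks
> > > 		 * it up, runs the real eDMA transfer, marks it done and raises
> > > 		 * an MSI so the RC can run the completion callback.
> > > 		 */
> > > 		writeq(src, &req->src);
> > > 		writeq(dst, &req->dst);
> > > 		writel(len, &req->len);
> > > 		writel(0, &req->status);
> > >
> > > 		/* tx_submit() on the returned descriptor pushes the doorbell. */
> > > 		return fake_alloc_tx_desc(fc, flags);
> > > 	}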
> > >
> > > Further, MW1 (3) does not need to exist at all, because neither side
> > > accesses it directly.
> > >
> > > For example:
> > >
> > > Case: RC TX, EP RX
> > >
> > > On the RC, ntb_async_tx_submit() uses device_prep_dma_memcpy() to copy
> > > user-space memory (0xRC_1000 to 0xPCI_1000, size 0x1000) and puts the
> > > request into the shared BAR0 position:
> > >
> > >             0xRC_1000 -> 0xPCI_1000 0x1000
> > >
> > > On the EP side, there is an RX request via ntb_async_rx_submit(), from
> > > 0xPCI_1000 to 0xEP_8000, size 0x20000.
> > >
> > > So set up an eDMA transfer from 0xRC_1000 -> 0xEP_8000, size 0x1000. After
> > > it completes, mark both sides done, then trigger the related callback
> > > functions.
> > >
> > > You can see that 0xPCI_1000 is not used at all. Actually, 0xPCI_1000 is the
> > > troublemaker: the RC and EP system PCI spaces are not necessarily the same
> > > as their CPU spaces, since the PCI controller may do address translation.
> >
> > Thanks for the detailed explanation.
> >
> > Just to clarify, regarding your comments about the number of memcpy
> > operations and not using the 0xPCI_1000 window for data path, I think RFC
> > v2 is already similar to what you're describing.
> >
> > To me it seems the key differences in your proposal are mainly two-fold:
> > (1) the layering, and (2) local eDMA use rather than remote.
> 
> There is not a big difference between remote and local DMA. My main point is
> just that using one side is enough. If the eDMA is handled remotely, the EP
> side needs the virtual memcpy and the RC side handles the actual transfer.
>
> I used the EP as an example just because some of the R/W logic is reversed
> between EP and RC: the RC's write is the EP's read.
> 
> >
> > For (1), instead of adding more eDMA-specific handling into the
> > ntb_transport layer, your approach would keep changes to ntb_transport
> > minimal and encapsulate the eDMA usage inside the "fake DMA memcpy driver"
> > as much as possible. In that design, would the MW1 layout change? Leaving
> > the existing layout as-is would waste space (which is why RFC v2 introduced
> > a new layout).
> 
> It is fine if the NTB maintainers agree with it.
> 
> >
> > Also, one point I'm still unsure about is the opposite direction (i.e.
> > EP->RC). In that case, do you also expect the EP to trigger its local eDMA
> > engine? If yes, then, similar to the RC->EP direction in RFC v2, the EP
> > would need to know the RC-side receive buffer address (e.g. 0xRC_1000) in
> > advance.
> 
> The 'fake DMA memcpy driver' already puts 0xRC_1000 into a shared memory
> location.
> 
> >
> > You also mentioned that you already have some draft. Are you planning to
> > post that as a patch series? If not, I can of course try to
> > implement/prototype this approach based on your suggestion.
> 
> Sorry, I have not actually worked on NTB eDMA before. My work is based on the
> RDMA framework. Ideally, RDMA can do user-space (EP) to user-space (RC) data
> transfer with zero copy.
>
> But I think NTB is also a good path, since RDMA is overly complex.
> 
> Frank

Hi Frank,

Thank you for the review and the discussion. Apologies for the delayed
response here in this thread; I needed some time to think through the
redesign.

After further consideration, I sent RFC v3 [1] with the following design:
* all the read channels (including a channel for notification) are driven
  by the host (RC)
* all the write channels are driven by the endpoint (EP)

This way we can avoid both ends touching and updating per-direction
registers concurrently at runtime [2]. The data plane behaviour also becomes
symmetric in both directions, resulting in a simpler data path in the NTB
transport layer compared to RFC v2. As you commented earlier, RFC v3 no
longer relies on the duplicated hard-coded register offsets, and leaves
dma_device/dma_chan initialization to the standard path. RFC v3 also no
longer hides the eDMA instance on the endpoint side, as I did in
[RFC PATCH v2 26/28] [3].
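
As a rough illustration of the per-channel ownership split (the helper below
is hypothetical, not the actual RFC v3 code):

	/*
	 * RFC v3 channel ownership: the host (RC) drives every read channel,
	 * including the notification channel, while the endpoint (EP) drives
	 * every write channel, so neither side touches the other direction's
	 * per-channel registers at runtime.
	 */
	static bool ntb_edma_chan_driven_locally(bool is_host, bool is_read_chan)
	{
		return is_host ? is_read_chan : !is_read_chan;
	}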

Still, I didn't implement the fake DMA memcpy driver idea in RFC v3.
Instead, I chose an MW1 layout optimized for the eDMA-backed transport, since
it reduces MW1 usage and makes it possible to scale to multiple queue pairs
with deeper ring buffers, which helps fully exploit the potential of the
eDMA-backed transport.

[1] https://lore.kernel.org/all/20251217151609.3162665-1-den@valinux.co.jp/
[2] as a somewhat relevant topic, I've found an existing issue that becomes
    observable under heavy load across multiple channels.
    https://lore.kernel.org/all/20251217151609.3162665-23-den@valinux.co.jp/
[3] https://lore.kernel.org/all/20251129160405.2568284-27-den@valinux.co.jp/

Thank you again for your time and for the review,

Koichiro

> 
> >
> > Please let me know if the above understanding does not match what you had
> > in mind.
> >
> > Thank you,
> > Koichiro
> >
> >
> > >
> > > Frank
> > > >
> > > > Thank you for your time and review,
> > > > Koichiro
> > > >
> > > > >
> > > > > Frank
> > > > > >
> > > > > > >
> > > > > > > Frank
> > > > > > > >
> > > > > > > > Thanks for reviewing,
> > > > > > > > Koichiro
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Frank
> > > > > > > > >
> > > > > > > > > > +	if (ret) {
> > > > > > > > > > +		dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret);
> > > > > > > > > > +		return ret;
> > > > > > > > > > +	}
> > > > > > > > > > +
> > > > > > > > > > +	return 0;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > ...
> > > > > > >
> > > > > > > > > > +{
> > > > > > > > > > +	spin_lock_init(&qp->ep_tx_lock);
> > > > > > > > > > +	spin_lock_init(&qp->ep_rx_lock);
> > > > > > > > > > +	spin_lock_init(&qp->rc_lock);
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static const struct ntb_transport_backend_ops edma_backend_ops = {
> > > > > > > > > > +	.setup_qp_mw = ntb_transport_edma_setup_qp_mw,
> > > > > > > > > > +	.tx_free_entry = ntb_transport_edma_tx_free_entry,
> > > > > > > > > > +	.tx_enqueue = ntb_transport_edma_tx_enqueue,
> > > > > > > > > > +	.rx_enqueue = ntb_transport_edma_rx_enqueue,
> > > > > > > > > > +	.rx_poll = ntb_transport_edma_rx_poll,
> > > > > > > > > > +	.debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
> > > > > > > > > > +};
> > > > > > > > > > +#endif /* CONFIG_NTB_TRANSPORT_EDMA */
> > > > > > > > > > +
> > > > > > > > > >  /**
> > > > > > > > > >   * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
> > > > > > > > > >   * @qp: NTB transport layer queue to be enabled
> > > > > > > > > > --
> > > > > > > > > > 2.48.1
> > > > > > > > > >