[PATCH 14/15] NTB: Add ntb_ep_dma test client

Koichiro Den posted 15 patches 3 weeks, 4 days ago
[PATCH 14/15] NTB: Add ntb_ep_dma test client
Posted by Koichiro Den 3 weeks, 4 days ago
Add a small NTB client that exercises an endpoint-integrated DMA engine
exported through vNTB.

Both peers allocate a coherent test buffer and publish its DMA address
and size through scratchpads. The initiator requests the remote DMA
engine provider and uses it to transfer a known pattern into the
peer-published buffer. The responder verifies the contents locally and
reports PASS or FAIL back through scratchpads and a doorbell.

Expose ready, run, and result files in debugfs to make the flow easy to
trigger during bring-up.

Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
 drivers/ntb/test/Kconfig      |  10 +
 drivers/ntb/test/Makefile     |   1 +
 drivers/ntb/test/ntb_ep_dma.c | 695 ++++++++++++++++++++++++++++++++++
 3 files changed, 706 insertions(+)
 create mode 100644 drivers/ntb/test/ntb_ep_dma.c

diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index 516b991f33b9..30d0fede968c 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -35,3 +35,13 @@ config NTB_MSI_TEST
 	  send MSI interrupts between peers.
 
 	  If unsure, say N.
+
+config NTB_EP_DMA
+	tristate "NTB EP DMA Test Client"
+	help
+	  This test client demonstrates the use of an endpoint-integrated DMA
+	  engine exported through vNTB. It is intended as a simple bring-up and
+	  end-to-end validation tool for the remote-DMA discovery and transfer
+	  path.
+
+	  If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index 19ed91d8a3b1..f5bd0a85d4c8 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
 obj-$(CONFIG_NTB_PERF) += ntb_perf.o
 obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o
+obj-$(CONFIG_NTB_EP_DMA) += ntb_ep_dma.o
diff --git a/drivers/ntb/test/ntb_ep_dma.c b/drivers/ntb/test/ntb_ep_dma.c
new file mode 100644
index 000000000000..7cee158369a1
--- /dev/null
+++ b/drivers/ntb/test/ntb_ep_dma.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/cleanup.h>
+#include <linux/completion.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ntb.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Koichiro Den <den@valinux.co.jp>");
+MODULE_DESCRIPTION("Test for using EPC-integrated DMA engine remotely");
+
+#define NTB_EP_DMA_BUF_LEN	SZ_4K
+#define NTB_EP_DMA_TIMEOUT_MS	5000
+#define NTB_EP_DMA_TIMEOUT	msecs_to_jiffies(NTB_EP_DMA_TIMEOUT_MS)
+
+#define NTB_EP_DMA_SPAD_STATE		0
+#define NTB_EP_DMA_SPAD_ADDR_LO	1
+#define NTB_EP_DMA_SPAD_ADDR_HI	2
+#define NTB_EP_DMA_SPAD_SIZE		3
+#define NTB_EP_DMA_SPAD_SEQ		4
+#define NTB_EP_DMA_SPAD_XFER_LEN	5
+#define NTB_EP_DMA_SPAD_CNT		6
+
+/*
+ * Test protocol:
+ *   - both peers publish a local coherent buffer through scratchpads and
+ *     ring a doorbell when READY
+ *   - the initiator submits one transfer through the remote DMA provider
+ *   - the responder verifies the pattern locally and reports PASS/FAIL
+ *     back through scratchpads and a doorbell
+ */
+enum ntb_ep_dma_state {
+	NTB_EP_DMA_ST_INIT = 0,
+	NTB_EP_DMA_ST_READY,
+	NTB_EP_DMA_ST_XFER_DONE,
+	NTB_EP_DMA_ST_PASS,
+	NTB_EP_DMA_ST_FAIL,
+};
+
+/* Peer-published test parameters, snapshotted from the remote scratchpads. */
+struct ntb_ep_dma_peer {
+	dma_addr_t dma_addr;	/* DMA address of the peer's test buffer */
+	u32 size;		/* size of the peer's test buffer */
+	u32 state;		/* enum ntb_ep_dma_state advertised by the peer */
+	u32 seq;		/* sequence number of the peer's last update */
+	u32 xfer_len;		/* transfer length being reported/verified */
+};
+
+/* Per-device test context, one per bound NTB device. */
+struct ntb_ep_dma_ctx {
+	struct ntb_dev *ntb;
+	struct dentry *dbgfs_dir;
+	/* Serialize userspace-triggered runs through debugfs. */
+	struct mutex run_lock;
+	/* Protect peer state and completion sequencing shared with db_event. */
+	spinlock_t lock;
+	struct work_struct setup_work;	/* allocates/publishes the local buffer */
+	struct work_struct verify_work;	/* verifies an incoming transfer */
+	struct completion peer_ready;	/* fired once the peer is READY */
+	struct completion xfer_done;	/* fired when peer reports PASS/FAIL */
+
+	struct device *buf_dev;	/* device the test buffer was allocated for */
+	void *buf;		/* local coherent test buffer */
+	dma_addr_t buf_dma;	/* DMA address published to the peer */
+	size_t buf_size;	/* size of the local test buffer */
+
+	struct ntb_ep_dma_peer peer;	/* last snapshot of peer scratchpads */
+	u32 local_seq;		/* sequence of our last initiated run */
+	u32 done_seq;		/* last sequence acknowledged by the peer */
+	u32 verify_seq;		/* sequence queued for verify_work */
+	u32 verify_len;		/* length queued for verify_work */
+	u32 verified_seq;	/* last sequence verified locally */
+
+	size_t last_len;	/* length of the last completed transfer */
+	int last_status;	/* 0 on success, -errno of the last run */
+};
+
+static struct dentry *ntb_ep_dma_dbgfs_topdir;
+
+/* Map an enum ntb_ep_dma_state value to a human-readable name. */
+static const char *ntb_ep_dma_state_name(u32 state)
+{
+	static const char * const names[] = {
+		[NTB_EP_DMA_ST_INIT]	  = "init",
+		[NTB_EP_DMA_ST_READY]	  = "ready",
+		[NTB_EP_DMA_ST_XFER_DONE] = "xfer-done",
+		[NTB_EP_DMA_ST_PASS]	  = "pass",
+		[NTB_EP_DMA_ST_FAIL]	  = "fail",
+	};
+
+	if (state < ARRAY_SIZE(names) && names[state])
+		return names[state];
+
+	return "unknown";
+}
+
+/* Fill @buf with the well-known test pattern (byte at offset i == i & 0xff). */
+static void ntb_ep_dma_fill_pattern(void *buf, size_t len)
+{
+	u8 *bytes = buf;
+	size_t off;
+
+	for (off = 0; off != len; off++)
+		bytes[off] = (u8)off;
+}
+
+/* Check that @buf holds the test pattern; return 0 on match, -EIO otherwise. */
+static int ntb_ep_dma_verify_pattern(const void *buf, size_t len)
+{
+	const u8 *bytes = buf;
+	size_t off;
+
+	for (off = 0; off != len; off++) {
+		if (bytes[off] != (u8)off)
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Ring the peer doorbell bit corresponding to our local port number so the
+ * remote side re-reads our scratchpads.
+ */
+static int ntb_ep_dma_signal_peer(struct ntb_ep_dma_ctx *ctx)
+{
+	return ntb_peer_db_set(ctx->ntb, BIT_ULL(ntb_port_number(ctx->ntb)));
+}
+
+/*
+ * Publish the local buffer address/size plus the given state, sequence and
+ * transfer length through the local scratchpads, in ascending index order.
+ * Returns 0, or the first ntb_spad_write() error.
+ */
+static int ntb_ep_dma_publish(struct ntb_ep_dma_ctx *ctx, u32 state, u32 seq,
+			      u32 xfer_len)
+{
+	const u32 vals[NTB_EP_DMA_SPAD_CNT] = {
+		[NTB_EP_DMA_SPAD_STATE]	   = state,
+		[NTB_EP_DMA_SPAD_ADDR_LO]  = lower_32_bits(ctx->buf_dma),
+		[NTB_EP_DMA_SPAD_ADDR_HI]  = upper_32_bits(ctx->buf_dma),
+		[NTB_EP_DMA_SPAD_SIZE]	   = ctx->buf_size,
+		[NTB_EP_DMA_SPAD_SEQ]	   = seq,
+		[NTB_EP_DMA_SPAD_XFER_LEN] = xfer_len,
+	};
+	int idx, ret;
+
+	for (idx = 0; idx < NTB_EP_DMA_SPAD_CNT; idx++) {
+		ret = ntb_spad_write(ctx->ntb, idx, vals[idx]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Snapshot the peer-published scratchpads into a struct ntb_ep_dma_peer. */
+static struct ntb_ep_dma_peer
+ntb_ep_dma_read_peer(struct ntb_ep_dma_ctx *ctx)
+{
+	struct ntb_dev *ntb = ctx->ntb;
+	struct ntb_ep_dma_peer peer;
+	u64 addr_lo, addr_hi;
+
+	peer.state = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_STATE);
+	addr_lo = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_ADDR_LO);
+	addr_hi = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_ADDR_HI);
+	peer.dma_addr = (addr_hi << 32) | addr_lo;
+	peer.size = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_SIZE);
+	peer.seq = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_SEQ);
+	peer.xfer_len = ntb_peer_spad_read(ntb, 0, NTB_EP_DMA_SPAD_XFER_LEN);
+
+	return peer;
+}
+
+/*
+ * dma_request_channel() filter: accept only a channel whose DMA device is
+ * the one this NTB device maps DMA through (the EPC-integrated engine).
+ */
+static bool ntb_ep_dma_filter(struct dma_chan *chan, void *data)
+{
+	struct ntb_ep_dma_ctx *ctx = data;
+	struct device *dev = dmaengine_get_dma_device(chan);
+
+	/*
+	 * Only the identity check against ntb_get_dma_dev() matters; a
+	 * dev->parent test would spuriously reject a matching channel whose
+	 * DMA device happens to have no parent.
+	 */
+	return dev && dev == ntb_get_dma_dev(ctx->ntb);
+}
+
+/* DMA completion callback: @arg is the submitter's struct completion. */
+static void ntb_ep_dma_done(void *arg)
+{
+	complete(arg);
+}
+
+/*
+ * Transfer @len pattern bytes into the peer buffer at @peer_dma using a
+ * channel backed by this NTB device's DMA mapping device (i.e. the remote
+ * EPC-integrated engine).
+ *
+ * Allocates a temporary coherent source buffer, runs a single MEM_TO_DEV
+ * slave transfer and waits for its completion. Returns 0 on success or a
+ * negative errno (-ETIMEDOUT if the transfer did not finish in time).
+ */
+static int ntb_ep_dma_submit_xfer(struct ntb_ep_dma_ctx *ctx,
+				  dma_addr_t peer_dma, size_t len)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct dma_slave_config cfg = {};
+	struct completion done;
+	struct device *dma_dev;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+	dma_cookie_t cookie;
+	dma_addr_t src_dma;
+	void *src_buf;
+	int ret = 0;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_cap_set(DMA_PRIVATE, mask);
+
+	/* The filter restricts us to channels of this NTB's DMA device. */
+	chan = dma_request_channel(mask, ntb_ep_dma_filter, ctx);
+	if (!chan)
+		return -ENODEV;
+
+	dma_dev = ntb_get_dma_dev(ctx->ntb);
+	if (!dma_dev) {
+		ret = -ENODEV;
+		goto err_release_chan;
+	}
+
+	src_buf = dma_alloc_coherent(dma_dev, len, &src_dma, GFP_KERNEL);
+	if (!src_buf) {
+		ret = -ENOMEM;
+		goto err_release_chan;
+	}
+
+	ntb_ep_dma_fill_pattern(src_buf, len);
+	/* Make the pattern visible to the device before submitting. */
+	dma_wmb();
+
+	cfg.dst_addr = peer_dma;
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.direction = DMA_MEM_TO_DEV;
+
+	ret = dmaengine_slave_config(chan, &cfg);
+	if (ret)
+		goto err_free_src;
+
+	tx = dmaengine_prep_slave_single(chan, src_dma, len,
+					 DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx) {
+		ret = -EIO;
+		goto err_free_src;
+	}
+
+	init_completion(&done);
+	tx->callback = ntb_ep_dma_done;
+	tx->callback_param = &done;
+
+	cookie = dmaengine_submit(tx);
+	ret = dma_submit_error(cookie);
+	if (ret)
+		goto err_free_src;
+
+	dma_async_issue_pending(chan);
+
+	if (!wait_for_completion_timeout(&done, NTB_EP_DMA_TIMEOUT)) {
+		ret = -ETIMEDOUT;
+		/* Ensure the engine is quiesced before freeing src_buf. */
+		dmaengine_terminate_sync(chan);
+	}
+
+err_free_src:
+	dma_free_coherent(dma_dev, len, src_buf, src_dma);
+err_release_chan:
+	dma_release_channel(chan);
+
+	return ret;
+}
+
+/*
+ * Responder side: verify the pattern the peer just wrote into our buffer
+ * and report PASS/FAIL back through scratchpads plus a doorbell.
+ * Scheduled from db_event when the peer advertises XFER_DONE.
+ */
+static void ntb_ep_dma_verify_work(struct work_struct *work)
+{
+	struct ntb_ep_dma_ctx *ctx =
+			container_of(work, struct ntb_ep_dma_ctx, verify_work);
+	u32 seq, len, state;
+	int ret;
+
+	/* Take the parameters db_event queued for us under the lock. */
+	scoped_guard(spinlock_irqsave, &ctx->lock) {
+		seq = ctx->verify_seq;
+		len = ctx->verify_len;
+	}
+
+	if (!ctx->buf || len > ctx->buf_size)
+		ret = -EMSGSIZE;
+	else
+		ret = ntb_ep_dma_verify_pattern(ctx->buf, len);
+
+	state = ret ? NTB_EP_DMA_ST_FAIL : NTB_EP_DMA_ST_PASS;
+
+	scoped_guard(spinlock_irqsave, &ctx->lock) {
+		ctx->verified_seq = seq;
+		ctx->last_status = ret;
+		ctx->last_len = len;
+	}
+
+	ret = ntb_ep_dma_publish(ctx, state, seq, len);
+	if (!ret)
+		ret = ntb_ep_dma_signal_peer(ctx);
+	if (ret)
+		dev_err(&ctx->ntb->dev, "failed to publish verify result: %d\n",
+			ret);
+}
+
+/*
+ * Snapshot the peer scratchpads and, if the peer already advertises a READY
+ * (or later) state with a usable buffer, mark peer_ready.
+ */
+static void ntb_ep_dma_try_capture_peer_ready(struct ntb_ep_dma_ctx *ctx)
+{
+	struct ntb_ep_dma_peer snap = ntb_ep_dma_read_peer(ctx);
+	bool usable;
+
+	guard(spinlock_irqsave)(&ctx->lock);
+
+	ctx->peer = snap;
+	usable = snap.state >= NTB_EP_DMA_ST_READY &&
+		 snap.dma_addr && snap.size;
+	if (usable)
+		complete_all(&ctx->peer_ready);
+}
+
+/*
+ * Allocate (once) and publish the local test buffer, then signal the peer.
+ * Runs from the shared workqueue on link-up events and at probe time.
+ */
+static void ntb_ep_dma_setup_work(struct work_struct *work)
+{
+	struct ntb_ep_dma_ctx *ctx =
+			container_of(work, struct ntb_ep_dma_ctx, setup_work);
+	struct device *dma_dev;
+	int ret;
+
+	/* The link may have dropped again since this work was scheduled. */
+	if (!ntb_link_is_up(ctx->ntb, NULL, NULL))
+		return;
+
+	if (!ctx->buf) {
+		dma_dev = ntb_get_dma_dev(ctx->ntb);
+		if (!dma_dev) {
+			dev_err(&ctx->ntb->dev,
+				"no DMA mapping device available\n");
+			return;
+		}
+
+		ctx->buf = dma_alloc_coherent(dma_dev, ctx->buf_size,
+					      &ctx->buf_dma, GFP_KERNEL);
+		if (!ctx->buf)
+			return;
+
+		ctx->buf_dev = dma_dev;
+	}
+
+	memset(ctx->buf, 0, ctx->buf_size);
+	reinit_completion(&ctx->peer_ready);
+
+	ret = ntb_ep_dma_publish(ctx, NTB_EP_DMA_ST_READY, 0, 0);
+	if (ret)
+		goto err_free_buf;
+
+	/* The peer may already be READY; don't rely on its doorbell alone. */
+	ntb_ep_dma_try_capture_peer_ready(ctx);
+
+	ret = ntb_ep_dma_signal_peer(ctx);
+	if (ret)
+		goto err_free_buf;
+
+	return;
+
+err_free_buf:
+	dev_err(&ctx->ntb->dev, "failed to publish READY state: %d\n", ret);
+	dma_free_coherent(ctx->buf_dev, ctx->buf_size, ctx->buf, ctx->buf_dma);
+	ctx->buf = NULL;
+	ctx->buf_dev = NULL;
+	ctx->buf_dma = 0;
+}
+
+/* Link-state callback: kick the setup work once the link is up. */
+static void ntb_ep_dma_link_event(void *data)
+{
+	struct ntb_ep_dma_ctx *ctx = data;
+
+	if (ntb_link_is_up(ctx->ntb, NULL, NULL))
+		schedule_work(&ctx->setup_work);
+}
+
+/*
+ * Doorbell callback: re-read the peer scratchpads and react to the state
+ * they advertise. As the responder, queue verify_work for a new XFER_DONE
+ * sequence; as the initiator, complete xfer_done when the peer reports
+ * PASS/FAIL for our outstanding sequence.
+ */
+static void ntb_ep_dma_db_event(void *data, int vec)
+{
+	struct ntb_ep_dma_ctx *ctx = data;
+	struct ntb_ep_dma_peer peer;
+	bool do_complete = false;
+	bool do_verify = false;
+	u64 db_bits;
+
+	db_bits = ntb_db_read(ctx->ntb);
+	if (!db_bits)
+		return;
+
+	ntb_db_clear(ctx->ntb, db_bits);
+
+	peer = ntb_ep_dma_read_peer(ctx);
+
+	scoped_guard(spinlock_irqsave, &ctx->lock) {
+		ctx->peer = peer;
+		if (peer.state >= NTB_EP_DMA_ST_READY && peer.dma_addr &&
+		    peer.size)
+			complete_all(&ctx->peer_ready);
+
+		/* Responder side: a new, not-yet-verified transfer. */
+		if (peer.state == NTB_EP_DMA_ST_XFER_DONE &&
+		    peer.seq != ctx->verified_seq) {
+			ctx->verify_seq = peer.seq;
+			ctx->verify_len = peer.xfer_len;
+			do_verify = true;
+		/* Initiator side: a verdict for our current sequence. */
+		} else if ((peer.state == NTB_EP_DMA_ST_PASS ||
+			    peer.state == NTB_EP_DMA_ST_FAIL) &&
+			   peer.seq == ctx->local_seq &&
+			   peer.seq != ctx->done_seq) {
+			ctx->done_seq = peer.seq;
+			do_complete = true;
+		}
+	}
+
+	/* Act outside the lock. */
+	if (do_verify)
+		schedule_work(&ctx->verify_work);
+	if (do_complete)
+		complete_all(&ctx->xfer_done);
+}
+
+static const struct ntb_ctx_ops ntb_ep_dma_ops = {
+	.link_event = ntb_ep_dma_link_event,
+	.db_event = ntb_ep_dma_db_event,
+};
+
+/* debugfs "ready" read: 1 once the peer has published a usable buffer. */
+static int ntb_ep_dma_ready_get(void *data, u64 *ready)
+{
+	struct ntb_ep_dma_ctx *ctx = data;
+
+	*ready = completion_done(&ctx->peer_ready);
+	return 0;
+}
+
+/*
+ * debugfs "ready" write: block (interruptibly) until the peer is ready.
+ * NOTE(review): the written value is ignored — writing 0 also waits;
+ * confirm this is the intended bring-up semantics.
+ */
+static int ntb_ep_dma_ready_set(void *data, u64 ready)
+{
+	struct ntb_ep_dma_ctx *ctx = data;
+
+	return wait_for_completion_interruptible(&ctx->peer_ready);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_ep_dma_ready_fops, ntb_ep_dma_ready_get,
+			 ntb_ep_dma_ready_set, "%llu\n");
+
+/*
+ * debugfs "result" dump: last run status plus a snapshot of the local
+ * buffer parameters and the peer-published scratchpad values.
+ */
+static int ntb_ep_dma_result_show(struct seq_file *s, void *unused)
+{
+	struct ntb_ep_dma_ctx *ctx = s->private;
+	struct ntb_ep_dma_peer peer;
+	int last_status;
+	size_t last_len;
+
+	/* Snapshot shared state; don't print while holding the lock. */
+	scoped_guard(spinlock_irqsave, &ctx->lock) {
+		peer = ctx->peer;
+		last_status = ctx->last_status;
+		last_len = ctx->last_len;
+	}
+
+	seq_printf(s, "last_status: %d\n", last_status);
+	seq_printf(s, "last_len: %zu\n", last_len);
+	/* dma_addr_t may be 32-bit; cast explicitly for the %llx format. */
+	seq_printf(s, "local_buf_dma: %#llx\n",
+		   (unsigned long long)ctx->buf_dma);
+	seq_printf(s, "local_buf_size: %zu\n", ctx->buf_size);
+	seq_printf(s, "peer_ready: %u\n", completion_done(&ctx->peer_ready));
+	seq_printf(s, "peer_state: %s\n", ntb_ep_dma_state_name(peer.state));
+	seq_printf(s, "peer_dma: 0x%llx\n", (unsigned long long)peer.dma_addr);
+	seq_printf(s, "peer_size: %u\n", peer.size);
+	seq_printf(s, "peer_seq: %u\n", peer.seq);
+	seq_printf(s, "peer_xfer_len: %u\n", peer.xfer_len);
+	seq_printf(s, "link_up: %u\n", !!ntb_link_is_up(ctx->ntb, NULL, NULL));
+
+	return 0;
+}
+
+/* debugfs "result" open: bind the per-device context to the seq_file. */
+static int ntb_ep_dma_result_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ntb_ep_dma_result_show, inode->i_private);
+}
+
+/*
+ * Run one initiator-side iteration: wait for the peer to be READY, push the
+ * test pattern into the peer-published buffer via the remote DMA engine,
+ * publish XFER_DONE and wait for the peer's PASS/FAIL verdict.
+ *
+ * Returns 0 on PASS, a negative errno otherwise. Caller holds run_lock.
+ */
+static int ntb_ep_dma_run_once(struct ntb_ep_dma_ctx *ctx)
+{
+	struct ntb_ep_dma_peer peer;
+	size_t len;
+	int status;
+	long ret;
+	u32 seq;
+
+	ret = wait_for_completion_interruptible_timeout(&ctx->peer_ready,
+							NTB_EP_DMA_TIMEOUT);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -ETIMEDOUT;
+
+	/*
+	 * Snapshot the peer parameters under the lock: db_event may update
+	 * ctx->peer concurrently, and an unlocked read could mix address and
+	 * size from two different peer publications.
+	 */
+	scoped_guard(spinlock_irqsave, &ctx->lock) {
+		peer = ctx->peer;
+		seq = ++ctx->local_seq;
+		ctx->done_seq = 0;
+	}
+
+	if (!peer.dma_addr || !peer.size)
+		return -ENXIO;
+
+	len = min_t(size_t, ctx->buf_size, peer.size);
+	if (!len)
+		return -EMSGSIZE;
+
+	reinit_completion(&ctx->xfer_done);
+
+	status = ntb_ep_dma_submit_xfer(ctx, peer.dma_addr, len);
+	if (status)
+		return status;
+
+	status = ntb_ep_dma_publish(ctx, NTB_EP_DMA_ST_XFER_DONE, seq, len);
+	if (status)
+		return status;
+
+	status = ntb_ep_dma_signal_peer(ctx);
+	if (status)
+		return status;
+
+	ret = wait_for_completion_interruptible_timeout(&ctx->xfer_done,
+							NTB_EP_DMA_TIMEOUT);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -ETIMEDOUT;
+
+	guard(spinlock_irqsave)(&ctx->lock);
+
+	peer = ctx->peer;
+
+	if (peer.seq != seq)
+		return -EPROTO;
+	if (peer.state != NTB_EP_DMA_ST_PASS)
+		return -EIO;
+
+	ctx->last_len = len;
+	return 0;
+}
+
+/*
+ * debugfs "run" write: any non-zero value triggers one test iteration.
+ * Returns the number of bytes consumed, or a negative errno on failure.
+ */
+static ssize_t ntb_ep_dma_run_write(struct file *file, const char __user *ubuf,
+				    size_t len, loff_t *ppos)
+{
+	struct ntb_ep_dma_ctx *ctx = file->private_data;
+	unsigned long start;
+	char buf[32];
+	size_t cplen;
+	int ret;
+
+	if (*ppos)
+		return -EINVAL;
+
+	cplen = min(len, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cplen))
+		return -EFAULT;
+
+	buf[cplen] = '\0';
+	strim(buf);
+
+	ret = kstrtoul(buf, 0, &start);
+	if (ret)
+		return ret;
+	if (!start)
+		return -EINVAL;
+
+	guard(mutex)(&ctx->run_lock);
+
+	ret = ntb_ep_dma_run_once(ctx);
+
+	/*
+	 * last_status is also written by verify_work under ctx->lock;
+	 * update it under the same lock to avoid a data race.
+	 */
+	scoped_guard(spinlock_irqsave, &ctx->lock)
+		ctx->last_status = ret;
+
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/* debugfs "result": read-only status dump via seq_file. */
+static const struct file_operations ntb_ep_dma_result_fops = {
+	.owner = THIS_MODULE,
+	.open = ntb_ep_dma_result_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* debugfs "run": write-only trigger for one test iteration. */
+static const struct file_operations ntb_ep_dma_run_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = ntb_ep_dma_run_write,
+	.llseek = noop_llseek,
+};
+
+/*
+ * Validate the NTB device: exactly one peer port and enough scratchpads
+ * for the test protocol. Returns 0 if usable, -EINVAL otherwise.
+ */
+static int ntb_ep_dma_check_ntb(struct ntb_dev *ntb)
+{
+	bool usable = ntb_peer_port_count(ntb) == 1 &&
+		      ntb_spad_count(ntb) >= NTB_EP_DMA_SPAD_CNT;
+
+	return usable ? 0 : -EINVAL;
+}
+
+/*
+ * Bind to a suitable NTB device: set up the context and event callbacks,
+ * enable the link, and expose the debugfs control files.
+ */
+static int ntb_ep_dma_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct ntb_ep_dma_ctx *ctx;
+	int ret;
+
+	ret = ntb_ep_dma_check_ntb(ntb);
+	if (ret)
+		return ret;
+
+	ctx = devm_kzalloc(&ntb->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->ntb = ntb;
+	ctx->buf_size = NTB_EP_DMA_BUF_LEN;
+	ctx->last_len = 0;
+	ctx->last_status = 0;
+	/* U32_MAX so the first real sequence (any u32) is "not yet verified". */
+	ctx->verified_seq = U32_MAX;
+	mutex_init(&ctx->run_lock);
+	spin_lock_init(&ctx->lock);
+	init_completion(&ctx->peer_ready);
+	init_completion(&ctx->xfer_done);
+	INIT_WORK(&ctx->setup_work, ntb_ep_dma_setup_work);
+	INIT_WORK(&ctx->verify_work, ntb_ep_dma_verify_work);
+
+	ret = ntb_set_ctx(ntb, ctx, &ntb_ep_dma_ops);
+	if (ret)
+		return ret;
+
+	ret = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	if (ret)
+		goto err_clear_ctx;
+
+	/* The link may already be up; don't wait for a link event. */
+	if (ntb_link_is_up(ntb, NULL, NULL))
+		schedule_work(&ctx->setup_work);
+
+	if (debugfs_initialized()) {
+		ctx->dbgfs_dir = debugfs_create_dir(pci_name(ntb->pdev),
+						    ntb_ep_dma_dbgfs_topdir);
+		debugfs_create_file("run", 0200, ctx->dbgfs_dir, ctx,
+				    &ntb_ep_dma_run_fops);
+		debugfs_create_file("result", 0400, ctx->dbgfs_dir, ctx,
+				    &ntb_ep_dma_result_fops);
+		debugfs_create_file_unsafe("ready", 0600, ctx->dbgfs_dir, ctx,
+					   &ntb_ep_dma_ready_fops);
+	}
+
+	return 0;
+
+err_clear_ctx:
+	ntb_clear_ctx(ntb);
+	return ret;
+}
+
+/* Unbind: tear down debugfs, event delivery, pending work and the buffer. */
+static void ntb_ep_dma_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct ntb_ep_dma_ctx *ctx = ntb->ctx;
+
+	/* Stop new userspace-triggered runs first. */
+	debugfs_remove_recursive(ctx->dbgfs_dir);
+
+	/*
+	 * Detach the event callbacks before cancelling the works: a doorbell
+	 * or link event arriving after cancel_work_sync() could otherwise
+	 * re-schedule verify_work/setup_work and touch the buffer after it
+	 * has been freed.
+	 */
+	ntb_clear_ctx(ntb);
+	ntb_link_disable(ntb);
+
+	cancel_work_sync(&ctx->verify_work);
+	cancel_work_sync(&ctx->setup_work);
+
+	if (ctx->buf)
+		dma_free_coherent(ctx->buf_dev, ctx->buf_size,
+				  ctx->buf, ctx->buf_dma);
+}
+
+/* NTB client registration: one probe/remove pair per matched NTB device. */
+static struct ntb_client ntb_ep_dma_client = {
+	.ops = {
+		.probe = ntb_ep_dma_probe,
+		.remove = ntb_ep_dma_remove,
+	},
+};
+
+/* Module init: create the top-level debugfs dir and register the client. */
+static int __init ntb_ep_dma_init(void)
+{
+	int ret;
+
+	/* Per-device subdirectories are created later, at probe time. */
+	if (debugfs_initialized())
+		ntb_ep_dma_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	ret = ntb_register_client(&ntb_ep_dma_client);
+	if (ret)
+		debugfs_remove_recursive(ntb_ep_dma_dbgfs_topdir);
+
+	return ret;
+}
+module_init(ntb_ep_dma_init);
+
+/* Module exit: unregister the client (tearing down all devices) first. */
+static void __exit ntb_ep_dma_exit(void)
+{
+	ntb_unregister_client(&ntb_ep_dma_client);
+	debugfs_remove_recursive(ntb_ep_dma_dbgfs_topdir);
+}
+module_exit(ntb_ep_dma_exit);
-- 
2.51.0