[PATCH 14/14] dmaengine: dma350: Support ARM DMA-250

Posted by Jisheng Zhang 1 month, 1 week ago
Compared with ARM DMA-350, DMA-250 is a simplified version. They share
many common parts, but they do have differences. Add DMA-250 support by
handling these differences with separate device_prep_slave_sg,
device_prep_dma_cyclic and device_prep_dma_memcpy implementations.
DMA-250 doesn't support device_prep_dma_memset.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
 drivers/dma/arm-dma350.c | 444 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 424 insertions(+), 20 deletions(-)

diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c
index 5abb965c6687..0ee807424b7e 100644
--- a/drivers/dma/arm-dma350.c
+++ b/drivers/dma/arm-dma350.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2024-2025 Arm Limited
 // Copyright (C) 2025 Synaptics Incorporated
-// Arm DMA-350 driver
+// Arm DMA-350/DMA-250 driver
 
 #include <linux/bitfield.h>
 #include <linux/dmaengine.h>
@@ -16,6 +16,10 @@
 #include "dmaengine.h"
 #include "virt-dma.h"
 
+#define DMANSECCTRL		0x0200
+
+#define NSEC_CNTXBASE		0x10
+
 #define DMAINFO			0x0f00
 
 #define DMA_BUILDCFG0		0xb0
@@ -26,12 +30,16 @@
 #define DMA_BUILDCFG1		0xb4
 #define DMA_CFG_NUM_TRIGGER_IN	GENMASK(8, 0)
 
+#define DMA_BUILDCFG2		0xb8
+#define DMA_CFG_HAS_TZ		BIT(8)
+
 #define IIDR			0xc8
 #define IIDR_PRODUCTID		GENMASK(31, 20)
 #define IIDR_VARIANT		GENMASK(19, 16)
 #define IIDR_REVISION		GENMASK(15, 12)
 #define IIDR_IMPLEMENTER	GENMASK(11, 0)
 
+#define PRODUCTID_DMA250	0x250
 #define PRODUCTID_DMA350	0x3a0
 #define IMPLEMENTER_ARM		0x43b
 
@@ -140,6 +148,7 @@
 #define CH_CFG_HAS_TRIGSEL	BIT(7)
 #define CH_CFG_HAS_TRIGIN	BIT(5)
 #define CH_CFG_HAS_WRAP		BIT(1)
+#define CH_CFG_HAS_XSIZEHI	BIT(0)
 
 
 #define LINK_REGCLEAR		BIT(0)
@@ -218,6 +227,7 @@ struct d350_chan {
 	bool cyclic;
 	bool has_trig;
 	bool has_wrap;
+	bool has_xsizehi;
 	bool coherent;
 };
 
@@ -225,6 +235,10 @@ struct d350 {
 	struct dma_device dma;
 	int nchan;
 	int nreq;
+	bool is_d250;
+	dma_addr_t cntx_mem_paddr;
+	void *cntx_mem;
+	u32 cntx_mem_size;
 	struct d350_chan channels[] __counted_by(nchan);
 };
 
@@ -238,6 +252,11 @@ static inline struct d350_desc *to_d350_desc(struct virt_dma_desc *vd)
 	return container_of(vd, struct d350_desc, vd);
 }
 
+static inline struct d350 *to_d350(struct dma_device *dd)
+{
+	return container_of(dd, struct d350, dma);
+}
+
 static void d350_desc_free(struct virt_dma_desc *vd)
 {
 	struct d350_chan *dch = to_d350_chan(vd->tx.chan);
@@ -585,6 +604,337 @@ static int d350_slave_config(struct dma_chan *chan, struct dma_slave_config *con
 	return 0;
 }
 
+static struct dma_async_tx_descriptor *d250_prep_memcpy(struct dma_chan *chan,
+		dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct d350_chan *dch = to_d350_chan(chan);
+	struct d350_desc *desc;
+	u32 *cmd, *la_cmd, tsz;
+	int sglen, i;
+	struct d350_sg *sg;
+	size_t xfer_len, step_max;
+	dma_addr_t phys;
+
+	tsz = __ffs(len | dest | src | (1 << dch->tsz));
+	step_max = ((1UL << 16) - 1) << tsz;
+	sglen = DIV_ROUND_UP(len, step_max);
+
+	desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sglen = sglen;
+	sglen = 0;
+	while (len) {
+		sg = &desc->sg[sglen];
+		xfer_len = (len > step_max) ? step_max : len;
+		sg->tsz = __ffs(xfer_len | dest | src | (1 << dch->tsz));
+		sg->xsize = lower_16_bits(xfer_len >> sg->tsz);
+
+		sg->command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+		if (unlikely(!sg->command))
+			goto err_cmd_alloc;
+		sg->phys = phys;
+
+		cmd = sg->command;
+		if (!sglen) {
+			cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+				 LINK_XSIZE | LINK_SRCTRANSCFG |
+				 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+			cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
+				 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+
+			cmd[2] = lower_32_bits(src);
+			cmd[3] = lower_32_bits(dest);
+			cmd[4] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+				 FIELD_PREP(CH_XY_DES, sg->xsize);
+			cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+			cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+			cmd[7] = FIELD_PREP(CH_XY_SRC, 1) | FIELD_PREP(CH_XY_DES, 1);
+			la_cmd = &cmd[8];
+		} else {
+			*la_cmd = phys | CH_LINKADDR_EN;
+			if (len <= step_max) {
+				cmd[0] = LINK_CTRL | LINK_XSIZE | LINK_LINKADDR;
+				cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
+					 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+				cmd[2] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+					 FIELD_PREP(CH_XY_DES, sg->xsize);
+				la_cmd = &cmd[3];
+			} else {
+				cmd[0] = LINK_XSIZE | LINK_LINKADDR;
+				cmd[1] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+					 FIELD_PREP(CH_XY_DES, sg->xsize);
+				la_cmd = &cmd[2];
+			}
+		}
+
+		len -= xfer_len;
+		src += xfer_len;
+		dest += xfer_len;
+		sglen++;
+	}
+
+	/* the last cmdlink */
+	*la_cmd = 0;
+	desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+	mb();
+
+	return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+	for (i = 0; i < sglen; i++)
+		dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d250_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		   unsigned int sg_len, enum dma_transfer_direction dir,
+		   unsigned long flags, void *context)
+{
+	struct d350_chan *dch = to_d350_chan(chan);
+	dma_addr_t src, dst, phys, mem_addr;
+	size_t xfer_len, step_max;
+	u32 len, trig, *cmd, *la_cmd, tsz;
+	struct d350_desc *desc;
+	struct scatterlist *sg;
+	struct d350_sg *dsg;
+	int i, sglen = 0;
+
+	if (unlikely(!is_slave_direction(dir) || !sg_len))
+		return NULL;
+
+	if (dir == DMA_MEM_TO_DEV)
+		tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
+	else
+		tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
+	step_max = ((1UL << 16) - 1) << tsz;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		sglen += DIV_ROUND_UP(sg_dma_len(sg), step_max);
+
+	desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sglen = sglen;
+
+	sglen = 0;
+	for_each_sg(sgl, sg, sg_len, i) {
+		len = sg_dma_len(sg);
+		mem_addr = sg_dma_address(sg);
+
+		do {
+			desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+			if (unlikely(!desc->sg[sglen].command))
+				goto err_cmd_alloc;
+
+			xfer_len = (len > step_max) ? step_max : len;
+			desc->sg[sglen].phys = phys;
+			dsg = &desc->sg[sglen];
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = mem_addr;
+				dst = dch->config.dst_addr;
+				trig = CH_CTRL_USEDESTRIGIN;
+			} else {
+				src = dch->config.src_addr;
+				dst = mem_addr;
+				trig = CH_CTRL_USESRCTRIGIN;
+			}
+			dsg->tsz = tsz;
+			dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz);
+
+			cmd = dsg->command;
+			if (!sglen) {
+				cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+					 LINK_XSIZE | LINK_SRCTRANSCFG |
+					 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+				cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
+					 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+
+				cmd[2] = lower_32_bits(src);
+				cmd[3] = lower_32_bits(dst);
+				cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+					 FIELD_PREP(CH_XY_DES, dsg->xsize);
+				if (dir == DMA_MEM_TO_DEV) {
+					cmd[0] |= LINK_DESTRIGINCFG;
+					cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+					cmd[6] = TRANSCFG_DEVICE;
+					cmd[7] = FIELD_PREP(CH_XY_SRC, 1);
+					cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
+						  FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
+				} else {
+					cmd[0] |= LINK_SRCTRIGINCFG;
+					cmd[5] = TRANSCFG_DEVICE;
+					cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+					cmd[7] = FIELD_PREP(CH_XY_DES, 1);
+					cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
+						  FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
+				}
+				la_cmd = &cmd[9];
+			} else {
+				*la_cmd = phys | CH_LINKADDR_EN;
+				if (sglen == desc->sglen - 1) {
+					cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+						 LINK_XSIZE | LINK_LINKADDR;
+					cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
+						 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+					cmd[2] = lower_32_bits(src);
+					cmd[3] = lower_32_bits(dst);
+					cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+						 FIELD_PREP(CH_XY_DES, dsg->xsize);
+					la_cmd = &cmd[5];
+				} else {
+					cmd[0] = LINK_SRCADDR | LINK_DESADDR |
+						 LINK_XSIZE | LINK_LINKADDR;
+					cmd[1] = lower_32_bits(src);
+					cmd[2] = lower_32_bits(dst);
+					cmd[3] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+						 FIELD_PREP(CH_XY_DES, dsg->xsize);
+					la_cmd = &cmd[4];
+				}
+			}
+
+			len -= xfer_len;
+			mem_addr += xfer_len;
+			sglen++;
+		} while (len);
+	}
+
+	/* the last command */
+	*la_cmd = 0;
+	desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+	mb();
+
+	return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+	for (i = 0; i < sglen; i++)
+		dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d250_prep_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+		 size_t buf_len, size_t period_len, enum dma_transfer_direction dir,
+		 unsigned long flags)
+{
+	struct d350_chan *dch = to_d350_chan(chan);
+	u32 len, periods, trig, *cmd, tsz;
+	dma_addr_t src, dst, phys, mem_addr;
+	size_t xfer_len, step_max;
+	struct d350_desc *desc;
+	struct scatterlist *sg;
+	struct d350_sg *dsg;
+	int sglen, i;
+
+	if (unlikely(!is_slave_direction(dir) || !buf_len || !period_len))
+		return NULL;
+
+	if (dir == DMA_MEM_TO_DEV)
+		tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
+	else
+		tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
+	step_max = ((1UL << 16) - 1) << tsz;
+
+	periods = buf_len / period_len;
+	sglen = DIV_ROUND_UP(sg_dma_len(sg), step_max) * periods;
+
+	desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	dch->cyclic = true;
+	dch->periods = periods;
+	desc->sglen = sglen;
+
+	sglen = 0;
+	for (i = 0; i < periods; i++) {
+		len = period_len;
+		mem_addr = buf_addr + i * period_len;
+		do {
+			desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+			if (unlikely(!desc->sg[sglen].command))
+				goto err_cmd_alloc;
+
+			xfer_len = (len > step_max) ? step_max : len;
+			desc->sg[sglen].phys = phys;
+			dsg = &desc->sg[sglen];
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = mem_addr;
+				dst = dch->config.dst_addr;
+				trig = CH_CTRL_USEDESTRIGIN;
+			} else {
+				src = dch->config.src_addr;
+				dst = mem_addr;
+				trig = CH_CTRL_USESRCTRIGIN;
+			}
+			dsg->tsz = tsz;
+			dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz);
+
+			cmd = dsg->command;
+			cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+				 LINK_XSIZE | LINK_SRCTRANSCFG |
+				 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+			cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) |
+				 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE) |
+				 FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD) | trig;
+
+			cmd[2] = lower_32_bits(src);
+			cmd[3] = lower_32_bits(dst);
+			cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+				 FIELD_PREP(CH_XY_DES, dsg->xsize);
+			if (dir == DMA_MEM_TO_DEV) {
+				cmd[0] |= LINK_DESTRIGINCFG;
+				cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+				cmd[6] = TRANSCFG_DEVICE;
+				cmd[7] = FIELD_PREP(CH_XY_SRC, 1);
+				cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
+					  FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
+			} else {
+				cmd[0] |= LINK_SRCTRIGINCFG;
+				cmd[5] = TRANSCFG_DEVICE;
+				cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+				cmd[7] = FIELD_PREP(CH_XY_DES, 1);
+				cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
+					  FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
+			}
+
+			if (sglen)
+				desc->sg[sglen - 1].command[9] = phys | CH_LINKADDR_EN;
+
+			len -= xfer_len;
+			mem_addr += xfer_len;
+			sglen++;
+		} while (len);
+		desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE,
+							     CH_CTRL_DONETYPE_CMD);
+	}
+
+	/* cyclic list */
+	desc->sg[sglen - 1].command[9] = desc->sg[0].phys | CH_LINKADDR_EN;
+
+	mb();
+
+	return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+	for (i = 0; i < sglen; i++)
+		dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+	kfree(desc);
+	return NULL;
+}
+
 static int d350_pause(struct dma_chan *chan)
 {
 	struct d350_chan *dch = to_d350_chan(chan);
@@ -620,20 +970,31 @@ static u32 d350_get_residue(struct d350_chan *dch)
 	u32 res, xsize, xsizehi, linkaddr, linkaddrhi, hi_new;
 	int i, sgcur, retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
 	struct d350_desc *desc = dch->desc;
+	struct d350 *dmac = to_d350(dch->vc.chan.device);
 
-	hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
-	do {
-		xsizehi = hi_new;
-		xsize = readl_relaxed(dch->base + CH_XSIZE);
+	if (dch->has_xsizehi) {
 		hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
-	} while (xsizehi != hi_new && --retries);
+		do {
+			xsizehi = hi_new;
+			xsize = readl_relaxed(dch->base + CH_XSIZE);
+			hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
+		} while (xsizehi != hi_new && --retries);
+	} else {
+		xsize = readl_relaxed(dch->base + CH_XSIZE);
+		xsizehi = 0;
+	}
 
-	hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
-	do {
-		linkaddrhi = hi_new;
-		linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+	if (!dmac->is_d250) {
 		hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
-	} while (linkaddrhi != hi_new && --retries);
+		do {
+			linkaddrhi = hi_new;
+			linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+			hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
+		} while (linkaddrhi != hi_new && --retries);
+	} else {
+		linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+		linkaddrhi = 0;
+	}
 
 	for (i = 0; i < desc->sglen; i++) {
 		if (desc->sg[i].phys == (((u64)linkaddrhi << 32) | (linkaddr & ~CH_LINKADDR_EN)))
@@ -876,6 +1237,14 @@ static void d350_free_chan_resources(struct dma_chan *chan)
 	dch->cmd_pool = NULL;
 }
 
+static void d250_cntx_mem_release(void *ptr)
+{
+	struct d350 *dmac = ptr;
+	struct device *dev = dmac->dma.dev;
+
+	dma_free_coherent(dev, dmac->cntx_mem_size, dmac->cntx_mem, dmac->cntx_mem_paddr);
+}
+
 static int d350_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev)
 	r = FIELD_GET(IIDR_VARIANT, reg);
 	p = FIELD_GET(IIDR_REVISION, reg);
 	if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM ||
-	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350)
-		return dev_err_probe(dev, -ENODEV, "Not a DMA-350!");
+	    ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) &&
+	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250))
+		return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!");
 
 	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0);
 	nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1;
@@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	if (device_is_compatible(dev, "arm,dma-250")) {
+		u32 cfg2;
+		int secext_present;
+
+		dmac->is_d250 = true;
+
+		cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2);
+		secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 1 : 0;
+		dmac->cntx_mem_size = nchan * 64 * (1 + secext_present);
+		dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size,
+						    &dmac->cntx_mem_paddr,
+						    GFP_KERNEL);
+		if (!dmac->cntx_mem)
+			return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n");
+
+		ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac);
+		if (ret) {
+			dma_free_coherent(dev, dmac->cntx_mem_size,
+					  dmac->cntx_mem, dmac->cntx_mem_paddr);
+			return ret;
+		}
+		writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE);
+	}
+
 	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
 	coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
 
 	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
 	dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
 
-	dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
+	dev_info(dev, "%s r%dp%d with %d channels, %d requests\n",
+		 dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq);
 
 	for (int i = min(dw, 16); i > 0; i /= 2) {
 		dmac->dma.src_addr_widths |= BIT(i);
@@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev)
 	dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources;
 	dmac->dma.device_free_chan_resources = d350_free_chan_resources;
 	dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask);
-	dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
+	if (dmac->is_d250)
+		dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy;
+	else
+		dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
 	dmac->dma.device_pause = d350_pause;
 	dmac->dma.device_resume = d350_resume;
 	dmac->dma.device_terminate_all = d350_terminate_all;
@@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev)
 			return dch->irq;
 
 		dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg);
-		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &
-				FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);
+		dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg);
+		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg);
 
 		/* Fill is a special case of Wrap */
 		memset &= dch->has_wrap;
@@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev)
 		dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask);
 		dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask);
 		dmac->dma.device_config = d350_slave_config;
-		dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
-		dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
+		if (dmac->is_d250) {
+			dmac->dma.device_prep_slave_sg = d250_prep_slave_sg;
+			dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic;
+		} else {
+			dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
+			dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
+		}
 	}
 
 	if (memset) {
@@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev)
 
 static const struct of_device_id d350_of_match[] __maybe_unused = {
 	{ .compatible = "arm,dma-350" },
+	{ .compatible = "arm,dma-250" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, d350_of_match);
@@ -1035,5 +1439,5 @@ module_platform_driver(d350_driver);
 
 MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
 MODULE_AUTHOR("Jisheng Zhang <jszhang@kernel.org>");
-MODULE_DESCRIPTION("Arm DMA-350 driver");
+MODULE_DESCRIPTION("Arm DMA-350/DMA-250 driver");
 MODULE_LICENSE("GPL v2");
-- 
2.50.0
Re: [PATCH 14/14] dmaengine: dma350: Support ARM DMA-250
Posted by Robin Murphy 1 month ago
On 2025-08-23 4:40 pm, Jisheng Zhang wrote:
> Compared with ARM DMA-350, DMA-250 is a simplified version. They share
> many common parts, but they do have differences. Add DMA-250 support by
> handling these differences with separate device_prep_slave_sg,
> device_prep_dma_cyclic and device_prep_dma_memcpy implementations.
> DMA-250 doesn't support device_prep_dma_memset.
> 
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
>   drivers/dma/arm-dma350.c | 444 +++++++++++++++++++++++++++++++++++++--
>   1 file changed, 424 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c
> index 5abb965c6687..0ee807424b7e 100644
> --- a/drivers/dma/arm-dma350.c
> +++ b/drivers/dma/arm-dma350.c
> @@ -1,7 +1,7 @@
>   // SPDX-License-Identifier: GPL-2.0
>   // Copyright (C) 2024-2025 Arm Limited
>   // Copyright (C) 2025 Synaptics Incorporated
> -// Arm DMA-350 driver
> +// Arm DMA-350/DMA-250 driver

Yeah, that's going to get old fast... By all means update the Kconfig 
help text if you think it's helpful to end users, but I don't think 
anyone expects comments like this to be exhaustive, so honestly I'd save 
the churn.
>   #include <linux/bitfield.h>
>   #include <linux/dmaengine.h>
> @@ -16,6 +16,10 @@
>   #include "dmaengine.h"
>   #include "virt-dma.h"
>   
> +#define DMANSECCTRL		0x0200
> +
> +#define NSEC_CNTXBASE		0x10
> +
>   #define DMAINFO			0x0f00
>   
>   #define DMA_BUILDCFG0		0xb0
> @@ -26,12 +30,16 @@
>   #define DMA_BUILDCFG1		0xb4
>   #define DMA_CFG_NUM_TRIGGER_IN	GENMASK(8, 0)
>   
> +#define DMA_BUILDCFG2		0xb8
> +#define DMA_CFG_HAS_TZ		BIT(8)

I don't think we need to care about that. Yes, the TRM describes the
total context memory size required from the PoV of the hardware itself, 
but even if SEC_CNTXBASE does exist, Non-Secure Linux can't set it, so 
clearly Linux can't need to provide memory for it.
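In which case the Non-Secure allocation would presumably reduce to just
(a sketch, keeping the 64 bytes per channel from the patch):

	dmac->cntx_mem_size = nchan * 64;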

> +
>   #define IIDR			0xc8
>   #define IIDR_PRODUCTID		GENMASK(31, 20)
>   #define IIDR_VARIANT		GENMASK(19, 16)
>   #define IIDR_REVISION		GENMASK(15, 12)
>   #define IIDR_IMPLEMENTER	GENMASK(11, 0)
>   
> +#define PRODUCTID_DMA250	0x250
>   #define PRODUCTID_DMA350	0x3a0
>   #define IMPLEMENTER_ARM		0x43b
>   
> @@ -140,6 +148,7 @@
>   #define CH_CFG_HAS_TRIGSEL	BIT(7)
>   #define CH_CFG_HAS_TRIGIN	BIT(5)
>   #define CH_CFG_HAS_WRAP		BIT(1)
> +#define CH_CFG_HAS_XSIZEHI	BIT(0)
>   
>   
>   #define LINK_REGCLEAR		BIT(0)
> @@ -218,6 +227,7 @@ struct d350_chan {
>   	bool cyclic;
>   	bool has_trig;
>   	bool has_wrap;
> +	bool has_xsizehi;
>   	bool coherent;
>   };
>   
> @@ -225,6 +235,10 @@ struct d350 {
>   	struct dma_device dma;
>   	int nchan;
>   	int nreq;
> +	bool is_d250;

That won't scale, but it also shouldn't be needed anyway - other than 
the context memory which is easily handled within the scope of the probe 
routine that already has the IIDR to hand, everything else ought to be 
based on the relevant feature flags.

> +	dma_addr_t cntx_mem_paddr;
> +	void *cntx_mem;
> +	u32 cntx_mem_size;
>   	struct d350_chan channels[] __counted_by(nchan);
>   };
>   
> @@ -238,6 +252,11 @@ static inline struct d350_desc *to_d350_desc(struct virt_dma_desc *vd)
>   	return container_of(vd, struct d350_desc, vd);
>   }
>   
> +static inline struct d350 *to_d350(struct dma_device *dd)
> +{
> +	return container_of(dd, struct d350, dma);
> +}
> +
>   static void d350_desc_free(struct virt_dma_desc *vd)
>   {
>   	struct d350_chan *dch = to_d350_chan(vd->tx.chan);
> @@ -585,6 +604,337 @@ static int d350_slave_config(struct dma_chan *chan, struct dma_slave_config *con
>   	return 0;
>   }
>   
> +static struct dma_async_tx_descriptor *d250_prep_memcpy(struct dma_chan *chan,
> +		dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags)

Case in point: We don't need a mess of separate copy-pasted functions, 
we just need to evolve the existing ones to split the respective 
operations into either 32-bit or 16-bit chunks depending on has_xsizehi 
- even on DMA-350, >32-bit sizes aren't properly supported since I never 
got as far as command linking, but there's no reason they shouldn't be.
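A minimal sketch of that direction (illustrative only), deriving the
per-command chunk limit from the channel capability instead of
hard-coding 16 bits:

	/* largest single-command transfer, in bytes */
	u64 xsize_max = dch->has_xsizehi ? U32_MAX : U16_MAX;
	u64 step_max = xsize_max << tsz;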

> +{
> +	struct d350_chan *dch = to_d350_chan(chan);
> +	struct d350_desc *desc;
> +	u32 *cmd, *la_cmd, tsz;
> +	int sglen, i;
> +	struct d350_sg *sg;
> +	size_t xfer_len, step_max;
> +	dma_addr_t phys;
> +
> +	tsz = __ffs(len | dest | src | (1 << dch->tsz));
> +	step_max = ((1UL << 16) - 1) << tsz;
> +	sglen = DIV_ROUND_UP(len, step_max);
> +
> +	desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
> +	if (!desc)
> +		return NULL;
> +
> +	desc->sglen = sglen;
> +	sglen = 0;
> +	while (len) {
> +		sg = &desc->sg[sglen];
> +		xfer_len = (len > step_max) ? step_max : len;

If only we had a min() function...
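i.e. (sketch):

	xfer_len = min(len, step_max);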

> +		sg->tsz = __ffs(xfer_len | dest | src | (1 << dch->tsz));

Um, what? By this point we've already decided to split based on the 
initial tsz, what purpose does recalculating it serve?
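Presumably the value computed once before the split is the one that
matters, i.e. simply:

	sg->tsz = tsz;	/* as calculated before the loop */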

> +		sg->xsize = lower_16_bits(xfer_len >> sg->tsz);
> +
> +		sg->command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
> +		if (unlikely(!sg->command))
> +			goto err_cmd_alloc;
> +		sg->phys = phys;
> +
> +		cmd = sg->command;
> +		if (!sglen) {
> +			cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
> +				 LINK_XSIZE | LINK_SRCTRANSCFG |
> +				 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
> +
> +			cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
> +				 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
> +
> +			cmd[2] = lower_32_bits(src);
> +			cmd[3] = lower_32_bits(dest);
> +			cmd[4] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
> +				 FIELD_PREP(CH_XY_DES, sg->xsize);
> +			cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
> +			cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
> +			cmd[7] = FIELD_PREP(CH_XY_SRC, 1) | FIELD_PREP(CH_XY_DES, 1);
> +			la_cmd = &cmd[8];
> +		} else {
> +			*la_cmd = phys | CH_LINKADDR_EN;
> +			if (len <= step_max) {
> +				cmd[0] = LINK_CTRL | LINK_XSIZE | LINK_LINKADDR;
> +				cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
> +					 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
> +				cmd[2] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
> +					 FIELD_PREP(CH_XY_DES, sg->xsize);
> +				la_cmd = &cmd[3];
> +			} else {
> +				cmd[0] = LINK_XSIZE | LINK_LINKADDR;
> +				cmd[1] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
> +					 FIELD_PREP(CH_XY_DES, sg->xsize);
> +				la_cmd = &cmd[2];
> +			}

Ok, we really need to figure out a better abstraction for command 
construction, the hard-coded array indices were a bad enough idea to 
start with, but this is almost impossible to make sense of.
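Even a tiny helper that keeps the header bits and payload words in step
would help, e.g. (a rough sketch; the names are invented here, and
callers would still have to append in register order):

	struct d350_cmdbuf {
		u32 *cmd;	/* command buffer, cmd[0] is the link header */
		int n;		/* next free payload slot */
	};

	static void d350_cmd_add(struct d350_cmdbuf *c, u32 link_bit, u32 val)
	{
		c->cmd[0] |= link_bit;	/* declare the register in the header */
		c->cmd[c->n++] = val;	/* ...and append its value */
	}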

> +		}
> +
> +		len -= xfer_len;
> +		src += xfer_len;
> +		dest += xfer_len;
> +		sglen++;
> +	}
> +
> +	/* the last cmdlink */
> +	*la_cmd = 0;
> +	desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);

As for that, I don't even...

Furthermore, all these loops and conditionals are crazy anyway, and 
thoroughly failing to do justice to the hardware actually being pretty 
cool, namely that *commands can loop themselves*! Any single buffer/sg 
segment should take at most two commands - one dividing as much of the 
length as possible between XSIZE{HI} and CMDRESTARTCOUNT using 
REGRELOADTYPE=1, and/or one to transfer whatever non-multiple tail 
portion remains.

Honestly I'm sad the project for which I originally started this driver 
got canned, as this is the part I was really looking forward to having 
some fun with...

[...]
>   static int d350_pause(struct dma_chan *chan)
>   {
>   	struct d350_chan *dch = to_d350_chan(chan);
> @@ -620,20 +970,31 @@ static u32 d350_get_residue(struct d350_chan *dch)
>   	u32 res, xsize, xsizehi, linkaddr, linkaddrhi, hi_new;
>   	int i, sgcur, retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
>   	struct d350_desc *desc = dch->desc;
> +	struct d350 *dmac = to_d350(dch->vc.chan.device);
>   
> -	hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
> -	do {
> -		xsizehi = hi_new;
> -		xsize = readl_relaxed(dch->base + CH_XSIZE);
> +	if (dch->has_xsizehi) {
>   		hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
> -	} while (xsizehi != hi_new && --retries);
> +		do {
> +			xsizehi = hi_new;
> +			xsize = readl_relaxed(dch->base + CH_XSIZE);
> +			hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
> +		} while (xsizehi != hi_new && --retries);
> +	} else {
> +		xsize = readl_relaxed(dch->base + CH_XSIZE);
> +		xsizehi = 0;
> +	}

This is unnecessary - if the CH_XSIZEHI location isn't the actual
register then it's RAZ/WI, which means the existing logic can take full 
advantage of it reading as zero and still work just the same.

> -	hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
> -	do {
> -		linkaddrhi = hi_new;
> -		linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
> +	if (!dmac->is_d250) {

And similarly here. The only thing we should perhaps do specially for 
LINKADDRHI is omit it from command generation when ADDR_WIDTH <= 32 in 
general. I admit I was lazy there, since it's currently harmless for 
d350_start_next() to write the register location unconditionally, but 
I'm not sure how a 32-bit DMA-350 would handle it in an actual command 
link header.

>   		hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
> -	} while (linkaddrhi != hi_new && --retries);
> +		do {
> +			linkaddrhi = hi_new;
> +			linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
> +			hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
> +		} while (linkaddrhi != hi_new && --retries);
> +	} else {
> +		linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
> +		linkaddrhi = 0;
> +	}
>   
>   	for (i = 0; i < desc->sglen; i++) {
>   		if (desc->sg[i].phys == (((u64)linkaddrhi << 32) | (linkaddr & ~CH_LINKADDR_EN)))
> @@ -876,6 +1237,14 @@ static void d350_free_chan_resources(struct dma_chan *chan)
>   	dch->cmd_pool = NULL;
>   }
>   
> +static void d250_cntx_mem_release(void *ptr)
> +{
> +	struct d350 *dmac = ptr;
> +	struct device *dev = dmac->dma.dev;
> +
> +	dma_free_coherent(dev, dmac->cntx_mem_size, dmac->cntx_mem, dmac->cntx_mem_paddr);
> +}
> +
>   static int d350_probe(struct platform_device *pdev)
>   {
>   	struct device *dev = &pdev->dev;
> @@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev)
>   	r = FIELD_GET(IIDR_VARIANT, reg);
>   	p = FIELD_GET(IIDR_REVISION, reg);
>   	if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM ||
> -	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350)
> -		return dev_err_probe(dev, -ENODEV, "Not a DMA-350!");
> +	    ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) &&
> +	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250))
> +		return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!");
>   
>   	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0);
>   	nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1;
> @@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev)
>   		return ret;
>   	}
>   
> +	if (device_is_compatible(dev, "arm,dma-250")) {

If only we had a completely reliable product ID from the hardware itself...

> +		u32 cfg2;
> +		int secext_present;
> +
> +		dmac->is_d250 = true;
> +
> +		cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2);
> +		secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 1 : 0;
> +		dmac->cntx_mem_size = nchan * 64 * (1 + secext_present);

As before I think that's wrong.

> +		dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size,
> +						    &dmac->cntx_mem_paddr,
> +						    GFP_KERNEL);

This is too early, it needs to wait until after we've set the DMA mask. 
Also since this is purely private memory for the device, it may as well 
use DMA_ATTR_NO_KERNEL_MAPPING.

> +		if (!dmac->cntx_mem)
> +			return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n");

Just return -ENOMEM - dev_err_probe() adds nothing.

> +		ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac);
> +		if (ret) {
> +			dma_free_coherent(dev, dmac->cntx_mem_size,
> +					  dmac->cntx_mem, dmac->cntx_mem_paddr);

a) Understand that the mildly non-obvious "or reset" means it already 
calls the cleanup action on error, so this would be a double-free.

b) Don't reinvent dmam_alloc_*() in the first place though.
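With both points folded in, and moved below the dma_set_mask_and_coherent()
call per the above, the whole thing collapses to roughly (sketch):

	dmac->cntx_mem = dmam_alloc_attrs(dev, dmac->cntx_mem_size,
					  &dmac->cntx_mem_paddr, GFP_KERNEL,
					  DMA_ATTR_NO_KERNEL_MAPPING);
	if (!dmac->cntx_mem)
		return -ENOMEM;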

> +			return ret;
> +		}
> +		writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE);

Perhaps we should check that this hasn't already been set up first? I 
mean, we can't necessarily even be sure the context memory interface can
access the same address space as the DMA transfer interface at all; the 
design intent is at least partly to allow connecting a dedicated SRAM 
directly, see figure 1 here: 
https://developer.arm.com/documentation/108001/0000/DMAC-interfaces/AHB5-manager-interfaces/Separate-AHB5-ports-for-data-and-virtual-channel-context?lang=en

However I'm not sure how feasible that is to detect from software - the 
base register alone clearly isn't foolproof since 0 could be a valid 
address (especially in a private SRAM). At worst I suppose we might end 
up needing a DMA-250-specific DT property to say whether it does or 
doesn't need context memory from the OS...

> +	}
> +
>   	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
>   	coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
>   
>   	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
>   	dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
>   
> -	dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
> +	dev_info(dev, "%s r%dp%d with %d channels, %d requests\n",
> +		 dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq);

As Krzysztof said, this is a debug message and it's staying a debug 
message. And just replace "DMA-350" with "ProductID 0x%x" - it's only 
meant as a sanity-check that we're looking at the hardware we expect to 
be looking at.
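e.g. (sketch, assuming the product ID is kept from the earlier IIDR read):

	dev_dbg(dev, "ProductID %#x r%dp%d with %d channels, %d requests\n",
		productid, r, p, dmac->nchan, dmac->nreq);
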
>   	for (int i = min(dw, 16); i > 0; i /= 2) {
>   		dmac->dma.src_addr_widths |= BIT(i);
> @@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev)
>   	dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources;
>   	dmac->dma.device_free_chan_resources = d350_free_chan_resources;
>   	dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask);
> -	dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
> +	if (dmac->is_d250)
> +		dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy;
> +	else
> +		dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
>   	dmac->dma.device_pause = d350_pause;
>   	dmac->dma.device_resume = d350_resume;
>   	dmac->dma.device_terminate_all = d350_terminate_all;
> @@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev)
>   			return dch->irq;
>   
>   		dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg);
> -		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &
> -				FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);

Not only is this in the wrong patch, it's the wrong change to make 
anyway. If you're only adding support for fixed triggers, you need to 
explicitly *exclude* selectable triggers from that, because they work 
differently.
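i.e. presumably:

	dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &&
			!FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);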

Thanks,
Robin.

> +		dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg);
> +		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg);
>   
>   		/* Fill is a special case of Wrap */
>   		memset &= dch->has_wrap;
> @@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev)
>   		dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask);
>   		dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask);
>   		dmac->dma.device_config = d350_slave_config;
> -		dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
> -		dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
> +		if (dmac->is_d250) {
> +			dmac->dma.device_prep_slave_sg = d250_prep_slave_sg;
> +			dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic;
> +		} else {
> +			dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
> +			dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
> +		}
>   	}
>   
>   	if (memset) {
> @@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev)
>   
>   static const struct of_device_id d350_of_match[] __maybe_unused = {
>   	{ .compatible = "arm,dma-350" },
> +	{ .compatible = "arm,dma-250" },
>   	{}
>   };
>   MODULE_DEVICE_TABLE(of, d350_of_match);
> @@ -1035,5 +1439,5 @@ module_platform_driver(d350_driver);
>   
>   MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
>   MODULE_AUTHOR("Jisheng Zhang <jszhang@kernel.org>");
> -MODULE_DESCRIPTION("Arm DMA-350 driver");
> +MODULE_DESCRIPTION("Arm DMA-350/DMA-250 driver");
>   MODULE_LICENSE("GPL v2");
Re: [PATCH 14/14] dmaengine: dma350: Support ARM DMA-250
Posted by kernel test robot 1 month, 1 week ago
Hi Jisheng,

kernel test robot noticed the following build warnings:

[auto build test WARNING on vkoul-dmaengine/next]
[also build test WARNING on robh/for-next krzk-dt/for-next linus/master v6.17-rc2 next-20250822]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jisheng-Zhang/dmaengine-dma350-Fix-CH_CTRL_USESRCTRIGIN-definition/20250824-000425
base:   https://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git next
patch link:    https://lore.kernel.org/r/20250823154009.25992-15-jszhang%40kernel.org
patch subject: [PATCH 14/14] dmaengine: dma350: Support ARM DMA-250
config: x86_64-buildonly-randconfig-002-20250824 (https://download.01.org/0day-ci/archive/20250825/202508250351.vxyvTsJa-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250825/202508250351.vxyvTsJa-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202508250351.vxyvTsJa-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/dma/arm-dma350.c:849:34: warning: variable 'sg' is uninitialized when used here [-Wuninitialized]
     849 |         sglen = DIV_ROUND_UP(sg_dma_len(sg), step_max) * periods;
         |                                         ^~
   include/linux/scatterlist.h:34:27: note: expanded from macro 'sg_dma_len'
      34 | #define sg_dma_len(sg)          ((sg)->dma_length)
         |                                   ^~
   include/uapi/linux/const.h:51:40: note: expanded from macro '__KERNEL_DIV_ROUND_UP'
      51 | #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
         |                                        ^
   drivers/dma/arm-dma350.c:835:24: note: initialize the variable 'sg' to silence this warning
     835 |         struct scatterlist *sg;
         |                               ^
         |                                = NULL
   1 warning generated.


vim +/sg +849 drivers/dma/arm-dma350.c

   824	
   825	static struct dma_async_tx_descriptor *
   826	d250_prep_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
   827			 size_t buf_len, size_t period_len, enum dma_transfer_direction dir,
   828			 unsigned long flags)
   829	{
   830		struct d350_chan *dch = to_d350_chan(chan);
   831		u32 len, periods, trig, *cmd, tsz;
   832		dma_addr_t src, dst, phys, mem_addr;
   833		size_t xfer_len, step_max;
   834		struct d350_desc *desc;
   835		struct scatterlist *sg;
   836		struct d350_sg *dsg;
   837		int sglen, i;
   838	
   839		if (unlikely(!is_slave_direction(dir) || !buf_len || !period_len))
   840			return NULL;
   841	
   842		if (dir == DMA_MEM_TO_DEV)
   843			tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
   844		else
   845			tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
   846		step_max = ((1UL << 16) - 1) << tsz;
   847	
   848		periods = buf_len / period_len;
 > 849		sglen = DIV_ROUND_UP(sg_dma_len(sg), step_max) * periods;
   850	
   851		desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
   852		if (!desc)
   853			return NULL;
   854	
   855		dch->cyclic = true;
   856		dch->periods = periods;
   857		desc->sglen = sglen;
   858	
   859		sglen = 0;
   860		for (i = 0; i < periods; i++) {
   861			len = period_len;
   862			mem_addr = buf_addr + i * period_len;
   863			do {
   864				desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
   865				if (unlikely(!desc->sg[sglen].command))
   866					goto err_cmd_alloc;
   867	
   868				xfer_len = (len > step_max) ? step_max : len;
   869				desc->sg[sglen].phys = phys;
   870				dsg = &desc->sg[sglen];
   871	
   872				if (dir == DMA_MEM_TO_DEV) {
   873					src = mem_addr;
   874					dst = dch->config.dst_addr;
   875					trig = CH_CTRL_USEDESTRIGIN;
   876				} else {
   877					src = dch->config.src_addr;
   878					dst = mem_addr;
   879					trig = CH_CTRL_USESRCTRIGIN;
   880				}
   881				dsg->tsz = tsz;
   882				dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz);
   883	
   884				cmd = dsg->command;
   885				cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
   886					 LINK_XSIZE | LINK_SRCTRANSCFG |
   887					 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
   888	
   889				cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) |
   890					 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE) |
   891					 FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD) | trig;
   892	
   893				cmd[2] = lower_32_bits(src);
   894				cmd[3] = lower_32_bits(dst);
   895				cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
   896					 FIELD_PREP(CH_XY_DES, dsg->xsize);
   897				if (dir == DMA_MEM_TO_DEV) {
   898					cmd[0] |= LINK_DESTRIGINCFG;
   899					cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
   900					cmd[6] = TRANSCFG_DEVICE;
   901					cmd[7] = FIELD_PREP(CH_XY_SRC, 1);
   902					cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
   903						  FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
   904				} else {
   905					cmd[0] |= LINK_SRCTRIGINCFG;
   906					cmd[5] = TRANSCFG_DEVICE;
   907					cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
   908					cmd[7] = FIELD_PREP(CH_XY_DES, 1);
   909					cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
   910						  FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
   911				}
   912	
   913				if (sglen)
   914					desc->sg[sglen - 1].command[9] = phys | CH_LINKADDR_EN;
   915	
   916				len -= xfer_len;
   917				mem_addr += xfer_len;
   918				sglen++;
   919			} while (len);
   920			desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE,
   921								     CH_CTRL_DONETYPE_CMD);
   922		}
   923	
   924		/* cyclic list */
   925		desc->sg[sglen - 1].command[9] = desc->sg[0].phys | CH_LINKADDR_EN;
   926	
   927		mb();
   928	
   929		return vchan_tx_prep(&dch->vc, &desc->vd, flags);
   930	
   931	err_cmd_alloc:
   932		for (i = 0; i < sglen; i++)
   933			dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
   934		kfree(desc);
   935		return NULL;
   936	}
   937	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 14/14] dmaengine: dma350: Support ARM DMA-250
Posted by Krzysztof Kozlowski 1 month, 1 week ago
On 23/08/2025 17:40, Jisheng Zhang wrote:
>  	struct device *dev = &pdev->dev;
> @@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev)
>  	r = FIELD_GET(IIDR_VARIANT, reg);
>  	p = FIELD_GET(IIDR_REVISION, reg);
>  	if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM ||
> -	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350)
> -		return dev_err_probe(dev, -ENODEV, "Not a DMA-350!");
> +	    ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) &&
> +	    FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250))
> +		return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!");
>  
>  	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0);
>  	nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1;
> @@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev)
>  		return ret;
>  	}
>  
> +	if (device_is_compatible(dev, "arm,dma-250")) {

No, don't sprinkle compatibles through driver code. Driver match data is
for that.
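For example (a sketch, with an invented match-data struct; note this
also puts the compatibles in the right order, 2 before 3):

	struct d350_of_data {
		bool is_d250;
	};

	static const struct d350_of_data d250_of_data = { .is_d250 = true };

	static const struct of_device_id d350_of_match[] = {
		{ .compatible = "arm,dma-250", .data = &d250_of_data },
		{ .compatible = "arm,dma-350" },
		{}
	};

Then probe can fetch it with device_get_match_data(dev) instead of
matching compatibles by hand.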

> +		u32 cfg2;
> +		int secext_present;
> +
> +		dmac->is_d250 = true;
> +
> +		cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2);
> +		secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 1 : 0;
> +		dmac->cntx_mem_size = nchan * 64 * (1 + secext_present);
> +		dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size,
> +						    &dmac->cntx_mem_paddr,
> +						    GFP_KERNEL);
> +		if (!dmac->cntx_mem)
> +			return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n");
> +
> +		ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac);
> +		if (ret) {
> +			dma_free_coherent(dev, dmac->cntx_mem_size,
> +					  dmac->cntx_mem, dmac->cntx_mem_paddr);
> +			return ret;
> +		}
> +		writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE);
> +	}
> +
>  	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
>  	coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
>  
>  	reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
>  	dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
>  
> -	dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
> +	dev_info(dev, "%s r%dp%d with %d channels, %d requests\n",
> +		 dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq);

No, don't make drivers more verbose. Please follow Linux driver
design/coding style - this should be silent on success.

>  
>  	for (int i = min(dw, 16); i > 0; i /= 2) {
>  		dmac->dma.src_addr_widths |= BIT(i);
> @@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev)
>  	dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources;
>  	dmac->dma.device_free_chan_resources = d350_free_chan_resources;
>  	dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask);
> -	dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
> +	if (dmac->is_d250)
> +		dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy;
> +	else
> +		dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
>  	dmac->dma.device_pause = d350_pause;
>  	dmac->dma.device_resume = d350_resume;
>  	dmac->dma.device_terminate_all = d350_terminate_all;
> @@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev)
>  			return dch->irq;
>  
>  		dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg);
> -		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &
> -				FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);
> +		dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg);
> +		dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg);
>  
>  		/* Fill is a special case of Wrap */
>  		memset &= dch->has_wrap;
> @@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev)
>  		dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask);
>  		dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask);
>  		dmac->dma.device_config = d350_slave_config;
> -		dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
> -		dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
> +		if (dmac->is_d250) {
> +			dmac->dma.device_prep_slave_sg = d250_prep_slave_sg;
> +			dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic;
> +		} else {
> +			dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
> +			dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
> +		}
>  	}
>  
>  	if (memset) {
> @@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev)
>  
>  static const struct of_device_id d350_of_match[] __maybe_unused = {
>  	{ .compatible = "arm,dma-350" },
> +	{ .compatible = "arm,dma-250" },

And based on that devices would be compatible...

BTW, incorrect order - 2 < 3.



Best regards,
Krzysztof