Compared with ARM DMA-350, DMA-250 is a simplified version. They share
many common parts, but they do have differences. Add DMA-250 support,
handling those differences with separate device_prep_slave_sg,
device_prep_dma_cyclic and device_prep_dma_memcpy implementations.
DMA-250 doesn't support device_prep_dma_memset.
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
drivers/dma/arm-dma350.c | 444 +++++++++++++++++++++++++++++++++++++--
1 file changed, 424 insertions(+), 20 deletions(-)
diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c
index 5abb965c6687..0ee807424b7e 100644
--- a/drivers/dma/arm-dma350.c
+++ b/drivers/dma/arm-dma350.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2024-2025 Arm Limited
// Copyright (C) 2025 Synaptics Incorporated
-// Arm DMA-350 driver
+// Arm DMA-350/DMA-250 driver
#include <linux/bitfield.h>
#include <linux/dmaengine.h>
@@ -16,6 +16,10 @@
#include "dmaengine.h"
#include "virt-dma.h"
+#define DMANSECCTRL 0x0200
+
+#define NSEC_CNTXBASE 0x10
+
#define DMAINFO 0x0f00
#define DMA_BUILDCFG0 0xb0
@@ -26,12 +30,16 @@
#define DMA_BUILDCFG1 0xb4
#define DMA_CFG_NUM_TRIGGER_IN GENMASK(8, 0)
+#define DMA_BUILDCFG2 0xb8
+#define DMA_CFG_HAS_TZ BIT(8)
+
#define IIDR 0xc8
#define IIDR_PRODUCTID GENMASK(31, 20)
#define IIDR_VARIANT GENMASK(19, 16)
#define IIDR_REVISION GENMASK(15, 12)
#define IIDR_IMPLEMENTER GENMASK(11, 0)
+#define PRODUCTID_DMA250 0x250
#define PRODUCTID_DMA350 0x3a0
#define IMPLEMENTER_ARM 0x43b
@@ -140,6 +148,7 @@
#define CH_CFG_HAS_TRIGSEL BIT(7)
#define CH_CFG_HAS_TRIGIN BIT(5)
#define CH_CFG_HAS_WRAP BIT(1)
+#define CH_CFG_HAS_XSIZEHI BIT(0)
#define LINK_REGCLEAR BIT(0)
@@ -218,6 +227,7 @@ struct d350_chan {
bool cyclic;
bool has_trig;
bool has_wrap;
+ bool has_xsizehi;
bool coherent;
};
@@ -225,6 +235,10 @@ struct d350 {
struct dma_device dma;
int nchan;
int nreq;
+ bool is_d250;
+ dma_addr_t cntx_mem_paddr;
+ void *cntx_mem;
+ u32 cntx_mem_size;
struct d350_chan channels[] __counted_by(nchan);
};
@@ -238,6 +252,11 @@ static inline struct d350_desc *to_d350_desc(struct virt_dma_desc *vd)
return container_of(vd, struct d350_desc, vd);
}
+static inline struct d350 *to_d350(struct dma_device *dd)
+{
+ return container_of(dd, struct d350, dma);
+}
+
static void d350_desc_free(struct virt_dma_desc *vd)
{
struct d350_chan *dch = to_d350_chan(vd->tx.chan);
@@ -585,6 +604,337 @@ static int d350_slave_config(struct dma_chan *chan, struct dma_slave_config *con
return 0;
}
+static struct dma_async_tx_descriptor *d250_prep_memcpy(struct dma_chan *chan,
+ dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ struct d350_desc *desc;
+ u32 *cmd, *la_cmd, tsz;
+ int sglen, i;
+ struct d350_sg *sg;
+ size_t xfer_len, step_max;
+ dma_addr_t phys;
+
+ tsz = __ffs(len | dest | src | (1 << dch->tsz));
+ step_max = ((1UL << 16) - 1) << tsz;
+ sglen = DIV_ROUND_UP(len, step_max);
+
+ desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->sglen = sglen;
+ sglen = 0;
+ while (len) {
+ sg = &desc->sg[sglen];
+ xfer_len = (len > step_max) ? step_max : len;
+ sg->tsz = __ffs(xfer_len | dest | src | (1 << dch->tsz));
+ sg->xsize = lower_16_bits(xfer_len >> sg->tsz);
+
+ sg->command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+ if (unlikely(!sg->command))
+ goto err_cmd_alloc;
+ sg->phys = phys;
+
+ cmd = sg->command;
+ if (!sglen) {
+ cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+ LINK_XSIZE | LINK_SRCTRANSCFG |
+ LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+
+ cmd[2] = lower_32_bits(src);
+ cmd[3] = lower_32_bits(dest);
+ cmd[4] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+ FIELD_PREP(CH_XY_DES, sg->xsize);
+ cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[7] = FIELD_PREP(CH_XY_SRC, 1) | FIELD_PREP(CH_XY_DES, 1);
+ la_cmd = &cmd[8];
+ } else {
+ *la_cmd = phys | CH_LINKADDR_EN;
+ if (len <= step_max) {
+ cmd[0] = LINK_CTRL | LINK_XSIZE | LINK_LINKADDR;
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+ cmd[2] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+ FIELD_PREP(CH_XY_DES, sg->xsize);
+ la_cmd = &cmd[3];
+ } else {
+ cmd[0] = LINK_XSIZE | LINK_LINKADDR;
+ cmd[1] = FIELD_PREP(CH_XY_SRC, sg->xsize) |
+ FIELD_PREP(CH_XY_DES, sg->xsize);
+ la_cmd = &cmd[2];
+ }
+ }
+
+ len -= xfer_len;
+ src += xfer_len;
+ dest += xfer_len;
+ sglen++;
+ }
+
+ /* the last cmdlink */
+ *la_cmd = 0;
+ desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+ mb();
+
+ return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+ for (i = 0; i < sglen; i++)
+ dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+ kfree(desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d250_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ dma_addr_t src, dst, phys, mem_addr;
+ size_t xfer_len, step_max;
+ u32 len, trig, *cmd, *la_cmd, tsz;
+ struct d350_desc *desc;
+ struct scatterlist *sg;
+ struct d350_sg *dsg;
+ int i, sglen = 0;
+
+ if (unlikely(!is_slave_direction(dir) || !sg_len))
+ return NULL;
+
+ if (dir == DMA_MEM_TO_DEV)
+ tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
+ else
+ tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
+ step_max = ((1UL << 16) - 1) << tsz;
+
+ for_each_sg(sgl, sg, sg_len, i)
+ sglen += DIV_ROUND_UP(sg_dma_len(sg), step_max);
+
+ desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->sglen = sglen;
+
+ sglen = 0;
+ for_each_sg(sgl, sg, sg_len, i) {
+ len = sg_dma_len(sg);
+ mem_addr = sg_dma_address(sg);
+
+ do {
+ desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+ if (unlikely(!desc->sg[sglen].command))
+ goto err_cmd_alloc;
+
+ xfer_len = (len > step_max) ? step_max : len;
+ desc->sg[sglen].phys = phys;
+ dsg = &desc->sg[sglen];
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = mem_addr;
+ dst = dch->config.dst_addr;
+ trig = CH_CTRL_USEDESTRIGIN;
+ } else {
+ src = dch->config.src_addr;
+ dst = mem_addr;
+ trig = CH_CTRL_USESRCTRIGIN;
+ }
+ dsg->tsz = tsz;
+ dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz);
+
+ cmd = dsg->command;
+ if (!sglen) {
+ cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+ LINK_XSIZE | LINK_SRCTRANSCFG |
+ LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+
+ cmd[2] = lower_32_bits(src);
+ cmd[3] = lower_32_bits(dst);
+ cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+ FIELD_PREP(CH_XY_DES, dsg->xsize);
+ if (dir == DMA_MEM_TO_DEV) {
+ cmd[0] |= LINK_DESTRIGINCFG;
+ cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[6] = TRANSCFG_DEVICE;
+ cmd[7] = FIELD_PREP(CH_XY_SRC, 1);
+ cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
+ FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
+ } else {
+ cmd[0] |= LINK_SRCTRIGINCFG;
+ cmd[5] = TRANSCFG_DEVICE;
+ cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[7] = FIELD_PREP(CH_XY_DES, 1);
+ cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
+ FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
+ }
+ la_cmd = &cmd[9];
+ } else {
+ *la_cmd = phys | CH_LINKADDR_EN;
+ if (sglen == desc->sglen - 1) {
+ cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+ LINK_XSIZE | LINK_LINKADDR;
+ cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | trig |
+ FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE);
+ cmd[2] = lower_32_bits(src);
+ cmd[3] = lower_32_bits(dst);
+ cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+ FIELD_PREP(CH_XY_DES, dsg->xsize);
+ la_cmd = &cmd[5];
+ } else {
+ cmd[0] = LINK_SRCADDR | LINK_DESADDR |
+ LINK_XSIZE | LINK_LINKADDR;
+ cmd[1] = lower_32_bits(src);
+ cmd[2] = lower_32_bits(dst);
+ cmd[3] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+ FIELD_PREP(CH_XY_DES, dsg->xsize);
+ la_cmd = &cmd[4];
+ }
+ }
+
+ len -= xfer_len;
+ mem_addr += xfer_len;
+ sglen++;
+ } while (len);
+ }
+
+ /* the last command */
+ *la_cmd = 0;
+ desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD);
+
+ mb();
+
+ return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+ for (i = 0; i < sglen; i++)
+ dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+ kfree(desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d250_prep_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len, enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct d350_chan *dch = to_d350_chan(chan);
+ u32 len, periods, trig, *cmd, tsz;
+ dma_addr_t src, dst, phys, mem_addr;
+ size_t xfer_len, step_max;
+ struct d350_desc *desc;
+ struct scatterlist *sg;
+ struct d350_sg *dsg;
+ int sglen, i;
+
+ if (unlikely(!is_slave_direction(dir) || !buf_len || !period_len))
+ return NULL;
+
+ if (dir == DMA_MEM_TO_DEV)
+ tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz));
+ else
+ tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz));
+ step_max = ((1UL << 16) - 1) << tsz;
+
+ periods = buf_len / period_len;
+	sglen = DIV_ROUND_UP(period_len, step_max) * periods;
+
+ desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ dch->cyclic = true;
+ dch->periods = periods;
+ desc->sglen = sglen;
+
+ sglen = 0;
+ for (i = 0; i < periods; i++) {
+ len = period_len;
+ mem_addr = buf_addr + i * period_len;
+ do {
+ desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys);
+ if (unlikely(!desc->sg[sglen].command))
+ goto err_cmd_alloc;
+
+ xfer_len = (len > step_max) ? step_max : len;
+ desc->sg[sglen].phys = phys;
+ dsg = &desc->sg[sglen];
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = mem_addr;
+ dst = dch->config.dst_addr;
+ trig = CH_CTRL_USEDESTRIGIN;
+ } else {
+ src = dch->config.src_addr;
+ dst = mem_addr;
+ trig = CH_CTRL_USESRCTRIGIN;
+ }
+ dsg->tsz = tsz;
+ dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz);
+
+ cmd = dsg->command;
+ cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR |
+ LINK_XSIZE | LINK_SRCTRANSCFG |
+ LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR;
+
+			cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) |
+				 FIELD_PREP(CH_CTRL_XTYPE,
+					    CH_CTRL_XTYPE_CONTINUE) | trig;
+
+ cmd[2] = lower_32_bits(src);
+ cmd[3] = lower_32_bits(dst);
+ cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) |
+ FIELD_PREP(CH_XY_DES, dsg->xsize);
+ if (dir == DMA_MEM_TO_DEV) {
+ cmd[0] |= LINK_DESTRIGINCFG;
+ cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[6] = TRANSCFG_DEVICE;
+ cmd[7] = FIELD_PREP(CH_XY_SRC, 1);
+ cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) |
+ FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ);
+ } else {
+ cmd[0] |= LINK_SRCTRIGINCFG;
+ cmd[5] = TRANSCFG_DEVICE;
+ cmd[6] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC;
+ cmd[7] = FIELD_PREP(CH_XY_DES, 1);
+ cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) |
+ FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ);
+ }
+
+ if (sglen)
+ desc->sg[sglen - 1].command[9] = phys | CH_LINKADDR_EN;
+
+ len -= xfer_len;
+ mem_addr += xfer_len;
+ sglen++;
+ } while (len);
+ desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE,
+ CH_CTRL_DONETYPE_CMD);
+ }
+
+ /* cyclic list */
+ desc->sg[sglen - 1].command[9] = desc->sg[0].phys | CH_LINKADDR_EN;
+
+ mb();
+
+ return vchan_tx_prep(&dch->vc, &desc->vd, flags);
+
+err_cmd_alloc:
+ for (i = 0; i < sglen; i++)
+ dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys);
+ kfree(desc);
+ return NULL;
+}
+
static int d350_pause(struct dma_chan *chan)
{
struct d350_chan *dch = to_d350_chan(chan);
@@ -620,20 +970,31 @@ static u32 d350_get_residue(struct d350_chan *dch)
u32 res, xsize, xsizehi, linkaddr, linkaddrhi, hi_new;
int i, sgcur, retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */
struct d350_desc *desc = dch->desc;
+ struct d350 *dmac = to_d350(dch->vc.chan.device);
- hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
- do {
- xsizehi = hi_new;
- xsize = readl_relaxed(dch->base + CH_XSIZE);
+ if (dch->has_xsizehi) {
hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
- } while (xsizehi != hi_new && --retries);
+ do {
+ xsizehi = hi_new;
+ xsize = readl_relaxed(dch->base + CH_XSIZE);
+ hi_new = readl_relaxed(dch->base + CH_XSIZEHI);
+ } while (xsizehi != hi_new && --retries);
+ } else {
+ xsize = readl_relaxed(dch->base + CH_XSIZE);
+ xsizehi = 0;
+ }
- hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
- do {
- linkaddrhi = hi_new;
- linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+ if (!dmac->is_d250) {
hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
- } while (linkaddrhi != hi_new && --retries);
+ do {
+ linkaddrhi = hi_new;
+ linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+ hi_new = readl_relaxed(dch->base + CH_LINKADDRHI);
+ } while (linkaddrhi != hi_new && --retries);
+ } else {
+ linkaddr = readl_relaxed(dch->base + CH_LINKADDR);
+ linkaddrhi = 0;
+ }
for (i = 0; i < desc->sglen; i++) {
if (desc->sg[i].phys == (((u64)linkaddrhi << 32) | (linkaddr & ~CH_LINKADDR_EN)))
@@ -876,6 +1237,14 @@ static void d350_free_chan_resources(struct dma_chan *chan)
dch->cmd_pool = NULL;
}
+static void d250_cntx_mem_release(void *ptr)
+{
+ struct d350 *dmac = ptr;
+ struct device *dev = dmac->dma.dev;
+
+ dma_free_coherent(dev, dmac->cntx_mem_size, dmac->cntx_mem, dmac->cntx_mem_paddr);
+}
+
static int d350_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev)
r = FIELD_GET(IIDR_VARIANT, reg);
p = FIELD_GET(IIDR_REVISION, reg);
if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM ||
- FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350)
- return dev_err_probe(dev, -ENODEV, "Not a DMA-350!");
+ ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) &&
+ FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250))
+ return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!");
reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0);
nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1;
@@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev)
return ret;
}
+ if (device_is_compatible(dev, "arm,dma-250")) {
+ u32 cfg2;
+ int secext_present;
+
+ dmac->is_d250 = true;
+
+ cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2);
+ secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 1 : 0;
+ dmac->cntx_mem_size = nchan * 64 * (1 + secext_present);
+ dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size,
+ &dmac->cntx_mem_paddr,
+ GFP_KERNEL);
+ if (!dmac->cntx_mem)
+ return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n");
+
+ ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac);
+ if (ret) {
+ dma_free_coherent(dev, dmac->cntx_mem_size,
+ dmac->cntx_mem, dmac->cntx_mem_paddr);
+ return ret;
+ }
+ writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE);
+ }
+
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw));
coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT;
reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1);
dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg);
- dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq);
+ dev_info(dev, "%s r%dp%d with %d channels, %d requests\n",
+ dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq);
for (int i = min(dw, 16); i > 0; i /= 2) {
dmac->dma.src_addr_widths |= BIT(i);
@@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev)
dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources;
dmac->dma.device_free_chan_resources = d350_free_chan_resources;
dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask);
- dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
+ if (dmac->is_d250)
+ dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy;
+ else
+ dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy;
dmac->dma.device_pause = d350_pause;
dmac->dma.device_resume = d350_resume;
dmac->dma.device_terminate_all = d350_terminate_all;
@@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev)
return dch->irq;
dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg);
- dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) &
- FIELD_GET(CH_CFG_HAS_TRIGSEL, reg);
+ dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg);
+ dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg);
/* Fill is a special case of Wrap */
memset &= dch->has_wrap;
@@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev)
dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask);
dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask);
dmac->dma.device_config = d350_slave_config;
- dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
- dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
+ if (dmac->is_d250) {
+ dmac->dma.device_prep_slave_sg = d250_prep_slave_sg;
+ dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic;
+ } else {
+ dmac->dma.device_prep_slave_sg = d350_prep_slave_sg;
+ dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic;
+ }
}
if (memset) {
@@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev)
static const struct of_device_id d350_of_match[] __maybe_unused = {
{ .compatible = "arm,dma-350" },
+ { .compatible = "arm,dma-250" },
{}
};
MODULE_DEVICE_TABLE(of, d350_of_match);
@@ -1035,5 +1439,5 @@ module_platform_driver(d350_driver);
MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
MODULE_AUTHOR("Jisheng Zhang <jszhang@kernel.org>");
-MODULE_DESCRIPTION("Arm DMA-350 driver");
+MODULE_DESCRIPTION("Arm DMA-350/DMA-250 driver");
MODULE_LICENSE("GPL v2");
--
2.50.0
On 2025-08-23 4:40 pm, Jisheng Zhang wrote: > Compared with ARM DMA-350, DMA-250 is a simplified version. They share > many common parts, but they do have difference. Add DMA-250 support > by handling their difference by using different device_prep_slave_sg, > device_prep_dma_cyclic and device_prep_dma_memcpy. DMA-250 doesn't > support device_prep_dma_memset. > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > --- > drivers/dma/arm-dma350.c | 444 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 424 insertions(+), 20 deletions(-) > > diff --git a/drivers/dma/arm-dma350.c b/drivers/dma/arm-dma350.c > index 5abb965c6687..0ee807424b7e 100644 > --- a/drivers/dma/arm-dma350.c > +++ b/drivers/dma/arm-dma350.c > @@ -1,7 +1,7 @@ > // SPDX-License-Identifier: GPL-2.0 > // Copyright (C) 2024-2025 Arm Limited > // Copyright (C) 2025 Synaptics Incorporated > -// Arm DMA-350 driver > +// Arm DMA-350/DMA-250 driver Yeah, that's going to get old fast... By all means update the Kconfig help text if you think it's helpful to end users, but I don't think anyone expects comments like this to be exhaustive, so honestly I'd save the churn. > #include <linux/bitfield.h> > #include <linux/dmaengine.h> > @@ -16,6 +16,10 @@ > #include "dmaengine.h" > #include "virt-dma.h" > > +#define DMANSECCTRL 0x0200 > + > +#define NSEC_CNTXBASE 0x10 > + > #define DMAINFO 0x0f00 > > #define DMA_BUILDCFG0 0xb0 > @@ -26,12 +30,16 @@ > #define DMA_BUILDCFG1 0xb4 > #define DMA_CFG_NUM_TRIGGER_IN GENMASK(8, 0) > > +#define DMA_BUILDCFG2 0xb8 > +#define DMA_CFG_HAS_TZ BIT(8)I don't think we need to care about that. Yes, the TRM describes the total context memory size required from the PoV of the hardware itself, but even if SEC_CNTXBASE does exist, Non-Secure Linux can't set it, so clearly Linux can't need to provide memory for it. 
> + > #define IIDR 0xc8 > #define IIDR_PRODUCTID GENMASK(31, 20) > #define IIDR_VARIANT GENMASK(19, 16) > #define IIDR_REVISION GENMASK(15, 12) > #define IIDR_IMPLEMENTER GENMASK(11, 0) > > +#define PRODUCTID_DMA250 0x250 > #define PRODUCTID_DMA350 0x3a0 > #define IMPLEMENTER_ARM 0x43b > > @@ -140,6 +148,7 @@ > #define CH_CFG_HAS_TRIGSEL BIT(7) > #define CH_CFG_HAS_TRIGIN BIT(5) > #define CH_CFG_HAS_WRAP BIT(1) > +#define CH_CFG_HAS_XSIZEHI BIT(0) > > > #define LINK_REGCLEAR BIT(0) > @@ -218,6 +227,7 @@ struct d350_chan { > bool cyclic; > bool has_trig; > bool has_wrap; > + bool has_xsizehi; > bool coherent; > }; > > @@ -225,6 +235,10 @@ struct d350 { > struct dma_device dma; > int nchan; > int nreq; > + bool is_d250; That won't scale, but it also shouldn't be needed anyway - other than the context memory which is easily handled within the scope of the probe routine that already has the IIDR to hand, everything else ought to be based on the relevant feature flags. > + dma_addr_t cntx_mem_paddr; > + void *cntx_mem; > + u32 cntx_mem_size; > struct d350_chan channels[] __counted_by(nchan); > }; > > @@ -238,6 +252,11 @@ static inline struct d350_desc *to_d350_desc(struct virt_dma_desc *vd) > return container_of(vd, struct d350_desc, vd); > } > > +static inline struct d350 *to_d350(struct dma_device *dd) > +{ > + return container_of(dd, struct d350, dma); > +} > + > static void d350_desc_free(struct virt_dma_desc *vd) > { > struct d350_chan *dch = to_d350_chan(vd->tx.chan); > @@ -585,6 +604,337 @@ static int d350_slave_config(struct dma_chan *chan, struct dma_slave_config *con > return 0; > } > > +static struct dma_async_tx_descriptor *d250_prep_memcpy(struct dma_chan *chan, > + dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) Case in point: We don't need a mess of separate copy-pasted functions, we just need to evolve the existing ones to split the respective operations into either 32-bit or 16-bit chunks depending on has_xsizehi - even on DMA-350, 
>32-bit sizes aren't properly supported since I never got as far as command linking, but there's no reason they shouldn't be. > +{ > + struct d350_chan *dch = to_d350_chan(chan); > + struct d350_desc *desc; > + u32 *cmd, *la_cmd, tsz; > + int sglen, i; > + struct d350_sg *sg; > + size_t xfer_len, step_max; > + dma_addr_t phys; > + > + tsz = __ffs(len | dest | src | (1 << dch->tsz)); > + step_max = ((1UL << 16) - 1) << tsz; > + sglen = DIV_ROUND_UP(len, step_max); > + > + desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT); > + if (!desc) > + return NULL; > + > + desc->sglen = sglen; > + sglen = 0; > + while (len) { > + sg = &desc->sg[sglen]; > + xfer_len = (len > step_max) ? step_max : len; If only we had a min() function... > + sg->tsz = __ffs(xfer_len | dest | src | (1 << dch->tsz)); Um, what? By this point we've already decided to split based on the initial tsz, what purpose does recalculating it serve? > + sg->xsize = lower_16_bits(xfer_len >> sg->tsz); > + > + sg->command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys); > + if (unlikely(!sg->command)) > + goto err_cmd_alloc; > + sg->phys = phys; > + > + cmd = sg->command; > + if (!sglen) { > + cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR | > + LINK_XSIZE | LINK_SRCTRANSCFG | > + LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR; > + > + cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) | > + FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE); > + > + cmd[2] = lower_32_bits(src); > + cmd[3] = lower_32_bits(dest); > + cmd[4] = FIELD_PREP(CH_XY_SRC, sg->xsize) | > + FIELD_PREP(CH_XY_DES, sg->xsize); > + cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC; > + cmd[6] = dch->coherent ? 
TRANSCFG_WB : TRANSCFG_NC; > + cmd[7] = FIELD_PREP(CH_XY_SRC, 1) | FIELD_PREP(CH_XY_DES, 1); > + la_cmd = &cmd[8]; > + } else { > + *la_cmd = phys | CH_LINKADDR_EN; > + if (len <= step_max) { > + cmd[0] = LINK_CTRL | LINK_XSIZE | LINK_LINKADDR; > + cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, sg->tsz) | > + FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE); > + cmd[2] = FIELD_PREP(CH_XY_SRC, sg->xsize) | > + FIELD_PREP(CH_XY_DES, sg->xsize); > + la_cmd = &cmd[3]; > + } else { > + cmd[0] = LINK_XSIZE | LINK_LINKADDR; > + cmd[1] = FIELD_PREP(CH_XY_SRC, sg->xsize) | > + FIELD_PREP(CH_XY_DES, sg->xsize); > + la_cmd = &cmd[2]; > + } Ok, we really need to figure out a better abstraction for command construction, the hard-coded array indices were a bad enough idea to start with, but this is almost impossible to make sense of. > + } > + > + len -= xfer_len; > + src += xfer_len; > + dest += xfer_len; > + sglen++; > + } > + > + /* the last cmdlink */ > + *la_cmd = 0; > + desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD); As for that, I don't even... Furthermore, all these loops and conditionals are crazy anyway, and thoroughly failing to do justice to the hardware actually being pretty cool, namely that *commands can loop themselves*! Any single buffer/sg segment should take at most two commands - one dividing as much of the length as possible between XSIZE{HI} and CMDRESTARTCOUNT using REGRELOADTYPE=1, and/or one to transfer whatever non-multiple tail portion remains. Honestly I'm sad the project for which I originally started this driver got canned, as this is the part I was really looking forward to having some fun with... [...] 
> static int d350_pause(struct dma_chan *chan) > { > struct d350_chan *dch = to_d350_chan(chan); > @@ -620,20 +970,31 @@ static u32 d350_get_residue(struct d350_chan *dch) > u32 res, xsize, xsizehi, linkaddr, linkaddrhi, hi_new; > int i, sgcur, retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */ > struct d350_desc *desc = dch->desc; > + struct d350 *dmac = to_d350(dch->vc.chan.device); > > - hi_new = readl_relaxed(dch->base + CH_XSIZEHI); > - do { > - xsizehi = hi_new; > - xsize = readl_relaxed(dch->base + CH_XSIZE); > + if (dch->has_xsizehi) { > hi_new = readl_relaxed(dch->base + CH_XSIZEHI); > - } while (xsizehi != hi_new && --retries); > + do { > + xsizehi = hi_new; > + xsize = readl_relaxed(dch->base + CH_XSIZE); > + hi_new = readl_relaxed(dch->base + CH_XSIZEHI); > + } while (xsizehi != hi_new && --retries); > + } else { > + xsize = readl_relaxed(dch->base + CH_XSIZE); > + xsizehi = 0; > + } This is unnecessary - if the CH_XSIZEHI location isn't the actual register then it's RAZ/WI, which means the existing logic can take full advantage of it reading as zero and still work just the same. > - hi_new = readl_relaxed(dch->base + CH_LINKADDRHI); > - do { > - linkaddrhi = hi_new; > - linkaddr = readl_relaxed(dch->base + CH_LINKADDR); > + if (!dmac->is_d250) { And similarly here. The only thing we should perhaps do specially for LINKADDRHI is omit it from command generation when ADDR_WIDTH <= 32 in general. I admit I was lazy there, since it's currently harmless for d350_start_next() to write the register location unconditionally, but I'm not sure how a 32-bit DMA-350 would handle it in an actual command link header. 
> hi_new = readl_relaxed(dch->base + CH_LINKADDRHI); > - } while (linkaddrhi != hi_new && --retries); > + do { > + linkaddrhi = hi_new; > + linkaddr = readl_relaxed(dch->base + CH_LINKADDR); > + hi_new = readl_relaxed(dch->base + CH_LINKADDRHI); > + } while (linkaddrhi != hi_new && --retries); > + } else { > + linkaddr = readl_relaxed(dch->base + CH_LINKADDR); > + linkaddrhi = 0; > + } > > for (i = 0; i < desc->sglen; i++) { > if (desc->sg[i].phys == (((u64)linkaddrhi << 32) | (linkaddr & ~CH_LINKADDR_EN))) > @@ -876,6 +1237,14 @@ static void d350_free_chan_resources(struct dma_chan *chan) > dch->cmd_pool = NULL; > } > > +static void d250_cntx_mem_release(void *ptr) > +{ > + struct d350 *dmac = ptr; > + struct device *dev = dmac->dma.dev; > + > + dma_free_coherent(dev, dmac->cntx_mem_size, dmac->cntx_mem, dmac->cntx_mem_paddr); > +} > + > static int d350_probe(struct platform_device *pdev) > { > struct device *dev = &pdev->dev; > @@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev) > r = FIELD_GET(IIDR_VARIANT, reg); > p = FIELD_GET(IIDR_REVISION, reg); > if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM || > - FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) > - return dev_err_probe(dev, -ENODEV, "Not a DMA-350!"); > + ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) && > + FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250)) > + return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!"); > > reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0); > nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1; > @@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev) > return ret; > } > > + if (device_is_compatible(dev, "arm,dma-250")) { If only we had a completely reliable product ID from the hardware itself... > + u32 cfg2; > + int secext_present; > + > + dmac->is_d250 = true; > + > + cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2); > + secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 
1 : 0; > + dmac->cntx_mem_size = nchan * 64 * (1 + secext_present); As before I think that's wrong. > + dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size, > + &dmac->cntx_mem_paddr, > + GFP_KERNEL); This is too early, it needs to wait until after we've set the DMA mask. Also since this is purely private memory for the device, it may as well use DMA_ATTR_NO_KERNEL_MAPPING. > + if (!dmac->cntx_mem) > + return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n"); Just return -ENOMEM - dev_err_probe() adds nothing. > + ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac); > + if (ret) { > + dma_free_coherent(dev, dmac->cntx_mem_size, > + dmac->cntx_mem, dmac->cntx_mem_paddr); a) Understand that the mildly non-obvious "or reset" means it already calls the cleanup action on error, so this would be a double-free. b) Don't reinvent dmam_alloc_*() in the first place though. > + return ret; > + } > + writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE); Perhaps we should check that this hasn't already been set up first? I mean, we can't necessarily even be sure the context memory interface can access the same address space as the DMA transfer interface at all; the design intent is at least partly to allow connecting a dedicated SRAM directly, see figure 1 here: https://developer.arm.com/documentation/108001/0000/DMAC-interfaces/AHB5-manager-interfaces/Separate-AHB5-ports-for-data-and-virtual-channel-context?lang=en However I'm not sure how feasible that is to detect from software - the base register alone clearly isn't foolproof since 0 could be a valid address (especially in a private SRAM). At worst I suppose we might end up needing a DMA-250-specific DT property to say whether it does or doesn't need context memory from the OS... 
> + } > + > dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw)); > coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT; > > reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1); > dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg); > > - dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq); > + dev_info(dev, "%s r%dp%d with %d channels, %d requests\n", > + dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq); As Krzysztof said, this is a debug message and it's staying a debug message. And just replace "DMA-350" with "ProductID 0x%x" - it's only meant as a sanity-check that we're looking at the hardware we expect to be looking at. > for (int i = min(dw, 16); i > 0; i /= 2) { > dmac->dma.src_addr_widths |= BIT(i); > @@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev) > dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources; > dmac->dma.device_free_chan_resources = d350_free_chan_resources; > dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask); > - dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy; > + if (dmac->is_d250) > + dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy; > + else > + dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy; > dmac->dma.device_pause = d350_pause; > dmac->dma.device_resume = d350_resume; > dmac->dma.device_terminate_all = d350_terminate_all; > @@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev) > return dch->irq; > > dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg); > - dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) & > - FIELD_GET(CH_CFG_HAS_TRIGSEL, reg); Not only is this in the wrong patch, it's the wrong change to make anyway. If you're only adding support for fixed triggers, you need to explicitly *exclude* selectable triggers from that, because they work differently. Thanks, Robin. 
> + dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg); > + dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg); > > /* Fill is a special case of Wrap */ > memset &= dch->has_wrap; > @@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev) > dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask); > dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask); > dmac->dma.device_config = d350_slave_config; > - dmac->dma.device_prep_slave_sg = d350_prep_slave_sg; > - dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic; > + if (dmac->is_d250) { > + dmac->dma.device_prep_slave_sg = d250_prep_slave_sg; > + dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic; > + } else { > + dmac->dma.device_prep_slave_sg = d350_prep_slave_sg; > + dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic; > + } > } > > if (memset) { > @@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev) > > static const struct of_device_id d350_of_match[] __maybe_unused = { > { .compatible = "arm,dma-350" }, > + { .compatible = "arm,dma-250" }, > {} > }; > MODULE_DEVICE_TABLE(of, d350_of_match); > @@ -1035,5 +1439,5 @@ module_platform_driver(d350_driver); > > MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>"); > MODULE_AUTHOR("Jisheng Zhang <jszhang@kernel.org>"); > -MODULE_DESCRIPTION("Arm DMA-350 driver"); > +MODULE_DESCRIPTION("Arm DMA-350/DMA-250 driver"); > MODULE_LICENSE("GPL v2");
Hi Jisheng, kernel test robot noticed the following build warnings: [auto build test WARNING on vkoul-dmaengine/next] [also build test WARNING on robh/for-next krzk-dt/for-next linus/master v6.17-rc2 next-20250822] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Jisheng-Zhang/dmaengine-dma350-Fix-CH_CTRL_USESRCTRIGIN-definition/20250824-000425 base: https://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git next patch link: https://lore.kernel.org/r/20250823154009.25992-15-jszhang%40kernel.org patch subject: [PATCH 14/14] dmaengine: dma350: Support ARM DMA-250 config: x86_64-buildonly-randconfig-002-20250824 (https://download.01.org/0day-ci/archive/20250825/202508250351.vxyvTsJa-lkp@intel.com/config) compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250825/202508250351.vxyvTsJa-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202508250351.vxyvTsJa-lkp@intel.com/ All warnings (new ones prefixed by >>): >> drivers/dma/arm-dma350.c:849:34: warning: variable 'sg' is uninitialized when used here [-Wuninitialized] 849 | sglen = DIV_ROUND_UP(sg_dma_len(sg), step_max) * periods; | ^~ include/linux/scatterlist.h:34:27: note: expanded from macro 'sg_dma_len' 34 | #define sg_dma_len(sg) ((sg)->dma_length) | ^~ include/uapi/linux/const.h:51:40: note: expanded from macro '__KERNEL_DIV_ROUND_UP' 51 | #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) | ^ drivers/dma/arm-dma350.c:835:24: note: initialize the variable 'sg' to silence this warning 835 | struct scatterlist *sg; | ^ | = NULL 1 warning generated. vim +/sg +849 drivers/dma/arm-dma350.c 824 825 static struct dma_async_tx_descriptor * 826 d250_prep_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, 827 size_t buf_len, size_t period_len, enum dma_transfer_direction dir, 828 unsigned long flags) 829 { 830 struct d350_chan *dch = to_d350_chan(chan); 831 u32 len, periods, trig, *cmd, tsz; 832 dma_addr_t src, dst, phys, mem_addr; 833 size_t xfer_len, step_max; 834 struct d350_desc *desc; 835 struct scatterlist *sg; 836 struct d350_sg *dsg; 837 int sglen, i; 838 839 if (unlikely(!is_slave_direction(dir) || !buf_len || !period_len)) 840 return NULL; 841 842 if (dir == DMA_MEM_TO_DEV) 843 tsz = __ffs(dch->config.dst_addr_width | (1 << dch->tsz)); 844 else 845 tsz = __ffs(dch->config.src_addr_width | (1 << dch->tsz)); 846 step_max = ((1UL << 16) - 1) << tsz; 847 848 periods = buf_len / period_len; > 849 sglen = DIV_ROUND_UP(sg_dma_len(sg), step_max) * periods; 850 851 desc = kzalloc(struct_size(desc, sg, sglen), GFP_NOWAIT); 852 if (!desc) 853 return NULL; 854 855 dch->cyclic = true; 856 dch->periods = periods; 857 desc->sglen = sglen; 858 859 sglen = 0; 860 for (i = 0; i 
< periods; i++) { 861 len = period_len; 862 mem_addr = buf_addr + i * period_len; 863 do { 864 desc->sg[sglen].command = dma_pool_zalloc(dch->cmd_pool, GFP_NOWAIT, &phys); 865 if (unlikely(!desc->sg[sglen].command)) 866 goto err_cmd_alloc; 867 868 xfer_len = (len > step_max) ? step_max : len; 869 desc->sg[sglen].phys = phys; 870 dsg = &desc->sg[sglen]; 871 872 if (dir == DMA_MEM_TO_DEV) { 873 src = mem_addr; 874 dst = dch->config.dst_addr; 875 trig = CH_CTRL_USEDESTRIGIN; 876 } else { 877 src = dch->config.src_addr; 878 dst = mem_addr; 879 trig = CH_CTRL_USESRCTRIGIN; 880 } 881 dsg->tsz = tsz; 882 dsg->xsize = lower_16_bits(xfer_len >> dsg->tsz); 883 884 cmd = dsg->command; 885 cmd[0] = LINK_CTRL | LINK_SRCADDR | LINK_DESADDR | 886 LINK_XSIZE | LINK_SRCTRANSCFG | 887 LINK_DESTRANSCFG | LINK_XADDRINC | LINK_LINKADDR; 888 889 cmd[1] = FIELD_PREP(CH_CTRL_TRANSIZE, dsg->tsz) | 890 FIELD_PREP(CH_CTRL_XTYPE, CH_CTRL_XTYPE_CONTINUE) | 891 FIELD_PREP(CH_CTRL_DONETYPE, CH_CTRL_DONETYPE_CMD) | trig; 892 893 cmd[2] = lower_32_bits(src); 894 cmd[3] = lower_32_bits(dst); 895 cmd[4] = FIELD_PREP(CH_XY_SRC, dsg->xsize) | 896 FIELD_PREP(CH_XY_DES, dsg->xsize); 897 if (dir == DMA_MEM_TO_DEV) { 898 cmd[0] |= LINK_DESTRIGINCFG; 899 cmd[5] = dch->coherent ? TRANSCFG_WB : TRANSCFG_NC; 900 cmd[6] = TRANSCFG_DEVICE; 901 cmd[7] = FIELD_PREP(CH_XY_SRC, 1); 902 cmd[8] = FIELD_PREP(CH_DESTRIGINMODE, CH_DESTRIG_DMA_FC) | 903 FIELD_PREP(CH_DESTRIGINTYPE, CH_DESTRIG_HW_REQ); 904 } else { 905 cmd[0] |= LINK_SRCTRIGINCFG; 906 cmd[5] = TRANSCFG_DEVICE; 907 cmd[6] = dch->coherent ? 
TRANSCFG_WB : TRANSCFG_NC; 908 cmd[7] = FIELD_PREP(CH_XY_DES, 1); 909 cmd[8] = FIELD_PREP(CH_SRCTRIGINMODE, CH_SRCTRIG_DMA_FC) | 910 FIELD_PREP(CH_SRCTRIGINTYPE, CH_SRCTRIG_HW_REQ); 911 } 912 913 if (sglen) 914 desc->sg[sglen - 1].command[9] = phys | CH_LINKADDR_EN; 915 916 len -= xfer_len; 917 mem_addr += xfer_len; 918 sglen++; 919 } while (len); 920 desc->sg[sglen - 1].command[1] |= FIELD_PREP(CH_CTRL_DONETYPE, 921 CH_CTRL_DONETYPE_CMD); 922 } 923 924 /* cyclic list */ 925 desc->sg[sglen - 1].command[9] = desc->sg[0].phys | CH_LINKADDR_EN; 926 927 mb(); 928 929 return vchan_tx_prep(&dch->vc, &desc->vd, flags); 930 931 err_cmd_alloc: 932 for (i = 0; i < sglen; i++) 933 dma_pool_free(dch->cmd_pool, desc->sg[i].command, desc->sg[i].phys); 934 kfree(desc); 935 return NULL; 936 } 937 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On 23/08/2025 17:40, Jisheng Zhang wrote: > struct device *dev = &pdev->dev; > @@ -893,8 +1262,9 @@ static int d350_probe(struct platform_device *pdev) > r = FIELD_GET(IIDR_VARIANT, reg); > p = FIELD_GET(IIDR_REVISION, reg); > if (FIELD_GET(IIDR_IMPLEMENTER, reg) != IMPLEMENTER_ARM || > - FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) > - return dev_err_probe(dev, -ENODEV, "Not a DMA-350!"); > + ((FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA350) && > + FIELD_GET(IIDR_PRODUCTID, reg) != PRODUCTID_DMA250)) > + return dev_err_probe(dev, -ENODEV, "Not a DMA-350/DMA-250!"); > > reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG0); > nchan = FIELD_GET(DMA_CFG_NUM_CHANNELS, reg) + 1; > @@ -917,13 +1287,38 @@ static int d350_probe(struct platform_device *pdev) > return ret; > } > > + if (device_is_compatible(dev, "arm,dma-250")) { No, don't sprinkle compatibles through driver code. driver match data is for that. > + u32 cfg2; > + int secext_present; > + > + dmac->is_d250 = true; > + > + cfg2 = readl_relaxed(base + DMAINFO + DMA_BUILDCFG2); > + secext_present = (cfg2 & DMA_CFG_HAS_TZ) ? 
1 : 0; > + dmac->cntx_mem_size = nchan * 64 * (1 + secext_present); > + dmac->cntx_mem = dma_alloc_coherent(dev, dmac->cntx_mem_size, > + &dmac->cntx_mem_paddr, > + GFP_KERNEL); > + if (!dmac->cntx_mem) > + return dev_err_probe(dev, -ENOMEM, "Failed to alloc context memory\n"); > + > + ret = devm_add_action_or_reset(dev, d250_cntx_mem_release, dmac); > + if (ret) { > + dma_free_coherent(dev, dmac->cntx_mem_size, > + dmac->cntx_mem, dmac->cntx_mem_paddr); > + return ret; > + } > + writel_relaxed(dmac->cntx_mem_paddr, base + DMANSECCTRL + NSEC_CNTXBASE); > + } > + > dma_set_mask_and_coherent(dev, DMA_BIT_MASK(aw)); > coherent = device_get_dma_attr(dev) == DEV_DMA_COHERENT; > > reg = readl_relaxed(base + DMAINFO + DMA_BUILDCFG1); > dmac->nreq = FIELD_GET(DMA_CFG_NUM_TRIGGER_IN, reg); > > - dev_dbg(dev, "DMA-350 r%dp%d with %d channels, %d requests\n", r, p, dmac->nchan, dmac->nreq); > + dev_info(dev, "%s r%dp%d with %d channels, %d requests\n", > + dmac->is_d250 ? "DMA-250" : "DMA-350", r, p, dmac->nchan, dmac->nreq); No, don't make drivers more verbose. Please follow Linux driver design/coding style - this should be silent on success. 
> > for (int i = min(dw, 16); i > 0; i /= 2) { > dmac->dma.src_addr_widths |= BIT(i); > @@ -935,7 +1330,10 @@ static int d350_probe(struct platform_device *pdev) > dmac->dma.device_alloc_chan_resources = d350_alloc_chan_resources; > dmac->dma.device_free_chan_resources = d350_free_chan_resources; > dma_cap_set(DMA_MEMCPY, dmac->dma.cap_mask); > - dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy; > + if (dmac->is_d250) > + dmac->dma.device_prep_dma_memcpy = d250_prep_memcpy; > + else > + dmac->dma.device_prep_dma_memcpy = d350_prep_memcpy; > dmac->dma.device_pause = d350_pause; > dmac->dma.device_resume = d350_resume; > dmac->dma.device_terminate_all = d350_terminate_all; > @@ -971,8 +1369,8 @@ static int d350_probe(struct platform_device *pdev) > return dch->irq; > > dch->has_wrap = FIELD_GET(CH_CFG_HAS_WRAP, reg); > - dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg) & > - FIELD_GET(CH_CFG_HAS_TRIGSEL, reg); > + dch->has_xsizehi = FIELD_GET(CH_CFG_HAS_XSIZEHI, reg); > + dch->has_trig = FIELD_GET(CH_CFG_HAS_TRIGIN, reg); > > /* Fill is a special case of Wrap */ > memset &= dch->has_wrap; > @@ -994,8 +1392,13 @@ static int d350_probe(struct platform_device *pdev) > dma_cap_set(DMA_SLAVE, dmac->dma.cap_mask); > dma_cap_set(DMA_CYCLIC, dmac->dma.cap_mask); > dmac->dma.device_config = d350_slave_config; > - dmac->dma.device_prep_slave_sg = d350_prep_slave_sg; > - dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic; > + if (dmac->is_d250) { > + dmac->dma.device_prep_slave_sg = d250_prep_slave_sg; > + dmac->dma.device_prep_dma_cyclic = d250_prep_cyclic; > + } else { > + dmac->dma.device_prep_slave_sg = d350_prep_slave_sg; > + dmac->dma.device_prep_dma_cyclic = d350_prep_cyclic; > + } > } > > if (memset) { > @@ -1019,6 +1422,7 @@ static void d350_remove(struct platform_device *pdev) > > static const struct of_device_id d350_of_match[] __maybe_unused = { > { .compatible = "arm,dma-350" }, > + { .compatible = "arm,dma-250" }, And based on that devices would be 
compatible... BTW, incorrect order - 2 < 3. Best regards, Krzysztof
© 2016 - 2025 Red Hat, Inc.