From nobody Sat Dec 27 22:58:39 2025 Received: from aposti.net (aposti.net [89.234.176.197]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 896C62DF8F; Fri, 15 Dec 2023 13:14:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=crapouillou.net Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=crapouillou.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=crapouillou.net header.i=@crapouillou.net header.b="UIcXbG7C" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=crapouillou.net; s=mail; t=1702646030; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=DfTyNXzfHCBEsP75+j9tuzi+q9BjmgIsVWTMiU/h6PU=; b=UIcXbG7CBMh88X7t8sZevtwrmaHlWv/g0RP3uQ6xH5EHBISmC30BrAsOhMogM5QvY/RQb9 sDnWahVnoDWEl0gC976GnESmk1OOQRn/iBQleCIREPLL1vGp1FCpwEzb4yq+K0It3rjtmn 7c4/ox+xPUj1aoI9v8eALHbOI5fi1bo= From: Paul Cercueil To: Vinod Koul Cc: Lars-Peter Clausen , =?UTF-8?q?Nuno=20S=C3=A1?= , Michael Hennerich , dmaengine@vger.kernel.org, linux-kernel@vger.kernel.org, Paul Cercueil Subject: [PATCH v2 3/5] dmaengine: axi-dmac: Add support for scatter-gather transfers Date: Fri, 15 Dec 2023 14:13:11 +0100 Message-ID: <20231215131313.23840-4-paul@crapouillou.net> In-Reply-To: <20231215131313.23840-1-paul@crapouillou.net> References: <20231215131313.23840-1-paul@crapouillou.net> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Spam: Yes Content-Type: text/plain; charset="utf-8" Implement support for scatter-gather transfers. Build a chain of hardware descriptors, each one corresponding to a segment of the transfer, and linked to the next one. The hardware will transfer the chain and only fire interrupts when the whole chain has been transferred. Support for scatter-gather is automatically enabled when the driver detects that the hardware supports it, by writing then reading the AXI_DMAC_REG_SG_ADDRESS register. If not available, the driver will fall back to standard DMA transfers. Signed-off-by: Paul Cercueil --- drivers/dma/dma-axi-dmac.c | 135 +++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 42 deletions(-) diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index 185230a769b9..5109530b66de 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -81,9 +81,13 @@ #define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 #define AXI_DMAC_REG_PARTIAL_XFER_LEN 0x44c #define AXI_DMAC_REG_PARTIAL_XFER_ID 0x450 +#define AXI_DMAC_REG_CURRENT_SG_ID 0x454 +#define AXI_DMAC_REG_SG_ADDRESS 0x47c +#define AXI_DMAC_REG_SG_ADDRESS_HIGH 0x4bc =20 #define AXI_DMAC_CTRL_ENABLE BIT(0) #define AXI_DMAC_CTRL_PAUSE BIT(1) +#define AXI_DMAC_CTRL_ENABLE_SG BIT(2) =20 #define AXI_DMAC_IRQ_SOT BIT(0) #define AXI_DMAC_IRQ_EOT BIT(1) @@ -97,12 +101,16 @@ /* The maximum ID allocated by the hardware is 31 */ #define AXI_DMAC_SG_UNUSED 32U =20 +/* Flags for axi_dmac_hw_desc.flags */ +#define AXI_DMAC_HW_FLAG_LAST BIT(0) +#define AXI_DMAC_HW_FLAG_IRQ BIT(1) + struct axi_dmac_hw_desc { u32 flags; u32 id; u64 dest_addr; u64 src_addr; - u64 __unused; + u64 next_sg_addr; u32 y_len; u32 x_len; u32 src_stride; @@ -150,6 +158,7 @@ struct axi_dmac_chan { bool hw_partial_xfer; bool hw_cyclic; bool hw_2d; + bool hw_sg; }; =20 struct axi_dmac { @@ -224,9 +233,11 @@ static void axi_dmac_start_transfer(struct axi_dmac_ch= an *chan) unsigned int flags =3D 0; unsigned int val; =20 - val =3D axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); - if (val) /* Queue is full, wait for the next SOT IRQ */ - return; + if (!chan->hw_sg) { + val =3D axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + } =20 desc =3D chan->next_desc; =20 @@ -245,9 +256,10 @@ static void axi_dmac_start_transfer(struct axi_dmac_ch= an *chan) return; } =20 - desc->num_submitted++; - if (desc->num_submitted =3D=3D desc->num_sgs || - desc->have_partial_xfer) { + if (chan->hw_sg) { + chan->next_desc =3D NULL; + } else if (++desc->num_submitted =3D=3D desc->num_sgs || + desc->have_partial_xfer) { if (desc->cyclic) desc->num_submitted =3D 0; /* Start again */ else @@ -259,14 +271,16 @@ static void axi_dmac_start_transfer(struct axi_dmac_c= han *chan) =20 sg->hw->id =3D axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); =20 - if (axi_dmac_dest_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride); - } + if (!chan->hw_sg) { + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride); + } =20 - if (axi_dmac_src_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride); + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride); + } } =20 /* @@ -281,8 +295,14 @@ static void axi_dmac_start_transfer(struct axi_dmac_ch= an *chan) if (chan->hw_partial_xfer) flags |=3D AXI_DMAC_FLAG_PARTIAL_REPORT; =20 - axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len); - axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len); + if (chan->hw_sg) { + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, (u32)sg->hw_phys); + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS_HIGH, + (u64)sg->hw_phys >> 32); + } else { + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len); + } axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); } @@ -359,6 +379,9 @@ static void axi_dmac_compute_residue(struct axi_dmac_ch= an *chan, rslt->result =3D DMA_TRANS_NOERROR; rslt->residue =3D 0; =20 + if (chan->hw_sg) + return; + /* * We get here if the last completed segment is partial, which * means we can compute the residue from that segment onwards @@ -385,36 +408,46 @@ static bool axi_dmac_transfer_done(struct axi_dmac_ch= an *chan, (completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE)) axi_dmac_dequeue_partial_xfers(chan); =20 - do { - sg =3D &active->sg[active->num_completed]; - if (sg->hw->id =3D=3D AXI_DMAC_SG_UNUSED) /* Not yet submitted */ - break; - if (!(BIT(sg->hw->id) & completed_transfers)) - break; - active->num_completed++; - sg->hw->id =3D AXI_DMAC_SG_UNUSED; - if (sg->schedule_when_free) { - sg->schedule_when_free =3D false; - start_next =3D true; + if (chan->hw_sg) { + if (active->cyclic) { + vchan_cyclic_callback(&active->vdesc); + } else { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active =3D axi_dmac_active_desc(chan); } + } else { + do { + sg =3D &active->sg[active->num_completed]; + if (sg->hw->id =3D=3D AXI_DMAC_SG_UNUSED) /* Not yet submitted */ + break; + if (!(BIT(sg->hw->id) & completed_transfers)) + break; + active->num_completed++; + sg->hw->id =3D AXI_DMAC_SG_UNUSED; + if (sg->schedule_when_free) { + sg->schedule_when_free =3D false; + start_next =3D true; + } =20 - if (sg->partial_len) - axi_dmac_compute_residue(chan, active); + if (sg->partial_len) + axi_dmac_compute_residue(chan, active); =20 - if (active->cyclic) - vchan_cyclic_callback(&active->vdesc); + if (active->cyclic) + vchan_cyclic_callback(&active->vdesc); =20 - if (active->num_completed =3D=3D active->num_sgs || - sg->partial_len) { - if (active->cyclic) { - active->num_completed =3D 0; /* wrap around */ - } else { - list_del(&active->vdesc.node); - vchan_cookie_complete(&active->vdesc); - active =3D axi_dmac_active_desc(chan); + if (active->num_completed =3D=3D active->num_sgs || + sg->partial_len) { + if (active->cyclic) { + active->num_completed =3D 0; /* wrap around */ + } else { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active =3D axi_dmac_active_desc(chan); + } } - } - } while (active); + } while (active); + } =20 return start_next; } @@ -478,8 +511,12 @@ static void axi_dmac_issue_pending(struct dma_chan *c) struct axi_dmac_chan *chan =3D to_axi_dmac_chan(c); struct axi_dmac *dmac =3D chan_to_axi_dmac(chan); unsigned long flags; + u32 ctrl =3D AXI_DMAC_CTRL_ENABLE; + + if (chan->hw_sg) + ctrl |=3D AXI_DMAC_CTRL_ENABLE_SG; =20 - axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, ctrl); =20 spin_lock_irqsave(&chan->vchan.lock, flags); if (vchan_issue_pending(&chan->vchan)) @@ -516,8 +553,14 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsign= ed int num_sgs) =20 hws[i].id =3D AXI_DMAC_SG_UNUSED; hws[i].flags =3D 0; + + /* Link hardware descriptors */ + hws[i].next_sg_addr =3D hw_phys + (i + 1) * sizeof(*hws); } =20 + /* The last hardware descriptor will trigger an interrupt */ + desc->sg[num_sgs - 1].hw->flags =3D AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_F= LAG_IRQ; + return desc; } =20 @@ -753,6 +796,9 @@ static bool axi_dmac_regmap_rdwr(struct device *dev, un= signed int reg) case AXI_DMAC_REG_CURRENT_DEST_ADDR: case AXI_DMAC_REG_PARTIAL_XFER_LEN: case AXI_DMAC_REG_PARTIAL_XFER_ID: + case AXI_DMAC_REG_CURRENT_SG_ID: + case AXI_DMAC_REG_SG_ADDRESS: + case AXI_DMAC_REG_SG_ADDRESS_HIGH: return true; default: return false; @@ -905,6 +951,10 @@ static int axi_dmac_detect_caps(struct axi_dmac *dmac,= unsigned int version) if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) =3D=3D AXI_DMAC_FLAG_CYCLIC) chan->hw_cyclic =3D true; =20 + axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, 0xffffffff); + if (axi_dmac_read(dmac, AXI_DMAC_REG_SG_ADDRESS)) + chan->hw_sg =3D true; + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1); if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) =3D=3D 1) chan->hw_2d =3D true; @@ -1005,6 +1055,7 @@ static int axi_dmac_probe(struct platform_device *pde= v) dma_dev->dst_addr_widths =3D BIT(dmac->chan.dest_width); dma_dev->directions =3D BIT(dmac->chan.direction); dma_dev->residue_granularity =3D DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_dev->max_sg_burst =3D 31; /* 31 SGs maximum in one burst */ INIT_LIST_HEAD(&dma_dev->channels); =20 dmac->chan.vchan.desc_free =3D axi_dmac_desc_free; --=20 2.42.0