From nobody Sat Dec 27 22:58:39 2025 Received: from aposti.net (aposti.net [89.234.176.197]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A43082DB78; Fri, 15 Dec 2023 13:14:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=crapouillou.net Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=crapouillou.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=crapouillou.net header.i=@crapouillou.net header.b="n73GF01n" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=crapouillou.net; s=mail; t=1702646030; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=dX1OOI1l+mZGv2ZWGYC1ua/+SRhxUb4vYorhXQG2m0s=; b=n73GF01nlvHq339iRA74SaYN1veT8I1ymfb0jyl4RXfsslvWnxW8Sigt6UGYAq0KKmDwt/ l0mmWMMHKRX4jyKHL6DdB9ueF2czOEgP0AbgpUzqhX4I8+vBvKmer9DU0oUwCM/txc2hdU TmL1k3pWMFkaKPOw9u2qHVQN4x3utqk= From: Paul Cercueil To: Vinod Koul Cc: Lars-Peter Clausen , =?UTF-8?q?Nuno=20S=C3=A1?= , Michael Hennerich , dmaengine@vger.kernel.org, linux-kernel@vger.kernel.org, Paul Cercueil Subject: [PATCH v2 2/5] dmaengine: axi-dmac: Allocate hardware descriptors Date: Fri, 15 Dec 2023 14:13:10 +0100 Message-ID: <20231215131313.23840-3-paul@crapouillou.net> In-Reply-To: <20231215131313.23840-1-paul@crapouillou.net> References: <20231215131313.23840-1-paul@crapouillou.net> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Spam: Yes Content-Type: text/plain; charset="utf-8" Change where and how the DMA transfer metadata is stored, to prepare for the upcoming introduction of scatter-gather support. 
Allocate hardware descriptors in the format that the HDL core will be expecting them when the scatter-gather feature is enabled, and use these fields to store the data that was previously stored in the axi_dmac_sg structure. Note that the 'x_len' and 'y_len' fields now contain the transfer length minus one, since that's what the hardware will expect in these fields. Signed-off-by: Paul Cercueil --- drivers/dma/dma-axi-dmac.c | 134 ++++++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 46 deletions(-) diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index 760940b21eab..185230a769b9 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -97,20 +97,31 @@ /* The maximum ID allocated by the hardware is 31 */ #define AXI_DMAC_SG_UNUSED 32U =20 +struct axi_dmac_hw_desc { + u32 flags; + u32 id; + u64 dest_addr; + u64 src_addr; + u64 __unused; + u32 y_len; + u32 x_len; + u32 src_stride; + u32 dst_stride; + u64 __pad[2]; +}; + struct axi_dmac_sg { - dma_addr_t src_addr; - dma_addr_t dest_addr; - unsigned int x_len; - unsigned int y_len; - unsigned int dest_stride; - unsigned int src_stride; - unsigned int id; unsigned int partial_len; bool schedule_when_free; + + struct axi_dmac_hw_desc *hw; + dma_addr_t hw_phys; }; =20 struct axi_dmac_desc { struct virt_dma_desc vdesc; + struct axi_dmac_chan *chan; + bool cyclic; bool have_partial_xfer; =20 @@ -229,7 +240,7 @@ static void axi_dmac_start_transfer(struct axi_dmac_cha= n *chan) sg =3D &desc->sg[desc->num_submitted]; =20 /* Already queued in cyclic mode. 
Wait for it to finish */ - if (sg->id !=3D AXI_DMAC_SG_UNUSED) { + if (sg->hw->id !=3D AXI_DMAC_SG_UNUSED) { sg->schedule_when_free =3D true; return; } @@ -246,16 +257,16 @@ static void axi_dmac_start_transfer(struct axi_dmac_c= han *chan) chan->next_desc =3D desc; } =20 - sg->id =3D axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + sg->hw->id =3D axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); =20 if (axi_dmac_dest_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride); } =20 if (axi_dmac_src_is_mem(chan)) { - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); - axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride); } =20 /* @@ -270,8 +281,8 @@ static void axi_dmac_start_transfer(struct axi_dmac_cha= n *chan) if (chan->hw_partial_xfer) flags |=3D AXI_DMAC_FLAG_PARTIAL_REPORT; =20 - axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); - axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len); axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); } @@ -286,9 +297,9 @@ static inline unsigned int axi_dmac_total_sg_bytes(stru= ct axi_dmac_chan *chan, struct axi_dmac_sg *sg) { if (chan->hw_2d) - return sg->x_len * sg->y_len; + return (sg->hw->x_len + 1) * (sg->hw->y_len + 1); else - return sg->x_len; + return (sg->hw->x_len + 1); } =20 static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan) @@ -307,9 +318,9 @@ static void axi_dmac_dequeue_partial_xfers(struct axi_d= mac_chan *chan) 
list_for_each_entry(desc, &chan->active_descs, vdesc.node) { for (i =3D 0; i < desc->num_sgs; i++) { sg =3D &desc->sg[i]; - if (sg->id =3D=3D AXI_DMAC_SG_UNUSED) + if (sg->hw->id =3D=3D AXI_DMAC_SG_UNUSED) continue; - if (sg->id =3D=3D id) { + if (sg->hw->id =3D=3D id) { desc->have_partial_xfer =3D true; sg->partial_len =3D len; found_sg =3D true; @@ -376,12 +387,12 @@ static bool axi_dmac_transfer_done(struct axi_dmac_ch= an *chan, =20 do { sg =3D &active->sg[active->num_completed]; - if (sg->id =3D=3D AXI_DMAC_SG_UNUSED) /* Not yet submitted */ + if (sg->hw->id =3D=3D AXI_DMAC_SG_UNUSED) /* Not yet submitted */ break; - if (!(BIT(sg->id) & completed_transfers)) + if (!(BIT(sg->hw->id) & completed_transfers)) break; active->num_completed++; - sg->id =3D AXI_DMAC_SG_UNUSED; + sg->hw->id =3D AXI_DMAC_SG_UNUSED; if (sg->schedule_when_free) { sg->schedule_when_free =3D false; start_next =3D true; @@ -476,22 +487,52 @@ static void axi_dmac_issue_pending(struct dma_chan *c) spin_unlock_irqrestore(&chan->vchan.lock, flags); } =20 -static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +static struct axi_dmac_desc * +axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs) { + struct axi_dmac *dmac =3D chan_to_axi_dmac(chan); + struct device *dev =3D dmac->dma_dev.dev; + struct axi_dmac_hw_desc *hws; struct axi_dmac_desc *desc; + dma_addr_t hw_phys; unsigned int i; =20 desc =3D kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT); if (!desc) return NULL; desc->num_sgs =3D num_sgs; + desc->chan =3D chan; =20 - for (i =3D 0; i < num_sgs; i++) - desc->sg[i].id =3D AXI_DMAC_SG_UNUSED; + hws =3D dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)), + &hw_phys, GFP_ATOMIC); + if (!hws) { + kfree(desc); + return NULL; + } + + for (i =3D 0; i < num_sgs; i++) { + desc->sg[i].hw =3D &hws[i]; + desc->sg[i].hw_phys =3D hw_phys + i * sizeof(*hws); + + hws[i].id =3D AXI_DMAC_SG_UNUSED; + hws[i].flags =3D 0; + } =20 return desc; } =20 +static void 
axi_dmac_free_desc(struct axi_dmac_desc *desc) +{ + struct axi_dmac *dmac =3D chan_to_axi_dmac(desc->chan); + struct device *dev =3D dmac->dma_dev.dev; + struct axi_dmac_hw_desc *hw =3D desc->sg[0].hw; + dma_addr_t hw_phys =3D desc->sg[0].hw_phys; + + dma_free_coherent(dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)), + hw, hw_phys); + kfree(desc); +} + static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *c= han, enum dma_transfer_direction direction, dma_addr_t addr, unsigned int num_periods, unsigned int period_len, @@ -510,21 +551,22 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(st= ruct axi_dmac_chan *chan, for (i =3D 0; i < num_periods; i++) { for (len =3D period_len; len > segment_size; sg++) { if (direction =3D=3D DMA_DEV_TO_MEM) - sg->dest_addr =3D addr; + sg->hw->dest_addr =3D addr; else - sg->src_addr =3D addr; - sg->x_len =3D segment_size; - sg->y_len =3D 1; + sg->hw->src_addr =3D addr; + sg->hw->x_len =3D segment_size - 1; + sg->hw->y_len =3D 0; + sg->hw->flags =3D 0; addr +=3D segment_size; len -=3D segment_size; } =20 if (direction =3D=3D DMA_DEV_TO_MEM) - sg->dest_addr =3D addr; + sg->hw->dest_addr =3D addr; else - sg->src_addr =3D addr; - sg->x_len =3D len; - sg->y_len =3D 1; + sg->hw->src_addr =3D addr; + sg->hw->x_len =3D len - 1; + sg->hw->y_len =3D 0; sg++; addr +=3D len; } @@ -551,7 +593,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_sl= ave_sg( for_each_sg(sgl, sg, sg_len, i) num_sgs +=3D DIV_ROUND_UP(sg_dma_len(sg), chan->max_length); =20 - desc =3D axi_dmac_alloc_desc(num_sgs); + desc =3D axi_dmac_alloc_desc(chan, num_sgs); if (!desc) return NULL; =20 @@ -560,7 +602,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_sl= ave_sg( for_each_sg(sgl, sg, sg_len, i) { if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || !axi_dmac_check_len(chan, sg_dma_len(sg))) { - kfree(desc); + axi_dmac_free_desc(desc); return NULL; } =20 @@ -595,7 +637,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dm= 
a_cyclic( num_periods =3D buf_len / period_len; num_segments =3D DIV_ROUND_UP(period_len, chan->max_length); =20 - desc =3D axi_dmac_alloc_desc(num_periods * num_segments); + desc =3D axi_dmac_alloc_desc(chan, num_periods * num_segments); if (!desc) return NULL; =20 @@ -650,26 +692,26 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_= interleaved( return NULL; } =20 - desc =3D axi_dmac_alloc_desc(1); + desc =3D axi_dmac_alloc_desc(chan, 1); if (!desc) return NULL; =20 if (axi_dmac_src_is_mem(chan)) { - desc->sg[0].src_addr =3D xt->src_start; - desc->sg[0].src_stride =3D xt->sgl[0].size + src_icg; + desc->sg[0].hw->src_addr =3D xt->src_start; + desc->sg[0].hw->src_stride =3D xt->sgl[0].size + src_icg; } =20 if (axi_dmac_dest_is_mem(chan)) { - desc->sg[0].dest_addr =3D xt->dst_start; - desc->sg[0].dest_stride =3D xt->sgl[0].size + dst_icg; + desc->sg[0].hw->dest_addr =3D xt->dst_start; + desc->sg[0].hw->dst_stride =3D xt->sgl[0].size + dst_icg; } =20 if (chan->hw_2d) { - desc->sg[0].x_len =3D xt->sgl[0].size; - desc->sg[0].y_len =3D xt->numf; + desc->sg[0].hw->x_len =3D xt->sgl[0].size - 1; + desc->sg[0].hw->y_len =3D xt->numf - 1; } else { - desc->sg[0].x_len =3D xt->sgl[0].size * xt->numf; - desc->sg[0].y_len =3D 1; + desc->sg[0].hw->x_len =3D xt->sgl[0].size * xt->numf - 1; + desc->sg[0].hw->y_len =3D 0; } =20 if (flags & DMA_CYCLIC) @@ -685,7 +727,7 @@ static void axi_dmac_free_chan_resources(struct dma_cha= n *c) =20 static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) { - kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); + axi_dmac_free_desc(to_axi_dmac_desc(vdesc)); } =20 static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg) --=20 2.42.0