The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc will return a 128-byte aligned address, but
kmalloc/kzalloc returns an 8-byte aligned address after the change below:
"9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
smaller cache_line_size()"
The completion address should be 32-byte aligned when loading
microcode.
Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Cc: <stable@vger.kernel.org> #v6.5+
---
v1->v2:
- No Change
.../marvell/octeontx2/otx2_cptpf_ucode.c | 30 +++++++++++--------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 9095dea2748d..3e8357c0ecb2 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1491,12 +1491,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
union otx2_cpt_opcode opcode;
union otx2_cpt_res_s *result;
union otx2_cpt_inst_s inst;
+ dma_addr_t result_baddr;
dma_addr_t rptr_baddr;
struct pci_dev *pdev;
- u32 len, compl_rlen;
int timeout = 10000;
int ret, etype;
void *rptr;
+ u32 len;
/*
* We don't get capabilities if it was already done
@@ -1519,22 +1520,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
if (ret)
goto delete_grps;
- compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
- len = compl_rlen + LOADFVC_RLEN;
+ len = LOADFVC_RLEN + sizeof(union otx2_cpt_res_s) +
+ OTX2_CPT_RES_ADDR_ALIGN;
- result = kzalloc(len, GFP_KERNEL);
- if (!result) {
+ rptr = kzalloc(len, GFP_KERNEL);
+ if (!rptr) {
ret = -ENOMEM;
goto lf_cleanup;
}
- rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
+
+ rptr_baddr = dma_map_single(&pdev->dev, rptr, len,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(&pdev->dev, rptr_baddr)) {
dev_err(&pdev->dev, "DMA mapping failed\n");
ret = -EFAULT;
- goto free_result;
+ goto free_rptr;
}
- rptr = (u8 *)result + compl_rlen;
+
+ result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN,
+ OTX2_CPT_RES_ADDR_ALIGN);
+ result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN,
+ OTX2_CPT_RES_ADDR_ALIGN);
/* Fill in the command */
opcode.s.major = LOADFVC_MAJOR_OP;
@@ -1546,14 +1552,14 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
/* 64-bit swap for microcode data reads, not needed for addresses */
cpu_to_be64s(&iq_cmd.cmd.u);
iq_cmd.dptr = 0;
- iq_cmd.rptr = rptr_baddr + compl_rlen;
+ iq_cmd.rptr = rptr_baddr;
iq_cmd.cptr.u = 0;
for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) {
result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT;
iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
etype);
- otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
+ otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr);
lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
timeout = 10000;
@@ -1576,8 +1582,8 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
error_no_response:
dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
-free_result:
- kfree(result);
+free_rptr:
+ kfree(rptr);
lf_cleanup:
otx2_cptlf_shutdown(lfs);
delete_grps:
--
2.34.1
On Tue, May 20, 2025 at 06:37:35PM +0530, Bharat Bhushan wrote:
>
> @@ -1519,22 +1520,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
> if (ret)
> goto delete_grps;
>
> - compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
> - len = compl_rlen + LOADFVC_RLEN;
> + len = LOADFVC_RLEN + sizeof(union otx2_cpt_res_s) +
> + OTX2_CPT_RES_ADDR_ALIGN;
>
> - result = kzalloc(len, GFP_KERNEL);
> - if (!result) {
> + rptr = kzalloc(len, GFP_KERNEL);
> + if (!rptr) {
> ret = -ENOMEM;
> goto lf_cleanup;
> }
> - rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
> +
> + rptr_baddr = dma_map_single(&pdev->dev, rptr, len,
> DMA_BIDIRECTIONAL);
After this change rptr is still unaligned. However, you appear
to be doing bidirectional DMA to rptr, so it should be aligned
to ARCH_DMA_MINALIGN or you risk corrupting the surrounding
memory.
Only TO_DEVICE DMA addresses can be unaligned.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Wed, May 21, 2025 at 4:32 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Tue, May 20, 2025 at 06:37:35PM +0530, Bharat Bhushan wrote:
> >
> > @@ -1519,22 +1520,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
> > if (ret)
> > goto delete_grps;
> >
> > - compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
> > - len = compl_rlen + LOADFVC_RLEN;
> > + len = LOADFVC_RLEN + sizeof(union otx2_cpt_res_s) +
> > + OTX2_CPT_RES_ADDR_ALIGN;
> >
> > - result = kzalloc(len, GFP_KERNEL);
> > - if (!result) {
> > + rptr = kzalloc(len, GFP_KERNEL);
> > + if (!rptr) {
> > ret = -ENOMEM;
> > goto lf_cleanup;
> > }
> > - rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
> > +
> > + rptr_baddr = dma_map_single(&pdev->dev, rptr, len,
> > DMA_BIDIRECTIONAL);
>
> After this change rptr is still unaligned. However, you appear
> to be doing bidirectional DMA to rptr, so it should be aligned
> to ARCH_DMA_MINALIGN or you risk corrupting the surrounding
> memory.
Yes, rptr is still not aligned, since its alignment is the same as
ARCH_KMALLOC_MINALIGN. But as per the second part of the comment,
rptr must be aligned to ARCH_DMA_MINALIGN.
So I will change the total memory allocation size and the alignment of
rptr and the result address accordingly.
Thanks
-Bharat
>
> Only TO_DEVICE DMA addresses can be unaligned.
>
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
© 2016 - 2025 Red Hat, Inc.