[PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2

Bharat Bhushan posted 4 patches 9 months ago
There is a newer version of this series
[PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
Posted by Bharat Bhushan 9 months ago
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc returns a 128-byte aligned address. However,
kmalloc/kzalloc may return an address that is only 8-byte aligned
since the following change:
  "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
   smaller cache_line_size()"

The allocated memory is used for the following purposes:
 - Input data or scatter list address - 8-Byte alignment
 - Output data or gather list address - 8-Byte alignment
 - Completion address - 32-Byte alignment.

This patch ensures all addresses are aligned as mentioned above.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
 .../marvell/octeontx2/otx2_cpt_reqmgr.h       | 62 ++++++++++++++-----
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
index e27e849b01df..f0f1ff45c383 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -34,6 +34,9 @@
 #define SG_COMP_2    2
 #define SG_COMP_1    1
 
+#define OTX2_CPT_DPTR_RPTR_ALIGN	8
+#define OTX2_CPT_RES_ADDR_ALIGN		32
+
 union otx2_cpt_opcode {
 	u16 flags;
 	struct {
@@ -417,10 +420,9 @@ static inline struct otx2_cpt_inst_info *
 otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		    gfp_t gfp)
 {
-	int align = OTX2_CPT_DMA_MINALIGN;
 	struct otx2_cpt_inst_info *info;
-	u32 dlen, align_dlen, info_len;
-	u16 g_sz_bytes, s_sz_bytes;
+	u32 dlen, info_len;
+	u16 g_len, s_len;
 	u32 total_mem_len;
 
 	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
@@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 		return NULL;
 	}
 
-	g_sz_bytes = ((req->in_cnt + 3) / 4) *
-		      sizeof(struct otx2_cpt_sglist_component);
-	s_sz_bytes = ((req->out_cnt + 3) / 4) *
-		      sizeof(struct otx2_cpt_sglist_component);
+	/* Allocate memory to meet below alignment requirement:
+	 *  ----------------------------------
+	 * |    struct otx2_cpt_inst_info     |
+	 * |    (No alignment required)       |
+	 * |     -----------------------------|
+	 * |    | padding for 8B alignment    |
+	 * |----------------------------------|
+	 * |    SG List Gather/Input memory   |
+	 * |    Length = multiple of 32Bytes  |
+	 * |    Alignment = 8Byte             |
+	 * |----------------------------------|
+	 * |    SG List Scatter/Output memory |
+	 * |    Length = multiple of 32Bytes  |
+	 * |    Alignment = 8Byte             |
+	 * |    (padding for below alignment) |
+	 * |     -----------------------------|
+	 * |    | padding for 32B alignment   |
+	 * |----------------------------------|
+	 * |    Result response memory        |
+	 *  ----------------------------------
+	 */
 
-	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
-	align_dlen = ALIGN(dlen, align);
-	info_len = ALIGN(sizeof(*info), align);
-	total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
+	info_len = sizeof(*info);
+
+	g_len = ((req->in_cnt + 3) / 4) *
+		 sizeof(struct otx2_cpt_sglist_component);
+	s_len = ((req->out_cnt + 3) / 4) *
+		 sizeof(struct otx2_cpt_sglist_component);
+
+	dlen = g_len + s_len + SG_LIST_HDR_SIZE;
+
+	/* Allocate extra memory for SG and response address alignment */
+	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
+	total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
+			 sizeof(union otx2_cpt_res_s);
 
 	info = kzalloc(total_mem_len, gfp);
 	if (unlikely(!info))
 		return NULL;
 
 	info->dlen = dlen;
-	info->in_buffer = (u8 *)info + info_len;
+	info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
+				    OTX2_CPT_DPTR_RPTR_ALIGN);
+	info->out_buffer = info->in_buffer + 8 + g_len;
 
 	((u16 *)info->in_buffer)[0] = req->out_cnt;
 	((u16 *)info->in_buffer)[1] = req->in_cnt;
@@ -460,7 +490,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	}
 
 	if (setup_sgio_components(pdev, req->out, req->out_cnt,
-				  &info->in_buffer[8 + g_sz_bytes])) {
+				  info->out_buffer)) {
 		dev_err(&pdev->dev, "Failed to setup scatter list\n");
 		goto destroy_info;
 	}
@@ -476,8 +506,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 	 * Get buffer for union otx2_cpt_res_s response
 	 * structure and its physical address
 	 */
-	info->completion_addr = info->in_buffer + align_dlen;
-	info->comp_baddr = info->dptr_baddr + align_dlen;
+	info->completion_addr = PTR_ALIGN((info->in_buffer + dlen),
+					  OTX2_CPT_RES_ADDR_ALIGN);
+	info->comp_baddr = ALIGN((info->dptr_baddr + dlen),
+				 OTX2_CPT_RES_ADDR_ALIGN);
 
 	return info;
 
-- 
2.34.1
Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
Posted by Herbert Xu 8 months, 3 weeks ago
On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
>
> @@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
>  		return NULL;
>  	}
>  
> -	g_sz_bytes = ((req->in_cnt + 3) / 4) *
> -		      sizeof(struct otx2_cpt_sglist_component);
> -	s_sz_bytes = ((req->out_cnt + 3) / 4) *
> -		      sizeof(struct otx2_cpt_sglist_component);
> +	/* Allocate memory to meet below alignment requirement:
> +	 *  ----------------------------------
> +	 * |    struct otx2_cpt_inst_info     |
> +	 * |    (No alignment required)       |
> +	 * |     -----------------------------|
> +	 * |    | padding for 8B alignment    |
> +	 * |----------------------------------|
> +	 * |    SG List Gather/Input memory   |
> +	 * |    Length = multiple of 32Bytes  |
> +	 * |    Alignment = 8Byte             |
> +	 * |----------------------------------|
> +	 * |    SG List Scatter/Output memory |
> +	 * |    Length = multiple of 32Bytes  |
> +	 * |    Alignment = 8Byte             |
> +	 * |    (padding for below alignment) |
> +	 * |     -----------------------------|
> +	 * |    | padding for 32B alignment   |
> +	 * |----------------------------------|
> +	 * |    Result response memory        |
> +	 *  ----------------------------------
> +	 */
>  
> -	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
> -	align_dlen = ALIGN(dlen, align);
> -	info_len = ALIGN(sizeof(*info), align);
> -	total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
> +	info_len = sizeof(*info);
> +
> +	g_len = ((req->in_cnt + 3) / 4) *
> +		 sizeof(struct otx2_cpt_sglist_component);
> +	s_len = ((req->out_cnt + 3) / 4) *
> +		 sizeof(struct otx2_cpt_sglist_component);
> +
> +	dlen = g_len + s_len + SG_LIST_HDR_SIZE;
> +
> +	/* Allocate extra memory for SG and response address alignment */
> +	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> +	total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> +			 sizeof(union otx2_cpt_res_s);

This doesn't look right.  It would be correct if kzalloc returned
a 32-byte aligned pointer to start with.  But it doesn't anymore,
which is why you're making this patch in the first place :)

So you need to add extra memory to bridge the gap between what it
returns and what you expect.  Since it returns 8-byte aligned
memory, and you expect 32-byte aligned pointers, you should add
24 bytes.

IOW the calculation should be:

	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
	total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
	total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
			 ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);

>  	info = kzalloc(total_mem_len, gfp);
>  	if (unlikely(!info))
>  		return NULL;
>  
>  	info->dlen = dlen;
> -	info->in_buffer = (u8 *)info + info_len;
> +	info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
> +				    OTX2_CPT_DPTR_RPTR_ALIGN);
> +	info->out_buffer = info->in_buffer + 8 + g_len;

I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen
calculation above.  If so please spell it out as otherwise it's just
confusing.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
Posted by Bharat Bhushan 8 months, 3 weeks ago
On Mon, May 19, 2025 at 9:57 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
> >
> > @@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
> >               return NULL;
> >       }
> >
> > -     g_sz_bytes = ((req->in_cnt + 3) / 4) *
> > -                   sizeof(struct otx2_cpt_sglist_component);
> > -     s_sz_bytes = ((req->out_cnt + 3) / 4) *
> > -                   sizeof(struct otx2_cpt_sglist_component);
> > +     /* Allocate memory to meet below alignment requirement:
> > +      *  ----------------------------------
> > +      * |    struct otx2_cpt_inst_info     |
> > +      * |    (No alignment required)       |
> > +      * |     -----------------------------|
> > +      * |    | padding for 8B alignment    |
> > +      * |----------------------------------|
> > +      * |    SG List Gather/Input memory   |
> > +      * |    Length = multiple of 32Bytes  |
> > +      * |    Alignment = 8Byte             |
> > +      * |----------------------------------|
> > +      * |    SG List Scatter/Output memory |
> > +      * |    Length = multiple of 32Bytes  |
> > +      * |    Alignment = 8Byte             |
> > +      * |    (padding for below alignment) |
> > +      * |     -----------------------------|
> > +      * |    | padding for 32B alignment   |
> > +      * |----------------------------------|
> > +      * |    Result response memory        |
> > +      *  ----------------------------------
> > +      */
> >
> > -     dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
> > -     align_dlen = ALIGN(dlen, align);
> > -     info_len = ALIGN(sizeof(*info), align);
> > -     total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
> > +     info_len = sizeof(*info);
> > +
> > +     g_len = ((req->in_cnt + 3) / 4) *
> > +              sizeof(struct otx2_cpt_sglist_component);
> > +     s_len = ((req->out_cnt + 3) / 4) *
> > +              sizeof(struct otx2_cpt_sglist_component);
> > +
> > +     dlen = g_len + s_len + SG_LIST_HDR_SIZE;
> > +
> > +     /* Allocate extra memory for SG and response address alignment */
> > +     total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;

This adds extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment.

> > +     total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > +                      sizeof(union otx2_cpt_res_s);

This adds extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN) alignment.
In case it was not noticed, OTX2_CPT_RES_ADDR_ALIGN is not the same as
OTX2_CPT_DPTR_RPTR_ALIGN.

>
> This doesn't look right.  It would be correct if kzalloc returned
> a 32-byte aligned pointer to start with.  But it doesn't anymore,
> which is why you're making this patch in the first place :)
>
> So you need to add extra memory to bridge the gap between what it
> returns and what you expect.  Since it returns 8-byte aligned
> memory, and you expect 32-byte aligned pointers, you should add
> 24 bytes.
>
> IOW the calculation should be:
>
>         total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
>         total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
>         total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
>                          ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
>
> >       info = kzalloc(total_mem_len, gfp);
> >       if (unlikely(!info))
> >               return NULL;
> >
> >       info->dlen = dlen;
> > -     info->in_buffer = (u8 *)info + info_len;
> > +     info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
> > +                                 OTX2_CPT_DPTR_RPTR_ALIGN);
> > +     info->out_buffer = info->in_buffer + 8 + g_len;
>
> I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen
> calculation above.  If so please spell it out as otherwise it's just
> confusing.

Yes, this is for SG_LIST_HDR_SIZE; I will use that macro here.

Thanks
-Bharat

>
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
Posted by Herbert Xu 8 months, 3 weeks ago
On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
>
> > > +     /* Allocate extra memory for SG and response address alignment */
> > > +     total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> 
> This add extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment
> 
> > > +     total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > > +                      sizeof(union otx2_cpt_res_s);
> 
> This add extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN))
> In case not observed,  OTX2_CPT_RES_ADDR_ALIGN is not the same as
> OTX2_CPT_DPTR_RPTR_ALIGN.

But it doesn't do that.  Look, assume that total_mem_len is 64,
then ALIGN(64, 32) will still be 64.  You're not adding any extra
space for the alignment padding.

OTOH, kmalloc can return something that has a page offset of 8,
and you will need 24 extra bytes in your structure to make it
align at 32.

Now of course if you're very lucky, and total_mem_len starts out
at 8, then it would work but that's purely by chance.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
Posted by Bharat Bhushan 8 months, 3 weeks ago
On Mon, May 19, 2025 at 1:05 PM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
> >
> > > > +     /* Allocate extra memory for SG and response address alignment */
> > > > +     total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> >
> > This add extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment
> >
> > > > +     total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > > > +                      sizeof(union otx2_cpt_res_s);
> >
> > This add extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN))
> > In case not observed,  OTX2_CPT_RES_ADDR_ALIGN is not the same as
> > OTX2_CPT_DPTR_RPTR_ALIGN.
>
> But it doesn't do that.  Look, assume that total_mem_len is 64,
> then ALIGN(64, 32) will still be 64.  You're not adding any extra
> space for the alignment padding.
>
> OTOH, kmalloc can return something that has a page offset of 8,
> and you will need 24 extra bytes in your structure to make it
> align at 32.
>
> Now of course if you're very lucky, and total_mem_len starts out
> at 8, then it would work but that's purely by chance.

Thanks for explaining; I will change this in the next version.

Thanks
-Bharat

>
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt