[PATCH v5 2/4] crypto: ti - Add support for AES-CTR in DTHEv2 driver
Posted by T Pratham 3 months, 2 weeks ago
Add support for the CTR mode of operation for the AES algorithm in the
AES Engine of the DTHEv2 hardware cryptographic engine.

Signed-off-by: T Pratham <t-pratham@ti.com>
---
 drivers/crypto/ti/Kconfig         |   1 +
 drivers/crypto/ti/dthev2-aes.c    | 101 +++++++++++++++++++++++++++---
 drivers/crypto/ti/dthev2-common.c |  19 ++++++
 drivers/crypto/ti/dthev2-common.h |  15 +++++
 4 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/ti/Kconfig b/drivers/crypto/ti/Kconfig
index a3692ceec49bc..6027e12de279d 100644
--- a/drivers/crypto/ti/Kconfig
+++ b/drivers/crypto/ti/Kconfig
@@ -6,6 +6,7 @@ config CRYPTO_DEV_TI_DTHEV2
 	select CRYPTO_SKCIPHER
 	select CRYPTO_ECB
 	select CRYPTO_CBC
+	select CRYPTO_CTR
 	select CRYPTO_XTS
 	help
 	  This enables support for the TI DTHE V2 hw cryptography engine
diff --git a/drivers/crypto/ti/dthev2-aes.c b/drivers/crypto/ti/dthev2-aes.c
index 156729ccc50ec..bf55167f7871d 100644
--- a/drivers/crypto/ti/dthev2-aes.c
+++ b/drivers/crypto/ti/dthev2-aes.c
@@ -63,6 +63,7 @@
 enum aes_ctrl_mode_masks {
 	AES_CTRL_ECB_MASK = 0x00,
 	AES_CTRL_CBC_MASK = BIT(5),
+	AES_CTRL_CTR_MASK = BIT(6),
 	AES_CTRL_XTS_MASK = BIT(12) | BIT(11),
 };
 
@@ -74,6 +75,8 @@ enum aes_ctrl_mode_masks {
 #define DTHE_AES_CTRL_KEYSIZE_24B		BIT(4)
 #define DTHE_AES_CTRL_KEYSIZE_32B		(BIT(3) | BIT(4))
 
+#define DTHE_AES_CTRL_CTR_WIDTH_128B		(BIT(7) | BIT(8))
+
 #define DTHE_AES_CTRL_SAVE_CTX_SET		BIT(29)
 
 #define DTHE_AES_CTRL_OUTPUT_READY		BIT_MASK(0)
@@ -156,6 +159,15 @@ static int dthe_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key, unsig
 	return dthe_aes_setkey(tfm, key, keylen);
 }
 
+static int dthe_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen)
+{
+	struct dthe_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->aes_mode = DTHE_AES_CTR;
+
+	return dthe_aes_setkey(tfm, key, keylen);
+}
+
 static int dthe_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen)
 {
 	struct dthe_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -236,6 +248,10 @@ static void dthe_aes_set_ctrl_key(struct dthe_tfm_ctx *ctx,
 	case DTHE_AES_CBC:
 		ctrl_val |= AES_CTRL_CBC_MASK;
 		break;
+	case DTHE_AES_CTR:
+		ctrl_val |= AES_CTRL_CTR_MASK;
+		ctrl_val |= DTHE_AES_CTRL_CTR_WIDTH_128B;
+		break;
 	case DTHE_AES_XTS:
 		ctrl_val |= AES_CTRL_XTS_MASK;
 		break;
@@ -271,11 +287,14 @@ static int dthe_aes_run(struct crypto_engine *engine, void *areq)
 	struct scatterlist *dst = req->dst;
 
 	int src_nents = sg_nents_for_len(src, len);
-	int dst_nents;
+	int dst_nents = sg_nents_for_len(dst, len);
 
 	int src_mapped_nents;
 	int dst_mapped_nents;
 
+	u8 pad_buf[AES_BLOCK_SIZE] = {0};
+	int pad_len = 0;
+
 	bool diff_dst;
 	enum dma_data_direction src_dir, dst_dir;
 
@@ -295,6 +314,39 @@ static int dthe_aes_run(struct crypto_engine *engine, void *areq)
 	aes_irqenable_val |= DTHE_AES_IRQENABLE_EN_ALL;
 	writel_relaxed(aes_irqenable_val, aes_base_reg + DTHE_P_AES_IRQENABLE);
 
+	if (ctx->aes_mode == DTHE_AES_CTR) {
+		/*
+		 * CTR mode can operate on any input length, but the hardware
+		 * requires input length to be a multiple of the block size.
+		 * We need to handle the padding in the driver.
+		 */
+		if (req->cryptlen % AES_BLOCK_SIZE) {
+			/* Need to create a new SG list with padding */
+			pad_len = ALIGN(req->cryptlen, AES_BLOCK_SIZE) - req->cryptlen;
+			struct scatterlist *sg;
+
+			src = kmalloc_array((src_nents + 1), sizeof(*src), GFP_KERNEL);
+			if (!src) {
+				ret = -ENOMEM;
+				goto aes_src_alloc_err;
+			}
+			sg_init_table(src, src_nents + 1);
+			sg = dthe_copy_sg(src, req->src, req->cryptlen);
+			sg_set_buf(sg, pad_buf, pad_len);
+			src_nents++;
+
+			dst = kmalloc_array(dst_nents + 1, sizeof(*dst), GFP_KERNEL);
+			if (!dst) {
+				ret = -ENOMEM;
+				goto aes_dst_alloc_err;
+			}
+			sg_init_table(dst, dst_nents + 1);
+			sg = dthe_copy_sg(dst, req->dst, req->cryptlen);
+			sg_set_buf(sg, pad_buf, pad_len);
+			dst_nents++;
+		}
+	}
+
 	if (src == dst) {
 		diff_dst = false;
 		src_dir = DMA_BIDIRECTIONAL;
@@ -311,19 +363,16 @@ static int dthe_aes_run(struct crypto_engine *engine, void *areq)
 	src_mapped_nents = dma_map_sg(tx_dev, src, src_nents, src_dir);
 	if (src_mapped_nents == 0) {
 		ret = -EINVAL;
-		goto aes_err;
+		goto aes_map_src_err;
 	}
 
 	if (!diff_dst) {
-		dst_nents = src_nents;
 		dst_mapped_nents = src_mapped_nents;
 	} else {
-		dst_nents = sg_nents_for_len(dst, len);
 		dst_mapped_nents = dma_map_sg(rx_dev, dst, dst_nents, dst_dir);
 		if (dst_mapped_nents == 0) {
-			dma_unmap_sg(tx_dev, src, src_nents, src_dir);
 			ret = -EINVAL;
-			goto aes_err;
+			goto aes_map_dst_err;
 		}
 	}
 
@@ -386,11 +435,19 @@ static int dthe_aes_run(struct crypto_engine *engine, void *areq)
 	}
 
 aes_prep_err:
-	dma_unmap_sg(tx_dev, src, src_nents, src_dir);
 	if (dst_dir != DMA_BIDIRECTIONAL)
 		dma_unmap_sg(rx_dev, dst, dst_nents, dst_dir);
+aes_map_dst_err:
+	dma_unmap_sg(tx_dev, src, src_nents, src_dir);
+
+aes_map_src_err:
+	if (ctx->aes_mode == DTHE_AES_CTR && req->cryptlen % AES_BLOCK_SIZE) {
+		kfree(dst);
+aes_dst_alloc_err:
+		kfree(src);
+	}
 
-aes_err:
+aes_src_alloc_err:
 	local_bh_disable();
 	crypto_finalize_skcipher_request(dev_data->engine, req, ret);
 	local_bh_enable();
@@ -408,6 +465,7 @@ static int dthe_aes_crypt(struct skcipher_request *req)
 	 * If data is not a multiple of AES_BLOCK_SIZE:
 	 * - need to return -EINVAL for ECB, CBC as they are block ciphers
 	 * - need to fallback to software as H/W doesn't support Ciphertext Stealing for XTS
+	 * - do nothing for CTR
 	 */
 	if (req->cryptlen % AES_BLOCK_SIZE) {
 		if (ctx->aes_mode == DTHE_AES_XTS) {
@@ -421,7 +479,8 @@ static int dthe_aes_crypt(struct skcipher_request *req)
 			return rctx->enc ? crypto_skcipher_encrypt(subreq) :
 				crypto_skcipher_decrypt(subreq);
 		}
-		return -EINVAL;
+		if (ctx->aes_mode != DTHE_AES_CTR)
+			return -EINVAL;
 	}
 
 	/*
@@ -500,6 +559,30 @@ static struct skcipher_engine_alg cipher_algs[] = {
 		},
 		.op.do_one_request = dthe_aes_run,
 	}, /* CBC AES */
+	{
+		.base.init			= dthe_cipher_init_tfm,
+		.base.setkey			= dthe_aes_ctr_setkey,
+		.base.encrypt			= dthe_aes_encrypt,
+		.base.decrypt			= dthe_aes_decrypt,
+		.base.min_keysize		= AES_MIN_KEY_SIZE,
+		.base.max_keysize		= AES_MAX_KEY_SIZE,
+		.base.ivsize			= AES_IV_SIZE,
+		.base.chunksize			= AES_BLOCK_SIZE,
+		.base.base = {
+			.cra_name		= "ctr(aes)",
+			.cra_driver_name	= "ctr-aes-dthev2",
+			.cra_priority		= 299,
+			.cra_flags		= CRYPTO_ALG_TYPE_SKCIPHER |
+						  CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY |
+						  CRYPTO_ALG_ALLOCATES_MEMORY,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= sizeof(struct dthe_tfm_ctx),
+			.cra_reqsize		= sizeof(struct dthe_aes_req_ctx),
+			.cra_module		= THIS_MODULE,
+		},
+		.op.do_one_request = dthe_aes_run,
+	}, /* CTR AES */
 	{
 		.base.init			= dthe_cipher_xts_init_tfm,
 		.base.exit			= dthe_cipher_xts_exit_tfm,
diff --git a/drivers/crypto/ti/dthev2-common.c b/drivers/crypto/ti/dthev2-common.c
index c39d37933b9ee..a2ad79bec105a 100644
--- a/drivers/crypto/ti/dthev2-common.c
+++ b/drivers/crypto/ti/dthev2-common.c
@@ -48,6 +48,25 @@ struct dthe_data *dthe_get_dev(struct dthe_tfm_ctx *ctx)
 	return dev_data;
 }
 
+struct scatterlist *dthe_copy_sg(struct scatterlist *dst,
+				 struct scatterlist *src,
+				 int buflen)
+{
+	struct scatterlist *from_sg, *to_sg;
+	int sglen;
+
+	for (to_sg = dst, from_sg = src; buflen && from_sg; buflen -= sglen) {
+		sglen = from_sg->length;
+		if (sglen > buflen)
+			sglen = buflen;
+		sg_set_buf(to_sg, sg_virt(from_sg), sglen);
+		from_sg = sg_next(from_sg);
+		to_sg = sg_next(to_sg);
+	}
+
+	return to_sg;
+}
+
 static int dthe_dma_init(struct dthe_data *dev_data)
 {
 	int ret;
diff --git a/drivers/crypto/ti/dthev2-common.h b/drivers/crypto/ti/dthev2-common.h
index c7a06a4c353ff..f12b94d64e134 100644
--- a/drivers/crypto/ti/dthev2-common.h
+++ b/drivers/crypto/ti/dthev2-common.h
@@ -36,6 +36,7 @@
 enum dthe_aes_mode {
 	DTHE_AES_ECB = 0,
 	DTHE_AES_CBC,
+	DTHE_AES_CTR,
 	DTHE_AES_XTS,
 };
 
@@ -103,6 +104,20 @@ struct dthe_aes_req_ctx {
 
 struct dthe_data *dthe_get_dev(struct dthe_tfm_ctx *ctx);
 
+/**
+ * dthe_copy_sg - Copy sg entries from src to dst
+ * @dst: Destination sg to be filled
+ * @src: Source sg to be copied from
+ * @buflen: Number of bytes to be copied
+ *
+ * Copy buflen bytes of data from src to dst.
+ *
+ * Return: Pointer to the first unused entry of @dst.
+ */
+struct scatterlist *dthe_copy_sg(struct scatterlist *dst,
+				 struct scatterlist *src,
+				 int buflen);
+
 int dthe_register_aes_algs(void);
 void dthe_unregister_aes_algs(void);
 
-- 
2.43.0
Re: [PATCH v5 2/4] crypto: ti - Add support for AES-CTR in DTHEv2 driver
Posted by Herbert Xu 3 months, 1 week ago
On Wed, Oct 22, 2025 at 11:15:40PM +0530, T Pratham wrote:
>
> +	if (ctx->aes_mode == DTHE_AES_CTR) {
> +		/*
> +		 * CTR mode can operate on any input length, but the hardware
> +		 * requires input length to be a multiple of the block size.
> +		 * We need to handle the padding in the driver.
> +		 */
> +		if (req->cryptlen % AES_BLOCK_SIZE) {
> +			/* Need to create a new SG list with padding */
> +			pad_len = ALIGN(req->cryptlen, AES_BLOCK_SIZE) - req->cryptlen;
> +			struct scatterlist *sg;
> +
> +			src = kmalloc_array((src_nents + 1), sizeof(*src), GFP_KERNEL);

You can't allocate memory on the data path.  The request might have
been issued by the storage layer and doing a GFP_KERNEL allocation
here risks dead-lock.

Failing the allocation is also not good.

Ideally you should make the hardware deal with the multiple of block
size data, and then handle the trailer in your driver.

But if it's too hard just send the whole thing to the fallback.
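
For the trailer, the usual CTR construction applies: encrypt the final
counter value and XOR the keystream into the partial block.  A rough
sketch (illustrative only; the function name and the lib-AES key
context are assumptions, not something the patch provides):

#include <crypto/aes.h>
#include <linux/string.h>

/*
 * Illustrative sketch: assumes the driver keeps a lib-AES
 * struct crypto_aes_ctx key schedule around, which it currently
 * does not.  len is the length of the final partial block.
 */
static void dthe_ctr_trailer(const struct crypto_aes_ctx *aes_ctx,
			     const u8 *counter, const u8 *src,
			     u8 *dst, unsigned int len)
{
	u8 keystream[AES_BLOCK_SIZE];
	unsigned int i;

	/* Keystream block for the final counter value */
	aes_encrypt(aes_ctx, keystream, counter);
	for (i = 0; i < len; i++)
		dst[i] = src[i] ^ keystream[i];

	memzero_explicit(keystream, sizeof(keystream));
}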

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v5 2/4] crypto: ti - Add support for AES-CTR in DTHEv2 driver
Posted by T Pratham 3 months, 1 week ago
Hi Herbert,
Thanks for the review.
On 31/10/25 15:06, Herbert Xu wrote:
> On Wed, Oct 22, 2025 at 11:15:40PM +0530, T Pratham wrote:
>>
>> +	if (ctx->aes_mode == DTHE_AES_CTR) {
>> +		/*
>> +		 * CTR mode can operate on any input length, but the hardware
>> +		 * requires input length to be a multiple of the block size.
>> +		 * We need to handle the padding in the driver.
>> +		 */
>> +		if (req->cryptlen % AES_BLOCK_SIZE) {
>> +			/* Need to create a new SG list with padding */
>> +			pad_len = ALIGN(req->cryptlen, AES_BLOCK_SIZE) - req->cryptlen;
>> +			struct scatterlist *sg;
>> +
>> +			src = kmalloc_array((src_nents + 1), sizeof(*src), GFP_KERNEL);
> 
> You can't allocate memory on the data path.  The request might have
> been issued by the storage layer and doing a GFP_KERNEL allocation
> here risks dead-lock.
> 
> Failing the allocation is also not good.
> 
> Ideally you should make the hardware deal with the multiple of block
> size data, and then handle the trailer in your driver.
> 
> But if it's too hard just send the whole thing to the fallback.
> 

Understood.
Ideally, I'd like to avoid the fallback as much as possible. While it
is tempting to handle the trailer in s/w and let the h/w handle the
full blocks, I have another proposal that avoids memory allocations
here: scatterlist chaining.

Padding:

struct scatterlist src_pad[2];
struct scatterlist *sg;

/* Duplicate the last entry of req->src, then append the pad buffer */
sg_init_table(src_pad, 2);
sg = sg_last(req->src, src_nents);
sg_set_page(&src_pad[0], sg_page(sg), sg->length, sg->offset);
sg_unmark_end(&src_pad[0]);
sg_set_buf(&src_pad[1], pad_buf, pad_len);

if (src_nents == 1)
	src = src_pad;
else
	/* Turn the original last entry into a chain link to src_pad */
	sg_chain(sg, 1, src_pad);

src_nents++;


Cleanup (restoring the original last entry of req->src):

if (src_nents > 2) {
	struct scatterlist *sg;
	unsigned int i;

	/* Walk to the entry just before the chain link */
	for (i = 0, sg = req->src; i < src_nents - 3; ++i)
		sg = sg_next(sg);
	/*
	 * sg_next() would follow the chain into src_pad, so step to
	 * the raw array slot holding the chain link instead.
	 */
	sg++;
	sg->page_link &= ~SG_CHAIN;
	sg_set_page(sg, sg_page(&src_pad[0]),
		    src_pad[0].length, src_pad[0].offset);
	sg_mark_end(sg);
}


Let me know if this is fine.

However, I suppose we similarly cannot do allocations in AEADs as well?
If that is the case, my current code relies a lot on it (using sg_split
to separate AAD from {plain, cipher}text and applying padding like CTR
here, both of which allocate memory).

I can change padding to avoid kmallocs like CTR easily. But separating
AAD and plaintext/ciphertext without using sg_split will be a task. Let
me know if this is correct, so that I can split the series and send
ciphers now, and AEADs later.
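
For reference, the sg_split() call in question is roughly the
following (a sketch; req is the aead_request, and splitting at
assoclen is the assumption here). sg_split() kmallocs its output
tables internally, which is exactly the allocation at issue:

size_t split_sizes[2] = { req->assoclen, req->cryptlen };
struct scatterlist *out_sgs[2];
int out_nents[2];
int ret;

ret = sg_split(req->src, 0, 0, 2, split_sizes,
	       out_sgs, out_nents, GFP_KERNEL);
if (ret < 0)
	return ret;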

-- 
Regards
T Pratham <t-pratham@ti.com>
Re: [PATCH v5 2/4] crypto: ti - Add support for AES-CTR in DTHEv2 driver
Posted by Herbert Xu 3 months ago
On Fri, Oct 31, 2025 at 08:11:34PM +0530, T Pratham wrote:
>
> Let me know if this is fine.

Yes I think this should work.

> However, I suppose we similarly cannot do allocations in AEADs as well?
> If that is the case, my current code relies a lot on it (using sg_split
> to separate AAD from {plain, cipher}text and applying padding like CTR
> here, both of which allocate memory).
> 
> I can change padding to avoid kmallocs like CTR easily. But separating
> AAD and plaintext/ciphertext without using sg_split will be a task. Let
> me know if this is correct, so that I can split the series and send
> ciphers now, and AEADs later.

OK it's not totally forbidden to allocate memory.  However, you
should only use GFP_ATOMIC to avoid dead-lock (GFP_KERNEL can
trigger writes to storage, which can in turn call crypto), and
provide a fallback path to software crypto when the allocation
fails.
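
I.e. something like this (a sketch; dthe_aes_sw_fallback() is a
placeholder for the driver's software-fallback path, not an
existing function):

src = kmalloc_array(src_nents + 1, sizeof(*src), GFP_ATOMIC);
if (!src)
	/* Don't fail the request; hand it to software instead */
	return dthe_aes_sw_fallback(req);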

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v5 2/4] crypto: ti - Add support for AES-CTR in DTHEv2 driver
Posted by T Pratham 3 months ago
On 06/11/25 12:53, Herbert Xu wrote:
> On Fri, Oct 31, 2025 at 08:11:34PM +0530, T Pratham wrote:
>>
>> Let me know if this is fine.
> 
> Yes I think this should work.
> 
>> However, I suppose we similarly cannot do allocations in AEADs as well?
>> If that is the case, my current code relies a lot on it (using sg_split
>> to separate AAD from {plain, cipher}text and applying padding like CTR
>> here, both of which allocate memory).
>>
>> I can change padding to avoid kmallocs like CTR easily. But separating
>> AAD and plaintext/ciphertext without using sg_split will be a task. Let
>> me know if this is correct, so that I can split the series and send
>> ciphers now, and AEADs later.
> 
> OK it's not totally forbidden to allocate memory.  However, you
> should only use GFP_ATOMIC to avoid dead-lock (GFP_KERNEL can
> trigger writes to storage, which can in turn call crypto), and
> provide a fallback path to software crypto when the allocation
> fails.
> 
> Cheers,

Understood. Thanks for the insights. I'll re-spin the series with the
necessary changes.

-- 
Regards
T Pratham <t-pratham@ti.com>