[v1] net: faster and simpler CRC32C computation

[PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Eric Biggers 9 months ago

From: Eric Biggers <ebiggers@google.com>

Now that the crc32c() library function directly takes advantage of
architecture-specific optimizations and there also now exists a function
skb_copy_and_crc32c_datagram_iter(), it is unnecessary to go through the
crypto_ahash API.  Just use those functions.  This is much simpler, and
it also improves performance due to eliminating the crypto API overhead.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 drivers/nvme/host/Kconfig |   4 +-
 drivers/nvme/host/tcp.c   | 118 ++++++++++++--------------------------
 2 files changed, 39 insertions(+), 83 deletions(-)

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 4d64b6935bb9..7dca58f0a237 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -82,13 +82,13 @@ config NVME_FC
 
 config NVME_TCP
 	tristate "NVM Express over Fabrics TCP host driver"
 	depends on INET
 	depends on BLOCK
+	select CRC32
+	select NET_CRC32C
 	select NVME_FABRICS
-	select CRYPTO
-	select CRYPTO_CRC32C
 	help
 	  This provides support for the NVMe over Fabrics protocol using
 	  the TCP transport.  This allows you to use remote block devices
 	  exported using the NVMe protocol set.
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6..d32e168036ec 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -6,19 +6,19 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/crc32.h>
 #include <linux/nvme-tcp.h>
 #include <linux/nvme-keyring.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/tls.h>
 #include <net/tls_prot.h>
 #include <net/handshake.h>
 #include <linux/blk-mq.h>
-#include <crypto/hash.h>
 #include <net/busy_poll.h>
 #include <trace/events/sock.h>
 
 #include "nvme.h"
 #include "fabrics.h"
@@ -166,12 +166,12 @@ struct nvme_tcp_queue {
 	bool			rd_enabled;
 
 	bool			hdr_digest;
 	bool			data_digest;
 	bool			tls_enabled;
-	struct ahash_request	*rcv_hash;
-	struct ahash_request	*snd_hash;
+	u32			rcv_crc;
+	u32			snd_crc;
 	__le32			exp_ddgst;
 	__le32			recv_ddgst;
 	struct completion       tls_complete;
 	int                     tls_err;
 	struct page_frag_cache	pf_cache;
@@ -454,36 +454,40 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
 
 	list_del(&req->entry);
 	return req;
 }
 
-static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
-		__le32 *dgst)
+static inline void nvme_tcp_ddgst_init(u32 *crcp)
 {
-	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
-	crypto_ahash_final(hash);
+	*crcp = ~0;
 }
 
-static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
-		struct page *page, off_t off, size_t len)
+static inline void nvme_tcp_ddgst_update(u32 *crcp,
+		struct page *page, size_t off, size_t len)
 {
-	struct scatterlist sg;
+	page += off / PAGE_SIZE;
+	off %= PAGE_SIZE;
+	while (len) {
+		const void *vaddr = kmap_local_page(page);
+		size_t n = min(len, (size_t)PAGE_SIZE - off);
 
-	sg_init_table(&sg, 1);
-	sg_set_page(&sg, page, len, off);
-	ahash_request_set_crypt(hash, &sg, NULL, len);
-	crypto_ahash_update(hash);
+		*crcp = crc32c(*crcp, vaddr + off, n);
+		kunmap_local(vaddr);
+		page++;
+		off = 0;
+		len -= n;
+	}
 }
 
-static inline void nvme_tcp_hdgst(struct ahash_request *hash,
-		void *pdu, size_t len)
+static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
 {
-	struct scatterlist sg;
+	*ddgst = cpu_to_le32(~*crcp);
+}
 
-	sg_init_one(&sg, pdu, len);
-	ahash_request_set_crypt(hash, &sg, pdu + len, len);
-	crypto_ahash_digest(hash);
+static inline void nvme_tcp_hdgst(void *pdu, size_t len)
+{
+	put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
 }
 
 static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
 		void *pdu, size_t pdu_len)
 {
@@ -497,11 +501,11 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
 			nvme_tcp_queue_id(queue));
 		return -EPROTO;
 	}
 
 	recv_digest = *(__le32 *)(pdu + hdr->hlen);
-	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
+	nvme_tcp_hdgst(pdu, pdu_len);
 	exp_digest = *(__le32 *)(pdu + hdr->hlen);
 	if (recv_digest != exp_digest) {
 		dev_err(queue->ctrl->ctrl.device,
 			"header digest error: recv %#x expected %#x\n",
 			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
@@ -524,11 +528,11 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
 		dev_err(queue->ctrl->ctrl.device,
 			"queue %d: data digest flag is cleared\n",
 		nvme_tcp_queue_id(queue));
 		return -EPROTO;
 	}
-	crypto_ahash_init(queue->rcv_hash);
+	nvme_tcp_ddgst_init(&queue->rcv_crc);
 
 	return 0;
 }
 
 static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
@@ -924,12 +928,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		/* we can read only from what is left in this bio */
 		recv_len = min_t(size_t, recv_len,
 				iov_iter_count(&req->iter));
 
 		if (queue->data_digest)
-			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
-				&req->iter, recv_len, queue->rcv_hash);
+			ret = skb_copy_and_crc32c_datagram_iter(skb, *offset,
+				&req->iter, recv_len, &queue->rcv_crc);
 		else
 			ret = skb_copy_datagram_iter(skb, *offset,
 					&req->iter, recv_len);
 		if (ret) {
 			dev_err(queue->ctrl->ctrl.device,
@@ -943,11 +947,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		queue->data_remaining -= recv_len;
 	}
 
 	if (!queue->data_remaining) {
 		if (queue->data_digest) {
-			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+			nvme_tcp_ddgst_final(&queue->rcv_crc,
+					     &queue->exp_ddgst);
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
 				nvme_tcp_end_request(rq,
 						le16_to_cpu(req->status));
@@ -1145,11 +1150,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		ret = sock_sendmsg(queue->sock, &msg);
 		if (ret <= 0)
 			return ret;
 
 		if (queue->data_digest)
-			nvme_tcp_ddgst_update(queue->snd_hash, page,
+			nvme_tcp_ddgst_update(&queue->snd_crc, page,
 					offset, ret);
 
 		/*
 		 * update the request iterator except for the last payload send
 		 * in the request where we don't want to modify it as we may
@@ -1159,11 +1164,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 			nvme_tcp_advance_req(req, ret);
 
 		/* fully successful last send in current PDU */
 		if (last && ret == len) {
 			if (queue->data_digest) {
-				nvme_tcp_ddgst_final(queue->snd_hash,
+				nvme_tcp_ddgst_final(&queue->snd_crc,
 					&req->ddgst);
 				req->state = NVME_TCP_SEND_DDGST;
 				req->offset = 0;
 			} else {
 				if (h2cdata_left)
@@ -1192,11 +1197,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 		msg.msg_flags |= MSG_MORE;
 	else
 		msg.msg_flags |= MSG_EOR;
 
 	if (queue->hdr_digest && !req->offset)
-		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+		nvme_tcp_hdgst(pdu, sizeof(*pdu));
 
 	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
 	ret = sock_sendmsg(queue->sock, &msg);
 	if (unlikely(ret <= 0))
@@ -1205,11 +1210,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	len -= ret;
 	if (!len) {
 		if (inline_data) {
 			req->state = NVME_TCP_SEND_DATA;
 			if (queue->data_digest)
-				crypto_ahash_init(queue->snd_hash);
+				nvme_tcp_ddgst_init(&queue->snd_crc);
 		} else {
 			nvme_tcp_done_send_req(queue);
 		}
 		return 1;
 	}
@@ -1227,11 +1232,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) - req->offset + hdgst;
 	int ret;
 
 	if (queue->hdr_digest && !req->offset)
-		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+		nvme_tcp_hdgst(pdu, sizeof(*pdu));
 
 	if (!req->h2cdata_left)
 		msg.msg_flags |= MSG_SPLICE_PAGES;
 
 	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
@@ -1242,11 +1247,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 
 	len -= ret;
 	if (!len) {
 		req->state = NVME_TCP_SEND_DATA;
 		if (queue->data_digest)
-			crypto_ahash_init(queue->snd_hash);
+			nvme_tcp_ddgst_init(&queue->snd_crc);
 		return 1;
 	}
 	req->offset += ret;
 
 	return -EAGAIN;
@@ -1382,45 +1387,10 @@ static void nvme_tcp_io_work(struct work_struct *w)
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
 	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 }
 
-static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
-
-	ahash_request_free(queue->rcv_hash);
-	ahash_request_free(queue->snd_hash);
-	crypto_free_ahash(tfm);
-}
-
-static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
-{
-	struct crypto_ahash *tfm;
-
-	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!queue->snd_hash)
-		goto free_tfm;
-	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
-
-	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!queue->rcv_hash)
-		goto free_snd_hash;
-	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
-
-	return 0;
-free_snd_hash:
-	ahash_request_free(queue->snd_hash);
-free_tfm:
-	crypto_free_ahash(tfm);
-	return -ENOMEM;
-}
-
 static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
 {
 	struct nvme_tcp_request *async = &ctrl->async_req;
 
 	page_frag_free(async->pdu);
@@ -1449,13 +1419,10 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
 	unsigned int noreclaim_flag;
 
 	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
 		return;
 
-	if (queue->hdr_digest || queue->data_digest)
-		nvme_tcp_free_crypto(queue);
-
 	page_frag_cache_drain(&queue->pf_cache);
 
 	noreclaim_flag = memalloc_noreclaim_save();
 	/* ->sock will be released by fput() */
 	fput(queue->sock->file);
@@ -1865,25 +1832,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 		}
 	}
 
 	queue->hdr_digest = nctrl->opts->hdr_digest;
 	queue->data_digest = nctrl->opts->data_digest;
-	if (queue->hdr_digest || queue->data_digest) {
-		ret = nvme_tcp_alloc_crypto(queue);
-		if (ret) {
-			dev_err(nctrl->device,
-				"failed to allocate queue %d crypto\n", qid);
-			goto err_sock;
-		}
-	}
 
 	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
 			nvme_tcp_hdgst_len(queue);
 	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
 	if (!queue->pdu) {
 		ret = -ENOMEM;
-		goto err_crypto;
+		goto err_sock;
 	}
 
 	dev_dbg(nctrl->device, "connecting queue %d\n",
 			nvme_tcp_queue_id(queue));
 
@@ -1912,13 +1871,10 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 
 err_init_connect:
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
 err_rcv_pdu:
 	kfree(queue->pdu);
-err_crypto:
-	if (queue->hdr_digest || queue->data_digest)
-		nvme_tcp_free_crypto(queue);
 err_sock:
 	/* ->sock will be released by fput() */
 	fput(queue->sock->file);
 	queue->sock = NULL;
 err_destroy_mutex:
-- 
2.49.0

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Sagi Grimberg 8 months, 3 weeks ago

>   #include "nvme.h"
>   #include "fabrics.h"
> @@ -166,12 +166,12 @@ struct nvme_tcp_queue {
>   	bool			rd_enabled;
>   
>   	bool			hdr_digest;
>   	bool			data_digest;
>   	bool			tls_enabled;
> -	struct ahash_request	*rcv_hash;
> -	struct ahash_request	*snd_hash;
> +	u32			rcv_crc;
> +	u32			snd_crc;

Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
Other than that, looks good to me.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Eric Biggers 8 months, 3 weeks ago

On Sat, May 17, 2025 at 12:58:35PM +0300, Sagi Grimberg wrote:
> >   #include "nvme.h"
> >   #include "fabrics.h"
> > @@ -166,12 +166,12 @@ struct nvme_tcp_queue {
> >   	bool			rd_enabled;
> >   	bool			hdr_digest;
> >   	bool			data_digest;
> >   	bool			tls_enabled;
> > -	struct ahash_request	*rcv_hash;
> > -	struct ahash_request	*snd_hash;
> > +	u32			rcv_crc;
> > +	u32			snd_crc;
> >   	__le32			exp_ddgst;
> >   	__le32			recv_ddgst;
> 
> Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
> Other than that, looks good to me.
> 
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>

rcv_dgst would be awfully close to recv_ddgst which holds the on-wire digest
that is received.  I think I slightly prefer *_crc, since that helps
differentiate the in-progress values from the finalized values.

- Eric

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Sagi Grimberg 8 months, 3 weeks ago

>> Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
>> Other than that, looks good to me.
>>
>> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
> rcv_dgst would be awfully close to recv_ddgst which holds the on-wire digest
> that is received.  I think I slightly prefer *_crc, since that helps
> differentiate the in-progress values from the finalized values.

yea, sounds good.

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Christoph Hellwig 8 months, 3 weeks ago

On Sat, May 10, 2025 at 05:41:09PM -0700, Eric Biggers wrote:
> +static inline void nvme_tcp_ddgst_init(u32 *crcp)
>  {
> +	*crcp = ~0;
>  }

This helper looks really odd.  It coud just return the value, or
we could just assign it there using a seed define, e.g. this is what
XFS does:

#define XFS_CRC_SEED    (~(uint32_t)0)

nd that might in fact be worth lifting to common code with a good
comment on why all-d is used as the typical crc32 seed.

> +static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
>  {
> +	*ddgst = cpu_to_le32(~*crcp);
> +}

Just return the big endian version calculated here?

> +static inline void nvme_tcp_hdgst(void *pdu, size_t len)
> +{
> +	put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
>  }

This could also use the seed define mentioned above.

>  	recv_digest = *(__le32 *)(pdu + hdr->hlen);
> -	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
> +	nvme_tcp_hdgst(pdu, pdu_len);
>  	exp_digest = *(__le32 *)(pdu + hdr->hlen);
>  	if (recv_digest != exp_digest) {

This code looks really weird, as it samples the on-the-wire digest
first and then overwrites it.  I'd expect to just check the on-the-wire
on against one calculated in a local variable.

Sagi, any idea what is going on here?

Otherwise this looks great.  It's always good to get rid of the
horrendous crypto API calls.

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Eric Biggers 8 months, 3 weeks ago

On Thu, May 15, 2025 at 09:36:00PM -0700, Christoph Hellwig wrote:
> On Sat, May 10, 2025 at 05:41:09PM -0700, Eric Biggers wrote:
> > +static inline void nvme_tcp_ddgst_init(u32 *crcp)
> >  {
> > +	*crcp = ~0;
> >  }
> 
> This helper looks really odd.  It coud just return the value, or
> we could just assign it there using a seed define, e.g. this is what
> XFS does:
> 
> #define XFS_CRC_SEED    (~(uint32_t)0)
> 
> nd that might in fact be worth lifting to common code with a good
> comment on why all-d is used as the typical crc32 seed.

Yes, there is CRC8_INIT_VALUE for crc8 but nothing for crc32.  It might be worth
adding a CRC32_INIT_VALUE to <linux/crc32.h>.  Though typically there's an
inversion at the end too, which can't be abstracted out the same way...  And
many users initialize with 0 instead of ~0, even though ~0 is the "right" way.

> > +static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
> >  {
> > +	*ddgst = cpu_to_le32(~*crcp);
> > +}
> 
> Just return the big endian version calculated here?
> 
> > +static inline void nvme_tcp_hdgst(void *pdu, size_t len)
> > +{
> > +	put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
> >  }
> 
> This could also use the seed define mentioned above.
> 
> >  	recv_digest = *(__le32 *)(pdu + hdr->hlen);
> > -	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
> > +	nvme_tcp_hdgst(pdu, pdu_len);
> >  	exp_digest = *(__le32 *)(pdu + hdr->hlen);
> >  	if (recv_digest != exp_digest) {
> 
> This code looks really weird, as it samples the on-the-wire digest
> first and then overwrites it.  I'd expect to just check the on-the-wire
> on against one calculated in a local variable.
> 
> Sagi, any idea what is going on here?

It was probably just written that way to reuse nvme_tcp_hdgst() which writes to
the hdgst field.

Anyway, I was trying to keep the translation to the new API as straightforward
as possible, but I'll take your suggestions.  The following is what I ended up
with, but I'll resend the series formally after giving a bit more time.

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 4d64b6935bb91..7dca58f0a237b 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -82,13 +82,13 @@ config NVME_FC
 
 config NVME_TCP
 	tristate "NVM Express over Fabrics TCP host driver"
 	depends on INET
 	depends on BLOCK
+	select CRC32
+	select NET_CRC32C
 	select NVME_FABRICS
-	select CRYPTO
-	select CRYPTO_CRC32C
 	help
 	  This provides support for the NVMe over Fabrics protocol using
 	  the TCP transport.  This allows you to use remote block devices
 	  exported using the NVMe protocol set.
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6b..0c139d844422b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -6,19 +6,19 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/crc32.h>
 #include <linux/nvme-tcp.h>
 #include <linux/nvme-keyring.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/tls.h>
 #include <net/tls_prot.h>
 #include <net/handshake.h>
 #include <linux/blk-mq.h>
-#include <crypto/hash.h>
 #include <net/busy_poll.h>
 #include <trace/events/sock.h>
 
 #include "nvme.h"
 #include "fabrics.h"
@@ -166,12 +166,12 @@ struct nvme_tcp_queue {
 	bool			rd_enabled;
 
 	bool			hdr_digest;
 	bool			data_digest;
 	bool			tls_enabled;
-	struct ahash_request	*rcv_hash;
-	struct ahash_request	*snd_hash;
+	u32			rcv_crc;
+	u32			snd_crc;
 	__le32			exp_ddgst;
 	__le32			recv_ddgst;
 	struct completion       tls_complete;
 	int                     tls_err;
 	struct page_frag_cache	pf_cache;
@@ -454,36 +454,37 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
 
 	list_del(&req->entry);
 	return req;
 }
 
-static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
-		__le32 *dgst)
-{
-	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
-	crypto_ahash_final(hash);
-}
+#define NVME_TCP_CRC_SEED (~0)
 
-static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
-		struct page *page, off_t off, size_t len)
+static inline void nvme_tcp_ddgst_update(u32 *crcp,
+		struct page *page, size_t off, size_t len)
 {
-	struct scatterlist sg;
+	page += off / PAGE_SIZE;
+	off %= PAGE_SIZE;
+	while (len) {
+		const void *vaddr = kmap_local_page(page);
+		size_t n = min(len, (size_t)PAGE_SIZE - off);
 
-	sg_init_table(&sg, 1);
-	sg_set_page(&sg, page, len, off);
-	ahash_request_set_crypt(hash, &sg, NULL, len);
-	crypto_ahash_update(hash);
+		*crcp = crc32c(*crcp, vaddr + off, n);
+		kunmap_local(vaddr);
+		page++;
+		off = 0;
+		len -= n;
+	}
 }
 
-static inline void nvme_tcp_hdgst(struct ahash_request *hash,
-		void *pdu, size_t len)
+static inline __le32 nvme_tcp_ddgst_final(u32 crc)
 {
-	struct scatterlist sg;
+	return cpu_to_le32(~crc);
+}
 
-	sg_init_one(&sg, pdu, len);
-	ahash_request_set_crypt(hash, &sg, pdu + len, len);
-	crypto_ahash_digest(hash);
+static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
+{
+	return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
 }
 
 static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
 		void *pdu, size_t pdu_len)
 {
@@ -497,12 +498,11 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
 			nvme_tcp_queue_id(queue));
 		return -EPROTO;
 	}
 
 	recv_digest = *(__le32 *)(pdu + hdr->hlen);
-	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
-	exp_digest = *(__le32 *)(pdu + hdr->hlen);
+	exp_digest = nvme_tcp_hdgst(pdu, pdu_len);
 	if (recv_digest != exp_digest) {
 		dev_err(queue->ctrl->ctrl.device,
 			"header digest error: recv %#x expected %#x\n",
 			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
 		return -EIO;
@@ -524,11 +524,11 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
 		dev_err(queue->ctrl->ctrl.device,
 			"queue %d: data digest flag is cleared\n",
 		nvme_tcp_queue_id(queue));
 		return -EPROTO;
 	}
-	crypto_ahash_init(queue->rcv_hash);
+	queue->rcv_crc = NVME_TCP_CRC_SEED;
 
 	return 0;
 }
 
 static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
@@ -924,12 +924,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		/* we can read only from what is left in this bio */
 		recv_len = min_t(size_t, recv_len,
 				iov_iter_count(&req->iter));
 
 		if (queue->data_digest)
-			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
-				&req->iter, recv_len, queue->rcv_hash);
+			ret = skb_copy_and_crc32c_datagram_iter(skb, *offset,
+				&req->iter, recv_len, &queue->rcv_crc);
 		else
 			ret = skb_copy_datagram_iter(skb, *offset,
 					&req->iter, recv_len);
 		if (ret) {
 			dev_err(queue->ctrl->ctrl.device,
@@ -943,11 +943,11 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		queue->data_remaining -= recv_len;
 	}
 
 	if (!queue->data_remaining) {
 		if (queue->data_digest) {
-			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+			queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc);
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
 				nvme_tcp_end_request(rq,
 						le16_to_cpu(req->status));
@@ -1145,11 +1145,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		ret = sock_sendmsg(queue->sock, &msg);
 		if (ret <= 0)
 			return ret;
 
 		if (queue->data_digest)
-			nvme_tcp_ddgst_update(queue->snd_hash, page,
+			nvme_tcp_ddgst_update(&queue->snd_crc, page,
 					offset, ret);
 
 		/*
 		 * update the request iterator except for the last payload send
 		 * in the request where we don't want to modify it as we may
@@ -1159,12 +1159,12 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 			nvme_tcp_advance_req(req, ret);
 
 		/* fully successful last send in current PDU */
 		if (last && ret == len) {
 			if (queue->data_digest) {
-				nvme_tcp_ddgst_final(queue->snd_hash,
-					&req->ddgst);
+				req->ddgst =
+					nvme_tcp_ddgst_final(queue->snd_crc);
 				req->state = NVME_TCP_SEND_DDGST;
 				req->offset = 0;
 			} else {
 				if (h2cdata_left)
 					nvme_tcp_setup_h2c_data_pdu(req);
@@ -1192,11 +1192,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 		msg.msg_flags |= MSG_MORE;
 	else
 		msg.msg_flags |= MSG_EOR;
 
 	if (queue->hdr_digest && !req->offset)
-		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+		*(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));
 
 	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
 	ret = sock_sendmsg(queue->sock, &msg);
 	if (unlikely(ret <= 0))
@@ -1205,11 +1205,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	len -= ret;
 	if (!len) {
 		if (inline_data) {
 			req->state = NVME_TCP_SEND_DATA;
 			if (queue->data_digest)
-				crypto_ahash_init(queue->snd_hash);
+				queue->snd_crc = NVME_TCP_CRC_SEED;
 		} else {
 			nvme_tcp_done_send_req(queue);
 		}
 		return 1;
 	}
@@ -1227,11 +1227,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) - req->offset + hdgst;
 	int ret;
 
 	if (queue->hdr_digest && !req->offset)
-		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+		*(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));
 
 	if (!req->h2cdata_left)
 		msg.msg_flags |= MSG_SPLICE_PAGES;
 
 	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
@@ -1242,11 +1242,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 
 	len -= ret;
 	if (!len) {
 		req->state = NVME_TCP_SEND_DATA;
 		if (queue->data_digest)
-			crypto_ahash_init(queue->snd_hash);
+			queue->snd_crc = NVME_TCP_CRC_SEED;
 		return 1;
 	}
 	req->offset += ret;
 
 	return -EAGAIN;
@@ -1382,45 +1382,10 @@ static void nvme_tcp_io_work(struct work_struct *w)
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
 	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 }
 
-static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
-
-	ahash_request_free(queue->rcv_hash);
-	ahash_request_free(queue->snd_hash);
-	crypto_free_ahash(tfm);
-}
-
-static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
-{
-	struct crypto_ahash *tfm;
-
-	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!queue->snd_hash)
-		goto free_tfm;
-	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
-
-	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!queue->rcv_hash)
-		goto free_snd_hash;
-	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
-
-	return 0;
-free_snd_hash:
-	ahash_request_free(queue->snd_hash);
-free_tfm:
-	crypto_free_ahash(tfm);
-	return -ENOMEM;
-}
-
 static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
 {
 	struct nvme_tcp_request *async = &ctrl->async_req;
 
 	page_frag_free(async->pdu);
@@ -1449,13 +1414,10 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
 	unsigned int noreclaim_flag;
 
 	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
 		return;
 
-	if (queue->hdr_digest || queue->data_digest)
-		nvme_tcp_free_crypto(queue);
-
 	page_frag_cache_drain(&queue->pf_cache);
 
 	noreclaim_flag = memalloc_noreclaim_save();
 	/* ->sock will be released by fput() */
 	fput(queue->sock->file);
@@ -1865,25 +1827,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 		}
 	}
 
 	queue->hdr_digest = nctrl->opts->hdr_digest;
 	queue->data_digest = nctrl->opts->data_digest;
-	if (queue->hdr_digest || queue->data_digest) {
-		ret = nvme_tcp_alloc_crypto(queue);
-		if (ret) {
-			dev_err(nctrl->device,
-				"failed to allocate queue %d crypto\n", qid);
-			goto err_sock;
-		}
-	}
 
 	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
 			nvme_tcp_hdgst_len(queue);
 	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
 	if (!queue->pdu) {
 		ret = -ENOMEM;
-		goto err_crypto;
+		goto err_sock;
 	}
 
 	dev_dbg(nctrl->device, "connecting queue %d\n",
 			nvme_tcp_queue_id(queue));
 
@@ -1912,13 +1866,10 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 
 err_init_connect:
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
 err_rcv_pdu:
 	kfree(queue->pdu);
-err_crypto:
-	if (queue->hdr_digest || queue->data_digest)
-		nvme_tcp_free_crypto(queue);
 err_sock:
 	/* ->sock will be released by fput() */
 	fput(queue->sock->file);
 	queue->sock = NULL;
 err_destroy_mutex:

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Sagi Grimberg 8 months, 3 weeks ago

>   	if (queue->hdr_digest && !req->offset)
> -		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
> +		*(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));

I'd move this assignment to a helper nvme_tcp_set_hdgst(), especially as it
has two call-sites.

Other than that,

Reviewed-by: Sagi Grimberg <sagi@grimberg.me

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Christoph Hellwig 8 months, 3 weeks ago

On Thu, May 15, 2025 at 10:31:00PM -0700, Eric Biggers wrote:
> +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
> +{
> +	return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
>  }

This drops the unaligned handling.  Now in the NVMe protocol it will
always be properly aligned, but my TCP-foo is not good enough to
remember if the networking code will also guarantee 32-bit alignment
for the start of the packet?

Otherwise this looks great:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Re: [PATCH net-next 09/10] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter()

Posted by Eric Biggers 8 months, 3 weeks ago

On Thu, May 15, 2025 at 11:06:56PM -0700, Christoph Hellwig wrote:
> On Thu, May 15, 2025 at 10:31:00PM -0700, Eric Biggers wrote:
> > +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
> > +{
> > +	return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
> >  }
> 
> This drops the unaligned handling.  Now in the NVMe protocol it will
> always be properly aligned, but my TCP-foo is not good enough to
> remember if the networking code will also guarantee 32-bit alignment
> for the start of the packet?
> 
> Otherwise this looks great:
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>

The nvme-tcp driver already assumes that the header is at least 4-byte aligned,
considering that it accesses hdr->plen and the struct doesn't use __packed:

    struct nvme_tcp_hdr {
            __u8	type;
            __u8	flags;
            __u8	hlen;
            __u8	pdo;
            __le32	plen;
    };

On the send size, the header size is always sizeof(struct nvme_tcp_cmd_pdu) or
sizeof(struct nvme_tcp_data_pdu) which are multiples of 4.  On the receive side,
nvme-tcp validates hdr->hlen == sizeof(struct nvme_tcp_rsp_pdu) and then does:

    recv_digest = *(__le32 *)(pdu + hdr->hlen);

So, using put_unaligned_le32() is unnecessary.  I just had it there in v1
because I had directly translated crypto_ahash_digest(), which does ultimately
do a put_unaligned_le32() once you unravel all the API layers.

- Eric