From: Eric Biggers <ebiggers@google.com>
Now that the crc32c() library function directly takes advantage of
architecture-specific optimizations and there also now exists a function
skb_copy_and_crc32c_datagram_iter(), it is unnecessary to go through the
crypto_ahash API. Just use those functions. This is much simpler, and
it also improves performance due to eliminating the crypto API overhead.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
drivers/nvme/host/Kconfig | 4 +-
drivers/nvme/host/tcp.c | 118 ++++++++++++--------------------------
2 files changed, 39 insertions(+), 83 deletions(-)
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 4d64b6935bb9..7dca58f0a237 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -82,13 +82,13 @@ config NVME_FC
config NVME_TCP
tristate "NVM Express over Fabrics TCP host driver"
depends on INET
depends on BLOCK
+ select CRC32
+ select NET_CRC32C
select NVME_FABRICS
- select CRYPTO
- select CRYPTO_CRC32C
help
This provides support for the NVMe over Fabrics protocol using
the TCP transport. This allows you to use remote block devices
exported using the NVMe protocol set.
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6..d32e168036ec 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -6,19 +6,19 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/crc32.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/blk-mq.h>
-#include <crypto/hash.h>
#include <net/busy_poll.h>
#include <trace/events/sock.h>
#include "nvme.h"
#include "fabrics.h"
@@ -166,12 +166,12 @@ struct nvme_tcp_queue {
bool rd_enabled;
bool hdr_digest;
bool data_digest;
bool tls_enabled;
- struct ahash_request *rcv_hash;
- struct ahash_request *snd_hash;
+ u32 rcv_crc;
+ u32 snd_crc;
__le32 exp_ddgst;
__le32 recv_ddgst;
struct completion tls_complete;
int tls_err;
struct page_frag_cache pf_cache;
@@ -454,36 +454,40 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
list_del(&req->entry);
return req;
}
-static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
- __le32 *dgst)
+static inline void nvme_tcp_ddgst_init(u32 *crcp)
{
- ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
- crypto_ahash_final(hash);
+ *crcp = ~0;
}
-static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
- struct page *page, off_t off, size_t len)
+static inline void nvme_tcp_ddgst_update(u32 *crcp,
+ struct page *page, size_t off, size_t len)
{
- struct scatterlist sg;
+ page += off / PAGE_SIZE;
+ off %= PAGE_SIZE;
+ while (len) {
+ const void *vaddr = kmap_local_page(page);
+ size_t n = min(len, (size_t)PAGE_SIZE - off);
- sg_init_table(&sg, 1);
- sg_set_page(&sg, page, len, off);
- ahash_request_set_crypt(hash, &sg, NULL, len);
- crypto_ahash_update(hash);
+ *crcp = crc32c(*crcp, vaddr + off, n);
+ kunmap_local(vaddr);
+ page++;
+ off = 0;
+ len -= n;
+ }
}
-static inline void nvme_tcp_hdgst(struct ahash_request *hash,
- void *pdu, size_t len)
+static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
{
- struct scatterlist sg;
+ *ddgst = cpu_to_le32(~*crcp);
+}
- sg_init_one(&sg, pdu, len);
- ahash_request_set_crypt(hash, &sg, pdu + len, len);
- crypto_ahash_digest(hash);
+static inline void nvme_tcp_hdgst(void *pdu, size_t len)
+{
+ put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
}
static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
void *pdu, size_t pdu_len)
{
@@ -497,11 +501,11 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
nvme_tcp_queue_id(queue));
return -EPROTO;
}
recv_digest = *(__le32 *)(pdu + hdr->hlen);
- nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
+ nvme_tcp_hdgst(pdu, pdu_len);
exp_digest = *(__le32 *)(pdu + hdr->hlen);
if (recv_digest != exp_digest) {
dev_err(queue->ctrl->ctrl.device,
"header digest error: recv %#x expected %#x\n",
le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
@@ -524,11 +528,11 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
dev_err(queue->ctrl->ctrl.device,
"queue %d: data digest flag is cleared\n",
nvme_tcp_queue_id(queue));
return -EPROTO;
}
- crypto_ahash_init(queue->rcv_hash);
+ nvme_tcp_ddgst_init(&queue->rcv_crc);
return 0;
}
static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
@@ -924,12 +928,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
/* we can read only from what is left in this bio */
recv_len = min_t(size_t, recv_len,
iov_iter_count(&req->iter));
if (queue->data_digest)
- ret = skb_copy_and_hash_datagram_iter(skb, *offset,
- &req->iter, recv_len, queue->rcv_hash);
+ ret = skb_copy_and_crc32c_datagram_iter(skb, *offset,
+ &req->iter, recv_len, &queue->rcv_crc);
else
ret = skb_copy_datagram_iter(skb, *offset,
&req->iter, recv_len);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
@@ -943,11 +947,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
queue->data_remaining -= recv_len;
}
if (!queue->data_remaining) {
if (queue->data_digest) {
- nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+ nvme_tcp_ddgst_final(&queue->rcv_crc,
+ &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
nvme_tcp_end_request(rq,
le16_to_cpu(req->status));
@@ -1145,11 +1150,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
ret = sock_sendmsg(queue->sock, &msg);
if (ret <= 0)
return ret;
if (queue->data_digest)
- nvme_tcp_ddgst_update(queue->snd_hash, page,
+ nvme_tcp_ddgst_update(&queue->snd_crc, page,
offset, ret);
/*
* update the request iterator except for the last payload send
* in the request where we don't want to modify it as we may
@@ -1159,11 +1164,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
nvme_tcp_advance_req(req, ret);
/* fully successful last send in current PDU */
if (last && ret == len) {
if (queue->data_digest) {
- nvme_tcp_ddgst_final(queue->snd_hash,
+ nvme_tcp_ddgst_final(&queue->snd_crc,
&req->ddgst);
req->state = NVME_TCP_SEND_DDGST;
req->offset = 0;
} else {
if (h2cdata_left)
@@ -1192,11 +1197,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
- nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ nvme_tcp_hdgst(pdu, sizeof(*pdu));
bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
@@ -1205,11 +1210,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
len -= ret;
if (!len) {
if (inline_data) {
req->state = NVME_TCP_SEND_DATA;
if (queue->data_digest)
- crypto_ahash_init(queue->snd_hash);
+ nvme_tcp_ddgst_init(&queue->snd_crc);
} else {
nvme_tcp_done_send_req(queue);
}
return 1;
}
@@ -1227,11 +1232,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) - req->offset + hdgst;
int ret;
if (queue->hdr_digest && !req->offset)
- nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ nvme_tcp_hdgst(pdu, sizeof(*pdu));
if (!req->h2cdata_left)
msg.msg_flags |= MSG_SPLICE_PAGES;
bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
@@ -1242,11 +1247,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
len -= ret;
if (!len) {
req->state = NVME_TCP_SEND_DATA;
if (queue->data_digest)
- crypto_ahash_init(queue->snd_hash);
+ nvme_tcp_ddgst_init(&queue->snd_crc);
return 1;
}
req->offset += ret;
return -EAGAIN;
@@ -1382,45 +1387,10 @@ static void nvme_tcp_io_work(struct work_struct *w)
} while (!time_after(jiffies, deadline)); /* quota is exhausted */
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
-static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
-
- ahash_request_free(queue->rcv_hash);
- ahash_request_free(queue->snd_hash);
- crypto_free_ahash(tfm);
-}
-
-static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
-{
- struct crypto_ahash *tfm;
-
- tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->snd_hash)
- goto free_tfm;
- ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
-
- queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->rcv_hash)
- goto free_snd_hash;
- ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
-
- return 0;
-free_snd_hash:
- ahash_request_free(queue->snd_hash);
-free_tfm:
- crypto_free_ahash(tfm);
- return -ENOMEM;
-}
-
static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
struct nvme_tcp_request *async = &ctrl->async_req;
page_frag_free(async->pdu);
@@ -1449,13 +1419,10 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
unsigned int noreclaim_flag;
if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
return;
- if (queue->hdr_digest || queue->data_digest)
- nvme_tcp_free_crypto(queue);
-
page_frag_cache_drain(&queue->pf_cache);
noreclaim_flag = memalloc_noreclaim_save();
/* ->sock will be released by fput() */
fput(queue->sock->file);
@@ -1865,25 +1832,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
}
}
queue->hdr_digest = nctrl->opts->hdr_digest;
queue->data_digest = nctrl->opts->data_digest;
- if (queue->hdr_digest || queue->data_digest) {
- ret = nvme_tcp_alloc_crypto(queue);
- if (ret) {
- dev_err(nctrl->device,
- "failed to allocate queue %d crypto\n", qid);
- goto err_sock;
- }
- }
rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
nvme_tcp_hdgst_len(queue);
queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
if (!queue->pdu) {
ret = -ENOMEM;
- goto err_crypto;
+ goto err_sock;
}
dev_dbg(nctrl->device, "connecting queue %d\n",
nvme_tcp_queue_id(queue));
@@ -1912,13 +1871,10 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
err_init_connect:
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
err_rcv_pdu:
kfree(queue->pdu);
-err_crypto:
- if (queue->hdr_digest || queue->data_digest)
- nvme_tcp_free_crypto(queue);
err_sock:
/* ->sock will be released by fput() */
fput(queue->sock->file);
queue->sock = NULL;
err_destroy_mutex:
--
2.49.0
> #include "nvme.h"
> #include "fabrics.h"
> @@ -166,12 +166,12 @@ struct nvme_tcp_queue {
> bool rd_enabled;
>
> bool hdr_digest;
> bool data_digest;
> bool tls_enabled;
> - struct ahash_request *rcv_hash;
> - struct ahash_request *snd_hash;
> + u32 rcv_crc;
> + u32 snd_crc;
Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
Other than that, looks good to me.
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
On Sat, May 17, 2025 at 12:58:35PM +0300, Sagi Grimberg wrote:
> > #include "nvme.h"
> > #include "fabrics.h"
> > @@ -166,12 +166,12 @@ struct nvme_tcp_queue {
> > bool rd_enabled;
> > bool hdr_digest;
> > bool data_digest;
> > bool tls_enabled;
> > - struct ahash_request *rcv_hash;
> > - struct ahash_request *snd_hash;
> > + u32 rcv_crc;
> > + u32 snd_crc;
> > __le32 exp_ddgst;
> > __le32 recv_ddgst;
>
> Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
> Other than that, looks good to me.
>
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
rcv_dgst would be awfully close to recv_ddgst which holds the on-wire digest
that is received. I think I slightly prefer *_crc, since that helps
differentiate the in-progress values from the finalized values.
- Eric
>> Let's call it rcv_dgst (recv digest) and snd_dgst (send digest).
>> Other than that, looks good to me.
>>
>> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
> rcv_dgst would be awfully close to recv_ddgst which holds the on-wire digest
> that is received. I think I slightly prefer *_crc, since that helps
> differentiate the in-progress values from the finalized values.

yea, sounds good.
On Sat, May 10, 2025 at 05:41:09PM -0700, Eric Biggers wrote:
> +static inline void nvme_tcp_ddgst_init(u32 *crcp)
> {
> + *crcp = ~0;
> }
This helper looks really odd. It could just return the value, or
we could just assign it there using a seed define, e.g. this is what
XFS does:
#define XFS_CRC_SEED (~(uint32_t)0)
And that might in fact be worth lifting to common code with a good
comment on why all-ones is used as the typical crc32 seed.
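For illustration, the lifted define could look roughly like this (the name
and the comment are only a sketch here, not an existing kernel symbol):

	/*
	 * CRC32/CRC32C conventionally start from an all-ones seed so that
	 * leading zero bytes in the message still affect the checksum.
	 */
	#define CRC32_SEED	(~(u32)0)

and then the init helper becomes a plain assignment from CRC32_SEED.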
> +static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
> {
> + *ddgst = cpu_to_le32(~*crcp);
> +}
Just return the little-endian value calculated here?
> +static inline void nvme_tcp_hdgst(void *pdu, size_t len)
> +{
> + put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
> }
This could also use the seed define mentioned above.
> recv_digest = *(__le32 *)(pdu + hdr->hlen);
> - nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
> + nvme_tcp_hdgst(pdu, pdu_len);
> exp_digest = *(__le32 *)(pdu + hdr->hlen);
> if (recv_digest != exp_digest) {
This code looks really weird, as it samples the on-the-wire digest
first and then overwrites it. I'd expect to just check the on-the-wire
one against one calculated in a local variable.
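Roughly what I'd expect, assuming nvme_tcp_hdgst() were changed to return the
digest instead of writing it into the PDU (untested sketch):

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	exp_digest = nvme_tcp_hdgst(pdu, pdu_len);	/* local only, PDU untouched */
	if (recv_digest != exp_digest) {
		/* report the mismatch and return -EIO as today */
	}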
Sagi, any idea what is going on here?
Otherwise this looks great. It's always good to get rid of the
horrendous crypto API calls.
On Thu, May 15, 2025 at 09:36:00PM -0700, Christoph Hellwig wrote:
> On Sat, May 10, 2025 at 05:41:09PM -0700, Eric Biggers wrote:
> > +static inline void nvme_tcp_ddgst_init(u32 *crcp)
> > {
> > + *crcp = ~0;
> > }
>
> This helper looks really odd. It coud just return the value, or
> we could just assign it there using a seed define, e.g. this is what
> XFS does:
>
> #define XFS_CRC_SEED (~(uint32_t)0)
>
> nd that might in fact be worth lifting to common code with a good
> comment on why all-d is used as the typical crc32 seed.
Yes, there is CRC8_INIT_VALUE for crc8 but nothing for crc32. It might be worth
adding a CRC32_INIT_VALUE to <linux/crc32.h>. Though typically there's an
inversion at the end too, which can't be abstracted out the same way... And
many users initialize with 0 instead of ~0, even though ~0 is the "right" way.
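Spelling out the convention under discussion (nothing new, just the usual
crc32c() library usage pattern):

	u32 crc = ~0;				/* all-ones seed */
	crc = crc32c(crc, buf, len);		/* may be repeated over multiple chunks */
	wire = cpu_to_le32(~crc);		/* invert once at the end */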
> > +static inline void nvme_tcp_ddgst_final(u32 *crcp, __le32 *ddgst)
> > {
> > + *ddgst = cpu_to_le32(~*crcp);
> > +}
>
> Just return the big endian version calculated here?
>
> > +static inline void nvme_tcp_hdgst(void *pdu, size_t len)
> > +{
> > + put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len);
> > }
>
> This could also use the seed define mentioned above.
>
> > recv_digest = *(__le32 *)(pdu + hdr->hlen);
> > - nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
> > + nvme_tcp_hdgst(pdu, pdu_len);
> > exp_digest = *(__le32 *)(pdu + hdr->hlen);
> > if (recv_digest != exp_digest) {
>
> This code looks really weird, as it samples the on-the-wire digest
> first and then overwrites it. I'd expect to just check the on-the-wire
> on against one calculated in a local variable.
>
> Sagi, any idea what is going on here?
It was probably just written that way to reuse nvme_tcp_hdgst() which writes to
the hdgst field.
Anyway, I was trying to keep the translation to the new API as straightforward
as possible, but I'll take your suggestions. The following is what I ended up
with, but I'll resend the series formally after giving it a bit more time for review.
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 4d64b6935bb91..7dca58f0a237b 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -82,13 +82,13 @@ config NVME_FC
config NVME_TCP
tristate "NVM Express over Fabrics TCP host driver"
depends on INET
depends on BLOCK
+ select CRC32
+ select NET_CRC32C
select NVME_FABRICS
- select CRYPTO
- select CRYPTO_CRC32C
help
This provides support for the NVMe over Fabrics protocol using
the TCP transport. This allows you to use remote block devices
exported using the NVMe protocol set.
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6b..0c139d844422b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -6,19 +6,19 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/crc32.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/blk-mq.h>
-#include <crypto/hash.h>
#include <net/busy_poll.h>
#include <trace/events/sock.h>
#include "nvme.h"
#include "fabrics.h"
@@ -166,12 +166,12 @@ struct nvme_tcp_queue {
bool rd_enabled;
bool hdr_digest;
bool data_digest;
bool tls_enabled;
- struct ahash_request *rcv_hash;
- struct ahash_request *snd_hash;
+ u32 rcv_crc;
+ u32 snd_crc;
__le32 exp_ddgst;
__le32 recv_ddgst;
struct completion tls_complete;
int tls_err;
struct page_frag_cache pf_cache;
@@ -454,36 +454,37 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
list_del(&req->entry);
return req;
}
-static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
- __le32 *dgst)
-{
- ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
- crypto_ahash_final(hash);
-}
+#define NVME_TCP_CRC_SEED (~0)
-static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
- struct page *page, off_t off, size_t len)
+static inline void nvme_tcp_ddgst_update(u32 *crcp,
+ struct page *page, size_t off, size_t len)
{
- struct scatterlist sg;
+ page += off / PAGE_SIZE;
+ off %= PAGE_SIZE;
+ while (len) {
+ const void *vaddr = kmap_local_page(page);
+ size_t n = min(len, (size_t)PAGE_SIZE - off);
- sg_init_table(&sg, 1);
- sg_set_page(&sg, page, len, off);
- ahash_request_set_crypt(hash, &sg, NULL, len);
- crypto_ahash_update(hash);
+ *crcp = crc32c(*crcp, vaddr + off, n);
+ kunmap_local(vaddr);
+ page++;
+ off = 0;
+ len -= n;
+ }
}
-static inline void nvme_tcp_hdgst(struct ahash_request *hash,
- void *pdu, size_t len)
+static inline __le32 nvme_tcp_ddgst_final(u32 crc)
{
- struct scatterlist sg;
+ return cpu_to_le32(~crc);
+}
- sg_init_one(&sg, pdu, len);
- ahash_request_set_crypt(hash, &sg, pdu + len, len);
- crypto_ahash_digest(hash);
+static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
+{
+ return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
}
static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
void *pdu, size_t pdu_len)
{
@@ -497,12 +498,11 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
nvme_tcp_queue_id(queue));
return -EPROTO;
}
recv_digest = *(__le32 *)(pdu + hdr->hlen);
- nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
- exp_digest = *(__le32 *)(pdu + hdr->hlen);
+ exp_digest = nvme_tcp_hdgst(pdu, pdu_len);
if (recv_digest != exp_digest) {
dev_err(queue->ctrl->ctrl.device,
"header digest error: recv %#x expected %#x\n",
le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
return -EIO;
@@ -524,11 +524,11 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
dev_err(queue->ctrl->ctrl.device,
"queue %d: data digest flag is cleared\n",
nvme_tcp_queue_id(queue));
return -EPROTO;
}
- crypto_ahash_init(queue->rcv_hash);
+ queue->rcv_crc = NVME_TCP_CRC_SEED;
return 0;
}
static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
@@ -924,12 +924,12 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
/* we can read only from what is left in this bio */
recv_len = min_t(size_t, recv_len,
iov_iter_count(&req->iter));
if (queue->data_digest)
- ret = skb_copy_and_hash_datagram_iter(skb, *offset,
- &req->iter, recv_len, queue->rcv_hash);
+ ret = skb_copy_and_crc32c_datagram_iter(skb, *offset,
+ &req->iter, recv_len, &queue->rcv_crc);
else
ret = skb_copy_datagram_iter(skb, *offset,
&req->iter, recv_len);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
@@ -943,11 +943,11 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
queue->data_remaining -= recv_len;
}
if (!queue->data_remaining) {
if (queue->data_digest) {
- nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+ queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
nvme_tcp_end_request(rq,
le16_to_cpu(req->status));
@@ -1145,11 +1145,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
ret = sock_sendmsg(queue->sock, &msg);
if (ret <= 0)
return ret;
if (queue->data_digest)
- nvme_tcp_ddgst_update(queue->snd_hash, page,
+ nvme_tcp_ddgst_update(&queue->snd_crc, page,
offset, ret);
/*
* update the request iterator except for the last payload send
* in the request where we don't want to modify it as we may
@@ -1159,12 +1159,12 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
nvme_tcp_advance_req(req, ret);
/* fully successful last send in current PDU */
if (last && ret == len) {
if (queue->data_digest) {
- nvme_tcp_ddgst_final(queue->snd_hash,
- &req->ddgst);
+ req->ddgst =
+ nvme_tcp_ddgst_final(queue->snd_crc);
req->state = NVME_TCP_SEND_DDGST;
req->offset = 0;
} else {
if (h2cdata_left)
nvme_tcp_setup_h2c_data_pdu(req);
@@ -1192,11 +1192,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
- nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ *(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));
bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
@@ -1205,11 +1205,11 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
len -= ret;
if (!len) {
if (inline_data) {
req->state = NVME_TCP_SEND_DATA;
if (queue->data_digest)
- crypto_ahash_init(queue->snd_hash);
+ queue->snd_crc = NVME_TCP_CRC_SEED;
} else {
nvme_tcp_done_send_req(queue);
}
return 1;
}
@@ -1227,11 +1227,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) - req->offset + hdgst;
int ret;
if (queue->hdr_digest && !req->offset)
- nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ *(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));
if (!req->h2cdata_left)
msg.msg_flags |= MSG_SPLICE_PAGES;
bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
@@ -1242,11 +1242,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
len -= ret;
if (!len) {
req->state = NVME_TCP_SEND_DATA;
if (queue->data_digest)
- crypto_ahash_init(queue->snd_hash);
+ queue->snd_crc = NVME_TCP_CRC_SEED;
return 1;
}
req->offset += ret;
return -EAGAIN;
@@ -1382,45 +1382,10 @@ static void nvme_tcp_io_work(struct work_struct *w)
} while (!time_after(jiffies, deadline)); /* quota is exhausted */
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
-static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
-
- ahash_request_free(queue->rcv_hash);
- ahash_request_free(queue->snd_hash);
- crypto_free_ahash(tfm);
-}
-
-static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
-{
- struct crypto_ahash *tfm;
-
- tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->snd_hash)
- goto free_tfm;
- ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
-
- queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!queue->rcv_hash)
- goto free_snd_hash;
- ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
-
- return 0;
-free_snd_hash:
- ahash_request_free(queue->snd_hash);
-free_tfm:
- crypto_free_ahash(tfm);
- return -ENOMEM;
-}
-
static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
struct nvme_tcp_request *async = &ctrl->async_req;
page_frag_free(async->pdu);
@@ -1449,13 +1414,10 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
unsigned int noreclaim_flag;
if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
return;
- if (queue->hdr_digest || queue->data_digest)
- nvme_tcp_free_crypto(queue);
-
page_frag_cache_drain(&queue->pf_cache);
noreclaim_flag = memalloc_noreclaim_save();
/* ->sock will be released by fput() */
fput(queue->sock->file);
@@ -1865,25 +1827,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
}
}
queue->hdr_digest = nctrl->opts->hdr_digest;
queue->data_digest = nctrl->opts->data_digest;
- if (queue->hdr_digest || queue->data_digest) {
- ret = nvme_tcp_alloc_crypto(queue);
- if (ret) {
- dev_err(nctrl->device,
- "failed to allocate queue %d crypto\n", qid);
- goto err_sock;
- }
- }
rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
nvme_tcp_hdgst_len(queue);
queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
if (!queue->pdu) {
ret = -ENOMEM;
- goto err_crypto;
+ goto err_sock;
}
dev_dbg(nctrl->device, "connecting queue %d\n",
nvme_tcp_queue_id(queue));
@@ -1912,13 +1866,10 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
err_init_connect:
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
err_rcv_pdu:
kfree(queue->pdu);
-err_crypto:
- if (queue->hdr_digest || queue->data_digest)
- nvme_tcp_free_crypto(queue);
err_sock:
/* ->sock will be released by fput() */
fput(queue->sock->file);
queue->sock = NULL;
err_destroy_mutex:
> 	if (queue->hdr_digest && !req->offset)
> -		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
> +		*(__le32 *)(pdu + 1) = nvme_tcp_hdgst(pdu, sizeof(*pdu));

I'd move this assignment to a helper nvme_tcp_set_hdgst(), especially as it
has two call-sites.

Other than that,
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
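Something like this, perhaps (just a sketch, the exact form is up to you):

	static inline void nvme_tcp_set_hdgst(void *pdu, size_t len)
	{
		/* append the digest right after the 'len' header bytes it covers */
		*(__le32 *)(pdu + len) = nvme_tcp_hdgst(pdu, len);
	}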
On Thu, May 15, 2025 at 10:31:00PM -0700, Eric Biggers wrote:
> +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
> +{
> + return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
> }
This drops the unaligned handling. Now in the NVMe protocol it will
always be properly aligned, but my TCP-foo is not good enough to
remember if the networking code will also guarantee 32-bit alignment
for the start of the packet?
Otherwise this looks great:
Reviewed-by: Christoph Hellwig <hch@lst.de>
On Thu, May 15, 2025 at 11:06:56PM -0700, Christoph Hellwig wrote:
> On Thu, May 15, 2025 at 10:31:00PM -0700, Eric Biggers wrote:
> > +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
> > +{
> > + return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
> > }
>
> This drops the unaligned handling. Now in the NVMe protocol it will
> always be properly aligned, but my TCP-foo is not good enough to
> remember if the networking code will also guarantee 32-bit alignment
> for the start of the packet?
>
> Otherwise this looks great:
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
The nvme-tcp driver already assumes that the header is at least 4-byte aligned,
considering that it accesses hdr->plen and the struct doesn't use __packed:
struct nvme_tcp_hdr {
__u8 type;
__u8 flags;
__u8 hlen;
__u8 pdo;
__le32 plen;
};
On the send side, the header size is always sizeof(struct nvme_tcp_cmd_pdu) or
sizeof(struct nvme_tcp_data_pdu) which are multiples of 4. On the receive side,
nvme-tcp validates hdr->hlen == sizeof(struct nvme_tcp_rsp_pdu) and then does:
recv_digest = *(__le32 *)(pdu + hdr->hlen);
So, using put_unaligned_le32() is unnecessary. I just had it there in v1
because I had directly translated crypto_ahash_digest(), which does ultimately
do a put_unaligned_le32() once you unravel all the API layers.
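If we wanted to make that assumption explicit, a build-time check could be
added in some init path (not part of this patch, purely illustrative):

	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) % 4);
	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) % 4);
	BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) % 4);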
- Eric