From nobody Tue Apr 7 10:40:52 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 77FCAECAAD2 for ; Mon, 29 Aug 2022 07:54:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229493AbiH2HyL (ORCPT ); Mon, 29 Aug 2022 03:54:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51290 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229455AbiH2HyJ (ORCPT ); Mon, 29 Aug 2022 03:54:09 -0400 Received: from mail-wr1-f47.google.com (mail-wr1-f47.google.com [209.85.221.47]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BD6ECDFA0 for ; Mon, 29 Aug 2022 00:54:07 -0700 (PDT) Received: by mail-wr1-f47.google.com with SMTP id e20so9057587wri.13 for ; Mon, 29 Aug 2022 00:54:07 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc; bh=ShZ9RCmn3f9kz90o0jDhL8jf0hYJJDCa3dy49UOnJ0c=; b=0E0i9sMOls8j/7Iv8RxP16qNhZMQjQMwJdUArpo5c9TwjCuav4YhBBArh0LbqQb812 N+i4wF071mveN7rDaGrUgzPY2yPCklLf0pWzqpZ50nlx8aD/KaGXS00UK2SpT0nMY1Gm Qhq4MC0731msEl6lx+dYx0Ufx9lg9PvuQzDPvd8Ih/TvuVa1wEgvkAgcYJUTpgruelNL lHN7tEqpZcOEuIIpshNeTEqoAk1YG6ZbF11frk+mUUi5lHPTK65EJa4TZqGP8mVfH5xs uYr3DvoDY5taaMSciRotGBZGTYRc0fjmGa+cijbM4go4Q/gVpyY9eRa7Op3QWBMlScB9 D+wA== X-Gm-Message-State: ACgBeo3F/RHxIp+pbDton+rjvWRA2duotu9kz4cZvHgo6691Qb/aAIss 4pVoKMDNm6NdszIWehzH3mo= X-Google-Smtp-Source: AA6agR6qbJAN0rlN50REUaB4wr68h/NDPjgtgEotNaa5UHtqbsabyDKvM4slVWLQXm5k2iqCD7RrPA== X-Received: by 2002:a05:6000:887:b0:21e:24a0:f302 with SMTP id ca7-20020a056000088700b0021e24a0f302mr5364239wrb.466.1661759646285; Mon, 29 Aug 2022 00:54:06 -0700 (PDT) Received: from localhost.localdomain 
(bzq-219-42-90.isdn.bezeqint.net. [62.219.42.90]) by smtp.gmail.com with ESMTPSA id k35-20020a05600c1ca300b003a5f3de6fddsm8675544wms.25.2022.08.29.00.54.03 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 29 Aug 2022 00:54:04 -0700 (PDT) From: Sagi Grimberg To: linux-nvme@lists.infradead.org Cc: linux-kernel@vger.kernel.org, "Fabio M . De Francesco" , Christoph Hellwig , Keith Busch , Chaitanya Kulkarni , James Smart , Ira Weiny , Venkataramanan Anirudh Subject: [PATCH] nvmet-tcp: Don't map pages which can't come from HIGHMEM Date: Mon, 29 Aug 2022 10:54:01 +0300 Message-Id: <20220829075401.868981-1-sagi@grimberg.me> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: "Fabio M. De Francesco" kmap() is being deprecated in favor of kmap_local_page().[1] There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap's pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. The pages which will be mapped are allocated in nvmet_tcp_map_data(), using the GFP_KERNEL flag. This ensures that they cannot come from HIGHMEM. This implies that a straight page_address() can replace the kmap() of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because, after removing the kmap() calls, there would no longer be any need for it. In addition, there is no reason to use a kvec for the command receive data buffers iovec; use a bio_vec instead and let iov_iter handle the buffer mapping and data copy. Tested with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. 
[1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/ Cc: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Keith Busch Suggested-by: Ira Weiny Signed-off-by: Fabio M. De Francesco [sagi: added bio_vec plus minor naming changes] Signed-off-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 44 ++++++++++++--------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dc3b4dc8fe08..43594e0d609c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -77,9 +77,8 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; - int nr_mapped; struct msghdr recv_msg; - struct kvec *iov; + struct bio_vec *iov; u32 flags; =20 struct list_head entry; @@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd); =20 static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd) @@ -301,35 +299,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_que= ue *queue, void *pdu) =20 static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { - WARN_ON(unlikely(cmd->nr_mapped > 0)); - kfree(cmd->iov); sgl_free(cmd->req.sg); cmd->iov =3D NULL; cmd->req.sg =3D NULL; } =20 -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) -{ - struct scatterlist *sg; - int i; - - sg =3D &cmd->req.sg[cmd->sg_idx]; - - for (i =3D 0; i < cmd->nr_mapped; i++) - kunmap(sg_page(&sg[i])); - - cmd->nr_mapped =3D 0; -} - -static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { - struct kvec *iov =3D cmd->iov; + struct bio_vec 
*iov =3D cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; + int nr_pages; =20 length =3D cmd->pdu_len; - cmd->nr_mapped =3D DIV_ROUND_UP(length, PAGE_SIZE); + nr_pages =3D DIV_ROUND_UP(length, PAGE_SIZE); offset =3D cmd->rbytes_done; cmd->sg_idx =3D offset / PAGE_SIZE; sg_offset =3D offset % PAGE_SIZE; @@ -338,8 +322,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cm= d *cmd) while (length) { u32 iov_len =3D min_t(u32, length, sg->length - sg_offset); =20 - iov->iov_base =3D kmap(sg_page(sg)) + sg->offset + sg_offset; - iov->iov_len =3D iov_len; + iov->bv_page =3D sg_page(sg); + iov->bv_len =3D sg->length; + iov->bv_offset =3D sg->offset + sg_offset; =20 length -=3D iov_len; sg =3D sg_next(sg); @@ -347,8 +332,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cm= d *cmd) sg_offset =3D 0; } =20 - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, - cmd->nr_mapped, cmd->pdu_len); + iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, + nr_pages, cmd->pdu_len); } =20 static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -926,7 +911,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_t= cp_queue *queue, } =20 queue->rcv_state =3D NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |=3D NVMET_TCP_F_INIT_FAILED; } =20 @@ -952,7 +937,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_t= cp_queue *queue) =20 cmd->pdu_len =3D le32_to_cpu(data->data_length); cmd->pdu_recv =3D 0; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); queue->cmd =3D cmd; queue->rcv_state =3D NVMET_TCP_RECV_DATA; =20 @@ -1021,7 +1006,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_q= ueue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state =3D NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(queue->cmd); + nvmet_tcp_build_pdu_iovec(queue->cmd); return 0; } /* send back R2T */ @@ -1141,7 +1126,6 @@ static 
int nvmet_tcp_try_recv_data(struct nvmet_tcp_q= ueue *queue) cmd->rbytes_done +=3D ret; } =20 - nvmet_tcp_unmap_pdu_iovec(cmd); if (queue->data_digest) { nvmet_tcp_prep_recv_ddgst(cmd); return 0; @@ -1411,7 +1395,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct= nvmet_tcp_queue *queue) static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) { nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } =20 @@ -1424,7 +1407,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvme= t_tcp_queue *queue) if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); =20 - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } =20 --=20 2.34.1