From nobody Thu Feb 12 00:25:23 2026
From: Ping Gan
To: kbusch@kernel.org, axboe@kernel.dk, hch@lst.de, sagi@grimberg.me,
	kch@nvidia.com, linux-kernel@vger.kernel.org,
	linux-nvme@lists.infradead.org
Cc: ping_gan@dell.com, jacky_gam_2001@163.com
Subject: [PATCH 3/4] nvmet: support bio polling queue request
Date: Wed, 13 Sep 2023 16:36:43 +0800
X-Mailer: git-send-email 2.26.2
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

If the bio polling queue task is enabled, split and chain the bios as
needed, then fill the request into the polling queue task's lossless
ring.
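The helpers used below (nvmet_pqt_enabled(), nvmet_pqt_ring_enqueue(),
nvmet_wakeup_pq_thread()) and struct nvmet_pqt_bio_req are introduced
earlier in this series. Judging only from how the fields are used in
this patch, the tracking structure looks roughly like the sketch
below; treat it as an illustration, not the authoritative definition:

	/* illustrative sketch; the real definition is in an earlier patch */
	struct nvmet_pqt_bio_req {
		struct nvmet_req	*req;		/* the nvmet request being served */
		struct bio_list		blist;		/* pre-split, REQ_POLLED bios */
		int			io_completed;	/* set by nvmet_pqt_bio_done() */
	};

Each request is split so that no bio exceeds the queue's max_sectors
and max_segments limits (so the block layer will not split a polled
bio again at submit time), the bios are chained and collected on
blist, and the whole unit is handed to the polling queue task through
the lossless ring.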
Signed-off-by: Ping Gan
---
 drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++++++++++++++++++++----
 1 file changed, 214 insertions(+), 29 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 468833675cc9..6f7d04ae6cb7 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -184,6 +184,16 @@ static void nvmet_bio_done(struct bio *bio)
 	nvmet_req_bio_put(req, bio);
 }
 
+static void nvmet_pqt_bio_done(struct bio *bio)
+{
+	struct nvmet_pqt_bio_req *req_done = bio->bi_private;
+
+	nvmet_req_complete(req_done->req, blk_to_nvme_status(req_done->req,
+				bio->bi_status));
+	nvmet_req_bio_put(req_done->req, bio);
+	req_done->io_completed = 1;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 		struct sg_mapping_iter *miter)
@@ -237,6 +247,38 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+#ifdef CONFIG_NVME_MULTIPATH
+extern struct block_device *nvme_mpath_get_bdev(struct block_device *bdev);
+extern const struct block_device_operations nvme_ns_head_ops;
+#endif
+
+static inline int nvmet_chain_par_bio(struct nvmet_req *req, struct bio **bio,
+		struct sg_mapping_iter *prot_miter, struct block_device *bdev,
+		sector_t sector, struct bio_list *blist)
+{
+	struct bio *parent, *child;
+	unsigned int vec_cnt;
+	int rc;
+
+	parent = *bio;
+	vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+	if (req->metadata_len) {
+		rc = nvmet_bdev_alloc_bip(req, parent,
+					  prot_miter);
+		if (unlikely(rc))
+			return rc;
+	}
+	child = bio_alloc(bdev, vec_cnt, parent->bi_opf, GFP_KERNEL);
+	child->bi_iter.bi_sector = sector;
+	*bio = child;
+	bio_chain(*bio, parent);
+	parent->bi_opf |= REQ_POLLED;
+	parent->bi_opf |= REQ_NOWAIT;
+	parent->bi_opf |= REQ_NOMERGE;
+	bio_list_add(blist, parent);
+	return 0;
+}
+
 static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 {
 	unsigned int sg_cnt = req->sg_cnt;
@@ -247,8 +289,13 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	blk_opf_t opf;
 	int i, rc;
 	struct sg_mapping_iter prot_miter;
-	unsigned int iter_flags;
+	unsigned int iter_flags, max_sectors;
+	unsigned short vec_cnt, max_segments;
 	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
+	bool pqt_enabled = nvmet_pqt_enabled();
+	unsigned int sg_len;
+	struct nvmet_pqt_bio_req *req_done = NULL;
+	struct block_device *bdev = req->ns->bdev;
 
 	if (!nvmet_check_transfer_len(req, total_len))
 		return;
@@ -268,6 +315,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		iter_flags = SG_MITER_FROM_SG;
 	}
 
+#ifdef CONFIG_NVME_MULTIPATH
+	if (pqt_enabled && bdev->bd_disk->fops == &nvme_ns_head_ops) {
+		bdev = nvme_mpath_get_bdev(bdev);
+		if (!bdev) {
+			nvmet_req_complete(req, 0);
+			return;
+		}
+		opf |= REQ_DRV;
+	}
+#endif
+	if (pqt_enabled) {
+		req_done = kmalloc(sizeof(struct nvmet_pqt_bio_req), GFP_KERNEL);
+		if (!req_done) {
+			nvmet_req_complete(req, 0);
+			return;
+		}
+	}
+
 	if (is_pci_p2pdma_page(sg_page(req->sg)))
 		opf |= REQ_NOMERGE;
 
@@ -278,54 +343,174 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio_init(bio, req->ns->bdev, req->inline_bvec,
 			 ARRAY_SIZE(req->inline_bvec), opf);
 	} else {
-		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
+		vec_cnt = bio_max_segs(sg_cnt);
+		if (pqt_enabled)
+			vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+		bio = bio_alloc(bdev, vec_cnt, opf,
 				GFP_KERNEL);
 	}
 	bio->bi_iter.bi_sector = sector;
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
+	if (!pqt_enabled) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+	} else {
+		req_done->req = req;
+		bio->bi_private = req_done;
+		bio->bi_end_io = nvmet_pqt_bio_done;
+	}
 
-	blk_start_plug(&plug);
+	if (!pqt_enabled)
+		blk_start_plug(&plug);
 	if (req->metadata_len)
 		sg_miter_start(&prot_miter, req->metadata_sg,
 			       req->metadata_sg_cnt, iter_flags);
 
-	for_each_sg(req->sg, sg, req->sg_cnt, i) {
-		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
-				!= sg->length) {
-			struct bio *prev = bio;
-
-			if (req->metadata_len) {
-				rc = nvmet_bdev_alloc_bip(req, bio,
-							  &prot_miter);
-				if (unlikely(rc)) {
-					bio_io_error(bio);
-					return;
+	if (!pqt_enabled) {
+		for_each_sg(req->sg, sg, req->sg_cnt, i) {
+			while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+					!= sg->length) {
+				struct bio *prev = bio;
+
+				if (req->metadata_len) {
+					rc = nvmet_bdev_alloc_bip(req, bio,
+								  &prot_miter);
+					if (unlikely(rc)) {
+						bio_io_error(bio);
+						return;
+					}
 				}
-			}
 
-			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
-					opf, GFP_KERNEL);
-			bio->bi_iter.bi_sector = sector;
+				bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+						opf, GFP_KERNEL);
+				bio->bi_iter.bi_sector = sector;
 
-			bio_chain(bio, prev);
-			submit_bio(prev);
-		}
+				bio_chain(bio, prev);
+				submit_bio(prev);
+			}
 
-		sector += sg->length >> 9;
-		sg_cnt--;
+			sector += sg->length >> 9;
+			sg_cnt--;
+		}
+	} else {
+		bio_list_init(&req_done->blist);
+		if (!test_bit(QUEUE_FLAG_POLL, &bdev->bd_disk->queue->queue_flags))
+			goto err_bio;
+		max_sectors = bdev->bd_disk->queue->limits.max_sectors;
+		max_sectors <<= 9;
+		max_segments = queue_max_segments(bdev->bd_disk->queue);
+		sg_len = 0;
+		unsigned int offset, len, vec_len, i;
+		bool sg_start_pg = true, need_chain_bio = false;
+		struct page *sglist_page, *max_sector_align;
+		sector_t temp_sector;
+
+		/*
+		 * For bio polling mode, split the bio up front to
+		 * avoid low-level bio splitting at submit time.
+		 */
+		for_each_sg(req->sg, sg, req->sg_cnt, i) {
+			temp_sector = sector;
+			offset = (sg->offset % PAGE_SIZE);
+			if (offset + sg->length > PAGE_SIZE) { // need to split
+				len = sg->length;
+				i = 0;
+				sglist_page = virt_to_page(page_to_virt(sg_page(sg)) + offset);
+				if (offset != 0)
+					sg_start_pg = false;
+				while (len > PAGE_SIZE) {
+					max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+								(PAGE_SIZE*i));
+					vec_len = sg_start_pg?PAGE_SIZE:(PAGE_SIZE - offset);
+					if (bio->bi_vcnt == max_segments - 1 ||
+						sg_len + vec_len > max_sectors)
+						need_chain_bio = true;
+					else {
+						__bio_add_page(bio, max_sector_align,
+							vec_len, sg_start_pg?0:offset);
+						temp_sector += vec_len >> 9;
+						sg_len += vec_len;
+					}
+					if (need_chain_bio) {
+						rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+								bdev, temp_sector, &req_done->blist);
+						if (unlikely(rc))
+							goto err_bio;
+						__bio_add_page(bio, max_sector_align, vec_len,
+							sg_start_pg?0:(PAGE_SIZE - offset));
+						temp_sector += vec_len >> 9;
+						sg_len = vec_len;
+						need_chain_bio = false;
+					}
+					if (!sg_start_pg) {
+						len -= (PAGE_SIZE - offset);
+						sg_start_pg = true;
+					} else {
+						len -= PAGE_SIZE;
+					}
+					i++;
+				}
+				if (len > 0) {
+					max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+								(i * PAGE_SIZE));
+					if (bio->bi_vcnt == max_segments - 1 ||
+						sg_len + len > max_sectors) {
+						rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+								bdev, temp_sector, &req_done->blist);
+						if (unlikely(rc))
+							goto err_bio;
+						sg_len = len;
+					} else {
+						sg_len += len;
+					}
+					__bio_add_page(bio, max_sector_align, len, 0);
+					temp_sector += len >> 9;
+				}
+			} else {
+				if (bio->bi_vcnt == max_segments - 1 ||
+					sg_len + sg->length > max_sectors) {
+					rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+							bdev, temp_sector, &req_done->blist);
+					if (unlikely(rc))
+						goto err_bio;
+					sg_len = sg->length;
+				} else {
+					sg_len += sg->length;
+				}
+				__bio_add_page(bio, sg_page(sg), sg->length, sg->offset);
+			}
+			sector += sg->length >> 9;
+			sg_cnt--;
+		}
 	}
 
 	if (req->metadata_len) {
 		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
 		if (unlikely(rc)) {
-			bio_io_error(bio);
-			return;
+			goto err_bio;
 		}
 	}
 
-	submit_bio(bio);
-	blk_finish_plug(&plug);
+	if (pqt_enabled) {
+		bio->bi_opf |= REQ_POLLED;
+		bio->bi_opf |= REQ_NOWAIT;
+		bio->bi_opf |= REQ_NOMERGE;
+		bio_list_add(&req_done->blist, bio);
+		req_done->io_completed = 0;
+		rc = nvmet_pqt_ring_enqueue(req_done);
+		if (rc < 0)
+			goto err_bio;
+		nvmet_wakeup_pq_thread();
+	} else {
+		submit_bio(bio);
+	}
+	if (!pqt_enabled)
+		blk_finish_plug(&plug);
+	return;
+err_bio:
+	bio_io_error(bio);
+	if (pqt_enabled)
+		kfree(req_done);
+	return;
 }
 
 static void nvmet_bdev_execute_flush(struct nvmet_req *req)
-- 
2.26.2
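
P.S. For review context: the consumer side this patch relies on lives in
an earlier patch of the series. Judging only from the producer calls
above (nvmet_pqt_ring_enqueue() followed by nvmet_wakeup_pq_thread())
and the io_completed flag set by nvmet_pqt_bio_done(), the polling queue
task is expected to behave roughly like the sketch below;
nvmet_pqt_ring_dequeue() and nvmet_pq_thread() are hypothetical names
used only for illustration.

	static int nvmet_pq_thread(void *unused)
	{
		struct nvmet_pqt_bio_req *req_done;
		struct bio *bio, *head;

		while (!kthread_should_stop()) {
			req_done = nvmet_pqt_ring_dequeue();	/* hypothetical */
			if (!req_done) {
				/* idle until nvmet_wakeup_pq_thread() */
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				continue;
			}
			/* submit the pre-split REQ_POLLED bios in list order */
			head = bio_list_peek(&req_done->blist);
			while ((bio = bio_list_pop(&req_done->blist)))
				submit_bio(bio);
			/* reap the polled hw queue until the chain completes */
			while (!READ_ONCE(req_done->io_completed))
				bio_poll(head, NULL, 0);
			kfree(req_done);
		}
		return 0;
	}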