From: Leon Romanovsky
To: Jens Axboe, Keith Busch, Christoph Hellwig, Sagi Grimberg
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org
Subject: [PATCH v2 1/2] nvme-pci: migrate to dma_map_phys instead of map_page
Date: Mon, 20 Oct 2025 20:00:20 +0300
Message-ID: <20251020-block-with-mmio-v2-1-147e9f93d8d4@nvidia.com>
In-Reply-To: <20251020-block-with-mmio-v2-0-147e9f93d8d4@nvidia.com>
References: <20251020-block-with-mmio-v2-0-147e9f93d8d4@nvidia.com>

After the introduction of dma_map_phys(), there is no need to convert a
physical address to a struct page just to map it. Use dma_map_phys()
directly.

Reviewed-by: Keith Busch
Reviewed-by: Christoph Hellwig
Signed-off-by: Leon Romanovsky
Reviewed-by: Chaitanya Kulkarni
---
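For reference, a minimal before/after sketch of the mapping call this patch
replaces. It is illustrative only and not part of the patch; old_way() and
new_way() are hypothetical wrappers, and dev, paddr, len and dir are
placeholder parameters.

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Before: the physical address had to be converted back to a struct page. */
static dma_addr_t old_way(struct device *dev, phys_addr_t paddr, size_t len,
                          enum dma_data_direction dir)
{
        return dma_map_page(dev, phys_to_page(paddr), offset_in_page(paddr),
                            len, dir);
}

/*
 * After: dma_map_phys() takes the physical address directly and also
 * accepts DMA attributes (0 here; DMA_ATTR_* flags where needed).
 */
static dma_addr_t new_way(struct device *dev, phys_addr_t paddr, size_t len,
                          enum dma_data_direction dir)
{
        return dma_map_phys(dev, paddr, len, dir, 0);
}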
 block/blk-mq-dma.c         |  4 ++--
 drivers/nvme/host/pci.c    | 27 +++++++++++++++------------
 include/linux/blk-mq-dma.h |  1 +
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 449950029872..4ba7b0323da4 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -93,8 +93,8 @@ static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
 static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter, struct phys_vec *vec)
 {
-       iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
-                       offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
+       iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
+                       rq_dma_dir(req), 0);
        if (dma_mapping_error(dma_dev, iter->addr)) {
                iter->status = BLK_STS_RESOURCE;
                return false;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c916176bd9f0..91a8965754f0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -685,20 +685,20 @@ static void nvme_free_descriptors(struct request *req)
        }
 }
 
-static void nvme_free_prps(struct request *req)
+static void nvme_free_prps(struct request *req, unsigned int attrs)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
        unsigned int i;
 
        for (i = 0; i < iod->nr_dma_vecs; i++)
-               dma_unmap_page(nvmeq->dev->dev, iod->dma_vecs[i].addr,
-                               iod->dma_vecs[i].len, rq_dma_dir(req));
+               dma_unmap_phys(nvmeq->dev->dev, iod->dma_vecs[i].addr,
+                               iod->dma_vecs[i].len, rq_dma_dir(req), attrs);
        mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
 }
 
 static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
-               struct nvme_sgl_desc *sg_list)
+               struct nvme_sgl_desc *sg_list, unsigned int attrs)
 {
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
        enum dma_data_direction dir = rq_dma_dir(req);
@@ -707,13 +707,14 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
        unsigned int i;
 
        if (sge->type == (NVME_SGL_FMT_DATA_DESC << 4)) {
-               dma_unmap_page(dma_dev, le64_to_cpu(sge->addr), len, dir);
+               dma_unmap_phys(dma_dev, le64_to_cpu(sge->addr), len, dir,
+                               attrs);
                return;
        }
 
        for (i = 0; i < len / sizeof(*sg_list); i++)
-               dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr),
-                               le32_to_cpu(sg_list[i].length), dir);
+               dma_unmap_phys(dma_dev, le64_to_cpu(sg_list[i].addr),
+                               le32_to_cpu(sg_list[i].length), dir, attrs);
 }
 
 static void nvme_unmap_metadata(struct request *req)
@@ -723,6 +724,7 @@ static void nvme_unmap_metadata(struct request *req)
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct device *dma_dev = nvmeq->dev->dev;
        struct nvme_sgl_desc *sge = iod->meta_descriptor;
+       unsigned int attrs = 0;
 
        if (iod->flags & IOD_SINGLE_META_SEGMENT) {
                dma_unmap_page(dma_dev, iod->meta_dma,
@@ -734,10 +736,10 @@ static void nvme_unmap_metadata(struct request *req)
        if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state,
                                        iod->meta_total_len)) {
                if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
-                       nvme_free_sgls(req, sge, &sge[1]);
+                       nvme_free_sgls(req, sge, &sge[1], attrs);
                else
-                       dma_unmap_page(dma_dev, iod->meta_dma,
-                                       iod->meta_total_len, dir);
+                       dma_unmap_phys(dma_dev, iod->meta_dma,
+                                       iod->meta_total_len, dir, attrs);
        }
 
        if (iod->meta_descriptor)
@@ -750,6 +752,7 @@ static void nvme_unmap_data(struct request *req)
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
        struct device *dma_dev = nvmeq->dev->dev;
+       unsigned int attrs = 0;
 
        if (iod->flags & IOD_SINGLE_SEGMENT) {
                static_assert(offsetof(union nvme_data_ptr, prp1) ==
@@ -762,9 +765,9 @@ static void nvme_unmap_data(struct request *req)
        if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
                if (nvme_pci_cmd_use_sgl(&iod->cmd))
                        nvme_free_sgls(req, iod->descriptors[0],
-                                       &iod->cmd.common.dptr.sgl);
+                                       &iod->cmd.common.dptr.sgl, attrs);
                else
-                       nvme_free_prps(req);
+                       nvme_free_prps(req, attrs);
        }
 
        if (iod->nr_descriptors)
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index 51829958d872..faf4dd574c62 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -16,6 +16,7 @@ struct blk_dma_iter {
        /* Output address range for this iteration */
        dma_addr_t addr;
        u32 len;
+       unsigned int attrs;
 
        /* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
        blk_status_t status;
-- 
2.51.0

From: Leon Romanovsky
To: Jens Axboe, Keith Busch, Christoph Hellwig, Sagi Grimberg
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org
Subject: [PATCH v2 2/2] block-dma: properly take MMIO path
Date: Mon, 20 Oct 2025 20:00:21 +0300
Message-ID: <20251020-block-with-mmio-v2-2-147e9f93d8d4@nvidia.com>
In-Reply-To: <20251020-block-with-mmio-v2-0-147e9f93d8d4@nvidia.com>
References: <20251020-block-with-mmio-v2-0-147e9f93d8d4@nvidia.com>

Commit eadaa8b255f3 ("dma-mapping: introduce new DMA attribute to
indicate MMIO memory") added the DMA_ATTR_MMIO attribute to describe
MMIO addresses, which must not be subject to CPU cache maintenance, as
an outcome of the discussion referenced in the Link tag below.

For PCI_P2PDMA_MAP_THRU_HOST_BRIDGE transfers, the blk-mq-dma logic
treated such addresses as regular pages and relied on the struct page
DMA flow. That flow performs CPU cache flushing, which must not happen
here, and does not set the IOMMU_MMIO flag in the DMA-IOMMU case.

Link: https://lore.kernel.org/all/f912c446-1ae9-4390-9c11-00dce7bf0fd3@arm.com/
Signed-off-by: Leon Romanovsky
Reviewed-by: Chaitanya Kulkarni
---
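For context, a simplified sketch of the attribute flow this patch
establishes. It is illustrative only and not part of the patch; map_one()
and unmap_one() are hypothetical helpers, and dev, paddr, len, dir and
thru_host_bridge are placeholder parameters.

#include <linux/dma-mapping.h>

/*
 * A P2P range routed through the host bridge is MMIO: map it with
 * DMA_ATTR_MMIO so no CPU cache maintenance is performed and the IOMMU
 * mapping is created as MMIO, and pass the same attrs back on unmap.
 */
static dma_addr_t map_one(struct device *dev, phys_addr_t paddr, size_t len,
                          enum dma_data_direction dir, bool thru_host_bridge,
                          unsigned int *attrs_out)
{
        unsigned int attrs = thru_host_bridge ? DMA_ATTR_MMIO : 0;

        *attrs_out = attrs;     /* the caller must reuse these attrs on unmap */
        return dma_map_phys(dev, paddr, len, dir, attrs);
}

static void unmap_one(struct device *dev, dma_addr_t addr, size_t len,
                      enum dma_data_direction dir, unsigned int attrs)
{
        dma_unmap_phys(dev, addr, len, dir, attrs);
}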
 block/blk-mq-dma.c            |  6 ++++--
 drivers/nvme/host/pci.c       | 23 +++++++++++++++++++++--
 include/linux/blk-integrity.h |  7 ++++---
 include/linux/blk-mq-dma.h    | 11 +++++++----
 4 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 4ba7b0323da4..3ede8022b41c 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -94,7 +94,7 @@ static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter, struct phys_vec *vec)
 {
        iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
-                       rq_dma_dir(req), 0);
+                       rq_dma_dir(req), iter->attrs);
        if (dma_mapping_error(dma_dev, iter->addr)) {
                iter->status = BLK_STS_RESOURCE;
                return false;
@@ -116,7 +116,7 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
 
        do {
                error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
-                               vec->len, dir, 0);
+                               vec->len, dir, iter->attrs);
                if (error)
                        break;
                mapped += vec->len;
@@ -184,6 +184,8 @@ static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
                 * P2P transfers through the host bridge are treated the
                 * same as non-P2P transfers below and during unmap.
                 */
+               iter->attrs |= DMA_ATTR_MMIO;
+               fallthrough;
        case PCI_P2PDMA_MAP_NONE:
                break;
        default:
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 91a8965754f0..f45d1968611d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -260,6 +260,12 @@ enum nvme_iod_flags {
        /* single segment dma mapping */
        IOD_SINGLE_SEGMENT = 1U << 2,
 
+       /* Data payload contains MMIO memory */
+       IOD_DATA_MMIO = 1U << 3,
+
+       /* Metadata contains MMIO memory */
+       IOD_META_MMIO = 1U << 4,
+
        /* Metadata using non-coalesced MPTR */
        IOD_SINGLE_META_SEGMENT = 1U << 5,
 };
@@ -733,8 +739,11 @@ static void nvme_unmap_metadata(struct request *req)
                return;
        }
 
+       if (iod->flags & IOD_META_MMIO)
+               attrs |= DMA_ATTR_MMIO;
+
        if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state,
-                                       iod->meta_total_len)) {
+                                       iod->meta_total_len, attrs)) {
                if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
                        nvme_free_sgls(req, sge, &sge[1], attrs);
                else
@@ -762,7 +771,11 @@ static void nvme_unmap_data(struct request *req)
                return;
        }
 
-       if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
+       if (iod->flags & IOD_DATA_MMIO)
+               attrs |= DMA_ATTR_MMIO;
+
+       if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len,
+                             attrs)) {
                if (nvme_pci_cmd_use_sgl(&iod->cmd))
                        nvme_free_sgls(req, iod->descriptors[0],
                                        &iod->cmd.common.dptr.sgl, attrs);
@@ -1038,6 +1051,9 @@ static blk_status_t nvme_map_data(struct request *req)
        if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter))
                return iter.status;
 
+       if (iter.attrs & DMA_ATTR_MMIO)
+               iod->flags |= IOD_DATA_MMIO;
+
        if (use_sgl == SGL_FORCED ||
            (use_sgl == SGL_SUPPORTED &&
             (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
@@ -1060,6 +1076,9 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
                                        &iod->meta_dma_state, &iter))
                return iter.status;
 
+       if (iter.attrs & DMA_ATTR_MMIO)
+               iod->flags |= IOD_META_MMIO;
+
        if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
                entries = 1;
 
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index b659373788f6..aa42172f5cc9 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -30,10 +30,11 @@ int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
 
 static inline bool blk_rq_integrity_dma_unmap(struct request *req,
                struct device *dma_dev, struct dma_iova_state *state,
-               size_t mapped_len)
+               size_t mapped_len, unsigned int attrs)
 {
        return blk_dma_unmap(req, dma_dev, state, mapped_len,
-                       bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
+                       bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA,
+                       attrs);
 }
 
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
@@ -126,7 +127,7 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
 }
 static inline bool blk_rq_integrity_dma_unmap(struct request *req,
                struct device *dma_dev, struct dma_iova_state *state,
-               size_t mapped_len)
+               size_t mapped_len, unsigned int attrs)
 {
        return false;
 }
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index faf4dd574c62..aab4d04e6c69 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -50,19 +50,21 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
  * @state: DMA IOVA state
  * @mapped_len: number of bytes to unmap
  * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
+ * @attrs: DMA attributes
  *
  * Returns %false if the callers need to manually unmap every DMA segment
 * mapped using @iter or %true if no work is left to be done.
 */
 static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
-               struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
+               struct dma_iova_state *state, size_t mapped_len, bool is_p2p,
+               unsigned int attrs)
 {
        if (is_p2p)
                return true;
 
        if (dma_use_iova(state)) {
                dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req),
-                               0);
+                               attrs);
                return true;
        }
 
@@ -70,10 +72,11 @@ static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
 }
 
 static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
-               struct dma_iova_state *state, size_t mapped_len)
+               struct dma_iova_state *state, size_t mapped_len,
+               unsigned int attrs)
 {
        return blk_dma_unmap(req, dma_dev, state, mapped_len,
-                       req->cmd_flags & REQ_P2PDMA);
+                       req->cmd_flags & REQ_P2PDMA, attrs);
 }
 
 #endif /* BLK_MQ_DMA_H */
-- 
2.51.0
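Taken together, the two patches give a consumer of the blk-mq DMA iterator
roughly the following pattern. This is a condensed, illustrative sketch of
what the nvme-pci changes above do, assuming the blk_rq_dma_map_iter_start()
and blk_rq_dma_map_iter_next() iterator API used by the nvme driver;
my_map(), my_unmap(), my_dev and the saved mmio flag are placeholder names.

#include <linux/blk-mq-dma.h>
#include <linux/dma-mapping.h>

static blk_status_t my_map(struct request *req, struct device *my_dev,
                           struct dma_iova_state *state, bool *mmio)
{
        struct blk_dma_iter iter;

        if (!blk_rq_dma_map_iter_start(req, my_dev, state, &iter))
                return iter.status;

        /* Record whether the payload is MMIO so unmap passes the same attrs. */
        *mmio = iter.attrs & DMA_ATTR_MMIO;

        do {
                /* program iter.addr / iter.len into the device's descriptors */
        } while (blk_rq_dma_map_iter_next(req, my_dev, state, &iter));

        return iter.status;
}

static void my_unmap(struct request *req, struct device *my_dev,
                     struct dma_iova_state *state, size_t total_len, bool mmio)
{
        unsigned int attrs = mmio ? DMA_ATTR_MMIO : 0;

        if (blk_rq_dma_unmap(req, my_dev, state, total_len, attrs))
                return;

        /* otherwise unmap each recorded segment with dma_unmap_phys(..., attrs) */
}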