[PATCH v10 1/8] iov_iter: Define flags to qualify page extraction.

David Howells posted 8 patches 2 years, 7 months ago
There is a newer version of this series
[PATCH v10 1/8] iov_iter: Define flags to qualify page extraction.
Posted by David Howells 2 years, 7 months ago
Define flags to qualify page extraction to pass into iov_iter_*_pages*()
rather than passing in FOLL_* flags.

For now only a flag to allow peer-to-peer DMA is supported.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Jens Axboe <axboe@kernel.dk>
cc: Logan Gunthorpe <logang@deltatee.com>
cc: linux-fsdevel@vger.kernel.org
cc: linux-block@vger.kernel.org
---

Notes:
    ver #9)
     - Change extract_flags to extraction_flags.
    
    ver #7)
     - Don't use FOLL_* as a parameter, but rather define constants
       specifically to use with iov_iter_*_pages*().
     - Drop the I/O direction constants for now.

 block/bio.c         |  6 +++---
 block/blk-map.c     |  8 ++++----
 include/linux/uio.h |  7 +++++--
 lib/iov_iter.c      | 14 ++++++++------
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index ab59a491a883..683444e6b711 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1249,7 +1249,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
-	unsigned int gup_flags = 0;
+	unsigned int extraction_flags = 0;
 	ssize_t size, left;
 	unsigned len, i = 0;
 	size_t offset, trim;
@@ -1264,7 +1264,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
 	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
-		gup_flags |= FOLL_PCI_P2PDMA;
+		extraction_flags |= ITER_ALLOW_P2PDMA;
 
 	/*
 	 * Each segment in the iov is required to be a block size multiple.
@@ -1275,7 +1275,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	 */
 	size = iov_iter_get_pages(iter, pages,
 				  UINT_MAX - bio->bi_iter.bi_size,
-				  nr_pages, &offset, gup_flags);
+				  nr_pages, &offset, extraction_flags);
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 19940c978c73..7db52ad5b2d0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -267,7 +267,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 {
 	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
 	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
-	unsigned int gup_flags = 0;
+	unsigned int extraction_flags = 0;
 	struct bio *bio;
 	int ret;
 	int j;
@@ -280,7 +280,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		return -ENOMEM;
 
 	if (blk_queue_pci_p2pdma(rq->q))
-		gup_flags |= FOLL_PCI_P2PDMA;
+		extraction_flags |= ITER_ALLOW_P2PDMA;
 
 	while (iov_iter_count(iter)) {
 		struct page **pages, *stack_pages[UIO_FASTIOV];
@@ -291,10 +291,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		if (nr_vecs <= ARRAY_SIZE(stack_pages)) {
 			pages = stack_pages;
 			bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
-						   nr_vecs, &offs, gup_flags);
+						   nr_vecs, &offs, extraction_flags);
 		} else {
 			bytes = iov_iter_get_pages_alloc(iter, &pages,
-						LONG_MAX, &offs, gup_flags);
+						LONG_MAX, &offs, extraction_flags);
 		}
 		if (unlikely(bytes <= 0)) {
 			ret = bytes ? bytes : -EFAULT;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 9f158238edba..58fda77f6847 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -252,12 +252,12 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *
 		     loff_t start, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 		size_t maxsize, unsigned maxpages, size_t *start,
-		unsigned gup_flags);
+		unsigned extraction_flags);
 ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		struct page ***pages, size_t maxsize, size_t *start,
-		unsigned gup_flags);
+		unsigned extraction_flags);
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
@@ -360,4 +360,7 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
 	};
 }
 
+/* Flags for iov_iter_get/extract_pages*() */
+#define ITER_ALLOW_P2PDMA	0x01	/* Allow P2PDMA on the extracted pages */
+
 #endif
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f9a3ff37ecd1..da7db39075c6 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1432,9 +1432,9 @@ static struct page *first_bvec_segment(const struct iov_iter *i,
 static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   unsigned int maxpages, size_t *start,
-		   unsigned int gup_flags)
+		   unsigned int extraction_flags)
 {
-	unsigned int n;
+	unsigned int n, gup_flags = 0;
 
 	if (maxsize > i->count)
 		maxsize = i->count;
@@ -1442,6 +1442,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		return 0;
 	if (maxsize > MAX_RW_COUNT)
 		maxsize = MAX_RW_COUNT;
+	if (extraction_flags & ITER_ALLOW_P2PDMA)
+		gup_flags |= FOLL_PCI_P2PDMA;
 
 	if (likely(user_backed_iter(i))) {
 		unsigned long addr;
@@ -1495,14 +1497,14 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 
 ssize_t iov_iter_get_pages(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
-		   size_t *start, unsigned gup_flags)
+		   size_t *start, unsigned extraction_flags)
 {
 	if (!maxpages)
 		return 0;
 	BUG_ON(!pages);
 
 	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages,
-					  start, gup_flags);
+					  start, extraction_flags);
 }
 EXPORT_SYMBOL_GPL(iov_iter_get_pages);
 
@@ -1515,14 +1517,14 @@ EXPORT_SYMBOL(iov_iter_get_pages2);
 
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
-		   size_t *start, unsigned gup_flags)
+		   size_t *start, unsigned extraction_flags)
 {
 	ssize_t len;
 
 	*pages = NULL;
 
 	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start,
-					 gup_flags);
+					 extraction_flags);
 	if (len <= 0) {
 		kvfree(*pages);
 		*pages = NULL;
Re: [PATCH v10 1/8] iov_iter: Define flags to qualify page extraction.
Posted by David Hildenbrand 2 years, 7 months ago
On 25.01.23 22:06, David Howells wrote:
> Define flags to qualify page extraction to pass into iov_iter_*_pages*()
> rather than passing in FOLL_* flags.
> 
> For now only a flag to allow peer-to-peer DMA is supported.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: John Hubbard <jhubbard@nvidia.com>
> cc: Al Viro <viro@zeniv.linux.org.uk>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Logan Gunthorpe <logang@deltatee.com>
> cc: linux-fsdevel@vger.kernel.org
> cc: linux-block@vger.kernel.org
> ---
> 
[...]

> +++ b/include/linux/uio.h
> @@ -252,12 +252,12 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *
>   		     loff_t start, size_t count);
>   ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
>   		size_t maxsize, unsigned maxpages, size_t *start,
> -		unsigned gup_flags);
> +		unsigned extraction_flags);
>   ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
>   			size_t maxsize, unsigned maxpages, size_t *start);
>   ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
>   		struct page ***pages, size_t maxsize, size_t *start,
> -		unsigned gup_flags);
> +		unsigned extraction_flags);
>   ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
>   			size_t maxsize, size_t *start);
>   int iov_iter_npages(const struct iov_iter *i, int maxpages);
> @@ -360,4 +360,7 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
>   	};
>   }
>   
> +/* Flags for iov_iter_get/extract_pages*() */
> +#define ITER_ALLOW_P2PDMA	0x01	/* Allow P2PDMA on the extracted pages */

Just a note that the usage of new __bitwise types instead of "unsigned" 
is encouraged for flags.

See rmap_t in include/linux/rmap.h as an example.

Apart from that LGTM.

-- 
Thanks,

David / dhildenb
Re: [PATCH v10 1/8] iov_iter: Define flags to qualify page extraction.
Posted by David Howells 2 years, 7 months ago
David Hildenbrand <david@redhat.com> wrote:

> Just a note that the usage of new __bitwise types instead of "unsigned" is
> encouraged for flags.
> 
> See rmap_t in include/linux/rmap.h as an example.

Hmmm...  rmap_t really ought to be unsigned int.

David
Re: [PATCH v10 1/8] iov_iter: Define flags to qualify page extraction.
Posted by David Hildenbrand 2 years, 7 months ago
On 26.01.23 10:49, David Howells wrote:
> David Hildenbrand <david@redhat.com> wrote:
> 
>> Just a note that the usage of new __bitwise types instead of "unsigned" is
>> encouraged for flags.
>>
>> See rmap_t in include/linux/rmap.h as an example.
> 
> Hmmm...  rmap_t really ought to be unsigned int.

Not sure if that particularly matters here in practice, ... anyhow

$ git grep "typedef int" | grep __bitwise | wc -l
27
$ git grep "typedef unsigned" | grep __bitwise | wc -l
23

So chose what you prefer ;)


-- 
Thanks,

David / dhildenb
[PATCH] iov_iter: Use __bitwise with the extraction_flags
Posted by David Howells 2 years, 7 months ago
X-Mailer: MH-E 8.6+git; nmh 1.7.1; GNU Emacs 28.2
--------
David Hildenbrand <david@redhat.com> wrote:

> >> Just a note that the usage of new __bitwise types instead of "unsigned" is
> >> encouraged for flags.

Something like the attached?

> $ git grep "typedef int" | grep __bitwise | wc -l
> 27
> $ git grep "typedef unsigned" | grep __bitwise | wc -l
> 23

git grep __bitwise | grep typedef | grep __u | wc -l
62

*shrug*

Interestingly, things like __be32 are __bitwise.  I wonder if that actually
makes sense or if it was just convenient so stop people doing arithmetic on
them.  I guess doing AND/OR/XOR on them isn't a problem provided both
arguments are appropriately byte-swapped.

David
---
 block/bio.c         |    2 +-
 block/blk-map.c     |    2 +-
 include/linux/uio.h |   13 ++++++++-----
 lib/iov_iter.c      |   14 +++++++-------
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 466956779d2c..fc57f0aa098e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1244,11 +1244,11 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
  */
 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
+	iov_iter_extraction_t extraction_flags = 0;
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
-	unsigned int extraction_flags = 0;
 	ssize_t size, left;
 	unsigned len, i = 0;
 	size_t offset, trim;
diff --git a/block/blk-map.c b/block/blk-map.c
index 9c7ccea3f334..0f1593e144da 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -265,9 +265,9 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		gfp_t gfp_mask)
 {
+	iov_iter_extraction_t extraction_flags = 0;
 	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
 	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
-	unsigned int extraction_flags = 0;
 	struct bio *bio;
 	int ret;
 	int j;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 47ebb59a0202..b1be128bb2fa 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -13,6 +13,8 @@
 struct page;
 struct pipe_inode_info;
 
+typedef unsigned int iov_iter_extraction_t;
+
 struct kvec {
 	void *iov_base; /* and that should *never* hold a userland pointer */
 	size_t iov_len;
@@ -252,12 +254,12 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *
 		     loff_t start, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 		size_t maxsize, unsigned maxpages, size_t *start,
-		unsigned extraction_flags);
+		iov_iter_extraction_t extraction_flags);
 ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		struct page ***pages, size_t maxsize, size_t *start,
-		unsigned extraction_flags);
+		iov_iter_extraction_t extraction_flags);
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
@@ -359,13 +361,14 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
 		.count = count
 	};
 }
-
 /* Flags for iov_iter_get/extract_pages*() */
-#define ITER_ALLOW_P2PDMA	0x01	/* Allow P2PDMA on the extracted pages */
+/* Allow P2PDMA on the extracted pages */
+#define ITER_ALLOW_P2PDMA	((__force iov_iter_extraction_t)0x01)
 
 ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
 			       size_t maxsize, unsigned int maxpages,
-			       unsigned int extraction_flags, size_t *offset0);
+			       iov_iter_extraction_t extraction_flags,
+			       size_t *offset0);
 
 /**
  * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index cea503b2ec30..d70496019b1d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1432,7 +1432,7 @@ static struct page *first_bvec_segment(const struct iov_iter *i,
 static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   unsigned int maxpages, size_t *start,
-		   unsigned int extraction_flags)
+		   iov_iter_extraction_t extraction_flags)
 {
 	unsigned int n, gup_flags = 0;
 
@@ -1929,7 +1929,7 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
 					   struct page ***pages, size_t maxsize,
 					   unsigned int maxpages,
-					   unsigned int extraction_flags,
+					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
 	unsigned int nr, offset, chunk, j;
@@ -1971,7 +1971,7 @@ static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
 static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
 					     struct page ***pages, size_t maxsize,
 					     unsigned int maxpages,
-					     unsigned int extraction_flags,
+					     iov_iter_extraction_t extraction_flags,
 					     size_t *offset0)
 {
 	struct page *page, **p;
@@ -2017,7 +2017,7 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
 static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   struct page ***pages, size_t maxsize,
 					   unsigned int maxpages,
-					   unsigned int extraction_flags,
+					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
 	struct page **p, *page;
@@ -2060,7 +2060,7 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
 					   struct page ***pages, size_t maxsize,
 					   unsigned int maxpages,
-					   unsigned int extraction_flags,
+					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
 	struct page **p, *page;
@@ -2125,7 +2125,7 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
 					   struct page ***pages,
 					   size_t maxsize,
 					   unsigned int maxpages,
-					   unsigned int extraction_flags,
+					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
 	unsigned long addr;
@@ -2207,7 +2207,7 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
 			       struct page ***pages,
 			       size_t maxsize,
 			       unsigned int maxpages,
-			       unsigned int extraction_flags,
+			       iov_iter_extraction_t extraction_flags,
 			       size_t *offset0)
 {
 	maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT);
Re: [PATCH] iov_iter: Use __bitwise with the extraction_flags
Posted by Al Viro 2 years, 7 months ago
On Thu, Jan 26, 2023 at 10:33:50AM +0000, David Howells wrote:
> X-Mailer: MH-E 8.6+git; nmh 1.7.1; GNU Emacs 28.2
> --------
> David Hildenbrand <david@redhat.com> wrote:
> 
> > >> Just a note that the usage of new __bitwise types instead of "unsigned" is
> > >> encouraged for flags.
> 
> Something like the attached?
> 
> > $ git grep "typedef int" | grep __bitwise | wc -l
> > 27
> > $ git grep "typedef unsigned" | grep __bitwise | wc -l
> > 23
> 
> git grep __bitwise | grep typedef | grep __u | wc -l
> 62
> 
> *shrug*
> 
> Interestingly, things like __be32 are __bitwise.  I wonder if that actually
> makes sense or if it was just convenient so stop people doing arithmetic on
> them.  I guess doing AND/OR/XOR on them isn't a problem provided both
> arguments are appropriately byte-swapped.

Forget the words "byte-swapped".  There are several data types.
With different memory representations.  Bitwise operations are
valid between the values of the same type and yield the result
of that same type.

The fact that mapping between those representations happens to
be an involution is an accident; keeping track of the number of
times you've done a byteswap to the value currently in this
variable is asking for trouble.  It's really easy to fuck up.

"Am I trying to store the value of type X in variable of type Y
(presumably having forgotten that I need to use X_to_Y(...)
to convert)" is much easier to keep track of.
Re: [PATCH] iov_iter: Use __bitwise with the extraction_flags
Posted by David Hildenbrand 2 years, 7 months ago
On 26.01.23 11:33, David Howells wrote:

> Interestingly, things like __be32 are __bitwise.  I wonder if that actually
> makes sense or if it was just convenient so stop people doing arithmetic on
> them.  I guess doing AND/OR/XOR on them isn't a problem provided both
> arguments are appropriately byte-swapped.

I recall that __be32 and friends were one of the early users of 
__bitwise in the kernel. And the reason IIRC was exactly that: detect 
when no proper conversion was performed using static code analysis 
(Sparse). While some operations might make sense, the abuse is much more 
likely.


LGTM, thanks!

-- 
Thanks,

David / dhildenb