[PATCH 05/12] iomap: Introduce IOMAP_ENCODED

Goldwyn Rodrigues posted 12 patches 1 month, 3 weeks ago
[PATCH 05/12] iomap: Introduce IOMAP_ENCODED
Posted by Goldwyn Rodrigues 1 month, 3 weeks ago
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

An encoded extent must be read completely. Make the bio just as a
regular bio and let filesystem deal with the rest of the extent.
A new bio must be created if a new iomap is returned.
The filesystem must be informed that the bio to be read is
encoded and the offset from which the encoded extent starts. So, pass
the iomap associated with the bio while calling submit_io. Save the
previous iomap (associated with the bio being submitted) in prev in
order to submit when the iomap changes.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/iomap/buffered-io.c | 17 ++++++++++-------
 include/linux/iomap.h  | 11 +++++++++--
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0e682ff84e4a..4c734899a8e5 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -378,12 +378,13 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 
-	return srcmap->type != IOMAP_MAPPED ||
+	return (srcmap->type != IOMAP_MAPPED &&
+			srcmap->type != IOMAP_ENCODED) ||
 		(srcmap->flags & IOMAP_F_NEW) ||
 		pos >= i_size_read(iter->inode);
 }
 
-static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
+static loff_t iomap_readpage_iter(struct iomap_iter *iter,
 		struct iomap_readpage_ctx *ctx, loff_t offset)
 {
 	const struct iomap *iomap = &iter->iomap;
@@ -419,6 +420,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 
 	sector = iomap_sector(iomap, pos);
 	if (!ctx->bio ||
+	    (iomap->type & IOMAP_ENCODED && iomap->offset != iter->prev.offset) ||
 	    bio_end_sector(ctx->bio) != sector ||
 	    !bio_add_folio(ctx->bio, folio, plen, poff)) {
 		struct bio_set *bioset;
@@ -428,10 +430,11 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 
 		if (ctx->bio) {
 			if (ctx->ops && ctx->ops->submit_io)
-				ctx->ops->submit_io(iter->inode, ctx->bio);
+				ctx->ops->submit_io(iter->inode, ctx->bio, &iter->prev);
 			else
 				submit_bio(ctx->bio);
 		}
+		iter->prev = iter->iomap;
 
 		if (ctx->rac) /* same as readahead_gfp_mask */
 			gfp |= __GFP_NORETRY | __GFP_NOWARN;
@@ -470,7 +473,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 	return pos - orig_pos + plen;
 }
 
-static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
+static loff_t iomap_read_folio_iter(struct iomap_iter *iter,
 		struct iomap_readpage_ctx *ctx)
 {
 	struct folio *folio = ctx->cur_folio;
@@ -509,7 +512,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
 
 	if (ctx.bio) {
 		if (ctx.ops->submit_io)
-			ctx.ops->submit_io(iter.inode, ctx.bio);
+			ctx.ops->submit_io(iter.inode, ctx.bio, &iter.prev);
 		else
 			submit_bio(ctx.bio);
 		WARN_ON_ONCE(!ctx.cur_folio_in_bio);
@@ -527,7 +530,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
 }
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
-static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
+static loff_t iomap_readahead_iter(struct iomap_iter *iter,
 		struct iomap_readpage_ctx *ctx)
 {
 	loff_t length = iomap_length(iter);
@@ -588,7 +591,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops,
 
 	if (ctx.bio) {
 		if (ctx.ops && ctx.ops->submit_io)
-			ctx.ops->submit_io(iter.inode, ctx.bio);
+			ctx.ops->submit_io(iter.inode, ctx.bio, &iter.prev);
 		else
 			submit_bio(ctx.bio);
 	}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 7b757bea8455..a5cf00a01f23 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
 #define IOMAP_MAPPED	2	/* blocks allocated at @addr */
 #define IOMAP_UNWRITTEN	3	/* blocks allocated at @addr in unwritten state */
 #define IOMAP_INLINE	4	/* data inline in the inode */
+#define IOMAP_ENCODED	5	/* data encoded, R/W whole extent */
 
 /*
  * Flags reported by the file system from iomap_begin:
@@ -107,6 +108,8 @@ struct iomap {
 
 static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
 {
+	if (iomap->type & IOMAP_ENCODED)
+		return iomap->addr;
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
@@ -217,9 +220,13 @@ struct iomap_iter {
 	loff_t pos;
 	u64 len;
 	s64 processed;
+	unsigned type;
 	unsigned flags;
 	struct iomap iomap;
-	struct iomap srcmap;
+	union {
+		struct iomap srcmap;
+		struct iomap prev;
+	};
 	void *private;
 };
 
@@ -261,7 +268,7 @@ struct iomap_read_folio_ops {
 	 * Optional, allows the filesystem to perform a custom submission of
 	 * bio, such as csum calculations or multi-device bio split
 	 */
-	void (*submit_io)(struct inode *inode, struct bio *bio);
+	void (*submit_io)(struct inode *inode, struct bio *bio, const struct iomap *iomap);
 
 	/*
 	 * Optional, allows filesystem to specify own bio_set, so new bio's
-- 
2.46.1
Re: [PATCH 05/12] iomap: Introduce IOMAP_ENCODED
Posted by Christoph Hellwig 1 month, 2 weeks ago
On Fri, Oct 04, 2024 at 04:04:32PM -0400, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> An encoded extent must be read completely. Make the bio just as a
> regular bio and let filesystem deal with the rest of the extent.
> A new bio must be created if a new iomap is returned.
> The filesystem must be informed that the bio to be read is
> encoded and the offset from which the encoded extent starts. So, pass
> the iomap associated with the bio while calling submit_io. Save the
> previous iomap (associated with the bio being submitted) in prev in
> order to submit when the iomap changes.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/iomap/buffered-io.c | 17 ++++++++++-------
>  include/linux/iomap.h  | 11 +++++++++--
>  2 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 0e682ff84e4a..4c734899a8e5 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -378,12 +378,13 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
>  {
>  	const struct iomap *srcmap = iomap_iter_srcmap(iter);
>  
> -	return srcmap->type != IOMAP_MAPPED ||
> +	return (srcmap->type != IOMAP_MAPPED &&
> +			srcmap->type != IOMAP_ENCODED) ||
>  		(srcmap->flags & IOMAP_F_NEW) ||
>  		pos >= i_size_read(iter->inode);

Non-standard indentation for the new line.  But at this point the
condition is becoming complex enough that it is best split into
multiple if statements anyway.


>  	sector = iomap_sector(iomap, pos);
>  	if (!ctx->bio ||
> +	    (iomap->type & IOMAP_ENCODED && iomap->offset != iter->prev.offset) ||

Overly long line.  And this could really use a comment as well.

> -static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> +static loff_t iomap_readahead_iter(struct iomap_iter *iter,
>  		struct iomap_readpage_ctx *ctx)

Why is the iter de-constified?

In general I'm not a huge fan of the encoded magic here, but I'll
need to take a closer look at the caller if I can come up with
something better.
Re: [PATCH 05/12] iomap: Introduce IOMAP_ENCODED
Posted by Christoph Hellwig 1 month, 2 weeks ago
On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote:
> In general I'm not a huge fan of the encoded magic here, but I'll
> need to take a closer look at the caller if I can come up with
> something better.

I looked a bit more at the code.  I'm not entirely sure I fully
understand it yet, but:

I think most of the read side special casing would be handled by
always submitting the bio at the end of an iomap.  Ritesh was
looking into that for supporting ext2-like file systems that
read indirect block ondemand, but I think it actually is fundamentally
the right thing to do anyway.

For the write we plan to add a new IOMAP_BOUNDARY flag to prevent
merges as part of the XFS RT group series, and I think this should
also solve your problems with keeping one bio per iomap?  The
current patch is here:

https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=djwong-wtf&id=91b5d7a52dab63732aee451bba0db315ae9bd09b
Re: [PATCH 05/12] iomap: Introduce IOMAP_ENCODED
Posted by Ritesh Harjani (IBM) 1 month, 1 week ago
Christoph Hellwig <hch@infradead.org> writes:

> On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote:
>> In general I'm not a huge fan of the encoded magic here, but I'll
>> need to take a closer look at the caller if I can come up with
>> something better.
>
> I looked a bit more at the code.  I'm not entirely sure I fully
> understand it yet, but:
>
> I think most of the read side special casing would be handled by
> always submitting the bio at the end of an iomap.  Ritesh was
> looking into that for supporting ext2-like file systems that
> read indirect block ondemand, but I think it actually is fundamentally
> the right thing to do anyway.

yes, it was... 
    This patch optimizes the data access patterns for filesystems with
    indirect block mapping by implementing BH_Boundary handling within
    iomap.

    Currently the bios for reads within iomap are only submitted at
    2 places -
    1. If we cannot merge the new req. with previous bio, only then we
    submit the previous bio.
    2. Submit the bio at the end of the entire read processing.

    This means for filesystems with indirect block mapping, we call into
    ->iomap_begin() again w/o submitting the previous bios. That causes
    unoptimized data access patterns for blocks which are of BH_Boundary type.

Here is the change which describes this [1]. The implementation part
needed to be change to reduce the complexity. Willy gave a better
implementation alternative here [2].  

[1]: https://lore.kernel.org/all/4e2752e99f55469c4eb5f2fe83e816d529110192.1714046808.git.ritesh.list@gmail.com/
[2]: https://lore.kernel.org/all/Zivu0gzb4aiazSNu@casper.infradead.org/

Sorry once I am done with the other priority work on my plate - I can
resume this work. Meanwhile I would be happy to help if someone would
like to work on this.

-ritesh
Re: [PATCH 05/12] iomap: Introduce IOMAP_ENCODED
Posted by Goldwyn Rodrigues 1 month, 2 weeks ago
On  2:42 10/10, Christoph Hellwig wrote:
> On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote:
> > In general I'm not a huge fan of the encoded magic here, but I'll
> > need to take a closer look at the caller if I can come up with
> > something better.
> 
> I looked a bit more at the code.  I'm not entirely sure I fully
> understand it yet, but:
> 
> I think most of the read side special casing would be handled by
> always submitting the bio at the end of an iomap.  Ritesh was
> looking into that for supporting ext2-like file systems that
> read indirect block ondemand, but I think it actually is fundamentally
> the right thing to do anyway.
> 
> For the write we plan to add a new IOMAP_BOUNDARY flag to prevent
> merges as part of the XFS RT group series, and I think this should
> also solve your problems with keeping one bio per iomap?  The
> current patch is here:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=djwong-wtf&id=91b5d7a52dab63732aee451bba0db315ae9bd09b
> 

Yes, this is helpful but it will solve only one of the three
requirements.

The first, compressed and uncompressed extents are to be dealth with
differently because of the additional step of uncompressing the bio
corresponding to the extent. So, iomap needs to inform btrfs that the
bio submitted (through newly created function submit_io()) is compressed
and needs to be read and decompressed.

The second, btrfs sets the bi_sector to the start of the extent map
and the assignment cannot go through iomap_sector(). Compressed extents
being smaller than regular one, iomap_sector would most of the times
overrun beyond the compressed extent. So, in this patchset, I am setting
this in ->submit_io()


-- 
Goldwyn