From: Goldwyn Rodrigues <rgoldwyn@suse.com>
An encoded extent must be read completely. Make the bio just as a
regular bio and let filesystem deal with the rest of the extent.
A new bio must be created if a new iomap is returned.
The filesystem must be informed that the bio to be read is
encoded and the offset from which the encoded extent starts. So, pass
the iomap associated with the bio while calling submit_io. Save the
previous iomap (associated with the bio being submitted) in prev in
order to submit when the iomap changes.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/iomap/buffered-io.c | 17 ++++++++++-------
include/linux/iomap.h | 11 +++++++++--
2 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0e682ff84e4a..4c734899a8e5 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -378,12 +378,13 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- return srcmap->type != IOMAP_MAPPED ||
+ return (srcmap->type != IOMAP_MAPPED &&
+ srcmap->type != IOMAP_ENCODED) ||
(srcmap->flags & IOMAP_F_NEW) ||
pos >= i_size_read(iter->inode);
}
-static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
+static loff_t iomap_readpage_iter(struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx, loff_t offset)
{
const struct iomap *iomap = &iter->iomap;
@@ -419,6 +420,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
sector = iomap_sector(iomap, pos);
if (!ctx->bio ||
+ (iomap->type & IOMAP_ENCODED && iomap->offset != iter->prev.offset) ||
bio_end_sector(ctx->bio) != sector ||
!bio_add_folio(ctx->bio, folio, plen, poff)) {
struct bio_set *bioset;
@@ -428,10 +430,11 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
if (ctx->bio) {
if (ctx->ops && ctx->ops->submit_io)
- ctx->ops->submit_io(iter->inode, ctx->bio);
+ ctx->ops->submit_io(iter->inode, ctx->bio, &iter->prev);
else
submit_bio(ctx->bio);
}
+ iter->prev = iter->iomap;
if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
@@ -470,7 +473,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
return pos - orig_pos + plen;
}
-static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
+static loff_t iomap_read_folio_iter(struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx)
{
struct folio *folio = ctx->cur_folio;
@@ -509,7 +512,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
if (ctx.bio) {
if (ctx.ops->submit_io)
- ctx.ops->submit_io(iter.inode, ctx.bio);
+ ctx.ops->submit_io(iter.inode, ctx.bio, &iter.prev);
else
submit_bio(ctx.bio);
WARN_ON_ONCE(!ctx.cur_folio_in_bio);
@@ -527,7 +530,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
}
EXPORT_SYMBOL_GPL(iomap_read_folio);
-static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
+static loff_t iomap_readahead_iter(struct iomap_iter *iter,
struct iomap_readpage_ctx *ctx)
{
loff_t length = iomap_length(iter);
@@ -588,7 +591,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops,
if (ctx.bio) {
if (ctx.ops && ctx.ops->submit_io)
- ctx.ops->submit_io(iter.inode, ctx.bio);
+ ctx.ops->submit_io(iter.inode, ctx.bio, &iter.prev);
else
submit_bio(ctx.bio);
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 7b757bea8455..a5cf00a01f23 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
#define IOMAP_MAPPED 2 /* blocks allocated at @addr */
#define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */
#define IOMAP_INLINE 4 /* data inline in the inode */
+#define IOMAP_ENCODED 5 /* data encoded, R/W whole extent */
/*
* Flags reported by the file system from iomap_begin:
@@ -107,6 +108,8 @@ struct iomap {
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
+ if (iomap->type & IOMAP_ENCODED)
+ return iomap->addr;
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
@@ -217,9 +220,13 @@ struct iomap_iter {
loff_t pos;
u64 len;
s64 processed;
+ unsigned type;
unsigned flags;
struct iomap iomap;
- struct iomap srcmap;
+ union {
+ struct iomap srcmap;
+ struct iomap prev;
+ };
void *private;
};
@@ -261,7 +268,7 @@ struct iomap_read_folio_ops {
* Optional, allows the filesystem to perform a custom submission of
* bio, such as csum calculations or multi-device bio split
*/
- void (*submit_io)(struct inode *inode, struct bio *bio);
+ void (*submit_io)(struct inode *inode, struct bio *bio, const struct iomap *iomap);
/*
* Optional, allows filesystem to specify own bio_set, so new bio's
--
2.46.1
On Fri, Oct 04, 2024 at 04:04:32PM -0400, Goldwyn Rodrigues wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > An encoded extent must be read completely. Make the bio just as a > regular bio and let filesystem deal with the rest of the extent. > A new bio must be created if a new iomap is returned. > The filesystem must be informed that the bio to be read is > encoded and the offset from which the encoded extent starts. So, pass > the iomap associated with the bio while calling submit_io. Save the > previous iomap (associated with the bio being submitted) in prev in > order to submit when the iomap changes. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > --- > fs/iomap/buffered-io.c | 17 ++++++++++------- > include/linux/iomap.h | 11 +++++++++-- > 2 files changed, 19 insertions(+), 9 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 0e682ff84e4a..4c734899a8e5 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -378,12 +378,13 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, > { > const struct iomap *srcmap = iomap_iter_srcmap(iter); > > - return srcmap->type != IOMAP_MAPPED || > + return (srcmap->type != IOMAP_MAPPED && > + srcmap->type != IOMAP_ENCODED) || > (srcmap->flags & IOMAP_F_NEW) || > pos >= i_size_read(iter->inode); Non-standard indentation for the new line. But at this point the condition is becoming complex enough that it is best split into multiple if statements anyway. > sector = iomap_sector(iomap, pos); > if (!ctx->bio || > + (iomap->type & IOMAP_ENCODED && iomap->offset != iter->prev.offset) || Overly long line. And this could really use a comment as well. > -static loff_t iomap_readahead_iter(const struct iomap_iter *iter, > +static loff_t iomap_readahead_iter(struct iomap_iter *iter, > struct iomap_readpage_ctx *ctx) Why is the iter de-constified? In general I'm not a huge fan of the encoded magic here, but I'll need to take a closer look at the caller if I can come up with something better.
On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote: > In general I'm not a huge fan of the encoded magic here, but I'll > need to take a closer look at the caller if I can come up with > something better. I looked a bit more at the code. I'm not entirely sure I fully understand it yet, but: I think most of the read side special casing would be handled by always submitting the bio at the end of an iomap. Ritesh was looking into that for supporting ext2-like file systems that read indirect block ondemand, but I think it actually is fundamentally the right thing to do anyway. For the write we plan to add a new IOMAP_BOUNDARY flag to prevent merges as part of the XFS RT group series, and I think this should also solve your problems with keeping one bio per iomap? The current patch is here: https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=djwong-wtf&id=91b5d7a52dab63732aee451bba0db315ae9bd09b
Christoph Hellwig <hch@infradead.org> writes: > On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote: >> In general I'm not a huge fan of the encoded magic here, but I'll >> need to take a closer look at the caller if I can come up with >> something better. > > I looked a bit more at the code. I'm not entirely sure I fully > understand it yet, but: > > I think most of the read side special casing would be handled by > always submitting the bio at the end of an iomap. Ritesh was > looking into that for supporting ext2-like file systems that > read indirect block ondemand, but I think it actually is fundamentally > the right thing to do anyway. yes, it was... This patch optimizes the data access patterns for filesystems with indirect block mapping by implementing BH_Boundary handling within iomap. Currently the bios for reads within iomap are only submitted at 2 places - 1. If we cannot merge the new req. with previous bio, only then we submit the previous bio. 2. Submit the bio at the end of the entire read processing. This means for filesystems with indirect block mapping, we call into ->iomap_begin() again w/o submitting the previous bios. That causes unoptimized data access patterns for blocks which are of BH_Boundary type. Here is the change which describes this [1]. The implementation part needed to be change to reduce the complexity. Willy gave a better implementation alternative here [2]. [1]: https://lore.kernel.org/all/4e2752e99f55469c4eb5f2fe83e816d529110192.1714046808.git.ritesh.list@gmail.com/ [2]: https://lore.kernel.org/all/Zivu0gzb4aiazSNu@casper.infradead.org/ Sorry once I am done with the other priority work on my plate - I can resume this work. Meanwhile I would be happy to help if someone would like to work on this. -ritesh
On 2:42 10/10, Christoph Hellwig wrote: > On Tue, Oct 08, 2024 at 02:48:43AM -0700, Christoph Hellwig wrote: > > In general I'm not a huge fan of the encoded magic here, but I'll > > need to take a closer look at the caller if I can come up with > > something better. > > I looked a bit more at the code. I'm not entirely sure I fully > understand it yet, but: > > I think most of the read side special casing would be handled by > always submitting the bio at the end of an iomap. Ritesh was > looking into that for supporting ext2-like file systems that > read indirect block ondemand, but I think it actually is fundamentally > the right thing to do anyway. > > For the write we plan to add a new IOMAP_BOUNDARY flag to prevent > merges as part of the XFS RT group series, and I think this should > also solve your problems with keeping one bio per iomap? The > current patch is here: > > https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=djwong-wtf&id=91b5d7a52dab63732aee451bba0db315ae9bd09b > Yes, this is helpful but it will solve only one of the three requirements. The first, compressed and uncompressed extents are to be dealth with differently because of the additional step of uncompressing the bio corresponding to the extent. So, iomap needs to inform btrfs that the bio submitted (through newly created function submit_io()) is compressed and needs to be read and decompressed. The second, btrfs sets the bi_sector to the start of the extent map and the assignment cannot go through iomap_sector(). Compressed extents being smaller than regular one, iomap_sector would most of the times overrun beyond the compressed extent. So, in this patchset, I am setting this in ->submit_io() -- Goldwyn
© 2016 - 2024 Red Hat, Inc.