From: Goldwyn Rodrigues <rgoldwyn@suse.com>
iomap_read_folio_ops provide additional functions to allocate or submit
the bio. Filesystems such as btrfs have additional operations with bios
such as verifying data checksums. Creating a bio submission hook allows
the filesystem to process and verify the bio.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
block/fops.c | 4 ++--
fs/erofs/data.c | 4 ++--
fs/gfs2/aops.c | 4 ++--
fs/iomap/buffered-io.c | 31 ++++++++++++++++++++++++-------
fs/xfs/xfs_aops.c | 4 ++--
fs/zonefs/file.c | 4 ++--
include/linux/iomap.h | 14 ++++++++++++--
7 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index 9825c1713a49..41cbd8e5db11 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -483,12 +483,12 @@ const struct address_space_operations def_blk_aops = {
#else /* CONFIG_BUFFER_HEAD */
static int blkdev_read_folio(struct file *file, struct folio *folio)
{
- return iomap_read_folio(folio, &blkdev_iomap_ops);
+ return iomap_read_folio(folio, &blkdev_iomap_ops, NULL);
}
static void blkdev_readahead(struct readahead_control *rac)
{
- iomap_readahead(rac, &blkdev_iomap_ops);
+ iomap_readahead(rac, &blkdev_iomap_ops, NULL);
}
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 1b7eba38ba1e..335ce8be5894 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -347,12 +347,12 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
- return iomap_read_folio(folio, &erofs_iomap_ops);
+ return iomap_read_folio(folio, &erofs_iomap_ops, NULL);
}
static void erofs_readahead(struct readahead_control *rac)
{
- return iomap_readahead(rac, &erofs_iomap_ops);
+ return iomap_readahead(rac, &erofs_iomap_ops, NULL);
}
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 10d5acd3f742..3cc66b640cb9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -451,7 +451,7 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
if (!gfs2_is_jdata(ip) ||
(i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
- error = iomap_read_folio(folio, &gfs2_iomap_ops);
+ error = iomap_read_folio(folio, &gfs2_iomap_ops, NULL);
} else if (gfs2_is_stuffed(ip)) {
error = stuffed_read_folio(ip, folio);
} else {
@@ -526,7 +526,7 @@ static void gfs2_readahead(struct readahead_control *rac)
else if (gfs2_is_jdata(ip))
mpage_readahead(rac, gfs2_block_map);
else
- iomap_readahead(rac, &gfs2_iomap_ops);
+ iomap_readahead(rac, &gfs2_iomap_ops, NULL);
}
/**
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index ad656f501f38..71370bbe466c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -341,6 +341,7 @@ struct iomap_readpage_ctx {
bool cur_folio_in_bio;
struct bio *bio;
struct readahead_control *rac;
+ const struct iomap_read_folio_ops *ops;
};
/**
@@ -424,8 +425,12 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
gfp_t orig_gfp = gfp;
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
- if (ctx->bio)
- submit_bio(ctx->bio);
+ if (ctx->bio) {
+ if (ctx->ops && ctx->ops->submit_io)
+ ctx->ops->submit_io(iter->inode, ctx->bio);
+ else
+ submit_bio(ctx->bio);
+ }
if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
@@ -475,7 +480,8 @@ static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
return done;
}
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
+int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
+ const struct iomap_read_folio_ops *read_folio_ops)
{
struct iomap_iter iter = {
.inode = folio->mapping->host,
@@ -484,6 +490,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
};
struct iomap_readpage_ctx ctx = {
.cur_folio = folio,
+ .ops = read_folio_ops,
};
int ret;
@@ -493,7 +500,10 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
iter.processed = iomap_read_folio_iter(&iter, &ctx);
if (ctx.bio) {
- submit_bio(ctx.bio);
+ if (ctx.ops->submit_io)
+ ctx.ops->submit_io(iter.inode, ctx.bio);
+ else
+ submit_bio(ctx.bio);
WARN_ON_ONCE(!ctx.cur_folio_in_bio);
} else {
WARN_ON_ONCE(ctx.cur_folio_in_bio);
@@ -538,6 +548,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
* iomap_readahead - Attempt to read pages from a file.
* @rac: Describes the pages to be read.
* @ops: The operations vector for the filesystem.
+ * @read_folio_ops: Function hooks for filesystems for special bio submissions
*
* This function is for filesystems to call to implement their readahead
* address_space operation.
@@ -549,7 +560,8 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
* function is called with memalloc_nofs set, so allocations will not cause
* the filesystem to be reentered.
*/
-void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
+void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops,
+ const struct iomap_read_folio_ops *read_folio_ops)
{
struct iomap_iter iter = {
.inode = rac->mapping->host,
@@ -558,6 +570,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
};
struct iomap_readpage_ctx ctx = {
.rac = rac,
+ .ops = read_folio_ops,
};
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
@@ -565,8 +578,12 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
while (iomap_iter(&iter, ops) > 0)
iter.processed = iomap_readahead_iter(&iter, &ctx);
- if (ctx.bio)
- submit_bio(ctx.bio);
+ if (ctx.bio) {
+ if (ctx.ops && ctx.ops->submit_io)
+ ctx.ops->submit_io(iter.inode, ctx.bio);
+ else
+ submit_bio(ctx.bio);
+ }
if (ctx.cur_folio) {
if (!ctx.cur_folio_in_bio)
folio_unlock(ctx.cur_folio);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6dead20338e2..9a3221d738bd 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -517,14 +517,14 @@ xfs_vm_read_folio(
struct file *unused,
struct folio *folio)
{
- return iomap_read_folio(folio, &xfs_read_iomap_ops);
+ return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
}
STATIC void
xfs_vm_readahead(
struct readahead_control *rac)
{
- iomap_readahead(rac, &xfs_read_iomap_ops);
+ iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
}
static int
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 3b103715acc9..bf8db1ddd761 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -112,12 +112,12 @@ static const struct iomap_ops zonefs_write_iomap_ops = {
static int zonefs_read_folio(struct file *unused, struct folio *folio)
{
- return iomap_read_folio(folio, &zonefs_read_iomap_ops);
+ return iomap_read_folio(folio, &zonefs_read_iomap_ops, NULL);
}
static void zonefs_readahead(struct readahead_control *rac)
{
- iomap_readahead(rac, &zonefs_read_iomap_ops);
+ iomap_readahead(rac, &zonefs_read_iomap_ops, NULL);
}
/*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6fc1c858013d..5b775213920e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -256,14 +256,24 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
return &i->iomap;
}
+struct iomap_read_folio_ops {
+ /*
+ * Optional, allows the filesystem to perform a custom submission of
+ * bio, such as csum calculations or multi-device bio split
+ */
+ void (*submit_io)(struct inode *inode, struct bio *bio);
+};
+
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
int (*punch)(struct inode *inode, loff_t pos, loff_t length));
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
+int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
+ const struct iomap_read_folio_ops *);
+void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops,
+ const struct iomap_read_folio_ops *);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
--
2.46.1
On Fri, Oct 04, 2024 at 04:04:29PM -0400, Goldwyn Rodrigues wrote: > iomap_read_folio_ops provide additional functions to allocate or submit > the bio. Filesystems such as btrfs have additional operations with bios > such as verifying data checksums. Creating a bio submission hook allows > the filesystem to process and verify the bio. But surely you're going to need something similar for writeback too? So why go to all this trouble to add a new kind of ops instead of making it part of iomap_ops or iomap_folio_ops?
On Sat, Oct 05, 2024 at 03:20:06AM +0100, Matthew Wilcox wrote: > On Fri, Oct 04, 2024 at 04:04:29PM -0400, Goldwyn Rodrigues wrote: > > iomap_read_folio_ops provide additional functions to allocate or submit > > the bio. Filesystems such as btrfs have additional operations with bios > > such as verifying data checksums. Creating a bio submission hook allows > > the filesystem to process and verify the bio. > > But surely you're going to need something similar for writeback too? > So why go to all this trouble to add a new kind of ops instead of making > it part of iomap_ops or iomap_folio_ops? We really should not add anything to iomap_ops. That's just the iteration that should not even know about pages. In fact I hope to eventuall get back and replace it with a single iterator that could use direct calls for the fast path as per your RFC from years ago. iomap_folio_ops is entirely specific to the buffered write path. I'm honestly not sure what the point is of merging structures specific to buffered read, buffered write (and suggested later in the thread buffered writeback) when they have very little overlap.
On Sat, Oct 05, 2024 at 03:20:06AM +0100, Matthew Wilcox wrote: > On Fri, Oct 04, 2024 at 04:04:29PM -0400, Goldwyn Rodrigues wrote: > > iomap_read_folio_ops provide additional functions to allocate or submit > > the bio. Filesystems such as btrfs have additional operations with bios > > such as verifying data checksums. Creating a bio submission hook allows > > the filesystem to process and verify the bio. > > But surely you're going to need something similar for writeback too? > So why go to all this trouble to add a new kind of ops instead of making > it part of iomap_ops or iomap_folio_ops? iomap_folio_ops, and maybe it's time to rename it iomap_pagecache_ops. I almost wonder if we should have this instead: struct iomap_pagecache_ops { struct iomap_ops ops; /* folio management */ struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); /* mapping revalidation */ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); /* writeback */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned len); int (*prepare_ioend)(struct iomap_ioend *ioend, int status); void (*discard_folio)(struct folio *folio, loff_t pos); }; and then we change the buffered-io.c functions to take a (const struct iomap_pagecache_ops*) instead of iomap_ops+iomap_folio_ops, or iomap_ops+iomap_writeback_ops. Same embedding suggestion for iomap_dio_ops. --D
© 2016 - 2024 Red Hat, Inc.