With aio the only thing we need to be careful of is that writethrough
can be in progress even after dropping inode and folio lock. Due to
this, we need a way to synchronise with other paths where stable write
is not enough. For example:
1. Truncate to 0 in xfs sets i_size = 0 before waiting for writeback to
complete. In case of writethrough, the end io completion can again
push the i_size to a non-zero value.
2. Dio reads might race with aio writethrough ->end_io() and read 0s if
unwritten conversion is yet to happen.
Hence use the dio begin/end as it gives us the required guarantees.
Co-developed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
---
fs/iomap/buffered-io.c | 53 ++++++++++++++++++++++++++++++++++++------
include/linux/iomap.h | 10 ++++++--
2 files changed, 54 insertions(+), 9 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 74e1ab108b0f..6937f10e2782 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1113,6 +1113,9 @@ static ssize_t iomap_writethrough_complete(struct iomap_writethrough_ctx *wt_ctx
mapping_clear_stable_writes(inode->i_mapping);
+ if (wt_ctx->is_aio)
+ inode_dio_end(inode);
+
if (!ret) {
ret = wt_ctx->written;
iocb->ki_pos = wt_ctx->pos + ret;
@@ -1122,12 +1125,27 @@ static ssize_t iomap_writethrough_complete(struct iomap_writethrough_ctx *wt_ctx
return ret;
}
+static void iomap_writethrough_complete_work(struct work_struct *work)
+{
+ struct iomap_writethrough_ctx *wt_ctx =
+ container_of(work, struct iomap_writethrough_ctx, aio_work);
+ struct kiocb *iocb = wt_ctx->iocb;
+
+ iocb->ki_complete(iocb, iomap_writethrough_complete(wt_ctx));
+}
+
static void iomap_writethrough_done(struct iomap_writethrough_ctx *wt_ctx)
{
- struct task_struct *waiter = wt_ctx->waiter;
+ if (!wt_ctx->is_aio) {
+ struct task_struct *waiter = wt_ctx->waiter;
- WRITE_ONCE(wt_ctx->waiter, NULL);
- blk_wake_io_task(waiter);
+ WRITE_ONCE(wt_ctx->waiter, NULL);
+ blk_wake_io_task(waiter);
+ return;
+ }
+
+ INIT_WORK(&wt_ctx->aio_work, iomap_writethrough_complete_work);
+ queue_work(wt_ctx->inode->i_sb->s_dio_done_wq, &wt_ctx->aio_work);
return;
}
@@ -1530,9 +1548,6 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
if (iocb_is_dsync(iocb))
/* D_SYNC support not implemented yet */
return -EOPNOTSUPP;
- if (!is_sync_kiocb(iocb))
- /* aio support not implemented yet */
- return -EOPNOTSUPP;
/*
* +1 to max bvecs to account for unaligned write spanning multiple
@@ -1557,11 +1572,32 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
wt_ctx->pos = iocb->ki_pos;
wt_ctx->new_i_size = i_size_read(inode);
wt_ctx->max_bvecs = max_bvecs;
+ wt_ctx->is_aio = !is_sync_kiocb(iocb);
atomic_set(&wt_ctx->ref, 1);
- wt_ctx->waiter = current;
+
+ if (!wt_ctx->is_aio)
+ wt_ctx->waiter = current;
+ else
+ /*
+ * With aio, writethrough can be in progress even after dropping
+ * inode and folio lock. Due to this, we need a way to
+ * synchronise with other paths where stable write is not enough
+ * (example truncate). Hence use the dio begin/end as it gives
+ * us the required guarantees.
+ */
+ inode_dio_begin(inode);
mapping_set_stable_writes(inode->i_mapping);
+ if (wt_ctx->is_aio && !inode->i_sb->s_dio_done_wq) {
+ ret = sb_init_dio_done_wq(inode->i_sb);
+ if (ret < 0) {
+ mapping_clear_stable_writes(inode->i_mapping);
+ kfree(wt_ctx);
+ return ret;
+ }
+ }
+
while ((ret = iomap_iter(&iter, wt_ops->ops)) > 0) {
WARN_ON(iter.iomap.type != IOMAP_UNWRITTEN &&
iter.iomap.type != IOMAP_MAPPED);
@@ -1571,6 +1607,9 @@ ssize_t iomap_file_writethrough_write(struct kiocb *iocb, struct iov_iter *i,
cmpxchg(&wt_ctx->error, 0, ret);
if (!atomic_dec_and_test(&wt_ctx->ref)) {
+ if (wt_ctx->is_aio)
+ return -EIOCBQUEUED;
+
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(wt_ctx->waiter))
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 661233aa009d..e99f7c279dc6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -486,9 +486,15 @@ struct iomap_writethrough_ctx {
atomic_t ref;
unsigned int flags;
int error;
+ bool is_aio;
- /* used during submission and for non-aio completion */
- struct task_struct *waiter;
+ union {
+ /* used during submission and for non-aio completion */
+ struct task_struct *waiter;
+
+ /* used during aio completion */
+ struct work_struct aio_work;
+ };
loff_t bio_pos;
unsigned int nr_bvecs;
--
2.53.0
© 2016 - 2026 Red Hat, Inc.