A number of places that generate kiocbs didn't use init_sync_kiocb() to
initialise the new kiocb. Fix these to always use init_kiocb().
Note that aio and io_uring pass information in through ki_filp through an
overlaid union before I can call init_kiocb(), so that gets reinitialised.
I don't think it clobbers anything else.
After this point, IOCB_WRITE is only set by init_kiocb().
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christian Brauner <christian@brauner.io>
cc: Alexander Viro <viro@zeniv.linux.org.uk>
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---
drivers/block/loop.c | 11 ++++++-----
drivers/nvme/target/io-cmd-file.c | 5 +++--
drivers/target/target_core_file.c | 2 +-
fs/aio.c | 9 ++++-----
fs/cachefiles/io.c | 10 ++++------
io_uring/rw.c | 10 +++++-----
6 files changed, 23 insertions(+), 24 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 37511d2b2caf..ea92235c5ba2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -439,16 +439,17 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
}
atomic_set(&cmd->ref, 2);
- iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
+ iov_iter_bvec(&iter, rw == WRITE ? ITER_SOURCE : ITER_DEST,
+ bvec, nr_bvec, blk_rq_bytes(rq));
iter.iov_offset = offset;
+ init_kiocb(&cmd->iocb, file, rw);
cmd->iocb.ki_pos = pos;
- cmd->iocb.ki_filp = file;
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
- if (rw == ITER_SOURCE)
+ if (rw == WRITE)
ret = call_write_iter(file, &cmd->iocb, &iter);
else
ret = call_read_iter(file, &cmd->iocb, &iter);
@@ -490,12 +491,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
+ return lo_rw_aio(lo, cmd, pos, WRITE);
else
return lo_write_simple(lo, rq, pos);
case REQ_OP_READ:
if (cmd->use_aio)
- return lo_rw_aio(lo, cmd, pos, ITER_DEST);
+ return lo_rw_aio(lo, cmd, pos, READ);
else
return lo_read_simple(lo, rq, pos);
default:
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 2d068439b129..0b6577d51b69 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -85,17 +85,18 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
ki_flags |= IOCB_DSYNC;
call_iter = req->ns->file->f_op->write_iter;
+ init_kiocb(iocb, req->ns->file, WRITE);
rw = ITER_SOURCE;
} else {
call_iter = req->ns->file->f_op->read_iter;
+ init_kiocb(iocb, req->ns->file, READ);
rw = ITER_DEST;
}
iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
+ iocb->ki_flags |= ki_flags;
iocb->ki_pos = pos;
- iocb->ki_filp = req->ns->file;
- iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags;
return call_iter(iocb, &iter);
}
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index ce0e000b74fc..d70cf89959dc 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -287,11 +287,11 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
}
iov_iter_bvec(&iter, is_write, aio_cmd->bvecs, sgl_nents, len);
+ init_kiocb(&aio_cmd->iocb, file, is_write);
aio_cmd->cmd = cmd;
aio_cmd->len = len;
aio_cmd->iocb.ki_pos = cmd->t_task_lba * dev->dev_attrib.block_size;
- aio_cmd->iocb.ki_filp = file;
aio_cmd->iocb.ki_complete = cmd_rw_aio_complete;
aio_cmd->iocb.ki_flags = IOCB_DIRECT;
diff --git a/fs/aio.c b/fs/aio.c
index 77e33619de40..26e173be9448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1461,14 +1461,14 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
iocb_put(iocb);
}
-static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb, int rw)
{
int ret;
+ init_kiocb(req, req->ki_filp, rw);
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
- req->ki_flags = req->ki_filp->f_iocb_flags;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
@@ -1539,7 +1539,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, READ);
if (ret)
return ret;
file = req->ki_filp;
@@ -1566,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
struct file *file;
int ret;
- ret = aio_prep_rw(req, iocb);
+ ret = aio_prep_rw(req, iocb, WRITE);
if (ret)
return ret;
file = req->ki_filp;
@@ -1592,7 +1592,6 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
sb_start_write(file_inode(file)->i_sb);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
- req->ki_flags |= IOCB_WRITE;
aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 175a25fcade8..2c47788f38d2 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -134,11 +134,10 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
if (!ki)
goto presubmission_error;
+ init_kiocb(&ki->iocb, file, READ);
refcount_set(&ki->ki_refcnt, 2);
- ki->iocb.ki_filp = file;
+ ki->iocb.ki_flags |= IOCB_DIRECT;
ki->iocb.ki_pos = start_pos + skipped;
- ki->iocb.ki_flags = IOCB_DIRECT;
- ki->iocb.ki_ioprio = get_current_ioprio();
ki->skipped = skipped;
ki->object = object;
ki->inval_counter = cres->inval_counter;
@@ -306,10 +305,9 @@ int __cachefiles_write(struct cachefiles_object *object,
}
refcount_set(&ki->ki_refcnt, 2);
- ki->iocb.ki_filp = file;
+ init_kiocb(&ki->iocb, file, WRITE);
ki->iocb.ki_pos = start_pos;
- ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
- ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->iocb.ki_flags |= IOCB_DIRECT;
ki->object = object;
ki->start = start_pos;
ki->len = len;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..1cade1567162 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -655,12 +655,13 @@ static bool need_complete_io(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
-static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
+static int io_rw_init_file(struct io_kiocb *req, unsigned int io_direction)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct kiocb *kiocb = &rw->kiocb;
struct io_ring_ctx *ctx = req->ctx;
struct file *file = req->file;
+ fmode_t mode = (io_direction == WRITE) ? FMODE_WRITE : FMODE_READ;
int ret;
if (unlikely(!file || !(file->f_mode & mode)))
@@ -669,7 +670,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
if (!(req->flags & REQ_F_FIXED_FILE))
req->flags |= io_file_get_flags(file);
- kiocb->ki_flags = file->f_iocb_flags;
+ init_kiocb(kiocb, file, io_direction);
ret = kiocb_set_rw_flags(kiocb, rw->flags);
if (unlikely(ret))
return ret;
@@ -738,7 +739,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
- ret = io_rw_init_file(req, FMODE_READ);
+ ret = io_rw_init_file(req, READ);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -870,7 +871,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
- ret = io_rw_init_file(req, FMODE_WRITE);
+ ret = io_rw_init_file(req, WRITE);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -914,7 +915,6 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
__sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
- kiocb->ki_flags |= IOCB_WRITE;
if (likely(req->file->f_op->write_iter))
ret2 = call_write_iter(req->file, kiocb, &s->iter);
On 6/30/23 9:25 AM, David Howells wrote:
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index 1bce2208b65c..1cade1567162 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -655,12 +655,13 @@ static bool need_complete_io(struct io_kiocb *req)
> S_ISBLK(file_inode(req->file)->i_mode);
> }
>
> -static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
> +static int io_rw_init_file(struct io_kiocb *req, unsigned int io_direction)
> {
> struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> struct kiocb *kiocb = &rw->kiocb;
> struct io_ring_ctx *ctx = req->ctx;
> struct file *file = req->file;
> + fmode_t mode = (io_direction == WRITE) ? FMODE_WRITE : FMODE_READ;
> int ret;
>
> if (unlikely(!file || !(file->f_mode & mode)))
Not ideal to add a branch here, probably better to just pass in both?
> @@ -870,7 +871,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
> iov_iter_restore(&s->iter, &s->iter_state);
> iovec = NULL;
> }
> - ret = io_rw_init_file(req, FMODE_WRITE);
> + ret = io_rw_init_file(req, WRITE);
> if (unlikely(ret)) {
> kfree(iovec);
> return ret;
> @@ -914,7 +915,6 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
> __sb_writers_release(file_inode(req->file)->i_sb,
> SB_FREEZE_WRITE);
> }
> - kiocb->ki_flags |= IOCB_WRITE;
>
> if (likely(req->file->f_op->write_iter))
> ret2 = call_write_iter(req->file, kiocb, &s->iter);
>
One concern here is that we're using IOCB_WRITE to tell whether
sb_start_write() has been done or not, and hence whether
kiocb_end_write() needs to be called. You now set it earlier, which
means that if we get a failure when we need to set up async data, we now
have IOCB_WRITE set at that point even though we did not call
sb_start_write().
--
Jens Axboe
Jens Axboe <axboe@kernel.dk> wrote:

> One concern here is that we're using IOCB_WRITE here to tell if
> sb_start_write() has been done or not, and hence whether
> kiocb_end_write() needs to be called. You now set it earlier, which
> means if we get a failure if we need to setup async data, then we now
> have IOCB_WRITE set at that point even though we did not call
> sb_start_write().

Hmmm... It's set earlier in a number of places anyway -
__cachefiles_write() for example.

Btw, can you please put some comments on the IOCB_* constants? I have to
guess at what they mean and how they're meant to be used. Or better still,
get Christoph to write Documentation/core-api/iocb.rst describing the API? ;-)

David
On 6/30/23 10:00 AM, David Howells wrote:
> Jens Axboe <axboe@kernel.dk> wrote:
>
>> One concern here is that we're using IOCB_WRITE here to tell if
>> sb_start_write() has been done or not, and hence whether
>> kiocb_end_write() needs to be called. You now set it earlier, which
>> means if we get a failure if we need to setup async data, then we now
>> have IOCB_WRITE set at that point even though we did not call
>> sb_start_write().
>
> Hmmm... It's set earlier in a number of places anyway -
> __cachefiles_write() for example.

Not sure how that's relevant, that's a private kiocb and not related to
the private one that io_uring uses?

> Btw, can you please put some comments on the IOCB_* constants? I have
> to guess at what they mean and how they're meant to be used. Or
> better still, get Christoph to write Documentation/core-api/iocb.rst
> describing the API? ;-)

The ones I have added do have comments, mostly, though it's not a lot of
commentary for sure... Which ones are confusing and need better
comments? Would be happy to do that.

I do think the comments belong in there rather than have a separate doc
for the kiocb. Though one thing that's confusing is the ki_private
ownership. You'd think it belongs to the owner of the kiocb, but nope,
it has random uses in iomap and ocfs2 at least.

--
Jens Axboe
On Fri, Jun 30, 2023 at 04:25:16PM +0100, David Howells wrote:
> A number of places that generate kiocbs didn't use init_sync_kiocb() to
> initialise the new kiocb. Fix these to always use init_kiocb().
>
> Note that aio and io_uring pass information in through ki_filp through an
> overlaid union before I can call init_kiocb(), so that gets reinitialised.
> I don't think it clobbers anything else.
>
> After this point, IOCB_WRITE is only set by init_kiocb().
Nothing in this patch touches the VFS, so the subject line is
wrong. And I think we're better off splitting it into one per
subsystem, which also allows documenting the exact changes.
Which includes now setting the flags from f_iocb_flags and setting
an I/O priority. Please explain why this is harmless or even useful.
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 37511d2b2caf..ea92235c5ba2 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -439,16 +439,17 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
> }
> atomic_set(&cmd->ref, 2);
>
> - iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
> + iov_iter_bvec(&iter, rw == WRITE ? ITER_SOURCE : ITER_DEST,
> + bvec, nr_bvec, blk_rq_bytes(rq));
Given the cover letter I expect this is going to go away, but the
changes would probably be a lot more readable if you had a helper
to convert from READ/WRITE to the iter flags in between.
Or maybe do it the other way - add a helper to init the
> @@ -490,12 +491,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
> return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
> case REQ_OP_WRITE:
> if (cmd->use_aio)
> - return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
> + return lo_rw_aio(lo, cmd, pos, WRITE);
> else
> return lo_write_simple(lo, rq, pos);
> case REQ_OP_READ:
> if (cmd->use_aio)
> - return lo_rw_aio(lo, cmd, pos, ITER_DEST);
> + return lo_rw_aio(lo, cmd, pos, READ);
I don't think there is any need to pass the rw argument at all,
lo_rw_aio can just do an op_is_write(req_op(rq))
> -static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
> +static int io_rw_init_file(struct io_kiocb *req, unsigned int io_direction)
> {
> struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> struct kiocb *kiocb = &rw->kiocb;
> struct io_ring_ctx *ctx = req->ctx;
> struct file *file = req->file;
> + fmode_t mode = (io_direction == WRITE) ? FMODE_WRITE : FMODE_READ;
> int ret;
>
> if (unlikely(!file || !(file->f_mode & mode)))
I'd just move this check into the two callers, that way you can hard
code the mode instead of adding a conversion.
© 2016 - 2026 Red Hat, Inc.