Switch timerfd to using fops->read_iter(), so it can support not just
O_NONBLOCK but IOCB_NOWAIT as well. With the latter, users like io_uring
interact with timerfds a lot better, as they can be driven purely
by the poll trigger.
Manually get and install the required fd, so that FMODE_NOWAIT can be
set before the file is installed into the file table.
No functional changes intended in this patch; it's purely a straight
conversion to using the read iterator method.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/timerfd.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index e9c96a0c79f1..f0d82dcbffef 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
return events;
}
-static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ struct file *file = iocb->ki_filp;
struct timerfd_ctx *ctx = file->private_data;
ssize_t res;
u64 ticks = 0;
- if (count < sizeof(ticks))
+ if (iov_iter_count(to) < sizeof(ticks))
return -EINVAL;
+
spin_lock_irq(&ctx->wqh.lock);
- if (file->f_flags & O_NONBLOCK)
+ if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
res = -EAGAIN;
else
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
@@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
ctx->ticks = 0;
}
spin_unlock_irq(&ctx->wqh.lock);
- if (ticks)
- res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
+ if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
+ res = -EFAULT;
return res;
}
@@ -384,7 +385,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
.poll = timerfd_poll,
- .read = timerfd_read,
+ .read_iter = timerfd_read_iter,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
.unlocked_ioctl = timerfd_ioctl,
@@ -407,6 +408,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
+ struct file *file;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +445,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real(0);
- ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
- O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
- if (ufd < 0)
+ ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
+ if (ufd < 0) {
kfree(ctx);
+ return ufd;
+ }
+
+ file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
+ O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+ if (IS_ERR(file)) {
+ put_unused_fd(ufd);
+ kfree(ctx);
+ return PTR_ERR(file);
+ }
+ file->f_mode |= FMODE_NOWAIT;
+ fd_install(ufd, file);
return ufd;
}
--
2.43.0
On 4/9/24 9:22 AM, Jens Axboe wrote:
> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
> ctx->ticks = 0;
> }
> spin_unlock_irq(&ctx->wqh.lock);
> - if (ticks)
> - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
> + if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
> + res = -EFAULT;
> return res;
> }
Dumb thinko here, as that should be:
if (ticks) {
res = copy_to_iter(&ticks, sizeof(ticks), to);
if (!res)
res = -EFAULT;
}
I've updated my branch, just a heads-up. Odd how it passed testing,
guess I got stack lucky...
--
Jens Axboe
Hi,
On 11.04.2024 00:27, Jens Axboe wrote:
> On 4/9/24 9:22 AM, Jens Axboe wrote:
>> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
>> ctx->ticks = 0;
>> }
>> spin_unlock_irq(&ctx->wqh.lock);
>> - if (ticks)
>> - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
>> + if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
>> + res = -EFAULT;
>> return res;
>> }
> Dumb thinko here, as that should be:
>
> if (ticks) {
> res = copy_to_iter(&ticks, sizeof(ticks), to);
> if (!res)
> res = -EFAULT;
> }
>
> I've updated my branch, just a heads-up. Odd how it passing testing,
> guess I got stack lucky...
The old version found its way into today's linux-next and bisecting the
boot issues directed me here. There is nothing more to report, but I can
confirm that the above change indeed fixes the problems observed on
next-20240411.
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
I hope that tomorrow's linux-next will have the correct version of this
patch.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On 4/11/24 5:40 AM, Marek Szyprowski wrote:
> Hi,
>
> On 11.04.2024 00:27, Jens Axboe wrote:
>> On 4/9/24 9:22 AM, Jens Axboe wrote:
>>> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
>>> ctx->ticks = 0;
>>> }
>>> spin_unlock_irq(&ctx->wqh.lock);
>>> - if (ticks)
>>> - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
>>> + if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
>>> + res = -EFAULT;
>>> return res;
>>> }
>> Dumb thinko here, as that should be:
>>
>> if (ticks) {
>> res = copy_to_iter(&ticks, sizeof(ticks), to);
>> if (!res)
>> res = -EFAULT;
>> }
>>
>> I've updated my branch, just a heads-up. Odd how it passing testing,
>> guess I got stack lucky...
>
> The old version got its way into today's linux-next and bisecting the
> boot issues directed me here. There is nothing more to report, but I can
> confirm that the above change indeed fixes the problems observed on
> next-20240411.
Yeah sorry about that :(
> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Thanks!
> I hope that tomorrow's linux-next will have the correct version of this
> patch.
It should, the branches have been updated.
--
Jens Axboe
© 2016 - 2026 Red Hat, Inc.