Put all request parameters into the qemu_laiocb struct, which will allow
re-submitting the tail of short reads/writes.
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/linux-aio.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53c3e9af8a..1f25339dc9 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -40,10 +40,15 @@ struct qemu_laiocb {
Coroutine *co;
LinuxAioState *ctx;
struct iocb iocb;
+ int fd;
ssize_t ret;
+ off_t offset;
size_t nbytes;
QEMUIOVector *qiov;
- bool is_read;
+
+ int type;
+ BdrvRequestFlags flags;
+ uint64_t dev_max_batch;
QSIMPLEQ_ENTRY(qemu_laiocb) next;
};
@@ -87,7 +92,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
ret = 0;
} else if (ret >= 0) {
/* Short reads mean EOF, pad with zeros. */
- if (laiocb->is_read) {
+ if (laiocb->type == QEMU_AIO_READ) {
qemu_iovec_memset(laiocb->qiov, ret, 0,
laiocb->qiov->size - ret);
} else {
@@ -367,23 +372,23 @@ static void laio_deferred_fn(void *opaque)
}
}
-static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
- int type, BdrvRequestFlags flags,
- uint64_t dev_max_batch)
+static int laio_do_submit(struct qemu_laiocb *laiocb)
{
LinuxAioState *s = laiocb->ctx;
struct iocb *iocbs = &laiocb->iocb;
QEMUIOVector *qiov = laiocb->qiov;
+ int fd = laiocb->fd;
+ off_t offset = laiocb->offset;
- switch (type) {
+ switch (laiocb->type) {
case QEMU_AIO_WRITE:
#ifdef HAVE_IO_PREP_PWRITEV2
{
- int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+ int laio_flags = (laiocb->flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
}
#else
- assert(flags == 0);
+ assert(laiocb->flags == 0);
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
#endif
break;
@@ -399,7 +404,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
/* Currently Linux kernel does not support other operations */
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
- __func__, type);
+ __func__, laiocb->type);
return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -407,7 +412,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
s->io_q.in_queue++;
if (!s->io_q.blocked) {
- if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
+ if (s->io_q.in_queue >= laio_max_batch(s, laiocb->dev_max_batch)) {
ioq_submit(s);
} else {
defer_call(laio_deferred_fn, s);
@@ -425,14 +430,18 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
AioContext *ctx = qemu_get_current_aio_context();
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
- .nbytes = qiov ? qiov->size : 0,
+ .fd = fd,
+ .offset = offset,
+ .nbytes = (qiov ? qiov->size : 0),
.ctx = aio_get_linux_aio(ctx),
.ret = -EINPROGRESS,
- .is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
+ .type = type,
+ .flags = flags,
+ .dev_max_batch = dev_max_batch,
};
- ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
+ ret = laio_do_submit(&laiocb);
if (ret < 0) {
return ret;
}
--
2.53.0
Am 18.03.2026 um 16:32 hat Hanna Czenczek geschrieben:
> Put all request parameters into the qemu_laiocb struct, which will allow
> re-submitting the tail of short reads/writes.
>
> Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
> ---
> block/linux-aio.c | 35 ++++++++++++++++++++++-------------
> 1 file changed, 22 insertions(+), 13 deletions(-)
>
> diff --git a/block/linux-aio.c b/block/linux-aio.c
> index 53c3e9af8a..1f25339dc9 100644
> --- a/block/linux-aio.c
> +++ b/block/linux-aio.c
> @@ -40,10 +40,15 @@ struct qemu_laiocb {
> Coroutine *co;
> LinuxAioState *ctx;
> struct iocb iocb;
> + int fd;
> ssize_t ret;
> + off_t offset;
> size_t nbytes;
> QEMUIOVector *qiov;
> - bool is_read;
> +
> + int type;
If you put fd and type next to each other, you'd avoid a hole in the
struct.
> + BdrvRequestFlags flags;
> + uint64_t dev_max_batch;
> QSIMPLEQ_ENTRY(qemu_laiocb) next;
> };
>
> @@ -87,7 +92,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
> ret = 0;
> } else if (ret >= 0) {
> /* Short reads mean EOF, pad with zeros. */
> - if (laiocb->is_read) {
> + if (laiocb->type == QEMU_AIO_READ) {
> qemu_iovec_memset(laiocb->qiov, ret, 0,
> laiocb->qiov->size - ret);
> } else {
> @@ -367,23 +372,23 @@ static void laio_deferred_fn(void *opaque)
> }
> }
>
> -static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
> - int type, BdrvRequestFlags flags,
> - uint64_t dev_max_batch)
> +static int laio_do_submit(struct qemu_laiocb *laiocb)
> {
> LinuxAioState *s = laiocb->ctx;
> struct iocb *iocbs = &laiocb->iocb;
> QEMUIOVector *qiov = laiocb->qiov;
> + int fd = laiocb->fd;
> + off_t offset = laiocb->offset;
>
> - switch (type) {
> + switch (laiocb->type) {
> case QEMU_AIO_WRITE:
> #ifdef HAVE_IO_PREP_PWRITEV2
> {
> - int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
> + int laio_flags = (laiocb->flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
> io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
> }
> #else
> - assert(flags == 0);
> + assert(laiocb->flags == 0);
> io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
> #endif
> break;
> @@ -399,7 +404,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
> /* Currently Linux kernel does not support other operations */
> default:
> fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
> - __func__, type);
> + __func__, laiocb->type);
> return -EIO;
> }
> io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
> @@ -407,7 +412,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
> QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
> s->io_q.in_queue++;
> if (!s->io_q.blocked) {
> - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
> + if (s->io_q.in_queue >= laio_max_batch(s, laiocb->dev_max_batch)) {
> ioq_submit(s);
> } else {
> defer_call(laio_deferred_fn, s);
> @@ -425,14 +430,18 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> AioContext *ctx = qemu_get_current_aio_context();
> struct qemu_laiocb laiocb = {
> .co = qemu_coroutine_self(),
> - .nbytes = qiov ? qiov->size : 0,
> + .fd = fd,
> + .offset = offset,
> + .nbytes = (qiov ? qiov->size : 0),
Adding parentheses around the expression seems both unrelated and
unnecessary.
> .ctx = aio_get_linux_aio(ctx),
> .ret = -EINPROGRESS,
> - .is_read = (type == QEMU_AIO_READ),
> .qiov = qiov,
> + .type = type,
> + .flags = flags,
> + .dev_max_batch = dev_max_batch,
> };
>
> - ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
> + ret = laio_do_submit(&laiocb);
> if (ret < 0) {
> return ret;
> }
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
On 23.03.26 17:36, Kevin Wolf wrote:
> Am 18.03.2026 um 16:32 hat Hanna Czenczek geschrieben:
>> Put all request parameters into the qemu_laiocb struct, which will allow
>> re-submitting the tail of short reads/writes.
>>
>> Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
>> ---
>> block/linux-aio.c | 35 ++++++++++++++++++++++-------------
>> 1 file changed, 22 insertions(+), 13 deletions(-)
>>
>> diff --git a/block/linux-aio.c b/block/linux-aio.c
>> index 53c3e9af8a..1f25339dc9 100644
>> --- a/block/linux-aio.c
>> +++ b/block/linux-aio.c
>> @@ -40,10 +40,15 @@ struct qemu_laiocb {
>> Coroutine *co;
>> LinuxAioState *ctx;
>> struct iocb iocb;
>> + int fd;
>> ssize_t ret;
>> + off_t offset;
>> size_t nbytes;
>> QEMUIOVector *qiov;
>> - bool is_read;
>> +
>> + int type;
> If you put fd and type next to each other, you'd avoid a hole in the
> struct.
And I assume pulling dev_max_batch forward, after qiov, yes.
>> + BdrvRequestFlags flags;
>> + uint64_t dev_max_batch;
>> QSIMPLEQ_ENTRY(qemu_laiocb) next;
>> };
>>
>> @@ -87,7 +92,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
>> ret = 0;
>> } else if (ret >= 0) {
>> /* Short reads mean EOF, pad with zeros. */
>> - if (laiocb->is_read) {
>> + if (laiocb->type == QEMU_AIO_READ) {
>> qemu_iovec_memset(laiocb->qiov, ret, 0,
>> laiocb->qiov->size - ret);
>> } else {
>> @@ -367,23 +372,23 @@ static void laio_deferred_fn(void *opaque)
>> }
>> }
>>
>> -static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
>> - int type, BdrvRequestFlags flags,
>> - uint64_t dev_max_batch)
>> +static int laio_do_submit(struct qemu_laiocb *laiocb)
>> {
>> LinuxAioState *s = laiocb->ctx;
>> struct iocb *iocbs = &laiocb->iocb;
>> QEMUIOVector *qiov = laiocb->qiov;
>> + int fd = laiocb->fd;
>> + off_t offset = laiocb->offset;
>>
>> - switch (type) {
>> + switch (laiocb->type) {
>> case QEMU_AIO_WRITE:
>> #ifdef HAVE_IO_PREP_PWRITEV2
>> {
>> - int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
>> + int laio_flags = (laiocb->flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
>> io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
>> }
>> #else
>> - assert(flags == 0);
>> + assert(laiocb->flags == 0);
>> io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
>> #endif
>> break;
>> @@ -399,7 +404,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
>> /* Currently Linux kernel does not support other operations */
>> default:
>> fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
>> - __func__, type);
>> + __func__, laiocb->type);
>> return -EIO;
>> }
>> io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
>> @@ -407,7 +412,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
>> QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
>> s->io_q.in_queue++;
>> if (!s->io_q.blocked) {
>> - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
>> + if (s->io_q.in_queue >= laio_max_batch(s, laiocb->dev_max_batch)) {
>> ioq_submit(s);
>> } else {
>> defer_call(laio_deferred_fn, s);
>> @@ -425,14 +430,18 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
>> AioContext *ctx = qemu_get_current_aio_context();
>> struct qemu_laiocb laiocb = {
>> .co = qemu_coroutine_self(),
>> - .nbytes = qiov ? qiov->size : 0,
>> + .fd = fd,
>> + .offset = offset,
>> + .nbytes = (qiov ? qiov->size : 0),
> Adding parentheses around the expression seems both unrelated and
> unnecessary.
True. Sorry, artifact from a prior version, I think.
>> .ctx = aio_get_linux_aio(ctx),
>> .ret = -EINPROGRESS,
>> - .is_read = (type == QEMU_AIO_READ),
>> .qiov = qiov,
>> + .type = type,
>> + .flags = flags,
>> + .dev_max_batch = dev_max_batch,
>> };
>>
>> - ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
>> + ret = laio_do_submit(&laiocb);
>> if (ret < 0) {
>> return ret;
>> }
> Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Thanks!
Hanna
On 23.03.26 18:02, Hanna Czenczek wrote:
> On 23.03.26 17:36, Kevin Wolf wrote:
>> Am 18.03.2026 um 16:32 hat Hanna Czenczek geschrieben:
>>> Put all request parameters into the qemu_laiocb struct, which will
>>> allow
>>> re-submitting the tail of short reads/writes.
>>>
>>> Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
>>> ---
>>> block/linux-aio.c | 35 ++++++++++++++++++++++-------------
>>> 1 file changed, 22 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/block/linux-aio.c b/block/linux-aio.c
>>> index 53c3e9af8a..1f25339dc9 100644
>>> --- a/block/linux-aio.c
>>> +++ b/block/linux-aio.c
>>> @@ -40,10 +40,15 @@ struct qemu_laiocb {
>>> Coroutine *co;
>>> LinuxAioState *ctx;
>>> struct iocb iocb;
>>> + int fd;
>>> ssize_t ret;
>>> + off_t offset;
>>> size_t nbytes;
>>> QEMUIOVector *qiov;
>>> - bool is_read;
>>> +
>>> + int type;
>> If you put fd and type next to each other, you'd avoid a hole in the
>> struct.
>
> And I assume pulling dev_max_batch forward, after qiov, yes.
Noticed while sending: I missed `.next` for some reason. So there is no
reason to pull dev_max_batch forward, and I guess there will always be a
hole, but I suppose it is still best to put all 32-bit ints next to each
other (fd, type, flags), so that the remaining hole is more likely to be
filled in the future.
Hanna
>>> + BdrvRequestFlags flags;
>>> + uint64_t dev_max_batch;
>>> QSIMPLEQ_ENTRY(qemu_laiocb) next;
>>> };
Am 23.03.2026 um 18:04 hat Hanna Czenczek geschrieben:
> On 23.03.26 18:02, Hanna Czenczek wrote:
> > On 23.03.26 17:36, Kevin Wolf wrote:
> > > Am 18.03.2026 um 16:32 hat Hanna Czenczek geschrieben:
> > > > Put all request parameters into the qemu_laiocb struct, which
> > > > will allow
> > > > re-submitting the tail of short reads/writes.
> > > >
> > > > Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
> > > > ---
> > > > block/linux-aio.c | 35 ++++++++++++++++++++++-------------
> > > > 1 file changed, 22 insertions(+), 13 deletions(-)
> > > >
> > > > diff --git a/block/linux-aio.c b/block/linux-aio.c
> > > > index 53c3e9af8a..1f25339dc9 100644
> > > > --- a/block/linux-aio.c
> > > > +++ b/block/linux-aio.c
> > > > @@ -40,10 +40,15 @@ struct qemu_laiocb {
> > > > Coroutine *co;
> > > > LinuxAioState *ctx;
> > > > struct iocb iocb;
> > > > + int fd;
> > > > ssize_t ret;
> > > > + off_t offset;
> > > > size_t nbytes;
> > > > QEMUIOVector *qiov;
> > > > - bool is_read;
> > > > +
> > > > + int type;
> > > If you put fd and type next to each other, you'd avoid a hole in the
> > > struct.
> >
> > And I assume pulling dev_max_batch forward, after qiov, yes.
>
> Noticed while sending: I missed `.next` for some reason. So no reason to
> pull dev_max_batch forward, and I guess there will always be a hole, but I
> suppose it is still best to put all 32-bit ints next to each other (fd,
> type, flags) so filling that up in the future is more likely.
Ah, yes, I missed that flags is only 32 bits, too.
I'm sure that things could still be rearranged to be more optimal, and
I'm also sure it's premature optimisation. Sorry for the noise.
Kevin
© 2016 - 2026 Red Hat, Inc.