[Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks

Eric Blake posted 6 patches 7 years, 7 months ago
There is a newer version of this series
[Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks
Posted by Eric Blake 7 years, 7 months ago
We are gradually moving away from sector-based interfaces, towards
byte-based.  Add new byte-based aio callbacks for read and write,
to match the fact that bdrv_aio_pdiscard is already byte-based.

Ideally, drivers should be converted to use coroutine callbacks
rather than aio; but that is not quite as trivial (if we do that
conversion, the null-aio driver will disappear), so for the
short term, converting the signature but keeping things with aio
is easier.  Once all drivers are converted, the sector-based aio
callbacks will be removed.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 include/block/block_int.h |  6 ++++++
 block/io.c                | 37 +++++++++++++++++++++++++++----------
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 5ae7738cf8d..c882dc4232d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -137,9 +137,15 @@ struct BlockDriver {
     BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockCompletionFunc *cb, void *opaque);
+    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
+        BlockCompletionFunc *cb, void *opaque);
     BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockCompletionFunc *cb, void *opaque);
+    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
+        BlockCompletionFunc *cb, void *opaque);
     BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
         BlockCompletionFunc *cb, void *opaque);
     BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
diff --git a/block/io.c b/block/io.c
index 4d3d1f640a3..84a4caa72b7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -934,9 +934,11 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
     sector_num = offset >> BDRV_SECTOR_BITS;
     nb_sectors = bytes >> BDRV_SECTOR_BITS;

-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+    if (!drv->bdrv_aio_preadv) {
+        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+    }

     if (drv->bdrv_co_readv) {
         return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
@@ -946,8 +948,13 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
             .coroutine = qemu_coroutine_self(),
         };

-        acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
-                                      bdrv_co_io_em_complete, &co);
+        if (drv->bdrv_aio_preadv) {
+            acb = bs->drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
+                                           bdrv_co_io_em_complete, &co);
+        } else {
+            acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+                                          bdrv_co_io_em_complete, &co);
+        }
         if (acb == NULL) {
             return -EIO;
         } else {
@@ -982,9 +989,11 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
     sector_num = offset >> BDRV_SECTOR_BITS;
     nb_sectors = bytes >> BDRV_SECTOR_BITS;

-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+    if (!drv->bdrv_aio_pwritev) {
+        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+    }

     if (drv->bdrv_co_writev_flags) {
         ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
@@ -999,8 +1008,16 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
             .coroutine = qemu_coroutine_self(),
         };

-        acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
-                                       bdrv_co_io_em_complete, &co);
+        if (drv->bdrv_aio_pwritev) {
+            acb = bs->drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
+                                            flags & bs->supported_write_flags,
+                                            bdrv_co_io_em_complete, &co);
+            flags &= ~bs->supported_write_flags;
+        } else {
+            assert(!bs->supported_write_flags);
+            acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+                                           bdrv_co_io_em_complete, &co);
+        }
         if (acb == NULL) {
             ret = -EIO;
         } else {
-- 
2.14.3


Re: [Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks
Posted by Kevin Wolf 7 years, 5 months ago
Am 15.02.2018 um 20:28 hat Eric Blake geschrieben:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Add new byte-based aio callbacks for read and write,
> to match the fact that bdrv_aio_pdiscard is already byte-based.
> 
> Ideally, drivers should be converted to use coroutine callbacks
> rather than aio; but that is not quite as trivial (if we do that
> conversion, the null-aio driver will disappear), so for the
> short term, converting the signature but keeping things with aio
> is easier.  Once all drivers are converted, the sector-based aio
> callbacks will be removed.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  include/block/block_int.h |  6 ++++++
>  block/io.c                | 37 +++++++++++++++++++++++++++----------
>  2 files changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 5ae7738cf8d..c882dc4232d 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -137,9 +137,15 @@ struct BlockDriver {
>      BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
>          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>          BlockCompletionFunc *cb, void *opaque);
> +    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
> +        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
> +        BlockCompletionFunc *cb, void *opaque);
>      BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
>          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>          BlockCompletionFunc *cb, void *opaque);
> +    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
> +        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
> +        BlockCompletionFunc *cb, void *opaque);
>      BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
>          BlockCompletionFunc *cb, void *opaque);
>      BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
> diff --git a/block/io.c b/block/io.c
> index 4d3d1f640a3..84a4caa72b7 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -934,9 +934,11 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
>      sector_num = offset >> BDRV_SECTOR_BITS;
>      nb_sectors = bytes >> BDRV_SECTOR_BITS;
> 
> -    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> -    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> -    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
> +    if (!drv->bdrv_aio_preadv) {
> +        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> +        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> +        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
> +    }

Hm, this is kind of ugly. Previously, we handled everything byte-aligned
in the first section, now we mix both in the second section.

I can see that you do this so you don't have to duplicate the acb and
coroutine yielding code below, but can we move things into the right
place in the final patch at least? That is, calculate sector_num and
nb_sectors only if all the byte-based interfaces weren't available.

Kevin

Re: [Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks
Posted by Eric Blake 7 years, 5 months ago
On 04/24/2018 10:40 AM, Kevin Wolf wrote:
> Am 15.02.2018 um 20:28 hat Eric Blake geschrieben:
>> We are gradually moving away from sector-based interfaces, towards
>> byte-based.  Add new byte-based aio callbacks for read and write,
>> to match the fact that bdrv_aio_pdiscard is already byte-based.
>>
>> Ideally, drivers should be converted to use coroutine callbacks
>> rather than aio; but that is not quite as trivial (if we do that
>> conversion, the null-aio driver will disappear), so for the
>> short term, converting the signature but keeping things with aio
>> is easier.  Once all drivers are converted, the sector-based aio
>> callbacks will be removed.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>> ---
>>  include/block/block_int.h |  6 ++++++
>>  block/io.c                | 37 +++++++++++++++++++++++++++----------
>>  2 files changed, 33 insertions(+), 10 deletions(-)
>>

>> +++ b/block/io.c
>> @@ -934,9 +934,11 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
>>      sector_num = offset >> BDRV_SECTOR_BITS;
>>      nb_sectors = bytes >> BDRV_SECTOR_BITS;
>>
>> -    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
>> -    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
>> -    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
>> +    if (!drv->bdrv_aio_preadv) {
>> +        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
>> +        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
>> +        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
>> +    }
> 
> Hm, this is kind of ugly. Previously, we handled everything byte-aligned
> in the first section, now we mix both in the second section.
> 
> I can see that you do this so you don't have to duplicate the acb and
> coroutine yielding code below, but can we move things into the right
> place in the final patch at least? That is, calculate sector_num and
> nb_sectors only if all the byte-based interfaces weren't available.

Yeah, that's easy enough to squash into patch 6:

diff --git i/block/io.c w/block/io.c
index ba767612931..49fabe8eeb1 100644
--- i/block/io.c
+++ w/block/io.c
@@ -924,16 +924,13 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
         return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
     }

-    sector_num = offset >> BDRV_SECTOR_BITS;
-    nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    if (!drv->bdrv_aio_preadv) {
-        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
-    }
-
     if (drv->bdrv_co_readv) {
+        sector_num = offset >> BDRV_SECTOR_BITS;
+        nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
         return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
     } else {
         BlockAIOCB *acb;

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks
Posted by Kevin Wolf 7 years, 5 months ago
Am 24.04.2018 um 19:06 hat Eric Blake geschrieben:
> On 04/24/2018 10:40 AM, Kevin Wolf wrote:
> > Am 15.02.2018 um 20:28 hat Eric Blake geschrieben:
> >> We are gradually moving away from sector-based interfaces, towards
> >> byte-based.  Add new byte-based aio callbacks for read and write,
> >> to match the fact that bdrv_aio_pdiscard is already byte-based.
> >>
> >> Ideally, drivers should be converted to use coroutine callbacks
> >> rather than aio; but that is not quite as trivial (if we do that
> >> conversion, the null-aio driver will disappear), so for the
> >> short term, converting the signature but keeping things with aio
> >> is easier.  Once all drivers are converted, the sector-based aio
> >> callbacks will be removed.
> >>
> >> Signed-off-by: Eric Blake <eblake@redhat.com>
> >> ---
> >>  include/block/block_int.h |  6 ++++++
> >>  block/io.c                | 37 +++++++++++++++++++++++++++----------
> >>  2 files changed, 33 insertions(+), 10 deletions(-)
> >>
> 
> >> +++ b/block/io.c
> >> @@ -934,9 +934,11 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
> >>      sector_num = offset >> BDRV_SECTOR_BITS;
> >>      nb_sectors = bytes >> BDRV_SECTOR_BITS;
> >>
> >> -    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> >> -    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> >> -    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
> >> +    if (!drv->bdrv_aio_preadv) {
> >> +        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> >> +        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> >> +        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
> >> +    }
> > 
> > Hm, this is kind of ugly. Previously, we handled everything byte-aligned
> > in the first section, now we mix both in the second section.
> > 
> > I can see that you do this so you don't have to duplicate the acb and
> > coroutine yielding code below, but can we move things into the right
> > place in the final patch at least? That is, calculate sector_num and
> > nb_sectors only if all the byte-based interfaces weren't available.
> 
> Yeah, that's easy enough to squash into patch 6:
> 
> diff --git i/block/io.c w/block/io.c
> index ba767612931..49fabe8eeb1 100644
> --- i/block/io.c
> +++ w/block/io.c
> @@ -924,16 +924,13 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
>          return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
>      }
> 
> -    sector_num = offset >> BDRV_SECTOR_BITS;
> -    nb_sectors = bytes >> BDRV_SECTOR_BITS;
> -
> -    if (!drv->bdrv_aio_preadv) {
> -        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> -        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> -        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
> -    }
> -
>      if (drv->bdrv_co_readv) {
> +        sector_num = offset >> BDRV_SECTOR_BITS;
> +        nb_sectors = bytes >> BDRV_SECTOR_BITS;
> +
> +        assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> +        assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> +        assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
>          return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
>      } else {
>          BlockAIOCB *acb;

Ah, yes. I thought of moving the code in the else block, but this works,
too. Maybe it's even a bit nicer.

Kevin
Re: [Qemu-devel] [PATCH 1/6] block: Support byte-based aio callbacks
Posted by Eric Blake 7 years, 5 months ago
On 04/24/2018 12:15 PM, Kevin Wolf wrote:

>>> Hm, this is kind of ugly. Previously, we handled everything byte-aligned
>>> in the first section, now we mix both in the second section.
>>>
>>> I can see that you do this so you don't have to duplicate the acb and
>>> coroutine yielding code below, but can we move things into the right
>>> place in the final patch at least? That is, calculate sector_num and
>>> nb_sectors only if all the byte-based interfaces weren't available.
>>
>> Yeah, that's easy enough to squash into patch 6:
>>

> 
> Ah, yes. I thought of moving the code in the else block, but this works,
> too. Maybe it's even a bit nicer.

Moving the code into the 'if' works for bdrv_co_readv, but not so nicely
for bdrv_co_writev vs. bdrv_co_writev_flags.  So for v2, I'll just
hoist the aio code first; and I guess I smell another round of cleanups
coming...

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org