We are gradually moving away from sector-based interfaces, towards
byte-based. Make the change for the last few sector-based callbacks
in the rbd driver.
Note that the driver was already using byte-based calls for
performing actual I/O, so this just gets rid of a round trip
of scaling; however, as I don't know if RBD is tolerant of
non-sector AIO operations, I went with the conservate approach
of adding .bdrv_refresh_limits to override the block layer
defaults back to the pre-patch value of 512.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
v2: override new block layer default alignment [Kevin]
---
block/rbd.c | 44 ++++++++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 20 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index c9359d0ad84..638ecf8d986 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -231,6 +231,13 @@ done:
}
+static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ /* XXX Does RBD support AIO on less than 512-byte alignment? */
+ bs->bl.request_alignment = 512;
+}
+
+
static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
Error **errp)
{
@@ -899,27 +906,23 @@ failed:
return NULL;
}
-static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
-{
- return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
- (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
- RBD_AIO_READ);
-}
-
-static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
+static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb,
void *opaque)
{
- return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
- (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
+ return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
+ RBD_AIO_READ);
+}
+
+static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags,
+ BlockCompletionFunc *cb,
+ void *opaque)
+{
+ return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
RBD_AIO_WRITE);
}
@@ -1158,6 +1161,7 @@ static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_parse_filename = qemu_rbd_parse_filename,
+ .bdrv_refresh_limits = qemu_rbd_refresh_limits,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
@@ -1170,8 +1174,8 @@ static BlockDriver bdrv_rbd = {
.bdrv_truncate = qemu_rbd_truncate,
.protocol_name = "rbd",
- .bdrv_aio_readv = qemu_rbd_aio_readv,
- .bdrv_aio_writev = qemu_rbd_aio_writev,
+ .bdrv_aio_preadv = qemu_rbd_aio_preadv,
+ .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.bdrv_aio_flush = qemu_rbd_aio_flush,
--
2.14.3
On Tue, Apr 24, 2018 at 3:25 PM, Eric Blake <eblake@redhat.com> wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based. Make the change for the last few sector-based callbacks
> in the rbd driver.
>
> Note that the driver was already using byte-based calls for
> performing actual I/O, so this just gets rid of a round trip
> of scaling; however, as I don't know if RBD is tolerant of
> non-sector AIO operations, I went with the conservate approach
> of adding .bdrv_refresh_limits to override the block layer
> defaults back to the pre-patch value of 512.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
>
> ---
> v2: override new block layer default alignment [Kevin]
> ---
> block/rbd.c | 44 ++++++++++++++++++++++++--------------------
> 1 file changed, 24 insertions(+), 20 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index c9359d0ad84..638ecf8d986 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -231,6 +231,13 @@ done:
> }
>
>
> +static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> +{
> + /* XXX Does RBD support AIO on less than 512-byte alignment? */
Yes, librbd internally supports 1-byte alignment for IO, but the
optimal alignment/length would be object size * stripe count.
> + bs->bl.request_alignment = 512;
> +}
> +
> +
> static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
> Error **errp)
> {
> @@ -899,27 +906,23 @@ failed:
> return NULL;
> }
>
> -static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
> - int64_t sector_num,
> - QEMUIOVector *qiov,
> - int nb_sectors,
> - BlockCompletionFunc *cb,
> - void *opaque)
> -{
> - return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
> - (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
> - RBD_AIO_READ);
> -}
> -
> -static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
> - int64_t sector_num,
> - QEMUIOVector *qiov,
> - int nb_sectors,
> +static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
> + uint64_t offset, uint64_t bytes,
> + QEMUIOVector *qiov, int flags,
> BlockCompletionFunc *cb,
> void *opaque)
> {
> - return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
> - (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
> + return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
> + RBD_AIO_READ);
> +}
> +
> +static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
> + uint64_t offset, uint64_t bytes,
> + QEMUIOVector *qiov, int flags,
> + BlockCompletionFunc *cb,
> + void *opaque)
> +{
> + return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
> RBD_AIO_WRITE);
> }
>
> @@ -1158,6 +1161,7 @@ static BlockDriver bdrv_rbd = {
> .format_name = "rbd",
> .instance_size = sizeof(BDRVRBDState),
> .bdrv_parse_filename = qemu_rbd_parse_filename,
> + .bdrv_refresh_limits = qemu_rbd_refresh_limits,
> .bdrv_file_open = qemu_rbd_open,
> .bdrv_close = qemu_rbd_close,
> .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
> @@ -1170,8 +1174,8 @@ static BlockDriver bdrv_rbd = {
> .bdrv_truncate = qemu_rbd_truncate,
> .protocol_name = "rbd",
>
> - .bdrv_aio_readv = qemu_rbd_aio_readv,
> - .bdrv_aio_writev = qemu_rbd_aio_writev,
> + .bdrv_aio_preadv = qemu_rbd_aio_preadv,
> + .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
>
> #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
> .bdrv_aio_flush = qemu_rbd_aio_flush,
> --
> 2.14.3
>
>
--
Jason
Am 24.04.2018 um 21:53 hat Jason Dillaman geschrieben:
> On Tue, Apr 24, 2018 at 3:25 PM, Eric Blake <eblake@redhat.com> wrote:
> > We are gradually moving away from sector-based interfaces, towards
> > byte-based. Make the change for the last few sector-based callbacks
> > in the rbd driver.
> >
> > Note that the driver was already using byte-based calls for
> > performing actual I/O, so this just gets rid of a round trip
> > of scaling; however, as I don't know if RBD is tolerant of
> > non-sector AIO operations, I went with the conservate approach
> > of adding .bdrv_refresh_limits to override the block layer
> > defaults back to the pre-patch value of 512.
> >
> > Signed-off-by: Eric Blake <eblake@redhat.com>
> >
> > ---
> > v2: override new block layer default alignment [Kevin]
> > ---
> > block/rbd.c | 44 ++++++++++++++++++++++++--------------------
> > 1 file changed, 24 insertions(+), 20 deletions(-)
> >
> > diff --git a/block/rbd.c b/block/rbd.c
> > index c9359d0ad84..638ecf8d986 100644
> > --- a/block/rbd.c
> > +++ b/block/rbd.c
> > @@ -231,6 +231,13 @@ done:
> > }
> >
> >
> > +static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> > +{
> > + /* XXX Does RBD support AIO on less than 512-byte alignment? */
>
> Yes, librbd internally supports 1-byte alignment for IO, but the
> optimal alignment/length would be object size * stripe count.
Would you like to post a follow-up patch to this series that removes the
.bdrv_refresh_limits implementation again with a commit message
explaining that RBD does support byte alignment?
Kevin
On 04/24/2018 02:25 PM, Eric Blake wrote: > We are gradually moving away from sector-based interfaces, towards > byte-based. Make the change for the last few sector-based callbacks > in the rbd driver. > > Note that the driver was already using byte-based calls for > performing actual I/O, so this just gets rid of a round trip > of scaling; however, as I don't know if RBD is tolerant of > non-sector AIO operations, I went with the conservate approach s/conservate/conservative/ > of adding .bdrv_refresh_limits to override the block layer > defaults back to the pre-patch value of 512. > > Signed-off-by: Eric Blake <eblake@redhat.com> > -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
© 2016 - 2026 Red Hat, Inc.