Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 31 ++++++++++++++++++++++++++++++-
1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/block/rbd.c b/block/rbd.c
index 2d77d0007f..27b4404adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -63,7 +63,8 @@ typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
RBD_AIO_DISCARD,
- RBD_AIO_FLUSH
+ RBD_AIO_FLUSH,
+ RBD_AIO_WRITE_ZEROES
} RBDAIOCmd;
typedef struct BDRVRBDState {
@@ -221,8 +222,12 @@ done:
static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
+ BDRVRBDState *s = bs->opaque;
/* XXX Does RBD support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ bs->bl.pwrite_zeroes_alignment = s->object_size;
+#endif
}
@@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
s->aio_context = bdrv_get_aio_context(bs);
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+#endif
/* When extending regular files, we get zeros from the OS */
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
@@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
case RBD_AIO_FLUSH:
r = rbd_aio_flush(s->image, c);
break;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ case RBD_AIO_WRITE_ZEROES:
+ r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
+ break;
+#endif
default:
r = -EINVAL;
}
@@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
}
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+static int
+coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+ return -ENOTSUP;
+ }
+ return qemu_rbd_start_co(bs, offset, count, NULL, flags,
+ RBD_AIO_WRITE_ZEROES);
+}
+#endif
+
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRBDState *s = bs->opaque;
@@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = {
.bdrv_co_pwritev = qemu_rbd_co_pwritev,
.bdrv_co_flush_to_disk = qemu_rbd_co_flush,
.bdrv_co_pdiscard = qemu_rbd_co_pdiscard,
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+ .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
+#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
--
2.17.1
On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
> block/rbd.c | 31 ++++++++++++++++++++++++++++++-
> 1 file changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 2d77d0007f..27b4404adf 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -63,7 +63,8 @@ typedef enum {
> RBD_AIO_READ,
> RBD_AIO_WRITE,
> RBD_AIO_DISCARD,
> - RBD_AIO_FLUSH
> + RBD_AIO_FLUSH,
> + RBD_AIO_WRITE_ZEROES
> } RBDAIOCmd;
>
> typedef struct BDRVRBDState {
> @@ -221,8 +222,12 @@ done:
>
> static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> {
> + BDRVRBDState *s = bs->opaque;
> /* XXX Does RBD support AIO on less than 512-byte alignment? */
> bs->bl.request_alignment = 512;
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> + bs->bl.pwrite_zeroes_alignment = s->object_size;
The optimal alignment is 512 bytes -- but it can safely work just fine
down to 1 byte alignment since it will pad the request with additional
writes if needed.
> +#endif
> }
>
>
> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> }
>
> s->aio_context = bdrv_get_aio_context(bs);
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> +#endif
>
> /* When extending regular files, we get zeros from the OS */
> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
> case RBD_AIO_FLUSH:
> r = rbd_aio_flush(s->image, c);
> break;
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> + case RBD_AIO_WRITE_ZEROES:
> + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
> + break;
> +#endif
> default:
> r = -EINVAL;
> }
> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
> return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
> }
>
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +static int
> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
> + int count, BdrvRequestFlags flags)
> +{
> + if (!(flags & BDRV_REQ_MAY_UNMAP)) {
> + return -ENOTSUP;
> + }
There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
use to optionally disable unmap.
> + return qemu_rbd_start_co(bs, offset, count, NULL, flags,
> + RBD_AIO_WRITE_ZEROES);
> +}
> +#endif
> +
> static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
> {
> BDRVRBDState *s = bs->opaque;
> @@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = {
> .bdrv_co_pwritev = qemu_rbd_co_pwritev,
> .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
> .bdrv_co_pdiscard = qemu_rbd_co_pdiscard,
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> + .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
> +#endif
>
> .bdrv_snapshot_create = qemu_rbd_snap_create,
> .bdrv_snapshot_delete = qemu_rbd_snap_remove,
> --
> 2.17.1
>
>
--
Jason
Am 14.01.21 um 20:19 schrieb Jason Dillaman:
> On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/rbd.c | 31 ++++++++++++++++++++++++++++++-
>> 1 file changed, 30 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 2d77d0007f..27b4404adf 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -63,7 +63,8 @@ typedef enum {
>> RBD_AIO_READ,
>> RBD_AIO_WRITE,
>> RBD_AIO_DISCARD,
>> - RBD_AIO_FLUSH
>> + RBD_AIO_FLUSH,
>> + RBD_AIO_WRITE_ZEROES
>> } RBDAIOCmd;
>>
>> typedef struct BDRVRBDState {
>> @@ -221,8 +222,12 @@ done:
>>
>> static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
>> {
>> + BDRVRBDState *s = bs->opaque;
>> /* XXX Does RBD support AIO on less than 512-byte alignment? */
>> bs->bl.request_alignment = 512;
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> + bs->bl.pwrite_zeroes_alignment = s->object_size;
> The optimal alignment is 512 bytes -- but it can safely work just fine
> down to 1 byte alignment since it will pad the request with additional
> writes if needed.
Okay and this will likely be faster than having Qemu doing that request split, right?
If we drop the alignment hint Qemu will pass the original request.
>
>> +#endif
>> }
>>
>>
>> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>> }
>>
>> s->aio_context = bdrv_get_aio_context(bs);
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
>> +#endif
>>
>> /* When extending regular files, we get zeros from the OS */
>> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
>> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
>> case RBD_AIO_FLUSH:
>> r = rbd_aio_flush(s->image, c);
>> break;
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> + case RBD_AIO_WRITE_ZEROES:
>> + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
>> + break;
>> +#endif
>> default:
>> r = -EINVAL;
>> }
>> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
>> return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
>> }
>>
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> +static int
>> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
>> + int count, BdrvRequestFlags flags)
>> +{
>> + if (!(flags & BDRV_REQ_MAY_UNMAP)) {
>> + return -ENOTSUP;
>> + }
> There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
> use to optionally disable unmap.
I have seen that. If you want I can support for this, too. But afaik this
is only available since Octopus release?
Peter
On Thu, Jan 14, 2021 at 2:41 PM Peter Lieven <pl@kamp.de> wrote:
>
> Am 14.01.21 um 20:19 schrieb Jason Dillaman:
> > On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >> block/rbd.c | 31 ++++++++++++++++++++++++++++++-
> >> 1 file changed, 30 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index 2d77d0007f..27b4404adf 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -63,7 +63,8 @@ typedef enum {
> >> RBD_AIO_READ,
> >> RBD_AIO_WRITE,
> >> RBD_AIO_DISCARD,
> >> - RBD_AIO_FLUSH
> >> + RBD_AIO_FLUSH,
> >> + RBD_AIO_WRITE_ZEROES
> >> } RBDAIOCmd;
> >>
> >> typedef struct BDRVRBDState {
> >> @@ -221,8 +222,12 @@ done:
> >>
> >> static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> >> {
> >> + BDRVRBDState *s = bs->opaque;
> >> /* XXX Does RBD support AIO on less than 512-byte alignment? */
> >> bs->bl.request_alignment = 512;
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> + bs->bl.pwrite_zeroes_alignment = s->object_size;
> > The optimal alignment is 512 bytes -- but it can safely work just fine
> > down to 1 byte alignment since it will pad the request with additional
> > writes if needed.
>
>
> Okay and this will likely be faster than having Qemu doing that request split, right?
>
> If we drop the alignment hint Qemu will pass the original request.
>
>
> >
> >> +#endif
> >> }
> >>
> >>
> >> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> >> }
> >>
> >> s->aio_context = bdrv_get_aio_context(bs);
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> >> +#endif
> >>
> >> /* When extending regular files, we get zeros from the OS */
> >> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
> >> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
> >> case RBD_AIO_FLUSH:
> >> r = rbd_aio_flush(s->image, c);
> >> break;
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> + case RBD_AIO_WRITE_ZEROES:
> >> + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
> >> + break;
> >> +#endif
> >> default:
> >> r = -EINVAL;
> >> }
> >> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
> >> return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
> >> }
> >>
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> +static int
> >> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
> >> + int count, BdrvRequestFlags flags)
> >> +{
> >> + if (!(flags & BDRV_REQ_MAY_UNMAP)) {
> >> + return -ENOTSUP;
> >> + }
> > There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
> > use to optionally disable unmap.
>
>
> I have seen that. If you want I can support for this, too. But afaik this
>
> is only available since Octopus release?
True -- I didn't backport that support to Nautilus since it was a new
feature vs the bug-fix that the write-zeroes API was introduced to
fix.
>
> Peter
>
>
--
Jason
© 2016 - 2025 Red Hat, Inc.