[PATCH v4 6/7] block/copy-before-write: implement cbw-timeout option

Vladimir Sementsov-Ogievskiy posted 7 patches 3 years, 10 months ago
Maintainers: John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
[PATCH v4 6/7] block/copy-before-write: implement cbw-timeout option
Posted by Vladimir Sementsov-Ogievskiy 3 years, 10 months ago
In some scenarios, when a copy-before-write operation takes too long,
it's better to cancel it.

The new option is most useful together with
on-cbw-error=break-snapshot: this way, if a cbw operation takes too
long, we just cancel the backup process and do not disturb the guest
too much.

Note the tricky point of the implementation: we keep an additional
count in bs->in_flight during the block_copy operation even if it has
timed out. Background "cancelled" block_copy operations will finish at
some point and will want to access the state. We must take care not to
free the state in .bdrv_close() before they finish.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
---
 qapi/block-core.json      |  8 +++++++-
 block/copy-before-write.c | 23 ++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6b870b2f37..682b599a4a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4206,12 +4206,18 @@
 # @on-cbw-error: Behavior on failure of copy-before-write operation.
 #                Default is @break-guest-write. (Since 7.1)
 #
+# @cbw-timeout: Zero means no limit. Non-zero sets the timeout in seconds
+#               for copy-before-write operation. When a timeout occurs,
+#               the respective copy-before-write operation will fail, and
+#               the @on-cbw-error parameter will decide how this failure
+#               is handled. Default 0. (Since 7.1)
+#
 # Since: 6.2
 ##
 { 'struct': 'BlockdevOptionsCbw',
   'base': 'BlockdevOptionsGenericFormat',
   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
-            '*on-cbw-error': 'OnCbwError' } }
+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
 
 ##
 # @BlockdevOptions:
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index fc13c7cd44..1bc2e7f9ba 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -42,6 +42,7 @@ typedef struct BDRVCopyBeforeWriteState {
     BlockCopyState *bcs;
     BdrvChild *target;
     OnCbwError on_cbw_error;
+    uint32_t cbw_timeout_ns;
 
     /*
      * @lock: protects access to @access_bitmap, @done_bitmap and
@@ -83,6 +84,14 @@ static coroutine_fn int cbw_co_preadv(
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
+static void block_copy_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    bs->in_flight--;
+    aio_wait_kick();
+}
+
 /*
  * Do copy-before-write operation.
  *
@@ -111,7 +120,16 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
     off = QEMU_ALIGN_DOWN(offset, cluster_size);
     end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
 
-    ret = block_copy(s->bcs, off, end - off, true, 0, NULL, NULL);
+    /*
+     * Increase in_flight, so that in case of timed-out block-copy, the
+     * remaining background block_copy() request (which can't be immediately
+     * cancelled by timeout) is presented in bs->in_flight. This way we are
+     * sure that on bs close() we'll previously wait for all timed-out but yet
+     * running block_copy calls.
+     */
+    bs->in_flight++;
+    ret = block_copy(s->bcs, off, end - off, true, s->cbw_timeout_ns,
+                     block_copy_cb, bs);
     if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) {
         return ret;
     }
@@ -377,6 +395,7 @@ static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp)
      */
     qdict_extract_subqdict(options, NULL, "bitmap");
     qdict_del(options, "on-cbw-error");
+    qdict_del(options, "cbw-timeout");
 
 out:
     visit_free(v);
@@ -423,6 +442,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     }
     s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
             ON_CBW_ERROR_BREAK_GUEST_WRITE;
+    s->cbw_timeout_ns = opts->has_cbw_timeout ?
+        opts->cbw_timeout * NANOSECONDS_PER_SECOND : 0;
 
     bs->total_sectors = bs->file->bs->total_sectors;
     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
-- 
2.35.1
Re: [PATCH v4 6/7] block/copy-before-write: implement cbw-timeout option
Posted by Vladimir Sementsov-Ogievskiy 3 years, 7 months ago
While debugging my "[PULL 00/10] Block jobs & NBD patches", I found that we have bdrv_dec_in_flight() and bdrv_inc_in_flight().

So, this should be fixed:

On 4/7/22 16:27, Vladimir Sementsov-Ogievskiy wrote:
> In some scenarios, when a copy-before-write operation takes too long,
> it's better to cancel it.
> 
> The new option is most useful together with
> on-cbw-error=break-snapshot: this way, if a cbw operation takes too
> long, we just cancel the backup process and do not disturb the guest
> too much.
> 

[..]

>   
> +static void block_copy_cb(void *opaque)
> +{
> +    BlockDriverState *bs = opaque;
> +
> +    bs->in_flight--;
> +    aio_wait_kick();

Just bdrv_dec_in_flight(bs), which includes aio_wait_kick().

> +}
> +
>   /*
>    * Do copy-before-write operation.
>    *
> @@ -111,7 +120,16 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
>       off = QEMU_ALIGN_DOWN(offset, cluster_size);
>       end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
>   
> -    ret = block_copy(s->bcs, off, end - off, true, 0, NULL, NULL);
> +    /*
> +     * Increase in_flight, so that in case of timed-out block-copy, the
> +     * remaining background block_copy() request (which can't be immediately
> +     * cancelled by timeout) is presented in bs->in_flight. This way we are
> +     * sure that on bs close() we'll previously wait for all timed-out but yet
> +     * running block_copy calls.
> +     */
> +    bs->in_flight++;

bdrv_inc_in_flight(bs)

> +    ret = block_copy(s->bcs, off, end - off, true, s->cbw_timeout_ns,
> +                     block_copy_cb, bs);
>       if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) {
>           return ret;
>       }
> @@ -377,6 +395,7 @@ static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp)
>        */
>       qdict_extract_subqdict(options, NULL, "bitmap");
>       qdict_del(options, "on-cbw-error");
> +    qdict_del(options, "cbw-timeout");
>   

I'm going to resend "[PULL 00/10] Block jobs & NBD patches" with this fix and with the fix in patch 03, if there are no objections.

-- 
Best regards,
Vladimir