[v6] block: Deal with filters

[Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Max Reitz 6 years, 6 months ago

This includes some permission limiting (for example, we only need to
take the RESIZE permission for active commits where the base is smaller
than the top).

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
 blockdev.c     |  47 +++++++++++++++++---
 2 files changed, 131 insertions(+), 33 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 54bafdf176..6ddbfb9708 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
     BlockBackend *target;
     BlockDriverState *mirror_top_bs;
     BlockDriverState *base;
+    BlockDriverState *base_overlay;
 
     /* The name of the graph node to replace */
     char *replaces;
@@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
                              &error_abort);
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
-        if (backing_bs(target_bs) != backing) {
-            bdrv_set_backing_hd(target_bs, backing, &local_err);
+        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
+
+        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
+            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
             if (local_err) {
                 error_report_err(local_err);
                 ret = -EPERM;
@@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
     /* We just changed the BDS the job BB refers to (with either or both of the
      * bdrv_replace_node() calls), so switch the BB back so the cleanup does
@@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             return 0;
         }
 
-        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
+        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
+                                      &count);
         if (ret < 0) {
             return ret;
         }
@@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     } else {
         s->target_cluster_size = BDRV_SECTOR_SIZE;
     }
-    if (backing_filename[0] && !target_bs->backing &&
+    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
         s->granularity < s->target_cluster_size) {
         s->buf_size = MAX(s->buf_size, s->target_cluster_size);
         s->cow_bitmap = bitmap_new(length);
@@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
     if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
         int ret;
 
-        assert(!target->backing);
-        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+        assert(!bdrv_backing_chain_next(target));
+        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
+                                     "backing", errp);
         if (ret < 0) {
             return;
         }
@@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
     MirrorBlockJob *s;
     MirrorBDSOpaque *bs_opaque;
     BlockDriverState *mirror_top_bs;
-    bool target_graph_mod;
     bool target_is_backing;
+    uint64_t target_perms, target_shared_perms;
     Error *local_err = NULL;
     int ret;
 
@@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
-    if (bs == target) {
+    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
         error_setg(errp, "Can't mirror node into itself");
         return NULL;
     }
@@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
      * In the case of active commit, things look a bit different, though,
      * because the target is an already populated backing file in active use.
      * We can allow anything except resize there.*/
+
+    target_perms = BLK_PERM_WRITE;
+    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
+
     target_is_backing = bdrv_chain_contains(bs, target);
-    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+    if (target_is_backing) {
+        int64_t bs_size, target_size;
+        bs_size = bdrv_getlength(bs);
+        if (bs_size < 0) {
+            error_setg_errno(errp, -bs_size,
+                             "Could not inquire top image size");
+            goto fail;
+        }
+
+        target_size = bdrv_getlength(target);
+        if (target_size < 0) {
+            error_setg_errno(errp, -target_size,
+                             "Could not inquire base image size");
+            goto fail;
+        }
+
+        if (target_size < bs_size) {
+            target_perms |= BLK_PERM_RESIZE;
+        }
+
+        target_shared_perms |= BLK_PERM_CONSISTENT_READ
+                            |  BLK_PERM_WRITE
+                            |  BLK_PERM_GRAPH_MOD;
+    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
+        /*
+         * We may want to allow this in the future, but it would
+         * require taking some extra care.
+         */
+        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
+                   "source's backing chain");
+        goto fail;
+    }
+
+    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
+        target_perms |= BLK_PERM_GRAPH_MOD;
+    }
+
     s->target = blk_new(s->common.job.aio_context,
-                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
-                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
-                        BLK_PERM_WRITE_UNCHANGED |
-                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
-                                             BLK_PERM_WRITE |
-                                             BLK_PERM_GRAPH_MOD : 0));
+                        target_perms, target_shared_perms);
     ret = blk_insert_bs(s->target, target, errp);
     if (ret < 0) {
         goto fail;
@@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
     s->backing_mode = backing_mode;
     s->copy_mode = copy_mode;
     s->base = base;
+    s->base_overlay = bdrv_find_overlay(bs, base);
     s->granularity = granularity;
     s->buf_size = ROUND_UP(buf_size, granularity);
     s->unmap = unmap;
@@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
     /* In commit_active_start() all intermediate nodes disappear, so
      * any jobs in them must be blocked */
     if (target_is_backing) {
-        BlockDriverState *iter;
-        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
-            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
-             * ourselves at s->base (if writes are blocked for a node, they are
-             * also blocked for its backing file). The other options would be a
-             * second filter driver above s->base (== target). */
+        BlockDriverState *iter, *filtered_target;
+        uint64_t iter_shared_perms;
+
+        /*
+         * The topmost node with
+         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
+         */
+        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
+
+        assert(bdrv_skip_rw_filters(filtered_target) ==
+               bdrv_skip_rw_filters(target));
+
+        /*
+         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
+         * ourselves at s->base (if writes are blocked for a node, they are
+         * also blocked for its backing file). The other options would be a
+         * second filter driver above s->base (== target).
+         */
+        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
+
+        for (iter = bdrv_filtered_bs(bs); iter != target;
+             iter = bdrv_filtered_bs(iter))
+        {
+            if (iter == filtered_target) {
+                /*
+                 * From here on, all nodes are filters on the base.
+                 * This allows us to share BLK_PERM_CONSISTENT_READ.
+                 */
+                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
+            }
+
             ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
-                                     errp);
+                                     iter_shared_perms, errp);
             if (ret < 0) {
                 goto fail;
             }
@@ -1737,7 +1802,7 @@ fail:
     bs_opaque->stop = true;
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
     bdrv_unref(mirror_top_bs);
 
@@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
         return;
     }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
-    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
+    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
     mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode,
                      on_source_error, on_target_error, unmap, NULL, NULL,
diff --git a/blockdev.c b/blockdev.c
index c540802127..c451f553f7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         return;
     }
 
-    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
+    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
@@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
 
 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs, *unfiltered_bs;
     BlockDriverState *source, *target_bs;
     AioContext *aio_context;
     BlockMirrorBackingMode backing_mode;
@@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     int flags;
     int64_t size;
     const char *format = arg->format;
+    const char *replaces_node_name = NULL;
     int ret;
 
     bs = qmp_get_root_bs(arg->device, errp);
@@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         return;
     }
 
+    /*
+     * If the user has not instructed us otherwise, we should let the
+     * block job run from @bs (thus taking into account all filters on
+     * it) but replace @unfiltered_bs when it finishes (thus not
+     * removing those filters).
+     * (And if there are any explicit filters, we should assume the
+     *  user knows how to use the @replaces option.)
+     */
+    unfiltered_bs = bdrv_skip_implicit_filters(bs);
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
@@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     flags = bs->open_flags | BDRV_O_RDWR;
-    source = backing_bs(bs);
+    source = bdrv_filtered_cow_bs(unfiltered_bs);
     if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
+        if (bdrv_filtered_bs(unfiltered_bs)) {
+            /* @unfiltered_bs is an explicit filter */
+            error_setg(errp, "Cannot perform sync=top mirror through an "
+                       "explicitly added filter node on the source");
+            goto out;
+        }
         arg->sync = MIRROR_SYNC_MODE_FULL;
     }
     if (arg->sync == MIRROR_SYNC_MODE_NONE) {
@@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
                              " named node of the graph");
             goto out;
         }
+        replaces_node_name = arg->replaces;
+    } else if (unfiltered_bs != bs) {
+        replaces_node_name = unfiltered_bs->node_name;
     }
 
     if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
@@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         bdrv_img_create(arg->target, format,
                         NULL, NULL, NULL, size, flags, false, &local_err);
     } else {
+        /* Implicit filters should not appear in the filename */
+        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
+
         switch (arg->mode) {
         case NEW_IMAGE_MODE_EXISTING:
             break;
@@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
             /* create new image with backing file */
             bdrv_refresh_filename(source);
             bdrv_img_create(arg->target, format,
-                            source->filename,
-                            source->drv->format_name,
+                            explicit_backing->filename,
+                            explicit_backing->drv->format_name,
                             NULL, size, flags, false, &local_err);
             break;
         default:
@@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
-                           arg->has_replaces, arg->replaces, arg->sync,
+                           !!replaces_node_name, replaces_node_name, arg->sync,
                            backing_mode, arg->has_speed, arg->speed,
                            arg->has_granularity, arg->granularity,
                            arg->has_buf_size, arg->buf_size,
@@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                          bool has_auto_dismiss, bool auto_dismiss,
                          Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs, *unfiltered_bs;
     BlockDriverState *target_bs;
     AioContext *aio_context;
     BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
@@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
         return;
     }
 
+    /*
+     * Same as in qmp_drive_mirror(): We want to run the job from @bs,
+     * but we want to replace @unfiltered_bs on completion.
+     */
+    unfiltered_bs = bdrv_skip_implicit_filters(bs);
+    if (!has_replaces && unfiltered_bs != bs) {
+        replaces = unfiltered_bs->node_name;
+        has_replaces = true;
+    }
+
     target_bs = bdrv_lookup_bs(target, target, errp);
     if (!target_bs) {
         return;
-- 
2.21.0

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Vladimir Sementsov-Ogievskiy 6 years, 5 months ago

09.08.2019 19:13, Max Reitz wrote:
> This includes some permission limiting (for example, we only need to
> take the RESIZE permission for active commits where the base is smaller
> than the top).
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>   block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>   blockdev.c     |  47 +++++++++++++++++---
>   2 files changed, 131 insertions(+), 33 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index 54bafdf176..6ddbfb9708 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>       BlockBackend *target;
>       BlockDriverState *mirror_top_bs;
>       BlockDriverState *base;
> +    BlockDriverState *base_overlay;
>   
>       /* The name of the graph node to replace */
>       char *replaces;
> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>                                &error_abort);
>       if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>           BlockDriverState *backing = s->is_none_mode ? src : s->base;
> -        if (backing_bs(target_bs) != backing) {
> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
> +
> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>               if (local_err) {
>                   error_report_err(local_err);
>                   ret = -EPERM;
> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>        * valid.
>        */
>       block_job_remove_all_bdrv(bjob);
> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>   
>       /* We just changed the BDS the job BB refers to (with either or both of the
>        * bdrv_replace_node() calls), so switch the BB back so the cleanup does
> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>               return 0;
>           }
>   
> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
> +                                      &count);
>           if (ret < 0) {
>               return ret;
>           }
> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>       } else {
>           s->target_cluster_size = BDRV_SECTOR_SIZE;
>       }
> -    if (backing_filename[0] && !target_bs->backing &&
> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>           s->granularity < s->target_cluster_size) {
>           s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>           s->cow_bitmap = bitmap_new(length);
> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>       if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>           int ret;
>   
> -        assert(!target->backing);
> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
> +        assert(!bdrv_backing_chain_next(target));

Preexisting, but seems we may crash here, I don't see where it is checked before, to
return error if there is some backing. And even if we do so, we don't prevent appearing
of target backing during mirror operation.

> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
> +                                     "backing", errp);
>           if (ret < 0) {
>               return;
>           }
> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>       MirrorBlockJob *s;
>       MirrorBDSOpaque *bs_opaque;
>       BlockDriverState *mirror_top_bs;
> -    bool target_graph_mod;
>       bool target_is_backing;
> +    uint64_t target_perms, target_shared_perms;
>       Error *local_err = NULL;
>       int ret;
>   
> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>           buf_size = DEFAULT_MIRROR_BUF_SIZE;
>       }
>   
> -    if (bs == target) {
> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>           error_setg(errp, "Can't mirror node into itself");
>           return NULL;
>       }
> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>        * In the case of active commit, things look a bit different, though,
>        * because the target is an already populated backing file in active use.
>        * We can allow anything except resize there.*/
> +
> +    target_perms = BLK_PERM_WRITE;
> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
> +
>       target_is_backing = bdrv_chain_contains(bs, target);
> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
> +    if (target_is_backing) {
> +        int64_t bs_size, target_size;

<empty after definitions>

> +        bs_size = bdrv_getlength(bs);
> +        if (bs_size < 0) {
> +            error_setg_errno(errp, -bs_size,
> +                             "Could not inquire top image size");
> +            goto fail;
> +        }
> +
> +        target_size = bdrv_getlength(target);
> +        if (target_size < 0) {
> +            error_setg_errno(errp, -target_size,
> +                             "Could not inquire base image size");
> +            goto fail;
> +        }
> +
> +        if (target_size < bs_size) {
> +            target_perms |= BLK_PERM_RESIZE;
> +        }
> +
> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
> +                            |  BLK_PERM_WRITE
> +                            |  BLK_PERM_GRAPH_MOD;
> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
> +        /*
> +         * We may want to allow this in the future, but it would
> +         * require taking some extra care.
> +         */
> +        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
> +                   "source's backing chain");
> +        goto fail;
> +    }
> +
> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
> +        target_perms |= BLK_PERM_GRAPH_MOD;
> +    }
> +
>       s->target = blk_new(s->common.job.aio_context,
> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
> -                        BLK_PERM_WRITE_UNCHANGED |
> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
> -                                             BLK_PERM_WRITE |
> -                                             BLK_PERM_GRAPH_MOD : 0));
> +                        target_perms, target_shared_perms);
>       ret = blk_insert_bs(s->target, target, errp);
>       if (ret < 0) {
>           goto fail;
> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>       s->backing_mode = backing_mode;
>       s->copy_mode = copy_mode;
>       s->base = base;
> +    s->base_overlay = bdrv_find_overlay(bs, base);
>       s->granularity = granularity;
>       s->buf_size = ROUND_UP(buf_size, granularity);
>       s->unmap = unmap;
> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>       /* In commit_active_start() all intermediate nodes disappear, so
>        * any jobs in them must be blocked */
>       if (target_is_backing) {
> -        BlockDriverState *iter;
> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
> -             * ourselves at s->base (if writes are blocked for a node, they are
> -             * also blocked for its backing file). The other options would be a
> -             * second filter driver above s->base (== target). */
> +        BlockDriverState *iter, *filtered_target;
> +        uint64_t iter_shared_perms;
> +
> +        /*
> +         * The topmost node with
> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
> +         */
> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
> +
> +        assert(bdrv_skip_rw_filters(filtered_target) ==
> +               bdrv_skip_rw_filters(target));
> +
> +        /*
> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
> +         * ourselves at s->base (if writes are blocked for a node, they are
> +         * also blocked for its backing file). The other options would be a
> +         * second filter driver above s->base (== target).
> +         */
> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
> +
> +        for (iter = bdrv_filtered_bs(bs); iter != target;
> +             iter = bdrv_filtered_bs(iter))
> +        {
> +            if (iter == filtered_target) {
> +                /*
> +                 * From here on, all nodes are filters on the base.
> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.

I'd prefere to add something like: "because we share it on target (see target BlockBackend creation
and corresponding comment above)".

> +                 */
> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
> +            }
> +
>               ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
> -                                     errp);
> +                                     iter_shared_perms, errp);
>               if (ret < 0) {
>                   goto fail;
>               }
> @@ -1737,7 +1802,7 @@ fail:
>       bs_opaque->stop = true;
>       bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>                                &error_abort);
> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>   
>       bdrv_unref(mirror_top_bs);
>   
> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
>           return;
>       }
>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
>       mirror_start_job(job_id, bs, creation_flags, target, replaces,
>                        speed, granularity, buf_size, backing_mode,
>                        on_source_error, on_target_error, unmap, NULL, NULL,
> diff --git a/blockdev.c b/blockdev.c
> index c540802127..c451f553f7 100644


block/mirroc.c is OK for me. Continue with blockdev.c...

> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>           return;
>       }
>   
> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>           sync = MIRROR_SYNC_MODE_FULL;
>       }
>   
> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>   
>   void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>   {
> -    BlockDriverState *bs;
> +    BlockDriverState *bs, *unfiltered_bs;
>       BlockDriverState *source, *target_bs;
>       AioContext *aio_context;
>       BlockMirrorBackingMode backing_mode;
> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>       int flags;
>       int64_t size;
>       const char *format = arg->format;
> +    const char *replaces_node_name = NULL;
>       int ret;
>   
>       bs = qmp_get_root_bs(arg->device, errp);
> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>           return;
>       }
>   
> +    /*
> +     * If the user has not instructed us otherwise, we should let the
> +     * block job run from @bs (thus taking into account all filters on
> +     * it) but replace @unfiltered_bs when it finishes (thus not
> +     * removing those filters).
> +     * (And if there are any explicit filters, we should assume the
> +     *  user knows how to use the @replaces option.)
> +     */
> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
> +
>       aio_context = bdrv_get_aio_context(bs);
>       aio_context_acquire(aio_context);
>   
> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>       }
>   
>       flags = bs->open_flags | BDRV_O_RDWR;
> -    source = backing_bs(bs);
> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>       if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {


Hmm, you handle this case a bit differently here and in blockdev_mirror_common..
Can we handle it only in blockdev_mirror_common, to be consistent with qmp_blockdev_mirror?

> +        if (bdrv_filtered_bs(unfiltered_bs)) {
> +            /* @unfiltered_bs is an explicit filter */
> +            error_setg(errp, "Cannot perform sync=top mirror through an "
> +                       "explicitly added filter node on the source");
> +            goto out;
> +        }
>           arg->sync = MIRROR_SYNC_MODE_FULL;
>       }
>       if (arg->sync == MIRROR_SYNC_MODE_NONE) {
> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>                                " named node of the graph");
>               goto out;
>           }
> +        replaces_node_name = arg->replaces;
> +    } else if (unfiltered_bs != bs) {
> +        replaces_node_name = unfiltered_bs->node_name;
>       }
>   
>       if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>           bdrv_img_create(arg->target, format,
>                           NULL, NULL, NULL, size, flags, false, &local_err);
>       } else {
> +        /* Implicit filters should not appear in the filename */
> +        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
> +
>           switch (arg->mode) {
>           case NEW_IMAGE_MODE_EXISTING:
>               break;
> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>               /* create new image with backing file */
>               bdrv_refresh_filename(source);
>               bdrv_img_create(arg->target, format,
> -                            source->filename,
> -                            source->drv->format_name,
> +                            explicit_backing->filename,
> +                            explicit_backing->drv->format_name,
>                               NULL, size, flags, false, &local_err);
>               break;
>           default:
> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>       }
>   
>       blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
> -                           arg->has_replaces, arg->replaces, arg->sync,
> +                           !!replaces_node_name, replaces_node_name, arg->sync,
>                              backing_mode, arg->has_speed, arg->speed,
>                              arg->has_granularity, arg->granularity,
>                              arg->has_buf_size, arg->buf_size,
> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>                            bool has_auto_dismiss, bool auto_dismiss,
>                            Error **errp)
>   {
> -    BlockDriverState *bs;
> +    BlockDriverState *bs, *unfiltered_bs;
>       BlockDriverState *target_bs;
>       AioContext *aio_context;
>       BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>           return;
>       }
>   
> +    /*
> +     * Same as in qmp_drive_mirror():

Then, may be better do it in blockdev_mirror_common ?

> We want to run the job from @bs,
> +     * but we want to replace @unfiltered_bs on completion.
> +     */
> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
> +    if (!has_replaces && unfiltered_bs != bs) {
> +        replaces = unfiltered_bs->node_name;
> +        has_replaces = true;
> +    }
> +
>       target_bs = bdrv_lookup_bs(target, target, errp);
>       if (!target_bs) {
>           return;
> 


-- 
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Max Reitz 6 years, 5 months ago

On 31.08.19 11:57, Vladimir Sementsov-Ogievskiy wrote:
> 09.08.2019 19:13, Max Reitz wrote:
>> This includes some permission limiting (for example, we only need to
>> take the RESIZE permission for active commits where the base is smaller
>> than the top).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>   blockdev.c     |  47 +++++++++++++++++---
>>   2 files changed, 131 insertions(+), 33 deletions(-)
>>
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 54bafdf176..6ddbfb9708 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>>       BlockBackend *target;
>>       BlockDriverState *mirror_top_bs;
>>       BlockDriverState *base;
>> +    BlockDriverState *base_overlay;
>>   
>>       /* The name of the graph node to replace */
>>       char *replaces;
>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>>                                &error_abort);
>>       if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>>           BlockDriverState *backing = s->is_none_mode ? src : s->base;
>> -        if (backing_bs(target_bs) != backing) {
>> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
>> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
>> +
>> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
>> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>>               if (local_err) {
>>                   error_report_err(local_err);
>>                   ret = -EPERM;
>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>>        * valid.
>>        */
>>       block_job_remove_all_bdrv(bjob);
>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>   
>>       /* We just changed the BDS the job BB refers to (with either or both of the
>>        * bdrv_replace_node() calls), so switch the BB back so the cleanup does
>> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>>               return 0;
>>           }
>>   
>> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
>> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
>> +                                      &count);
>>           if (ret < 0) {
>>               return ret;
>>           }
>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>>       } else {
>>           s->target_cluster_size = BDRV_SECTOR_SIZE;
>>       }
>> -    if (backing_filename[0] && !target_bs->backing &&
>> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>>           s->granularity < s->target_cluster_size) {
>>           s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>>           s->cow_bitmap = bitmap_new(length);
>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>>       if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>>           int ret;
>>   
>> -        assert(!target->backing);
>> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
>> +        assert(!bdrv_backing_chain_next(target));
> 
> Preexisting, but seems we may crash here, I don't see where it is checked before, to
> return error if there is some backing. And even if we do so, we don't prevent appearing
> of target backing during mirror operation.

The idea is that MIRROR_OPEN_BACKING_CHAIN is set only when using
drive-mirror with mode=existing.  In this case, we also set
BDRV_O_NO_BACKING for the target.

You’re right that a user could add a backing chain to the target during
the operation.  They really have to make an effort to shoot themselves
in the foot for this because the target must have an auto-generated node
name.

I suppose the best would be not to open the backing chain if the target
node already has a backing child?

>> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
>> +                                     "backing", errp);
>>           if (ret < 0) {
>>               return;
>>           }
>> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>>       MirrorBlockJob *s;
>>       MirrorBDSOpaque *bs_opaque;
>>       BlockDriverState *mirror_top_bs;
>> -    bool target_graph_mod;
>>       bool target_is_backing;
>> +    uint64_t target_perms, target_shared_perms;
>>       Error *local_err = NULL;
>>       int ret;
>>   
>> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>>           buf_size = DEFAULT_MIRROR_BUF_SIZE;
>>       }
>>   
>> -    if (bs == target) {
>> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>>           error_setg(errp, "Can't mirror node into itself");
>>           return NULL;
>>       }
>> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>>        * In the case of active commit, things look a bit different, though,
>>        * because the target is an already populated backing file in active use.
>>        * We can allow anything except resize there.*/
>> +
>> +    target_perms = BLK_PERM_WRITE;
>> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
>> +
>>       target_is_backing = bdrv_chain_contains(bs, target);
>> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
>> +    if (target_is_backing) {
>> +        int64_t bs_size, target_size;
> 
> <empty after definitions>

Is that part of any of our guidelines? :-)

Sure, will add.

>> +        bs_size = bdrv_getlength(bs);
>> +        if (bs_size < 0) {
>> +            error_setg_errno(errp, -bs_size,
>> +                             "Could not inquire top image size");
>> +            goto fail;
>> +        }
>> +
>> +        target_size = bdrv_getlength(target);
>> +        if (target_size < 0) {
>> +            error_setg_errno(errp, -target_size,
>> +                             "Could not inquire base image size");
>> +            goto fail;
>> +        }
>> +
>> +        if (target_size < bs_size) {
>> +            target_perms |= BLK_PERM_RESIZE;
>> +        }
>> +
>> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
>> +                            |  BLK_PERM_WRITE
>> +                            |  BLK_PERM_GRAPH_MOD;
>> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
>> +        /*
>> +         * We may want to allow this in the future, but it would
>> +         * require taking some extra care.
>> +         */
>> +        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
>> +                   "source's backing chain");
>> +        goto fail;
>> +    }
>> +
>> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
>> +        target_perms |= BLK_PERM_GRAPH_MOD;
>> +    }
>> +
>>       s->target = blk_new(s->common.job.aio_context,
>> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
>> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
>> -                        BLK_PERM_WRITE_UNCHANGED |
>> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
>> -                                             BLK_PERM_WRITE |
>> -                                             BLK_PERM_GRAPH_MOD : 0));
>> +                        target_perms, target_shared_perms);
>>       ret = blk_insert_bs(s->target, target, errp);
>>       if (ret < 0) {
>>           goto fail;
>> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>>       s->backing_mode = backing_mode;
>>       s->copy_mode = copy_mode;
>>       s->base = base;
>> +    s->base_overlay = bdrv_find_overlay(bs, base);
>>       s->granularity = granularity;
>>       s->buf_size = ROUND_UP(buf_size, granularity);
>>       s->unmap = unmap;
>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>       /* In commit_active_start() all intermediate nodes disappear, so
>>        * any jobs in them must be blocked */
>>       if (target_is_backing) {
>> -        BlockDriverState *iter;
>> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> -             * ourselves at s->base (if writes are blocked for a node, they are
>> -             * also blocked for its backing file). The other options would be a
>> -             * second filter driver above s->base (== target). */
>> +        BlockDriverState *iter, *filtered_target;
>> +        uint64_t iter_shared_perms;
>> +
>> +        /*
>> +         * The topmost node with
>> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
>> +         */
>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
>> +
>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>> +               bdrv_skip_rw_filters(target));
>> +
>> +        /*
>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> +         * ourselves at s->base (if writes are blocked for a node, they are
>> +         * also blocked for its backing file). The other options would be a
>> +         * second filter driver above s->base (== target).
>> +         */
>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>> +
>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>> +             iter = bdrv_filtered_bs(iter))
>> +        {
>> +            if (iter == filtered_target) {
>> +                /*
>> +                 * From here on, all nodes are filters on the base.
>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
> 
> I'd prefere to add something like: "because we share it on target (see target BlockBackend creation
> and corresponding comment above)".

I’d rather not refer to other comments in case they change…  Maybe just
“This allows us to share BLK_PERM_CONSISTENT_READ, as we do on the
target.”?  I think if someone is interested, they will scan the file for
what permissions are shared on the target anyway.

>> +                 */
>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
>> +            }
>> +
>>               ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
>> -                                     errp);
>> +                                     iter_shared_perms, errp);
>>               if (ret < 0) {
>>                   goto fail;
>>               }
>> @@ -1737,7 +1802,7 @@ fail:
>>       bs_opaque->stop = true;
>>       bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>>                                &error_abort);
>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>   
>>       bdrv_unref(mirror_top_bs);
>>   
>> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
>>           return;
>>       }
>>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
>> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
>>       mirror_start_job(job_id, bs, creation_flags, target, replaces,
>>                        speed, granularity, buf_size, backing_mode,
>>                        on_source_error, on_target_error, unmap, NULL, NULL,
>> diff --git a/blockdev.c b/blockdev.c
>> index c540802127..c451f553f7 100644
> 
> 
> block/mirroc.c is OK for me. Continue with blockdev.c...
> 
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>           return;
>>       }
>>   
>> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
>> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>>           sync = MIRROR_SYNC_MODE_FULL;
>>       }
>>   
>> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>   
>>   void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>   {
>> -    BlockDriverState *bs;
>> +    BlockDriverState *bs, *unfiltered_bs;
>>       BlockDriverState *source, *target_bs;
>>       AioContext *aio_context;
>>       BlockMirrorBackingMode backing_mode;
>> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>       int flags;
>>       int64_t size;
>>       const char *format = arg->format;
>> +    const char *replaces_node_name = NULL;
>>       int ret;
>>   
>>       bs = qmp_get_root_bs(arg->device, errp);
>> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>           return;
>>       }
>>   
>> +    /*
>> +     * If the user has not instructed us otherwise, we should let the
>> +     * block job run from @bs (thus taking into account all filters on
>> +     * it) but replace @unfiltered_bs when it finishes (thus not
>> +     * removing those filters).
>> +     * (And if there are any explicit filters, we should assume the
>> +     *  user knows how to use the @replaces option.)
>> +     */
>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>> +
>>       aio_context = bdrv_get_aio_context(bs);
>>       aio_context_acquire(aio_context);
>>   
>> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>       }
>>   
>>       flags = bs->open_flags | BDRV_O_RDWR;
>> -    source = backing_bs(bs);
>> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>>       if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
> 
> 
> Hmm, you handle this case a bit differently here and in blockdev_mirror_common..
> Can we handle it only in blockdev_mirror_common, to be consistent with qmp_blockdev_mirror?

What exactly do you mean?  The difference between skipping all filters
and just skipping implicit filters?  Hm.

First, the check in blockdev_mirror_common() should actually be
unnecessary.  In qmp_{blockdev,drive}_mirror(), we do nearly the same
check anyway (and then force sync=full if there is no backing file).  So
if all three functions did the same check, we wouldn’t need it in
blockdev_mirror_common().

Second, let’s look at the difference in an example: One where
blockdev_mirror_common() would not decide to enforce mode=full, but
qmp_{blockdev,drive}_mirror() would.
This happens when @bs is an explicit filter over some overlay with a
backing file, e.g.:

throttle --file--> qcow2 --backing--> raw

It’s correct to run the mirror job from the throttle node; but @source
should be bdrv_backing_chain_next() so it will point to the raw node.
Currently, it is NULL (because the throttle node does not have a COW child).

But then again, I’ve made qmp_{blockdev,drive}_mirror() throw an error
in such a case:

>> +        if (bdrv_filtered_bs(unfiltered_bs)) {
>> +            /* @unfiltered_bs is an explicit filter */
>> +            error_setg(errp, "Cannot perform sync=top mirror through an "
>> +                       "explicitly added filter node on the source");
>> +            goto out;
>> +        }

So it isn’t really a problem.  Still, does the error make sense?  Should
we just allow that case by letting source be
bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs))?

(BTW, I just noticed that @base seems to be pretty much unused in
block/mirror.c.  It only really uses @base_overlay now.  So I suppose it
makes sense to remove it in v7.)

>>           arg->sync = MIRROR_SYNC_MODE_FULL;
>>       }
>>       if (arg->sync == MIRROR_SYNC_MODE_NONE) {
>> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>                                " named node of the graph");
>>               goto out;
>>           }
>> +        replaces_node_name = arg->replaces;
>> +    } else if (unfiltered_bs != bs) {
>> +        replaces_node_name = unfiltered_bs->node_name;
>>       }
>>   
>>       if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
>> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>           bdrv_img_create(arg->target, format,
>>                           NULL, NULL, NULL, size, flags, false, &local_err);
>>       } else {
>> +        /* Implicit filters should not appear in the filename */
>> +        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
>> +
>>           switch (arg->mode) {
>>           case NEW_IMAGE_MODE_EXISTING:
>>               break;
>> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>               /* create new image with backing file */
>>               bdrv_refresh_filename(source);
>>               bdrv_img_create(arg->target, format,
>> -                            source->filename,
>> -                            source->drv->format_name,
>> +                            explicit_backing->filename,
>> +                            explicit_backing->drv->format_name,
>>                               NULL, size, flags, false, &local_err);
>>               break;
>>           default:
>> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>       }
>>   
>>       blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
>> -                           arg->has_replaces, arg->replaces, arg->sync,
>> +                           !!replaces_node_name, replaces_node_name, arg->sync,
>>                              backing_mode, arg->has_speed, arg->speed,
>>                              arg->has_granularity, arg->granularity,
>>                              arg->has_buf_size, arg->buf_size,
>> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>                            bool has_auto_dismiss, bool auto_dismiss,
>>                            Error **errp)
>>   {
>> -    BlockDriverState *bs;
>> +    BlockDriverState *bs, *unfiltered_bs;
>>       BlockDriverState *target_bs;
>>       AioContext *aio_context;
>>       BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
>> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>           return;
>>       }
>>   
>> +    /*
>> +     * Same as in qmp_drive_mirror():
> 
> Then, may be better do it in blockdev_mirror_common ?

Hm, maybe.  Should we decide to let @source be
bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs)) in qmp_drive_mirror(), I
don’t think we need @unfiltered_bs there to determine @source.

Max

>> We want to run the job from @bs,
>> +     * but we want to replace @unfiltered_bs on completion.
>> +     */
>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>> +    if (!has_replaces && unfiltered_bs != bs) {
>> +        replaces = unfiltered_bs->node_name;
>> +        has_replaces = true;
>> +    }
>> +
>>       target_bs = bdrv_lookup_bs(target, target, errp);
>>       if (!target_bs) {
>>           return;
>>
> 
>

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Vladimir Sementsov-Ogievskiy 6 years, 5 months ago

02.09.2019 17:35, Max Reitz wrote:
> On 31.08.19 11:57, Vladimir Sementsov-Ogievskiy wrote:
>> 09.08.2019 19:13, Max Reitz wrote:
>>> This includes some permission limiting (for example, we only need to
>>> take the RESIZE permission for active commits where the base is smaller
>>> than the top).
>>>
>>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>>> ---
>>>    block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>>    blockdev.c     |  47 +++++++++++++++++---
>>>    2 files changed, 131 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/block/mirror.c b/block/mirror.c
>>> index 54bafdf176..6ddbfb9708 100644
>>> --- a/block/mirror.c
>>> +++ b/block/mirror.c
>>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>>>        BlockBackend *target;
>>>        BlockDriverState *mirror_top_bs;
>>>        BlockDriverState *base;
>>> +    BlockDriverState *base_overlay;
>>>    
>>>        /* The name of the graph node to replace */
>>>        char *replaces;
>>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>>>                                 &error_abort);
>>>        if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>>>            BlockDriverState *backing = s->is_none_mode ? src : s->base;
>>> -        if (backing_bs(target_bs) != backing) {
>>> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
>>> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
>>> +
>>> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
>>> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>>>                if (local_err) {
>>>                    error_report_err(local_err);
>>>                    ret = -EPERM;
>>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>>>         * valid.
>>>         */
>>>        block_job_remove_all_bdrv(bjob);
>>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>>    
>>>        /* We just changed the BDS the job BB refers to (with either or both of the
>>>         * bdrv_replace_node() calls), so switch the BB back so the cleanup does
>>> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>>>                return 0;
>>>            }
>>>    
>>> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
>>> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
>>> +                                      &count);
>>>            if (ret < 0) {
>>>                return ret;
>>>            }
>>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>>>        } else {
>>>            s->target_cluster_size = BDRV_SECTOR_SIZE;
>>>        }
>>> -    if (backing_filename[0] && !target_bs->backing &&
>>> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>>>            s->granularity < s->target_cluster_size) {
>>>            s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>>>            s->cow_bitmap = bitmap_new(length);
>>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>>>        if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>>>            int ret;
>>>    
>>> -        assert(!target->backing);
>>> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
>>> +        assert(!bdrv_backing_chain_next(target));
>>
>> Preexisting, but seems we may crash here, I don't see where it is checked before, to
>> return error if there is some backing. And even if we do so, we don't prevent appearing
>> of target backing during mirror operation.
> 
> The idea is that MIRROR_OPEN_BACKING_CHAIN is set only when using
> drive-mirror with mode=existing.  In this case, we also set
> BDRV_O_NO_BACKING for the target.
> 
> You’re right that a user could add a backing chain to the target during
> the operation.  They really have to make an effort to shoot themselves
> in the foot for this because the target must have an auto-generated node
> name.
> 
> I suppose the best would be not to open the backing chain if the target
> node already has a backing child?

Hmm, but we still should generate an error, as we can't do what was requested.

> 
>>> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
>>> +                                     "backing", errp);
>>>            if (ret < 0) {
>>>                return;
>>>            }
>>> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>>>        MirrorBlockJob *s;
>>>        MirrorBDSOpaque *bs_opaque;
>>>        BlockDriverState *mirror_top_bs;
>>> -    bool target_graph_mod;
>>>        bool target_is_backing;
>>> +    uint64_t target_perms, target_shared_perms;
>>>        Error *local_err = NULL;
>>>        int ret;
>>>    
>>> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>>>            buf_size = DEFAULT_MIRROR_BUF_SIZE;
>>>        }
>>>    
>>> -    if (bs == target) {
>>> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>>>            error_setg(errp, "Can't mirror node into itself");
>>>            return NULL;
>>>        }
>>> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>>>         * In the case of active commit, things look a bit different, though,
>>>         * because the target is an already populated backing file in active use.
>>>         * We can allow anything except resize there.*/
>>> +
>>> +    target_perms = BLK_PERM_WRITE;
>>> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
>>> +
>>>        target_is_backing = bdrv_chain_contains(bs, target);
>>> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
>>> +    if (target_is_backing) {
>>> +        int64_t bs_size, target_size;
>>
>> <empty after definitions>
> 
> Is that part of any of our guidelines? :-)
> 
> Sure, will add.

Not sure. Someone asked me about it on list in past and I'm used to.

> 
>>> +        bs_size = bdrv_getlength(bs);
>>> +        if (bs_size < 0) {
>>> +            error_setg_errno(errp, -bs_size,
>>> +                             "Could not inquire top image size");
>>> +            goto fail;
>>> +        }
>>> +
>>> +        target_size = bdrv_getlength(target);
>>> +        if (target_size < 0) {
>>> +            error_setg_errno(errp, -target_size,
>>> +                             "Could not inquire base image size");
>>> +            goto fail;
>>> +        }
>>> +
>>> +        if (target_size < bs_size) {
>>> +            target_perms |= BLK_PERM_RESIZE;
>>> +        }
>>> +
>>> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
>>> +                            |  BLK_PERM_WRITE
>>> +                            |  BLK_PERM_GRAPH_MOD;
>>> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
>>> +        /*
>>> +         * We may want to allow this in the future, but it would
>>> +         * require taking some extra care.
>>> +         */
>>> +        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
>>> +                   "source's backing chain");
>>> +        goto fail;
>>> +    }
>>> +
>>> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
>>> +        target_perms |= BLK_PERM_GRAPH_MOD;
>>> +    }
>>> +
>>>        s->target = blk_new(s->common.job.aio_context,
>>> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
>>> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
>>> -                        BLK_PERM_WRITE_UNCHANGED |
>>> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
>>> -                                             BLK_PERM_WRITE |
>>> -                                             BLK_PERM_GRAPH_MOD : 0));
>>> +                        target_perms, target_shared_perms);
>>>        ret = blk_insert_bs(s->target, target, errp);
>>>        if (ret < 0) {
>>>            goto fail;
>>> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>>>        s->backing_mode = backing_mode;
>>>        s->copy_mode = copy_mode;
>>>        s->base = base;
>>> +    s->base_overlay = bdrv_find_overlay(bs, base);
>>>        s->granularity = granularity;
>>>        s->buf_size = ROUND_UP(buf_size, granularity);
>>>        s->unmap = unmap;
>>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>>        /* In commit_active_start() all intermediate nodes disappear, so
>>>         * any jobs in them must be blocked */
>>>        if (target_is_backing) {
>>> -        BlockDriverState *iter;
>>> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
>>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> -             * ourselves at s->base (if writes are blocked for a node, they are
>>> -             * also blocked for its backing file). The other options would be a
>>> -             * second filter driver above s->base (== target). */
>>> +        BlockDriverState *iter, *filtered_target;
>>> +        uint64_t iter_shared_perms;
>>> +
>>> +        /*
>>> +         * The topmost node with
>>> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
>>> +         */
>>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
>>> +
>>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>>> +               bdrv_skip_rw_filters(target));
>>> +
>>> +        /*
>>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> +         * ourselves at s->base (if writes are blocked for a node, they are
>>> +         * also blocked for its backing file). The other options would be a
>>> +         * second filter driver above s->base (== target).
>>> +         */
>>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>>> +
>>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>>> +             iter = bdrv_filtered_bs(iter))
>>> +        {
>>> +            if (iter == filtered_target) {
>>> +                /*
>>> +                 * From here on, all nodes are filters on the base.
>>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
>>
>> I'd prefere to add something like: "because we share it on target (see target BlockBackend creation
>> and corresponding comment above)".
> 
> I’d rather not refer to other comments in case they change…  Maybe just
> “This allows us to share BLK_PERM_CONSISTENT_READ, as we do on the
> target.”?  I think if someone is interested, they will scan the file for
> what permissions are shared on the target anyway.

OK. Yes I just wanted to stress that we just duplicate behavior about target, as it helped
me to understand.

> 
>>> +                 */
>>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
>>> +            }
>>> +
>>>                ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>>> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
>>> -                                     errp);
>>> +                                     iter_shared_perms, errp);
>>>                if (ret < 0) {
>>>                    goto fail;
>>>                }
>>> @@ -1737,7 +1802,7 @@ fail:
>>>        bs_opaque->stop = true;
>>>        bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>>>                                 &error_abort);
>>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>>    
>>>        bdrv_unref(mirror_top_bs);
>>>    
>>> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
>>>            return;
>>>        }
>>>        is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>>> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
>>> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
>>>        mirror_start_job(job_id, bs, creation_flags, target, replaces,
>>>                         speed, granularity, buf_size, backing_mode,
>>>                         on_source_error, on_target_error, unmap, NULL, NULL,
>>> diff --git a/blockdev.c b/blockdev.c
>>> index c540802127..c451f553f7 100644
>>
>>
>> block/mirroc.c is OK for me. Continue with blockdev.c...
>>
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>>            return;
>>>        }
>>>    
>>> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
>>> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>>>            sync = MIRROR_SYNC_MODE_FULL;
>>>        }
>>>    
>>> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>>    
>>>    void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>    {
>>> -    BlockDriverState *bs;
>>> +    BlockDriverState *bs, *unfiltered_bs;
>>>        BlockDriverState *source, *target_bs;
>>>        AioContext *aio_context;
>>>        BlockMirrorBackingMode backing_mode;
>>> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        int flags;
>>>        int64_t size;
>>>        const char *format = arg->format;
>>> +    const char *replaces_node_name = NULL;
>>>        int ret;
>>>    
>>>        bs = qmp_get_root_bs(arg->device, errp);
>>> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>            return;
>>>        }
>>>    
>>> +    /*
>>> +     * If the user has not instructed us otherwise, we should let the
>>> +     * block job run from @bs (thus taking into account all filters on
>>> +     * it) but replace @unfiltered_bs when it finishes (thus not
>>> +     * removing those filters).
>>> +     * (And if there are any explicit filters, we should assume the
>>> +     *  user knows how to use the @replaces option.)
>>> +     */
>>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>>> +
>>>        aio_context = bdrv_get_aio_context(bs);
>>>        aio_context_acquire(aio_context);
>>>    
>>> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        }
>>>    
>>>        flags = bs->open_flags | BDRV_O_RDWR;
>>> -    source = backing_bs(bs);
>>> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>>>        if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
>>
>>
>> Hmm, you handle this case a bit differently here and in blockdev_mirror_common..
>> Can we handle it only in blockdev_mirror_common, to be consistent with qmp_blockdev_mirror?
> 
> What exactly do you mean?  The difference between skipping all filters
> and just skipping implicit filters?  Hm.
> 
> First, the check in blockdev_mirror_common() should actually be
> unnecessary.  In qmp_{blockdev,drive}_mirror(), we do nearly the same
> check anyway (and then force sync=full if there is no backing file).

Hmm, I see it only in qmp_drive_mirror, not in _blockdev_

>  So
> if all three functions did the same check, we wouldn’t need it in
> blockdev_mirror_common().

And if it was so, better have one check in _common than two duplicated.

> 
> Second, let’s look at the difference in an example: One where
> blockdev_mirror_common() would not decide to enforce mode=full, but
> qmp_{blockdev,drive}_mirror() would.
> This happens when @bs is an explicit filter over some overlay with a
> backing file, e.g.:
> 
> throttle --file--> qcow2 --backing--> raw
> 
> It’s correct to run the mirror job from the throttle node; but @source
> should be bdrv_backing_chain_next() so it will point to the raw node.
> Currently, it is NULL (because the throttle node does not have a COW child).
> 
> But then again, I’ve made qmp_{blockdev,drive}_mirror() throw an error
> in such a case:
> 
>>> +        if (bdrv_filtered_bs(unfiltered_bs)) {
>>> +            /* @unfiltered_bs is an explicit filter */
>>> +            error_setg(errp, "Cannot perform sync=top mirror through an "
>>> +                       "explicitly added filter node on the source");
>>> +            goto out;
>>> +        }
> 
> So it isn’t really a problem.  Still, does the error make sense?  Should
> we just allow that case by letting source be
> bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs))?

Looks good. As I understand, you have the test (40/42) for this case and it
works for _blockdev_ and for for _drive_ version of command. Of course, they'd
better behave in same manner.

> 
> (BTW, I just noticed that @base seems to be pretty much unused in
> block/mirror.c.  It only really uses @base_overlay now.  So I suppose it
> makes sense to remove it in v7.)
> 
>>>            arg->sync = MIRROR_SYNC_MODE_FULL;
>>>        }
>>>        if (arg->sync == MIRROR_SYNC_MODE_NONE) {
>>> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>                                 " named node of the graph");
>>>                goto out;
>>>            }
>>> +        replaces_node_name = arg->replaces;
>>> +    } else if (unfiltered_bs != bs) {
>>> +        replaces_node_name = unfiltered_bs->node_name;
>>>        }
>>>    
>>>        if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
>>> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>            bdrv_img_create(arg->target, format,
>>>                            NULL, NULL, NULL, size, flags, false, &local_err);
>>>        } else {
>>> +        /* Implicit filters should not appear in the filename */
>>> +        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
>>> +
>>>            switch (arg->mode) {
>>>            case NEW_IMAGE_MODE_EXISTING:
>>>                break;
>>> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>                /* create new image with backing file */
>>>                bdrv_refresh_filename(source);
>>>                bdrv_img_create(arg->target, format,
>>> -                            source->filename,
>>> -                            source->drv->format_name,
>>> +                            explicit_backing->filename,
>>> +                            explicit_backing->drv->format_name,
>>>                                NULL, size, flags, false, &local_err);
>>>                break;
>>>            default:
>>> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        }
>>>    
>>>        blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
>>> -                           arg->has_replaces, arg->replaces, arg->sync,
>>> +                           !!replaces_node_name, replaces_node_name, arg->sync,
>>>                               backing_mode, arg->has_speed, arg->speed,
>>>                               arg->has_granularity, arg->granularity,
>>>                               arg->has_buf_size, arg->buf_size,
>>> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>>                             bool has_auto_dismiss, bool auto_dismiss,
>>>                             Error **errp)
>>>    {
>>> -    BlockDriverState *bs;
>>> +    BlockDriverState *bs, *unfiltered_bs;
>>>        BlockDriverState *target_bs;
>>>        AioContext *aio_context;
>>>        BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
>>> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>>            return;
>>>        }
>>>    
>>> +    /*
>>> +     * Same as in qmp_drive_mirror():
>>
>> Then, may be better do it in blockdev_mirror_common ?
> 
> Hm, maybe.  Should we decide to let @source be
> bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs)) in qmp_drive_mirror(), I
> don’t think we need @unfiltered_bs there to determine @source.
> 
> Max
> 
>>> We want to run the job from @bs,
>>> +     * but we want to replace @unfiltered_bs on completion.
>>> +     */
>>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>>> +    if (!has_replaces && unfiltered_bs != bs) {
>>> +        replaces = unfiltered_bs->node_name;
>>> +        has_replaces = true;
>>> +    }
>>> +
>>>        target_bs = bdrv_lookup_bs(target, target, errp);
>>>        if (!target_bs) {
>>>            return;
>>>
>>
>>
> 
> 


-- 
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Max Reitz 6 years, 5 months ago

On 03.09.19 10:32, Vladimir Sementsov-Ogievskiy wrote:
> 02.09.2019 17:35, Max Reitz wrote:
>> On 31.08.19 11:57, Vladimir Sementsov-Ogievskiy wrote:
>>> 09.08.2019 19:13, Max Reitz wrote:
>>>> This includes some permission limiting (for example, we only need to
>>>> take the RESIZE permission for active commits where the base is smaller
>>>> than the top).
>>>>
>>>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>>>> ---
>>>>    block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>>>    blockdev.c     |  47 +++++++++++++++++---
>>>>    2 files changed, 131 insertions(+), 33 deletions(-)
>>>>
>>>> diff --git a/block/mirror.c b/block/mirror.c
>>>> index 54bafdf176..6ddbfb9708 100644
>>>> --- a/block/mirror.c
>>>> +++ b/block/mirror.c
>>>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>>>>        BlockBackend *target;
>>>>        BlockDriverState *mirror_top_bs;
>>>>        BlockDriverState *base;
>>>> +    BlockDriverState *base_overlay;
>>>>    
>>>>        /* The name of the graph node to replace */
>>>>        char *replaces;
>>>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>>>>                                 &error_abort);
>>>>        if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>>>>            BlockDriverState *backing = s->is_none_mode ? src : s->base;
>>>> -        if (backing_bs(target_bs) != backing) {
>>>> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
>>>> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
>>>> +
>>>> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
>>>> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>>>>                if (local_err) {
>>>>                    error_report_err(local_err);
>>>>                    ret = -EPERM;
>>>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>>>>         * valid.
>>>>         */
>>>>        block_job_remove_all_bdrv(bjob);
>>>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>>>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>>>    
>>>>        /* We just changed the BDS the job BB refers to (with either or both of the
>>>>         * bdrv_replace_node() calls), so switch the BB back so the cleanup does
>>>> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>>>>                return 0;
>>>>            }
>>>>    
>>>> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
>>>> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
>>>> +                                      &count);
>>>>            if (ret < 0) {
>>>>                return ret;
>>>>            }
>>>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>>>>        } else {
>>>>            s->target_cluster_size = BDRV_SECTOR_SIZE;
>>>>        }
>>>> -    if (backing_filename[0] && !target_bs->backing &&
>>>> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>>>>            s->granularity < s->target_cluster_size) {
>>>>            s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>>>>            s->cow_bitmap = bitmap_new(length);
>>>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>>>>        if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>>>>            int ret;
>>>>    
>>>> -        assert(!target->backing);
>>>> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
>>>> +        assert(!bdrv_backing_chain_next(target));
>>>
>>> Preexisting, but seems we may crash here, I don't see where it is checked before, to
>>> return error if there is some backing. And even if we do so, we don't prevent appearing
>>> of target backing during mirror operation.
>>
>> The idea is that MIRROR_OPEN_BACKING_CHAIN is set only when using
>> drive-mirror with mode=existing.  In this case, we also set
>> BDRV_O_NO_BACKING for the target.
>>
>> You’re right that a user could add a backing chain to the target during
>> the operation.  They really have to make an effort to shoot themselves
>> in the foot for this because the target must have an auto-generated node
>> name.
>>
>> I suppose the best would be not to open the backing chain if the target
>> node already has a backing child?
> 
> Hmm, but we still should generate an error, as we can't do what was requested.

But the user didn’t request anything.  They just specified an existing
file as the target with mode=existing, then (for whatever reason) made a
real effort to add a backing node to it (i.e. they had to look up the
target’s auto-generated node name), and then the job finishes.

The user didn’t request us to open the backing chain of the target.
They just requested to mirror to an existing file.  If they manually
override that existing file’s backing chain, I think it makes sense to
keep it that way.

Max

>>>> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
>>>> +                                     "backing", errp);
>>>>            if (ret < 0) {
>>>>                return;
>>>>            }

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Kevin Wolf 6 years, 4 months ago

Am 09.08.2019 um 18:13 hat Max Reitz geschrieben:
> This includes some permission limiting (for example, we only need to
> take the RESIZE permission for active commits where the base is smaller
> than the top).
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>  blockdev.c     |  47 +++++++++++++++++---
>  2 files changed, 131 insertions(+), 33 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index 54bafdf176..6ddbfb9708 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>      BlockBackend *target;
>      BlockDriverState *mirror_top_bs;
>      BlockDriverState *base;
> +    BlockDriverState *base_overlay;
>  
>      /* The name of the graph node to replace */
>      char *replaces;
> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>                               &error_abort);
>      if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>          BlockDriverState *backing = s->is_none_mode ? src : s->base;
> -        if (backing_bs(target_bs) != backing) {
> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
> +
> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>              if (local_err) {
>                  error_report_err(local_err);
>                  ret = -EPERM;
> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>       * valid.
>       */
>      block_job_remove_all_bdrv(bjob);
> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>  
>      /* We just changed the BDS the job BB refers to (with either or both of the
>       * bdrv_replace_node() calls), so switch the BB back so the cleanup does
> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>              return 0;
>          }
>  
> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
> +                                      &count);
>          if (ret < 0) {
>              return ret;
>          }
> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>      } else {
>          s->target_cluster_size = BDRV_SECTOR_SIZE;
>      }
> -    if (backing_filename[0] && !target_bs->backing &&
> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>          s->granularity < s->target_cluster_size) {
>          s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>          s->cow_bitmap = bitmap_new(length);
> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>      if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>          int ret;
>  
> -        assert(!target->backing);
> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
> +        assert(!bdrv_backing_chain_next(target));
> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
> +                                     "backing", errp);
>          if (ret < 0) {
>              return;
>          }
> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>      MirrorBlockJob *s;
>      MirrorBDSOpaque *bs_opaque;
>      BlockDriverState *mirror_top_bs;
> -    bool target_graph_mod;
>      bool target_is_backing;
> +    uint64_t target_perms, target_shared_perms;
>      Error *local_err = NULL;
>      int ret;
>  
> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>          buf_size = DEFAULT_MIRROR_BUF_SIZE;
>      }
>  
> -    if (bs == target) {
> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>          error_setg(errp, "Can't mirror node into itself");
>          return NULL;
>      }
> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>       * In the case of active commit, things look a bit different, though,
>       * because the target is an already populated backing file in active use.
>       * We can allow anything except resize there.*/
> +
> +    target_perms = BLK_PERM_WRITE;
> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
> +
>      target_is_backing = bdrv_chain_contains(bs, target);
> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
> +    if (target_is_backing) {
> +        int64_t bs_size, target_size;
> +        bs_size = bdrv_getlength(bs);
> +        if (bs_size < 0) {
> +            error_setg_errno(errp, -bs_size,
> +                             "Could not inquire top image size");
> +            goto fail;
> +        }
> +
> +        target_size = bdrv_getlength(target);
> +        if (target_size < 0) {
> +            error_setg_errno(errp, -target_size,
> +                             "Could not inquire base image size");
> +            goto fail;
> +        }
> +
> +        if (target_size < bs_size) {
> +            target_perms |= BLK_PERM_RESIZE;
> +        }
> +
> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
> +                            |  BLK_PERM_WRITE
> +                            |  BLK_PERM_GRAPH_MOD;
> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
> +        /*
> +         * We may want to allow this in the future, but it would
> +         * require taking some extra care.
> +         */
> +        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
> +                   "source's backing chain");
> +        goto fail;
> +    }
> +
> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
> +        target_perms |= BLK_PERM_GRAPH_MOD;
> +    }

This is getting absurd. We keep moving GRAPH_MOD around, but still
nobody knows what it's actually supposed to mean. Maybe it would be
better to just remove it finally?

Of course, not a reason to stop this patch, after all it's moving the
nonsensical piece of code correctly...

>      s->target = blk_new(s->common.job.aio_context,
> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
> -                        BLK_PERM_WRITE_UNCHANGED |
> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
> -                                             BLK_PERM_WRITE |
> -                                             BLK_PERM_GRAPH_MOD : 0));
> +                        target_perms, target_shared_perms);
>      ret = blk_insert_bs(s->target, target, errp);
>      if (ret < 0) {
>          goto fail;
> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>      s->backing_mode = backing_mode;
>      s->copy_mode = copy_mode;
>      s->base = base;
> +    s->base_overlay = bdrv_find_overlay(bs, base);
>      s->granularity = granularity;
>      s->buf_size = ROUND_UP(buf_size, granularity);
>      s->unmap = unmap;
> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>      /* In commit_active_start() all intermediate nodes disappear, so
>       * any jobs in them must be blocked */
>      if (target_is_backing) {
> -        BlockDriverState *iter;
> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
> -             * ourselves at s->base (if writes are blocked for a node, they are
> -             * also blocked for its backing file). The other options would be a
> -             * second filter driver above s->base (== target). */
> +        BlockDriverState *iter, *filtered_target;
> +        uint64_t iter_shared_perms;
> +
> +        /*
> +         * The topmost node with
> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
> +         */
> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
> +
> +        assert(bdrv_skip_rw_filters(filtered_target) ==
> +               bdrv_skip_rw_filters(target));
> +
> +        /*
> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
> +         * ourselves at s->base (if writes are blocked for a node, they are
> +         * also blocked for its backing file). The other options would be a
> +         * second filter driver above s->base (== target).
> +         */
> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
> +
> +        for (iter = bdrv_filtered_bs(bs); iter != target;
> +             iter = bdrv_filtered_bs(iter))
> +        {
> +            if (iter == filtered_target) {
> +                /*
> +                 * From here on, all nodes are filters on the base.
> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
> +                 */
> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
> +            }
> +
>              ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
> -                                     errp);
> +                                     iter_shared_perms, errp);
>              if (ret < 0) {
>                  goto fail;
>              }
> @@ -1737,7 +1802,7 @@ fail:
>      bs_opaque->stop = true;
>      bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>                               &error_abort);
> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>  
>      bdrv_unref(mirror_top_bs);
>  
> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
>          return;
>      }
>      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
>      mirror_start_job(job_id, bs, creation_flags, target, replaces,
>                       speed, granularity, buf_size, backing_mode,
>                       on_source_error, on_target_error, unmap, NULL, NULL,
> diff --git a/blockdev.c b/blockdev.c
> index c540802127..c451f553f7 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>          return;
>      }
>  
> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>          sync = MIRROR_SYNC_MODE_FULL;
>      }
>  
> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>  
>  void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>  {
> -    BlockDriverState *bs;
> +    BlockDriverState *bs, *unfiltered_bs;
>      BlockDriverState *source, *target_bs;
>      AioContext *aio_context;
>      BlockMirrorBackingMode backing_mode;
> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>      int flags;
>      int64_t size;
>      const char *format = arg->format;
> +    const char *replaces_node_name = NULL;
>      int ret;
>  
>      bs = qmp_get_root_bs(arg->device, errp);
> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>          return;
>      }
>  
> +    /*
> +     * If the user has not instructed us otherwise, we should let the
> +     * block job run from @bs (thus taking into account all filters on
> +     * it) but replace @unfiltered_bs when it finishes (thus not
> +     * removing those filters).
> +     * (And if there are any explicit filters, we should assume the
> +     *  user knows how to use the @replaces option.)
> +     */
> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);

Should this behaviour be documented in the QAPI schema for drive-mirror?

>      aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(aio_context);
>  
> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>      }
>  
>      flags = bs->open_flags | BDRV_O_RDWR;
> -    source = backing_bs(bs);
> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>      if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
> +        if (bdrv_filtered_bs(unfiltered_bs)) {
> +            /* @unfiltered_bs is an explicit filter */
> +            error_setg(errp, "Cannot perform sync=top mirror through an "
> +                       "explicitly added filter node on the source");
> +            goto out;
> +        }
>          arg->sync = MIRROR_SYNC_MODE_FULL;
>      }
>      if (arg->sync == MIRROR_SYNC_MODE_NONE) {
> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>                               " named node of the graph");
>              goto out;
>          }
> +        replaces_node_name = arg->replaces;
> +    } else if (unfiltered_bs != bs) {
> +        replaces_node_name = unfiltered_bs->node_name;
>      }
>  
>      if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>          bdrv_img_create(arg->target, format,
>                          NULL, NULL, NULL, size, flags, false, &local_err);
>      } else {
> +        /* Implicit filters should not appear in the filename */
> +        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
> +
>          switch (arg->mode) {
>          case NEW_IMAGE_MODE_EXISTING:
>              break;
> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>              /* create new image with backing file */
>              bdrv_refresh_filename(source);
>              bdrv_img_create(arg->target, format,
> -                            source->filename,
> -                            source->drv->format_name,
> +                            explicit_backing->filename,
> +                            explicit_backing->drv->format_name,
>                              NULL, size, flags, false, &local_err);
>              break;
>          default:
> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>      }
>  
>      blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
> -                           arg->has_replaces, arg->replaces, arg->sync,
> +                           !!replaces_node_name, replaces_node_name, arg->sync,
>                             backing_mode, arg->has_speed, arg->speed,
>                             arg->has_granularity, arg->granularity,
>                             arg->has_buf_size, arg->buf_size,
> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>                           bool has_auto_dismiss, bool auto_dismiss,
>                           Error **errp)
>  {
> -    BlockDriverState *bs;
> +    BlockDriverState *bs, *unfiltered_bs;
>      BlockDriverState *target_bs;
>      AioContext *aio_context;
>      BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>          return;
>      }
>  
> +    /*
> +     * Same as in qmp_drive_mirror(): We want to run the job from @bs,
> +     * but we want to replace @unfiltered_bs on completion.
> +     */
> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);

Do we? I thought the idea with blockdev-mirror was that the client tells
us the exact node it is interested in, without any magic skipping nodes.

Skipping implicit nodes is a feature for compatibility with legacy
clients, but a client using blockdev-mirror isn't a legacy client.

> +    if (!has_replaces && unfiltered_bs != bs) {
> +        replaces = unfiltered_bs->node_name;
> +        has_replaces = true;
> +    }
> +
>      target_bs = bdrv_lookup_bs(target, target, errp);
>      if (!target_bs) {
>          return;

Kevin

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Max Reitz 6 years, 4 months ago

On 13.09.19 14:55, Kevin Wolf wrote:
> Am 09.08.2019 um 18:13 hat Max Reitz geschrieben:
>> This includes some permission limiting (for example, we only need to
>> take the RESIZE permission for active commits where the base is smaller
>> than the top).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>  block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>  blockdev.c     |  47 +++++++++++++++++---
>>  2 files changed, 131 insertions(+), 33 deletions(-)
>>
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 54bafdf176..6ddbfb9708 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>>      BlockBackend *target;
>>      BlockDriverState *mirror_top_bs;
>>      BlockDriverState *base;
>> +    BlockDriverState *base_overlay;
>>  
>>      /* The name of the graph node to replace */
>>      char *replaces;
>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>>                               &error_abort);
>>      if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>>          BlockDriverState *backing = s->is_none_mode ? src : s->base;
>> -        if (backing_bs(target_bs) != backing) {
>> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
>> +        BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
>> +
>> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
>> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>>              if (local_err) {
>>                  error_report_err(local_err);
>>                  ret = -EPERM;
>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>>       * valid.
>>       */
>>      block_job_remove_all_bdrv(bjob);
>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>  
>>      /* We just changed the BDS the job BB refers to (with either or both of the
>>       * bdrv_replace_node() calls), so switch the BB back so the cleanup does
>> @@ -812,7 +815,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
>>              return 0;
>>          }
>>  
>> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
>> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
>> +                                      &count);
>>          if (ret < 0) {
>>              return ret;
>>          }
>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
>>      } else {
>>          s->target_cluster_size = BDRV_SECTOR_SIZE;
>>      }
>> -    if (backing_filename[0] && !target_bs->backing &&
>> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>>          s->granularity < s->target_cluster_size) {
>>          s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>>          s->cow_bitmap = bitmap_new(length);
>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>>      if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>>          int ret;
>>  
>> -        assert(!target->backing);
>> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
>> +        assert(!bdrv_backing_chain_next(target));
>> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
>> +                                     "backing", errp);
>>          if (ret < 0) {
>>              return;
>>          }
>> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>>      MirrorBlockJob *s;
>>      MirrorBDSOpaque *bs_opaque;
>>      BlockDriverState *mirror_top_bs;
>> -    bool target_graph_mod;
>>      bool target_is_backing;
>> +    uint64_t target_perms, target_shared_perms;
>>      Error *local_err = NULL;
>>      int ret;
>>  
>> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>>          buf_size = DEFAULT_MIRROR_BUF_SIZE;
>>      }
>>  
>> -    if (bs == target) {
>> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>>          error_setg(errp, "Can't mirror node into itself");
>>          return NULL;
>>      }
>> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>>       * In the case of active commit, things look a bit different, though,
>>       * because the target is an already populated backing file in active use.
>>       * We can allow anything except resize there.*/
>> +
>> +    target_perms = BLK_PERM_WRITE;
>> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
>> +
>>      target_is_backing = bdrv_chain_contains(bs, target);
>> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
>> +    if (target_is_backing) {
>> +        int64_t bs_size, target_size;
>> +        bs_size = bdrv_getlength(bs);
>> +        if (bs_size < 0) {
>> +            error_setg_errno(errp, -bs_size,
>> +                             "Could not inquire top image size");
>> +            goto fail;
>> +        }
>> +
>> +        target_size = bdrv_getlength(target);
>> +        if (target_size < 0) {
>> +            error_setg_errno(errp, -target_size,
>> +                             "Could not inquire base image size");
>> +            goto fail;
>> +        }
>> +
>> +        if (target_size < bs_size) {
>> +            target_perms |= BLK_PERM_RESIZE;
>> +        }
>> +
>> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
>> +                            |  BLK_PERM_WRITE
>> +                            |  BLK_PERM_GRAPH_MOD;
>> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
>> +        /*
>> +         * We may want to allow this in the future, but it would
>> +         * require taking some extra care.
>> +         */
>> +        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
>> +                   "source's backing chain");
>> +        goto fail;
>> +    }
>> +
>> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
>> +        target_perms |= BLK_PERM_GRAPH_MOD;
>> +    }
> 
> This is getting absurd. We keep moving GRAPH_MOD around, but still
> nobody knows what it's actually supposed to mean. Maybe it would be
> better to just remove it finally?

I suppose even if we ever needed it, we no longer do now with .freeze.

> Of course, not a reason to stop this patch, after all it's moving the
> nonsensical piece of code correctly...
> 
>>      s->target = blk_new(s->common.job.aio_context,
>> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
>> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
>> -                        BLK_PERM_WRITE_UNCHANGED |
>> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
>> -                                             BLK_PERM_WRITE |
>> -                                             BLK_PERM_GRAPH_MOD : 0));
>> +                        target_perms, target_shared_perms);
>>      ret = blk_insert_bs(s->target, target, errp);
>>      if (ret < 0) {
>>          goto fail;
>> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>>      s->backing_mode = backing_mode;
>>      s->copy_mode = copy_mode;
>>      s->base = base;
>> +    s->base_overlay = bdrv_find_overlay(bs, base);
>>      s->granularity = granularity;
>>      s->buf_size = ROUND_UP(buf_size, granularity);
>>      s->unmap = unmap;
>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>      /* In commit_active_start() all intermediate nodes disappear, so
>>       * any jobs in them must be blocked */
>>      if (target_is_backing) {
>> -        BlockDriverState *iter;
>> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> -             * ourselves at s->base (if writes are blocked for a node, they are
>> -             * also blocked for its backing file). The other options would be a
>> -             * second filter driver above s->base (== target). */
>> +        BlockDriverState *iter, *filtered_target;
>> +        uint64_t iter_shared_perms;
>> +
>> +        /*
>> +         * The topmost node with
>> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
>> +         */
>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
>> +
>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>> +               bdrv_skip_rw_filters(target));
>> +
>> +        /*
>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> +         * ourselves at s->base (if writes are blocked for a node, they are
>> +         * also blocked for its backing file). The other options would be a
>> +         * second filter driver above s->base (== target).
>> +         */
>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>> +
>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>> +             iter = bdrv_filtered_bs(iter))
>> +        {
>> +            if (iter == filtered_target) {
>> +                /*
>> +                 * From here on, all nodes are filters on the base.
>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
>> +                 */
>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
>> +            }
>> +
>>              ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
>> -                                     errp);
>> +                                     iter_shared_perms, errp);
>>              if (ret < 0) {
>>                  goto fail;
>>              }
>> @@ -1737,7 +1802,7 @@ fail:
>>      bs_opaque->stop = true;
>>      bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>>                               &error_abort);
>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
>>  
>>      bdrv_unref(mirror_top_bs);
>>  
>> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
>>          return;
>>      }
>>      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
>> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
>>      mirror_start_job(job_id, bs, creation_flags, target, replaces,
>>                       speed, granularity, buf_size, backing_mode,
>>                       on_source_error, on_target_error, unmap, NULL, NULL,
>> diff --git a/blockdev.c b/blockdev.c
>> index c540802127..c451f553f7 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>          return;
>>      }
>>  
>> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
>> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>>          sync = MIRROR_SYNC_MODE_FULL;
>>      }
>>  
>> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
>>  
>>  void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>  {
>> -    BlockDriverState *bs;
>> +    BlockDriverState *bs, *unfiltered_bs;
>>      BlockDriverState *source, *target_bs;
>>      AioContext *aio_context;
>>      BlockMirrorBackingMode backing_mode;
>> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>      int flags;
>>      int64_t size;
>>      const char *format = arg->format;
>> +    const char *replaces_node_name = NULL;
>>      int ret;
>>  
>>      bs = qmp_get_root_bs(arg->device, errp);
>> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>          return;
>>      }
>>  
>> +    /*
>> +     * If the user has not instructed us otherwise, we should let the
>> +     * block job run from @bs (thus taking into account all filters on
>> +     * it) but replace @unfiltered_bs when it finishes (thus not
>> +     * removing those filters).
>> +     * (And if there are any explicit filters, we should assume the
>> +     *  user knows how to use the @replaces option.)
>> +     */
>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
> 
> Should this behaviour be documented in the QAPI schema for drive-mirror?

Hm.  I’d document it for @replaces.  But what would I write?  “By
default, @device is replaced, though implicitly created nodes on it are
kept”?

I feel bad about referencing implicit nodes, especially if our plan is
to remove them anyway.  OTOH, if I dropped special handling of implicit
nodes here, I should probably drop it in the whole series.  And I don’t
feel like that’s right as long as we haven’t actually removed implicit
nodes.

So I suppose I’ll go for the documentation addendum? :-/

>>      aio_context = bdrv_get_aio_context(bs);
>>      aio_context_acquire(aio_context);
>>  
>> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>      }
>>  
>>      flags = bs->open_flags | BDRV_O_RDWR;
>> -    source = backing_bs(bs);
>> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>>      if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
>> +        if (bdrv_filtered_bs(unfiltered_bs)) {
>> +            /* @unfiltered_bs is an explicit filter */
>> +            error_setg(errp, "Cannot perform sync=top mirror through an "
>> +                       "explicitly added filter node on the source");
>> +            goto out;
>> +        }
>>          arg->sync = MIRROR_SYNC_MODE_FULL;
>>      }
>>      if (arg->sync == MIRROR_SYNC_MODE_NONE) {
>> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>                               " named node of the graph");
>>              goto out;
>>          }
>> +        replaces_node_name = arg->replaces;
>> +    } else if (unfiltered_bs != bs) {
>> +        replaces_node_name = unfiltered_bs->node_name;
>>      }
>>  
>>      if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
>> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>          bdrv_img_create(arg->target, format,
>>                          NULL, NULL, NULL, size, flags, false, &local_err);
>>      } else {
>> +        /* Implicit filters should not appear in the filename */
>> +        BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source);
>> +
>>          switch (arg->mode) {
>>          case NEW_IMAGE_MODE_EXISTING:
>>              break;
>> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>              /* create new image with backing file */
>>              bdrv_refresh_filename(source);
>>              bdrv_img_create(arg->target, format,
>> -                            source->filename,
>> -                            source->drv->format_name,
>> +                            explicit_backing->filename,
>> +                            explicit_backing->drv->format_name,
>>                              NULL, size, flags, false, &local_err);
>>              break;
>>          default:
>> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>      }
>>  
>>      blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
>> -                           arg->has_replaces, arg->replaces, arg->sync,
>> +                           !!replaces_node_name, replaces_node_name, arg->sync,
>>                             backing_mode, arg->has_speed, arg->speed,
>>                             arg->has_granularity, arg->granularity,
>>                             arg->has_buf_size, arg->buf_size,
>> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>                           bool has_auto_dismiss, bool auto_dismiss,
>>                           Error **errp)
>>  {
>> -    BlockDriverState *bs;
>> +    BlockDriverState *bs, *unfiltered_bs;
>>      BlockDriverState *target_bs;
>>      AioContext *aio_context;
>>      BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
>> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
>>          return;
>>      }
>>  
>> +    /*
>> +     * Same as in qmp_drive_mirror(): We want to run the job from @bs,
>> +     * but we want to replace @unfiltered_bs on completion.
>> +     */
>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
> 
> Do we? I thought the idea with blockdev-mirror was that the client tells
> us the exact node it is interested in, without any magic skipping nodes.
> 
> Skipping implicit nodes is a feature for compatibility with legacy
> clients, but a client using blockdev-mirror isn't a legacy client.

And I thought legacy clients didn’t let implicit nodes be created.

We could return an error if @bs is an implicit node.  But I don’t think
that would help anyone, and it’d be more complicated in my opinion.

Again, the best thing (IMO) is to remove the concept of implicit nodes
altogether and then we can drop this piece of code anyway.

Max

>> +    if (!has_replaces && unfiltered_bs != bs) {
>> +        replaces = unfiltered_bs->node_name;
>> +        has_replaces = true;
>> +    }
>> +
>>      target_bs = bdrv_lookup_bs(target, target, errp);
>>      if (!target_bs) {
>>          return;
> 
> Kevin
>

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Vladimir Sementsov-Ogievskiy 6 years, 6 months ago

09.08.2019 19:13, Max Reitz wrote:
> This includes some permission limiting (for example, we only need to
> take the RESIZE permission for active commits where the base is smaller
> than the top).
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>   block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>   blockdev.c     |  47 +++++++++++++++++---
>   2 files changed, 131 insertions(+), 33 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index 54bafdf176..6ddbfb9708 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c


[..]

> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>       /* In commit_active_start() all intermediate nodes disappear, so
>        * any jobs in them must be blocked */
>       if (target_is_backing) {
> -        BlockDriverState *iter;
> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
> -             * ourselves at s->base (if writes are blocked for a node, they are
> -             * also blocked for its backing file). The other options would be a
> -             * second filter driver above s->base (== target). */
> +        BlockDriverState *iter, *filtered_target;
> +        uint64_t iter_shared_perms;
> +
> +        /*
> +         * The topmost node with
> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
> +         */
> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
> +
> +        assert(bdrv_skip_rw_filters(filtered_target) ==
> +               bdrv_skip_rw_filters(target));
> +
> +        /*
> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
> +         * ourselves at s->base (if writes are blocked for a node, they are
> +         * also blocked for its backing file). The other options would be a
> +         * second filter driver above s->base (== target).
> +         */
> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
> +
> +        for (iter = bdrv_filtered_bs(bs); iter != target;
> +             iter = bdrv_filtered_bs(iter))
> +        {
> +            if (iter == filtered_target) {
> +                /*
> +                 * From here on, all nodes are filters on the base.
> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
> +                 */
> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;


Hmm, I don't understand, why read from upper nodes is not shared?

> +            }
> +
>               ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
> -                                     errp);
> +                                     iter_shared_perms, errp);
>               if (ret < 0) {
>                   goto fail;
>               }

[..]

-- 
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Max Reitz 6 years, 6 months ago

On 12.08.19 13:09, Vladimir Sementsov-Ogievskiy wrote:
> 09.08.2019 19:13, Max Reitz wrote:
>> This includes some permission limiting (for example, we only need to
>> take the RESIZE permission for active commits where the base is smaller
>> than the top).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>   blockdev.c     |  47 +++++++++++++++++---
>>   2 files changed, 131 insertions(+), 33 deletions(-)
>>
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 54bafdf176..6ddbfb9708 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
> 
> 
> [..]
> 
>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>       /* In commit_active_start() all intermediate nodes disappear, so
>>        * any jobs in them must be blocked */
>>       if (target_is_backing) {
>> -        BlockDriverState *iter;
>> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> -             * ourselves at s->base (if writes are blocked for a node, they are
>> -             * also blocked for its backing file). The other options would be a
>> -             * second filter driver above s->base (== target). */
>> +        BlockDriverState *iter, *filtered_target;
>> +        uint64_t iter_shared_perms;
>> +
>> +        /*
>> +         * The topmost node with
>> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
>> +         */
>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
>> +
>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>> +               bdrv_skip_rw_filters(target));
>> +
>> +        /*
>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>> +         * ourselves at s->base (if writes are blocked for a node, they are
>> +         * also blocked for its backing file). The other options would be a
>> +         * second filter driver above s->base (== target).
>> +         */
>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>> +
>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>> +             iter = bdrv_filtered_bs(iter))
>> +        {
>> +            if (iter == filtered_target) {
>> +                /*
>> +                 * From here on, all nodes are filters on the base.
>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
>> +                 */
>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
> 
> 
> Hmm, I don't understand, why read from upper nodes is not shared?

Because they don’t represent a consistent disk state during the commit.

Please don’t ask me details about CONSISTENT_READ, because I always
pretend I understand it, but I never really do, actually.

(My problem is that I do understand why the intermediate nodes shouldn’t
share CONSISTENT_READ: It’s because they only read garbage, effectively.
 But I don’t understand how any block job target (like our base here)
can have CONSISTENT_READ.  Block job targets are mostly written front to
back (except with sync=none), so they too don’t “[represent] the
contents of a disk at a specific point.”
But that is how it was, so that is how it should be kept.)

If it makes you any happier, BLK_PERM_CONSISTENT_READ’s description
explicitly notes that it will not be shared on intermediate nodes of a
commit job.

Max

>> +            }
>> +
>>               ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
>> -                                     errp);
>> +                                     iter_shared_perms, errp);
>>               if (ret < 0) {
>>                   goto fail;
>>               }
> 
> [..]
>

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters

Posted by Vladimir Sementsov-Ogievskiy 6 years, 5 months ago

12.08.2019 16:26, Max Reitz wrote:
> On 12.08.19 13:09, Vladimir Sementsov-Ogievskiy wrote:
>> 09.08.2019 19:13, Max Reitz wrote:
>>> This includes some permission limiting (for example, we only need to
>>> take the RESIZE permission for active commits where the base is smaller
>>> than the top).
>>>
>>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>>> ---
>>>    block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>>    blockdev.c     |  47 +++++++++++++++++---
>>>    2 files changed, 131 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/block/mirror.c b/block/mirror.c
>>> index 54bafdf176..6ddbfb9708 100644
>>> --- a/block/mirror.c
>>> +++ b/block/mirror.c
>>
>>
>> [..]
>>
>>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>>        /* In commit_active_start() all intermediate nodes disappear, so
>>>         * any jobs in them must be blocked */
>>>        if (target_is_backing) {
>>> -        BlockDriverState *iter;
>>> -        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
>>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> -             * ourselves at s->base (if writes are blocked for a node, they are
>>> -             * also blocked for its backing file). The other options would be a
>>> -             * second filter driver above s->base (== target). */
>>> +        BlockDriverState *iter, *filtered_target;
>>> +        uint64_t iter_shared_perms;
>>> +
>>> +        /*
>>> +         * The topmost node with
>>> +         * bdrv_skip_rw_filters(filtered_target) == bdrv_skip_rw_filters(target)
>>> +         */
>>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, target));
>>> +
>>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>>> +               bdrv_skip_rw_filters(target));
>>> +
>>> +        /*
>>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> +         * ourselves at s->base (if writes are blocked for a node, they are
>>> +         * also blocked for its backing file). The other options would be a
>>> +         * second filter driver above s->base (== target).
>>> +         */
>>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>>> +
>>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>>> +             iter = bdrv_filtered_bs(iter))
>>> +        {
>>> +            if (iter == filtered_target) {
>>> +                /*
>>> +                 * From here on, all nodes are filters on the base.
>>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
>>> +                 */
>>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
>>
>>
>> Hmm, I don't understand, why read from upper nodes is not shared?
> 
> Because they don’t represent a consistent disk state during the commit.
> 
> Please don’t ask me details about CONSISTENT_READ, because I always
> pretend I understand it, but I never really do, actually.
> 
> (My problem is that I do understand why the intermediate nodes shouldn’t
> share CONSISTENT_READ: It’s because they only read garbage, effectively.
>   But I don’t understand how any block job target (like our base here)
> can have CONSISTENT_READ.

I know such example: it's image fleecing scheme, when for backup job source
is a backing for target. If serialization of requests works well target represents
consistent state of disk ate backup-start point in time.

But yes, it's not about mirror or commit.

>  Block job targets are mostly written front to
> back (except with sync=none), so they too don’t “[represent] the
> contents of a disk at a specific point.”
> But that is how it was, so that is how it should be kept.)
> 
> If it makes you any happier, BLK_PERM_CONSISTENT_READ’s description
> explicitly notes that it will not be shared on intermediate nodes of a
> commit job.
> 
> Max
> 
>>> +            }
>>> +
>>>                ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
>>> -                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
>>> -                                     errp);
>>> +                                     iter_shared_perms, errp);
>>>                if (ret < 0) {
>>>                    goto fail;
>>>                }
>>
>> [..]
>>
> 
> 


-- 
Best regards,
Vladimir