block: Graph locking part 6 (bs->file/backing)

[PATCH 16/24] block: Mark bdrv_replace_node() GRAPH_WRLOCK

Posted by Kevin Wolf 1 year, 1 month ago

Instead of taking the writer lock internally, require callers to already
hold it when calling bdrv_replace_node(). Its callers may already want
to hold the graph lock and so wouldn't be able to call functions that
take it internally.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block-global-state.h |  6 ++++--
 block.c                            | 26 +++++++-------------------
 block/commit.c                     | 13 +++++++++++--
 block/mirror.c                     | 26 ++++++++++++++++----------
 blockdev.c                         |  5 +++++
 tests/unit/test-bdrv-drain.c       |  6 ++++++
 tests/unit/test-bdrv-graph-mod.c   | 13 +++++++++++--
 7 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index a1fd70ec97..9e0ccc1c32 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -71,8 +71,10 @@ bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
 BlockDriverState *bdrv_new(void);
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
                 Error **errp);
-int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
-                      Error **errp);
+
+int GRAPH_WRLOCK
+bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp);
+
 int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
                           Error **errp);
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
diff --git a/block.c b/block.c
index c7409cf658..eb7eeb85e9 100644
--- a/block.c
+++ b/block.c
@@ -5484,25 +5484,7 @@ out:
 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                       Error **errp)
 {
-    int ret;
-
-    GLOBAL_STATE_CODE();
-
-    /* Make sure that @from doesn't go away until we have successfully attached
-     * all of its parents to @to. */
-    bdrv_ref(from);
-    bdrv_drained_begin(from);
-    bdrv_drained_begin(to);
-    bdrv_graph_wrlock(to);
-
-    ret = bdrv_replace_node_common(from, to, true, false, errp);
-
-    bdrv_graph_wrunlock();
-    bdrv_drained_end(to);
-    bdrv_drained_end(from);
-    bdrv_unref(from);
-
-    return ret;
+    return bdrv_replace_node_common(from, to, true, false, errp);
 }
 
 int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
@@ -5717,9 +5699,15 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
         goto fail;
     }
 
+    bdrv_ref(bs);
     bdrv_drained_begin(bs);
+    bdrv_drained_begin(new_node_bs);
+    bdrv_graph_wrlock(new_node_bs);
     ret = bdrv_replace_node(bs, new_node_bs, errp);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(new_node_bs);
     bdrv_drained_end(bs);
+    bdrv_unref(bs);
 
     if (ret < 0) {
         error_prepend(errp, "Could not replace node: ");
diff --git a/block/commit.c b/block/commit.c
index d92af02ead..2fecdce86f 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -68,6 +68,7 @@ static void commit_abort(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     BlockDriverState *top_bs = blk_bs(s->top);
+    BlockDriverState *commit_top_backing_bs;
 
     if (s->chain_frozen) {
         bdrv_graph_rdlock_main_loop();
@@ -94,8 +95,12 @@ static void commit_abort(Job *job)
      * XXX Can (or should) we somehow keep 'consistent read' blocked even
      * after the failed/cancelled commit job is gone? If we already wrote
      * something to base, the intermediate images aren't valid any more. */
-    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
-                      &error_abort);
+    commit_top_backing_bs = s->commit_top_bs->backing->bs;
+    bdrv_drained_begin(commit_top_backing_bs);
+    bdrv_graph_wrlock(commit_top_backing_bs);
+    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(commit_top_backing_bs);
 
     bdrv_unref(s->commit_top_bs);
     bdrv_unref(top_bs);
@@ -425,7 +430,11 @@ fail:
     /* commit_top_bs has to be replaced after deleting the block job,
      * otherwise this would fail because of lack of permissions. */
     if (commit_top_bs) {
+        bdrv_graph_wrlock(top);
+        bdrv_drained_begin(top);
         bdrv_replace_node(commit_top_bs, top, &error_abort);
+        bdrv_drained_end(top);
+        bdrv_graph_wrunlock();
     }
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 304bd3208a..e2e325ec56 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -704,6 +704,7 @@ static int mirror_exit_common(Job *job)
      * these permissions any more means that we can't allow any new requests on
      * mirror_top_bs from now on, so keep it drained. */
     bdrv_drained_begin(mirror_top_bs);
+    bdrv_drained_begin(target_bs);
     bs_opaque->stop = true;
 
     bdrv_graph_rdlock_main_loop();
@@ -749,15 +750,13 @@ static int mirror_exit_common(Job *job)
         /* The mirror job has no requests in flight any more, but we need to
          * drain potential other users of the BDS before changing the graph. */
         assert(s->in_drain);
-        bdrv_drained_begin(target_bs);
+        bdrv_drained_begin(to_replace);
         /*
          * Cannot use check_to_replace_node() here, because that would
          * check for an op blocker on @to_replace, and we have our own
          * there.
-         *
-         * TODO Pull out the writer lock from bdrv_replace_node() to here
          */
-        bdrv_graph_rdlock_main_loop();
+        bdrv_graph_wrlock(target_bs);
         if (bdrv_recurse_can_replace(src, to_replace)) {
             bdrv_replace_node(to_replace, target_bs, &local_err);
         } else {
@@ -766,8 +765,8 @@ static int mirror_exit_common(Job *job)
                        "would not lead to an abrupt change of visible data",
                        to_replace->node_name, target_bs->node_name);
         }
-        bdrv_graph_rdunlock_main_loop();
-        bdrv_drained_end(target_bs);
+        bdrv_graph_wrunlock();
+        bdrv_drained_end(to_replace);
         if (local_err) {
             error_report_err(local_err);
             ret = -EPERM;
@@ -782,7 +781,6 @@ static int mirror_exit_common(Job *job)
         aio_context_release(replace_aio_context);
     }
     g_free(s->replaces);
-    bdrv_unref(target_bs);
 
     /*
      * Remove the mirror filter driver from the graph. Before this, get rid of
@@ -790,7 +788,12 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
+    bdrv_graph_wrlock(mirror_top_bs);
     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_graph_wrunlock();
+
+    bdrv_drained_end(target_bs);
+    bdrv_unref(target_bs);
 
     bs_opaque->job = NULL;
 
@@ -1930,11 +1933,14 @@ fail:
     }
 
     bs_opaque->stop = true;
-    bdrv_graph_rdlock_main_loop();
+    bdrv_drained_begin(bs);
+    bdrv_graph_wrlock(bs);
+    assert(mirror_top_bs->backing->bs == bs);
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
-    bdrv_graph_rdunlock_main_loop();
-    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_replace_node(mirror_top_bs, bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(bs);
 
     bdrv_unref(mirror_top_bs);
 
diff --git a/blockdev.c b/blockdev.c
index 368cec3747..f36e169ea3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1601,7 +1601,12 @@ static void external_snapshot_abort(void *opaque)
                 aio_context_acquire(aio_context);
             }
 
+            bdrv_drained_begin(state->new_bs);
+            bdrv_graph_wrlock(state->old_bs);
             bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
+            bdrv_graph_wrunlock();
+            bdrv_drained_end(state->new_bs);
+
             bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
 
             aio_context_release(aio_context);
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 40d17b4c5a..b16f831c23 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -2000,7 +2000,13 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
     parent_s->was_undrained = false;
 
     g_assert(parent_bs->quiesce_counter == old_drain_count);
+    bdrv_drained_begin(old_child_bs);
+    bdrv_drained_begin(new_child_bs);
+    bdrv_graph_wrlock(NULL);
     bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(new_child_bs);
+    bdrv_drained_end(old_child_bs);
     g_assert(parent_bs->quiesce_counter == new_drain_count);
 
     if (!old_drain_count && !new_drain_count) {
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index 8609f7f42b..22d4cd83f6 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -234,11 +234,16 @@ static void test_parallel_exclusive_write(void)
     BlockDriverState *fl1 = pass_through_node("fl1");
     BlockDriverState *fl2 = pass_through_node("fl2");
 
+    bdrv_drained_begin(fl1);
+    bdrv_drained_begin(fl2);
+
     /*
      * bdrv_attach_child() eats child bs reference, so we need two @base
-     * references for two filters:
+     * references for two filters. We also need an additional @fl1 reference so
+     * that it still exists when we want to undrain it.
      */
     bdrv_ref(base);
+    bdrv_ref(fl1);
 
     bdrv_graph_wrlock(NULL);
     bdrv_attach_child(top, fl1, "backing", &child_of_bds,
@@ -250,10 +255,14 @@ static void test_parallel_exclusive_write(void)
     bdrv_attach_child(fl2, base, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
-    bdrv_graph_wrunlock();
 
     bdrv_replace_node(fl1, fl2, &error_abort);
+    bdrv_graph_wrunlock();
+
+    bdrv_drained_end(fl2);
+    bdrv_drained_end(fl1);
 
+    bdrv_unref(fl1);
     bdrv_unref(fl2);
     bdrv_unref(top);
 }
-- 
2.41.0

Re: [PATCH 16/24] block: Mark bdrv_replace_node() GRAPH_WRLOCK

Posted by Eric Blake 1 year, 1 month ago

On Fri, Oct 27, 2023 at 05:53:25PM +0200, Kevin Wolf wrote:
> Instead of taking the writer lock internally, require callers to already
> hold it when calling bdrv_replace_node(). Its callers may already want
> to hold the graph lock and so wouldn't be able to call functions that
> take it internally.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  include/block/block-global-state.h |  6 ++++--
>  block.c                            | 26 +++++++-------------------
>  block/commit.c                     | 13 +++++++++++--
>  block/mirror.c                     | 26 ++++++++++++++++----------
>  blockdev.c                         |  5 +++++
>  tests/unit/test-bdrv-drain.c       |  6 ++++++
>  tests/unit/test-bdrv-graph-mod.c   | 13 +++++++++++--
>  7 files changed, 60 insertions(+), 35 deletions(-)
> 
> +++ b/block.c
> @@ -5484,25 +5484,7 @@ out:
>  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
>                        Error **errp)
>  {
> -    int ret;
> -
> -    GLOBAL_STATE_CODE();
> -
> -    /* Make sure that @from doesn't go away until we have successfully attached
> -     * all of its parents to @to. */

Useful comment that you just moved here in the previous patch...

> -    bdrv_ref(from);
> -    bdrv_drained_begin(from);
> -    bdrv_drained_begin(to);
> -    bdrv_graph_wrlock(to);
> -
> -    ret = bdrv_replace_node_common(from, to, true, false, errp);
> -
> -    bdrv_graph_wrunlock();
> -    bdrv_drained_end(to);
> -    bdrv_drained_end(from);
> -    bdrv_unref(from);
> -
> -    return ret;
> +    return bdrv_replace_node_common(from, to, true, false, errp);
>  }
>  
>  int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
> @@ -5717,9 +5699,15 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
>          goto fail;
>      }
>  
> +    bdrv_ref(bs);

...but now it is gone.  Intentional?

>      bdrv_drained_begin(bs);
> +    bdrv_drained_begin(new_node_bs);
> +    bdrv_graph_wrlock(new_node_bs);
>      ret = bdrv_replace_node(bs, new_node_bs, errp);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(new_node_bs);
>      bdrv_drained_end(bs);
> +    bdrv_unref(bs);
>  
>      if (ret < 0) {
>          error_prepend(errp, "Could not replace node: ");
> diff --git a/block/commit.c b/block/commit.c
> index d92af02ead..2fecdce86f 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -68,6 +68,7 @@ static void commit_abort(Job *job)
>  {
>      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
>      BlockDriverState *top_bs = blk_bs(s->top);
> +    BlockDriverState *commit_top_backing_bs;
>  
>      if (s->chain_frozen) {
>          bdrv_graph_rdlock_main_loop();
> @@ -94,8 +95,12 @@ static void commit_abort(Job *job)
>       * XXX Can (or should) we somehow keep 'consistent read' blocked even
>       * after the failed/cancelled commit job is gone? If we already wrote
>       * something to base, the intermediate images aren't valid any more. */
> -    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
> -                      &error_abort);
> +    commit_top_backing_bs = s->commit_top_bs->backing->bs;
> +    bdrv_drained_begin(commit_top_backing_bs);
> +    bdrv_graph_wrlock(commit_top_backing_bs);

Here, and elsewhere in the patch, drained_begin/end is outside
wr(un)lock...

> +    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(commit_top_backing_bs);
>  
>      bdrv_unref(s->commit_top_bs);
>      bdrv_unref(top_bs);
> @@ -425,7 +430,11 @@ fail:
>      /* commit_top_bs has to be replaced after deleting the block job,
>       * otherwise this would fail because of lack of permissions. */
>      if (commit_top_bs) {
> +        bdrv_graph_wrlock(top);
> +        bdrv_drained_begin(top);
>          bdrv_replace_node(commit_top_bs, top, &error_abort);
> +        bdrv_drained_end(top);
> +        bdrv_graph_wrunlock();

...but here you do it in the opposite order.  Intentional?

> +++ b/tests/unit/test-bdrv-drain.c
> @@ -2000,7 +2000,13 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
>      parent_s->was_undrained = false;
>  
>      g_assert(parent_bs->quiesce_counter == old_drain_count);
> +    bdrv_drained_begin(old_child_bs);
> +    bdrv_drained_begin(new_child_bs);
> +    bdrv_graph_wrlock(NULL);

Why is this locking on NULL instead of new_child_bs?

>      bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(new_child_bs);
> +    bdrv_drained_end(old_child_bs);
>      g_assert(parent_bs->quiesce_counter == new_drain_count);
>  
>      if (!old_drain_count && !new_drain_count) {

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org

Re: [PATCH 16/24] block: Mark bdrv_replace_node() GRAPH_WRLOCK

Posted by Kevin Wolf 1 year ago

Am 27.10.2023 um 23:33 hat Eric Blake geschrieben:
> On Fri, Oct 27, 2023 at 05:53:25PM +0200, Kevin Wolf wrote:
> > Instead of taking the writer lock internally, require callers to already
> > hold it when calling bdrv_replace_node(). Its callers may already want
> > to hold the graph lock and so wouldn't be able to call functions that
> > take it internally.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  include/block/block-global-state.h |  6 ++++--
> >  block.c                            | 26 +++++++-------------------
> >  block/commit.c                     | 13 +++++++++++--
> >  block/mirror.c                     | 26 ++++++++++++++++----------
> >  blockdev.c                         |  5 +++++
> >  tests/unit/test-bdrv-drain.c       |  6 ++++++
> >  tests/unit/test-bdrv-graph-mod.c   | 13 +++++++++++--
> >  7 files changed, 60 insertions(+), 35 deletions(-)
> > 
> > +++ b/block.c
> > @@ -5484,25 +5484,7 @@ out:
> >  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
> >                        Error **errp)
> >  {
> > -    int ret;
> > -
> > -    GLOBAL_STATE_CODE();
> > -
> > -    /* Make sure that @from doesn't go away until we have successfully attached
> > -     * all of its parents to @to. */
> 
> Useful comment that you just moved here in the previous patch...
> 
> > -    bdrv_ref(from);
> > -    bdrv_drained_begin(from);
> > -    bdrv_drained_begin(to);
> > -    bdrv_graph_wrlock(to);
> > -
> > -    ret = bdrv_replace_node_common(from, to, true, false, errp);
> > -
> > -    bdrv_graph_wrunlock();
> > -    bdrv_drained_end(to);
> > -    bdrv_drained_end(from);
> > -    bdrv_unref(from);
> > -
> > -    return ret;
> > +    return bdrv_replace_node_common(from, to, true, false, errp);
> >  }
> >  
> >  int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
> > @@ -5717,9 +5699,15 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
> >          goto fail;
> >      }
> >  
> > +    bdrv_ref(bs);
> 
> ...but now it is gone.  Intentional?

I figured it was obvious enough that bdrv_ref() is always called to make
sure that the node doesn't go away too early, but I can add it back.

> >      bdrv_drained_begin(bs);
> > +    bdrv_drained_begin(new_node_bs);
> > +    bdrv_graph_wrlock(new_node_bs);
> >      ret = bdrv_replace_node(bs, new_node_bs, errp);
> > +    bdrv_graph_wrunlock();
> > +    bdrv_drained_end(new_node_bs);
> >      bdrv_drained_end(bs);
> > +    bdrv_unref(bs);
> >  
> >      if (ret < 0) {
> >          error_prepend(errp, "Could not replace node: ");
> > diff --git a/block/commit.c b/block/commit.c
> > index d92af02ead..2fecdce86f 100644
> > --- a/block/commit.c
> > +++ b/block/commit.c
> > @@ -68,6 +68,7 @@ static void commit_abort(Job *job)
> >  {
> >      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
> >      BlockDriverState *top_bs = blk_bs(s->top);
> > +    BlockDriverState *commit_top_backing_bs;
> >  
> >      if (s->chain_frozen) {
> >          bdrv_graph_rdlock_main_loop();
> > @@ -94,8 +95,12 @@ static void commit_abort(Job *job)
> >       * XXX Can (or should) we somehow keep 'consistent read' blocked even
> >       * after the failed/cancelled commit job is gone? If we already wrote
> >       * something to base, the intermediate images aren't valid any more. */
> > -    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
> > -                      &error_abort);
> > +    commit_top_backing_bs = s->commit_top_bs->backing->bs;
> > +    bdrv_drained_begin(commit_top_backing_bs);
> > +    bdrv_graph_wrlock(commit_top_backing_bs);
> 
> Here, and elsewhere in the patch, drained_begin/end is outside
> wr(un)lock...
> 
> > +    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
> > +    bdrv_graph_wrunlock();
> > +    bdrv_drained_end(commit_top_backing_bs);
> >  
> >      bdrv_unref(s->commit_top_bs);
> >      bdrv_unref(top_bs);
> > @@ -425,7 +430,11 @@ fail:
> >      /* commit_top_bs has to be replaced after deleting the block job,
> >       * otherwise this would fail because of lack of permissions. */
> >      if (commit_top_bs) {
> > +        bdrv_graph_wrlock(top);
> > +        bdrv_drained_begin(top);
> >          bdrv_replace_node(commit_top_bs, top, &error_abort);
> > +        bdrv_drained_end(top);
> > +        bdrv_graph_wrunlock();
> 
> ...but here you do it in the opposite order.  Intentional?

No, this is actually wrong. bdrv_drained_begin() has a nested event
loop, and running a nested event loop while holding the graph lock can
cause deadlocks, so it's forbidden. Thanks for catching this!

> > +++ b/tests/unit/test-bdrv-drain.c
> > @@ -2000,7 +2000,13 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
> >      parent_s->was_undrained = false;
> >  
> >      g_assert(parent_bs->quiesce_counter == old_drain_count);
> > +    bdrv_drained_begin(old_child_bs);
> > +    bdrv_drained_begin(new_child_bs);
> > +    bdrv_graph_wrlock(NULL);
> 
> Why is this locking on NULL instead of new_child_bs?

The parameter for bdrv_graph_wrlock() is a BDS whose AioContext is
locked and needs to be temporarily unlocked to avoid deadlocks. We don't
hold any AioContext lock here, so NULL is right.

> >      bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
> > +    bdrv_graph_wrunlock();
> > +    bdrv_drained_end(new_child_bs);
> > +    bdrv_drained_end(old_child_bs);
> >      g_assert(parent_bs->quiesce_counter == new_drain_count);
> >  
> >      if (!old_drain_count && !new_drain_count) {

Since the two comments above are the only thing you found in the review,
I'll just directly fix them while applying the series.

Kevin

Re: [PATCH 16/24] block: Mark bdrv_replace_node() GRAPH_WRLOCK

Posted by Eric Blake 1 year ago

On Fri, Nov 03, 2023 at 11:32:39AM +0100, Kevin Wolf wrote:
...
> > > -    GLOBAL_STATE_CODE();
> > > -
> > > -    /* Make sure that @from doesn't go away until we have successfully attached
> > > -     * all of its parents to @to. */
> > 
> > Useful comment that you just moved here in the previous patch...
> > 
> > > -    bdrv_ref(from);
...
> > > @@ -5717,9 +5699,15 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
> > >          goto fail;
> > >      }
> > >  
> > > +    bdrv_ref(bs);
> > 
> > ...but now it is gone.  Intentional?
> 
> I figured it was obvious enough that bdrv_ref() is always called to make
> sure that the node doesn't go away too early, but I can add it back.

Your call.

> > > @@ -94,8 +95,12 @@ static void commit_abort(Job *job)
> > >       * XXX Can (or should) we somehow keep 'consistent read' blocked even
> > >       * after the failed/cancelled commit job is gone? If we already wrote
> > >       * something to base, the intermediate images aren't valid any more. */
> > > -    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
> > > -                      &error_abort);
> > > +    commit_top_backing_bs = s->commit_top_bs->backing->bs;
> > > +    bdrv_drained_begin(commit_top_backing_bs);
> > > +    bdrv_graph_wrlock(commit_top_backing_bs);
> > 
> > Here, and elsewhere in the patch, drained_begin/end is outside
> > wr(un)lock...
> > 
> > > +    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
> > > +    bdrv_graph_wrunlock();
> > > +    bdrv_drained_end(commit_top_backing_bs);
> > >  
> > >      bdrv_unref(s->commit_top_bs);
> > >      bdrv_unref(top_bs);
> > > @@ -425,7 +430,11 @@ fail:
> > >      /* commit_top_bs has to be replaced after deleting the block job,
> > >       * otherwise this would fail because of lack of permissions. */
> > >      if (commit_top_bs) {
> > > +        bdrv_graph_wrlock(top);
> > > +        bdrv_drained_begin(top);
> > >          bdrv_replace_node(commit_top_bs, top, &error_abort);
> > > +        bdrv_drained_end(top);
> > > +        bdrv_graph_wrunlock();
> > 
> > ...but here you do it in the opposite order.  Intentional?
> 
> No, this is actually wrong. bdrv_drained_begin() has a nested event
> loop, and running a nested event loop while holding the graph lock can
> cause deadlocks, so it's forbidden. Thanks for catching this!

That's what review is for!

> 
> Since the two comments above are the only thing you found in the review,
> I'll just directly fix them while applying the series.

Sounds good to me. With the deadlock fixed by swapping order,

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org