[PULL 16/25] block: Mark bdrv_replace_node() GRAPH_WRLOCK

Maintainers: Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>, Paolo Bonzini <pbonzini@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, "Denis V. Lunev" <den@openvz.org>, Eric Blake <eblake@redhat.com>, Wen Congyang <wencongyang2@huawei.com>, Xie Changlong <xiechanglong.d@gmail.com>, Stefan Weil <sw@weilnetz.de>, Jeff Cody <codyprime@gmail.com>, Juan Quintela <quintela@redhat.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Leonardo Bras <leobras@redhat.com>
There is a newer version of this series
[PULL 16/25] block: Mark bdrv_replace_node() GRAPH_WRLOCK
Posted by Kevin Wolf 1 year ago
Instead of taking the writer lock internally, require callers to already
hold it when calling bdrv_replace_node(). Its callers may already want
to hold the graph lock and so wouldn't be able to call functions that
take it internally.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231027155333.420094-17-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/block-global-state.h |  6 ++++--
 block.c                            | 30 +++++++++++-------------------
 block/commit.c                     | 13 +++++++++++--
 block/mirror.c                     | 26 ++++++++++++++++----------
 blockdev.c                         |  5 +++++
 tests/unit/test-bdrv-drain.c       |  6 ++++++
 tests/unit/test-bdrv-graph-mod.c   | 13 +++++++++++--
 7 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index a1fd70ec97..9e0ccc1c32 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -71,8 +71,10 @@ bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
 BlockDriverState *bdrv_new(void);
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
                 Error **errp);
-int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
-                      Error **errp);
+
+int GRAPH_WRLOCK
+bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp);
+
 int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
                           Error **errp);
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
diff --git a/block.c b/block.c
index c7409cf658..cac517ab8b 100644
--- a/block.c
+++ b/block.c
@@ -5484,25 +5484,7 @@ out:
 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                       Error **errp)
 {
-    int ret;
-
-    GLOBAL_STATE_CODE();
-
-    /* Make sure that @from doesn't go away until we have successfully attached
-     * all of its parents to @to. */
-    bdrv_ref(from);
-    bdrv_drained_begin(from);
-    bdrv_drained_begin(to);
-    bdrv_graph_wrlock(to);
-
-    ret = bdrv_replace_node_common(from, to, true, false, errp);
-
-    bdrv_graph_wrunlock();
-    bdrv_drained_end(to);
-    bdrv_drained_end(from);
-    bdrv_unref(from);
-
-    return ret;
+    return bdrv_replace_node_common(from, to, true, false, errp);
 }
 
 int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
@@ -5717,9 +5699,19 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
         goto fail;
     }
 
+    /*
+     * Make sure that @bs doesn't go away until we have successfully attached
+     * all of its parents to @new_node_bs and undrained it again.
+     */
+    bdrv_ref(bs);
     bdrv_drained_begin(bs);
+    bdrv_drained_begin(new_node_bs);
+    bdrv_graph_wrlock(new_node_bs);
     ret = bdrv_replace_node(bs, new_node_bs, errp);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(new_node_bs);
     bdrv_drained_end(bs);
+    bdrv_unref(bs);
 
     if (ret < 0) {
         error_prepend(errp, "Could not replace node: ");
diff --git a/block/commit.c b/block/commit.c
index d92af02ead..30bc082edc 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -68,6 +68,7 @@ static void commit_abort(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     BlockDriverState *top_bs = blk_bs(s->top);
+    BlockDriverState *commit_top_backing_bs;
 
     if (s->chain_frozen) {
         bdrv_graph_rdlock_main_loop();
@@ -94,8 +95,12 @@ static void commit_abort(Job *job)
      * XXX Can (or should) we somehow keep 'consistent read' blocked even
      * after the failed/cancelled commit job is gone? If we already wrote
      * something to base, the intermediate images aren't valid any more. */
-    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
-                      &error_abort);
+    commit_top_backing_bs = s->commit_top_bs->backing->bs;
+    bdrv_drained_begin(commit_top_backing_bs);
+    bdrv_graph_wrlock(commit_top_backing_bs);
+    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(commit_top_backing_bs);
 
     bdrv_unref(s->commit_top_bs);
     bdrv_unref(top_bs);
@@ -425,7 +430,11 @@ fail:
     /* commit_top_bs has to be replaced after deleting the block job,
      * otherwise this would fail because of lack of permissions. */
     if (commit_top_bs) {
+        bdrv_drained_begin(top);
+        bdrv_graph_wrlock(top);
         bdrv_replace_node(commit_top_bs, top, &error_abort);
+        bdrv_graph_wrunlock();
+        bdrv_drained_end(top);
     }
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index f8e439371c..dfc1c416e8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -712,6 +712,7 @@ static int mirror_exit_common(Job *job)
      * these permissions any more means that we can't allow any new requests on
      * mirror_top_bs from now on, so keep it drained. */
     bdrv_drained_begin(mirror_top_bs);
+    bdrv_drained_begin(target_bs);
     bs_opaque->stop = true;
 
     bdrv_graph_rdlock_main_loop();
@@ -757,15 +758,13 @@ static int mirror_exit_common(Job *job)
         /* The mirror job has no requests in flight any more, but we need to
          * drain potential other users of the BDS before changing the graph. */
         assert(s->in_drain);
-        bdrv_drained_begin(target_bs);
+        bdrv_drained_begin(to_replace);
         /*
          * Cannot use check_to_replace_node() here, because that would
          * check for an op blocker on @to_replace, and we have our own
          * there.
-         *
-         * TODO Pull out the writer lock from bdrv_replace_node() to here
          */
-        bdrv_graph_rdlock_main_loop();
+        bdrv_graph_wrlock(target_bs);
         if (bdrv_recurse_can_replace(src, to_replace)) {
             bdrv_replace_node(to_replace, target_bs, &local_err);
         } else {
@@ -774,8 +773,8 @@ static int mirror_exit_common(Job *job)
                        "would not lead to an abrupt change of visible data",
                        to_replace->node_name, target_bs->node_name);
         }
-        bdrv_graph_rdunlock_main_loop();
-        bdrv_drained_end(target_bs);
+        bdrv_graph_wrunlock();
+        bdrv_drained_end(to_replace);
         if (local_err) {
             error_report_err(local_err);
             ret = -EPERM;
@@ -790,7 +789,6 @@ static int mirror_exit_common(Job *job)
         aio_context_release(replace_aio_context);
     }
     g_free(s->replaces);
-    bdrv_unref(target_bs);
 
     /*
      * Remove the mirror filter driver from the graph. Before this, get rid of
@@ -798,7 +796,12 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
+    bdrv_graph_wrlock(mirror_top_bs);
     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_graph_wrunlock();
+
+    bdrv_drained_end(target_bs);
+    bdrv_unref(target_bs);
 
     bs_opaque->job = NULL;
 
@@ -1987,11 +1990,14 @@ fail:
     }
 
     bs_opaque->stop = true;
-    bdrv_graph_rdlock_main_loop();
+    bdrv_drained_begin(bs);
+    bdrv_graph_wrlock(bs);
+    assert(mirror_top_bs->backing->bs == bs);
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
-    bdrv_graph_rdunlock_main_loop();
-    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_replace_node(mirror_top_bs, bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(bs);
 
     bdrv_unref(mirror_top_bs);
 
diff --git a/blockdev.c b/blockdev.c
index f04faf6373..5bc921236c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1610,7 +1610,12 @@ static void external_snapshot_abort(void *opaque)
                 aio_context_acquire(aio_context);
             }
 
+            bdrv_drained_begin(state->new_bs);
+            bdrv_graph_wrlock(state->old_bs);
             bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
+            bdrv_graph_wrunlock();
+            bdrv_drained_end(state->new_bs);
+
             bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
 
             aio_context_release(aio_context);
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 40d17b4c5a..b16f831c23 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -2000,7 +2000,13 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
     parent_s->was_undrained = false;
 
     g_assert(parent_bs->quiesce_counter == old_drain_count);
+    bdrv_drained_begin(old_child_bs);
+    bdrv_drained_begin(new_child_bs);
+    bdrv_graph_wrlock(NULL);
     bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
+    bdrv_graph_wrunlock();
+    bdrv_drained_end(new_child_bs);
+    bdrv_drained_end(old_child_bs);
     g_assert(parent_bs->quiesce_counter == new_drain_count);
 
     if (!old_drain_count && !new_drain_count) {
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index 8609f7f42b..22d4cd83f6 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -234,11 +234,16 @@ static void test_parallel_exclusive_write(void)
     BlockDriverState *fl1 = pass_through_node("fl1");
     BlockDriverState *fl2 = pass_through_node("fl2");
 
+    bdrv_drained_begin(fl1);
+    bdrv_drained_begin(fl2);
+
     /*
      * bdrv_attach_child() eats child bs reference, so we need two @base
-     * references for two filters:
+     * references for two filters. We also need an additional @fl1 reference so
+     * that it still exists when we want to undrain it.
      */
     bdrv_ref(base);
+    bdrv_ref(fl1);
 
     bdrv_graph_wrlock(NULL);
     bdrv_attach_child(top, fl1, "backing", &child_of_bds,
@@ -250,10 +255,14 @@ static void test_parallel_exclusive_write(void)
     bdrv_attach_child(fl2, base, "backing", &child_of_bds,
                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                       &error_abort);
-    bdrv_graph_wrunlock();
 
     bdrv_replace_node(fl1, fl2, &error_abort);
+    bdrv_graph_wrunlock();
+
+    bdrv_drained_end(fl2);
+    bdrv_drained_end(fl1);
 
+    bdrv_unref(fl1);
     bdrv_unref(fl2);
     bdrv_unref(top);
 }
-- 
2.41.0