[RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines

Paolo Bonzini posted 1 patch 1 year, 4 months ago
Failed in applying to current master (apply log)
tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
1 file changed, 46 insertions(+), 17 deletions(-)
[RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Paolo Bonzini 1 year, 4 months ago
Changes to the BlockDriverState graph will have to take the
corresponding lock for writing, and therefore cannot be done
inside a coroutine.  Move them outside the test body.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 17 deletions(-)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 6ae44116fe79..d85083dd4f9e 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
     }
 }
 
+static BlockBackend *blk;
+static BlockDriverState *bs, *backing;
+
+static void test_drv_cb_init(void)
+{
+    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+                              &error_abort);
+    blk_insert_bs(blk, bs, &error_abort);
+
+    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+    bdrv_set_backing_hd(bs, backing, &error_abort);
+}
+
+static void test_drv_cb_fini(void)
+{
+    bdrv_unref(backing);
+    bdrv_unref(bs);
+    blk_unref(blk);
+    backing = NULL;
+    bs = NULL;
+    blk = NULL;
+}
+
 static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 {
-    BlockBackend *blk;
-    BlockDriverState *bs, *backing;
     BDRVTestState *s, *backing_s;
     BlockAIOCB *acb;
     int aio_ret;
 
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
 
-    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
-    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
-                              &error_abort);
     s = bs->opaque;
-    blk_insert_bs(blk, bs, &error_abort);
-
-    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
     backing_s = backing->opaque;
-    bdrv_set_backing_hd(bs, backing, &error_abort);
 
     /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
     g_assert_cmpint(s->drain_count, ==, 0);
@@ -252,30 +267,44 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 
     g_assert_cmpint(s->drain_count, ==, 0);
     g_assert_cmpint(backing_s->drain_count, ==, 0);
-
-    bdrv_unref(backing);
-    bdrv_unref(bs);
-    blk_unref(blk);
 }
 
-static void test_drv_cb_drain_all(void)
+static void test_drv_cb_do_drain_all(void)
 {
     test_drv_cb_common(BDRV_DRAIN_ALL, true);
 }
 
-static void test_drv_cb_drain(void)
+static void test_drv_cb_do_drain(void)
 {
     test_drv_cb_common(BDRV_DRAIN, false);
 }
 
+static void test_drv_cb_drain_all(void)
+{
+    test_drv_cb_init();
+    test_drv_cb_do_drain_all();
+    test_drv_cb_fini();
+}
+
+static void test_drv_cb_drain(void)
+{
+    test_drv_cb_init();
+    test_drv_cb_do_drain();
+    test_drv_cb_fini();
+}
+
 static void test_drv_cb_co_drain_all(void)
 {
-    call_in_coroutine(test_drv_cb_drain_all);
+    test_drv_cb_init();
+    call_in_coroutine(test_drv_cb_do_drain_all);
+    test_drv_cb_fini();
 }
 
 static void test_drv_cb_co_drain(void)
 {
-    call_in_coroutine(test_drv_cb_drain);
+    test_drv_cb_init();
+    call_in_coroutine(test_drv_cb_do_drain);
+    test_drv_cb_fini();
 }
 
 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
-- 
2.38.1
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Emanuele Giuseppe Esposito 1 year, 4 months ago

Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
> Changes to the BlockDriverState graph will have to take the
> corresponding lock for writing, and therefore cannot be done
> inside a coroutine.  Move them outside the test body.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
>  1 file changed, 46 insertions(+), 17 deletions(-)
> 
> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
> index 6ae44116fe79..d85083dd4f9e 100644
> --- a/tests/unit/test-bdrv-drain.c
> +++ b/tests/unit/test-bdrv-drain.c
> @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
>      }
>  }
>  
> +static BlockBackend *blk;
> +static BlockDriverState *bs, *backing;
> +
> +static void test_drv_cb_init(void)
> +{
> +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
> +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
> +                              &error_abort);
> +    blk_insert_bs(blk, bs, &error_abort);
> +
> +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
> +    bdrv_set_backing_hd(bs, backing, &error_abort);
> +}
> +
> +static void test_drv_cb_fini(void)

fini stands for "finito"? :)

Anyways, an alternative solution for this is also here (probably coming
from you too):
https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html

Thank you,
Emanuele

> +{
> +    bdrv_unref(backing);
> +    bdrv_unref(bs);
> +    blk_unref(blk);
> +    backing = NULL;
> +    bs = NULL;
> +    blk = NULL;
> +}
> +
>  static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
>  {
> -    BlockBackend *blk;
> -    BlockDriverState *bs, *backing;
>      BDRVTestState *s, *backing_s;
>      BlockAIOCB *acb;
>      int aio_ret;
>  
>      QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
>  
> -    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
> -    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
> -                              &error_abort);
>      s = bs->opaque;
> -    blk_insert_bs(blk, bs, &error_abort);
> -
> -    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
>      backing_s = backing->opaque;
> -    bdrv_set_backing_hd(bs, backing, &error_abort);
>  
>      /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
>      g_assert_cmpint(s->drain_count, ==, 0);
> @@ -252,30 +267,44 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
>  
>      g_assert_cmpint(s->drain_count, ==, 0);
>      g_assert_cmpint(backing_s->drain_count, ==, 0);
> -
> -    bdrv_unref(backing);
> -    bdrv_unref(bs);
> -    blk_unref(blk);
>  }
>  
> -static void test_drv_cb_drain_all(void)
> +static void test_drv_cb_do_drain_all(void)
>  {
>      test_drv_cb_common(BDRV_DRAIN_ALL, true);
>  }
>  
> -static void test_drv_cb_drain(void)
> +static void test_drv_cb_do_drain(void)
>  {
>      test_drv_cb_common(BDRV_DRAIN, false);
>  }
>  
> +static void test_drv_cb_drain_all(void)
> +{
> +    test_drv_cb_init();
> +    test_drv_cb_do_drain_all();
> +    test_drv_cb_fini();
> +}
> +
> +static void test_drv_cb_drain(void)
> +{
> +    test_drv_cb_init();
> +    test_drv_cb_do_drain();
> +    test_drv_cb_fini();
> +}
> +
>  static void test_drv_cb_co_drain_all(void)
>  {
> -    call_in_coroutine(test_drv_cb_drain_all);
> +    test_drv_cb_init();
> +    call_in_coroutine(test_drv_cb_do_drain_all);
> +    test_drv_cb_fini();
>  }
>  
>  static void test_drv_cb_co_drain(void)
>  {
> -    call_in_coroutine(test_drv_cb_drain);
> +    test_drv_cb_init();
> +    call_in_coroutine(test_drv_cb_do_drain);
> +    test_drv_cb_fini();
>  }
>  
>  static void test_quiesce_common(enum drain_type drain_type, bool recursive)
>
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Paolo Bonzini 1 year, 4 months ago
On 12/2/22 14:42, Emanuele Giuseppe Esposito wrote:
> 
> 
> Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
>> Changes to the BlockDriverState graph will have to take the
>> corresponding lock for writing, and therefore cannot be done
>> inside a coroutine.  Move them outside the test body.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
>>   1 file changed, 46 insertions(+), 17 deletions(-)
>>
>> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
>> index 6ae44116fe79..d85083dd4f9e 100644
>> --- a/tests/unit/test-bdrv-drain.c
>> +++ b/tests/unit/test-bdrv-drain.c
>> @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
>>       }
>>   }
>>   
>> +static BlockBackend *blk;
>> +static BlockDriverState *bs, *backing;
>> +
>> +static void test_drv_cb_init(void)
>> +{
>> +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
>> +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
>> +                              &error_abort);
>> +    blk_insert_bs(blk, bs, &error_abort);
>> +
>> +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
>> +    bdrv_set_backing_hd(bs, backing, &error_abort);
>> +}
>> +
>> +static void test_drv_cb_fini(void)
> 
> fini stands for "finito"? :)

No, for finish :) 
http://ftp.math.utah.edu/u/ma/hohn/linux/misc/elf/node3.html

> Anyways, an alternative solution for this is also here (probably coming
> from you too):
> https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html

Much better.  At least patches 7-8 from that series have to be salvaged, 
possibly 10 as well.

Paolo
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Kevin Wolf 1 year, 4 months ago
Am 02.12.2022 um 18:22 hat Paolo Bonzini geschrieben:
> On 12/2/22 14:42, Emanuele Giuseppe Esposito wrote:
> > 
> > 
> > Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
> > > Changes to the BlockDriverState graph will have to take the
> > > corresponding lock for writing, and therefore cannot be done
> > > inside a coroutine.  Move them outside the test body.
> > > 
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > ---
> > >   tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
> > >   1 file changed, 46 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
> > > index 6ae44116fe79..d85083dd4f9e 100644
> > > --- a/tests/unit/test-bdrv-drain.c
> > > +++ b/tests/unit/test-bdrv-drain.c
> > > @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
> > >       }
> > >   }
> > > +static BlockBackend *blk;
> > > +static BlockDriverState *bs, *backing;
> > > +
> > > +static void test_drv_cb_init(void)
> > > +{
> > > +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
> > > +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
> > > +                              &error_abort);
> > > +    blk_insert_bs(blk, bs, &error_abort);
> > > +
> > > +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
> > > +    bdrv_set_backing_hd(bs, backing, &error_abort);
> > > +}
> > > +
> > > +static void test_drv_cb_fini(void)
> > 
> > fini stands for "finito"? :)
> 
> No, for finish :)
> http://ftp.math.utah.edu/u/ma/hohn/linux/misc/elf/node3.html
> 
> > Anyways, an alternative solution for this is also here (probably coming
> > from you too):
> > https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html
> 
> Much better.  At least patches 7-8 from that series have to be salvaged,
> possibly 10 as well.

I wonder if we need a more general solution for this because this test
is not the only place that calls these kinds of functions in a coroutine.
The one I'm aware of in particular is all the .bdrv_co_create
implementations, but I'm almost sure there are more.

Can we use a yield_to_drain()-like mechanism for these functions? Maybe
even something like the opposite of co_wrapper, a no_co_wrapper that
generates a foo_co() variant that drops out of coroutine context before
calling foo()?

Kevin
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Emanuele Giuseppe Esposito 1 year, 4 months ago

Am 05/12/2022 um 14:01 schrieb Kevin Wolf:
> Am 02.12.2022 um 18:22 hat Paolo Bonzini geschrieben:
>> On 12/2/22 14:42, Emanuele Giuseppe Esposito wrote:
>>>
>>>
>>> Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
>>>> Changes to the BlockDriverState graph will have to take the
>>>> corresponding lock for writing, and therefore cannot be done
>>>> inside a coroutine.  Move them outside the test body.
>>>>
>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> ---
>>>>   tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
>>>>   1 file changed, 46 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
>>>> index 6ae44116fe79..d85083dd4f9e 100644
>>>> --- a/tests/unit/test-bdrv-drain.c
>>>> +++ b/tests/unit/test-bdrv-drain.c
>>>> @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
>>>>       }
>>>>   }
>>>> +static BlockBackend *blk;
>>>> +static BlockDriverState *bs, *backing;
>>>> +
>>>> +static void test_drv_cb_init(void)
>>>> +{
>>>> +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
>>>> +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
>>>> +                              &error_abort);
>>>> +    blk_insert_bs(blk, bs, &error_abort);
>>>> +
>>>> +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
>>>> +    bdrv_set_backing_hd(bs, backing, &error_abort);
>>>> +}
>>>> +
>>>> +static void test_drv_cb_fini(void)
>>>
>>> fini stands for "finito"? :)
>>
>> No, for finish :)
>> http://ftp.math.utah.edu/u/ma/hohn/linux/misc/elf/node3.html
>>
>>> Anyways, an alternative solution for this is also here (probably coming
>>> from you too):
>>> https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html
>>
>> Much better.  At least patches 7-8 from that series have to be salvaged,
>> possibly 10 as well.
> 
> I wonder if we need a more general solution for this because this test
> is not the only place that calls this kind of functions in a coroutine.
> The one I'm aware of in particular is all the .bdrv_co_create
> implementations, but I'm almost sure there are more.
> 
> Can we use a yield_to_drain()-like mechanism for these functions? Maybe
> even something like the opposite of co_wrapper, a no_co_wrapper that
> generates a foo_co() variant that drops out of coroutine context before
> calling foo()?
> 

I implemented something like yield_to_drain as you suggested, but when
thinking about it aren't we making a fix that will cost us even more
work in the future? If we use a yield_to_drain-like function, we are
doing something similar to g_c_w (generated_co_wrapper), and losing
track of whether the caller
everywhere. Then we will realize "oh we need to get rid of this and
split the functions differentiating the coroutine context" and
eventually go through ALL the callers again to figure out what is doing
what, and implement the same fix of this patch or my series once again.

Instead, even though this is just a test, we have a clear separation and
one less case to worry about in the future.

Thank you,
Emanuele
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Emanuele Giuseppe Esposito 1 year, 4 months ago

Am 09/12/2022 um 13:18 schrieb Emanuele Giuseppe Esposito:
> 
> 
> Am 05/12/2022 um 14:01 schrieb Kevin Wolf:
>> Am 02.12.2022 um 18:22 hat Paolo Bonzini geschrieben:
>>> On 12/2/22 14:42, Emanuele Giuseppe Esposito wrote:
>>>>
>>>>
>>>> Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
>>>>> Changes to the BlockDriverState graph will have to take the
>>>>> corresponding lock for writing, and therefore cannot be done
>>>>> inside a coroutine.  Move them outside the test body.
>>>>>
>>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>>>> ---
>>>>>   tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
>>>>>   1 file changed, 46 insertions(+), 17 deletions(-)
>>>>>
>>>>> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
>>>>> index 6ae44116fe79..d85083dd4f9e 100644
>>>>> --- a/tests/unit/test-bdrv-drain.c
>>>>> +++ b/tests/unit/test-bdrv-drain.c
>>>>> @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *
>>>>>       }
>>>>>   }
>>>>> +static BlockBackend *blk;
>>>>> +static BlockDriverState *bs, *backing;
>>>>> +
>>>>> +static void test_drv_cb_init(void)
>>>>> +{
>>>>> +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
>>>>> +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
>>>>> +                              &error_abort);
>>>>> +    blk_insert_bs(blk, bs, &error_abort);
>>>>> +
>>>>> +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
>>>>> +    bdrv_set_backing_hd(bs, backing, &error_abort);
>>>>> +}
>>>>> +
>>>>> +static void test_drv_cb_fini(void)
>>>>
>>>> fini stands for "finito"? :)
>>>
>>> No, for finish :)
>>> http://ftp.math.utah.edu/u/ma/hohn/linux/misc/elf/node3.html
>>>
>>>> Anyways, an alternative solution for this is also here (probably coming
>>>> from you too):
>>>> https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html
>>>
>>> Much better.  At least patches 7-8 from that series have to be salvaged,
>>> possibly 10 as well.
>>
>> I wonder if we need a more general solution for this because this test
>> is not the only place that calls this kind of functions in a coroutine.
>> The one I'm aware of in particular is all the .bdrv_co_create
>> implementations, but I'm almost sure there are more.
>>
>> Can we use a yield_to_drain()-like mechanism for these functions? Maybe
>> even something like the opposite of co_wrapper, a no_co_wrapper that
>> generates a foo_co() variant that drops out of coroutine context before
>> calling foo()?
>>
> 
> I implemented something like yield_to_drain as you suggested, but when
> thinking about it aren't we making a fix that will cost us even more
> work in the future? If we use a yield_to_drain-like function, we are
> doing something similar to g_c_w, and losing track of whether the caller
> is a coroutine or not. And the function could then be used potentially
> everywhere. Then we will realize "oh we need to get rid of this and
> split the functions differentiating the coroutine context" and
> eventually go through ALL the callers again to figure what is doing
> what, and implement the same fix of this patch or my series once again.
> 
> Instead, even though this is just a test, we have a clear separation and
> one less case to worry about in the future.
> 
At least the above is valid if the change you are proposing is the
following (tested already, works)


diff --git a/block.c b/block.c
index 6191ac1f44..8d28c1daa4 100644
--- a/block.c
+++ b/block.c
@@ -42,6 +42,7 @@
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/replay.h"
 #include "qemu/notify.h"
 #include "qemu/option.h"
 #include "qemu/coroutine.h"
@@ -2831,6 +2832,94 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
     return permissions[qapi_perm];
 }

+typedef struct {
+    Coroutine *co;
+    BlockDriverState *new_bs;
+    BdrvChild *child;
+    AioContext *ctx;
+    bool done;
+} BdrvCoGraphModData;
+
+static void bdrv_co_graph_mod_bh_cb(void *opaque)
+{
+    BdrvCoGraphModData *data = opaque;
+    Coroutine *co = data->co;
+    BlockDriverState *old_bs = data->child->bs;
+    BlockDriverState *new_bs = data->new_bs;
+
+    aio_context_acquire(data->ctx);
+
+     if (old_bs) {
+        bdrv_dec_in_flight(old_bs);
+    }
+
+    if (new_bs) {
+        bdrv_dec_in_flight(new_bs);
+    }
+    bdrv_replace_child_noperm(data->child, data->new_bs);
+    aio_context_release(data->ctx);
+
+    data->done = true;
+    aio_co_wake(co);
+}
+
+/*
+ * If bdrv_replace_child_noperm is called in a coroutine, defer the work to the
+ * Main Loop by scheduling a BH.
+ */
+static void bdrv_co_yield_to_graph_mod(BdrvChild *child, BlockDriverState *new_bs)
+{
+    BdrvCoGraphModData data;
+    Coroutine *self = qemu_coroutine_self();
+    AioContext *ctx = bdrv_get_aio_context(new_bs);
+    BlockDriverState *old_bs = child->bs;
+
+    GLOBAL_STATE_CODE();
+    assert(qemu_in_coroutine());
+
+    if (old_bs) {
+        bdrv_inc_in_flight(old_bs);
+        ctx = bdrv_get_aio_context(old_bs);
+    }
+
+    if (new_bs) {
+        bdrv_inc_in_flight(new_bs);
+    }
+
+    data = (BdrvCoGraphModData) {
+        .co = self,
+        .new_bs = new_bs,
+        .child = child,
+        .ctx = ctx,
+        .done = false,
+    };
+
+    /*
+     * Temporarily drop the lock across yield or we would get deadlocks.
+     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
+     *
+     * When we yield below, the lock for the current context will be
+     * released, so if this is actually the lock that protects bs, don't drop
+     * it a second time.
+     */
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_release(ctx);
+    }
+    replay_bh_schedule_oneshot_event(ctx, bdrv_co_graph_mod_bh_cb, &data);
+
+    qemu_coroutine_yield();
+    /*
+     * If we are resumed from some other event (such as an aio completion or a
+     * timer callback), it is a bug in the caller that should be fixed.
+     */
+    assert(data.done);
+
+    /* Reacquire the AioContext of bs if we dropped it */
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_acquire(ctx);
+    }
+}
+
 /*
  * Replaces the node that a BdrvChild points to without updating permissions.
  *
@@ -2875,6 +2964,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
     }

+    /* No graph modification is allowed in a coroutine! */
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_graph_mod(child, new_bs);
+        return;
+    }
+
     if (old_bs) {
         if (child->klass->detach) {
             child->klass->detach(child);
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Kevin Wolf 1 year, 4 months ago
Am 09.12.2022 um 13:20 hat Emanuele Giuseppe Esposito geschrieben:
> Am 09/12/2022 um 13:18 schrieb Emanuele Giuseppe Esposito:
> > Am 05/12/2022 um 14:01 schrieb Kevin Wolf:
> >> I wonder if we need a more general solution for this because this test
> >> is not the only place that calls this kind of functions in a coroutine.
> >> The one I'm aware of in particular is all the .bdrv_co_create
> >> implementations, but I'm almost sure there are more.
> >>
> >> Can we use a yield_to_drain()-like mechanism for these functions? Maybe
> >> even something like the opposite of co_wrapper, a no_co_wrapper that
> >> generates a foo_co() variant that drops out of coroutine context before
> >> calling foo()?
> > 
> > I implemented something like yield_to_drain as you suggested, but when
> > thinking about it aren't we making a fix that will cost us even more
> > work in the future? If we use a yield_to_drain-like function, we are
> > doing something similar to g_c_w, and losing track of whether the caller
> > is a coroutine or not.

That's not what I had in mind. I really meant a no_co_wrapper, not a
no_co_wrapper_mixed.

> > And the function could then be used potentially everywhere. Then we
> > will realize "oh we need to get rid of this and split the functions
> > differentiating the coroutine context" and eventually go through ALL
> > the callers again to figure what is doing what, and implement the
> > same fix of this patch or my series once again.
> > 
> > Instead, even though this is just a test, we have a clear separation
> > and one less case to worry about in the future.

I'm not suggesting not fixing the root cause (which is calling functions
in coroutines that aren't supposed to be called in coroutines), but just
wondering if generated functions to drop out of coroutine would be a
nice tool to keep the fixes simple.

We already have bdrv_co_drained_begin/end that use an open-coded version
of this. We would want a bdrv_co_open() that can be used by the
.bdrv_co_create implementations and a bdrv_co_new_open_driver() and
blk_co_insert_bs() that could be used by this test case. All of these
are functions that are normally forbidden to be called from a coroutine,
but when you first drop out of the coroutine, they are fine.

The alternative solution is not just merging this test case patch, but
we still need to fix all of the .bdrv_co_create implementations, even if
we decide to write the code manually instead of generating it.

> At least the above is valid if the change you are proposing is the
> following (tested already, works)

bdrv_replace_child_noperm() is the wrong place to do things like this,
it's way too deep down the call chain. Callers really want things to be
atomic there, and involving a BH would make that impossible.

Kevin
Re: [RFC PATCH] test-bdrv-drain: keep graph manipulations out of coroutines
Posted by Emanuele Giuseppe Esposito 1 year, 4 months ago

Am 02/12/2022 um 18:22 schrieb Paolo Bonzini:
> On 12/2/22 14:42, Emanuele Giuseppe Esposito wrote:
>>
>>
>> Am 02/12/2022 um 14:27 schrieb Paolo Bonzini:
>>> Changes to the BlockDriverState graph will have to take the
>>> corresponding lock for writing, and therefore cannot be done
>>> inside a coroutine.  Move them outside the test body.
>>>
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>   tests/unit/test-bdrv-drain.c | 63 ++++++++++++++++++++++++++----------
>>>   1 file changed, 46 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
>>> index 6ae44116fe79..d85083dd4f9e 100644
>>> --- a/tests/unit/test-bdrv-drain.c
>>> +++ b/tests/unit/test-bdrv-drain.c
>>> @@ -199,25 +199,40 @@ static void do_drain_end_unlocked(enum
>>> drain_type drain_type, BlockDriverState *
>>>       }
>>>   }
>>>   +static BlockBackend *blk;
>>> +static BlockDriverState *bs, *backing;
>>> +
>>> +static void test_drv_cb_init(void)
>>> +{
>>> +    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
>>> +    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
>>> +                              &error_abort);
>>> +    blk_insert_bs(blk, bs, &error_abort);
>>> +
>>> +    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0,
>>> &error_abort);
>>> +    bdrv_set_backing_hd(bs, backing, &error_abort);
>>> +}
>>> +
>>> +static void test_drv_cb_fini(void)
>>
>> fini stands for "finito"? :)
> 
> No, for finish :)
> http://ftp.math.utah.edu/u/ma/hohn/linux/misc/elf/node3.html

Cool :)
> 
>> Anyways, an alternative solution for this is also here (probably coming
>> from you too):
>> https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03517.html
> 
> Much better.  At least patches 7-8 from that series have to be salvaged,
> possibly 10 as well.

Series sent:
https://patchew.org/QEMU/20221205121029.1089209-1-eesposit@redhat.com/

Yes theoretically also patch 9, but I think there's no need to respin
them. If someone is interested they are there.

Thank you,
Emanuele
> 
> Paolo
>