block/blkdebug.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-)
blkdebug events can be called from either non-coroutine or coroutine
contexts. However, some actions (specifically suspend actions and
errors reported with immediately=off) only make sense from within
a coroutine.
Currently, using those actions would lead to an abort() in
qemu_coroutine_yield() ("Co-routine is yielding to no one").
Catch them and print an error instead.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
block/blkdebug.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bbf2948703..bf0aedb17d 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -31,6 +31,7 @@
#include "block/qdict.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/error-report.h"
#include "qapi/qapi-visit-block-core.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qlist.h"
@@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_mutex_unlock(&s->lock);
if (!immediately) {
- aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
- qemu_coroutine_yield();
+ if (qemu_in_coroutine()) {
+ aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
+ qemu_coroutine_yield();
+ } else {
+ error_report("Non-coroutine event %s needs immediately = off\n",
+ BlkdebugEvent_lookup.array[rule->event]);
+ }
}
return -error;
@@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
}
while (actions_count[ACTION_SUSPEND] > 0) {
- qemu_coroutine_yield();
+ if (qemu_in_coroutine()) {
+ qemu_coroutine_yield();
+ } else {
+ error_report("Non-coroutine event %s cannot suspend\n",
+ BlkdebugEvent_lookup.array[event]);
+ }
actions_count[ACTION_SUSPEND]--;
}
}
--
2.37.3
Paolo Bonzini <pbonzini@redhat.com> writes:
> blkdebug events can be called from either non-coroutine or coroutine
> contexts. However, some actions (specifically suspend actions and
> errors reported with immediately=off) only make sense from within
> a coroutine.
>
> Currently, using those action would lead to an abort() in
> qemu_coroutine_yield() ("Co-routine is yielding to no one").
> Catch them and print an error instead.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> block/blkdebug.c | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/block/blkdebug.c b/block/blkdebug.c
> index bbf2948703..bf0aedb17d 100644
> --- a/block/blkdebug.c
> +++ b/block/blkdebug.c
> @@ -31,6 +31,7 @@
> #include "block/qdict.h"
> #include "qemu/module.h"
> #include "qemu/option.h"
> +#include "qemu/error-report.h"
> #include "qapi/qapi-visit-block-core.h"
> #include "qapi/qmp/qdict.h"
> #include "qapi/qmp/qlist.h"
> @@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
>
> qemu_mutex_unlock(&s->lock);
> if (!immediately) {
> - aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
> - qemu_coroutine_yield();
> + if (qemu_in_coroutine()) {
> + aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
> + qemu_coroutine_yield();
> + } else {
> + error_report("Non-coroutine event %s needs immediately = off\n",
> + BlkdebugEvent_lookup.array[rule->event]);
rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(),
blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(),
blkdebug_co_block_status() (all marked coroutine_fn), and
blkdebug_co_flush() (which looks like it should be marked coroutine_fn).
Ignorant question: how could it be called outside coroutine context?
Also, code smell: reporting an error without taking an error path. But
let's worry about that only after I understand the problem you're trying
to fix.
> + }
> }
>
> return -error;
> @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
> }
>
> while (actions_count[ACTION_SUSPEND] > 0) {
> - qemu_coroutine_yield();
> + if (qemu_in_coroutine()) {
> + qemu_coroutine_yield();
> + } else {
> + error_report("Non-coroutine event %s cannot suspend\n",
> + BlkdebugEvent_lookup.array[event]);
> + }
> actions_count[ACTION_SUSPEND]--;
> }
> }
On 10/13/22 12:56, Markus Armbruster wrote:
> rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(),
> blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(),
> blkdebug_co_block_status() (all marked coroutine_fn), and
> blkdebug_co_flush() (which looks like it should be marked coroutine_fn).
Yes (separate patch sent, https://lore.kernel.org/qemu-devel/20221013123711.620631-11-pbonzini@redhat.com/T/#u).
> Ignorant question: how could it be called outside coroutine context?
You're right, only blkdebug_debug_event() can be called outside coroutine context. I confused process_rule() (called by blkdebug_debug_event(), both inside and outside coroutine context) with rule_check() (called in coroutine context).
> Also, code smell: reporting an error without taking an error path. But
> let's worry about that only after I understand the problem you're trying
> to fix.
Unfortunately there's no way to know in advance if an event will be called inside vs. outside a coroutine. I can keep the abort() if you think it's preferable, so what you get is still a crash but with a nicer error message. Since this is debugging code either solution has pros and cons.
Paolo
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 10/13/22 12:56, Markus Armbruster wrote:
>> rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(),
>> blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(),
>> blkdebug_co_block_status() (all marked coroutine_fn), and
>> blkdebug_co_flush() (which looks like it should be marked coroutine_fn).
>
> Yes (separate patch sent, https://lore.kernel.org/qemu-devel/20221013123711.620631-11-pbonzini@redhat.com/T/#u).
>
>> Ignorant question: how could it be called outside coroutine context?
>
> You're right, only blkdebug_debug_event() can be called outside coroutine context. I confused process_rule() (called by
> blkdebug_debug_event(), both inside and outside coroutine context) with rule_check() (called in coroutine context).
Let's drop the rule_check() hunk then.
>> Also, code smell: reporting an error without taking an error path. But
>> let's worry about that only after I understand the problem you're trying
>> to fix.
>
> Unfortunately there's no way to know in advance if an event will be called inside vs. outside a coroutine. I can keep the abort() if you
> think it's preferrable, so what you get is still a crash but with a nicer error message. Since this is debugging code either solution has
> pros and cons.
Let's have another look at the remaining patch hunk:
@@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
}
while (actions_count[ACTION_SUSPEND] > 0) {
- qemu_coroutine_yield();
+ if (qemu_in_coroutine()) {
+ qemu_coroutine_yield();
+ } else {
+ error_report("Non-coroutine event %s cannot suspend\n",
+ BlkdebugEvent_lookup.array[event]);
+ }
actions_count[ACTION_SUSPEND]--;
}
}
If I understand this correctly, the user asked us to suspend, but it now
turns out suspend doesn't make sense, so we ignore the request.
Correct?
warn_report()? info_report()?
On 10/13/22 15:28, Markus Armbruster wrote:
> Let's have another look at the remaining patch hunk:
>
> @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
> }
>
> while (actions_count[ACTION_SUSPEND] > 0) {
> - qemu_coroutine_yield();
> + if (qemu_in_coroutine()) {
> + qemu_coroutine_yield();
> + } else {
> + error_report("Non-coroutine event %s cannot suspend\n",
> + BlkdebugEvent_lookup.array[event]);
> + }
> actions_count[ACTION_SUSPEND]--;
> }
> }
>
> If I understand this correctly, the user asked us to suspend, but it now
> turns out suspend doesn't make sense, so we ignore the request.
> Correct?
Yes.
> warn_report()? info_report()?
Sure, warn_report() can work too.
Paolo
© 2016 - 2026 Red Hat, Inc.