block/blkdebug.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-)
blkdebug events can be called from either non-coroutine or coroutine
contexts. However, some actions (specifically suspend actions and
errors reported with immediately=off) only make sense from within
a coroutine.
Currently, using those actions would lead to an abort() in
qemu_coroutine_yield() ("Co-routine is yielding to no one").
Catch them and print an error instead.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
block/blkdebug.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bbf2948703..bf0aedb17d 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -31,6 +31,7 @@
#include "block/qdict.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/error-report.h"
#include "qapi/qapi-visit-block-core.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qlist.h"
@@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_mutex_unlock(&s->lock);
if (!immediately) {
- aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
- qemu_coroutine_yield();
+ if (qemu_in_coroutine()) {
+ aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
+ qemu_coroutine_yield();
+ } else {
+ error_report("Non-coroutine event %s needs immediately = off\n",
+ BlkdebugEvent_lookup.array[rule->event]);
+ }
}
return -error;
@@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
}
while (actions_count[ACTION_SUSPEND] > 0) {
- qemu_coroutine_yield();
+ if (qemu_in_coroutine()) {
+ qemu_coroutine_yield();
+ } else {
+ error_report("Non-coroutine event %s cannot suspend\n",
+ BlkdebugEvent_lookup.array[event]);
+ }
actions_count[ACTION_SUSPEND]--;
}
}
--
2.37.3
Paolo Bonzini <pbonzini@redhat.com> writes: > blkdebug events can be called from either non-coroutine or coroutine > contexts. However, some actions (specifically suspend actions and > errors reported with immediately=off) only make sense from within > a coroutine. > > Currently, using those action would lead to an abort() in > qemu_coroutine_yield() ("Co-routine is yielding to no one"). > Catch them and print an error instead. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > block/blkdebug.c | 17 ++++++++++++++--- > 1 file changed, 14 insertions(+), 3 deletions(-) > > diff --git a/block/blkdebug.c b/block/blkdebug.c > index bbf2948703..bf0aedb17d 100644 > --- a/block/blkdebug.c > +++ b/block/blkdebug.c > @@ -31,6 +31,7 @@ > #include "block/qdict.h" > #include "qemu/module.h" > #include "qemu/option.h" > +#include "qemu/error-report.h" > #include "qapi/qapi-visit-block-core.h" > #include "qapi/qmp/qdict.h" > #include "qapi/qmp/qlist.h" > @@ -623,8 +624,13 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, > > qemu_mutex_unlock(&s->lock); > if (!immediately) { > - aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); > - qemu_coroutine_yield(); > + if (qemu_in_coroutine()) { > + aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); > + qemu_coroutine_yield(); > + } else { > + error_report("Non-coroutine event %s needs immediately = off\n", > + BlkdebugEvent_lookup.array[rule->event]); rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(), blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(), blkdebug_co_block_status() (all marked coroutine_fn), and blkdebug_co_flush() (which looks like it should be marked coroutine_fn). Ignorant question: how could it be called outside coroutine context? Also, code smell: reporting an error without taking an error path. But let's worry about that only after I understand the problem you're trying to fix. 
> + } > } > > return -error; > @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) > } > > while (actions_count[ACTION_SUSPEND] > 0) { > - qemu_coroutine_yield(); > + if (qemu_in_coroutine()) { > + qemu_coroutine_yield(); > + } else { > + error_report("Non-coroutine event %s cannot suspend\n", > + BlkdebugEvent_lookup.array[event]); > + } > actions_count[ACTION_SUSPEND]--; > } > }
On 10/13/22 12:56, Markus Armbruster wrote: > rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(), > blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(), > blkdebug_co_block_status() (all marked coroutine_fn), and > blkdebug_co_flush() (which looks like it should be marked coroutine_fn). Yes (separate patch sent, https://lore.kernel.org/qemu-devel/20221013123711.620631-11-pbonzini@redhat.com/T/#u). > Ignorant question: how could it be called outside coroutine context? You're right, only blkdebug_debug_event() can be called outside coroutine context. I confused process_rule() (called by blkdebug_debug_event(), both inside and outside coroutine context) with rule_check() (called in coroutine context). > Also, code smell: reporting an error without taking an error path. But > let's worry about that only after I understand the problem you're trying > to fix. Unfortunately there's no way to know in advance if an event will be called inside vs. outside a coroutine. I can keep the abort() if you think it's preferable, so what you get is still a crash but with a nicer error message. Since this is debugging code, either solution has pros and cons. Paolo
Paolo Bonzini <pbonzini@redhat.com> writes: > On 10/13/22 12:56, Markus Armbruster wrote: >> rule_check() is called from blkdebug_co_preadv(), blkdebug_co_pwritev(), >> blkdebug_co_pwrite_zeroes(), blkdebug_co_pdiscard(), >> blkdebug_co_block_status() (all marked coroutine_fn), and >> blkdebug_co_flush() (which looks like it should be marked coroutine_fn). > > Yes (separate patch sent, https://lore.kernel.org/qemu-devel/20221013123711.620631-11-pbonzini@redhat.com/T/#u). > >> Ignorant question: how could it be called outside coroutine context? > > You're right, only blkdebug_debug_event() can be called outside coroutine context. I confused process_rule() (called by > blkdebug_debug_event(), both inside and outside coroutine context) with rule_check() (called in coroutine context). Let's drop the rule_check() hunk then. >> Also, code smell: reporting an error without taking an error path. But >> let's worry about that only after I understand the problem you're trying >> to fix. > > Unfortunately there's no way to know in advance if an event will be called inside vs. outside a coroutine. I can keep the abort() if you > think it's preferrable, so what you get is still a crash but with a nicer error message. Since this is debugging code either solution has > pros and cons. Let's have another look at the remaining patch hunk: @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) } while (actions_count[ACTION_SUSPEND] > 0) { - qemu_coroutine_yield(); + if (qemu_in_coroutine()) { + qemu_coroutine_yield(); + } else { + error_report("Non-coroutine event %s cannot suspend\n", + BlkdebugEvent_lookup.array[event]); + } actions_count[ACTION_SUSPEND]--; } } If I understand this correctly, the user asked us to suspend, but it now turns out suspend doesn't make sense, so we ignore the request. Correct? warn_report()? info_report()?
On 10/13/22 15:28, Markus Armbruster wrote: > Let's have another look at the remaining patch hunk: > > @@ -858,7 +864,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) > } > > while (actions_count[ACTION_SUSPEND] > 0) { > - qemu_coroutine_yield(); > + if (qemu_in_coroutine()) { > + qemu_coroutine_yield(); > + } else { > + error_report("Non-coroutine event %s cannot suspend\n", > + BlkdebugEvent_lookup.array[event]); > + } > actions_count[ACTION_SUSPEND]--; > } > } > > If I understand this correctly, the user asked us to suspend, but it now > turns out suspend doesn't make sense, so we ignore the request. > Correct? Yes. > warn_report()? info_report()? Sure, warn_report() can work too. Paolo
© 2016 - 2024 Red Hat, Inc.