block/qed.c | 6 ++++++ 1 file changed, 6 insertions(+)
It's not possible to access the image file while there is an incoming
migration in progress, the QEMU process doesn't hold any locks to the
storage at this point so nodes are inactive. Attempting to drain leads
to an assert at bdrv_co_write_req_prepare():
assert(!(bs->open_flags & BDRV_O_INACTIVE))
The issue is reproducible by running iotest 181 on a host under cpu
load. The migration must coincide with the header already containing
the QED_F_NEED_CHECK flag.
The sequence of events is as follows, with the respective call stacks
referenced below:
During block device init, bdrv_qed_attach_aio_context() starts the
'need_check' timer. The timer will not fire during incoming migration
as it uses QEMU_CLOCK_VIRTUAL (to avoid this very issue, as the code
comment indicates). (0)
However, there's still bdrv_qed_drain_begin() which uses the fact that
the timer is live to decide whether to start the
qed_need_check_timer_entry() directly. (1)
The qed_need_check_timer_entry() eventually calls into
qed_write_header() -> bdrv_co_pwrite() leading to the assert. (2)
Since we don't have an API for checking if a timer is enabled, an
alternative is to skip this logic whenever the runstate is
INMIGRATE. This actually matches the setting of the BDRV_O_INACTIVE
flag at blockdev.c.
The stacks:
(0) == issues timer_mod ==
#6 in qed_start_need_check_timer at ../block/qed.c:340
#7 in bdrv_qed_attach_aio_context at ../block/qed.c:373
#8 in bdrv_qed_do_open at ../block/qed.c:556
#9 in bdrv_qed_open_entry at ../block/qed.c:582
#10 in coroutine_trampoline at ../util/coroutine-ucontext.c:175
#0 in qemu_coroutine_switch<+120> at ../util/coroutine-ucontext.c:321
#1 in qemu_aio_coroutine_enter<+356> at ../util/qemu-coroutine.c:293
#2 in aio_co_enter<+179> at ../util/async.c:710
#3 in aio_co_wake<+53> at ../util/async.c:695
#4 in thread_pool_co_cb<+47> at ../util/thread-pool.c:283
#5 in thread_pool_completion_bh<+241> at ../util/thread-pool.c:202
#6 in aio_bh_call<+109> at ../util/async.c:173
#7 in aio_bh_poll<+299> at ../util/async.c:220
#8 in aio_poll<+690> at ../util/aio-posix.c:745
#9 in bdrv_qed_open<+392> at ../block/qed.c:607
#10 in bdrv_open_driver<+327> at ../block.c:1678
#11 in bdrv_open_common<+1619> at ../block.c:2008
#12 in bdrv_open_inherit<+2556> at ../block.c:4191
#13 in bdrv_open<+118> at ../block.c:4286
#14 in blk_new_open<+199> at ../block/block-backend.c:458
#15 in blockdev_init<+2011> at ../blockdev.c:612
#16 in drive_new<+3008> at ../blockdev.c:1008
#17 in drive_init_func<+51> at ../system/vl.c:662
#18 in qemu_opts_foreach<+227> at ../util/qemu-option.c:1148
#19 in configure_blockdev<+350> at ../system/vl.c:721
#20 in qemu_create_early_backends<+343> at ../system/vl.c:2076
#21 in qemu_init<+12483> at ../system/vl.c:3778
#22 in main<+46> at ../system/main.c:71
(1) == sees timer_pending ==
#6 in bdrv_qed_drain_begin at ../block/qed.c:391
#7 in bdrv_do_drained_begin at ../block/io.c:366
#8 in bdrv_do_drained_begin_quiesce at ../block/io.c:386
#9 in bdrv_child_cb_drained_begin at ../block.c:1207
#10 in bdrv_parent_drained_begin_single at ../block/io.c:133
#11 in bdrv_parent_drained_begin at ../block/io.c:64
#12 in bdrv_do_drained_begin at ../block/io.c:364
#13 in bdrv_drained_begin at ../block/io.c:393
#14 in blk_drain at ../block/block-backend.c:2101
#15 in blk_unref at ../block/block-backend.c:544
#16 in bdrv_open_inherit at ../block.c:4197
#17 in bdrv_open at ../block.c:4286
#18 in blk_new_open at ../block/block-backend.c:458
#19 in blockdev_init at ../blockdev.c:612
#20 in drive_new at ../blockdev.c:1008
#21 in drive_init_func at ../system/vl.c:662
#22 in qemu_opts_foreach at ../util/qemu-option.c:1148
#23 in configure_blockdev at ../system/vl.c:721
#24 in qemu_create_early_backends at ../system/vl.c:2076
#25 in qemu_init at ../system/vl.c:3778
#26 in main at ../system/main.c:71
(2) == crashes ==
#5 in __assert_fail (assertion="!(bs->open_flags & BDRV_O_INACTIVE)", file="../block/io.c", line=1977
#6 in bdrv_co_write_req_prepare at ../block/io.c:1977
#7 in bdrv_aligned_pwritev at ../block/io.c:2099
#8 in bdrv_co_pwritev_part at ../block/io.c:2316
#9 in bdrv_co_pwritev at ../block/io.c:2233
#10 in bdrv_co_pwrite at ../include/block/block_int-io.h:77
#11 in qed_write_header at ../block/qed.c:128
#12 in qed_need_check_timer at ../block/qed.c:305
#13 in qed_need_check_timer_entry at ../block/qed.c:319
Note that this issue is not exactly the same as what's been reported
in Gitlab, but given how easily this reproduces, I imagine it has to
be happening in that setup as well.
Link: https://gitlab.com/qemu-project/qemu/-/work_items/3515
Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
CI run: https://gitlab.com/farosas/qemu/-/pipelines/2557314306
---
block/qed.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/block/qed.c b/block/qed.c
index da23a83d62..ccf32bb4ae 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,6 +21,7 @@
#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/memalign.h"
+#include "system/runstate.h"
#include "trace.h"
#include "qed.h"
#include "system/block-backend.h"
@@ -373,6 +374,11 @@ static void bdrv_qed_drain_begin(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
+ /* Nodes are inactive while waiting for an incoming migration. */
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ return;
+ }
+
/* Fire the timer immediately in order to start doing I/O as soon as the
* header is flushed.
*/
--
2.51.0
Am 28.05.2026 um 15:55 hat Fabiano Rosas geschrieben:
> It's not possible to access the image file while there is an incoming
> migration in progress, the QEMU process doesn't hold any locks to the
> storage at this point so nodes are inactive. Attempting to drain leads
> to an assert at bdrv_co_write_req_prepare():
>
> assert(!(bs->open_flags & BDRV_O_INACTIVE))
>
> The issue is reproducible by running iotest 181 on a host under cpu
> load. The migration must coincide with the header already containing
> the QED_F_NEED_CHECK flag.
>
> The sequence of events is as follows, with the respective call stacks
> referenced below:
>
> During block device init, bdrv_qed_attach_aio_context() starts the
> 'need_check' timer. The timer will not fire during incoming migration
> as it uses QEMU_CLOCK_VIRTUAL (to avoid this very issue, as the code
> comment indicates). (0)
>
> However, there's still bdrv_qed_drain_begin() which uses the fact that
> the timer is live to decide whether to start the
> qed_need_check_timer_entry() directly. (1)
>
> The qed_need_check_timer_entry() eventually calls into
> qed_write_header() -> bdrv_co_pwrite() leading to the assert. (2)
>
> Since we don't have an API for checking if a timer is enabled, an
> alternative is to skip this logic whenever the runstate is
> INMIGRATE. This actually matches the setting of the BDRV_O_INACTIVE
> flag at blockdev.c.
I'm not sure what you mean by "checking if a timer is enabled", because
bdrv_qed_drain_begin() has a timer_pending() check that sounds exactly
what you say doesn't exist.
But it's also not clear to me why s->need_check_timer is true on an
inactive image. I don't think this is supposed to be the case? It looks
like qed is missing a .bdrv_inactivate implementation that cancels the
timer after flushing everything (including the header) to disk.
>
> The stacks:
>
> (0) == issues timer_mod ==
> #6 in qed_start_need_check_timer at ../block/qed.c:340
> #7 in bdrv_qed_attach_aio_context at ../block/qed.c:373
> #8 in bdrv_qed_do_open at ../block/qed.c:556
> #9 in bdrv_qed_open_entry at ../block/qed.c:582
> #10 in coroutine_trampoline at ../util/coroutine-ucontext.c:175
> #0 in qemu_coroutine_switch<+120> at ../util/coroutine-ucontext.c:321
> #1 in qemu_aio_coroutine_enter<+356> at ../util/qemu-coroutine.c:293
> #2 in aio_co_enter<+179> at ../util/async.c:710
> #3 in aio_co_wake<+53> at ../util/async.c:695
> #4 in thread_pool_co_cb<+47> at ../util/thread-pool.c:283
> #5 in thread_pool_completion_bh<+241> at ../util/thread-pool.c:202
> #6 in aio_bh_call<+109> at ../util/async.c:173
> #7 in aio_bh_poll<+299> at ../util/async.c:220
> #8 in aio_poll<+690> at ../util/aio-posix.c:745
> #9 in bdrv_qed_open<+392> at ../block/qed.c:607
> #10 in bdrv_open_driver<+327> at ../block.c:1678
> #11 in bdrv_open_common<+1619> at ../block.c:2008
> #12 in bdrv_open_inherit<+2556> at ../block.c:4191
> #13 in bdrv_open<+118> at ../block.c:4286
> #14 in blk_new_open<+199> at ../block/block-backend.c:458
> #15 in blockdev_init<+2011> at ../blockdev.c:612
> #16 in drive_new<+3008> at ../blockdev.c:1008
> #17 in drive_init_func<+51> at ../system/vl.c:662
> #18 in qemu_opts_foreach<+227> at ../util/qemu-option.c:1148
> #19 in configure_blockdev<+350> at ../system/vl.c:721
> #20 in qemu_create_early_backends<+343> at ../system/vl.c:2076
> #21 in qemu_init<+12483> at ../system/vl.c:3778
> #22 in main<+46> at ../system/main.c:71
>
> (1) == sees timer_pending ==
> #6 in bdrv_qed_drain_begin at ../block/qed.c:391
> #7 in bdrv_do_drained_begin at ../block/io.c:366
> #8 in bdrv_do_drained_begin_quiesce at ../block/io.c:386
> #9 in bdrv_child_cb_drained_begin at ../block.c:1207
> #10 in bdrv_parent_drained_begin_single at ../block/io.c:133
> #11 in bdrv_parent_drained_begin at ../block/io.c:64
> #12 in bdrv_do_drained_begin at ../block/io.c:364
> #13 in bdrv_drained_begin at ../block/io.c:393
> #14 in blk_drain at ../block/block-backend.c:2101
> #15 in blk_unref at ../block/block-backend.c:544
> #16 in bdrv_open_inherit at ../block.c:4197
> #17 in bdrv_open at ../block.c:4286
> #18 in blk_new_open at ../block/block-backend.c:458
> #19 in blockdev_init at ../blockdev.c:612
> #20 in drive_new at ../blockdev.c:1008
> #21 in drive_init_func at ../system/vl.c:662
> #22 in qemu_opts_foreach at ../util/qemu-option.c:1148
> #23 in configure_blockdev at ../system/vl.c:721
> #24 in qemu_create_early_backends at ../system/vl.c:2076
> #25 in qemu_init at ../system/vl.c:3778
> #26 in main at ../system/main.c:71
>
> (2) == crashes ==
> #5 in __assert_fail (assertion="!(bs->open_flags & BDRV_O_INACTIVE)", file="../block/io.c", line=1977
> #6 in bdrv_co_write_req_prepare at ../block/io.c:1977
> #7 in bdrv_aligned_pwritev at ../block/io.c:2099
> #8 in bdrv_co_pwritev_part at ../block/io.c:2316
> #9 in bdrv_co_pwritev at ../block/io.c:2233
> #10 in bdrv_co_pwrite at ../include/block/block_int-io.h:77
> #11 in qed_write_header at ../block/qed.c:128
> #12 in qed_need_check_timer at ../block/qed.c:305
> #13 in qed_need_check_timer_entry at ../block/qed.c:319
>
> Note that this issue is not exactly the same as what's been reported
> in Gitlab, but given how easily this reproduces, I imagine it has to
> be happening in that setup as well.
>
> Link: https://gitlab.com/qemu-project/qemu/-/work_items/3515
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
> CI run: https://gitlab.com/farosas/qemu/-/pipelines/2557314306
> ---
> block/qed.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/block/qed.c b/block/qed.c
> index da23a83d62..ccf32bb4ae 100644
> --- a/block/qed.c
> +++ b/block/qed.c
> @@ -21,6 +21,7 @@
> #include "qemu/module.h"
> #include "qemu/option.h"
> #include "qemu/memalign.h"
> +#include "system/runstate.h"
> #include "trace.h"
> #include "qed.h"
> #include "system/block-backend.h"
> @@ -373,6 +374,11 @@ static void bdrv_qed_drain_begin(BlockDriverState *bs)
> {
> BDRVQEDState *s = bs->opaque;
>
> + /* Nodes are inactive while waiting for an incoming migration. */
> + if (runstate_check(RUN_STATE_INMIGRATE)) {
If you're interested in whether the node is inactive, this is what you
should check. RUN_STATE_INMIGRATE is only one of the conditions in which
it might (but doesn't have to) be true. There is bdrv_is_inactive() to
check this.
Of course, if we're fixing the root cause (that QED_F_NEED_CHECK and
s->need_check_timer are set on an inactive image), this hunk might not
be needed at all.
> + return;
> + }
> +
> /* Fire the timer immediately in order to start doing I/O as soon as the
> * header is flushed.
> */
Kevin
Kevin Wolf <kwolf@redhat.com> writes:
> Am 28.05.2026 um 15:55 hat Fabiano Rosas geschrieben:
>> It's not possible to access the image file while there is an incoming
>> migration in progress, the QEMU process doesn't hold any locks to the
>> storage at this point so nodes are inactive. Attempting to drain leads
>> to an assert at bdrv_co_write_req_prepare():
>>
>> assert(!(bs->open_flags & BDRV_O_INACTIVE))
>>
>> The issue is reproducible by running iotest 181 on a host under cpu
>> load. The migration must coincide with the header already containing
>> the QED_F_NEED_CHECK flag.
>>
>> The sequence of events is as follows, with the respective call stacks
>> referenced below:
>>
>> During block device init, bdrv_qed_attach_aio_context() starts the
>> 'need_check' timer. The timer will not fire during incoming migration
>> as it uses QEMU_CLOCK_VIRTUAL (to avoid this very issue, as the code
>> comment indicates). (0)
>>
>> However, there's still bdrv_qed_drain_begin() which uses the fact that
>> the timer is live to decide whether to start the
>> qed_need_check_timer_entry() directly. (1)
>>
>> The qed_need_check_timer_entry() eventually calls into
>> qed_write_header() -> bdrv_co_pwrite() leading to the assert. (2)
>>
>> Since we don't have an API for checking if a timer is enabled, an
>> alternative is to skip this logic whenever the runstate is
>> INMIGRATE. This actually matches the setting of the BDRV_O_INACTIVE
>> flag at blockdev.c.
>
> I'm not sure what you mean by "checking if a timer is enabled", because
> bdrv_qed_drain_begin() has a timer_pending() check that sounds exactly
> what you say doesn't exist.
>
Sorry, I meant to say checking if a *clock* is enabled.
The code comment at qed_start_need_check_timer() says:
/* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for
* migration.
*/
It seems to assume that since QEMU_CLOCK_VIRTUAL is disabled when vcpus
are paused then the timer function will never execute while the image is
inactive. If bdrv_qed_drain_begin() could check clock->enabled, then it
would be under the same assumption.
> But it's also not clear to me why s->need_check_timer is true on an
> inactive image. I don't think this is supposed to be the case?
bdrv_qed_do_open() reads the header during QEMU startup. Whatever is set
in the header at that moment will be picked up, including
QED_F_NEED_CHECK, which is what the code uses to determine the timer
should be started.
> It looks like qed is missing a .bdrv_inactivate implementation that
> cancels the timer after flushing everything (including the header) to
> disk.
>
Hmm, but bdrv_inactivate happens on the migration source. The
destination machine could still start at any moment while the image is
dirty and see the QED_F_NEED_CHECK flag. Even doing something on the
destination machine wouldn't help I think given how early the crash
happens.
>>
>> The stacks:
>>
>> (0) == issues timer_mod ==
>> #6 in qed_start_need_check_timer at ../block/qed.c:340
>> #7 in bdrv_qed_attach_aio_context at ../block/qed.c:373
>> #8 in bdrv_qed_do_open at ../block/qed.c:556
>> #9 in bdrv_qed_open_entry at ../block/qed.c:582
>> #10 in coroutine_trampoline at ../util/coroutine-ucontext.c:175
>> #0 in qemu_coroutine_switch<+120> at ../util/coroutine-ucontext.c:321
>> #1 in qemu_aio_coroutine_enter<+356> at ../util/qemu-coroutine.c:293
>> #2 in aio_co_enter<+179> at ../util/async.c:710
>> #3 in aio_co_wake<+53> at ../util/async.c:695
>> #4 in thread_pool_co_cb<+47> at ../util/thread-pool.c:283
>> #5 in thread_pool_completion_bh<+241> at ../util/thread-pool.c:202
>> #6 in aio_bh_call<+109> at ../util/async.c:173
>> #7 in aio_bh_poll<+299> at ../util/async.c:220
>> #8 in aio_poll<+690> at ../util/aio-posix.c:745
>> #9 in bdrv_qed_open<+392> at ../block/qed.c:607
>> #10 in bdrv_open_driver<+327> at ../block.c:1678
>> #11 in bdrv_open_common<+1619> at ../block.c:2008
>> #12 in bdrv_open_inherit<+2556> at ../block.c:4191
>> #13 in bdrv_open<+118> at ../block.c:4286
>> #14 in blk_new_open<+199> at ../block/block-backend.c:458
>> #15 in blockdev_init<+2011> at ../blockdev.c:612
>> #16 in drive_new<+3008> at ../blockdev.c:1008
>> #17 in drive_init_func<+51> at ../system/vl.c:662
>> #18 in qemu_opts_foreach<+227> at ../util/qemu-option.c:1148
>> #19 in configure_blockdev<+350> at ../system/vl.c:721
>> #20 in qemu_create_early_backends<+343> at ../system/vl.c:2076
>> #21 in qemu_init<+12483> at ../system/vl.c:3778
>> #22 in main<+46> at ../system/main.c:71
>>
>> (1) == sees timer_pending ==
>> #6 in bdrv_qed_drain_begin at ../block/qed.c:391
>> #7 in bdrv_do_drained_begin at ../block/io.c:366
>> #8 in bdrv_do_drained_begin_quiesce at ../block/io.c:386
>> #9 in bdrv_child_cb_drained_begin at ../block.c:1207
>> #10 in bdrv_parent_drained_begin_single at ../block/io.c:133
>> #11 in bdrv_parent_drained_begin at ../block/io.c:64
>> #12 in bdrv_do_drained_begin at ../block/io.c:364
>> #13 in bdrv_drained_begin at ../block/io.c:393
>> #14 in blk_drain at ../block/block-backend.c:2101
>> #15 in blk_unref at ../block/block-backend.c:544
>> #16 in bdrv_open_inherit at ../block.c:4197
>> #17 in bdrv_open at ../block.c:4286
>> #18 in blk_new_open at ../block/block-backend.c:458
>> #19 in blockdev_init at ../blockdev.c:612
>> #20 in drive_new at ../blockdev.c:1008
>> #21 in drive_init_func at ../system/vl.c:662
>> #22 in qemu_opts_foreach at ../util/qemu-option.c:1148
>> #23 in configure_blockdev at ../system/vl.c:721
>> #24 in qemu_create_early_backends at ../system/vl.c:2076
>> #25 in qemu_init at ../system/vl.c:3778
>> #26 in main at ../system/main.c:71
>>
>> (2) == crashes ==
>> #5 in __assert_fail (assertion="!(bs->open_flags & BDRV_O_INACTIVE)", file="../block/io.c", line=1977
>> #6 in bdrv_co_write_req_prepare at ../block/io.c:1977
>> #7 in bdrv_aligned_pwritev at ../block/io.c:2099
>> #8 in bdrv_co_pwritev_part at ../block/io.c:2316
>> #9 in bdrv_co_pwritev at ../block/io.c:2233
>> #10 in bdrv_co_pwrite at ../include/block/block_int-io.h:77
>> #11 in qed_write_header at ../block/qed.c:128
>> #12 in qed_need_check_timer at ../block/qed.c:305
>> #13 in qed_need_check_timer_entry at ../block/qed.c:319
>>
>> Note that this issue is not exactly the same as what's been reported
>> in Gitlab, but given how easily this reproduces, I imagine it has to
>> be happening in that setup as well.
>>
>> Link: https://gitlab.com/qemu-project/qemu/-/work_items/3515
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> ---
>> CI run: https://gitlab.com/farosas/qemu/-/pipelines/2557314306
>> ---
>> block/qed.c | 6 ++++++
>> 1 file changed, 6 insertions(+)
>>
>> diff --git a/block/qed.c b/block/qed.c
>> index da23a83d62..ccf32bb4ae 100644
>> --- a/block/qed.c
>> +++ b/block/qed.c
>> @@ -21,6 +21,7 @@
>> #include "qemu/module.h"
>> #include "qemu/option.h"
>> #include "qemu/memalign.h"
>> +#include "system/runstate.h"
>> #include "trace.h"
>> #include "qed.h"
>> #include "system/block-backend.h"
>> @@ -373,6 +374,11 @@ static void bdrv_qed_drain_begin(BlockDriverState *bs)
>> {
>> BDRVQEDState *s = bs->opaque;
>>
>> + /* Nodes are inactive while waiting for an incoming migration. */
>> + if (runstate_check(RUN_STATE_INMIGRATE)) {
>
> If you're interested in whether the node is inactive, this is what you
> should check. RUN_STATE_INMIGRATE is only one of the conditions in which
> it might (but doesn't have to) be true. There is bdrv_is_inactive() to
> check this.
>
> Of course, if we're fixing the root cause (that QED_F_NEED_CHECK and
> s->need_check_timer are set on an inactive image), this hunk might not
> be needed at all.
>
>> + return;
>> + }
>> +
>> /* Fire the timer immediately in order to start doing I/O as soon as the
>> * header is flushed.
>> */
>
> Kevin
© 2016 - 2026 Red Hat, Inc.