When the reconnect in NBD client is in progress, the iochannel used for
NBD connection doesn't exist. Therefore an attempt to detach it from
the aio_context of the parent BlockDriverState results in a NULL pointer
dereference.
The problem is triggerable, in particular, when an outgoing migration is
about to finish, and stopping the dataplane tries to move the
BlockDriverState from the iothread aio_context to the main loop. If the
NBD connection is lost before this point, and the NBD client has entered
the reconnect procedure, QEMU crashes:
at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452
at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151
new_context=new_context@entry=0x562a260c9580,
ignore=ignore@entry=0x7feeadc9b780)
at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230
(bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580,
ignore_child=<optimized out>, errp=<optimized out>)
at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332
new_context=0x562a260c9580,
update_root_node=update_root_node@entry=true, errp=errp@entry=0x0)
at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989
new_context=<optimized out>, errp=errp@entry=0x0)
at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010
out>)
at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292
at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245
running=0, state=<optimized out>)
at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220
state=state@entry=RUN_STATE_FINISH_MIGRATE)
at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275
send_stop=<optimized out>)
at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032
at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914
at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275
at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439
at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519
at pthread_create.c:333
oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0
<__func__.28102>, newlen=0)
at ../sysdeps/unix/sysv/linux/sysctl.c:30
Fix it by checking that the iochannel is non-null before trying to
detach it from the aio_context. If it is null, no detaching is needed,
and it will get reattached in the proper aio_context once the connection
is reestablished.
Signed-off-by: Roman Kagan <rvkagan@yandex-team.ru>
---
block/nbd.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/block/nbd.c b/block/nbd.c
index 42e10c7c93..bcd6641e90 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -235,7 +235,14 @@ static void nbd_client_detach_aio_context(BlockDriverState *bs)
/* Timer is deleted in nbd_client_co_drain_begin() */
assert(!s->reconnect_delay_timer);
- qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+ /*
+ * If reconnect is in progress we may have no ->ioc. It will be
+ * re-instantiated in the proper aio context once the connection is
+ * reestablished.
+ */
+ if (s->ioc) {
+ qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+ }
}
static void nbd_client_attach_aio_context_bh(void *opaque)
--
2.29.2
28.01.2021 23:14, Roman Kagan wrote:
> When the reconnect in NBD client is in progress, the iochannel used for
> NBD connection doesn't exist. Therefore an attempt to detach it from
> the aio_context of the parent BlockDriverState results in a NULL pointer
> dereference.
>
> The problem is triggerable, in particular, when an outgoing migration is
> about to finish, and stopping the dataplane tries to move the
> BlockDriverState from the iothread aio_context to the main loop. If the
> NBD connection is lost before this point, and the NBD client has entered
> the reconnect procedure, QEMU crashes:
>
> at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452
> at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151
> new_context=new_context@entry=0x562a260c9580,
> ignore=ignore@entry=0x7feeadc9b780)
> at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230
> (bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580,
> ignore_child=<optimized out>, errp=<optimized out>)
> at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332
> new_context=0x562a260c9580,
> update_root_node=update_root_node@entry=true, errp=errp@entry=0x0)
> at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989
> new_context=<optimized out>, errp=errp@entry=0x0)
> at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010
> out>)
> at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292
> at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245
> running=0, state=<optimized out>)
> at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220
> state=state@entry=RUN_STATE_FINISH_MIGRATE)
> at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275
> send_stop=<optimized out>)
> at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032
> at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914
> at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275
> at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439
> at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519
> at pthread_create.c:333
> oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0
> <__func__.28102>, newlen=0)
> at ../sysdeps/unix/sysv/linux/sysctl.c:30
function names are somehow lost :(
>
> Fix it by checking that the iochannel is non-null before trying to
> detach it from the aio_context. If it is null, no detaching is needed,
> and it will get reattached in the proper aio_context once the connection
> is reestablished.
>
> Signed-off-by: Roman Kagan <rvkagan@yandex-team.ru>
Thanks!
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> block/nbd.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/block/nbd.c b/block/nbd.c
> index 42e10c7c93..bcd6641e90 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -235,7 +235,14 @@ static void nbd_client_detach_aio_context(BlockDriverState *bs)
>
> /* Timer is deleted in nbd_client_co_drain_begin() */
> assert(!s->reconnect_delay_timer);
> - qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
> + /*
> + * If reconnect is in progress we may have no ->ioc. It will be
> + * re-instantiated in the proper aio context once the connection is
> + * reestablished.
> + */
> + if (s->ioc) {
> + qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
> + }
> }
>
> static void nbd_client_attach_aio_context_bh(void *opaque)
>
--
Best regards,
Vladimir
On Fri, Jan 29, 2021 at 08:37:13AM +0300, Vladimir Sementsov-Ogievskiy wrote: > 28.01.2021 23:14, Roman Kagan wrote: > > When the reconnect in NBD client is in progress, the iochannel used for > > NBD connection doesn't exist. Therefore an attempt to detach it from > > the aio_context of the parent BlockDriverState results in a NULL pointer > > dereference. > > > > The problem is triggerable, in particular, when an outgoing migration is > > about to finish, and stopping the dataplane tries to move the > > BlockDriverState from the iothread aio_context to the main loop. If the > > NBD connection is lost before this point, and the NBD client has entered > > the reconnect procedure, QEMU crashes: > > > > at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452 > > at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151 > > new_context=new_context@entry=0x562a260c9580, > > ignore=ignore@entry=0x7feeadc9b780) > > at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230 > > (bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580, > > ignore_child=<optimized out>, errp=<optimized out>) > > at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332 > > new_context=0x562a260c9580, > > update_root_node=update_root_node@entry=true, errp=errp@entry=0x0) > > at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989 > > new_context=<optimized out>, errp=errp@entry=0x0) > > at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010 > > out>) > > at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292 > > at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245 > > running=0, state=<optimized out>) > > at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220 > > state=state@entry=RUN_STATE_FINISH_MIGRATE) > > at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275 > > send_stop=<optimized out>) > > at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032 > > at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914 > > at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275 > > at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439 > > at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519 > > at pthread_create.c:333 > > oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0 > > <__func__.28102>, newlen=0) > > at ../sysdeps/unix/sysv/linux/sysctl.c:30 > > function names are somehow lost :( Oops. Backtraces in gdb have frame numbers prefixed with a hash which got interpreted as comments by git commit; I should have offset the backtrace when pasting. Will respin with fixed log messages, thanks. Roman.
© 2016 - 2026 Red Hat, Inc.