:p
atchew
Login
Current logic assumes that channel connections on the destination side are always established in the same order as the source and the first one will always be the main channel followed by the multifid or post-copy preemption channel. This may not be always true, as even if a channel has a connection established on the source side it can be in the pending state on the destination side and a newer connection can be established first. Basically causing out of order mapping of channels on the destination side. Currently, all channels except post-copy preempt send a magic number, this patch uses that magic number to decide the type of channel. This logic is applicable only for precopy(multifd) live migration, as mentioned, the post-copy preempt channel does not send any magic number. Also, tls live migrations already does tls handshake before creating other channels, so this issue is not possible with tls, hence this logic is avoided for tls live migrations. This patch uses MSG_PEEK to check the magic number of channels so that current data/control stream management remains un-effected. Suggested-by: Daniel P. Berrangé <berrange@redhat.com> Signed-off-by: manish.mishra <manish.mishra@nutanix.com> v2: TLS does not support MSG_PEEK, so V1 was broken for tls live migrations. For tls live migration, while initializing main channel tls handshake is done before we can create other channels, so this issue is not possible for tls live migrations. In V2 added a check to avoid checking magic number for tls live migration and fallback to older method to decide mapping of channels on destination side. --- include/io/channel.h | 25 +++++++++++++++++++++++ io/channel-socket.c | 27 ++++++++++++++++++++++++ io/channel.c | 39 +++++++++++++++++++++++++++++++++++ migration/migration.c | 44 +++++++++++++++++++++++++++++----------- migration/multifd.c | 12 ++++------- migration/multifd.h | 2 +- migration/postcopy-ram.c | 5 +---- migration/postcopy-ram.h | 2 +- 8 files changed, 130 insertions(+), 26 deletions(-) diff --git a/include/io/channel.h b/include/io/channel.h index XXXXXXX..XXXXXXX 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -XXX,XX +XXX,XX @@ struct QIOChannelClass { int **fds, size_t *nfds, Error **errp); + ssize_t (*io_read_peek)(QIOChannel *ioc, + void *buf, + size_t nbytes, + Error **errp); int (*io_close)(QIOChannel *ioc, Error **errp); GSource * (*io_create_watch)(QIOChannel *ioc, @@ -XXX,XX +XXX,XX @@ int qio_channel_write_all(QIOChannel *ioc, size_t buflen, Error **errp); +/** + * qio_channel_read_peek_all: + * @ioc: the channel object + * @buf: the memory region to read in data + * @nbytes: the number of bytes to read + * @errp: pointer to a NULL-initialized error object + * + * Read given @nbytes data from peek of channel into + * memory region @buf. + * + * The function will be blocked until read size is + * equal to requested size. + * + * Returns: 1 if all bytes were read, 0 if end-of-file + * occurs without data, or -1 on error + */ +int qio_channel_read_peek_all(QIOChannel *ioc, + void* buf, + size_t nbytes, + Error **errp); + /** * qio_channel_set_blocking: * @ioc: the channel object diff --git a/io/channel-socket.c b/io/channel-socket.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, } #endif /* WIN32 */ +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc, + void *buf, + size_t nbytes, + Error **errp) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t bytes = 0; + +retry: + bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK); + + if (bytes < 0) { + if (errno == EINTR) { + goto retry; + } + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + + error_setg_errno(errp, errno, + "Unable to read from peek of socket"); + return -1; + } + + return bytes; +} #ifdef QEMU_MSG_ZEROCOPY static int qio_channel_socket_flush(QIOChannel *ioc, @@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_class_init(ObjectClass *klass, ioc_klass->io_writev = qio_channel_socket_writev; ioc_klass->io_readv = qio_channel_socket_readv; + ioc_klass->io_read_peek = qio_channel_socket_read_peek; ioc_klass->io_set_blocking = qio_channel_socket_set_blocking; ioc_klass->io_close = qio_channel_socket_close; ioc_klass->io_shutdown = qio_channel_socket_shutdown; diff --git a/io/channel.c b/io/channel.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel.c +++ b/io/channel.c @@ -XXX,XX +XXX,XX @@ int qio_channel_write_all(QIOChannel *ioc, return qio_channel_writev_all(ioc, &iov, 1, errp); } +int qio_channel_read_peek_all(QIOChannel *ioc, + void* buf, + size_t nbytes, + Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + ssize_t bytes = 0; + + if (!klass->io_read_peek) { + error_setg(errp, "Channel does not support read peek"); + return -1; + } + + while (bytes < nbytes) { + bytes = klass->io_read_peek(ioc, + buf, + nbytes, + errp); + + if (bytes == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); + } else { + qio_channel_wait(ioc, G_IO_OUT); + } + continue; + } + if (bytes == 0) { + error_setg(errp, + "Unexpected end-of-file on channel"); + return 0; + } + if (bytes < 0) { + return -1; + } + } + + return 1; +} int qio_channel_set_blocking(QIOChannel *ioc, bool enabled, diff --git a/migration/migration.c b/migration/migration.c index XXXXXXX..XXXXXXX 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -XXX,XX +XXX,XX @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) { MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; - bool start_migration; QEMUFile *f; + bool default_channel = true; + uint32_t channel_magic = 0; + int ret = 0; - if (!mis->from_src_file) { - /* The first connection (multifd may have multiple) */ + if (migrate_use_multifd() && !migration_in_postcopy() && + !migrate_use_tls()) { + /* + * With multiple channels, it is possible that we receive channels + * out of order on destination side, causing incorrect mapping of + * source channels on destination side. Check channel MAGIC to + * decide type of channel. Please note this is best effort, postcopy + * preempt channel does not send any magic number so avoid it for + * postcopy live migration. Also tls live migration already does + * tls handshake while initializing main channel so with tls this + * issue is not possible. + */ + ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic, + sizeof(channel_magic), &local_err); + + if (ret != 1) { + error_propagate(errp, local_err); + return; + } + + default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); + } else { + default_channel = !mis->from_src_file; + } + + if (default_channel) { f = qemu_file_new_input(ioc); if (!migration_incoming_setup(f, errp)) { return; } - - /* - * Common migration only needs one channel, so we can start - * right now. Some features need more than one channel, we wait. - */ - start_migration = !migration_needs_multiple_sockets(); } else { /* Multiple connections */ assert(migration_needs_multiple_sockets()); if (migrate_use_multifd()) { - start_migration = multifd_recv_new_channel(ioc, &local_err); + multifd_recv_new_channel(ioc, &local_err); } else { assert(migrate_postcopy_preempt()); f = qemu_file_new_input(ioc); - start_migration = postcopy_preempt_new_channel(mis, f); + postcopy_preempt_new_channel(mis, f); } if (local_err) { error_propagate(errp, local_err); @@ -XXX,XX +XXX,XX @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) } } - if (start_migration) { + if (migration_has_all_channels()) { /* If it's a recovery, we're done */ if (postcopy_try_recover()) { return; diff --git a/migration/multifd.c b/migration/multifd.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -XXX,XX +XXX,XX @@ bool multifd_recv_all_channels_created(void) /* * Try to receive all multifd channels to get ready for the migration. - * - Return true and do not set @errp when correctly receiving all channels; - * - Return false and do not set @errp when correctly receiving the current one; - * - Return false and set @errp when failing to receive the current channel. + * Sets @errp when failing to receive the current channel. */ -bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) +void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) { MultiFDRecvParams *p; Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) "failed to receive packet" " via multifd channel %d: ", qatomic_read(&multifd_recv_state->count)); - return false; + return; } trace_multifd_recv_new_channel(id); @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) id); multifd_recv_terminate_threads(local_err); error_propagate(errp, local_err); - return false; + return; } p->c = ioc; object_ref(OBJECT(ioc)); @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, QEMU_THREAD_JOINABLE); qatomic_inc(&multifd_recv_state->count); - return qatomic_read(&multifd_recv_state->count) == - migrate_multifd_channels(); } diff --git a/migration/multifd.h b/migration/multifd.h index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -XXX,XX +XXX,XX @@ void multifd_save_cleanup(void); int multifd_load_setup(Error **errp); int multifd_load_cleanup(Error **errp); bool multifd_recv_all_channels_created(void); -bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); +void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); void multifd_recv_sync_main(void); int multifd_send_sync_main(QEMUFile *f); int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index XXXXXXX..XXXXXXX 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -XXX,XX +XXX,XX @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) } } -bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) +void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) { /* * The new loading channel has its own threads, so it needs to be @@ -XXX,XX +XXX,XX @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) qemu_file_set_blocking(file, true); mis->postcopy_qemufile_dst = file; trace_postcopy_preempt_new_channel(); - - /* Start the migration immediately */ - return true; } /* diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index XXXXXXX..XXXXXXX 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -XXX,XX +XXX,XX @@ enum PostcopyChannels { RAM_CHANNEL_MAX, }; -bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); +void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); int postcopy_preempt_setup(MigrationState *s, Error **errp); int postcopy_preempt_wait_channel(MigrationState *s); -- 2.22.3
Current logic assumes that channel connections on the destination side are always established in the same order as the source and the first one will always be the main channel followed by the multifid or post-copy preemption channel. This may not be always true, as even if a channel has a connection established on the source side it can be in the pending state on the destination side and a newer connection can be established first. Basically causing out of order mapping of channels on the destination side. Currently, all channels except post-copy preempt send a magic number, this patch uses that magic number to decide the type of channel. This logic is applicable only for precopy(multifd) live migration, as mentioned, the post-copy preempt channel does not send any magic number. Also, tls live migrations already does tls handshake before creating other channels, so this issue is not possible with tls, hence this logic is avoided for tls live migrations. This patch uses MSG_PEEK to check the magic number of channels so that current data/control stream management remains un-effected. v2: TLS does not support MSG_PEEK, so V1 was broken for tls live migrations. For tls live migration, while initializing main channel tls handshake is done before we can create other channels, so this issue is not possible for tls live migrations. In V2 added a check to avoid checking magic number for tls live migration and fallback to older method to decide mapping of channels on destination side. v3: 1. Split change in two patches, io patch for read_peek routines, migration patch for migration related changes. 2. Add flags to io_readv calls to get extra read flags like MSG_PEEK. 3. Some other minor fixes. v4: 1. Removed common *all_eof routines for read peek and added one specific to live migration. 2. Updated to use qemu_co_sleep_ns instead of qio_channel_yield. 3. Some other minor fixes. manish.mishra (2): io: Add support for MSG_PEEK for socket channel migration: check magic value for deciding the mapping of channels chardev/char-socket.c | 4 +-- include/io/channel.h | 6 ++++ io/channel-buffer.c | 1 + io/channel-command.c | 1 + io/channel-file.c | 1 + io/channel-null.c | 1 + io/channel-socket.c | 17 ++++++++++- io/channel-tls.c | 1 + io/channel-websock.c | 1 + io/channel.c | 16 +++++++--- migration/channel-block.c | 1 + migration/channel.c | 46 +++++++++++++++++++++++++++++ migration/channel.h | 5 ++++ migration/migration.c | 45 ++++++++++++++++++++-------- migration/multifd.c | 12 +++----- migration/multifd.h | 2 +- migration/postcopy-ram.c | 5 +--- migration/postcopy-ram.h | 2 +- scsi/qemu-pr-helper.c | 2 +- tests/qtest/tpm-emu.c | 2 +- tests/unit/test-io-channel-socket.c | 1 + util/vhost-user-server.c | 2 +- 22 files changed, 138 insertions(+), 36 deletions(-) -- 2.22.3
MSG_PEEK reads from the peek of channel, The data is treated as unread and the next read shall still return this data. This support is currently added only for socket class. Extra parameter 'flags' is added to io_readv calls to pass extra read flags like MSG_PEEK. Suggested-by: Daniel P. Berrangé <berrange@redhat.com Signed-off-by: manish.mishra <manish.mishra@nutanix.com> --- chardev/char-socket.c | 4 ++-- include/io/channel.h | 6 ++++++ io/channel-buffer.c | 1 + io/channel-command.c | 1 + io/channel-file.c | 1 + io/channel-null.c | 1 + io/channel-socket.c | 17 ++++++++++++++++- io/channel-tls.c | 1 + io/channel-websock.c | 1 + io/channel.c | 16 ++++++++++++---- migration/channel-block.c | 1 + scsi/qemu-pr-helper.c | 2 +- tests/qtest/tpm-emu.c | 2 +- tests/unit/test-io-channel-socket.c | 1 + util/vhost-user-server.c | 2 +- 15 files changed, 47 insertions(+), 10 deletions(-) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index XXXXXXX..XXXXXXX 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -XXX,XX +XXX,XX @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ret = qio_channel_readv_full(s->ioc, &iov, 1, &msgfds, &msgfds_num, - NULL); + 0, NULL); } else { ret = qio_channel_readv_full(s->ioc, &iov, 1, NULL, NULL, - NULL); + 0, NULL); } if (msgfds_num) { diff --git a/include/io/channel.h b/include/io/channel.h index XXXXXXX..XXXXXXX 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 +#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 + typedef enum QIOChannelFeature QIOChannelFeature; enum QIOChannelFeature { @@ -XXX,XX +XXX,XX @@ enum QIOChannelFeature { QIO_CHANNEL_FEATURE_SHUTDOWN, QIO_CHANNEL_FEATURE_LISTEN, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + QIO_CHANNEL_FEATURE_READ_MSG_PEEK, }; @@ -XXX,XX +XXX,XX @@ struct QIOChannelClass { size_t niov, int **fds, size_t *nfds, + int flags, Error **errp); int (*io_close)(QIOChannel *ioc, Error **errp); @@ -XXX,XX +XXX,XX @@ void qio_channel_set_name(QIOChannel *ioc, * @niov: the length of the @iov array * @fds: pointer to an array that will received file handles * @nfds: pointer filled with number of elements in @fds on return + * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) * @errp: pointer to a NULL-initialized error object * * Read data from the IO channel, storing it in the @@ -XXX,XX +XXX,XX @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp); diff --git a/io/channel-buffer.c b/io/channel-buffer.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-buffer.c +++ b/io/channel-buffer.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); diff --git a/io/channel-command.c b/io/channel-command.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); diff --git a/io/channel-file.c b/io/channel-file.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); diff --git a/io/channel-null.c b/io/channel-null.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-null.c +++ b/io/channel-null.c @@ -XXX,XX +XXX,XX @@ qio_channel_null_readv(QIOChannel *ioc, size_t niov, int **fds G_GNUC_UNUSED, size_t *nfds G_GNUC_UNUSED, + int flags, Error **errp) { QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); diff --git a/io/channel-socket.c b/io/channel-socket.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -XXX,XX +XXX,XX @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, } #endif + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK); + return 0; } @@ -XXX,XX +XXX,XX @@ qio_channel_socket_accept(QIOChannelSocket *ioc, } #endif /* WIN32 */ + qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK); + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); return cioc; @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, } + if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { + sflags |= MSG_PEEK; + } + retry: ret = recvmsg(sioc->fd, &msg, sflags); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ssize_t done = 0; ssize_t i; + int sflags = 0; + + if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { + sflags |= MSG_PEEK; + } for (i = 0; i < niov; i++) { ssize_t ret; @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, ret = recv(sioc->fd, iov[i].iov_base, iov[i].iov_len, - 0); + sflags); if (ret < 0) { if (errno == EAGAIN) { if (done) { diff --git a/io/channel-tls.c b/io/channel-tls.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); diff --git a/io/channel-websock.c b/io/channel-websock.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -XXX,XX +XXX,XX @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); diff --git a/io/channel.c b/io/channel.c index XXXXXXX..XXXXXXX 100644 --- a/io/channel.c +++ b/io/channel.c @@ -XXX,XX +XXX,XX @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); @@ -XXX,XX +XXX,XX @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, return -1; } - return klass->io_readv(ioc, iov, niov, fds, nfds, errp); + if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + error_setg_errno(errp, EINVAL, + "Channel does not support peek read"); + return -1; + } + + return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); } @@ -XXX,XX +XXX,XX @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, while ((nlocal_iov > 0) || local_fds) { ssize_t len; len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, - local_nfds, errp); + local_nfds, 0, errp); if (len == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { qio_channel_yield(ioc, G_IO_IN); @@ -XXX,XX +XXX,XX @@ ssize_t qio_channel_readv(QIOChannel *ioc, size_t niov, Error **errp) { - return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); + return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); } @@ -XXX,XX +XXX,XX @@ ssize_t qio_channel_read(QIOChannel *ioc, Error **errp) { struct iovec iov = { .iov_base = buf, .iov_len = buflen }; - return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); + return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); } diff --git a/migration/channel-block.c b/migration/channel-block.c index XXXXXXX..XXXXXXX 100644 --- a/migration/channel-block.c +++ b/migration/channel-block.c @@ -XXX,XX +XXX,XX @@ qio_channel_block_readv(QIOChannel *ioc, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c index XXXXXXX..XXXXXXX 100644 --- a/scsi/qemu-pr-helper.c +++ b/scsi/qemu-pr-helper.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, iov.iov_base = buf; iov.iov_len = sz; n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, - &fds, &nfds, errp); + &fds, &nfds, 0, errp); if (n_read == QIO_CHANNEL_ERR_BLOCK) { qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/tpm-emu.c +++ b/tests/qtest/tpm-emu.c @@ -XXX,XX +XXX,XX @@ void *tpm_emu_ctrl_thread(void *data) int *pfd = NULL; size_t nfd = 0; - qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); + qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); cmd = be32_to_cpu(cmd); g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); g_assert_cmpint(nfd, ==, 1); diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c index XXXXXXX..XXXXXXX 100644 --- a/tests/unit/test-io-channel-socket.c +++ b/tests/unit/test-io-channel-socket.c @@ -XXX,XX +XXX,XX @@ static void test_io_channel_unix_fd_pass(void) G_N_ELEMENTS(iorecv), &fdrecv, &nfdrecv, + 0, &error_abort); g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index XXXXXXX..XXXXXXX 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) * qio_channel_readv_full may have short reads, keeping calling it * until getting VHOST_USER_HDR_SIZE or 0 bytes in total */ - rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); + rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); if (rc < 0) { if (rc == QIO_CHANNEL_ERR_BLOCK) { assert(local_err == NULL); -- 2.22.3
Current logic assumes that channel connections on the destination side are always established in the same order as the source and the first one will always be the main channel followed by the multifid or post-copy preemption channel. This may not be always true, as even if a channel has a connection established on the source side it can be in the pending state on the destination side and a newer connection can be established first. Basically causing out of order mapping of channels on the destination side. Currently, all channels except post-copy preempt send a magic number, this patch uses that magic number to decide the type of channel. This logic is applicable only for precopy(multifd) live migration, as mentioned, the post-copy preempt channel does not send any magic number. Also, tls live migrations already does tls handshake before creating other channels, so this issue is not possible with tls, hence this logic is avoided for tls live migrations. This patch uses read peek to check the magic number of channels so that current data/control stream management remains un-effected. Reviewed-by: Peter Xu <peterx@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.co Suggested-by: Daniel P. Berrangé <berrange@redhat.com Signed-off-by: manish.mishra <manish.mishra@nutanix.com> --- migration/channel.c | 46 ++++++++++++++++++++++++++++++++++++++++ migration/channel.h | 5 +++++ migration/migration.c | 45 ++++++++++++++++++++++++++++----------- migration/multifd.c | 12 ++++------- migration/multifd.h | 2 +- migration/postcopy-ram.c | 5 +---- migration/postcopy-ram.h | 2 +- 7 files changed, 91 insertions(+), 26 deletions(-) diff --git a/migration/channel.c b/migration/channel.c index XXXXXXX..XXXXXXX 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -XXX,XX +XXX,XX @@ void migration_channel_connect(MigrationState *s, migrate_fd_connect(s, error); error_free(error); } + + +/** + * @migration_channel_read_peek - Read from the peek of migration channel, + * without actually removing it from channel buffer. + * + * @ioc: the channel object + * @buf: the memory region to read data into + * @buflen: the number of bytes to read in @buf + * @errp: pointer to a NULL-initialized error object + * + * Returns 0 if successful, returns -1 and sets @errp if fails. + */ +int migration_channel_read_peek(QIOChannel *ioc, + const char *buf, + const size_t buflen, + Error **errp) +{ + ssize_t len = 0; + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; + + while (len < buflen) { + len = qio_channel_readv_full(ioc, &iov, 1, NULL, + NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); + + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + /* 1ms sleep. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + } else { + qio_channel_wait(ioc, G_IO_IN); + } + continue; + } + if (len == 0) { + error_setg(errp, + "Unexpected end-of-file on channel"); + return -1; + } + if (len < 0) { + return -1; + } + } + + return 0; +} diff --git a/migration/channel.h b/migration/channel.h index XXXXXXX..XXXXXXX 100644 --- a/migration/channel.h +++ b/migration/channel.h @@ -XXX,XX +XXX,XX @@ void migration_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname, Error *error_in); + +int migration_channel_read_peek(QIOChannel *ioc, + const char *buf, + const size_t buflen, + Error **errp); #endif diff --git a/migration/migration.c b/migration/migration.c index XXXXXXX..XXXXXXX 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -XXX,XX +XXX,XX @@ #include "migration.h" #include "savevm.h" #include "qemu-file.h" +#include "channel.h" #include "migration/vmstate.h" #include "block/block.h" #include "qapi/error.h" @@ -XXX,XX +XXX,XX @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) { MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; - bool start_migration; QEMUFile *f; + bool default_channel = true; + uint32_t channel_magic = 0; + int ret = 0; - if (!mis->from_src_file) { - /* The first connection (multifd may have multiple) */ + if (migrate_use_multifd() && !migrate_postcopy_ram() && + qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + /* + * With multiple channels, it is possible that we receive channels + * out of order on destination side, causing incorrect mapping of + * source channels on destination side. Check channel MAGIC to + * decide type of channel. Please note this is best effort, postcopy + * preempt channel does not send any magic number so avoid it for + * postcopy live migration. Also tls live migration already does + * tls handshake while initializing main channel so with tls this + * issue is not possible. + */ + ret = migration_channel_read_peek(ioc, (void *)&channel_magic, + sizeof(channel_magic), &local_err); + + if (ret != 0) { + error_propagate(errp, local_err); + return; + } + + default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); + } else { + default_channel = !mis->from_src_file; + } + + if (default_channel) { f = qemu_file_new_input(ioc); if (!migration_incoming_setup(f, errp)) { return; } - - /* - * Common migration only needs one channel, so we can start - * right now. Some features need more than one channel, we wait. - */ - start_migration = !migration_needs_multiple_sockets(); } else { /* Multiple connections */ assert(migration_needs_multiple_sockets()); if (migrate_use_multifd()) { - start_migration = multifd_recv_new_channel(ioc, &local_err); + multifd_recv_new_channel(ioc, &local_err); } else { assert(migrate_postcopy_preempt()); f = qemu_file_new_input(ioc); - start_migration = postcopy_preempt_new_channel(mis, f); + postcopy_preempt_new_channel(mis, f); } if (local_err) { error_propagate(errp, local_err); @@ -XXX,XX +XXX,XX @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) } } - if (start_migration) { + if (migration_has_all_channels()) { /* If it's a recovery, we're done */ if (postcopy_try_recover()) { return; diff --git a/migration/multifd.c b/migration/multifd.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -XXX,XX +XXX,XX @@ bool multifd_recv_all_channels_created(void) /* * Try to receive all multifd channels to get ready for the migration. - * - Return true and do not set @errp when correctly receiving all channels; - * - Return false and do not set @errp when correctly receiving the current one; - * - Return false and set @errp when failing to receive the current channel. + * Sets @errp when failing to receive the current channel. */ -bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) +void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) { MultiFDRecvParams *p; Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) "failed to receive packet" " via multifd channel %d: ", qatomic_read(&multifd_recv_state->count)); - return false; + return; } trace_multifd_recv_new_channel(id); @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) id); multifd_recv_terminate_threads(local_err); error_propagate(errp, local_err); - return false; + return; } p->c = ioc; object_ref(OBJECT(ioc)); @@ -XXX,XX +XXX,XX @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, QEMU_THREAD_JOINABLE); qatomic_inc(&multifd_recv_state->count); - return qatomic_read(&multifd_recv_state->count) == - migrate_multifd_channels(); } diff --git a/migration/multifd.h b/migration/multifd.h index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -XXX,XX +XXX,XX @@ void multifd_save_cleanup(void); int multifd_load_setup(Error **errp); int multifd_load_cleanup(Error **errp); bool multifd_recv_all_channels_created(void); -bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); +void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); void multifd_recv_sync_main(void); int multifd_send_sync_main(QEMUFile *f); int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index XXXXXXX..XXXXXXX 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -XXX,XX +XXX,XX @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) } } -bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) +void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) { /* * The new loading channel has its own threads, so it needs to be @@ -XXX,XX +XXX,XX @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) qemu_file_set_blocking(file, true); mis->postcopy_qemufile_dst = file; trace_postcopy_preempt_new_channel(); - - /* Start the migration immediately */ - return true; } /* diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index XXXXXXX..XXXXXXX 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -XXX,XX +XXX,XX @@ enum PostcopyChannels { RAM_CHANNEL_MAX, }; -bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); +void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); int postcopy_preempt_setup(MigrationState *s, Error **errp); int postcopy_preempt_wait_channel(MigrationState *s); -- 2.22.3