We're currently changing the way the source multifd migration handles
the shutdown of the multifd channels when TLS is in use to perform a
clean termination by calling gnutls_bye().
Older src QEMUs will always close the channel without terminating the
TLS session. New dst QEMUs treat an unclean termination as an error.
Add the multifd-clean-tls-termination property (default true), which can
be set to false on the destination whenever a src QEMU <= 9.2 is in use.
(Note that the compat property is only strictly necessary for src
QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs
9.1 and 9.2 can put the destination in a condition where it doesn't
see the unclean termination. Still, make the property more inclusive
to facilitate potential backports.)
Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
hw/core/machine.c | 1 +
migration/migration.h | 33 +++++++++++++++++++++++++++++++++
migration/multifd.c | 15 +++++++++++++--
migration/multifd.h | 2 ++
migration/options.c | 2 ++
5 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 254cc20c4c..02cff735b3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = {
{ "virtio-balloon-pci-transitional", "vectors", "0" },
{ "virtio-balloon-pci-non-transitional", "vectors", "0" },
{ "virtio-mem-pci", "vectors", "0" },
+ { "migration", "multifd-clean-tls-termination", "false" },
};
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
diff --git a/migration/migration.h b/migration/migration.h
index 4c1fafc2b5..77def0b437 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -443,6 +443,39 @@ struct MigrationState {
* Default value is false. (since 8.1)
*/
bool multifd_flush_after_each_section;
+
+ /*
+ * This variable only makes sense when set on the machine that is
+ * the destination of a multifd migration with TLS enabled. It
+ * affects the behavior of the last send->recv iteration with
+ * regards to termination of the TLS session.
+ *
+ * When set:
+ *
+ * - the destination QEMU instance can expect to never get a
+ * GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
+ * message: "The TLS connection was non-properly terminated".
+ *
+ * When clear:
+ *
+ * - the destination QEMU instance can expect to see a
+ * GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
+ * whenever the last recv() call of that channel happens after
+ * the source QEMU instance has already issued shutdown() on the
+ * channel.
+ *
+ * Commit 637280aeb2 (since 9.1) introduced a side effect that
+ * causes the destination instance to not be affected by the
+ * premature termination, while commit 1d457daf86 (since 10.0)
+ * causes the premature termination condition to be once again
+ * reachable.
+ *
+ * NOTE: Regardless of the state of this option, a premature
+ * termination of the TLS connection might happen due to error at
+ * any moment prior to the last send->recv iteration.
+ */
+ bool multifd_clean_tls_termination;
+
/*
* This decides the size of guest memory chunk that will be used
* to track dirty bitmap clearing. The size of memory chunk will
diff --git a/migration/multifd.c b/migration/multifd.c
index 0296758c08..8045197be8 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void)
static void *multifd_recv_thread(void *opaque)
{
+ MigrationState *s = migrate_get_current();
MultiFDRecvParams *p = opaque;
Error *local_err = NULL;
bool use_packets = multifd_use_packets();
@@ -1159,18 +1160,28 @@ static void *multifd_recv_thread(void *opaque)
trace_multifd_recv_thread_start(p->id);
rcu_register_thread();
+ if (!s->multifd_clean_tls_termination) {
+ p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
+ }
+
while (true) {
uint32_t flags = 0;
bool has_data = false;
p->normal_num = 0;
+
if (use_packets) {
+ struct iovec iov = {
+ .iov_base = (void *)p->packet,
+ .iov_len = p->packet_len
+ };
+
if (multifd_recv_should_exit()) {
break;
}
- ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
- p->packet_len, &local_err);
+ ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
+ p->read_flags, &local_err);
if (!ret) {
/* EOF */
assert(!local_err);
diff --git a/migration/multifd.h b/migration/multifd.h
index bd785b9873..cf408ff721 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -244,6 +244,8 @@ typedef struct {
uint32_t zero_num;
/* used for de-compression methods */
void *compress_data;
+ /* Flags for the QIOChannel */
+ int read_flags;
} MultiFDRecvParams;
typedef struct {
diff --git a/migration/options.c b/migration/options.c
index 1ad950e397..feda354935 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -99,6 +99,8 @@ const Property migration_properties[] = {
clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
preempt_pre_7_2, false),
+ DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
+ multifd_clean_tls_termination, true),
/* Migration parameters */
DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
--
2.35.3
On Fri, Feb 07, 2025 at 04:53:58PM -0300, Fabiano Rosas wrote: > We're currently changing the way the source multifd migration handles > the shutdown of the multifd channels when TLS is in use to perform a > clean termination by calling gnutls_bye(). > > Older src QEMUs will always close the channel without terminating the > TLS session. New dst QEMUs treat an unclean termination as an error. > > Add multifd_clean_tls_termination (default true) that can be switched > on the destination whenever a src QEMU <= 9.2 is in use. > > (Note that the compat property is only strictly necessary for src > QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs > 9.1 and 9.2 can put the destination in a condition where it doesn't > see the unclean termination. Still, make the property more inclusive > to facilitate potential backports.) > > Signed-off-by: Fabiano Rosas <farosas@suse.de> Reviewed-by: Peter Xu <peterx@redhat.com> One nitpick.. > --- > hw/core/machine.c | 1 + > migration/migration.h | 33 +++++++++++++++++++++++++++++++++ > migration/multifd.c | 15 +++++++++++++-- > migration/multifd.h | 2 ++ > migration/options.c | 2 ++ > 5 files changed, 51 insertions(+), 2 deletions(-) > > diff --git a/hw/core/machine.c b/hw/core/machine.c > index 254cc20c4c..02cff735b3 100644 > --- a/hw/core/machine.c > +++ b/hw/core/machine.c > @@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = { > { "virtio-balloon-pci-transitional", "vectors", "0" }, > { "virtio-balloon-pci-non-transitional", "vectors", "0" }, > { "virtio-mem-pci", "vectors", "0" }, > + { "migration", "multifd-clean-tls-termination", "false" }, > }; > const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); > > diff --git a/migration/migration.h b/migration/migration.h > index 4c1fafc2b5..77def0b437 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -443,6 +443,39 @@ struct MigrationState { > * Default value is false. 
(since 8.1) > */ > bool multifd_flush_after_each_section; > + > + /* > + * This variable only makes sense when set on the machine that is > + * the destination of a multifd migration with TLS enabled. It > + * affects the behavior of the last send->recv iteration with > + * regards to termination of the TLS session. > + * > + * When set: > + * > + * - the destination QEMU instance can expect to never get a > + * GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error > + * message: "The TLS connection was non-properly terminated". > + * > + * When clear: > + * > + * - the destination QEMU instance can expect to see a > + * GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel > + * whenever the last recv() call of that channel happens after > + * the source QEMU instance has already issued shutdown() on the > + * channel. > + * > + * Commit 637280aeb2 (since 9.1) introduced a side effect that > + * causes the destination instance to not be affected by the > + * premature termination, while commit 1d457daf86 (since 10.0) > + * causes the premature termination condition to be once again > + * reachable. > + * > + * NOTE: Regardless of the state of this option, a premature > + * termination of the TLS connection might happen due to error at > + * any moment prior to the last send->recv iteration. > + */ > + bool multifd_clean_tls_termination; > + > /* > * This decides the size of guest memory chunk that will be used > * to track dirty bitmap clearing. 
The size of memory chunk will > diff --git a/migration/multifd.c b/migration/multifd.c > index 0296758c08..8045197be8 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void) > > static void *multifd_recv_thread(void *opaque) > { > + MigrationState *s = migrate_get_current(); > MultiFDRecvParams *p = opaque; > Error *local_err = NULL; > bool use_packets = multifd_use_packets(); > @@ -1159,18 +1160,28 @@ static void *multifd_recv_thread(void *opaque) > trace_multifd_recv_thread_start(p->id); > rcu_register_thread(); > > + if (!s->multifd_clean_tls_termination) { > + p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF; > + } > + > while (true) { > uint32_t flags = 0; > bool has_data = false; > p->normal_num = 0; > > + Extra newline (can be fixed when merge) > if (use_packets) { > + struct iovec iov = { > + .iov_base = (void *)p->packet, > + .iov_len = p->packet_len > + }; > + > if (multifd_recv_should_exit()) { > break; > } > > - ret = qio_channel_read_all_eof(p->c, (void *)p->packet, > - p->packet_len, &local_err); > + ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL, > + p->read_flags, &local_err); > if (!ret) { > /* EOF */ > assert(!local_err); > diff --git a/migration/multifd.h b/migration/multifd.h > index bd785b9873..cf408ff721 100644 > --- a/migration/multifd.h > +++ b/migration/multifd.h > @@ -244,6 +244,8 @@ typedef struct { > uint32_t zero_num; > /* used for de-compression methods */ > void *compress_data; > + /* Flags for the QIOChannel */ > + int read_flags; > } MultiFDRecvParams; > > typedef struct { > diff --git a/migration/options.c b/migration/options.c > index 1ad950e397..feda354935 100644 > --- a/migration/options.c > +++ b/migration/options.c > @@ -99,6 +99,8 @@ const Property migration_properties[] = { > clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), > DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, > preempt_pre_7_2, false), > + 
DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState, > + multifd_clean_tls_termination, true), > > /* Migration parameters */ > DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, > -- > 2.35.3 > -- Peter Xu
© 2016 - 2025 Red Hat, Inc.