In our current code, when multifd is used during migration, if there
is an error before the destination receives all new channels, the
source keeps running; however, the destination does not exit but keeps
waiting until the source is killed deliberately.

Fix this by simply killing the destination when it fails to receive a
packet via some channel.
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Fei Li <fli@suse.com>
---
 migration/channel.c   |  7 ++++++-
 migration/migration.c |  9 +++++++--
 migration/migration.h |  2 +-
 migration/ram.c       | 17 ++++++++++++++---
 migration/ram.h       |  2 +-
 5 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/migration/channel.c b/migration/channel.c
index 33e0e9b82f..572be4245a 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -44,7 +44,12 @@ void migration_channel_process_incoming(QIOChannel *ioc)
             error_report_err(local_err);
         }
     } else {
-        migration_ioc_process_incoming(ioc);
+        Error *local_err = NULL;
+        migration_ioc_process_incoming(ioc, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            exit(EXIT_FAILURE);
+        }
     }
 }
 
diff --git a/migration/migration.c b/migration/migration.c
index 8b36e7f184..87dfc7374f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -541,7 +541,7 @@ void migration_fd_process_incoming(QEMUFile *f)
     migration_incoming_process();
 }
 
-void migration_ioc_process_incoming(QIOChannel *ioc)
+void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     bool start_migration;
@@ -563,9 +563,14 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
          */
         start_migration = !migrate_use_multifd();
     } else {
+        Error *local_err = NULL;
         /* Multiple connections */
         assert(migrate_use_multifd());
-        start_migration = multifd_recv_new_channel(ioc);
+        start_migration = multifd_recv_new_channel(ioc, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
     }
 
     if (start_migration) {
diff --git a/migration/migration.h b/migration/migration.h
index f7813f8261..7df4d426d0 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -229,7 +229,7 @@ struct MigrationState
 void migrate_set_state(int *state, int old_state, int new_state);
 
 void migration_fd_process_incoming(QEMUFile *f);
-void migration_ioc_process_incoming(QIOChannel *ioc);
+void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
 void migration_incoming_process(void);
 
 bool migration_has_all_channels(void);
diff --git a/migration/ram.c b/migration/ram.c
index 4db3b3e8f4..8f03afe228 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1072,6 +1072,7 @@ out:
 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
 {
     MultiFDSendParams *p = opaque;
+    MigrationState *s = migrate_get_current();
     QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
     Error *local_err = NULL;
 
@@ -1080,6 +1081,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
     }
 
     if (qio_task_propagate_error(task, &local_err)) {
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
         if (multifd_save_cleanup(&local_err) != 0) {
             migrate_set_error(migrate_get_current(), local_err);
         }
@@ -1337,16 +1339,20 @@ bool multifd_recv_all_channels_created(void)
 }
 
 /* Return true if multifd is ready for the migration, otherwise false */
-bool multifd_recv_new_channel(QIOChannel *ioc)
+bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
+    MigrationIncomingState *mis = migration_incoming_get_current();
     MultiFDRecvParams *p;
     Error *local_err = NULL;
     int id;
 
     id = multifd_recv_initial_packet(ioc, &local_err);
     if (id < 0) {
+        error_propagate_prepend(errp, local_err,
+                                "failed to receive packet via multifd channel %x: ",
+                                multifd_recv_state->count);
         multifd_recv_terminate_threads(local_err, false);
-        return false;
+        goto fail;
     }
 
     p = &multifd_recv_state->params[id];
@@ -1354,7 +1360,8 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
         error_setg(&local_err, "multifd: received id '%d' already setup'",
                    id);
         multifd_recv_terminate_threads(local_err, true);
-        return false;
+        error_propagate(errp, local_err);
+        goto fail;
     }
     p->c = ioc;
     object_ref(OBJECT(ioc));
@@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
                        QEMU_THREAD_JOINABLE);
     atomic_inc(&multifd_recv_state->count);
     return multifd_recv_state->count == migrate_multifd_channels();
+fail:
+    qemu_fclose(mis->from_src_file);
+    mis->from_src_file = NULL;
+    return false;
 }
 
 /**
diff --git a/migration/ram.h b/migration/ram.h
index 83ff1bc11a..046d3074be 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -47,7 +47,7 @@ int multifd_save_cleanup(Error **errp);
 int multifd_load_setup(void);
 int multifd_load_cleanup(Error **errp);
 bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc);
+bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 
 uint64_t ram_pagesize_summary(void);
 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);
--
2.13.7
Hi all,

I am starting a new thread to ask about a live migration issue when using
multifd. :)

I am not sure whether the way I enable and use multifd is correct, so I'd
like to confirm. With the current upstream QEMU code I run into a failure:
"Migration status: failed (Unable to write to socket: Connection reset by
peer)".

The details are below. The failure does not reproduce 100% of the time,
but as far as I have tested, if I start the live migration with multifd
*less than about one minute after the guest has started*, I can reproduce
it almost every time.
My steps are:
1. start the VM on the src side;
2. start QEMU with -incoming on the dst side;
3. after the VM has run for a little while (after I open a file inside
   the VM), I begin the live migration:
   - on src: migrate_set_capability x-multifd on
   - on src: migrate_set_parameter x-multifd-channels 4
   - on dst: migrate_set_capability x-multifd on
   - on dst: migrate_set_parameter x-multifd-channels 4
   - on src: migrate -d tcp:192.168.120.5:4444
Errors are:
[src]
linux-50ts:/mnt/live-migration # ./sle12-source.sh
QEMU 3.0.50 monitor - type 'help' for more information
(qemu) Running QEMU with SDL 1.2 is deprecated, and will be removed in a future release. Please switch to SDL 2.0 instead
migrate_set_capability x-multifd on
(qemu) migrate_set_parameter x-multifd-channels 4
(qemu) migrate -d tcp:192.168.120.5:4444
(qemu) info migrate
globals:
store-global-state: on
only-migratable: off
send-configuration: on
send-section-footer: on
decompress-error-check: on
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off postcopy-ram: off x-colo: off
release-ram: off block: off return-path: off pause-before-switchover:
off x-multifd: on dirty-bitmaps: off postcopy-blocktime: off
late-block-activate: off
Migration status: failed (Unable to write to socket: Connection reset by peer)
total time: 0 milliseconds
[dst]
linux-p6v6:/mnt/live-migration # ./sle12-dest.sh
QEMU 3.0.50 monitor - type 'help' for more information
(qemu) migrate_set_capability x-multifd on
(qemu) migrate_set_parameter x-multifd-channels 4
(qemu) info migrate
globals:
store-global-state: on
only-migratable: off
send-configuration: on
send-section-footer: on
decompress-error-check: on
Hope this does not bother you too much. ;)
Have a nice day, thanks again
Fei
On 10/29/2018 08:58 PM, Fei Li wrote:
> In our current code, when multifd is used during migration, if there
> is an error before the destination receives all new channels, the
> source keeps running; however, the destination does not exit but keeps
> waiting until the source is killed deliberately.
>
> Fix this by simply killing the destination when it fails to receive a
> packet via some channel.
>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: Fei Li <fli@suse.com>
> ---
> migration/channel.c | 7 ++++++-
> migration/migration.c | 9 +++++++--
> migration/migration.h | 2 +-
> migration/ram.c | 17 ++++++++++++++---
> migration/ram.h | 2 +-
> 5 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/migration/channel.c b/migration/channel.c
> index 33e0e9b82f..572be4245a 100644
> --- a/migration/channel.c
> +++ b/migration/channel.c
> @@ -44,7 +44,12 @@ void migration_channel_process_incoming(QIOChannel *ioc)
> error_report_err(local_err);
> }
> } else {
> - migration_ioc_process_incoming(ioc);
> + Error *local_err = NULL;
> + migration_ioc_process_incoming(ioc, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + exit(EXIT_FAILURE);
> + }
> }
> }
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 8b36e7f184..87dfc7374f 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -541,7 +541,7 @@ void migration_fd_process_incoming(QEMUFile *f)
> migration_incoming_process();
> }
>
> -void migration_ioc_process_incoming(QIOChannel *ioc)
> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> {
> MigrationIncomingState *mis = migration_incoming_get_current();
> bool start_migration;
> @@ -563,9 +563,14 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
> */
> start_migration = !migrate_use_multifd();
> } else {
> + Error *local_err = NULL;
> /* Multiple connections */
> assert(migrate_use_multifd());
> - start_migration = multifd_recv_new_channel(ioc);
> + start_migration = multifd_recv_new_channel(ioc, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> }
>
> if (start_migration) {
> diff --git a/migration/migration.h b/migration/migration.h
> index f7813f8261..7df4d426d0 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -229,7 +229,7 @@ struct MigrationState
> void migrate_set_state(int *state, int old_state, int new_state);
>
> void migration_fd_process_incoming(QEMUFile *f);
> -void migration_ioc_process_incoming(QIOChannel *ioc);
> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
> void migration_incoming_process(void);
>
> bool migration_has_all_channels(void);
> diff --git a/migration/ram.c b/migration/ram.c
> index 4db3b3e8f4..8f03afe228 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1072,6 +1072,7 @@ out:
> static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> {
> MultiFDSendParams *p = opaque;
> + MigrationState *s = migrate_get_current();
> QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
> Error *local_err = NULL;
>
> @@ -1080,6 +1081,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> }
>
> if (qio_task_propagate_error(task, &local_err)) {
> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> if (multifd_save_cleanup(&local_err) != 0) {
> migrate_set_error(migrate_get_current(), local_err);
> }
> @@ -1337,16 +1339,20 @@ bool multifd_recv_all_channels_created(void)
> }
>
> /* Return true if multifd is ready for the migration, otherwise false */
> -bool multifd_recv_new_channel(QIOChannel *ioc)
> +bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
> {
> + MigrationIncomingState *mis = migration_incoming_get_current();
> MultiFDRecvParams *p;
> Error *local_err = NULL;
> int id;
>
> id = multifd_recv_initial_packet(ioc, &local_err);
> if (id < 0) {
> + error_propagate_prepend(errp, local_err,
> + "failed to receive packet via multifd channel %x: ",
> + multifd_recv_state->count);
> multifd_recv_terminate_threads(local_err, false);
> - return false;
> + goto fail;
> }
>
> p = &multifd_recv_state->params[id];
> @@ -1354,7 +1360,8 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
> error_setg(&local_err, "multifd: received id '%d' already setup'",
> id);
> multifd_recv_terminate_threads(local_err, true);
> - return false;
> + error_propagate(errp, local_err);
> + goto fail;
> }
> p->c = ioc;
> object_ref(OBJECT(ioc));
> @@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
> QEMU_THREAD_JOINABLE);
> atomic_inc(&multifd_recv_state->count);
> return multifd_recv_state->count == migrate_multifd_channels();
> +fail:
> + qemu_fclose(mis->from_src_file);
> + mis->from_src_file = NULL;
> + return false;
> }
>
> /**
> diff --git a/migration/ram.h b/migration/ram.h
> index 83ff1bc11a..046d3074be 100644
> --- a/migration/ram.h
> +++ b/migration/ram.h
> @@ -47,7 +47,7 @@ int multifd_save_cleanup(Error **errp);
> int multifd_load_setup(void);
> int multifd_load_cleanup(Error **errp);
> bool multifd_recv_all_channels_created(void);
> -bool multifd_recv_new_channel(QIOChannel *ioc);
> +bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
>
> uint64_t ram_pagesize_summary(void);
> int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);
On Mon, Oct 29, 2018 at 08:58:16PM +0800, Fei Li wrote:
> In our current code, when multifd is used during migration, if there
> is an error before the destination receives all new channels, the
> source keeps running; however, the destination does not exit but keeps
> waiting until the source is killed deliberately.
>
> Fix this by simply killing the destination when it fails to receive a
> packet via some channel.
>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: Fei Li <fli@suse.com>
> ---
> migration/channel.c | 7 ++++++-
> migration/migration.c | 9 +++++++--
> migration/migration.h | 2 +-
> migration/ram.c | 17 ++++++++++++++---
> migration/ram.h | 2 +-
> 5 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/migration/channel.c b/migration/channel.c
> index 33e0e9b82f..572be4245a 100644
> --- a/migration/channel.c
> +++ b/migration/channel.c
> @@ -44,7 +44,12 @@ void migration_channel_process_incoming(QIOChannel *ioc)
> error_report_err(local_err);
[1]
> }
> } else {
> - migration_ioc_process_incoming(ioc);
> + Error *local_err = NULL;
> + migration_ioc_process_incoming(ioc, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + exit(EXIT_FAILURE);
I would still suggest that you don't quit.  See the TLS error at [1]: it
only dumps the error.  IMHO users can easily quit the dst VM themselves;
I'd just let them decide whether they want to.
Then you can merge the error paths for both.
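
Something like the following, perhaps (an untested sketch of the whole
function; it assumes the TLS branch is also converted to hand its error
back through the same local_err instead of reporting it in place):

void migration_channel_process_incoming(QIOChannel *ioc)
{
    MigrationState *s = migrate_get_current();
    Error *local_err = NULL;

    trace_migration_set_incoming_channel(
        ioc, object_get_typename(OBJECT(ioc)));

    if (s->parameters.tls_creds &&
        *s->parameters.tls_creds &&
        !object_dynamic_cast(OBJECT(ioc),
                             TYPE_QIO_CHANNEL_TLS)) {
        migration_tls_channel_process_incoming(s, ioc, &local_err);
    } else {
        migration_ioc_process_incoming(ioc, &local_err);
    }

    /* One reporting point for both branches; the dest VM keeps running. */
    if (local_err) {
        error_report_err(local_err);
    }
}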
> + }
> }
> }
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 8b36e7f184..87dfc7374f 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -541,7 +541,7 @@ void migration_fd_process_incoming(QEMUFile *f)
> migration_incoming_process();
> }
>
> -void migration_ioc_process_incoming(QIOChannel *ioc)
> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> {
> MigrationIncomingState *mis = migration_incoming_get_current();
> bool start_migration;
> @@ -563,9 +563,14 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
> */
> start_migration = !migrate_use_multifd();
> } else {
> + Error *local_err = NULL;
> /* Multiple connections */
> assert(migrate_use_multifd());
> - start_migration = multifd_recv_new_channel(ioc);
> + start_migration = multifd_recv_new_channel(ioc, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> }
>
> if (start_migration) {
> diff --git a/migration/migration.h b/migration/migration.h
> index f7813f8261..7df4d426d0 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -229,7 +229,7 @@ struct MigrationState
> void migrate_set_state(int *state, int old_state, int new_state);
>
> void migration_fd_process_incoming(QEMUFile *f);
> -void migration_ioc_process_incoming(QIOChannel *ioc);
> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
> void migration_incoming_process(void);
>
> bool migration_has_all_channels(void);
> diff --git a/migration/ram.c b/migration/ram.c
> index 4db3b3e8f4..8f03afe228 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1072,6 +1072,7 @@ out:
> static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> {
> MultiFDSendParams *p = opaque;
> + MigrationState *s = migrate_get_current();
This seems to be the source part, so I'd suggest you split the patch and
have this patch touch only the dest VM path.
> QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
> Error *local_err = NULL;
>
> @@ -1080,6 +1081,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> }
>
> if (qio_task_propagate_error(task, &local_err)) {
> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> if (multifd_save_cleanup(&local_err) != 0) {
> migrate_set_error(migrate_get_current(), local_err);
> }
> @@ -1337,16 +1339,20 @@ bool multifd_recv_all_channels_created(void)
> }
>
> /* Return true if multifd is ready for the migration, otherwise false */
> -bool multifd_recv_new_channel(QIOChannel *ioc)
> +bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
> {
> + MigrationIncomingState *mis = migration_incoming_get_current();
> MultiFDRecvParams *p;
> Error *local_err = NULL;
> int id;
>
> id = multifd_recv_initial_packet(ioc, &local_err);
> if (id < 0) {
> + error_propagate_prepend(errp, local_err,
> + "failed to receive packet via multifd channel %x: ",
> + multifd_recv_state->count);
> multifd_recv_terminate_threads(local_err, false);
> - return false;
> + goto fail;
> }
>
> p = &multifd_recv_state->params[id];
> @@ -1354,7 +1360,8 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
> error_setg(&local_err, "multifd: received id '%d' already setup'",
> id);
> multifd_recv_terminate_threads(local_err, true);
> - return false;
> + error_propagate(errp, local_err);
> + goto fail;
> }
> p->c = ioc;
> object_ref(OBJECT(ioc));
> @@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
> QEMU_THREAD_JOINABLE);
> atomic_inc(&multifd_recv_state->count);
> return multifd_recv_state->count == migrate_multifd_channels();
> +fail:
> + qemu_fclose(mis->from_src_file);
> + mis->from_src_file = NULL;
> + return false;
Do we need this?
I'd suggest to put all cleanups into a single function. For dest vm
I say it's process_incoming_migration_bh.
Regards,
--
Peter Xu
On 10/30/2018 02:05 PM, Peter Xu wrote:
> On Mon, Oct 29, 2018 at 08:58:16PM +0800, Fei Li wrote:
>> In our current code, when multifd is used during migration, if there
>> is an error before the destination receives all new channels, the
>> source keeps running; however, the destination does not exit but keeps
>> waiting until the source is killed deliberately.
>>
>> Fix this by simply killing the destination when it fails to receive a
>> packet via some channel.
>>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Cc: Peter Xu <peterx@redhat.com>
>> Signed-off-by: Fei Li <fli@suse.com>
>> ---
>> migration/channel.c | 7 ++++++-
>> migration/migration.c | 9 +++++++--
>> migration/migration.h | 2 +-
>> migration/ram.c | 17 ++++++++++++++---
>> migration/ram.h | 2 +-
>> 5 files changed, 29 insertions(+), 8 deletions(-)
>>
>> diff --git a/migration/channel.c b/migration/channel.c
>> index 33e0e9b82f..572be4245a 100644
>> --- a/migration/channel.c
>> +++ b/migration/channel.c
>> @@ -44,7 +44,12 @@ void migration_channel_process_incoming(QIOChannel *ioc)
>> error_report_err(local_err);
> [1]
>
>> }
>> } else {
>> - migration_ioc_process_incoming(ioc);
>> + Error *local_err = NULL;
>> + migration_ioc_process_incoming(ioc, &local_err);
>> + if (local_err) {
>> + error_report_err(local_err);
>> + exit(EXIT_FAILURE);
> I would still suggest that you don't quit.  See the TLS error at [1]: it
> only dumps the error.  IMHO users can easily quit the dst VM themselves;
> I'd just let them decide whether they want to.
>
> Then you can merge the error paths for both.
Ok, got it, thanks :)
>
>> + }
>> }
>> }
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 8b36e7f184..87dfc7374f 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -541,7 +541,7 @@ void migration_fd_process_incoming(QEMUFile *f)
>> migration_incoming_process();
>> }
>>
>> -void migration_ioc_process_incoming(QIOChannel *ioc)
>> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>> {
>> MigrationIncomingState *mis = migration_incoming_get_current();
>> bool start_migration;
>> @@ -563,9 +563,14 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
>> */
>> start_migration = !migrate_use_multifd();
>> } else {
>> + Error *local_err = NULL;
>> /* Multiple connections */
>> assert(migrate_use_multifd());
>> - start_migration = multifd_recv_new_channel(ioc);
>> + start_migration = multifd_recv_new_channel(ioc, &local_err);
>> + if (local_err) {
>> + error_propagate(errp, local_err);
>> + return;
>> + }
>> }
>>
>> if (start_migration) {
>> diff --git a/migration/migration.h b/migration/migration.h
>> index f7813f8261..7df4d426d0 100644
>> --- a/migration/migration.h
>> +++ b/migration/migration.h
>> @@ -229,7 +229,7 @@ struct MigrationState
>> void migrate_set_state(int *state, int old_state, int new_state);
>>
>> void migration_fd_process_incoming(QEMUFile *f);
>> -void migration_ioc_process_incoming(QIOChannel *ioc);
>> +void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
>> void migration_incoming_process(void);
>>
>> bool migration_has_all_channels(void);
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 4db3b3e8f4..8f03afe228 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1072,6 +1072,7 @@ out:
>> static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>> {
>> MultiFDSendParams *p = opaque;
>> + MigrationState *s = migrate_get_current();
> This seems to be the source part, so I'd suggest you split the patch and
> have this patch touch only the dest VM path.
ok
>
>> QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
>> Error *local_err = NULL;
>>
>> @@ -1080,6 +1081,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>> }
>>
>> if (qio_task_propagate_error(task, &local_err)) {
>> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>> if (multifd_save_cleanup(&local_err) != 0) {
>> migrate_set_error(migrate_get_current(), local_err);
>> }
>> @@ -1337,16 +1339,20 @@ bool multifd_recv_all_channels_created(void)
>> }
>>
>> /* Return true if multifd is ready for the migration, otherwise false */
>> -bool multifd_recv_new_channel(QIOChannel *ioc)
>> +bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>> {
>> + MigrationIncomingState *mis = migration_incoming_get_current();
>> MultiFDRecvParams *p;
>> Error *local_err = NULL;
>> int id;
>>
>> id = multifd_recv_initial_packet(ioc, &local_err);
>> if (id < 0) {
>> + error_propagate_prepend(errp, local_err,
>> + "failed to receive packet via multifd channel %x: ",
>> + multifd_recv_state->count);
>> multifd_recv_terminate_threads(local_err, false);
>> - return false;
>> + goto fail;
>> }
>>
>> p = &multifd_recv_state->params[id];
>> @@ -1354,7 +1360,8 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
>> error_setg(&local_err, "multifd: received id '%d' already setup'",
>> id);
>> multifd_recv_terminate_threads(local_err, true);
>> - return false;
>> + error_propagate(errp, local_err);
>> + goto fail;
>> }
>> p->c = ioc;
>> object_ref(OBJECT(ioc));
>> @@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
>> QEMU_THREAD_JOINABLE);
>> atomic_inc(&multifd_recv_state->count);
>> return multifd_recv_state->count == migrate_multifd_channels();
>> +fail:
>> + qemu_fclose(mis->from_src_file);
>> + mis->from_src_file = NULL;
>> + return false;
> Do we need this?
>
> I'd suggest to put all cleanups into a single function. For dest vm
> I say it's process_incoming_migration_bh.
>
> Regards,
>
Not sure whether I understand correctly: if multifd_recv_new_channel()
fails, that means migration_incoming_process() will not be called, and
then process_incoming_migration_co() and process_incoming_migration_bh()
will not be called either. In that case, there is no cleanup.
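
For reference, this is my reading of the gate in
migration_ioc_process_incoming() (the snippet just restates the hunk
above, with the call chain spelled out in a comment):

    if (start_migration) {
        /*
         * Only reached once the expected channel(s) have been received,
         * i.e. all multifd channels in the multifd case; this is what
         * eventually runs process_incoming_migration_co() and, from
         * there, process_incoming_migration_bh().
         */
        migration_incoming_process();
    }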
Have a nice day, thanks
Fei
On Tue, Oct 30, 2018 at 06:05:18PM +0800, Fei Li wrote:

[...]

> > > @@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
> > >                         QEMU_THREAD_JOINABLE);
> > >      atomic_inc(&multifd_recv_state->count);
> > >      return multifd_recv_state->count == migrate_multifd_channels();
> > > +fail:
> > > +    qemu_fclose(mis->from_src_file);
> > > +    mis->from_src_file = NULL;
> > > +    return false;
> > Do we need this?
> >
> > I'd suggest to put all cleanups into a single function.  For dest vm
> > I say it's process_incoming_migration_bh.
> >
> > Regards,
> >
> Not sure whether I understand correctly: if multifd_recv_new_channel()
> fails, that means migration_incoming_process() will not be called, and
> then process_incoming_migration_co() and process_incoming_migration_bh()
> will not be called either. In that case, there is no cleanup.

Sorry, the function name I wanted to paste is something like
migration_incoming_state_destroy()... Anyway, I still don't feel it is
right to close mis->from_src_file in a multifd-specific path.

For now, I'd either ignore the cleanup part (AFAIU the TLS failure path
also ignores it when migration_tls_channel_process_incoming() fails)
and just print the extra error message, or you can look into how to
clean up the dest vm in a better way. That could be something like
calling migration_incoming_state_destroy() somewhere in
migration_channel_process_incoming() when a failure happens, but I'm
not sure.

Regards,

--
Peter Xu
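
For illustration, the second option Peter mentions could look roughly
like this in migration_channel_process_incoming() (a hypothetical
sketch, not what the thread settled on):

    if (local_err) {
        error_report_err(local_err);
        /*
         * Hypothetical: tear down the destination's incoming state in
         * the common channel path rather than in multifd-specific code.
         */
        migration_incoming_state_destroy();
    }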
On 10/31/2018 06:18 AM, Peter Xu wrote:
> On Tue, Oct 30, 2018 at 06:05:18PM +0800, Fei Li wrote:
>
> [...]
>
>>>> @@ -1366,6 +1373,10 @@ bool multifd_recv_new_channel(QIOChannel *ioc)
>>>>                         QEMU_THREAD_JOINABLE);
>>>>      atomic_inc(&multifd_recv_state->count);
>>>>      return multifd_recv_state->count == migrate_multifd_channels();
>>>> +fail:
>>>> +    qemu_fclose(mis->from_src_file);
>>>> +    mis->from_src_file = NULL;
>>>> +    return false;
>>> Do we need this?
>>>
>>> I'd suggest to put all cleanups into a single function.  For dest vm
>>> I say it's process_incoming_migration_bh.
>>>
>>> Regards,
>>>
>> Not sure whether I understand correctly: if multifd_recv_new_channel()
>> fails, that means migration_incoming_process() will not be called, and
>> then process_incoming_migration_co() and process_incoming_migration_bh()
>> will not be called either. In that case, there is no cleanup.
> Sorry, the function name I wanted to paste is something like
> migration_incoming_state_destroy()... Anyway, I still don't feel it is
> right to close mis->from_src_file in a multifd-specific path.
>
> For now, I'd either ignore the cleanup part (AFAIU the TLS failure path
> also ignores it when migration_tls_channel_process_incoming() fails)
> and just print the extra error message,

I will adopt this option, thanks for the suggestion :)

Have a nice day, thanks
Fei

> or you can look into how to clean up the dest vm in a better way. That
> could be something like calling migration_incoming_state_destroy()
> somewhere in migration_channel_process_incoming() when a failure
> happens, but I'm not sure.
>
> Regards,
>