migration/migration.c | 22 +++++++--------------- tests/qtest/migration-helpers.c | 13 ++++++++++--- tests/qtest/migration-helpers.h | 3 ++- tests/qtest/migration-test.c | 14 +++++++------- 4 files changed, 26 insertions(+), 26 deletions(-)
We do set MIGRATION_FAILED state, but don't give a chance to
orchestrator to query migration state and get the error.
Let's report an error through QAPI like we do on outgoing migration.
migration-test is updated correspondingly.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
Doubt: is exiting on failure a contract? Will this commit break
something in Libvirt? Finally, could we just change the logic, or I need
and additional migration-parameter for new behavior?
migration/migration.c | 22 +++++++---------------
tests/qtest/migration-helpers.c | 13 ++++++++++---
tests/qtest/migration-helpers.h | 3 ++-
tests/qtest/migration-test.c | 14 +++++++-------
4 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 86bf76e925..3c203e767d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyState ps;
int ret;
+ Error *local_err = NULL;
assert(mis->from_src_file);
if (compress_threads_load_setup(mis->from_src_file)) {
- error_report("Failed to setup decompress threads");
+ error_setg(&local_err, "Failed to setup decompress threads");
goto fail;
}
@@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
}
if (ret < 0) {
- MigrationState *s = migrate_get_current();
-
- if (migrate_has_error(s)) {
- WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
- error_report_err(s->error);
- }
- }
- error_report("load of migration failed: %s", strerror(-ret));
+ error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
goto fail;
}
if (colo_incoming_co() < 0) {
+ error_setg(&local_err, "colo incoming failed");
goto fail;
}
migration_bh_schedule(process_incoming_migration_bh, mis);
return;
fail:
+ migrate_set_error(migrate_get_current(), local_err);
+ error_report_err(local_err);
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
- qemu_fclose(mis->from_src_file);
-
- multifd_recv_cleanup();
- compress_threads_load_cleanup();
-
- exit(EXIT_FAILURE);
+ migration_incoming_state_destroy();
}
/**
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index e451dbdbed..91c13bd566 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
wait_for_migration_status(who, "completed", NULL);
}
-void wait_for_migration_fail(QTestState *from, bool allow_active)
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+ bool is_incoming)
{
g_test_timer_start();
QDict *rsp_return;
@@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
/* Is the machine currently running? */
rsp_return = qtest_qmp_assert_success_ref(from,
"{ 'execute': 'query-status' }");
- g_assert(qdict_haskey(rsp_return, "running"));
- g_assert(qdict_get_bool(rsp_return, "running"));
+ if (is_incoming) {
+ if (qdict_haskey(rsp_return, "running")) {
+ g_assert(!qdict_get_bool(rsp_return, "running"));
+ }
+ } else {
+ g_assert(qdict_haskey(rsp_return, "running"));
+ g_assert(qdict_get_bool(rsp_return, "running"));
+ }
qobject_unref(rsp_return);
}
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 3bf7ded1b9..7bd07059ae 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
void wait_for_migration_complete(QTestState *who);
-void wait_for_migration_fail(QTestState *from, bool allow_active);
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+ bool is_incoming);
char *find_common_machine_version(const char *mtype, const char *var1,
const char *var2);
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 1d2cee87ea..e00b755f05 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1670,7 +1670,7 @@ static void test_baddest(void)
return;
}
migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
- wait_for_migration_fail(from, false);
+ wait_for_migration_fail(from, false, false);
test_migrate_end(from, to, false);
}
@@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
if (args->result != MIG_TEST_SUCCEED) {
bool allow_active = args->result == MIG_TEST_FAIL;
- wait_for_migration_fail(from, allow_active);
+ wait_for_migration_fail(from, allow_active, false);
if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
- qtest_set_expected_status(to, EXIT_FAILURE);
+ wait_for_migration_fail(to, true, true);
}
} else {
if (args->live) {
@@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
migrate_qmp(from, uri, "{}");
if (should_fail) {
- qtest_set_expected_status(to, EXIT_FAILURE);
- wait_for_migration_fail(from, true);
+ wait_for_migration_fail(to, true, true);
+ wait_for_migration_fail(from, true, false);
} else {
wait_for_migration_complete(from);
}
@@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
migrate_cancel(from);
/* Make sure QEMU process "to" exited */
- qtest_set_expected_status(to, EXIT_FAILURE);
- qtest_wait_qemu(to);
+ wait_for_migration_fail(to, true, true);
+ qtest_quit(to);
args = (MigrateStart){
.only_target = true,
--
2.34.1
On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: > We do set MIGRATION_FAILED state, but don't give a chance to > orchestrator to query migration state and get the error. > > Let's report an error through QAPI like we do on outgoing migration. > > migration-test is updated correspondingly. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > --- > > Doubt: is exiting on failure a contract? Will this commit break > something in Libvirt? Finally, could we just change the logic, or I need > and additional migration-parameter for new behavior? There's a decent risk that this could break apps, whether libvirt or something else, especially if the app is just launching QEMU with '-incoming URI', rather than using '-incoming defer' and then explicitly using QMP to start the incoming migration. I'd say that with '-incoming defer' we should *not* exit on migration error, because that arg implies the app explicitly wants to be using QMP to control migration. With the legacy '-incoming URI' it is probably best to keep exit on error, as that's comparatively more likely to be used in adhoc scenarios where the app/user is ignoring QMP on the dst side. None the less, I think we need to check how libvirt behaves with this patch to be sure of no surprises. > > migration/migration.c | 22 +++++++--------------- > tests/qtest/migration-helpers.c | 13 ++++++++++--- > tests/qtest/migration-helpers.h | 3 ++- > tests/qtest/migration-test.c | 14 +++++++------- > 4 files changed, 26 insertions(+), 26 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 86bf76e925..3c203e767d 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque) > MigrationIncomingState *mis = migration_incoming_get_current(); > PostcopyState ps; > int ret; > + Error *local_err = NULL; > > assert(mis->from_src_file); > > if (compress_threads_load_setup(mis->from_src_file)) { > - error_report("Failed to setup decompress threads"); > + error_setg(&local_err, "Failed to setup decompress threads"); > goto fail; > } > > @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque) > } > > if (ret < 0) { > - MigrationState *s = migrate_get_current(); > - > - if (migrate_has_error(s)) { > - WITH_QEMU_LOCK_GUARD(&s->error_mutex) { > - error_report_err(s->error); > - } > - } > - error_report("load of migration failed: %s", strerror(-ret)); > + error_setg(&local_err, "load of migration failed: %s", strerror(-ret)); > goto fail; > } > > if (colo_incoming_co() < 0) { > + error_setg(&local_err, "colo incoming failed"); > goto fail; > } > > migration_bh_schedule(process_incoming_migration_bh, mis); > return; > fail: > + migrate_set_error(migrate_get_current(), local_err); > + error_report_err(local_err); > migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, > MIGRATION_STATUS_FAILED); > - qemu_fclose(mis->from_src_file); > - > - multifd_recv_cleanup(); > - compress_threads_load_cleanup(); > - > - exit(EXIT_FAILURE); > + migration_incoming_state_destroy(); > } > > /** > diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c > index e451dbdbed..91c13bd566 100644 > --- a/tests/qtest/migration-helpers.c > +++ b/tests/qtest/migration-helpers.c > @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who) > wait_for_migration_status(who, "completed", NULL); > } > > -void wait_for_migration_fail(QTestState *from, bool allow_active) > +void wait_for_migration_fail(QTestState *from, bool allow_active, > + bool is_incoming) > { > g_test_timer_start(); > QDict *rsp_return; > @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) > /* Is the machine currently running? */ > rsp_return = qtest_qmp_assert_success_ref(from, > "{ 'execute': 'query-status' }"); > - g_assert(qdict_haskey(rsp_return, "running")); > - g_assert(qdict_get_bool(rsp_return, "running")); > + if (is_incoming) { > + if (qdict_haskey(rsp_return, "running")) { > + g_assert(!qdict_get_bool(rsp_return, "running")); > + } > + } else { > + g_assert(qdict_haskey(rsp_return, "running")); > + g_assert(qdict_get_bool(rsp_return, "running")); > + } > qobject_unref(rsp_return); > } > > diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h > index 3bf7ded1b9..7bd07059ae 100644 > --- a/tests/qtest/migration-helpers.h > +++ b/tests/qtest/migration-helpers.h > @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who, > > void wait_for_migration_complete(QTestState *who); > > -void wait_for_migration_fail(QTestState *from, bool allow_active); > +void wait_for_migration_fail(QTestState *from, bool allow_active, > + bool is_incoming); > > char *find_common_machine_version(const char *mtype, const char *var1, > const char *var2); > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c > index 1d2cee87ea..e00b755f05 100644 > --- a/tests/qtest/migration-test.c > +++ b/tests/qtest/migration-test.c > @@ -1670,7 +1670,7 @@ static void test_baddest(void) > return; > } > migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); > - wait_for_migration_fail(from, false); > + wait_for_migration_fail(from, false, false); > test_migrate_end(from, to, false); > } > > @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args) > > if (args->result != MIG_TEST_SUCCEED) { > bool allow_active = args->result == MIG_TEST_FAIL; > - wait_for_migration_fail(from, allow_active); > + wait_for_migration_fail(from, allow_active, false); > > if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { > - qtest_set_expected_status(to, EXIT_FAILURE); > + wait_for_migration_fail(to, true, true); > } > } else { > if (args->live) { > @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) > migrate_qmp(from, uri, "{}"); > > if (should_fail) { > - qtest_set_expected_status(to, EXIT_FAILURE); > - wait_for_migration_fail(from, true); > + wait_for_migration_fail(to, true, true); > + wait_for_migration_fail(from, true, false); > } else { > wait_for_migration_complete(from); > } > @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void) > migrate_cancel(from); > > /* Make sure QEMU process "to" exited */ > - qtest_set_expected_status(to, EXIT_FAILURE); > - qtest_wait_qemu(to); > + wait_for_migration_fail(to, true, true); > + qtest_quit(to); > > args = (MigrateStart){ > .only_target = true, > -- > 2.34.1 > > With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 18.04.24 17:37, Daniel P. Berrangé wrote: > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: >> We do set MIGRATION_FAILED state, but don't give a chance to >> orchestrator to query migration state and get the error. >> >> Let's report an error through QAPI like we do on outgoing migration. >> >> migration-test is updated correspondingly. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >> --- >> >> Doubt: is exiting on failure a contract? Will this commit break >> something in Libvirt? Finally, could we just change the logic, or I need >> and additional migration-parameter for new behavior? > > There's a decent risk that this could break apps, whether > libvirt or something else, especially if the app is just > launching QEMU with '-incoming URI', rather than using > '-incoming defer' and then explicitly using QMP to start the > incoming migration. > > I'd say that with '-incoming defer' we should *not* exit on > migration error, because that arg implies the app explicitly > wants to be using QMP to control migration. > > With the legacy '-incoming URI' it is probably best to keep > exit on error, as that's comparatively more likely to be used > in adhoc scenarios where the app/user is ignoring QMP on the > dst side. > > None the less, I think we need to check how libvirt behaves > with this patch to be sure of no surprises. > Sounds reasonable, thanks! I'll rework it to behave the new way only with "-incoming defer", and check how libvirt behave with it. >> >> migration/migration.c | 22 +++++++--------------- >> tests/qtest/migration-helpers.c | 13 ++++++++++--- >> tests/qtest/migration-helpers.h | 3 ++- >> tests/qtest/migration-test.c | 14 +++++++------- >> 4 files changed, 26 insertions(+), 26 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index 86bf76e925..3c203e767d 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque) >> MigrationIncomingState *mis = migration_incoming_get_current(); >> PostcopyState ps; >> int ret; >> + Error *local_err = NULL; >> >> assert(mis->from_src_file); >> >> if (compress_threads_load_setup(mis->from_src_file)) { >> - error_report("Failed to setup decompress threads"); >> + error_setg(&local_err, "Failed to setup decompress threads"); >> goto fail; >> } >> >> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque) >> } >> >> if (ret < 0) { >> - MigrationState *s = migrate_get_current(); >> - >> - if (migrate_has_error(s)) { >> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) { >> - error_report_err(s->error); >> - } >> - } >> - error_report("load of migration failed: %s", strerror(-ret)); >> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret)); >> goto fail; >> } >> >> if (colo_incoming_co() < 0) { >> + error_setg(&local_err, "colo incoming failed"); >> goto fail; >> } >> >> migration_bh_schedule(process_incoming_migration_bh, mis); >> return; >> fail: >> + migrate_set_error(migrate_get_current(), local_err); >> + error_report_err(local_err); >> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, >> MIGRATION_STATUS_FAILED); >> - qemu_fclose(mis->from_src_file); >> - >> - multifd_recv_cleanup(); >> - compress_threads_load_cleanup(); >> - >> - exit(EXIT_FAILURE); >> + migration_incoming_state_destroy(); >> } >> >> /** >> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c >> index e451dbdbed..91c13bd566 100644 >> --- a/tests/qtest/migration-helpers.c >> +++ b/tests/qtest/migration-helpers.c >> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who) >> wait_for_migration_status(who, "completed", NULL); >> } >> >> -void wait_for_migration_fail(QTestState *from, bool allow_active) >> +void wait_for_migration_fail(QTestState *from, bool allow_active, >> + bool is_incoming) >> { >> g_test_timer_start(); >> QDict *rsp_return; >> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) >> /* Is the machine currently running? */ >> rsp_return = qtest_qmp_assert_success_ref(from, >> "{ 'execute': 'query-status' }"); >> - g_assert(qdict_haskey(rsp_return, "running")); >> - g_assert(qdict_get_bool(rsp_return, "running")); >> + if (is_incoming) { >> + if (qdict_haskey(rsp_return, "running")) { >> + g_assert(!qdict_get_bool(rsp_return, "running")); >> + } >> + } else { >> + g_assert(qdict_haskey(rsp_return, "running")); >> + g_assert(qdict_get_bool(rsp_return, "running")); >> + } >> qobject_unref(rsp_return); >> } >> >> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h >> index 3bf7ded1b9..7bd07059ae 100644 >> --- a/tests/qtest/migration-helpers.h >> +++ b/tests/qtest/migration-helpers.h >> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who, >> >> void wait_for_migration_complete(QTestState *who); >> >> -void wait_for_migration_fail(QTestState *from, bool allow_active); >> +void wait_for_migration_fail(QTestState *from, bool allow_active, >> + bool is_incoming); >> >> char *find_common_machine_version(const char *mtype, const char *var1, >> const char *var2); >> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c >> index 1d2cee87ea..e00b755f05 100644 >> --- a/tests/qtest/migration-test.c >> +++ b/tests/qtest/migration-test.c >> @@ -1670,7 +1670,7 @@ static void test_baddest(void) >> return; >> } >> migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); >> - wait_for_migration_fail(from, false); >> + wait_for_migration_fail(from, false, false); >> test_migrate_end(from, to, false); >> } >> >> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args) >> >> if (args->result != MIG_TEST_SUCCEED) { >> bool allow_active = args->result == MIG_TEST_FAIL; >> - wait_for_migration_fail(from, allow_active); >> + wait_for_migration_fail(from, allow_active, false); >> >> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { >> - qtest_set_expected_status(to, EXIT_FAILURE); >> + wait_for_migration_fail(to, true, true); >> } >> } else { >> if (args->live) { >> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) >> migrate_qmp(from, uri, "{}"); >> >> if (should_fail) { >> - qtest_set_expected_status(to, EXIT_FAILURE); >> - wait_for_migration_fail(from, true); >> + wait_for_migration_fail(to, true, true); >> + wait_for_migration_fail(from, true, false); >> } else { >> wait_for_migration_complete(from); >> } >> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void) >> migrate_cancel(from); >> >> /* Make sure QEMU process "to" exited */ >> - qtest_set_expected_status(to, EXIT_FAILURE); >> - qtest_wait_qemu(to); >> + wait_for_migration_fail(to, true, true); >> + qtest_quit(to); >> >> args = (MigrateStart){ >> .only_target = true, >> -- >> 2.34.1 >> >> > > With regards, > Daniel -- Best regards, Vladimir
On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote: > On 18.04.24 17:37, Daniel P. Berrangé wrote: > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: > > > We do set MIGRATION_FAILED state, but don't give a chance to > > > orchestrator to query migration state and get the error. > > > > > > Let's report an error through QAPI like we do on outgoing migration. > > > > > > migration-test is updated correspondingly. > > > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > > > --- > > > > > > Doubt: is exiting on failure a contract? Will this commit break > > > something in Libvirt? Finally, could we just change the logic, or I need > > > and additional migration-parameter for new behavior? > > > > There's a decent risk that this could break apps, whether > > libvirt or something else, especially if the app is just > > launching QEMU with '-incoming URI', rather than using > > '-incoming defer' and then explicitly using QMP to start the > > incoming migration. > > > > I'd say that with '-incoming defer' we should *not* exit on > > migration error, because that arg implies the app explicitly > > wants to be using QMP to control migration. > > > > With the legacy '-incoming URI' it is probably best to keep > > exit on error, as that's comparatively more likely to be used > > in adhoc scenarios where the app/user is ignoring QMP on the > > dst side. > > > > None the less, I think we need to check how libvirt behaves > > with this patch to be sure of no surprises. > > > > Sounds reasonable, thanks! I'll rework it to behave the new > way only with "-incoming defer", and check how libvirt behave with it. If there are problems and/or we want to be super safe wrt backcompat, we could add a new '-incoming managed' as being equivalent to '-incoming defer' but without the implicit exit. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 18.04.24 18:43, Daniel P. Berrangé wrote: > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote: >> On 18.04.24 17:37, Daniel P. Berrangé wrote: >>> On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: >>>> We do set MIGRATION_FAILED state, but don't give a chance to >>>> orchestrator to query migration state and get the error. >>>> >>>> Let's report an error through QAPI like we do on outgoing migration. >>>> >>>> migration-test is updated correspondingly. >>>> >>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >>>> --- >>>> >>>> Doubt: is exiting on failure a contract? Will this commit break >>>> something in Libvirt? Finally, could we just change the logic, or I need >>>> and additional migration-parameter for new behavior? >>> >>> There's a decent risk that this could break apps, whether >>> libvirt or something else, especially if the app is just >>> launching QEMU with '-incoming URI', rather than using >>> '-incoming defer' and then explicitly using QMP to start the >>> incoming migration. >>> >>> I'd say that with '-incoming defer' we should *not* exit on >>> migration error, because that arg implies the app explicitly >>> wants to be using QMP to control migration. >>> >>> With the legacy '-incoming URI' it is probably best to keep >>> exit on error, as that's comparatively more likely to be used >>> in adhoc scenarios where the app/user is ignoring QMP on the >>> dst side. >>> >>> None the less, I think we need to check how libvirt behaves >>> with this patch to be sure of no surprises. >>> >> >> Sounds reasonable, thanks! I'll rework it to behave the new >> way only with "-incoming defer", and check how libvirt behave with it. > > If there are problems and/or we want to be super safe wrt > backcompat, we could add a new '-incoming managed' as > being equivalent to '-incoming defer' but without the > implicit exit. > Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally. And we can in the same time deprecate "-incoming defer" in favor of new behavior. -- Best regards, Vladimir
On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote: > On 18.04.24 18:43, Daniel P. Berrangé wrote: > > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote: > > > On 18.04.24 17:37, Daniel P. Berrangé wrote: > > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: > > > > > We do set MIGRATION_FAILED state, but don't give a chance to > > > > > orchestrator to query migration state and get the error. > > > > > > > > > > Let's report an error through QAPI like we do on outgoing migration. > > > > > > > > > > migration-test is updated correspondingly. > > > > > > > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > > > > > --- > > > > > > > > > > Doubt: is exiting on failure a contract? Will this commit break > > > > > something in Libvirt? Finally, could we just change the logic, or I need > > > > > and additional migration-parameter for new behavior? > > > > > > > > There's a decent risk that this could break apps, whether > > > > libvirt or something else, especially if the app is just > > > > launching QEMU with '-incoming URI', rather than using > > > > '-incoming defer' and then explicitly using QMP to start the > > > > incoming migration. > > > > > > > > I'd say that with '-incoming defer' we should *not* exit on > > > > migration error, because that arg implies the app explicitly > > > > wants to be using QMP to control migration. > > > > > > > > With the legacy '-incoming URI' it is probably best to keep > > > > exit on error, as that's comparatively more likely to be used > > > > in adhoc scenarios where the app/user is ignoring QMP on the > > > > dst side. > > > > > > > > None the less, I think we need to check how libvirt behaves > > > > with this patch to be sure of no surprises. > > > > > > > > > > Sounds reasonable, thanks! I'll rework it to behave the new > > > way only with "-incoming defer", and check how libvirt behave with it. > > > > If there are problems and/or we want to be super safe wrt > > backcompat, we could add a new '-incoming managed' as > > being equivalent to '-incoming defer' but without the > > implicit exit. > > > > Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally. > And we can in the same time deprecate "-incoming defer" in favor of new behavior. Or just make it a new migration parameter? Then we keep all existing interfaces untouched, no risk of breaking anyone, and then it'll also apply to anyone who uses things like -incoming tcp but still wants to keep the qemu instance alive? Thanks, -- Peter Xu
On Thu, Apr 18, 2024 at 12:43:42PM -0400, Peter Xu wrote: > On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote: > > On 18.04.24 18:43, Daniel P. Berrangé wrote: > > > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote: > > > > On 18.04.24 17:37, Daniel P. Berrangé wrote: > > > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote: > > > > > > We do set MIGRATION_FAILED state, but don't give a chance to > > > > > > orchestrator to query migration state and get the error. > > > > > > > > > > > > Let's report an error through QAPI like we do on outgoing migration. > > > > > > > > > > > > migration-test is updated correspondingly. > > > > > > > > > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > > > > > > --- > > > > > > > > > > > > Doubt: is exiting on failure a contract? Will this commit break > > > > > > something in Libvirt? Finally, could we just change the logic, or I need > > > > > > and additional migration-parameter for new behavior? > > > > > > > > > > There's a decent risk that this could break apps, whether > > > > > libvirt or something else, especially if the app is just > > > > > launching QEMU with '-incoming URI', rather than using > > > > > '-incoming defer' and then explicitly using QMP to start the > > > > > incoming migration. > > > > > > > > > > I'd say that with '-incoming defer' we should *not* exit on > > > > > migration error, because that arg implies the app explicitly > > > > > wants to be using QMP to control migration. > > > > > > > > > > With the legacy '-incoming URI' it is probably best to keep > > > > > exit on error, as that's comparatively more likely to be used > > > > > in adhoc scenarios where the app/user is ignoring QMP on the > > > > > dst side. > > > > > > > > > > None the less, I think we need to check how libvirt behaves > > > > > with this patch to be sure of no surprises. > > > > > > > > > > > > > Sounds reasonable, thanks! I'll rework it to behave the new > > > > way only with "-incoming defer", and check how libvirt behave with it. > > > > > > If there are problems and/or we want to be super safe wrt > > > backcompat, we could add a new '-incoming managed' as > > > being equivalent to '-incoming defer' but without the > > > implicit exit. > > > > > > > Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally. > > And we can in the same time deprecate "-incoming defer" in favor of new behavior. > > Or just make it a new migration parameter? Then we keep all existing > interfaces untouched, no risk of breaking anyone, and then it'll also apply > to anyone who uses things like -incoming tcp but still wants to keep the > qemu instance alive? True, or even more simply, an argument to the 'migrate-incoming' command diff --git a/qapi/migration.json b/qapi/migration.json index 8c65b90328..6882aef328 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1889,7 +1889,8 @@ ## { 'command': 'migrate-incoming', 'data': {'*uri': 'str', - '*channels': [ 'MigrationChannel' ] } } + '*channels': [ 'MigrationChannel' ], + '*exit-on-error': 'bool' } } ## # @xen-save-devices-state: With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > We do set MIGRATION_FAILED state, but don't give a chance to > orchestrator to query migration state and get the error. > > Let's report an error through QAPI like we do on outgoing migration. > > migration-test is updated correspondingly. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > --- > > Doubt: is exiting on failure a contract? Will this commit break > something in Libvirt? Finally, could we just change the logic, or I need > and additional migration-parameter for new behavior? It seems we depend on the non-zero value: 4aead69241 ("migration: reflect incoming failure to shell") Author: Eric Blake <eblake@redhat.com> Date: Tue Apr 16 15:50:41 2013 -0600 migration: reflect incoming failure to shell Management apps like libvirt don't know to pay attention to stderr unless there is a non-zero exit status. * migration.c (process_incoming_migration_co): Exit with non-zero status on failure. Signed-off-by: Eric Blake <eblake@redhat.com> Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> One idea would be to plumb the s->error somehow through migration_shutdown() and allow qemu_cleanup() to change the status value. > migration/migration.c | 22 +++++++--------------- > tests/qtest/migration-helpers.c | 13 ++++++++++--- > tests/qtest/migration-helpers.h | 3 ++- > tests/qtest/migration-test.c | 14 +++++++------- > 4 files changed, 26 insertions(+), 26 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 86bf76e925..3c203e767d 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque) > MigrationIncomingState *mis = migration_incoming_get_current(); > PostcopyState ps; > int ret; > + Error *local_err = NULL; > > assert(mis->from_src_file); > > if (compress_threads_load_setup(mis->from_src_file)) { > - error_report("Failed to setup decompress threads"); > + error_setg(&local_err, "Failed to setup decompress threads"); > goto fail; > } > > @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque) > } > > if (ret < 0) { > - MigrationState *s = migrate_get_current(); > - > - if (migrate_has_error(s)) { > - WITH_QEMU_LOCK_GUARD(&s->error_mutex) { > - error_report_err(s->error); > - } > - } > - error_report("load of migration failed: %s", strerror(-ret)); > + error_setg(&local_err, "load of migration failed: %s", strerror(-ret)); > goto fail; > } > > if (colo_incoming_co() < 0) { > + error_setg(&local_err, "colo incoming failed"); > goto fail; > } > > migration_bh_schedule(process_incoming_migration_bh, mis); > return; > fail: > + migrate_set_error(migrate_get_current(), local_err); > + error_report_err(local_err); This will report an different error from the QMP one if s->error happens to be already set. Either use s->error here or prepend the "load of migration..." error to the s->error above. > migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, > MIGRATION_STATUS_FAILED); > - qemu_fclose(mis->from_src_file); > - > - multifd_recv_cleanup(); > - compress_threads_load_cleanup(); > - > - exit(EXIT_FAILURE); > + migration_incoming_state_destroy(); > } > > /** > diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c > index e451dbdbed..91c13bd566 100644 > --- a/tests/qtest/migration-helpers.c > +++ b/tests/qtest/migration-helpers.c > @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who) > wait_for_migration_status(who, "completed", NULL); > } > > -void wait_for_migration_fail(QTestState *from, bool allow_active) > +void wait_for_migration_fail(QTestState *from, bool allow_active, > + bool is_incoming) > { > g_test_timer_start(); > QDict *rsp_return; > @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) > /* Is the machine currently running? */ > rsp_return = qtest_qmp_assert_success_ref(from, > "{ 'execute': 'query-status' }"); > - g_assert(qdict_haskey(rsp_return, "running")); > - g_assert(qdict_get_bool(rsp_return, "running")); > + if (is_incoming) { > + if (qdict_haskey(rsp_return, "running")) { > + g_assert(!qdict_get_bool(rsp_return, "running")); > + } > + } else { > + g_assert(qdict_haskey(rsp_return, "running")); > + g_assert(qdict_get_bool(rsp_return, "running")); > + } > qobject_unref(rsp_return); > } > > diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h > index 3bf7ded1b9..7bd07059ae 100644 > --- a/tests/qtest/migration-helpers.h > +++ b/tests/qtest/migration-helpers.h > @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who, > > void wait_for_migration_complete(QTestState *who); > > -void wait_for_migration_fail(QTestState *from, bool allow_active); > +void wait_for_migration_fail(QTestState *from, bool allow_active, > + bool is_incoming); > > char *find_common_machine_version(const char *mtype, const char *var1, > const char *var2); > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c > index 1d2cee87ea..e00b755f05 100644 > --- a/tests/qtest/migration-test.c > +++ b/tests/qtest/migration-test.c > @@ -1670,7 +1670,7 @@ static void test_baddest(void) > return; > } > migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); > - wait_for_migration_fail(from, false); > + wait_for_migration_fail(from, false, false); > test_migrate_end(from, to, false); > } > > @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args) > > if (args->result != MIG_TEST_SUCCEED) { > bool allow_active = args->result == MIG_TEST_FAIL; > - wait_for_migration_fail(from, allow_active); > + wait_for_migration_fail(from, allow_active, false); > > if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { > - qtest_set_expected_status(to, EXIT_FAILURE); > + wait_for_migration_fail(to, true, true); > } > } else { > if (args->live) { > @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) > migrate_qmp(from, uri, "{}"); > > if (should_fail) { > - qtest_set_expected_status(to, EXIT_FAILURE); > - wait_for_migration_fail(from, true); > + wait_for_migration_fail(to, true, true); > + wait_for_migration_fail(from, true, false); > } else { > wait_for_migration_complete(from); > } > @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void) > migrate_cancel(from); > > /* Make sure QEMU process "to" exited */ > - qtest_set_expected_status(to, EXIT_FAILURE); > - qtest_wait_qemu(to); > + wait_for_migration_fail(to, true, true); > + qtest_quit(to); > > args = (MigrateStart){ > .only_target = true,
On 18.04.24 17:27, Fabiano Rosas wrote: > Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > >> We do set MIGRATION_FAILED state, but don't give a chance to >> orchestrator to query migration state and get the error. >> >> Let's report an error through QAPI like we do on outgoing migration. >> >> migration-test is updated correspondingly. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >> --- >> >> Doubt: is exiting on failure a contract? Will this commit break >> something in Libvirt? Finally, could we just change the logic, or I need >> and additional migration-parameter for new behavior? > > It seems we depend on the non-zero value: > > 4aead69241 ("migration: reflect incoming failure to shell") > Author: Eric Blake <eblake@redhat.com> > Date: Tue Apr 16 15:50:41 2013 -0600 > > migration: reflect incoming failure to shell > > Management apps like libvirt don't know to pay attention to > stderr unless there is a non-zero exit status. > > * migration.c (process_incoming_migration_co): Exit with non-zero > status on failure. > > Signed-off-by: Eric Blake <eblake@redhat.com> > Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com > Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> > > One idea would be to plumb the s->error somehow through > migration_shutdown() and allow qemu_cleanup() to change the status > value. The idea is not to exit at all, and wait for 'quit' QMP command to exit. But I agree with Daniel that new behavior is good only for -incoming defer. > >> migration/migration.c | 22 +++++++--------------- >> tests/qtest/migration-helpers.c | 13 ++++++++++--- >> tests/qtest/migration-helpers.h | 3 ++- >> tests/qtest/migration-test.c | 14 +++++++------- >> 4 files changed, 26 insertions(+), 26 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index 86bf76e925..3c203e767d 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque) >> MigrationIncomingState *mis = migration_incoming_get_current(); >> PostcopyState ps; >> int ret; >> + Error *local_err = NULL; >> >> assert(mis->from_src_file); >> >> if (compress_threads_load_setup(mis->from_src_file)) { >> - error_report("Failed to setup decompress threads"); >> + error_setg(&local_err, "Failed to setup decompress threads"); >> goto fail; >> } >> >> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque) >> } >> >> if (ret < 0) { >> - MigrationState *s = migrate_get_current(); >> - >> - if (migrate_has_error(s)) { >> - WITH_QEMU_LOCK_GUARD(&s->error_mutex) { >> - error_report_err(s->error); >> - } >> - } >> - error_report("load of migration failed: %s", strerror(-ret)); >> + error_setg(&local_err, "load of migration failed: %s", strerror(-ret)); >> goto fail; >> } >> >> if (colo_incoming_co() < 0) { >> + error_setg(&local_err, "colo incoming failed"); >> goto fail; >> } >> >> migration_bh_schedule(process_incoming_migration_bh, mis); >> return; >> fail: >> + migrate_set_error(migrate_get_current(), local_err); >> + error_report_err(local_err); > > This will report an different error from the QMP one if s->error happens > to be already set. Either use s->error here or prepend the "load of > migration..." error to the s->error above. I had another idea: first, modify migrate_set_error so that it always prints the error. This way we should not care to print it here. > >> migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, >> MIGRATION_STATUS_FAILED); >> - qemu_fclose(mis->from_src_file); >> - >> - multifd_recv_cleanup(); >> - compress_threads_load_cleanup(); >> - >> - exit(EXIT_FAILURE); >> + migration_incoming_state_destroy(); >> } >> >> /** >> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c >> index e451dbdbed..91c13bd566 100644 >> --- a/tests/qtest/migration-helpers.c >> +++ b/tests/qtest/migration-helpers.c >> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who) >> wait_for_migration_status(who, "completed", NULL); >> } >> >> -void wait_for_migration_fail(QTestState *from, bool allow_active) >> +void wait_for_migration_fail(QTestState *from, bool allow_active, >> + bool is_incoming) >> { >> g_test_timer_start(); >> QDict *rsp_return; >> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) >> /* Is the machine currently running? */ >> rsp_return = qtest_qmp_assert_success_ref(from, >> "{ 'execute': 'query-status' }"); >> - g_assert(qdict_haskey(rsp_return, "running")); >> - g_assert(qdict_get_bool(rsp_return, "running")); >> + if (is_incoming) { >> + if (qdict_haskey(rsp_return, "running")) { >> + g_assert(!qdict_get_bool(rsp_return, "running")); >> + } >> + } else { >> + g_assert(qdict_haskey(rsp_return, "running")); >> + g_assert(qdict_get_bool(rsp_return, "running")); >> + } >> qobject_unref(rsp_return); >> } >> >> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h >> index 3bf7ded1b9..7bd07059ae 100644 >> --- a/tests/qtest/migration-helpers.h >> +++ b/tests/qtest/migration-helpers.h >> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who, >> >> void wait_for_migration_complete(QTestState *who); >> >> -void wait_for_migration_fail(QTestState *from, bool allow_active); >> +void wait_for_migration_fail(QTestState *from, bool allow_active, >> + bool is_incoming); >> >> char *find_common_machine_version(const char *mtype, const char *var1, >> const char *var2); >> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c >> index 1d2cee87ea..e00b755f05 100644 >> --- a/tests/qtest/migration-test.c >> +++ b/tests/qtest/migration-test.c >> @@ -1670,7 +1670,7 @@ static void test_baddest(void) >> return; >> } >> migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); >> - wait_for_migration_fail(from, false); >> + wait_for_migration_fail(from, false, false); >> test_migrate_end(from, to, false); >> } >> >> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args) >> >> if (args->result != MIG_TEST_SUCCEED) { >> bool allow_active = args->result == MIG_TEST_FAIL; >> - wait_for_migration_fail(from, allow_active); >> + wait_for_migration_fail(from, allow_active, false); >> >> if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { >> - qtest_set_expected_status(to, EXIT_FAILURE); >> + wait_for_migration_fail(to, true, true); >> } >> } else { >> if (args->live) { >> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) >> migrate_qmp(from, uri, "{}"); >> >> if (should_fail) { >> - qtest_set_expected_status(to, EXIT_FAILURE); >> - wait_for_migration_fail(from, true); >> + wait_for_migration_fail(to, true, true); >> + wait_for_migration_fail(from, true, false); >> } else { >> wait_for_migration_complete(from); >> } >> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void) >> migrate_cancel(from); >> >> /* Make sure QEMU process "to" exited */ >> - qtest_set_expected_status(to, EXIT_FAILURE); >> - qtest_wait_qemu(to); >> + wait_for_migration_fail(to, true, true); >> + qtest_quit(to); >> >> args = (MigrateStart){ >> .only_target = true, -- Best regards, Vladimir
© 2016 - 2024 Red Hat, Inc.