Migration object can be freed (e.g. after migration_shutdown()) before some
other device codes can still run, while we do have a bunch of migration
helpers exported in migration/misc.h that logically can be invoked at any
time of QEMU, even during destruction of a VM.
Make all these functions safe to be called, especially, not crashing after
the migration object is unreferenced from the main context.
To achieve it, only reference global_migration variable in the exported
functions. The variable is only reset with BQL, so it's safe to access.
Add a comment in the header explaining how to guarantee thread safe on
using these functions, and we choose BQL because fundamentally that's how
it's working now. We can move to other things (e.g. RCU) whenever
necessary in the future but it's an overkill if we have BQL anyway in
most/all existing callers.
When at it, update some comments, e.g. migrate_announce_params() is
exported from options.c now.
Cc: Cédric Le Goater <clg@redhat.com>
Cc: Avihai Horon <avihaih@nvidia.com>
Cc: Fabiano Rosas <farosas@suse.de>
Cc: Dr. David Alan Gilbert <dave@treblig.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/migration/misc.h | 26 +++++++++++++++++++++-----
migration/migration.c | 32 ++++++++++++++++++++++++++------
2 files changed, 47 insertions(+), 11 deletions(-)
diff --git a/include/migration/misc.h b/include/migration/misc.h
index df57be6b5e..48892f9672 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -19,8 +19,19 @@
#include "qapi/qapi-types-net.h"
#include "migration/client-options.h"
-/* migration/ram.c */
+/*
+ * Misc migration functions exported to be used in QEMU generic system
+ * code outside migration/.
+ *
+ * By default, BQL is recommended before using below functions to avoid
+ * race conditions (concurrent updates to global_migration variable). It's
+ * caller's responsibility to make sure it's thread safe otherwise when
+ * below helpers are used without BQL held. When unsure, take BQL always.
+ */
+/*
+ * migration/ram.c
+ */
typedef enum PrecopyNotifyReason {
PRECOPY_NOTIFY_SETUP = 0,
PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC = 1,
@@ -43,14 +54,19 @@ void ram_mig_init(void);
void qemu_guest_free_page_hint(void *addr, size_t len);
bool migrate_ram_is_ignored(RAMBlock *block);
-/* migration/block.c */
-
+/*
+ * migration/options.c
+ */
AnnounceParameters *migrate_announce_params(void);
-/* migration/savevm.c */
+/*
+ * migration/savevm.c
+ */
void dump_vmstate_json_to_file(FILE *out_fp);
-/* migration/migration.c */
+/*
+ * migration/migration.c
+ */
void migration_object_init(void);
void migration_shutdown(void);
bool migration_is_idle(void);
diff --git a/migration/migration.c b/migration/migration.c
index c4adad7972..667816cc65 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1157,7 +1157,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
*/
bool migration_is_setup_or_active(void)
{
- MigrationState *s = current_migration;
+ MigrationState *s = global_migration;
+
+ if (!s) {
+ return false;
+ }
switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
@@ -1174,7 +1178,6 @@ bool migration_is_setup_or_active(void)
default:
return false;
-
}
}
@@ -1721,7 +1724,11 @@ bool migration_is_idle(void)
bool migration_is_active(void)
{
- MigrationState *s = current_migration;
+ MigrationState *s = global_migration;
+
+ if (!s) {
+ return false;
+ }
return (s->state == MIGRATION_STATUS_ACTIVE ||
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -1729,14 +1736,23 @@ bool migration_is_active(void)
bool migration_is_device(void)
{
- MigrationState *s = current_migration;
+ MigrationState *s = global_migration;
+
+ if (!s) {
+ return false;
+ }
return s->state == MIGRATION_STATUS_DEVICE;
}
bool migration_thread_is_self(void)
{
- MigrationState *s = current_migration;
+ MigrationState *s = global_migration;
+
+ /* If no migration object, must not be the migration thread */
+ if (!s) {
+ return false;
+ }
return qemu_thread_is_self(&s->thread);
}
@@ -3113,7 +3129,11 @@ static MigThrError postcopy_pause(MigrationState *s)
void migration_file_set_error(int ret, Error *err)
{
- MigrationState *s = current_migration;
+ MigrationState *s = global_migration;
+
+ if (!s) {
+ return;
+ }
WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
if (s->to_dst_file) {
--
2.45.0
On Wed, Oct 23, 2024, 2:02 p.m. Peter Xu <peterx@redhat.com> wrote: > Migration object can be freed (e.g. after migration_shutdown()) before some > other device codes can still run, while we do have a bunch of migration > helpers exported in migration/misc.h that logically can be invoked at any > time of QEMU, even during destruction of a VM. > > Make all these functions safe to be called, especially, not crashing after > the migration object is unreferenced from the main context. > > To achieve it, only reference global_migration variable in the exported > functions. The variable is only reset with BQL, so it's safe to access. > > Add a comment in the header explaining how to guarantee thread safe on > using these functions, and we choose BQL because fundamentally that's how > it's working now. We can move to other things (e.g. RCU) whenever > necessary in the future but it's an overkill if we have BQL anyway in > most/all existing callers. > > When at it, update some comments, e.g. migrate_announce_params() is > exported from options.c now. > Err I wrongly fetched the list email then lost my local English fix with s/when/while/. I'll make sure it's fixed ultimately when repost. > Cc: Cédric Le Goater <clg@redhat.com> > Cc: Avihai Horon <avihaih@nvidia.com> > Cc: Fabiano Rosas <farosas@suse.de> > Cc: Dr. David Alan Gilbert <dave@treblig.org> > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > include/migration/misc.h | 26 +++++++++++++++++++++----- > migration/migration.c | 32 ++++++++++++++++++++++++++------ > 2 files changed, 47 insertions(+), 11 deletions(-) > > diff --git a/include/migration/misc.h b/include/migration/misc.h > index df57be6b5e..48892f9672 100644 > --- a/include/migration/misc.h > +++ b/include/migration/misc.h > @@ -19,8 +19,19 @@ > #include "qapi/qapi-types-net.h" > #include "migration/client-options.h" > > -/* migration/ram.c */ > +/* > + * Misc migration functions exported to be used in QEMU generic system > + * code outside migration/. > + * > + * By default, BQL is recommended before using below functions to avoid > + * race conditions (concurrent updates to global_migration variable). > It's > + * caller's responsibility to make sure it's thread safe otherwise when > + * below helpers are used without BQL held. When unsure, take BQL always. > + */ > > +/* > + * migration/ram.c > + */ > typedef enum PrecopyNotifyReason { > PRECOPY_NOTIFY_SETUP = 0, > PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC = 1, > @@ -43,14 +54,19 @@ void ram_mig_init(void); > void qemu_guest_free_page_hint(void *addr, size_t len); > bool migrate_ram_is_ignored(RAMBlock *block); > > -/* migration/block.c */ > - > +/* > + * migration/options.c > + */ > AnnounceParameters *migrate_announce_params(void); > -/* migration/savevm.c */ > > +/* > + * migration/savevm.c > + */ > void dump_vmstate_json_to_file(FILE *out_fp); > > -/* migration/migration.c */ > +/* > + * migration/migration.c > + */ > void migration_object_init(void); > void migration_shutdown(void); > bool migration_is_idle(void); > diff --git a/migration/migration.c b/migration/migration.c > index c4adad7972..667816cc65 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1157,7 +1157,11 @@ void > migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) > */ > bool migration_is_setup_or_active(void) > { > - MigrationState *s = current_migration; > + MigrationState *s = global_migration; > + > + if (!s) { > + return false; > + } > > switch (s->state) { > case MIGRATION_STATUS_ACTIVE: > @@ -1174,7 +1178,6 @@ bool migration_is_setup_or_active(void) > > default: > return false; > - > } > } > > @@ -1721,7 +1724,11 @@ bool migration_is_idle(void) > > bool migration_is_active(void) > { > - MigrationState *s = current_migration; > + MigrationState *s = global_migration; > + > + if (!s) { > + return false; > + } > > return (s->state == MIGRATION_STATUS_ACTIVE || > s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); > @@ -1729,14 +1736,23 @@ bool migration_is_active(void) > > bool migration_is_device(void) > { > - MigrationState *s = current_migration; > + MigrationState *s = global_migration; > + > + if (!s) { > + return false; > + } > > return s->state == MIGRATION_STATUS_DEVICE; > } > > bool migration_thread_is_self(void) > { > - MigrationState *s = current_migration; > + MigrationState *s = global_migration; > + > + /* If no migration object, must not be the migration thread */ > + if (!s) { > + return false; > + } > > return qemu_thread_is_self(&s->thread); > } > @@ -3113,7 +3129,11 @@ static MigThrError postcopy_pause(MigrationState *s) > > void migration_file_set_error(int ret, Error *err) > { > - MigrationState *s = current_migration; > + MigrationState *s = global_migration; > + > + if (!s) { > + return; > + } > > WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { > if (s->to_dst_file) { > -- > 2.45.0 > >
© 2016 - 2024 Red Hat, Inc.