:p
atchew
Login
The following changes since commit 2b81c046252fbfb375ad30632362fc16e6e22bd5: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2024-09-17 14:02:18 +0100) are available in the Git repository at: https://gitlab.com/peterx/qemu.git tags/migration-20240917-pull-request for you to fetch changes up to 7b8b4c0e59d2b7928836072536a5528820d8a041: migration/multifd: Fix rb->receivedmap cleanup race (2024-09-17 17:50:45 -0400) ---------------------------------------------------------------- Migration pull for 9.2 - Fabiano's patch to move two tests to slow tests. - Peter's patch to fix qatzip builds - Stefan's multifd-zstd fix on unsigned diff comparisons - Fea's bug fix to consistently use memattrs when map() address space - Fabiano's bug fix on multifd race condition against receivedmap ---------------------------------------------------------------- Fabiano Rosas (3): tests/qtest/migration: Move a couple of slow tests under g_test_slow migration/savevm: Remove extra load cleanup calls migration/multifd: Fix rb->receivedmap cleanup race Fea.Wang (1): softmmu/physmem.c: Keep transaction attribute in address_space_map() Peter Xu (1): migration/multifd: Fix build for qatzip Stefan Weil (1): migration/multifd: Fix loop conditions in multifd_zstd_send_prepare and multifd_zstd_recv migration/migration.c | 5 +++++ migration/multifd-qatzip.c | 18 +++++++++--------- migration/multifd-zstd.c | 8 ++++---- migration/savevm.c | 8 ++++---- system/physmem.c | 2 +- tests/qtest/migration-test.c | 8 +++++--- 6 files changed, 28 insertions(+), 21 deletions(-) -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> The xbzrle and vcpu_dirty_limit tests are the two slowest tests in migration-test. Move them under g_test_slow() to save about 40s per run. Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240911145204.17692-1-farosas@suse.de Signed-off-by: Peter Xu <peterx@redhat.com> --- tests/qtest/migration-test.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) migration_test_add("/migration/precopy/unix/plain", test_precopy_unix_plain); - migration_test_add("/migration/precopy/unix/xbzrle", - test_precopy_unix_xbzrle); + if (g_test_slow()) { + migration_test_add("/migration/precopy/unix/xbzrle", + test_precopy_unix_xbzrle); + } migration_test_add("/migration/precopy/file", test_precopy_file); migration_test_add("/migration/precopy/file/offset", @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) if (g_str_equal(arch, "x86_64") && has_kvm && kvm_dirty_ring_supported()) { migration_test_add("/migration/dirty_ring", test_precopy_unix_dirty_ring); - if (qtest_has_machine("pc")) { + if (qtest_has_machine("pc") && g_test_slow()) { migration_test_add("/migration/vcpu_dirty_limit", test_vcpu_dirty_limit); } -- 2.45.0
The qatzip series was based on an older commit; it applied cleanly even though it had conflicts. Neither CI nor I noticed that the build would break, because the qatzip code is skipped by default when the qatzip library is missing. Fix the build issues. No need to copy stable as it just landed in 9.2. Cc: Yichen Wang <yichen.wang@bytedance.com> Cc: Bryan Zhang <bryan.zhang@bytedance.com> Cc: Hao Xiang <hao.xiang@linux.dev> Cc: Yuan Liu <yuan1.liu@intel.com> Fixes: 80484f9459 ("migration: Introduce 'qatzip' compression method") Link: https://lore.kernel.org/r/20240910210450.3835123-1-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/multifd-qatzip.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd-qatzip.c +++ b/migration/multifd-qatzip.c @@ -XXX,XX +XXX,XX @@ static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) */ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) { - MultiFDPages_t *pages = p->pages; + uint32_t page_size = multifd_ram_page_size(); + MultiFDPages_t *pages = &p->data->u.ram; QatzipData *q = p->compress_data; int ret; unsigned int in_len, out_len; @@ -XXX,XX +XXX,XX @@ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) * implementation. 
*/ for (int i = 0; i < pages->normal_num; i++) { - memcpy(q->in_buf + (i * p->page_size), + memcpy(q->in_buf + (i * page_size), pages->block->host + pages->offset[i], - p->page_size); + page_size); } - in_len = pages->normal_num * p->page_size; + in_len = pages->normal_num * page_size; if (in_len > q->in_len) { error_setg(errp, "multifd %u: unexpectedly large input", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) p->id, ret); return -1; } - if (in_len != pages->normal_num * p->page_size) { + if (in_len != pages->normal_num * page_size) { error_setg(errp, "multifd %u: QATzip failed to compress all input", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) int ret; unsigned int in_len, out_len; uint32_t in_size = p->next_packet_size; - uint32_t expected_size = p->normal_num * p->page_size; + uint32_t page_size = multifd_ram_page_size(); + uint32_t expected_size = p->normal_num * page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; if (in_size > q->in_len) { @@ -XXX,XX +XXX,XX @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) /* Copy each page to its appropriate location. */ for (int i = 0; i < p->normal_num; i++) { - memcpy(p->host + p->normal[i], - q->out_buf + p->page_size * i, - p->page_size); + memcpy(p->host + p->normal[i], q->out_buf + page_size * i, page_size); } return 0; } -- 2.45.0
From: Stefan Weil via <qemu-devel@nongnu.org> GitHub's CodeQL reports four critical errors which are fixed by this commit: Unsigned difference expression compared to zero An expression (u - v > 0) with unsigned values u, v is only false if u == v, so all changed expressions did not work as expected. Signed-off-by: Stefan Weil <sw@weilnetz.de> Link: https://lore.kernel.org/r/20240910054138.1458555-1-sw@weilnetz.de Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/multifd-zstd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -XXX,XX +XXX,XX @@ static int multifd_zstd_send_prepare(MultiFDSendParams *p, Error **errp) */ do { ret = ZSTD_compressStream2(z->zcs, &z->out, &z->in, flush); - } while (ret > 0 && (z->in.size - z->in.pos > 0) - && (z->out.size - z->out.pos > 0)); - if (ret > 0 && (z->in.size - z->in.pos > 0)) { + } while (ret > 0 && (z->in.size > z->in.pos) + && (z->out.size > z->out.pos)); + if (ret > 0 && (z->in.size > z->in.pos)) { error_setg(errp, "multifd %u: compressStream buffer too small", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int multifd_zstd_recv(MultiFDRecvParams *p, Error **errp) */ do { ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); - } while (ret > 0 && (z->in.size - z->in.pos > 0) + } while (ret > 0 && (z->in.size > z->in.pos) && (z->out.pos < page_size)); if (ret > 0 && (z->out.pos < page_size)) { error_setg(errp, "multifd %u: decompressStream buffer too small", -- 2.45.0
From: "Fea.Wang" <fea.wang@sifive.com> Follow-up transactions may use the data in the transaction attributes, so pass the attrs value from the function parameter to flatview_read(), just as the flatview_translate() call above does. Signed-off-by: Fea.Wang <fea.wang@sifive.com> Cc: qemu-stable@nongnu.org Fixes: f26404fbee ("Make address_space_map() take a MemTxAttrs argument") Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Link: https://lore.kernel.org/r/20240912070404.2993976-2-fea.wang@sifive.com Signed-off-by: Peter Xu <peterx@redhat.com> --- system/physmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/physmem.c b/system/physmem.c index XXXXXXX..XXXXXXX 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -XXX,XX +XXX,XX @@ void *address_space_map(AddressSpace *as, bounce->len = l; if (!is_write) { - flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, + flatview_read(fv, addr, attrs, bounce->buffer, l); } -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> There are two qemu_loadvm_state_cleanup() calls that were introduced when qemu_loadvm_state_setup() was still called before loading the configuration section, so there was state to be cleaned up if the header checks failed. However, commit 9e14b84908 ("migration/savevm: load_header before load_setup") has moved that configuration section part to qemu_loadvm_state_header() which now happens before qemu_loadvm_state_setup(). Remove the cleanup calls that are now misplaced. Note that we didn't use Fixes because it's benign to cleanup() even if setup() is not invoked. So this patch is not needed for stable, as it falls into cleanup category. Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240917185802.15619-2-farosas@suse.de [peterx: added last paragraph of commit message] Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/savevm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index XXXXXXX..XXXXXXX 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -XXX,XX +XXX,XX @@ static int qemu_loadvm_state_header(QEMUFile *f) if (migrate_get_current()->send_configuration) { if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { error_report("Configuration section missing"); - qemu_loadvm_state_cleanup(); return -EINVAL; } ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); if (ret) { - qemu_loadvm_state_cleanup(); return ret; } } -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> Fix a segmentation fault in multifd when rb->receivedmap is cleared too early. After commit 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults"), multifd started using the rb->receivedmap bitmap, which belongs to ram.c and is initialized and *freed* from the ram SaveVMHandlers. Multifd threads are live until migration_incoming_state_destroy(), which is called after qemu_loadvm_state_cleanup(), leading to a crash when accessing rb->receivedmap. process_incoming_migration_co() ... qemu_loadvm_state() multifd_nocomp_recv() qemu_loadvm_state_cleanup() ramblock_recv_bitmap_set_offset() rb->receivedmap = NULL set_bit_atomic(..., rb->receivedmap) ... migration_incoming_state_destroy() multifd_recv_cleanup() multifd_recv_terminate_threads(NULL) Move the loadvm cleanup into migration_incoming_state_destroy(), after multifd_recv_cleanup() to ensure multifd threads have already exited when rb->receivedmap is cleared. Adjust the postcopy listen thread comment to indicate that we still want to skip the cpu synchronization. CC: qemu-stable@nongnu.org Fixes: 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults") Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240917185802.15619-3-farosas@suse.de [peterx: added comment in migration_incoming_state_destroy()] Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/migration.c | 5 +++++ migration/savevm.c | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index XXXXXXX..XXXXXXX 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void) struct MigrationIncomingState *mis = migration_incoming_get_current(); multifd_recv_cleanup(); + /* + * RAM state cleanup needs to happen after multifd cleanup, because + * multifd threads can use some of its states (receivedmap). 
+ */ + qemu_loadvm_state_cleanup(); if (mis->to_src_file) { /* Tell source that we are done */ diff --git a/migration/savevm.c b/migration/savevm.c index XXXXXXX..XXXXXXX 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) trace_qemu_loadvm_state_post_main(ret); if (mis->have_listen_thread) { - /* Listen thread still going, can't clean up yet */ + /* + * Postcopy listen thread still going, don't synchronize the + * cpus yet. + */ return ret; } @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) } } - qemu_loadvm_state_cleanup(); cpu_synchronize_all_post_init(); return ret; -- 2.45.0
The following changes since commit 2b81c046252fbfb375ad30632362fc16e6e22bd5: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2024-09-17 14:02:18 +0100) are available in the Git repository at: https://gitlab.com/peterx/qemu.git tags/migration-20240917-pull-request for you to fetch changes up to 4ce56229087860805877075ddb29dd44578365a9: migration/multifd: Fix rb->receivedmap cleanup race (2024-09-18 14:27:39 -0400) ---------------------------------------------------------------- Migration pull request for 9.2 - Fabiano's patch to move two tests to slow tests. - Peter's patch to fix qatzip builds - Stefan's multifd-zstd fix on unsigned diff comparisons - Fea's bug fix to consistently use memattrs when map() address space - Fabiano's bug fix on multifd race condition against receivedmap ---------------------------------------------------------------- Fabiano Rosas (3): tests/qtest/migration: Move a couple of slow tests under g_test_slow migration/savevm: Remove extra load cleanup calls migration/multifd: Fix rb->receivedmap cleanup race Fea.Wang (1): softmmu/physmem.c: Keep transaction attribute in address_space_map() Peter Xu (1): migration/multifd: Fix build for qatzip Stefan Weil (1): migration/multifd: Fix loop conditions in multifd_zstd_send_prepare and multifd_zstd_recv migration/migration.c | 5 +++++ migration/multifd-qatzip.c | 18 +++++++++--------- migration/multifd-zstd.c | 8 ++++---- migration/savevm.c | 8 ++++---- system/physmem.c | 2 +- tests/qtest/migration-test.c | 8 +++++--- 6 files changed, 28 insertions(+), 21 deletions(-) -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> The xbzrle and vcpu_dirty_limit tests are the two slowest tests in migration-test. Move them under g_test_slow() to save about 40s per run. Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240911145204.17692-1-farosas@suse.de Signed-off-by: Peter Xu <peterx@redhat.com> --- tests/qtest/migration-test.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) migration_test_add("/migration/precopy/unix/plain", test_precopy_unix_plain); - migration_test_add("/migration/precopy/unix/xbzrle", - test_precopy_unix_xbzrle); + if (g_test_slow()) { + migration_test_add("/migration/precopy/unix/xbzrle", + test_precopy_unix_xbzrle); + } migration_test_add("/migration/precopy/file", test_precopy_file); migration_test_add("/migration/precopy/file/offset", @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) if (g_str_equal(arch, "x86_64") && has_kvm && kvm_dirty_ring_supported()) { migration_test_add("/migration/dirty_ring", test_precopy_unix_dirty_ring); - if (qtest_has_machine("pc")) { + if (qtest_has_machine("pc") && g_test_slow()) { migration_test_add("/migration/vcpu_dirty_limit", test_vcpu_dirty_limit); } -- 2.45.0
The qatzip series was based on an older commit; it applied cleanly even though it had conflicts. Neither CI nor I noticed that the build would break, because the qatzip code is skipped by default when the qatzip library is missing. Fix the build issues. No need to copy stable as it just landed in 9.2. Cc: Yichen Wang <yichen.wang@bytedance.com> Cc: Bryan Zhang <bryan.zhang@bytedance.com> Cc: Hao Xiang <hao.xiang@linux.dev> Cc: Yuan Liu <yuan1.liu@intel.com> Fixes: 80484f9459 ("migration: Introduce 'qatzip' compression method") Link: https://lore.kernel.org/r/20240910210450.3835123-1-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/multifd-qatzip.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd-qatzip.c +++ b/migration/multifd-qatzip.c @@ -XXX,XX +XXX,XX @@ static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) */ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) { - MultiFDPages_t *pages = p->pages; + uint32_t page_size = multifd_ram_page_size(); + MultiFDPages_t *pages = &p->data->u.ram; QatzipData *q = p->compress_data; int ret; unsigned int in_len, out_len; @@ -XXX,XX +XXX,XX @@ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) * implementation. 
*/ for (int i = 0; i < pages->normal_num; i++) { - memcpy(q->in_buf + (i * p->page_size), + memcpy(q->in_buf + (i * page_size), pages->block->host + pages->offset[i], - p->page_size); + page_size); } - in_len = pages->normal_num * p->page_size; + in_len = pages->normal_num * page_size; if (in_len > q->in_len) { error_setg(errp, "multifd %u: unexpectedly large input", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) p->id, ret); return -1; } - if (in_len != pages->normal_num * p->page_size) { + if (in_len != pages->normal_num * page_size) { error_setg(errp, "multifd %u: QATzip failed to compress all input", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) int ret; unsigned int in_len, out_len; uint32_t in_size = p->next_packet_size; - uint32_t expected_size = p->normal_num * p->page_size; + uint32_t page_size = multifd_ram_page_size(); + uint32_t expected_size = p->normal_num * page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; if (in_size > q->in_len) { @@ -XXX,XX +XXX,XX @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) /* Copy each page to its appropriate location. */ for (int i = 0; i < p->normal_num; i++) { - memcpy(p->host + p->normal[i], - q->out_buf + p->page_size * i, - p->page_size); + memcpy(p->host + p->normal[i], q->out_buf + page_size * i, page_size); } return 0; } -- 2.45.0
From: Stefan Weil <sw@weilnetz.de> GitHub's CodeQL reports four critical errors which are fixed by this commit: Unsigned difference expression compared to zero An expression (u - v > 0) with unsigned values u, v is only false if u == v, so all changed expressions did not work as expected. Signed-off-by: Stefan Weil <sw@weilnetz.de> Link: https://lore.kernel.org/r/20240910054138.1458555-1-sw@weilnetz.de [peterx: Fix mangled email for author] Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/multifd-zstd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index XXXXXXX..XXXXXXX 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -XXX,XX +XXX,XX @@ static int multifd_zstd_send_prepare(MultiFDSendParams *p, Error **errp) */ do { ret = ZSTD_compressStream2(z->zcs, &z->out, &z->in, flush); - } while (ret > 0 && (z->in.size - z->in.pos > 0) - && (z->out.size - z->out.pos > 0)); - if (ret > 0 && (z->in.size - z->in.pos > 0)) { + } while (ret > 0 && (z->in.size > z->in.pos) + && (z->out.size > z->out.pos)); + if (ret > 0 && (z->in.size > z->in.pos)) { error_setg(errp, "multifd %u: compressStream buffer too small", p->id); return -1; @@ -XXX,XX +XXX,XX @@ static int multifd_zstd_recv(MultiFDRecvParams *p, Error **errp) */ do { ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); - } while (ret > 0 && (z->in.size - z->in.pos > 0) + } while (ret > 0 && (z->in.size > z->in.pos) && (z->out.pos < page_size)); if (ret > 0 && (z->out.pos < page_size)) { error_setg(errp, "multifd %u: decompressStream buffer too small", -- 2.45.0
From: "Fea.Wang" <fea.wang@sifive.com> Follow-up transactions may use the data in the transaction attributes, so pass the attrs value from the function parameter to flatview_read(), just as the flatview_translate() call above does. Signed-off-by: Fea.Wang <fea.wang@sifive.com> Cc: qemu-stable@nongnu.org Fixes: f26404fbee ("Make address_space_map() take a MemTxAttrs argument") Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Link: https://lore.kernel.org/r/20240912070404.2993976-2-fea.wang@sifive.com Signed-off-by: Peter Xu <peterx@redhat.com> --- system/physmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/physmem.c b/system/physmem.c index XXXXXXX..XXXXXXX 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -XXX,XX +XXX,XX @@ void *address_space_map(AddressSpace *as, bounce->len = l; if (!is_write) { - flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, + flatview_read(fv, addr, attrs, bounce->buffer, l); } -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> There are two qemu_loadvm_state_cleanup() calls that were introduced when qemu_loadvm_state_setup() was still called before loading the configuration section, so there was state to be cleaned up if the header checks failed. However, commit 9e14b84908 ("migration/savevm: load_header before load_setup") has moved that configuration section part to qemu_loadvm_state_header() which now happens before qemu_loadvm_state_setup(). Remove the cleanup calls that are now misplaced. Note that we didn't use Fixes because it's benign to cleanup() even if setup() is not invoked. So this patch is not needed for stable, as it falls into cleanup category. Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240917185802.15619-2-farosas@suse.de [peterx: added last paragraph of commit message] Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/savevm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index XXXXXXX..XXXXXXX 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -XXX,XX +XXX,XX @@ static int qemu_loadvm_state_header(QEMUFile *f) if (migrate_get_current()->send_configuration) { if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { error_report("Configuration section missing"); - qemu_loadvm_state_cleanup(); return -EINVAL; } ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); if (ret) { - qemu_loadvm_state_cleanup(); return ret; } } -- 2.45.0
From: Fabiano Rosas <farosas@suse.de> Fix a segmentation fault in multifd when rb->receivedmap is cleared too early. After commit 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults"), multifd started using the rb->receivedmap bitmap, which belongs to ram.c and is initialized and *freed* from the ram SaveVMHandlers. Multifd threads are live until migration_incoming_state_destroy(), which is called after qemu_loadvm_state_cleanup(), leading to a crash when accessing rb->receivedmap. process_incoming_migration_co() ... qemu_loadvm_state() multifd_nocomp_recv() qemu_loadvm_state_cleanup() ramblock_recv_bitmap_set_offset() rb->receivedmap = NULL set_bit_atomic(..., rb->receivedmap) ... migration_incoming_state_destroy() multifd_recv_cleanup() multifd_recv_terminate_threads(NULL) Move the loadvm cleanup into migration_incoming_state_destroy(), after multifd_recv_cleanup() to ensure multifd threads have already exited when rb->receivedmap is cleared. Adjust the postcopy listen thread comment to indicate that we still want to skip the cpu synchronization. CC: qemu-stable@nongnu.org Fixes: 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults") Signed-off-by: Fabiano Rosas <farosas@suse.de> Link: https://lore.kernel.org/r/20240917185802.15619-3-farosas@suse.de [peterx: added comment in migration_incoming_state_destroy()] Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/migration.c | 5 +++++ migration/savevm.c | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index XXXXXXX..XXXXXXX 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void) struct MigrationIncomingState *mis = migration_incoming_get_current(); multifd_recv_cleanup(); + /* + * RAM state cleanup needs to happen after multifd cleanup, because + * multifd threads can use some of its states (receivedmap). 
+ */ + qemu_loadvm_state_cleanup(); if (mis->to_src_file) { /* Tell source that we are done */ diff --git a/migration/savevm.c b/migration/savevm.c index XXXXXXX..XXXXXXX 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) trace_qemu_loadvm_state_post_main(ret); if (mis->have_listen_thread) { - /* Listen thread still going, can't clean up yet */ + /* + * Postcopy listen thread still going, don't synchronize the + * cpus yet. + */ return ret; } @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) } } - qemu_loadvm_state_cleanup(); cpu_synchronize_all_post_init(); return ret; -- 2.45.0