From: Ivan Ren <ivanren@tencent.com>
When it encounters an error, multifd_send_thread should always notify
whoever is waiting on it before it exits. Otherwise it may block
migration_thread in multifd_send_sync_main forever.
The error is as follows:
-------------------------------------------------------------------------------
(gdb) bt
#0 0x00007f4d669dfa0b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0
#1 0x00007f4d669dfa9f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0
#2 0x00007f4d669dfb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0
#3 0x0000562ccf0a5614 in qemu_sem_wait (sem=sem@entry=0x562cd1b698e8) at util/qemu-thread-posix.c:319
#4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099
#5 0x0000562ccecb95f4 in ram_save_iterate (f=0x562cd0ecc000, opaque=<optimized out>) at /qemu/migration/ram.c:3550
#6 0x0000562ccef43c23 in qemu_savevm_state_iterate (f=0x562cd0ecc000, postcopy=false) at migration/savevm.c:1189
#7 0x0000562ccef3dcf3 in migration_iteration_run (s=0x562cd09fabf0) at migration/migration.c:3131
#8 migration_thread (opaque=opaque@entry=0x562cd09fabf0) at migration/migration.c:3258
#9 0x0000562ccf0a4c26 in qemu_thread_start (args=<optimized out>) at util/qemu-thread-posix.c:502
#10 0x00007f4d669d9e25 in start_thread () from /lib64/libpthread.so.0
#11 0x00007f4d6670635d in clone () from /lib64/libc.so.6
(gdb) f 4
#4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099
1099 qemu_sem_wait(&p->sem_sync);
(gdb) list
1094 }
1095 for (i = 0; i < migrate_multifd_channels(); i++) {
1096 MultiFDSendParams *p = &multifd_send_state->params[i];
1097
1098 trace_multifd_send_sync_main_wait(p->id);
1099 qemu_sem_wait(&p->sem_sync);
1100 }
1101 trace_multifd_send_sync_main(multifd_send_state->packet_num);
1102 }
1103
(gdb) p i
$1 = 0
(gdb) p multifd_send_state->params[0].pending_job
$2 = 2 // This means the job before MULTIFD_FLAG_SYNC has already failed
(gdb) p multifd_send_state->params[0].quit
$3 = true
Signed-off-by: Ivan Ren <ivanren@tencent.com>
---
migration/ram.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index b01a37e7ca..0047286b7e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1112,6 +1112,7 @@ static void *multifd_send_thread(void *opaque)
rcu_register_thread();
if (multifd_send_initial_packet(p, &local_err) < 0) {
+ ret = -1;
goto out;
}
/* initial packet */
@@ -1179,9 +1180,7 @@ out:
* who pay attention to me.
*/
if (ret != 0) {
- if (flags & MULTIFD_FLAG_SYNC) {
- qemu_sem_post(&p->sem_sync);
- }
+ qemu_sem_post(&p->sem_sync);
qemu_sem_post(&multifd_send_state->channels_ready);
}
--
2.17.2 (Apple Git-113)
Ivan Ren <renyime@gmail.com> wrote: > From: Ivan Ren <ivanren@tencent.com> > > When encounter error, multifd_send_thread should always notify who pay > attention to it before exit. Otherwise it may block migration_thread > at multifd_send_sync_main forever. > > Error as follow: > ------------------------------------------------------------------------------- > (gdb) bt > #0 0x00007f4d669dfa0b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0 > #1 0x00007f4d669dfa9f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0 > #2 0x00007f4d669dfb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0 > #3 0x0000562ccf0a5614 in qemu_sem_wait (sem=sem@entry=0x562cd1b698e8) at util/qemu-thread-posix.c:319 > #4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099 > #5 0x0000562ccecb95f4 in ram_save_iterate (f=0x562cd0ecc000, opaque=<optimized out>) at /qemu/migration/ram.c:3550 > #6 0x0000562ccef43c23 in qemu_savevm_state_iterate (f=0x562cd0ecc000, postcopy=false) at migration/savevm.c:1189 > #7 0x0000562ccef3dcf3 in migration_iteration_run (s=0x562cd09fabf0) at migration/migration.c:3131 > #8 migration_thread (opaque=opaque@entry=0x562cd09fabf0) at migration/migration.c:3258 > #9 0x0000562ccf0a4c26 in qemu_thread_start (args=<optimized out>) at util/qemu-thread-posix.c:502 > #10 0x00007f4d669d9e25 in start_thread () from /lib64/libpthread.so.0 > #11 0x00007f4d6670635d in clone () from /lib64/libc.so.6 > (gdb) f 4 > #4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099 > 1099 qemu_sem_wait(&p->sem_sync); > (gdb) list > 1094 } > 1095 for (i = 0; i < migrate_multifd_channels(); i++) { > 1096 MultiFDSendParams *p = &multifd_send_state->params[i]; > 1097 > 1098 trace_multifd_send_sync_main_wait(p->id); > 1099 qemu_sem_wait(&p->sem_sync); > 1100 } > 1101 trace_multifd_send_sync_main(multifd_send_state->packet_num); > 1102 } > 1103 > (gdb) p i > $1 = 0 > (gdb) p 
multifd_send_state->params[0].pending_job > $2 = 2 //It means the job before MULTIFD_FLAG_SYNC has already fail > (gdb) p multifd_send_state->params[0].quit > $3 = true > > Signed-off-by: Ivan Ren <ivanren@tencent.com> Reviewed-by: Juan Quintela <quintela@redhat.com>
Queued * Ivan Ren (renyime@gmail.com) wrote: > From: Ivan Ren <ivanren@tencent.com> > > When encounter error, multifd_send_thread should always notify who pay > attention to it before exit. Otherwise it may block migration_thread > at multifd_send_sync_main forever. > > Error as follow: > ------------------------------------------------------------------------------- > (gdb) bt > #0 0x00007f4d669dfa0b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0 > #1 0x00007f4d669dfa9f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0 > #2 0x00007f4d669dfb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0 > #3 0x0000562ccf0a5614 in qemu_sem_wait (sem=sem@entry=0x562cd1b698e8) at util/qemu-thread-posix.c:319 > #4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099 > #5 0x0000562ccecb95f4 in ram_save_iterate (f=0x562cd0ecc000, opaque=<optimized out>) at /qemu/migration/ram.c:3550 > #6 0x0000562ccef43c23 in qemu_savevm_state_iterate (f=0x562cd0ecc000, postcopy=false) at migration/savevm.c:1189 > #7 0x0000562ccef3dcf3 in migration_iteration_run (s=0x562cd09fabf0) at migration/migration.c:3131 > #8 migration_thread (opaque=opaque@entry=0x562cd09fabf0) at migration/migration.c:3258 > #9 0x0000562ccf0a4c26 in qemu_thread_start (args=<optimized out>) at util/qemu-thread-posix.c:502 > #10 0x00007f4d669d9e25 in start_thread () from /lib64/libpthread.so.0 > #11 0x00007f4d6670635d in clone () from /lib64/libc.so.6 > (gdb) f 4 > #4 0x0000562ccecb4752 in multifd_send_sync_main (rs=<optimized out>) at /qemu/migration/ram.c:1099 > 1099 qemu_sem_wait(&p->sem_sync); > (gdb) list > 1094 } > 1095 for (i = 0; i < migrate_multifd_channels(); i++) { > 1096 MultiFDSendParams *p = &multifd_send_state->params[i]; > 1097 > 1098 trace_multifd_send_sync_main_wait(p->id); > 1099 qemu_sem_wait(&p->sem_sync); > 1100 } > 1101 trace_multifd_send_sync_main(multifd_send_state->packet_num); > 1102 } > 1103 > (gdb) p i > $1 = 0 > (gdb) p 
multifd_send_state->params[0].pending_job > $2 = 2 //It means the job before MULTIFD_FLAG_SYNC has already fail > (gdb) p multifd_send_state->params[0].quit > $3 = true > > Signed-off-by: Ivan Ren <ivanren@tencent.com> > --- > migration/ram.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index b01a37e7ca..0047286b7e 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -1112,6 +1112,7 @@ static void *multifd_send_thread(void *opaque) > rcu_register_thread(); > > if (multifd_send_initial_packet(p, &local_err) < 0) { > + ret = -1; > goto out; > } > /* initial packet */ > @@ -1179,9 +1180,7 @@ out: > * who pay attention to me. > */ > if (ret != 0) { > - if (flags & MULTIFD_FLAG_SYNC) { > - qemu_sem_post(&p->sem_sync); > - } > + qemu_sem_post(&p->sem_sync); > qemu_sem_post(&multifd_send_state->channels_ready); > } > > -- > 2.17.2 (Apple Git-113) > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
© 2016 - 2024 Red Hat, Inc.