Now that the dest VM load during precopy has been threadified, we will
always be in a thread context rather than within a coroutine. We can
remove this path now.
With that, migration_started_on_destination can go away too.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/rdma.c | 102 +++++++++++++++++++----------------------------
1 file changed, 41 insertions(+), 61 deletions(-)
diff --git a/migration/rdma.c b/migration/rdma.c
index 2b995513aa..7751262460 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -29,7 +29,6 @@
#include "qemu/rcu.h"
#include "qemu/sockets.h"
#include "qemu/bitmap.h"
-#include "qemu/coroutine.h"
#include "system/memory.h"
#include <sys/socket.h>
#include <netdb.h>
@@ -357,13 +356,6 @@ typedef struct RDMAContext {
/* Index of the next RAMBlock received during block registration */
unsigned int next_src_index;
- /*
- * Migration on *destination* started.
- * Then use coroutine yield function.
- * Source runs in a thread, so we don't care.
- */
- int migration_started_on_destination;
-
int total_registrations;
int total_writes;
@@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
struct rdma_cm_event *cm_event;
/*
- * Coroutine doesn't start until migration_fd_process_incoming()
- * so don't yield unless we know we're running inside of a coroutine.
+ * This is the source or dest side, either during precopy or
+ * postcopy. We're always in a separate thread when reaching here.
+ * Poll the fd. We need to be able to handle 'cancel' or an error
+ * without hanging forever.
*/
- if (rdma->migration_started_on_destination &&
- migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
- qemu_in_coroutine()) {
- yield_until_fd_readable(comp_channel->fd);
- } else {
- /* This is the source side, we're in a separate thread
- * or destination prior to migration_fd_process_incoming()
- * after postcopy, the destination also in a separate thread.
- * we can't yield; so we have to poll the fd.
- * But we need to be able to handle 'cancel' or an error
- * without hanging forever.
- */
- while (!rdma->errored && !rdma->received_error) {
- GPollFD pfds[2];
- pfds[0].fd = comp_channel->fd;
- pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
- pfds[0].revents = 0;
-
- pfds[1].fd = rdma->channel->fd;
- pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
- pfds[1].revents = 0;
-
- /* 0.1s timeout, should be fine for a 'cancel' */
- switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
- case 2:
- case 1: /* fd active */
- if (pfds[0].revents) {
- return 0;
- }
+ while (!rdma->errored && !rdma->received_error) {
+ GPollFD pfds[2];
+ pfds[0].fd = comp_channel->fd;
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+ pfds[0].revents = 0;
+
+ pfds[1].fd = rdma->channel->fd;
+ pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+ pfds[1].revents = 0;
+
+ /* 0.1s timeout, should be fine for a 'cancel' */
+ switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
+ case 2:
+ case 1: /* fd active */
+ if (pfds[0].revents) {
+ return 0;
+ }
- if (pfds[1].revents) {
- if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
- return -1;
- }
+ if (pfds[1].revents) {
+ if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
+ return -1;
+ }
- if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
- cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
- rdma_ack_cm_event(cm_event);
- return -1;
- }
+ if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
+ cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
rdma_ack_cm_event(cm_event);
+ return -1;
}
- break;
+ rdma_ack_cm_event(cm_event);
+ }
+ break;
- case 0: /* Timeout, go around again */
- break;
+ case 0: /* Timeout, go around again */
+ break;
- default: /* Error of some type -
- * I don't trust errno from qemu_poll_ns
- */
- return -1;
- }
+ default: /* Error of some type -
+ * I don't trust errno from qemu_poll_ns
+ */
+ return -1;
+ }
- if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
- /* Bail out and let the cancellation happen */
- return -1;
- }
+ if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
+ /* Bail out and let the cancellation happen */
+ return -1;
}
}
@@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
return;
}
- rdma->migration_started_on_destination = 1;
migration_fd_process_incoming(f);
}
--
2.50.1
On 28/08/2025 04:59, Peter Xu wrote: > Now after threadified dest VM load during precopy, we will always in a > thread context rather than within a coroutine. We can remove this path > now. > > With that, migration_started_on_destination can go away too. > > Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Li Zhijian <lizhijian@fujitsu.com> Thanks Zhijian > --- > migration/rdma.c | 102 +++++++++++++++++++---------------------------- > 1 file changed, 41 insertions(+), 61 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 2b995513aa..7751262460 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -29,7 +29,6 @@ > #include "qemu/rcu.h" > #include "qemu/sockets.h" > #include "qemu/bitmap.h" > -#include "qemu/coroutine.h" > #include "system/memory.h" > #include <sys/socket.h> > #include <netdb.h> > @@ -357,13 +356,6 @@ typedef struct RDMAContext { > /* Index of the next RAMBlock received during block registration */ > unsigned int next_src_index; > > - /* > - * Migration on *destination* started. > - * Then use coroutine yield function. > - * Source runs in a thread, so we don't care. > - */ > - int migration_started_on_destination; > - > int total_registrations; > int total_writes; > > @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, > struct rdma_cm_event *cm_event; > > /* > - * Coroutine doesn't start until migration_fd_process_incoming() > - * so don't yield unless we know we're running inside of a coroutine. > + * This is the source or dest side, either during precopy or > + * postcopy. We're always in a separate thread when reaching here. > + * Poll the fd. We need to be able to handle 'cancel' or an error > + * without hanging forever. 
> */ > - if (rdma->migration_started_on_destination && > - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE && > - qemu_in_coroutine()) { > - yield_until_fd_readable(comp_channel->fd); > - } else { > - /* This is the source side, we're in a separate thread > - * or destination prior to migration_fd_process_incoming() > - * after postcopy, the destination also in a separate thread. > - * we can't yield; so we have to poll the fd. > - * But we need to be able to handle 'cancel' or an error > - * without hanging forever. > - */ > - while (!rdma->errored && !rdma->received_error) { > - GPollFD pfds[2]; > - pfds[0].fd = comp_channel->fd; > - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[0].revents = 0; > - > - pfds[1].fd = rdma->channel->fd; > - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[1].revents = 0; > - > - /* 0.1s timeout, should be fine for a 'cancel' */ > - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { > - case 2: > - case 1: /* fd active */ > - if (pfds[0].revents) { > - return 0; > - } > + while (!rdma->errored && !rdma->received_error) { > + GPollFD pfds[2]; > + pfds[0].fd = comp_channel->fd; > + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[0].revents = 0; > + > + pfds[1].fd = rdma->channel->fd; > + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[1].revents = 0; > + > + /* 0.1s timeout, should be fine for a 'cancel' */ > + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { > + case 2: > + case 1: /* fd active */ > + if (pfds[0].revents) { > + return 0; > + } > > - if (pfds[1].revents) { > - if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > - return -1; > - } > + if (pfds[1].revents) { > + if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > + return -1; > + } > > - if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > - cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > - rdma_ack_cm_event(cm_event); > - return -1; > - } > + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || 
> + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > rdma_ack_cm_event(cm_event); > + return -1; > } > - break; > + rdma_ack_cm_event(cm_event); > + } > + break; > > - case 0: /* Timeout, go around again */ > - break; > + case 0: /* Timeout, go around again */ > + break; > > - default: /* Error of some type - > - * I don't trust errno from qemu_poll_ns > - */ > - return -1; > - } > + default: /* Error of some type - > + * I don't trust errno from qemu_poll_ns > + */ > + return -1; > + } > > - if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { > - /* Bail out and let the cancellation happen */ > - return -1; > - } > + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { > + /* Bail out and let the cancellation happen */ > + return -1; > } > } > > @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque) > return; > } > > - rdma->migration_started_on_destination = 1; > migration_fd_process_incoming(f); > } >
Peter Xu <peterx@redhat.com> writes: > Now after threadified dest VM load during precopy, we will always in a > thread context rather than within a coroutine. We can remove this path > now. > > With that, migration_started_on_destination can go away too. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > migration/rdma.c | 102 +++++++++++++++++++---------------------------- > 1 file changed, 41 insertions(+), 61 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 2b995513aa..7751262460 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -29,7 +29,6 @@ > #include "qemu/rcu.h" > #include "qemu/sockets.h" > #include "qemu/bitmap.h" > -#include "qemu/coroutine.h" > #include "system/memory.h" > #include <sys/socket.h> > #include <netdb.h> > @@ -357,13 +356,6 @@ typedef struct RDMAContext { > /* Index of the next RAMBlock received during block registration */ > unsigned int next_src_index; > > - /* > - * Migration on *destination* started. > - * Then use coroutine yield function. > - * Source runs in a thread, so we don't care. > - */ > - int migration_started_on_destination; > - > int total_registrations; > int total_writes; > > @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, > struct rdma_cm_event *cm_event; > > /* > - * Coroutine doesn't start until migration_fd_process_incoming() > - * so don't yield unless we know we're running inside of a coroutine. > + * This is the source or dest side, either during precopy or > + * postcopy. We're always in a separate thread when reaching here. > + * Poll the fd. We need to be able to handle 'cancel' or an error > + * without hanging forever. 
> */ > - if (rdma->migration_started_on_destination && > - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE && > - qemu_in_coroutine()) { > - yield_until_fd_readable(comp_channel->fd); > - } else { > - /* This is the source side, we're in a separate thread > - * or destination prior to migration_fd_process_incoming() > - * after postcopy, the destination also in a separate thread. > - * we can't yield; so we have to poll the fd. > - * But we need to be able to handle 'cancel' or an error > - * without hanging forever. > - */ > - while (!rdma->errored && !rdma->received_error) { > - GPollFD pfds[2]; > - pfds[0].fd = comp_channel->fd; > - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[0].revents = 0; > - > - pfds[1].fd = rdma->channel->fd; > - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > - pfds[1].revents = 0; > - > - /* 0.1s timeout, should be fine for a 'cancel' */ > - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { > - case 2: > - case 1: /* fd active */ > - if (pfds[0].revents) { > - return 0; > - } > + while (!rdma->errored && !rdma->received_error) { > + GPollFD pfds[2]; > + pfds[0].fd = comp_channel->fd; > + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[0].revents = 0; > + > + pfds[1].fd = rdma->channel->fd; > + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + pfds[1].revents = 0; > + > + /* 0.1s timeout, should be fine for a 'cancel' */ > + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) { Doesn't glib have facilities for polling? Isn't this what qio_channel_rdma_create_watch() is for already? 
> + case 2: > + case 1: /* fd active */ > + if (pfds[0].revents) { > + return 0; > + } > > - if (pfds[1].revents) { > - if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > - return -1; > - } > + if (pfds[1].revents) { > + if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) { > + return -1; > + } > > - if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > - cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > - rdma_ack_cm_event(cm_event); > - return -1; > - } > + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > rdma_ack_cm_event(cm_event); > + return -1; > } > - break; > + rdma_ack_cm_event(cm_event); > + } > + break; > > - case 0: /* Timeout, go around again */ > - break; > + case 0: /* Timeout, go around again */ > + break; > > - default: /* Error of some type - > - * I don't trust errno from qemu_poll_ns > - */ > - return -1; > - } > + default: /* Error of some type - > + * I don't trust errno from qemu_poll_ns > + */ > + return -1; > + } > > - if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { > - /* Bail out and let the cancellation happen */ > - return -1; > - } > + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { > + /* Bail out and let the cancellation happen */ > + return -1; > } > } > > @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque) > return; > } > > - rdma->migration_started_on_destination = 1; > migration_fd_process_incoming(f); > }
© 2016 - 2025 Red Hat, Inc.