Now, after the threadified dest VM load during precopy, we will always be
in a thread context rather than within a coroutine. We can remove this
path now.
With that, migration_started_on_destination can go away too.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/rdma.c | 102 +++++++++++++++++++----------------------------
1 file changed, 41 insertions(+), 61 deletions(-)
diff --git a/migration/rdma.c b/migration/rdma.c
index 2b995513aa..7751262460 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -29,7 +29,6 @@
#include "qemu/rcu.h"
#include "qemu/sockets.h"
#include "qemu/bitmap.h"
-#include "qemu/coroutine.h"
#include "system/memory.h"
#include <sys/socket.h>
#include <netdb.h>
@@ -357,13 +356,6 @@ typedef struct RDMAContext {
/* Index of the next RAMBlock received during block registration */
unsigned int next_src_index;
- /*
- * Migration on *destination* started.
- * Then use coroutine yield function.
- * Source runs in a thread, so we don't care.
- */
- int migration_started_on_destination;
-
int total_registrations;
int total_writes;
@@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
struct rdma_cm_event *cm_event;
/*
- * Coroutine doesn't start until migration_fd_process_incoming()
- * so don't yield unless we know we're running inside of a coroutine.
+ * This is the source or dest side, either during precopy or
+ * postcopy. We're always in a separate thread when reaching here.
+ * Poll the fd. We need to be able to handle 'cancel' or an error
+ * without hanging forever.
*/
- if (rdma->migration_started_on_destination &&
- migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
- qemu_in_coroutine()) {
- yield_until_fd_readable(comp_channel->fd);
- } else {
- /* This is the source side, we're in a separate thread
- * or destination prior to migration_fd_process_incoming()
- * after postcopy, the destination also in a separate thread.
- * we can't yield; so we have to poll the fd.
- * But we need to be able to handle 'cancel' or an error
- * without hanging forever.
- */
- while (!rdma->errored && !rdma->received_error) {
- GPollFD pfds[2];
- pfds[0].fd = comp_channel->fd;
- pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
- pfds[0].revents = 0;
-
- pfds[1].fd = rdma->channel->fd;
- pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
- pfds[1].revents = 0;
-
- /* 0.1s timeout, should be fine for a 'cancel' */
- switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
- case 2:
- case 1: /* fd active */
- if (pfds[0].revents) {
- return 0;
- }
+ while (!rdma->errored && !rdma->received_error) {
+ GPollFD pfds[2];
+ pfds[0].fd = comp_channel->fd;
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+ pfds[0].revents = 0;
+
+ pfds[1].fd = rdma->channel->fd;
+ pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+ pfds[1].revents = 0;
+
+ /* 0.1s timeout, should be fine for a 'cancel' */
+ switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
+ case 2:
+ case 1: /* fd active */
+ if (pfds[0].revents) {
+ return 0;
+ }
- if (pfds[1].revents) {
- if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
- return -1;
- }
+ if (pfds[1].revents) {
+ if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
+ return -1;
+ }
- if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
- cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
- rdma_ack_cm_event(cm_event);
- return -1;
- }
+ if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
+ cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
rdma_ack_cm_event(cm_event);
+ return -1;
}
- break;
+ rdma_ack_cm_event(cm_event);
+ }
+ break;
- case 0: /* Timeout, go around again */
- break;
+ case 0: /* Timeout, go around again */
+ break;
- default: /* Error of some type -
- * I don't trust errno from qemu_poll_ns
- */
- return -1;
- }
+ default: /* Error of some type -
+ * I don't trust errno from qemu_poll_ns
+ */
+ return -1;
+ }
- if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
- /* Bail out and let the cancellation happen */
- return -1;
- }
+ if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
+ /* Bail out and let the cancellation happen */
+ return -1;
}
}
@@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
return;
}
- rdma->migration_started_on_destination = 1;
migration_fd_process_incoming(f);
}
--
2.50.1
On 28/08/2025 04:59, Peter Xu wrote:
> Now after threadified dest VM load during precopy, we will always in a
> thread context rather than within a coroutine. We can remove this path
> now.
>
> With that, migration_started_on_destination can go away too.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Thanks
Zhijian
> ---
> migration/rdma.c | 102 +++++++++++++++++++----------------------------
> 1 file changed, 41 insertions(+), 61 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2b995513aa..7751262460 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -29,7 +29,6 @@
> #include "qemu/rcu.h"
> #include "qemu/sockets.h"
> #include "qemu/bitmap.h"
> -#include "qemu/coroutine.h"
> #include "system/memory.h"
> #include <sys/socket.h>
> #include <netdb.h>
> @@ -357,13 +356,6 @@ typedef struct RDMAContext {
> /* Index of the next RAMBlock received during block registration */
> unsigned int next_src_index;
>
> - /*
> - * Migration on *destination* started.
> - * Then use coroutine yield function.
> - * Source runs in a thread, so we don't care.
> - */
> - int migration_started_on_destination;
> -
> int total_registrations;
> int total_writes;
>
> @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
> struct rdma_cm_event *cm_event;
>
> /*
> - * Coroutine doesn't start until migration_fd_process_incoming()
> - * so don't yield unless we know we're running inside of a coroutine.
> + * This is the source or dest side, either during precopy or
> + * postcopy. We're always in a separate thread when reaching here.
> + * Poll the fd. We need to be able to handle 'cancel' or an error
> + * without hanging forever.
> */
> - if (rdma->migration_started_on_destination &&
> - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> - qemu_in_coroutine()) {
> - yield_until_fd_readable(comp_channel->fd);
> - } else {
> - /* This is the source side, we're in a separate thread
> - * or destination prior to migration_fd_process_incoming()
> - * after postcopy, the destination also in a separate thread.
> - * we can't yield; so we have to poll the fd.
> - * But we need to be able to handle 'cancel' or an error
> - * without hanging forever.
> - */
> - while (!rdma->errored && !rdma->received_error) {
> - GPollFD pfds[2];
> - pfds[0].fd = comp_channel->fd;
> - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> - pfds[0].revents = 0;
> -
> - pfds[1].fd = rdma->channel->fd;
> - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> - pfds[1].revents = 0;
> -
> - /* 0.1s timeout, should be fine for a 'cancel' */
> - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> - case 2:
> - case 1: /* fd active */
> - if (pfds[0].revents) {
> - return 0;
> - }
> + while (!rdma->errored && !rdma->received_error) {
> + GPollFD pfds[2];
> + pfds[0].fd = comp_channel->fd;
> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> + pfds[0].revents = 0;
> +
> + pfds[1].fd = rdma->channel->fd;
> + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> + pfds[1].revents = 0;
> +
> + /* 0.1s timeout, should be fine for a 'cancel' */
> + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> + case 2:
> + case 1: /* fd active */
> + if (pfds[0].revents) {
> + return 0;
> + }
>
> - if (pfds[1].revents) {
> - if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> - return -1;
> - }
> + if (pfds[1].revents) {
> + if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> + return -1;
> + }
>
> - if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> - cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> - rdma_ack_cm_event(cm_event);
> - return -1;
> - }
> + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> rdma_ack_cm_event(cm_event);
> + return -1;
> }
> - break;
> + rdma_ack_cm_event(cm_event);
> + }
> + break;
>
> - case 0: /* Timeout, go around again */
> - break;
> + case 0: /* Timeout, go around again */
> + break;
>
> - default: /* Error of some type -
> - * I don't trust errno from qemu_poll_ns
> - */
> - return -1;
> - }
> + default: /* Error of some type -
> + * I don't trust errno from qemu_poll_ns
> + */
> + return -1;
> + }
>
> - if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> - /* Bail out and let the cancellation happen */
> - return -1;
> - }
> + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> + /* Bail out and let the cancellation happen */
> + return -1;
> }
> }
>
> @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
> return;
> }
>
> - rdma->migration_started_on_destination = 1;
> migration_fd_process_incoming(f);
> }
>
Peter Xu <peterx@redhat.com> writes:
> Now after threadified dest VM load during precopy, we will always in a
> thread context rather than within a coroutine. We can remove this path
> now.
>
> With that, migration_started_on_destination can go away too.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
> migration/rdma.c | 102 +++++++++++++++++++----------------------------
> 1 file changed, 41 insertions(+), 61 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2b995513aa..7751262460 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -29,7 +29,6 @@
> #include "qemu/rcu.h"
> #include "qemu/sockets.h"
> #include "qemu/bitmap.h"
> -#include "qemu/coroutine.h"
> #include "system/memory.h"
> #include <sys/socket.h>
> #include <netdb.h>
> @@ -357,13 +356,6 @@ typedef struct RDMAContext {
> /* Index of the next RAMBlock received during block registration */
> unsigned int next_src_index;
>
> - /*
> - * Migration on *destination* started.
> - * Then use coroutine yield function.
> - * Source runs in a thread, so we don't care.
> - */
> - int migration_started_on_destination;
> -
> int total_registrations;
> int total_writes;
>
> @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
> struct rdma_cm_event *cm_event;
>
> /*
> - * Coroutine doesn't start until migration_fd_process_incoming()
> - * so don't yield unless we know we're running inside of a coroutine.
> + * This is the source or dest side, either during precopy or
> + * postcopy. We're always in a separate thread when reaching here.
> + * Poll the fd. We need to be able to handle 'cancel' or an error
> + * without hanging forever.
> */
> - if (rdma->migration_started_on_destination &&
> - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> - qemu_in_coroutine()) {
> - yield_until_fd_readable(comp_channel->fd);
> - } else {
> - /* This is the source side, we're in a separate thread
> - * or destination prior to migration_fd_process_incoming()
> - * after postcopy, the destination also in a separate thread.
> - * we can't yield; so we have to poll the fd.
> - * But we need to be able to handle 'cancel' or an error
> - * without hanging forever.
> - */
> - while (!rdma->errored && !rdma->received_error) {
> - GPollFD pfds[2];
> - pfds[0].fd = comp_channel->fd;
> - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> - pfds[0].revents = 0;
> -
> - pfds[1].fd = rdma->channel->fd;
> - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> - pfds[1].revents = 0;
> -
> - /* 0.1s timeout, should be fine for a 'cancel' */
> - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> - case 2:
> - case 1: /* fd active */
> - if (pfds[0].revents) {
> - return 0;
> - }
> + while (!rdma->errored && !rdma->received_error) {
> + GPollFD pfds[2];
> + pfds[0].fd = comp_channel->fd;
> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> + pfds[0].revents = 0;
> +
> + pfds[1].fd = rdma->channel->fd;
> + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> + pfds[1].revents = 0;
> +
> + /* 0.1s timeout, should be fine for a 'cancel' */
> + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
Doesn't glib have facilities for polling? Isn't this what
qio_channel_rdma_create_watch() is for already?
> + case 2:
> + case 1: /* fd active */
> + if (pfds[0].revents) {
> + return 0;
> + }
>
> - if (pfds[1].revents) {
> - if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> - return -1;
> - }
> + if (pfds[1].revents) {
> + if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> + return -1;
> + }
>
> - if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> - cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> - rdma_ack_cm_event(cm_event);
> - return -1;
> - }
> + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> rdma_ack_cm_event(cm_event);
> + return -1;
> }
> - break;
> + rdma_ack_cm_event(cm_event);
> + }
> + break;
>
> - case 0: /* Timeout, go around again */
> - break;
> + case 0: /* Timeout, go around again */
> + break;
>
> - default: /* Error of some type -
> - * I don't trust errno from qemu_poll_ns
> - */
> - return -1;
> - }
> + default: /* Error of some type -
> + * I don't trust errno from qemu_poll_ns
> + */
> + return -1;
> + }
>
> - if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> - /* Bail out and let the cancellation happen */
> - return -1;
> - }
> + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> + /* Bail out and let the cancellation happen */
> + return -1;
> }
> }
>
> @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
> return;
> }
>
> - rdma->migration_started_on_destination = 1;
> migration_fd_process_incoming(f);
> }
On Tue, Sep 16, 2025 at 07:39:30PM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
>
> > Now after threadified dest VM load during precopy, we will always in a
> > thread context rather than within a coroutine. We can remove this path
> > now.
> >
> > With that, migration_started_on_destination can go away too.
> >
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> > migration/rdma.c | 102 +++++++++++++++++++----------------------------
> > 1 file changed, 41 insertions(+), 61 deletions(-)
> >
> > diff --git a/migration/rdma.c b/migration/rdma.c
> > index 2b995513aa..7751262460 100644
> > --- a/migration/rdma.c
> > +++ b/migration/rdma.c
> > @@ -29,7 +29,6 @@
> > #include "qemu/rcu.h"
> > #include "qemu/sockets.h"
> > #include "qemu/bitmap.h"
> > -#include "qemu/coroutine.h"
> > #include "system/memory.h"
> > #include <sys/socket.h>
> > #include <netdb.h>
> > @@ -357,13 +356,6 @@ typedef struct RDMAContext {
> > /* Index of the next RAMBlock received during block registration */
> > unsigned int next_src_index;
> >
> > - /*
> > - * Migration on *destination* started.
> > - * Then use coroutine yield function.
> > - * Source runs in a thread, so we don't care.
> > - */
> > - int migration_started_on_destination;
> > -
> > int total_registrations;
> > int total_writes;
> >
> > @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
> > struct rdma_cm_event *cm_event;
> >
> > /*
> > - * Coroutine doesn't start until migration_fd_process_incoming()
> > - * so don't yield unless we know we're running inside of a coroutine.
> > + * This is the source or dest side, either during precopy or
> > + * postcopy. We're always in a separate thread when reaching here.
> > + * Poll the fd. We need to be able to handle 'cancel' or an error
> > + * without hanging forever.
> > */
> > - if (rdma->migration_started_on_destination &&
> > - migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> > - qemu_in_coroutine()) {
> > - yield_until_fd_readable(comp_channel->fd);
> > - } else {
> > - /* This is the source side, we're in a separate thread
> > - * or destination prior to migration_fd_process_incoming()
> > - * after postcopy, the destination also in a separate thread.
> > - * we can't yield; so we have to poll the fd.
> > - * But we need to be able to handle 'cancel' or an error
> > - * without hanging forever.
> > - */
> > - while (!rdma->errored && !rdma->received_error) {
> > - GPollFD pfds[2];
> > - pfds[0].fd = comp_channel->fd;
> > - pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > - pfds[0].revents = 0;
> > -
> > - pfds[1].fd = rdma->channel->fd;
> > - pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > - pfds[1].revents = 0;
> > -
> > - /* 0.1s timeout, should be fine for a 'cancel' */
> > - switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> > - case 2:
> > - case 1: /* fd active */
> > - if (pfds[0].revents) {
> > - return 0;
> > - }
> > + while (!rdma->errored && !rdma->received_error) {
> > + GPollFD pfds[2];
> > + pfds[0].fd = comp_channel->fd;
> > + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > + pfds[0].revents = 0;
> > +
> > + pfds[1].fd = rdma->channel->fd;
> > + pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > + pfds[1].revents = 0;
> > +
> > + /* 0.1s timeout, should be fine for a 'cancel' */
> > + switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
>
> Don't glib have facilities for polling? Isn't this what
> qio_channel_rdma_create_watch() is for already?
Yes. I don't know why the RDMA channel is done like this; I didn't dig
deeper. I bet Dan has more clues (as author of 6ddd2d76ca6f). The hope is
that I also don't need to dig into it if I only want to make the loadvm
work in a thread. :)
I also replied to your other email; that should have some more info
regarding why I think rdma's io_create_watch() isn't used... or seems
broken.
For this patch alone, it almost only removes the "if()" section; these
lines are untouched except for indentation changes.
--
Peter Xu
© 2016 - 2026 Red Hat, Inc.