[PATCH RFC 6/9] migration/rdma: Remove coroutine path in qemu_rdma_wait_comp_channel

Peter Xu posted 9 patches 5 months, 2 weeks ago
Maintainers: Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Hailiang Zhang <zhanghailiang@xfusion.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Li Zhijian <lizhijian@fujitsu.com>
[PATCH RFC 6/9] migration/rdma: Remove coroutine path in qemu_rdma_wait_comp_channel
Posted by Peter Xu 5 months, 2 weeks ago
Now that the dest VM load during precopy has been threadified, we will
always be in a thread context rather than within a coroutine.  We can
remove this path now.

With that, migration_started_on_destination can go away too.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/rdma.c | 102 +++++++++++++++++++----------------------------
 1 file changed, 41 insertions(+), 61 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 2b995513aa..7751262460 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -29,7 +29,6 @@
 #include "qemu/rcu.h"
 #include "qemu/sockets.h"
 #include "qemu/bitmap.h"
-#include "qemu/coroutine.h"
 #include "system/memory.h"
 #include <sys/socket.h>
 #include <netdb.h>
@@ -357,13 +356,6 @@ typedef struct RDMAContext {
     /* Index of the next RAMBlock received during block registration */
     unsigned int    next_src_index;
 
-    /*
-     * Migration on *destination* started.
-     * Then use coroutine yield function.
-     * Source runs in a thread, so we don't care.
-     */
-    int migration_started_on_destination;
-
     int total_registrations;
     int total_writes;
 
@@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
     struct rdma_cm_event *cm_event;
 
     /*
-     * Coroutine doesn't start until migration_fd_process_incoming()
-     * so don't yield unless we know we're running inside of a coroutine.
+     * This is the source or dest side, either during precopy or
+     * postcopy.  We're always in a separate thread when reaching here.
+     * Poll the fd.  We need to be able to handle 'cancel' or an error
+     * without hanging forever.
      */
-    if (rdma->migration_started_on_destination &&
-        migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
-        qemu_in_coroutine()) {
-        yield_until_fd_readable(comp_channel->fd);
-    } else {
-        /* This is the source side, we're in a separate thread
-         * or destination prior to migration_fd_process_incoming()
-         * after postcopy, the destination also in a separate thread.
-         * we can't yield; so we have to poll the fd.
-         * But we need to be able to handle 'cancel' or an error
-         * without hanging forever.
-         */
-        while (!rdma->errored && !rdma->received_error) {
-            GPollFD pfds[2];
-            pfds[0].fd = comp_channel->fd;
-            pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
-            pfds[0].revents = 0;
-
-            pfds[1].fd = rdma->channel->fd;
-            pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
-            pfds[1].revents = 0;
-
-            /* 0.1s timeout, should be fine for a 'cancel' */
-            switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
-            case 2:
-            case 1: /* fd active */
-                if (pfds[0].revents) {
-                    return 0;
-                }
+    while (!rdma->errored && !rdma->received_error) {
+        GPollFD pfds[2];
+        pfds[0].fd = comp_channel->fd;
+        pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+        pfds[0].revents = 0;
+
+        pfds[1].fd = rdma->channel->fd;
+        pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+        pfds[1].revents = 0;
+
+        /* 0.1s timeout, should be fine for a 'cancel' */
+        switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
+        case 2:
+        case 1: /* fd active */
+            if (pfds[0].revents) {
+                return 0;
+            }
 
-                if (pfds[1].revents) {
-                    if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
-                        return -1;
-                    }
+            if (pfds[1].revents) {
+                if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
+                    return -1;
+                }
 
-                    if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
-                        cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
-                        rdma_ack_cm_event(cm_event);
-                        return -1;
-                    }
+                if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
+                    cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
                     rdma_ack_cm_event(cm_event);
+                    return -1;
                 }
-                break;
+                rdma_ack_cm_event(cm_event);
+            }
+            break;
 
-            case 0: /* Timeout, go around again */
-                break;
+        case 0: /* Timeout, go around again */
+            break;
 
-            default: /* Error of some type -
-                      * I don't trust errno from qemu_poll_ns
-                     */
-                return -1;
-            }
+        default: /* Error of some type -
+                  * I don't trust errno from qemu_poll_ns
+                  */
+            return -1;
+        }
 
-            if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
-                /* Bail out and let the cancellation happen */
-                return -1;
-            }
+        if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
+            /* Bail out and let the cancellation happen */
+            return -1;
         }
     }
 
@@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
         return;
     }
 
-    rdma->migration_started_on_destination = 1;
     migration_fd_process_incoming(f);
 }
 
-- 
2.50.1
Re: [PATCH RFC 6/9] migration/rdma: Remove coroutine path in qemu_rdma_wait_comp_channel
Posted by Zhijian Li (Fujitsu) 4 months, 2 weeks ago

On 28/08/2025 04:59, Peter Xu wrote:
> Now that the dest VM load during precopy has been threadified, we will
> always be in a thread context rather than within a coroutine.  We can
> remove this path now.
> 
> With that, migration_started_on_destination can go away too.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>


Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>


Thanks
Zhijian

> ---
>   migration/rdma.c | 102 +++++++++++++++++++----------------------------
>   1 file changed, 41 insertions(+), 61 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2b995513aa..7751262460 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -29,7 +29,6 @@
>   #include "qemu/rcu.h"
>   #include "qemu/sockets.h"
>   #include "qemu/bitmap.h"
> -#include "qemu/coroutine.h"
>   #include "system/memory.h"
>   #include <sys/socket.h>
>   #include <netdb.h>
> @@ -357,13 +356,6 @@ typedef struct RDMAContext {
>       /* Index of the next RAMBlock received during block registration */
>       unsigned int    next_src_index;
>   
> -    /*
> -     * Migration on *destination* started.
> -     * Then use coroutine yield function.
> -     * Source runs in a thread, so we don't care.
> -     */
> -    int migration_started_on_destination;
> -
>       int total_registrations;
>       int total_writes;
>   
> @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
>       struct rdma_cm_event *cm_event;
>   
>       /*
> -     * Coroutine doesn't start until migration_fd_process_incoming()
> -     * so don't yield unless we know we're running inside of a coroutine.
> +     * This is the source or dest side, either during precopy or
> +     * postcopy.  We're always in a separate thread when reaching here.
> +     * Poll the fd.  We need to be able to handle 'cancel' or an error
> +     * without hanging forever.
>        */
> -    if (rdma->migration_started_on_destination &&
> -        migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> -        qemu_in_coroutine()) {
> -        yield_until_fd_readable(comp_channel->fd);
> -    } else {
> -        /* This is the source side, we're in a separate thread
> -         * or destination prior to migration_fd_process_incoming()
> -         * after postcopy, the destination also in a separate thread.
> -         * we can't yield; so we have to poll the fd.
> -         * But we need to be able to handle 'cancel' or an error
> -         * without hanging forever.
> -         */
> -        while (!rdma->errored && !rdma->received_error) {
> -            GPollFD pfds[2];
> -            pfds[0].fd = comp_channel->fd;
> -            pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> -            pfds[0].revents = 0;
> -
> -            pfds[1].fd = rdma->channel->fd;
> -            pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> -            pfds[1].revents = 0;
> -
> -            /* 0.1s timeout, should be fine for a 'cancel' */
> -            switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> -            case 2:
> -            case 1: /* fd active */
> -                if (pfds[0].revents) {
> -                    return 0;
> -                }
> +    while (!rdma->errored && !rdma->received_error) {
> +        GPollFD pfds[2];
> +        pfds[0].fd = comp_channel->fd;
> +        pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +        pfds[0].revents = 0;
> +
> +        pfds[1].fd = rdma->channel->fd;
> +        pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +        pfds[1].revents = 0;
> +
> +        /* 0.1s timeout, should be fine for a 'cancel' */
> +        switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> +        case 2:
> +        case 1: /* fd active */
> +            if (pfds[0].revents) {
> +                return 0;
> +            }
>   
> -                if (pfds[1].revents) {
> -                    if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> -                        return -1;
> -                    }
> +            if (pfds[1].revents) {
> +                if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> +                    return -1;
> +                }
>   
> -                    if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> -                        cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> -                        rdma_ack_cm_event(cm_event);
> -                        return -1;
> -                    }
> +                if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> +                    cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
>                       rdma_ack_cm_event(cm_event);
> +                    return -1;
>                   }
> -                break;
> +                rdma_ack_cm_event(cm_event);
> +            }
> +            break;
>   
> -            case 0: /* Timeout, go around again */
> -                break;
> +        case 0: /* Timeout, go around again */
> +            break;
>   
> -            default: /* Error of some type -
> -                      * I don't trust errno from qemu_poll_ns
> -                     */
> -                return -1;
> -            }
> +        default: /* Error of some type -
> +                  * I don't trust errno from qemu_poll_ns
> +                  */
> +            return -1;
> +        }
>   
> -            if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> -                /* Bail out and let the cancellation happen */
> -                return -1;
> -            }
> +        if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> +            /* Bail out and let the cancellation happen */
> +            return -1;
>           }
>       }
>   
> @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
>           return;
>       }
>   
> -    rdma->migration_started_on_destination = 1;
>       migration_fd_process_incoming(f);
>   }
>   
Re: [PATCH RFC 6/9] migration/rdma: Remove coroutine path in qemu_rdma_wait_comp_channel
Posted by Fabiano Rosas 4 months, 3 weeks ago
Peter Xu <peterx@redhat.com> writes:

> Now that the dest VM load during precopy has been threadified, we will
> always be in a thread context rather than within a coroutine.  We can
> remove this path now.
>
> With that, migration_started_on_destination can go away too.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/rdma.c | 102 +++++++++++++++++++----------------------------
>  1 file changed, 41 insertions(+), 61 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2b995513aa..7751262460 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -29,7 +29,6 @@
>  #include "qemu/rcu.h"
>  #include "qemu/sockets.h"
>  #include "qemu/bitmap.h"
> -#include "qemu/coroutine.h"
>  #include "system/memory.h"
>  #include <sys/socket.h>
>  #include <netdb.h>
> @@ -357,13 +356,6 @@ typedef struct RDMAContext {
>      /* Index of the next RAMBlock received during block registration */
>      unsigned int    next_src_index;
>  
> -    /*
> -     * Migration on *destination* started.
> -     * Then use coroutine yield function.
> -     * Source runs in a thread, so we don't care.
> -     */
> -    int migration_started_on_destination;
> -
>      int total_registrations;
>      int total_writes;
>  
> @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
>      struct rdma_cm_event *cm_event;
>  
>      /*
> -     * Coroutine doesn't start until migration_fd_process_incoming()
> -     * so don't yield unless we know we're running inside of a coroutine.
> +     * This is the source or dest side, either during precopy or
> +     * postcopy.  We're always in a separate thread when reaching here.
> +     * Poll the fd.  We need to be able to handle 'cancel' or an error
> +     * without hanging forever.
>       */
> -    if (rdma->migration_started_on_destination &&
> -        migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> -        qemu_in_coroutine()) {
> -        yield_until_fd_readable(comp_channel->fd);
> -    } else {
> -        /* This is the source side, we're in a separate thread
> -         * or destination prior to migration_fd_process_incoming()
> -         * after postcopy, the destination also in a separate thread.
> -         * we can't yield; so we have to poll the fd.
> -         * But we need to be able to handle 'cancel' or an error
> -         * without hanging forever.
> -         */
> -        while (!rdma->errored && !rdma->received_error) {
> -            GPollFD pfds[2];
> -            pfds[0].fd = comp_channel->fd;
> -            pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> -            pfds[0].revents = 0;
> -
> -            pfds[1].fd = rdma->channel->fd;
> -            pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> -            pfds[1].revents = 0;
> -
> -            /* 0.1s timeout, should be fine for a 'cancel' */
> -            switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> -            case 2:
> -            case 1: /* fd active */
> -                if (pfds[0].revents) {
> -                    return 0;
> -                }
> +    while (!rdma->errored && !rdma->received_error) {
> +        GPollFD pfds[2];
> +        pfds[0].fd = comp_channel->fd;
> +        pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +        pfds[0].revents = 0;
> +
> +        pfds[1].fd = rdma->channel->fd;
> +        pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +        pfds[1].revents = 0;
> +
> +        /* 0.1s timeout, should be fine for a 'cancel' */
> +        switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {

Doesn't glib have facilities for polling? Isn't this what
qio_channel_rdma_create_watch() is for already?
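
Something along these lines is roughly what I have in mind -- a rough,
untested sketch that keeps the same two fds but goes through plain
g_poll() (which takes a timeout in milliseconds) instead of
qemu_poll_ns():

    GPollFD pfds[2] = {
        { .fd = comp_channel->fd,  .events = G_IO_IN | G_IO_HUP | G_IO_ERR },
        { .fd = rdma->channel->fd, .events = G_IO_IN | G_IO_HUP | G_IO_ERR },
    };

    /* 100ms timeout, same 'cancel' granularity as the current loop */
    switch (g_poll(pfds, 2, 100)) {
    case 2:
    case 1: /* fd active: same revents handling as today */
        break;
    case 0: /* timeout, go around again */
        break;
    default: /* error */
        return -1;
    }

That said, a GSource-based watch from qio_channel_rdma_create_watch()
would presumably need a GMainContext iterating in this thread, so plain
g_poll() may be the smaller step.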

> +        case 2:
> +        case 1: /* fd active */
> +            if (pfds[0].revents) {
> +                return 0;
> +            }
>  
> -                if (pfds[1].revents) {
> -                    if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> -                        return -1;
> -                    }
> +            if (pfds[1].revents) {
> +                if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
> +                    return -1;
> +                }
>  
> -                    if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> -                        cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
> -                        rdma_ack_cm_event(cm_event);
> -                        return -1;
> -                    }
> +                if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
> +                    cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
>                      rdma_ack_cm_event(cm_event);
> +                    return -1;
>                  }
> -                break;
> +                rdma_ack_cm_event(cm_event);
> +            }
> +            break;
>  
> -            case 0: /* Timeout, go around again */
> -                break;
> +        case 0: /* Timeout, go around again */
> +            break;
>  
> -            default: /* Error of some type -
> -                      * I don't trust errno from qemu_poll_ns
> -                     */
> -                return -1;
> -            }
> +        default: /* Error of some type -
> +                  * I don't trust errno from qemu_poll_ns
> +                  */
> +            return -1;
> +        }
>  
> -            if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> -                /* Bail out and let the cancellation happen */
> -                return -1;
> -            }
> +        if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) {
> +            /* Bail out and let the cancellation happen */
> +            return -1;
>          }
>      }
>  
> @@ -3817,7 +3798,6 @@ static void rdma_accept_incoming_migration(void *opaque)
>          return;
>      }
>  
> -    rdma->migration_started_on_destination = 1;
>      migration_fd_process_incoming(f);
>  }
Re: [PATCH RFC 6/9] migration/rdma: Remove coroutine path in qemu_rdma_wait_comp_channel
Posted by Peter Xu 4 months ago
On Tue, Sep 16, 2025 at 07:39:30PM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > Now that the dest VM load during precopy has been threadified, we will
> > always be in a thread context rather than within a coroutine.  We can
> > remove this path now.
> >
> > With that, migration_started_on_destination can go away too.
> >
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  migration/rdma.c | 102 +++++++++++++++++++----------------------------
> >  1 file changed, 41 insertions(+), 61 deletions(-)
> >
> > diff --git a/migration/rdma.c b/migration/rdma.c
> > index 2b995513aa..7751262460 100644
> > --- a/migration/rdma.c
> > +++ b/migration/rdma.c
> > @@ -29,7 +29,6 @@
> >  #include "qemu/rcu.h"
> >  #include "qemu/sockets.h"
> >  #include "qemu/bitmap.h"
> > -#include "qemu/coroutine.h"
> >  #include "system/memory.h"
> >  #include <sys/socket.h>
> >  #include <netdb.h>
> > @@ -357,13 +356,6 @@ typedef struct RDMAContext {
> >      /* Index of the next RAMBlock received during block registration */
> >      unsigned int    next_src_index;
> >  
> > -    /*
> > -     * Migration on *destination* started.
> > -     * Then use coroutine yield function.
> > -     * Source runs in a thread, so we don't care.
> > -     */
> > -    int migration_started_on_destination;
> > -
> >      int total_registrations;
> >      int total_writes;
> >  
> > @@ -1353,66 +1345,55 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
> >      struct rdma_cm_event *cm_event;
> >  
> >      /*
> > -     * Coroutine doesn't start until migration_fd_process_incoming()
> > -     * so don't yield unless we know we're running inside of a coroutine.
> > +     * This is the source or dest side, either during precopy or
> > +     * postcopy.  We're always in a separate thread when reaching here.
> > +     * Poll the fd.  We need to be able to handle 'cancel' or an error
> > +     * without hanging forever.
> >       */
> > -    if (rdma->migration_started_on_destination &&
> > -        migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE &&
> > -        qemu_in_coroutine()) {
> > -        yield_until_fd_readable(comp_channel->fd);
> > -    } else {
> > -        /* This is the source side, we're in a separate thread
> > -         * or destination prior to migration_fd_process_incoming()
> > -         * after postcopy, the destination also in a separate thread.
> > -         * we can't yield; so we have to poll the fd.
> > -         * But we need to be able to handle 'cancel' or an error
> > -         * without hanging forever.
> > -         */
> > -        while (!rdma->errored && !rdma->received_error) {
> > -            GPollFD pfds[2];
> > -            pfds[0].fd = comp_channel->fd;
> > -            pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > -            pfds[0].revents = 0;
> > -
> > -            pfds[1].fd = rdma->channel->fd;
> > -            pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > -            pfds[1].revents = 0;
> > -
> > -            /* 0.1s timeout, should be fine for a 'cancel' */
> > -            switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> > -            case 2:
> > -            case 1: /* fd active */
> > -                if (pfds[0].revents) {
> > -                    return 0;
> > -                }
> > +    while (!rdma->errored && !rdma->received_error) {
> > +        GPollFD pfds[2];
> > +        pfds[0].fd = comp_channel->fd;
> > +        pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > +        pfds[0].revents = 0;
> > +
> > +        pfds[1].fd = rdma->channel->fd;
> > +        pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> > +        pfds[1].revents = 0;
> > +
> > +        /* 0.1s timeout, should be fine for a 'cancel' */
> > +        switch (qemu_poll_ns(pfds, 2, 100 * 1000 * 1000)) {
> 
> Doesn't glib have facilities for polling? Isn't this what
> qio_channel_rdma_create_watch() is for already?

Yes.  I don't know why the RDMA channel is done like this; I didn't dig
deeper.  I bet Dan has more clues (as the author of 6ddd2d76ca6f).  The
hope is that I won't need to dig into it either, as long as I only want to
make loadvm work in a thread. :)

I also replied to your other email; that should have some more info
regarding why I think rdma's io_create_watch() isn't used... or seems
broken.

For this patch alone, it almost only removes the "if ()" section; the
remaining lines are untouched except for indentation changes.

-- 
Peter Xu