[PATCH RFC 4/9] migration/rdma: Change io_create_watch() to return immediately

Peter Xu posted 9 patches 1 month ago
Maintainers: Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Hailiang Zhang <zhanghailiang@xfusion.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Li Zhijian <lizhijian@fujitsu.com>
[PATCH RFC 4/9] migration/rdma: Change io_create_watch() to return immediately
Posted by Peter Xu 1 month ago
The old RDMA io_create_watch() isn't really doing much work anyway.  For
G_IO_OUT, it already returns immediately.  For G_IO_IN, it tries to detect
a non-zero control_len in the RDMA context (wr_data[0].control_len);
however, normally nobody will be able to set it at all.

Simplify the code so that RDMA iochannels always rely on synchronous reads
and writes.  This is most likely what commit 6ddd2d76ca6f86f was talking
about: that the async model isn't really working well.

This helps because it is almost the only dependency for which the migration
core would need a coroutine for RDMA channels.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/rdma.c | 69 +++---------------------------------------------
 1 file changed, 3 insertions(+), 66 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index ed4e20b988..bcd7aae2f2 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2789,56 +2789,14 @@ static gboolean
 qio_channel_rdma_source_prepare(GSource *source,
                                 gint *timeout)
 {
-    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
-    RDMAContext *rdma;
-    GIOCondition cond = 0;
     *timeout = -1;
-
-    RCU_READ_LOCK_GUARD();
-    if (rsource->condition == G_IO_IN) {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
-    } else {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
-    }
-
-    if (!rdma) {
-        error_report("RDMAContext is NULL when prepare Gsource");
-        return FALSE;
-    }
-
-    if (rdma->wr_data[0].control_len) {
-        cond |= G_IO_IN;
-    }
-    cond |= G_IO_OUT;
-
-    return cond & rsource->condition;
+    return TRUE;
 }
 
 static gboolean
 qio_channel_rdma_source_check(GSource *source)
 {
-    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
-    RDMAContext *rdma;
-    GIOCondition cond = 0;
-
-    RCU_READ_LOCK_GUARD();
-    if (rsource->condition == G_IO_IN) {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
-    } else {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
-    }
-
-    if (!rdma) {
-        error_report("RDMAContext is NULL when check Gsource");
-        return FALSE;
-    }
-
-    if (rdma->wr_data[0].control_len) {
-        cond |= G_IO_IN;
-    }
-    cond |= G_IO_OUT;
-
-    return cond & rsource->condition;
+    return TRUE;
 }
 
 static gboolean
@@ -2848,29 +2806,8 @@ qio_channel_rdma_source_dispatch(GSource *source,
 {
     QIOChannelFunc func = (QIOChannelFunc)callback;
     QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
-    RDMAContext *rdma;
-    GIOCondition cond = 0;
-
-    RCU_READ_LOCK_GUARD();
-    if (rsource->condition == G_IO_IN) {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
-    } else {
-        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
-    }
-
-    if (!rdma) {
-        error_report("RDMAContext is NULL when dispatch Gsource");
-        return FALSE;
-    }
-
-    if (rdma->wr_data[0].control_len) {
-        cond |= G_IO_IN;
-    }
-    cond |= G_IO_OUT;
 
-    return (*func)(QIO_CHANNEL(rsource->rioc),
-                   (cond & rsource->condition),
-                   user_data);
+    return (*func)(QIO_CHANNEL(rsource->rioc), rsource->condition, user_data);
 }
 
 static void
-- 
2.50.1
Re: [PATCH RFC 4/9] migration/rdma: Change io_create_watch() to return immediately
Posted by Zhijian Li (Fujitsu) 2 days, 12 hours ago

On 28/08/2025 04:59, Peter Xu wrote:
> The old RDMA's io_create_watch() isn't really doing much work anyway.  For
> G_IO_OUT, it already does return immediately.  For G_IO_IN, it will try to
> detect some RDMA context length however normally nobody will be able to set
> it at all.
> 


First, RDMA migration works well with this patch applied.

Tested-by: Li Zhijian <lizhijian@fujitsu.com>


I have a small question. While testing, I didn't observe any calls to
qio_channel_rdma_create_watch() during a complete RDMA migration using
the default capabilities and parameters.
In which cases is this function expected to be called?
(I see io_create_watch() is mandatory for QIOChannelClass)


Thanks
Zhijian


> Simplify the code so that RDMA iochannels simply always rely on synchronous
> reads and writes.  It is highly likely what 6ddd2d76ca6f86f was talking
> about, that the async model isn't really working well.
> 
> This helps because this is almost the only dependency that the migration
> core would need a coroutine for rdma channels.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   migration/rdma.c | 69 +++---------------------------------------------
>   1 file changed, 3 insertions(+), 66 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index ed4e20b988..bcd7aae2f2 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2789,56 +2789,14 @@ static gboolean
>   qio_channel_rdma_source_prepare(GSource *source,
>                                   gint *timeout)
>   {
> -    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
>       *timeout = -1;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when prepare Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
> -
> -    return cond & rsource->condition;
> +    return TRUE;
>   }
>   
>   static gboolean
>   qio_channel_rdma_source_check(GSource *source)
>   {
> -    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when check Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
> -
> -    return cond & rsource->condition;
> +    return TRUE;
>   }
>   
>   static gboolean
> @@ -2848,29 +2806,8 @@ qio_channel_rdma_source_dispatch(GSource *source,
>   {
>       QIOChannelFunc func = (QIOChannelFunc)callback;
>       QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when dispatch Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
>   
> -    return (*func)(QIO_CHANNEL(rsource->rioc),
> -                   (cond & rsource->condition),
> -                   user_data);
> +    return (*func)(QIO_CHANNEL(rsource->rioc), rsource->condition, user_data);
>   }
>   
>   static void
Re: [PATCH RFC 4/9] migration/rdma: Change io_create_watch() to return immediately
Posted by Fabiano Rosas 1 week, 4 days ago
Peter Xu <peterx@redhat.com> writes:

> The old RDMA's io_create_watch() isn't really doing much work anyway.  For
> G_IO_OUT, it already does return immediately.  For G_IO_IN, it will try to
> detect some RDMA context length however normally nobody will be able to set
> it at all.
>
> Simplify the code so that RDMA iochannels simply always rely on synchronous
> reads and writes.  It is highly likely what 6ddd2d76ca6f86f was talking
> about, that the async model isn't really working well.
>
> This helps because this is almost the only dependency that the migration
> core would need a coroutine for rdma channels.
>

I don't understand this. How does this code require a coroutine? Isn't
the io_watch exactly the strategy used when there is no coroutine?

> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/rdma.c | 69 +++---------------------------------------------
>  1 file changed, 3 insertions(+), 66 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index ed4e20b988..bcd7aae2f2 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2789,56 +2789,14 @@ static gboolean
>  qio_channel_rdma_source_prepare(GSource *source,
>                                  gint *timeout)
>  {
> -    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
>      *timeout = -1;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when prepare Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
> -
> -    return cond & rsource->condition;
> +    return TRUE;
>  }
>  
>  static gboolean
>  qio_channel_rdma_source_check(GSource *source)
>  {
> -    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when check Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
> -
> -    return cond & rsource->condition;
> +    return TRUE;

These are fine if we want the source to run as soon as possible, I
think. But then...

>  }
>  
>  static gboolean
> @@ -2848,29 +2806,8 @@ qio_channel_rdma_source_dispatch(GSource *source,
>  {
>      QIOChannelFunc func = (QIOChannelFunc)callback;
>      QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
> -    RDMAContext *rdma;
> -    GIOCondition cond = 0;
> -
> -    RCU_READ_LOCK_GUARD();
> -    if (rsource->condition == G_IO_IN) {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmain);
> -    } else {
> -        rdma = qatomic_rcu_read(&rsource->rioc->rdmaout);
> -    }
> -
> -    if (!rdma) {
> -        error_report("RDMAContext is NULL when dispatch Gsource");
> -        return FALSE;
> -    }
> -
> -    if (rdma->wr_data[0].control_len) {
> -        cond |= G_IO_IN;
> -    }
> -    cond |= G_IO_OUT;
>  
> -    return (*func)(QIO_CHANNEL(rsource->rioc),
> -                   (cond & rsource->condition),
> -                   user_data);
> +    return (*func)(QIO_CHANNEL(rsource->rioc), rsource->condition, user_data);

No idea who even calls g_source_set_callback() in this case. What is func?

>  }
>  
>  static void