ibv_dereg_mr waits for a long time on virtual servers with large memory sizes.
The test results (guest memory size vs. ibv_dereg_mr time) are:
10GB 326ms
20GB 699ms
30GB 1021ms
40GB 1387ms
50GB 1712ms
60GB 2034ms
70GB 2457ms
80GB 2807ms
90GB 3107ms
100GB 3474ms
110GB 3735ms
120GB 4064ms
130GB 4567ms
140GB 4886ms
This causes the guest OS to hang for a while when migration finishes.
So create a dedicated thread to release the RDMA resources.
Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
migration/rdma.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/migration/rdma.c b/migration/rdma.c
index dfa4f77..1b9e261 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
     }
 }

-static int qio_channel_rdma_close(QIOChannel *ioc,
-                                  Error **errp)
+static void *qio_channel_rdma_close_thread(void *arg)
 {
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QIOChannelRDMA *rioc = arg;
     RDMAContext *rdmain, *rdmaout;
-    trace_qemu_rdma_close();
+
+    rcu_register_thread();

     rdmain = rioc->rdmain;
     if (rdmain) {
@@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
     g_free(rdmain);
     g_free(rdmaout);

+    rcu_unregister_thread();
+    return NULL;
+}
+
+static int qio_channel_rdma_close(QIOChannel *ioc,
+                                  Error **errp)
+{
+    QemuThread t;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    trace_qemu_rdma_close();
+
+    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
+                       rioc, QEMU_THREAD_DETACHED);
     return 0;
 }
--
1.8.3.1
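[The timings above work out to roughly 33-35 ms per GB of guest memory, i.e. close to linear in the amount of registered memory. A minimal sketch of how such a number can be measured (a hypothetical helper, assuming libibverbs and an already-registered struct ibv_mr; not code from the patch):

#include <infiniband/verbs.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical helper: time a single ibv_dereg_mr() call for an
 * already-registered memory region.  Returns milliseconds, or -1.0
 * if deregistration fails. */
static double time_dereg_ms(struct ibv_mr *mr)
{
    struct timespec t0, t1;

    clock_gettime(CLOCK_MONOTONIC, &t0);
    if (ibv_dereg_mr(mr)) {
        fprintf(stderr, "ibv_dereg_mr failed\n");
        return -1.0;
    }
    clock_gettime(CLOCK_MONOTONIC, &t1);

    return (t1.tv_sec - t0.tv_sec) * 1e3 +
           (t1.tv_nsec - t0.tv_nsec) / 1e6;
}]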
* Lidong Chen (jemmy858585@gmail.com) wrote:
> ibv_dereg_mr waits for a long time on virtual servers with large memory sizes.
>
> The test results (guest memory size vs. ibv_dereg_mr time) are:
> 10GB 326ms
> 20GB 699ms
> 30GB 1021ms
> 40GB 1387ms
> 50GB 1712ms
> 60GB 2034ms
> 70GB 2457ms
> 80GB 2807ms
> 90GB 3107ms
> 100GB 3474ms
> 110GB 3735ms
> 120GB 4064ms
> 130GB 4567ms
> 140GB 4886ms
>
> This causes the guest OS to hang for a while when migration finishes.
> So create a dedicated thread to release the RDMA resources.
>
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
> migration/rdma.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index dfa4f77..1b9e261 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>      }
>  }
>
> -static int qio_channel_rdma_close(QIOChannel *ioc,
> -                                  Error **errp)
> +static void *qio_channel_rdma_close_thread(void *arg)
>  {
> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    QIOChannelRDMA *rioc = arg;
>      RDMAContext *rdmain, *rdmaout;
> -    trace_qemu_rdma_close();
> +
> +    rcu_register_thread();
>
>      rdmain = rioc->rdmain;
>      if (rdmain) {
> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>      g_free(rdmain);
>      g_free(rdmaout);
>
> +    rcu_unregister_thread();
> +    return NULL;
> +}
> +
> +static int qio_channel_rdma_close(QIOChannel *ioc,
> +                                  Error **errp)
> +{
> +    QemuThread t;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    trace_qemu_rdma_close();
> +
> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
> +                       rioc, QEMU_THREAD_DETACHED);
I don't think this can be this simple; consider the lock in patch 4;
now that lock means qio_channel_rdma_close() can't be called in
parallel; but with this change it means:
f->lock
qemu_thread_create (1)
!f->lock
f->lock
qemu_thread_create
!f->lock
so we don't really protect the thing you were trying to lock
Dave
>      return 0;
>  }
>
> --
> 1.8.3.1
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
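[To make the interleaving above concrete, here is a small self-contained demo of the pattern Dave describes (a hypothetical illustration in plain pthreads, not QEMU code): the lock only serializes spawning the detached threads, so both cleanup threads can observe the same shared pointer, which is exactly the state the lock was supposed to protect.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-in for the rioc->rdmain/rdmaout fields the cleanup thread touches. */
static struct { void *rdmain; } shared = { &shared };
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *cleanup_thread(void *arg)
{
    /* Nothing stops both spawned threads from seeing the same
     * non-NULL pointer here, i.e. a double release. */
    if (shared.rdmain) {
        printf("thread %ld: releasing %p\n", (long)arg, shared.rdmain);
        sleep(1);                   /* widen the race window */
        shared.rdmain = NULL;
    }
    return NULL;
}

static void close_once(long id)
{
    pthread_t t;
    pthread_attr_t attr;

    /* The lock covers only the spawn, not the cleanup itself. */
    pthread_mutex_lock(&lock);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    pthread_create(&t, &attr, cleanup_thread, (void *)id);
    pthread_attr_destroy(&attr);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    close_once(1);
    close_once(2);   /* second close runs before thread 1 has finished */
    sleep(3);
    return 0;
}

Compiled with -pthread, this typically prints two "releasing" lines for the same pointer.]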
On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> * Lidong Chen (jemmy858585@gmail.com) wrote:
>> [...]
>> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>>      }
>>  }
>>
>> -static int qio_channel_rdma_close(QIOChannel *ioc,
>> -                                  Error **errp)
>> +static void *qio_channel_rdma_close_thread(void *arg)
>>  {
>> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +    QIOChannelRDMA *rioc = arg;
>>      RDMAContext *rdmain, *rdmaout;
>> -    trace_qemu_rdma_close();
>> +
>> +    rcu_register_thread();
>>
>>      rdmain = rioc->rdmain;
>>      if (rdmain) {
>> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>>      g_free(rdmain);
>>      g_free(rdmaout);
>>
>> +    rcu_unregister_thread();
>> +    return NULL;
>> +}
>> +
>> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> +                                  Error **errp)
>> +{
>> +    QemuThread t;
>> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +    trace_qemu_rdma_close();
>> +
>> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> +                       rioc, QEMU_THREAD_DETACHED);
>
> I don't think this can be this simple; consider the lock in patch 4;
> now that lock means qio_channel_rdma_close() can't be called in
> parallel; but with this change it means:
>
>
> f->lock
> qemu_thread_create (1)
> !f->lock
> f->lock
> qemu_thread_create
> !f->lock
>
> so we don't really protect the thing you were trying to lock
yes, I should not use rioc as the thread arg.
static int qio_channel_rdma_close(QIOChannel *ioc,
                                  Error **errp)
{
    QemuThread t;
    RDMAContext *rdma[2];
    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);

    trace_qemu_rdma_close();
    if (rioc->rdmain || rioc->rdmaout) {
        rdma[0] = rioc->rdmain;
        rdma[1] = rioc->rdmaout;
        qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
                           rdma, QEMU_THREAD_DETACHED);
        rioc->rdmain = NULL;
        rioc->rdmaout = NULL;
    }
    return 0;
}
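[One caveat with the sketch above: rdma is a stack array in qio_channel_rdma_close(), but the detached thread may not run until after the function has returned, so the thread could read a dangling pointer. Passing a heap-allocated array avoids that lifetime problem; a sketch under that assumption, reusing migration/rdma.c's qemu_rdma_cleanup() for the per-direction teardown elided from the hunk above:

static void *qio_channel_rdma_close_thread(void *arg)
{
    RDMAContext **rdma = arg;
    RDMAContext *rdmain = rdma[0];
    RDMAContext *rdmaout = rdma[1];

    rcu_register_thread();

    if (rdmain) {
        qemu_rdma_cleanup(rdmain);
    }
    if (rdmaout) {
        qemu_rdma_cleanup(rdmaout);
    }

    g_free(rdmain);
    g_free(rdmaout);
    g_free(rdma);    /* the array itself is heap memory, freed here */

    rcu_unregister_thread();
    return NULL;
}

static int qio_channel_rdma_close(QIOChannel *ioc,
                                  Error **errp)
{
    QemuThread t;
    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
    RDMAContext **rdma = NULL;

    trace_qemu_rdma_close();
    if (rioc->rdmain || rioc->rdmaout) {
        rdma = g_new0(RDMAContext *, 2);
        rdma[0] = rioc->rdmain;
        rdma[1] = rioc->rdmaout;
        qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
                           rdma, QEMU_THREAD_DETACHED);
        rioc->rdmain = NULL;
        rioc->rdmaout = NULL;
    }
    return 0;
}]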
>
> Dave
>
> >>      return 0;
> >>  }
>>
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* 858585 jemmy (jemmy858585@gmail.com) wrote:
> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> > * Lidong Chen (jemmy858585@gmail.com) wrote:
> >> [...]
> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
> >> +                                  Error **errp)
> >> +{
> >> +    QemuThread t;
> >> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> >> +    trace_qemu_rdma_close();
> >> +
> >> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
> >> +                       rioc, QEMU_THREAD_DETACHED);
> >
> > I don't think this can be this simple; consider the lock in patch 4;
> > now that lock means qio_channel_rdma_close() can't be called in
> > parallel; but with this change it means:
> >
> >
> > f->lock
> > qemu_thread_create (1)
> > !f->lock
> > f->lock
> > qemu_thread_create
> > !f->lock
> >
> > so we don't really protect the thing you were trying to lock
>
> yes, I should not use rioc as the thread arg.
>
> static int qio_channel_rdma_close(QIOChannel *ioc,
>                                   Error **errp)
> {
>     QemuThread t;
>     RDMAContext *rdma[2];
>     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>
>     trace_qemu_rdma_close();
>     if (rioc->rdmain || rioc->rdmaout) {
>         rdma[0] = rioc->rdmain;
>         rdma[1] = rioc->rdmaout;
>         qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>                            rdma, QEMU_THREAD_DETACHED);
>         rioc->rdmain = NULL;
>         rioc->rdmaout = NULL;
Is it safe to close both directions at once?
For example, if you get the close from the return path thread, might the
main thread still be using its QEMUFile in the opposite direction;
it'll call close a little bit later?
Dave
>     }
>     return 0;
> }
>
> >
> > Dave
> >
> >>      return 0;
> >>  }
> >>
> >> --
> >> 1.8.3.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Thu, May 31, 2018 at 6:55 PM, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> * 858585 jemmy (jemmy858585@gmail.com) wrote:
>> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
>> <dgilbert@redhat.com> wrote:
>> > * Lidong Chen (jemmy858585@gmail.com) wrote:
>> >> [...]
>> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> >> +                                  Error **errp)
>> >> +{
>> >> +    QemuThread t;
>> >> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> >> +    trace_qemu_rdma_close();
>> >> +
>> >> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> >> +                       rioc, QEMU_THREAD_DETACHED);
>> >
>> > I don't think this can be this simple; consider the lock in patch 4;
>> > now that lock means qio_channel_rdma_close() can't be called in
>> > parallel; but with this change it means:
>> >
>> >
>> > f->lock
>> > qemu_thread_create (1)
>> > !f->lock
>> > f->lock
>> > qemu_thread_create
>> > !f->lock
>> >
>> > so we don't really protect the thing you were trying to lock
>>
>> yes, I should not use rioc as the thread arg.
>>
>> static int qio_channel_rdma_close(QIOChannel *ioc,
>>                                   Error **errp)
>> {
>>     QemuThread t;
>>     RDMAContext *rdma[2];
>>     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>>
>>     trace_qemu_rdma_close();
>>     if (rioc->rdmain || rioc->rdmaout) {
>>         rdma[0] = rioc->rdmain;
>>         rdma[1] = rioc->rdmaout;
>>         qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>>                            rdma, QEMU_THREAD_DETACHED);
>>         rioc->rdmain = NULL;
>>         rioc->rdmaout = NULL;
>
> Is it safe to close both directions at once?
> For example, if you get the close from the return path thread, might the
> main thread still be using its QEMUFile in the opposite direction;
> it'll call close a little bit later?
I use RCU to protect this. qio_channel_rdma_close_thread calls synchronize_rcu,
which waits until no other thread can still be accessing rdmain and rdmaout.
And if the return path closes the QEMUFile, the migration thread's QEMUFile
will be set to error soon, because the QIOChannel is closed.
QIOChannelSocket also works this way.
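[For reference, the read side this argument relies on looks roughly like the following (a sketch with a hypothetical function name, assuming QEMU's qemu/rcu.h API and that the channel's read/write paths load rdmain/rdmaout with atomic_rcu_read() inside an RCU critical section):

static ssize_t rdma_writev_sketch(QIOChannelRDMA *rioc,
                                  const struct iovec *iov, size_t niov)
{
    RDMAContext *rdma;
    ssize_t done = 0;

    rcu_read_lock();
    rdma = atomic_rcu_read(&rioc->rdmaout);
    if (!rdma) {
        rcu_read_unlock();
        return -1;    /* channel already torn down */
    }

    /* ... perform the transfer through rdma; the context cannot be
     * freed while this read-side critical section is open, because
     * synchronize_rcu() in the cleanup path waits for it to end ... */

    rcu_read_unlock();
    return done;
}

A closer that first NULLs rioc->rdmaout and then calls synchronize_rcu() is thus guaranteed that no reader still holds the old pointer before it is freed.]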
>
> Dave
>
>>     }
>>     return 0;
>> }
>>
>> >
>> > Dave
>> >
>> >>      return 0;
>> >>  }
>> >>
>> >> --
>> >> 1.8.3.1
>> >>
>> > --
>> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK