[Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side

Juan Quintela posted 19 patches 8 years, 6 months ago
There is a newer version of this series
[Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Juan Quintela 8 years, 6 months ago
We make the locking and the transfer of information specific, even if we
are still receiving things through the main thread.

Signed-off-by: Juan Quintela <quintela@redhat.com>

--

We split when we create the main channel and where we start the main
migration thread, so we wait for the creation of the other threads.

Use multifd_clear_group().
---
 migration/migration.c |  7 ++++---
 migration/migration.h |  1 +
 migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
 migration/socket.c    |  2 +-
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 8e9505a..b78dffc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -381,7 +381,7 @@ static void migration_incoming_setup(QEMUFile *f)
     qemu_file_set_blocking(f, false);
 }
 
-static void migration_incoming_process(void)
+void migration_incoming_process(void)
 {
     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
     qemu_coroutine_enter(co);
@@ -400,9 +400,10 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
     if (!mis->from_src_file) {
         QEMUFile *f = qemu_fopen_channel_input(ioc);
         mis->from_src_file = f;
-        migration_fd_process_incoming(f);
+        migration_incoming_setup(f);
+        return;
     }
-    /* We still only have a single channel.  Nothing to do here yet */
+    multifd_new_channel(ioc);
 }
 
 /**
diff --git a/migration/migration.h b/migration/migration.h
index ddf8c89..1442b33 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -154,6 +154,7 @@ void migrate_set_state(int *state, int old_state, int new_state);
 
 void migration_fd_process_incoming(QEMUFile *f);
 void migration_ioc_process_incoming(QIOChannel *ioc);
+void migration_incoming_process(void);
 
 bool  migration_has_all_channels(void);
 
diff --git a/migration/ram.c b/migration/ram.c
index 169cfc9..eb0015e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -572,12 +572,17 @@ static uint16_t multifd_send_page(uint8_t *address, bool last_page)
 }
 
 struct MultiFDRecvParams {
+    /* not changed */
     uint8_t id;
     QemuThread thread;
     QIOChannel *c;
+    QemuSemaphore ready;
     QemuSemaphore sem;
     QemuMutex mutex;
+    /* protected by param mutex */
     bool quit;
+    multifd_pages_t pages;
+    bool done;
 };
 typedef struct MultiFDRecvParams MultiFDRecvParams;
 
@@ -587,6 +592,7 @@ struct {
     int count;
     /* Should we finish */
     bool quit;
+    multifd_pages_t pages;
 } *multifd_recv_state;
 
 static void terminate_multifd_recv_threads(void)
@@ -602,6 +608,7 @@ static void terminate_multifd_recv_threads(void)
         p->quit = true;
         qemu_sem_post(&p->sem);
         qemu_mutex_unlock(&p->mutex);
+        multifd_clear_group(&p->pages);
     }
 }
 
@@ -625,6 +632,7 @@ void multifd_load_cleanup(void)
     }
     g_free(multifd_recv_state->params);
     multifd_recv_state->params = NULL;
+    multifd_clear_group(&multifd_recv_state->pages);
     g_free(multifd_recv_state);
     multifd_recv_state = NULL;
 }
@@ -633,12 +641,20 @@ static void *multifd_recv_thread(void *opaque)
 {
     MultiFDRecvParams *p = opaque;
 
+    qemu_sem_post(&p->ready);
     while (true) {
         qemu_mutex_lock(&p->mutex);
         if (p->quit) {
             qemu_mutex_unlock(&p->mutex);
             break;
         }
+        if (p->pages.num) {
+            p->pages.num = 0;
+            p->done = true;
+            qemu_mutex_unlock(&p->mutex);
+            qemu_sem_post(&p->ready);
+            continue;
+        }
         qemu_mutex_unlock(&p->mutex);
         qemu_sem_wait(&p->sem);
     }
@@ -682,8 +698,11 @@ void multifd_new_channel(QIOChannel *ioc)
     }
     qemu_mutex_init(&p->mutex);
     qemu_sem_init(&p->sem, 0);
+    qemu_sem_init(&p->ready, 0);
     p->quit = false;
     p->id = id;
+    p->done = false;
+    multifd_init_group(&p->pages);
     p->c = ioc;
     atomic_set(&multifd_recv_state->params[id], p);
     multifd_recv_state->count++;
@@ -703,6 +722,7 @@ int multifd_load_setup(void)
     multifd_recv_state->params = g_new0(MultiFDRecvParams *, thread_count);
     multifd_recv_state->count = 0;
     multifd_recv_state->quit = false;
+    multifd_init_group(&multifd_recv_state->pages);
     return 0;
 }
 
@@ -711,6 +731,36 @@ int multifd_created_threads(void)
     return multifd_recv_state->count;
 }
 
+static void multifd_recv_page(uint8_t *address, uint16_t fd_num)
+{
+    int thread_count;
+    MultiFDRecvParams *p;
+    multifd_pages_t *pages = &multifd_recv_state->pages;
+
+    pages->iov[pages->num].iov_base = address;
+    pages->iov[pages->num].iov_len = TARGET_PAGE_SIZE;
+    pages->num++;
+
+    if (fd_num == MULTIFD_CONTINUE) {
+        return;
+    }
+
+    thread_count = migrate_multifd_threads();
+    assert(fd_num < thread_count);
+    p = multifd_recv_state->params[fd_num];
+
+    qemu_sem_wait(&p->ready);
+
+    qemu_mutex_lock(&p->mutex);
+    p->done = false;
+    iov_copy(p->pages.iov, pages->num, pages->iov, pages->num, 0,
+             iov_size(pages->iov, pages->num));
+    p->pages.num = pages->num;
+    pages->num = 0;
+    qemu_mutex_unlock(&p->mutex);
+    qemu_sem_post(&p->sem);
+}
+
 /**
  * save_page_header: write page header to wire
  *
@@ -3022,10 +3072,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
         case RAM_SAVE_FLAG_MULTIFD_PAGE:
             fd_num = qemu_get_be16(f);
-            if (fd_num != 0) {
-                /* this is yet an unused variable, changed later */
-                fd_num = fd_num;
-            }
+            multifd_recv_page(host, fd_num);
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
             break;
 
diff --git a/migration/socket.c b/migration/socket.c
index 5dd6f42..3af9f7c 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -183,12 +183,12 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
 
     qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
     migration_channel_process_incoming(QIO_CHANNEL(sioc));
-    object_unref(OBJECT(sioc));
 
 out:
     if (migration_has_all_channels()) {
         /* Close listening socket as it's no longer needed */
         qio_channel_close(ioc, NULL);
+        migration_incoming_process();
         return G_SOURCE_REMOVE;
     } else {
         return G_SOURCE_CONTINUE;
-- 
2.9.4


Re: [Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Peter Xu 8 years, 6 months ago
On Tue, Aug 08, 2017 at 06:26:25PM +0200, Juan Quintela wrote:
> We make the locking and the transfer of information specific, even if we
> are still receiving things through the main thread.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> 
> --
> 
> We split when we create the main channel and where we start the main
> migration thread, so we wait for the creation of the other threads.
> 
> Use multifd_clear_group().
> ---
>  migration/migration.c |  7 ++++---
>  migration/migration.h |  1 +
>  migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
>  migration/socket.c    |  2 +-
>  4 files changed, 57 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 8e9505a..b78dffc 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -381,7 +381,7 @@ static void migration_incoming_setup(QEMUFile *f)
>      qemu_file_set_blocking(f, false);
>  }
>  
> -static void migration_incoming_process(void)
> +void migration_incoming_process(void)
>  {
>      Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
>      qemu_coroutine_enter(co);
> @@ -400,9 +400,10 @@ void migration_ioc_process_incoming(QIOChannel *ioc)
>      if (!mis->from_src_file) {
>          QEMUFile *f = qemu_fopen_channel_input(ioc);
>          mis->from_src_file = f;
> -        migration_fd_process_incoming(f);
> +        migration_incoming_setup(f);

Here now we only setup the incoming channels, but not processing it
any more. Then would it be good we rename the function name as well?
The old "migration_ioc_process_incoming" has hints that it processed
something... And...

> +        return;
>      }
> -    /* We still only have a single channel.  Nothing to do here yet */
> +    multifd_new_channel(ioc);
>  }

[...]

> @@ -183,12 +183,12 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
>  
>      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
>      migration_channel_process_incoming(QIO_CHANNEL(sioc));
> -    object_unref(OBJECT(sioc));
>  
>  out:
>      if (migration_has_all_channels()) {
>          /* Close listening socket as its no longer needed */
>          qio_channel_close(ioc, NULL);
> +        migration_incoming_process();

... here we only added migration_incoming_process() for sockets. Would
that break fd/exec migration?

Thanks,

>          return G_SOURCE_REMOVE;
>      } else {
>          return G_SOURCE_CONTINUE;

-- 
Peter Xu

Re: [Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Daniel P. Berrange 8 years, 6 months ago
On Tue, Aug 08, 2017 at 06:26:25PM +0200, Juan Quintela wrote:
> We make the locking and the transfer of information specific, even if we
> are still receiving things through the main thread.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> 
> --
> 
> We split when we create the main channel and where we start the main
> migration thread, so we wait for the creation of the other threads.
> 
> Use multifd_clear_group().
> ---
>  migration/migration.c |  7 ++++---
>  migration/migration.h |  1 +
>  migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
>  migration/socket.c    |  2 +-
>  4 files changed, 57 insertions(+), 8 deletions(-)


> diff --git a/migration/socket.c b/migration/socket.c
> index 5dd6f42..3af9f7c 100644
> --- a/migration/socket.c
> +++ b/migration/socket.c
> @@ -183,12 +183,12 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
>  
>      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
>      migration_channel_process_incoming(QIO_CHANNEL(sioc));
> -    object_unref(OBJECT(sioc));

AFAICT, migration_channel_process_incoming() acquires its own reference
on 'sioc', so removing this object_unref means the code is now leaking a
reference

>  
>  out:
>      if (migration_has_all_channels()) {
>          /* Close listening socket as its no longer needed */
>          qio_channel_close(ioc, NULL);
> +        migration_incoming_process();
>          return G_SOURCE_REMOVE;
>      } else {
>          return G_SOURCE_CONTINUE;
> -- 
> 2.9.4
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Juan Quintela 8 years, 4 months ago
"Daniel P. Berrange" <berrange@redhat.com> wrote:
> On Tue, Aug 08, 2017 at 06:26:25PM +0200, Juan Quintela wrote:
>> We make the locking and the transfer of information specific, even if we
>> are still receiving things through the main thread.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> 
>> --
>> 
>> We split when we create the main channel and where we start the main
>> migration thread, so we wait for the creation of the other threads.
>> 
>> Use multifd_clear_group().
>> ---
>>  migration/migration.c |  7 ++++---
>>  migration/migration.h |  1 +
>>  migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
>>  migration/socket.c    |  2 +-
>>  4 files changed, 57 insertions(+), 8 deletions(-)
>
>
>> diff --git a/migration/socket.c b/migration/socket.c
>> index 5dd6f42..3af9f7c 100644
>> --- a/migration/socket.c
>> +++ b/migration/socket.c
>> @@ -183,12 +183,12 @@ static gboolean
>> socket_accept_incoming_migration(QIOChannel *ioc,
>>  
>>      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
>>      migration_channel_process_incoming(QIO_CHANNEL(sioc));
>> -    object_unref(OBJECT(sioc));
>
> AFAICT, migration_channel_process_incoming() acquires its own reference
> on 'sioc', so removing this object_unref means the code is now leaking a
> reference

Done.

Thanks, Juan.

Re: [Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Juan Quintela 8 years, 4 months ago
"Daniel P. Berrange" <berrange@redhat.com> wrote:
> On Tue, Aug 08, 2017 at 06:26:25PM +0200, Juan Quintela wrote:
>> We make the locking and the transfer of information specific, even if we
>> are still receiving things through the main thread.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> 
>> --
>> 
>> We split when we create the main channel and where we start the main
>> migration thread, so we wait for the creation of the other threads.
>> 
>> Use multifd_clear_group().
>> ---
>>  migration/migration.c |  7 ++++---
>>  migration/migration.h |  1 +
>>  migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
>>  migration/socket.c    |  2 +-
>>  4 files changed, 57 insertions(+), 8 deletions(-)
>
>
>> diff --git a/migration/socket.c b/migration/socket.c
>> index 5dd6f42..3af9f7c 100644
>> --- a/migration/socket.c
>> +++ b/migration/socket.c
>> @@ -183,12 +183,12 @@ static gboolean
>> socket_accept_incoming_migration(QIOChannel *ioc,
>>  
>>      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
>>      migration_channel_process_incoming(QIO_CHANNEL(sioc));
>> -    object_unref(OBJECT(sioc));
>
> AFAICT, migration_channel_process_incoming() acquires its own reference
> on 'sioc', so removing this object_unref means the code is now leaking a
> reference

Nack.

I did it and ended with a segmentation fault because reference count was
bad.

(qemu) 
Thread 6 "multifdrecv_0" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fff9f9ff700 (LWP 10065)]
0x00005555559c12f8 in type_is_ancestor (type=0xb60f18247c894860, 
    target_type=0x555556531b30) at /mnt/kvm/qemu/cleanup/qom/object.c:217
217	    while (type) {
(gdb) bt
#0  0x00005555559c12f8 in type_is_ancestor (type=0xb60f18247c894860, target_type=0x555556531b30) at /mnt/kvm/qemu/cleanup/qom/object.c:217
#1  0x00005555559c1dbb in object_class_dynamic_cast (class=class@entry=0x7ffff2cb3ce8 <main_arena+520>, typename=typename@entry=0x555555aa6d91 "qio-channel")
    at /mnt/kvm/qemu/cleanup/qom/object.c:691
#2  0x00005555559c1ed2 in object_class_dynamic_cast_assert (class=0x7ffff2cb3ce8 <main_arena+520>, typename=typename@entry=0x555555aa6d91 "qio-channel", file=file@entry=0x555555b88208 "/mnt/kvm/qemu/cleanup/io/channel.c", line=line@entry=56, func=func@entry=0x555555b884e0 <__func__.22671> "qio_channel_readv_full")
    at /mnt/kvm/qemu/cleanup/qom/object.c:723
#3  0x0000555555a3d4d7 in qio_channel_readv_full (ioc=0x5555568c5a00, iov=0x7fff900008c0, niov=15, fds=0x0, nfds=0x0, errp=0x7fff9f9fe950)
    at /mnt/kvm/qemu/cleanup/io/channel.c:56
#4  0x0000555555a3ddc2 in qio_channel_readv (errp=0x7fff9f9fe950, niov=<optimized out>, iov=<optimized out>, ioc=0x5555568c5a00)
    at /mnt/kvm/qemu/cleanup/io/channel.c:197
#5  0x0000555555a3ddc2 in qio_channel_readv_all_eof (ioc=0x5555568c5a00, iov=<optimized out>, niov=<optimized out>, errp=errp@entry=0x7fff9f9fe950)
    at /mnt/kvm/qemu/cleanup/io/channel.c:106
#6  0x0000555555a3de79 in qio_channel_readv_all (ioc=<optimized out>, iov=<optimized out>, niov=<optimized out>, errp=errp@entry=0x7fff9f9fe950)
    at /mnt/kvm/qemu/cleanup/io/channel.c:142
#7  0x0000555555794768 in multifd_recv_thread (opaque=0x5555570e5e00)
---Type <return> to continue, or q <return> to quit---up
    at /mnt/kvm/qemu/cleanup/migration/ram.c:722
#8  0x00007ffff2cc036d in start_thread (arg=0x7fff9f9ff700)
    at pthread_create.c:456
#9  0x00007ffff29f8bbf in clone ()
    at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
(gdb) up

Removing this unref fixed things, so I think that the accounting is good
(famous last words).

Later, Juan.

Re: [Qemu-devel] [PATCH v6 15/19] migration: Create thread infrastructure for multifd recv side
Posted by Daniel P. Berrange 8 years, 4 months ago
On Wed, Sep 13, 2017 at 01:26:07PM +0200, Juan Quintela wrote:
> "Daniel P. Berrange" <berrange@redhat.com> wrote:
> > On Tue, Aug 08, 2017 at 06:26:25PM +0200, Juan Quintela wrote:
> >> We make the locking and the transfer of information specific, even if we
> >> are still receiving things through the main thread.
> >> 
> >> Signed-off-by: Juan Quintela <quintela@redhat.com>
> >> 
> >> --
> >> 
> >> We split when we create the main channel and where we start the main
> >> migration thread, so we wait for the creation of the other threads.
> >> 
> >> Use multifd_clear_group().
> >> ---
> >>  migration/migration.c |  7 ++++---
> >>  migration/migration.h |  1 +
> >>  migration/ram.c       | 55 +++++++++++++++++++++++++++++++++++++++++++++++----
> >>  migration/socket.c    |  2 +-
> >>  4 files changed, 57 insertions(+), 8 deletions(-)
> >
> >
> >> diff --git a/migration/socket.c b/migration/socket.c
> >> index 5dd6f42..3af9f7c 100644
> >> --- a/migration/socket.c
> >> +++ b/migration/socket.c
> >> @@ -183,12 +183,12 @@ static gboolean
> >> socket_accept_incoming_migration(QIOChannel *ioc,
> >>  
> >>      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming");
> >>      migration_channel_process_incoming(QIO_CHANNEL(sioc));
> >> -    object_unref(OBJECT(sioc));
> >
> > AFAICT, migration_channel_process_incoming() acquires its own reference
> > on 'sioc', so removing this object_unref means the code is now leaking a
> > reference
> 
> Nack.
> 
> I did it and ended with a segmentation fault because reference count was
> bad.
> 
> (qemu) 
> Thread 6 "multifdrecv_0" received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fff9f9ff700 (LWP 10065)]
> 0x00005555559c12f8 in type_is_ancestor (type=0xb60f18247c894860, 
>     target_type=0x555556531b30) at /mnt/kvm/qemu/cleanup/qom/object.c:217
> 217	    while (type) {
> (gdb) bt
> #0  0x00005555559c12f8 in type_is_ancestor (type=0xb60f18247c894860, target_type=0x555556531b30) at /mnt/kvm/qemu/cleanup/qom/object.c:217
> #1  0x00005555559c1dbb in object_class_dynamic_cast (class=class@entry=0x7ffff2cb3ce8 <main_arena+520>, typename=typename@entry=0x555555aa6d91 "qio-channel")
>     at /mnt/kvm/qemu/cleanup/qom/object.c:691
> #2  0x00005555559c1ed2 in object_class_dynamic_cast_assert (class=0x7ffff2cb3ce8 <main_arena+520>, typename=typename@entry=0x555555aa6d91 "qio-channel", file=file@entry=0x555555b88208 "/mnt/kvm/qemu/cleanup/io/channel.c", line=line@entry=56, func=func@entry=0x555555b884e0 <__func__.22671> "qio_channel_readv_full")
>     at /mnt/kvm/qemu/cleanup/qom/object.c:723
> #3  0x0000555555a3d4d7 in qio_channel_readv_full (ioc=0x5555568c5a00, iov=0x7fff900008c0, niov=15, fds=0x0, nfds=0x0, errp=0x7fff9f9fe950)
>     at /mnt/kvm/qemu/cleanup/io/channel.c:56
> #4  0x0000555555a3ddc2 in qio_channel_readv (errp=0x7fff9f9fe950, niov=<optimized out>, iov=<optimized out>, ioc=0x5555568c5a00)
>     at /mnt/kvm/qemu/cleanup/io/channel.c:197
> #5  0x0000555555a3ddc2 in qio_channel_readv_all_eof (ioc=0x5555568c5a00, iov=<optimized out>, niov=<optimized out>, errp=errp@entry=0x7fff9f9fe950)
>     at /mnt/kvm/qemu/cleanup/io/channel.c:106
> #6  0x0000555555a3de79 in qio_channel_readv_all (ioc=<optimized out>, iov=<optimized out>, niov=<optimized out>, errp=errp@entry=0x7fff9f9fe950)
>     at /mnt/kvm/qemu/cleanup/io/channel.c:142
> #7  0x0000555555794768 in multifd_recv_thread (opaque=0x5555570e5e00)
> ---Type <return> to continue, or q <return> to quit---up
>     at /mnt/kvm/qemu/cleanup/migration/ram.c:722
> #8  0x00007ffff2cc036d in start_thread (arg=0x7fff9f9ff700)
>     at pthread_create.c:456
> #9  0x00007ffff29f8bbf in clone ()
>     at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) up
> 
> Removing this unref fixed things, so I think that the accounting is good
> (famous last words).

No, this is just papering over a bug elsewhere. The multifd_new_channel
method needs to acquire a reference for the multifd_recv_thread to
hold onto.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|