[PULL 08/14] migration: add new migration state wait-unplug

Michael S. Tsirkin posted 14 patches 6 years, 3 months ago
[PULL 08/14] migration: add new migration state wait-unplug
Posted by Michael S. Tsirkin 6 years, 3 months ago
From: Jens Freimann <jfreimann@redhat.com>

This patch adds a new migration state called wait-unplug.  It is entered
after the SETUP state if failover devices are present. It will transition
into ACTIVE once all devices were succesfully unplugged from the guest.

So if a guest doesn't respond or takes long to honor the unplug request
the user will see the migration state 'wait-unplug'.

In the migration thread we query failover devices if they're are still
pending the guest unplug. When all are unplugged the migration
continues. If one device won't unplug migration will stay in wait_unplug
state.

Signed-off-by: Jens Freimann <jfreimann@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20191029114905.6856-9-jfreimann@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/migration/vmstate.h |  2 ++
 migration/migration.c       | 21 +++++++++++++++++++++
 migration/migration.h       |  3 +++
 migration/savevm.c          | 31 +++++++++++++++++++++++++++++++
 migration/savevm.h          |  2 ++
 qapi/migration.json         |  5 ++++-
 6 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index b9ee563aa4..ac4f46a67d 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -186,6 +186,8 @@ struct VMStateDescription {
     int (*pre_save)(void *opaque);
     int (*post_save)(void *opaque);
     bool (*needed)(void *opaque);
+    bool (*dev_unplug_pending)(void *opaque);
+
     const VMStateField *fields;
     const VMStateDescription **subsections;
 };
diff --git a/migration/migration.c b/migration/migration.c
index 4133ed2684..354ad072fa 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -52,6 +52,7 @@
 #include "hw/qdev-properties.h"
 #include "monitor/monitor.h"
 #include "net/announce.h"
+#include "qemu/queue.h"
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
 
@@ -819,6 +820,7 @@ bool migration_is_setup_or_active(int state)
     case MIGRATION_STATUS_SETUP:
     case MIGRATION_STATUS_PRE_SWITCHOVER:
     case MIGRATION_STATUS_DEVICE:
+    case MIGRATION_STATUS_WAIT_UNPLUG:
         return true;
 
     default:
@@ -954,6 +956,9 @@ static void fill_source_migration_info(MigrationInfo *info)
     case MIGRATION_STATUS_CANCELLED:
         info->has_status = true;
         break;
+    case MIGRATION_STATUS_WAIT_UNPLUG:
+        info->has_status = true;
+        break;
     }
     info->status = s->state;
 }
@@ -1694,6 +1699,7 @@ bool migration_is_idle(void)
     case MIGRATION_STATUS_COLO:
     case MIGRATION_STATUS_PRE_SWITCHOVER:
     case MIGRATION_STATUS_DEVICE:
+    case MIGRATION_STATUS_WAIT_UNPLUG:
         return false;
     case MIGRATION_STATUS__MAX:
         g_assert_not_reached();
@@ -3264,6 +3270,19 @@ static void *migration_thread(void *opaque)
 
     qemu_savevm_state_setup(s->to_dst_file);
 
+    if (qemu_savevm_nr_failover_devices()) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                          MIGRATION_STATUS_WAIT_UNPLUG);
+
+        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
+               qemu_savevm_state_guest_unplug_pending()) {
+            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
+        }
+
+        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+                MIGRATION_STATUS_ACTIVE);
+    }
+
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                       MIGRATION_STATUS_ACTIVE);
@@ -3511,6 +3530,7 @@ static void migration_instance_finalize(Object *obj)
     qemu_mutex_destroy(&ms->qemu_file_lock);
     g_free(params->tls_hostname);
     g_free(params->tls_creds);
+    qemu_sem_destroy(&ms->wait_unplug_sem);
     qemu_sem_destroy(&ms->rate_limit_sem);
     qemu_sem_destroy(&ms->pause_sem);
     qemu_sem_destroy(&ms->postcopy_pause_sem);
@@ -3556,6 +3576,7 @@ static void migration_instance_init(Object *obj)
     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
     qemu_sem_init(&ms->rp_state.rp_sem, 0);
     qemu_sem_init(&ms->rate_limit_sem, 0);
+    qemu_sem_init(&ms->wait_unplug_sem, 0);
     qemu_mutex_init(&ms->qemu_file_lock);
 }
 
diff --git a/migration/migration.h b/migration/migration.h
index 4f2fe193dc..79b3dda146 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -206,6 +206,9 @@ struct MigrationState
     /* Flag set once the migration thread called bdrv_inactivate_all */
     bool block_inactive;
 
+    /* Migration is waiting for guest to unplug device */
+    QemuSemaphore wait_unplug_sem;
+
     /* Migration is paused due to pause-before-switchover */
     QemuSemaphore pause_sem;
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 8d95e261f6..966a9c3bdb 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1113,6 +1113,37 @@ void qemu_savevm_state_header(QEMUFile *f)
     }
 }
 
+int qemu_savevm_nr_failover_devices(void)
+{
+    SaveStateEntry *se;
+    int n = 0;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (se->vmsd && se->vmsd->dev_unplug_pending) {
+            n++;
+        }
+    }
+
+    return n;
+}
+
+bool qemu_savevm_state_guest_unplug_pending(void)
+{
+    SaveStateEntry *se;
+    int n = 0;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->vmsd || !se->vmsd->dev_unplug_pending) {
+            continue;
+        }
+        if (se->vmsd->dev_unplug_pending(se->opaque)) {
+            n++;
+        }
+    }
+
+    return n > 0;
+}
+
 void qemu_savevm_state_setup(QEMUFile *f)
 {
     SaveStateEntry *se;
diff --git a/migration/savevm.h b/migration/savevm.h
index 51a4b9caa8..c42b9c80ee 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -31,6 +31,8 @@
 
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_nr_failover_devices(void);
+bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
diff --git a/qapi/migration.json b/qapi/migration.json
index e9e7a97c03..b7348d0c8b 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -133,6 +133,9 @@
 # @device: During device serialisation when pause-before-switchover is enabled
 #        (since 2.11)
 #
+# @wait-unplug: wait for device unplug request by guest OS to be completed.
+#               (since 4.2)
+#
 # Since: 2.3
 #
 ##
@@ -140,7 +143,7 @@
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
             'active', 'postcopy-active', 'postcopy-paused',
             'postcopy-recover', 'completed', 'failed', 'colo',
-            'pre-switchover', 'device' ] }
+            'pre-switchover', 'device', 'wait-unplug' ] }
 
 ##
 # @MigrationInfo:
-- 
MST


Re: [PULL 08/14] migration: add new migration state wait-unplug
Posted by Peter Maydell 5 years, 7 months ago
On Tue, 29 Oct 2019 at 23:00, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> From: Jens Freimann <jfreimann@redhat.com>
>
> This patch adds a new migration state called wait-unplug.  It is entered
> after the SETUP state if failover devices are present. It will transition
> into ACTIVE once all devices were succesfully unplugged from the guest.
>
> So if a guest doesn't respond or takes long to honor the unplug request
> the user will see the migration state 'wait-unplug'.
>
> In the migration thread we query failover devices if they're are still
> pending the guest unplug. When all are unplugged the migration
> continues. If one device won't unplug migration will stay in wait_unplug
> state.
>
> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Message-Id: <20191029114905.6856-9-jfreimann@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---

Hi; Coverity has just (rather belatedly) noticed a possible
issue in this code (CID 1429995):

> @@ -3264,6 +3270,19 @@ static void *migration_thread(void *opaque)
>
>      qemu_savevm_state_setup(s->to_dst_file);
>
> +    if (qemu_savevm_nr_failover_devices()) {
> +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> +                          MIGRATION_STATUS_WAIT_UNPLUG);
> +
> +        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
> +               qemu_savevm_state_guest_unplug_pending()) {
> +            qemu_sem_timedwait(&s->wait_unplug_sem, 250);

Here we call qemu_sem_timedwait() but ignore the return value,
whereas all the other callsites for that function do something
with the return value. Is the code correct? (This is just a
heuristic Coverity has, and it's wrong a fair amount of the
time, so if it's wrong here too I can just mark it as a
false-positive in the Coverity UI.)

thanks
-- PMM

Re: [PULL 08/14] migration: add new migration state wait-unplug
Posted by Dr. David Alan Gilbert 5 years, 7 months ago
* Peter Maydell (peter.maydell@linaro.org) wrote:
> On Tue, 29 Oct 2019 at 23:00, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > From: Jens Freimann <jfreimann@redhat.com>
> >
> > This patch adds a new migration state called wait-unplug.  It is entered
> > after the SETUP state if failover devices are present. It will transition
> > into ACTIVE once all devices were succesfully unplugged from the guest.
> >
> > So if a guest doesn't respond or takes long to honor the unplug request
> > the user will see the migration state 'wait-unplug'.
> >
> > In the migration thread we query failover devices if they're are still
> > pending the guest unplug. When all are unplugged the migration
> > continues. If one device won't unplug migration will stay in wait_unplug
> > state.
> >
> > Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Message-Id: <20191029114905.6856-9-jfreimann@redhat.com>
> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> 
> Hi; Coverity has just (rather belatedly) noticed a possible
> issue in this code (CID 1429995):
> 
> > @@ -3264,6 +3270,19 @@ static void *migration_thread(void *opaque)
> >
> >      qemu_savevm_state_setup(s->to_dst_file);
> >
> > +    if (qemu_savevm_nr_failover_devices()) {
> > +        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> > +                          MIGRATION_STATUS_WAIT_UNPLUG);
> > +
> > +        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
> > +               qemu_savevm_state_guest_unplug_pending()) {
> > +            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
> 
> Here we call qemu_sem_timedwait() but ignore the return value,
> whereas all the other callsites for that function do something
> with the return value. Is the code correct? (This is just a
> heuristic Coverity has, and it's wrong a fair amount of the
> time, so if it's wrong here too I can just mark it as a
> false-positive in the Coverity UI.)

Hmm it's OK; that semaphore isn't really used at the moment,
so it's pretty much just a sleep. And the loop always
calls the qemu_savevm_state_guest_unplug_pending() before
it goes around again; so it doesn't care if the return
value indicates timeout or not.

Dave

> thanks
> -- PMM
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


Re: [PULL 08/14] migration: add new migration state wait-unplug
Posted by Peter Maydell 5 years, 7 months ago
On Mon, 29 Jun 2020 at 13:09, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Peter Maydell (peter.maydell@linaro.org) wrote:
> > Here we call qemu_sem_timedwait() but ignore the return value,
> > whereas all the other callsites for that function do something
> > with the return value. Is the code correct? (This is just a
> > heuristic Coverity has, and it's wrong a fair amount of the
> > time, so if it's wrong here too I can just mark it as a
> > false-positive in the Coverity UI.)
>
> Hmm it's OK; that semaphore isn't really used at the moment,
> so it's pretty much just a sleep. And the loop always
> calls the qemu_savevm_state_guest_unplug_pending() before
> it goes around again; so it doesn't care if the return
> value indicates timeout or not.

Thanks; have marked it as a false-positive in the UI.

-- PMM