[PATCH 09/26] hypervisor: expand available shutdown actions

Daniel P. Berrangé posted 26 patches 1 week, 5 days ago
[PATCH 09/26] hypervisor: expand available shutdown actions
Posted by Daniel P. Berrangé 1 week, 5 days ago
The auto shutdown code can currently only perform managed save,
which may fail in some cases, for example when PCI devices are
assigned. On failure, shutdown inhibitors remain in place which
may be undesirable.

This expands the logic to try a sequence of operations

 * Managed save
 * Graceful shutdown
 * Forced poweroff

Each of these operations can be enabled or disabled, but they
are always applied in this order.

With the shutdown option, a configurable time is allowed for
shutdown to complete, defaulting to 30 seconds, before moving
onto the forced poweroff phase.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 src/hypervisor/domain_driver.c | 113 +++++++++++++++++++++++++++------
 src/hypervisor/domain_driver.h |   4 ++
 src/qemu/qemu_driver.c         |   3 +
 3 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/src/hypervisor/domain_driver.c b/src/hypervisor/domain_driver.c
index 949e3d01f1..ea3f1cbfcd 100644
--- a/src/hypervisor/domain_driver.c
+++ b/src/hypervisor/domain_driver.c
@@ -714,9 +714,26 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg)
     g_autoptr(virConnect) conn = NULL;
     int numDomains = 0;
     size_t i;
-    int state;
     virDomainPtr *domains = NULL;
-    g_autofree unsigned int *flags = NULL;
+
+    VIR_DEBUG("Run autoshutdown uri=%s trySave=%d tryShutdown=%d poweroff=%d"
+              "waitShutdownSecs=%d",
+              cfg->uri, cfg->trySave, cfg->tryShutdown, cfg->poweroff,
+              cfg->waitShutdownSecs);
+
+    /*
+     * Ideally guests will shutdown in a few seconds, but it would
+     * not be unsual for it to take a while longer, especially under
+     * load, or if the guest OS has inhibitors slowing down shutdown.
+     *
+     * If we wait too long, then guests which ignore the shutdown
+     * request will significantly delay host shutdown.
+     *
+     * Pick 30 seconds as a moderately safe default, assuming that
+     * most guests are well behaved.
+     */
+    if (cfg->waitShutdownSecs <= 0)
+        cfg->waitShutdownSecs = 30;
 
     if (!(conn = virConnectOpen(cfg->uri)))
         goto cleanup;
@@ -726,31 +743,87 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg)
                                                VIR_CONNECT_LIST_DOMAINS_ACTIVE)) < 0)
         goto cleanup;
 
-    flags = g_new0(unsigned int, numDomains);
+    VIR_DEBUG("Auto shutdown with %d running domains", numDomains);
+    if (cfg->trySave) {
+        g_autofree unsigned int *flags = g_new0(unsigned int, numDomains);
+        for (i = 0; i < numDomains; i++) {
+            int state;
+            /*
+             * Pause all VMs to make them stop dirtying pages,
+             * so save is quicker. We remember if any VMs were
+             * paused so we can restore that on resume.
+             */
+            flags[i] = VIR_DOMAIN_SAVE_RUNNING;
+            if (virDomainGetState(domains[i], &state, NULL, 0) == 0) {
+                if (state == VIR_DOMAIN_PAUSED)
+                    flags[i] = VIR_DOMAIN_SAVE_PAUSED;
+            }
+            virDomainSuspend(domains[i]);
+        }
+
+        for (i = 0; i < numDomains; i++) {
+            if (virDomainManagedSave(domains[i], flags[i]) < 0) {
+                VIR_WARN("Unable to perform managed save of '%s': %s",
+                         virDomainGetName(domains[i]),
+                         virGetLastErrorMessage());
+                continue;
+            }
+            virObjectUnref(domains[i]);
+            domains[i] = NULL;
+        }
+    }
+
+    if (cfg->tryShutdown) {
+        GTimer *timer = NULL;
+        for (i = 0; i < numDomains; i++) {
+            if (domains[i] == NULL)
+                continue;
+            if (virDomainShutdown(domains[i]) < 0) {
+                VIR_WARN("Unable to perform graceful shutdown of '%s': %s",
+                         virDomainGetName(domains[i]),
+                         virGetLastErrorMessage());
+                break;
+            }
+        }
+
+        timer = g_timer_new();
+        while (1) {
+            bool anyRunning = false;
+            for (i = 0; i < numDomains; i++) {
+                if (!domains[i])
+                    continue;
 
-    /* First we pause all VMs to make them stop dirtying
-       pages, etc. We remember if any VMs were paused so
-       we can restore that on resume. */
-    for (i = 0; i < numDomains; i++) {
-        flags[i] = VIR_DOMAIN_SAVE_RUNNING;
-        if (virDomainGetState(domains[i], &state, NULL, 0) == 0) {
-            if (state == VIR_DOMAIN_PAUSED)
-                flags[i] = VIR_DOMAIN_SAVE_PAUSED;
+                if (virDomainIsActive(domains[i]) == 1) {
+                    anyRunning = true;
+                } else {
+                    virObjectUnref(domains[i]);
+                    domains[i] = NULL;
+                }
+            }
+
+            if (!anyRunning)
+                break;
+            if (g_timer_elapsed(timer, NULL) > cfg->waitShutdownSecs)
+                break;
+            g_usleep(1000*500);
         }
-        virDomainSuspend(domains[i]);
+        g_timer_destroy(timer);
     }
 
-    /* Then we save the VMs to disk */
-    for (i = 0; i < numDomains; i++)
-        if (virDomainManagedSave(domains[i], flags[i]) < 0)
-            VIR_WARN("Unable to perform managed save of '%s': %s",
-                     virDomainGetName(domains[i]),
-                     virGetLastErrorMessage());
+    if (cfg->poweroff) {
+        for (i = 0; i < numDomains; i++) {
+            if (domains[i] == NULL)
+                continue;
+            virDomainDestroy(domains[i]);
+        }
+    }
 
  cleanup:
     if (domains) {
-        for (i = 0; i < numDomains; i++)
-            virObjectUnref(domains[i]);
+        for (i = 0; i < numDomains; i++) {
+            if (domains[i])
+                virObjectUnref(domains[i]);
+        }
         VIR_FREE(domains);
     }
 }
diff --git a/src/hypervisor/domain_driver.h b/src/hypervisor/domain_driver.h
index d1588ea712..25c4b0c664 100644
--- a/src/hypervisor/domain_driver.h
+++ b/src/hypervisor/domain_driver.h
@@ -92,6 +92,10 @@ void virDomainDriverAutoStart(virDomainObjList *domains,
 
 typedef struct _virDomainDriverAutoShutdownConfig {
     const char *uri;
+    bool trySave;
+    bool tryShutdown;
+    bool poweroff;
+    int waitShutdownSecs;
 } virDomainDriverAutoShutdownConfig;
 
 void virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 103369ac93..2c97a6fb98 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -947,6 +947,9 @@ qemuStateStop(void)
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(qemu_driver);
     virDomainDriverAutoShutdownConfig ascfg = {
         .uri = cfg->uri,
+        .trySave = true,
+        .tryShutdown = false,
+        .poweroff = false,
     };
 
     if (!qemu_driver->privileged)
-- 
2.47.1