The auto shutdown code can currently only perform managed save,
which may fail in some cases, for example when PCI devices are
assigned. On failure, shutdown inhibitors remain in place which
may be undesirable.
This expands the logic to try a sequence of operations
* Managed save
* Graceful shutdown
* Forced poweroff
Each of these operations can be enabled or disabled, but they
are always applied in this order.
With the shutdown option, a configurable time is allowed for
shutdown to complete, defaulting to 30 seconds, before moving
onto the forced poweroff phase.
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
src/hypervisor/domain_driver.c | 130 ++++++++++++++++++++++++++++-----
src/hypervisor/domain_driver.h | 7 ++
src/qemu/qemu_driver.c | 3 +
3 files changed, 121 insertions(+), 19 deletions(-)
diff --git a/src/hypervisor/domain_driver.c b/src/hypervisor/domain_driver.c
index 0ca482275f..e625726c07 100644
--- a/src/hypervisor/domain_driver.c
+++ b/src/hypervisor/domain_driver.c
@@ -720,9 +720,25 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg)
g_autoptr(virConnect) conn = NULL;
int numDomains = 0;
size_t i;
- int state;
virDomainPtr *domains = NULL;
- g_autofree unsigned int *flags = NULL;
+
+ VIR_DEBUG("Run autoshutdown uri=%s trySave=%d tryShutdown=%d poweroff=%d waitShutdownSecs=%u",
+ cfg->uri, cfg->trySave, cfg->tryShutdown, cfg->poweroff,
+ cfg->waitShutdownSecs);
+
+ /*
+ * Ideally guests will shutdown in a few seconds, but it would
+ * not be unsual for it to take a while longer, especially under
+ * load, or if the guest OS has inhibitors slowing down shutdown.
+ *
+ * If we wait too long, then guests which ignore the shutdown
+ * request will significantly delay host shutdown.
+ *
+ * Pick 30 seconds as a moderately safe default, assuming that
+ * most guests are well behaved.
+ */
+ if (cfg->waitShutdownSecs <= 0)
+ cfg->waitShutdownSecs = 30;
if (!(conn = virConnectOpen(cfg->uri)))
goto cleanup;
@@ -732,31 +748,107 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg)
VIR_CONNECT_LIST_DOMAINS_ACTIVE)) < 0)
goto cleanup;
- flags = g_new0(unsigned int, numDomains);
+ VIR_DEBUG("Auto shutdown with %d running domains", numDomains);
+ if (cfg->trySave) {
+ g_autofree unsigned int *flags = g_new0(unsigned int, numDomains);
+ for (i = 0; i < numDomains; i++) {
+ int state;
+ /*
+ * Pause all VMs to make them stop dirtying pages,
+ * so save is quicker. We remember if any VMs were
+ * paused so we can restore that on resume.
+ */
+ flags[i] = VIR_DOMAIN_SAVE_RUNNING;
+ if (virDomainGetState(domains[i], &state, NULL, 0) == 0) {
+ if (state == VIR_DOMAIN_PAUSED)
+ flags[i] = VIR_DOMAIN_SAVE_PAUSED;
+ }
+ if (flags[i] & VIR_DOMAIN_SAVE_RUNNING)
+ virDomainSuspend(domains[i]);
+ }
+
+ for (i = 0; i < numDomains; i++) {
+ if (virDomainManagedSave(domains[i], flags[i]) < 0) {
+ VIR_WARN("Unable to perform managed save of '%s': %s",
+ virDomainGetName(domains[i]),
+ virGetLastErrorMessage());
+ if (flags[i] & VIR_DOMAIN_SAVE_RUNNING)
+ virDomainResume(domains[i]);
+ continue;
+ }
+ virObjectUnref(domains[i]);
+ domains[i] = NULL;
+ }
+ }
+
+ if (cfg->tryShutdown) {
+ GTimer *timer = NULL;
+ for (i = 0; i < numDomains; i++) {
+ if (domains[i] == NULL)
+ continue;
+ if (virDomainShutdown(domains[i]) < 0) {
+ VIR_WARN("Unable to request graceful shutdown of '%s': %s",
+ virDomainGetName(domains[i]),
+ virGetLastErrorMessage());
+ break;
+ }
+ }
+
+ timer = g_timer_new();
+ while (1) {
+ bool anyRunning = false;
+ for (i = 0; i < numDomains; i++) {
+ if (!domains[i])
+ continue;
- /* First we pause all VMs to make them stop dirtying
- pages, etc. We remember if any VMs were paused so
- we can restore that on resume. */
- for (i = 0; i < numDomains; i++) {
- flags[i] = VIR_DOMAIN_SAVE_RUNNING;
- if (virDomainGetState(domains[i], &state, NULL, 0) == 0) {
- if (state == VIR_DOMAIN_PAUSED)
- flags[i] = VIR_DOMAIN_SAVE_PAUSED;
+ if (virDomainIsActive(domains[i]) == 1) {
+ anyRunning = true;
+ } else {
+ virObjectUnref(domains[i]);
+ domains[i] = NULL;
+ }
+ }
+
+ if (!anyRunning)
+ break;
+ if (g_timer_elapsed(timer, NULL) > cfg->waitShutdownSecs)
+ break;
+ g_usleep(1000*500);
}
- virDomainSuspend(domains[i]);
+ g_timer_destroy(timer);
}
- /* Then we save the VMs to disk */
- for (i = 0; i < numDomains; i++)
- if (virDomainManagedSave(domains[i], flags[i]) < 0)
- VIR_WARN("Unable to perform managed save of '%s': %s",
- virDomainGetName(domains[i]),
- virGetLastErrorMessage());
+ if (cfg->poweroff) {
+ for (i = 0; i < numDomains; i++) {
+ if (domains[i] == NULL)
+ continue;
+ /*
+ * NB might fail if we gave up on waiting for
+ * virDomainShutdown, but it then completed anyway,
+ * hence we're not checking for failure
+ */
+ virDomainDestroy(domains[i]);
+
+ virObjectUnref(domains[i]);
+ domains[i] = NULL;
+ }
+ }
cleanup:
if (domains) {
- for (i = 0; i < numDomains; i++)
+ /* Anything non-NULL in this list indicates none of
+ * the configured ations were successful in processing
+ * the domain. There's not much we can do about that
+ * as the host is powering off, logging at least lets
+ * admins know
+ */
+ for (i = 0; i < numDomains; i++) {
+ if (domains[i] == NULL)
+ continue;
+ VIR_WARN("Domain '%s' not successfully shut off by any action",
+ virDomainGetName(domains[i]));
virObjectUnref(domains[i]);
+ }
VIR_FREE(domains);
}
}
diff --git a/src/hypervisor/domain_driver.h b/src/hypervisor/domain_driver.h
index f36db5c6f0..ff68517edd 100644
--- a/src/hypervisor/domain_driver.h
+++ b/src/hypervisor/domain_driver.h
@@ -93,6 +93,13 @@ void virDomainDriverAutoStart(virDomainObjList *domains,
typedef struct _virDomainDriverAutoShutdownConfig {
const char *uri;
+ bool trySave;
+ bool tryShutdown;
+ bool poweroff;
+ unsigned int waitShutdownSecs; /* Seconds to wait for VM to shutdown
+ * before moving onto next action.
+ * If 0 a default is used (currently 30 secs)
+ */
} virDomainDriverAutoShutdownConfig;
void virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 7e6ce5f69b..946f1c7e96 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -947,6 +947,9 @@ qemuStateStop(void)
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(qemu_driver);
virDomainDriverAutoShutdownConfig ascfg = {
.uri = cfg->uri,
+ .trySave = true,
+ .tryShutdown = false,
+ .poweroff = false,
};
if (!qemu_driver->privileged)
--
2.48.1
On Wed, Mar 12, 2025 at 17:17:43 +0000, Daniel P. Berrangé wrote: > The auto shutdown code can currently only perform managed save, > which may fail in some cases, for example when PCI devices are > assigned. On failure, shutdown inhibitors remain in place which > may be undesirable. > > This expands the logic to try a sequence of operations > > * Managed save > * Graceful shutdown > * Forced poweroff > > Each of these operations can be enabled or disabled, but they > are always applied in this order. > > With the shutdown option, a configurable time is allowed for > shutdown to complete, defaulting to 30 seconds, before moving > onto the forced poweroff phase. > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> > --- > src/hypervisor/domain_driver.c | 130 ++++++++++++++++++++++++++++----- > src/hypervisor/domain_driver.h | 7 ++ > src/qemu/qemu_driver.c | 3 + > 3 files changed, 121 insertions(+), 19 deletions(-) [...] > > @@ -732,31 +748,107 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg) > VIR_CONNECT_LIST_DOMAINS_ACTIVE)) < 0) > goto cleanup; > > - flags = g_new0(unsigned int, numDomains); > + VIR_DEBUG("Auto shutdown with %d running domains", numDomains); > + if (cfg->trySave) { > + g_autofree unsigned int *flags = g_new0(unsigned int, numDomains); > + for (i = 0; i < numDomains; i++) { > + int state; > + /* > + * Pause all VMs to make them stop dirtying pages, > + * so save is quicker. We remember if any VMs were > + * paused so we can restore that on resume. > + */ > + flags[i] = VIR_DOMAIN_SAVE_RUNNING; > + if (virDomainGetState(domains[i], &state, NULL, 0) == 0) { > + if (state == VIR_DOMAIN_PAUSED) > + flags[i] = VIR_DOMAIN_SAVE_PAUSED; > + } > + if (flags[i] & VIR_DOMAIN_SAVE_RUNNING) > + virDomainSuspend(domains[i]); > + } > + > + for (i = 0; i < numDomains; i++) { > + if (virDomainManagedSave(domains[i], flags[i]) < 0) { > + VIR_WARN("Unable to perform managed save of '%s': %s", > + virDomainGetName(domains[i]), virDomainGetName() calls virResetLastError() > + virGetLastErrorMessage()); IIRC argument evaluation order is not defined this might have random results. > + if (flags[i] & VIR_DOMAIN_SAVE_RUNNING) > + virDomainResume(domains[i]); > + continue; > + } > + virObjectUnref(domains[i]); > + domains[i] = NULL; > + } > + } > + > + if (cfg->tryShutdown) { > + GTimer *timer = NULL; > + for (i = 0; i < numDomains; i++) { > + if (domains[i] == NULL) > + continue; > + if (virDomainShutdown(domains[i]) < 0) { > + VIR_WARN("Unable to request graceful shutdown of '%s': %s", > + virDomainGetName(domains[i]), > + virGetLastErrorMessage()); Same as above > + break; > + } > + } > + > + timer = g_timer_new(); > + while (1) { > + bool anyRunning = false; > + for (i = 0; i < numDomains; i++) { > + if (!domains[i]) > + continue; > > - /* First we pause all VMs to make them stop dirtying > - pages, etc. We remember if any VMs were paused so > - we can restore that on resume. */ > - for (i = 0; i < numDomains; i++) { > - flags[i] = VIR_DOMAIN_SAVE_RUNNING; > - if (virDomainGetState(domains[i], &state, NULL, 0) == 0) { > - if (state == VIR_DOMAIN_PAUSED) > - flags[i] = VIR_DOMAIN_SAVE_PAUSED; > + if (virDomainIsActive(domains[i]) == 1) { > + anyRunning = true; > + } else { > + virObjectUnref(domains[i]); > + domains[i] = NULL; > + } > + } > + > + if (!anyRunning) > + break; > + if (g_timer_elapsed(timer, NULL) > cfg->waitShutdownSecs) > + break; > + g_usleep(1000*500); > } > - virDomainSuspend(domains[i]); > + g_timer_destroy(timer); > } > > - /* Then we save the VMs to disk */ > - for (i = 0; i < numDomains; i++) > - if (virDomainManagedSave(domains[i], flags[i]) < 0) > - VIR_WARN("Unable to perform managed save of '%s': %s", > - virDomainGetName(domains[i]), > - virGetLastErrorMessage()); and here too > + if (cfg->poweroff) { > + for (i = 0; i < numDomains; i++) { > + if (domains[i] == NULL) > + continue; > + /* > + * NB might fail if we gave up on waiting for > + * virDomainShutdown, but it then completed anyway, > + * hence we're not checking for failure > + */ > + virDomainDestroy(domains[i]); > + > + virObjectUnref(domains[i]); > + domains[i] = NULL; > + } > + } > > cleanup: > if (domains) { > - for (i = 0; i < numDomains; i++) > + /* Anything non-NULL in this list indicates none of > + * the configured ations were successful in processing > + * the domain. There's not much we can do about that > + * as the host is powering off, logging at least lets > + * admins know > + */ > + for (i = 0; i < numDomains; i++) { > + if (domains[i] == NULL) > + continue; > + VIR_WARN("Domain '%s' not successfully shut off by any action", I'd also suggest to prefix all of these warnings with e.g.: "host shutdown: Domain ..." so that there's no any doubt when the warning was produced. > + virDomainGetName(domains[i])); > virObjectUnref(domains[i]); > + } > VIR_FREE(domains); > } > } Inside libvirt we should be able to do the "gross hack" of actually using the virDomain object definition and access name directly without the public API call to avoid the reset of the error as all other options would require most likely copying the error message. Reviewed-by: Peter Krempa <pkrempa@redhat.com>
On Thu, Mar 13, 2025 at 09:00:58AM +0100, Peter Krempa wrote: > On Wed, Mar 12, 2025 at 17:17:43 +0000, Daniel P. Berrangé wrote: > > The auto shutdown code can currently only perform managed save, > > which may fail in some cases, for example when PCI devices are > > assigned. On failure, shutdown inhibitors remain in place which > > may be undesirable. > > > > This expands the logic to try a sequence of operations > > > > * Managed save > > * Graceful shutdown > > * Forced poweroff > > > > Each of these operations can be enabled or disabled, but they > > are always applied in this order. > > > > With the shutdown option, a configurable time is allowed for > > shutdown to complete, defaulting to 30 seconds, before moving > > onto the forced poweroff phase. > > > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> > > --- > > src/hypervisor/domain_driver.c | 130 ++++++++++++++++++++++++++++----- > > src/hypervisor/domain_driver.h | 7 ++ > > src/qemu/qemu_driver.c | 3 + > > 3 files changed, 121 insertions(+), 19 deletions(-) > > [...] > > > > > @@ -732,31 +748,107 @@ virDomainDriverAutoShutdown(virDomainDriverAutoShutdownConfig *cfg) > > VIR_CONNECT_LIST_DOMAINS_ACTIVE)) < 0) > > goto cleanup; > > > > - flags = g_new0(unsigned int, numDomains); > > + VIR_DEBUG("Auto shutdown with %d running domains", numDomains); > > + if (cfg->trySave) { > > + g_autofree unsigned int *flags = g_new0(unsigned int, numDomains); > > + for (i = 0; i < numDomains; i++) { > > + int state; > > + /* > > + * Pause all VMs to make them stop dirtying pages, > > + * so save is quicker. We remember if any VMs were > > + * paused so we can restore that on resume. > > + */ > > + flags[i] = VIR_DOMAIN_SAVE_RUNNING; > > + if (virDomainGetState(domains[i], &state, NULL, 0) == 0) { > > + if (state == VIR_DOMAIN_PAUSED) > > + flags[i] = VIR_DOMAIN_SAVE_PAUSED; > > + } > > + if (flags[i] & VIR_DOMAIN_SAVE_RUNNING) > > + virDomainSuspend(domains[i]); > > + } > > + > > + for (i = 0; i < numDomains; i++) { > > + if (virDomainManagedSave(domains[i], flags[i]) < 0) { > > + VIR_WARN("Unable to perform managed save of '%s': %s", > > + virDomainGetName(domains[i]), > > virDomainGetName() calls virResetLastError() > > > + virGetLastErrorMessage()); > > IIRC argument evaluation order is not defined this might have random > results. In fact it is worse than that. If virGetLastErrorMessage runs second (order as written) it will get no error message. If it runs first (reverse as written) it will get the error message which virDomainGetName will cause to be free()d, so it will be passed freed memory to VIR_WARN. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
© 2016 - 2025 Red Hat, Inc.