src/qemu/qemu_domain.c | 8 +++ src/qemu/qemu_domain.h | 2 + src/qemu/qemu_driver.c | 18 ++++++ src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_process.h | 1 + 5 files changed, 148 insertions(+), 5 deletions(-)
In cases when a QEMU process takes longer than the time sigterm and
sigkill are issued to kill the process do not simply fail and leave the
VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
an fd on /proc/$pid and get notified when the QEMU process finally has
terminated to cleanup the VM state.
Resolves: https://issues.redhat.com/browse/RHEL-28819
Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
---
src/qemu/qemu_domain.c | 8 +++
src/qemu/qemu_domain.h | 2 +
src/qemu/qemu_driver.c | 18 ++++++
src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_process.h | 1 +
5 files changed, 148 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2134b11038..8147ff02fd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
virChrdevFree(priv->devs);
+ if (priv->pidMonitored >= 0) {
+ virEventRemoveHandle(priv->pidMonitored);
+ priv->pidMonitored = -1;
+ }
+
/* This should never be non-NULL if we get here, but just in case... */
if (priv->mon) {
VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
@@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
priv->blockjobs = virHashNew(virObjectUnref);
priv->fds = virHashNew(g_object_unref);
+ priv->pidMonitored = -1;
+
/* agent commands block by default, user can choose different behavior */
priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
@@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
case QEMU_PROCESS_EVENT_RESET:
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
case QEMU_PROCESS_EVENT_MONITOR_EOF:
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index d777559119..a5092dd7f0 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
bool beingDestroyed;
char *pidfile;
+ int pidMonitored;
virDomainPCIAddressSet *pciaddrs;
virDomainUSBAddressSet *usbaddrs;
@@ -469,6 +470,7 @@ typedef enum {
QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
QEMU_PROCESS_EVENT_RESET,
QEMU_PROCESS_EVENT_NBDKIT_EXITED,
+ QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
QEMU_PROCESS_EVENT_LAST
} qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9f3013e231..6b1e4084f6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
}
+static void
+processShutdownCompletedEvent(virQEMUDriver *driver,
+ virDomainObj *vm)
+{
+ if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+ return;
+
+ if (virDomainObjIsActive(vm))
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_UNKNOWN,
+ VIR_ASYNC_JOB_NONE, 0);
+
+ virDomainObjEndJob(vm);
+}
+
+
static void qemuProcessEventHandler(void *data, void *opaque)
{
struct qemuProcessEvent *processEvent = data;
@@ -4101,6 +4116,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
processNbdkitExitedEvent(vm, processEvent->data);
break;
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
+ processShutdownCompletedEvent(driver, vm);
+ break;
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..c6f7d34168 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,9 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#if WITH_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
@@ -8387,9 +8390,114 @@ qemuProcessCreatePretendCmdBuild(virDomainObj *vm,
}
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+typedef struct {
+ virDomainObj *vm;
+ int pidfd;
+} qemuProcessInShutdownEventData;
+
+
+static qemuProcessInShutdownEventData*
+qemuProcessInShutdownEventDataNew(virDomainObj *vm, int pidfd)
+{
+ qemuProcessInShutdownEventData *d = g_new(qemuProcessInShutdownEventData, 1);
+ d->vm = virObjectRef(vm);
+ d->pidfd = pidfd;
+ return d;
+}
+
+
+static void
+qemuProcessInShutdownEventDataFree(qemuProcessInShutdownEventData *d)
+{
+ virObjectUnref(d->vm);
+ VIR_FORCE_CLOSE(d->pidfd);
+ g_free(d);
+}
+
+
+static void
+qemuProcessInShutdownPidfdCb(int watch,
+ int fd,
+ int events G_GNUC_UNUSED,
+ void *opaque)
+{
+ qemuProcessInShutdownEventData *data = opaque;
+ virDomainObj *vm = data->vm;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld fd=%d",
+ vm, vm->def->name, (long long)vm->pid, fd);
+
+ virEventRemoveHandle(watch);
+
+ virObjectLock(vm);
+
+ VIR_INFO("QEMU process %lld finally completed termination",
+ (long long)vm->pid);
+
+ QEMU_DOMAIN_PRIVATE(vm)->pidMonitored = -1;
+ qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
+ 0, 0, NULL);
+
+ virObjectUnlock(vm);
+}
+#endif /* WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open) */
+
+
+static int
+qemuProcessInShutdownStartMonitor(virDomainObj *vm)
+{
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+ qemuDomainObjPrivate *priv = vm->privateData;
+ qemuProcessInShutdownEventData *data;
+ int pidfd;
+ int ret = -1;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld pidMonitored=%d",
+ vm, vm->def->name, (long long)vm->pid,
+ priv->pidMonitored);
+
+ if (priv->pidMonitored >= 0) {
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i already set up", vm->pid);
+ goto cleanup;
+ }
+
+ pidfd = syscall(SYS_pidfd_open, vm->pid, 0);
+ if (pidfd < 0) {
+ if (errno == ESRCH) /* process has already terminated */
+ ret = 1;
+ goto cleanup;
+ }
+
+ data = qemuProcessInShutdownEventDataNew(vm, pidfd);
+ if ((priv->pidMonitored = virEventAddHandle(pidfd,
+ VIR_EVENT_HANDLE_READABLE,
+ qemuProcessInShutdownPidfdCb,
+ data,
+ (virFreeCallback)qemuProcessInShutdownEventDataFree)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to monitor qemu in-shutdown process %1$i"),
+ vm->pid);
+ qemuProcessInShutdownEventDataFree(data);
+ goto cleanup;
+ }
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i for termination", vm->pid);
+ ret = 0;
+
+ cleanup:
+ return ret;
+#else /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+ VIR_DEBUG("Monitoring qemu process %i not implemented", vm->pid);
+ return -1;
+#endif /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+}
+
+
int
qemuProcessKill(virDomainObj *vm, unsigned int flags)
{
+ int ret = -1;
+
VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
vm, vm->def->name,
(long long)vm->pid, flags);
@@ -8410,10 +8518,16 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags)
/* Request an extra delay of two seconds per current nhostdevs
* to be safe against stalls by the kernel freeing up the resources */
- return virProcessKillPainfullyDelay(vm->pid,
- !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
- vm->def->nhostdevs * 2,
- false);
+ ret = virProcessKillPainfullyDelay(vm->pid,
+ !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
+ vm->def->nhostdevs * 2,
+ false);
+
+ if (ret < 0 && (flags & VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR))
+ if (qemuProcessInShutdownStartMonitor(vm) == 1)
+ ret = 0; /* process termination detected */
+
+ return ret;
}
@@ -8438,7 +8552,7 @@ qemuProcessBeginStopJob(virDomainObj *vm,
* cleared inside qemuProcessStop */
priv->beingDestroyed = true;
- if (qemuProcessKill(vm, killFlags) < 0)
+ if (qemuProcessKill(vm, killFlags|VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR) < 0)
goto error;
/* Wake up anything waiting on domain condition */
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index cb67bfcd2d..2324aeb7bd 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -180,6 +180,7 @@ typedef enum {
VIR_QEMU_PROCESS_KILL_FORCE = 1 << 0,
VIR_QEMU_PROCESS_KILL_NOWAIT = 1 << 1,
VIR_QEMU_PROCESS_KILL_NOCHECK = 1 << 2, /* bypass the running vm check */
+ VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR = 1 << 3, /* on error enable process monitor */
} virQemuProcessKillMode;
int qemuProcessKill(virDomainObj *vm, unsigned int flags);
--
2.45.0
On 7/19/24 17:44, Boris Fiuczynski wrote: > In cases when a QEMU process takes longer than the time sigterm and > sigkill are issued to kill the process do not simply fail and leave the > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up > an fd on /proc/$pid and get notified when the QEMU process finally has > terminated to cleanup the VM state. > > Resolves: https://issues.redhat.com/browse/RHEL-28819 > Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> > --- > src/qemu/qemu_domain.c | 8 +++ > src/qemu/qemu_domain.h | 2 + > src/qemu/qemu_driver.c | 18 ++++++ > src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- > src/qemu/qemu_process.h | 1 + > 5 files changed, 148 insertions(+), 5 deletions(-) > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 2134b11038..8147ff02fd 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) > > virChrdevFree(priv->devs); > > + if (priv->pidMonitored >= 0) { > + virEventRemoveHandle(priv->pidMonitored); > + priv->pidMonitored = -1; > + } > + > /* This should never be non-NULL if we get here, but just in case... */ > if (priv->mon) { > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > priv->blockjobs = virHashNew(virObjectUnref); > priv->fds = virHashNew(g_object_unref); > > + priv->pidMonitored = -1; > + > /* agent commands block by default, user can choose different behavior */ > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > case QEMU_PROCESS_EVENT_RESET: > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > case QEMU_PROCESS_EVENT_MONITOR_EOF: > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > case QEMU_PROCESS_EVENT_LAST: > break; > } > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index d777559119..a5092dd7f0 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { > > bool beingDestroyed; > char *pidfile; > + int pidMonitored; > > virDomainPCIAddressSet *pciaddrs; > virDomainUSBAddressSet *usbaddrs; > @@ -469,6 +470,7 @@ typedef enum { > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, > QEMU_PROCESS_EVENT_RESET, > QEMU_PROCESS_EVENT_NBDKIT_EXITED, > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > > QEMU_PROCESS_EVENT_LAST > } qemuProcessEventType; > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 9f3013e231..6b1e4084f6 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, > } > > > +static void > +processShutdownCompletedEvent(virQEMUDriver *driver, > + virDomainObj *vm) > +{ > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) > + return; Shouldn't this be: if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) return; Otherwise looking good. No need to resend, I can fix that before pushing. Michal
On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote: > On 7/19/24 17:44, Boris Fiuczynski wrote: > > In cases when a QEMU process takes longer than the time sigterm and > > sigkill are issued to kill the process do not simply fail and leave the > > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up > > an fd on /proc/$pid and get notified when the QEMU process finally has > > terminated to cleanup the VM state. > > > > Resolves: https://issues.redhat.com/browse/RHEL-28819 > > Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> > > --- > > src/qemu/qemu_domain.c | 8 +++ > > src/qemu/qemu_domain.h | 2 + > > src/qemu/qemu_driver.c | 18 ++++++ > > src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- > > src/qemu/qemu_process.h | 1 + > > 5 files changed, 148 insertions(+), 5 deletions(-) > > > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index 2134b11038..8147ff02fd 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) > > > > virChrdevFree(priv->devs); > > > > + if (priv->pidMonitored >= 0) { > > + virEventRemoveHandle(priv->pidMonitored); > > + priv->pidMonitored = -1; > > + } > > + > > /* This should never be non-NULL if we get here, but just in case... */ > > if (priv->mon) { > > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); > > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > > priv->blockjobs = virHashNew(virObjectUnref); > > priv->fds = virHashNew(g_object_unref); > > > > + priv->pidMonitored = -1; > > + > > /* agent commands block by default, user can choose different behavior */ > > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > > case QEMU_PROCESS_EVENT_RESET: > > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > > case QEMU_PROCESS_EVENT_MONITOR_EOF: > > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > > case QEMU_PROCESS_EVENT_LAST: > > break; > > } > > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > > index d777559119..a5092dd7f0 100644 > > --- a/src/qemu/qemu_domain.h > > +++ b/src/qemu/qemu_domain.h > > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { > > > > bool beingDestroyed; > > char *pidfile; > > + int pidMonitored; > > > > virDomainPCIAddressSet *pciaddrs; > > virDomainUSBAddressSet *usbaddrs; > > @@ -469,6 +470,7 @@ typedef enum { > > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, > > QEMU_PROCESS_EVENT_RESET, > > QEMU_PROCESS_EVENT_NBDKIT_EXITED, > > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > > > > QEMU_PROCESS_EVENT_LAST > > } qemuProcessEventType; > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > > index 9f3013e231..6b1e4084f6 100644 > > --- a/src/qemu/qemu_driver.c > > +++ b/src/qemu/qemu_driver.c > > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, > > } > > > > > > +static void > > +processShutdownCompletedEvent(virQEMUDriver *driver, > > + virDomainObj *vm) > > +{ > > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) > > + return; > > Shouldn't this be: > > if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) > return; > > Otherwise looking good. No need to resend, I can fix that before pushing. And followed by qemuProcessEndStopJob after calling qemuProcessStop. Jirka
On 7/22/24 2:01 PM, Jiri Denemark wrote: > On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote: >> On 7/19/24 17:44, Boris Fiuczynski wrote: >>> In cases when a QEMU process takes longer than the time sigterm and >>> sigkill are issued to kill the process do not simply fail and leave the >>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up >>> an fd on /proc/$pid and get notified when the QEMU process finally has >>> terminated to cleanup the VM state. >>> >>> Resolves: https://issues.redhat.com/browse/RHEL-28819 >>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> >>> --- >>> src/qemu/qemu_domain.c | 8 +++ >>> src/qemu/qemu_domain.h | 2 + >>> src/qemu/qemu_driver.c | 18 ++++++ >>> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- >>> src/qemu/qemu_process.h | 1 + >>> 5 files changed, 148 insertions(+), 5 deletions(-) >>> >>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c >>> index 2134b11038..8147ff02fd 100644 >>> --- a/src/qemu/qemu_domain.c >>> +++ b/src/qemu/qemu_domain.c >>> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) >>> >>> virChrdevFree(priv->devs); >>> >>> + if (priv->pidMonitored >= 0) { >>> + virEventRemoveHandle(priv->pidMonitored); >>> + priv->pidMonitored = -1; >>> + } >>> + >>> /* This should never be non-NULL if we get here, but just in case... */ >>> if (priv->mon) { >>> VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); >>> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) >>> priv->blockjobs = virHashNew(virObjectUnref); >>> priv->fds = virHashNew(g_object_unref); >>> >>> + priv->pidMonitored = -1; >>> + >>> /* agent commands block by default, user can choose different behavior */ >>> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; >>> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; >>> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) >>> case QEMU_PROCESS_EVENT_RESET: >>> case QEMU_PROCESS_EVENT_NBDKIT_EXITED: >>> case QEMU_PROCESS_EVENT_MONITOR_EOF: >>> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: >>> case QEMU_PROCESS_EVENT_LAST: >>> break; >>> } >>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h >>> index d777559119..a5092dd7f0 100644 >>> --- a/src/qemu/qemu_domain.h >>> +++ b/src/qemu/qemu_domain.h >>> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { >>> >>> bool beingDestroyed; >>> char *pidfile; >>> + int pidMonitored; >>> >>> virDomainPCIAddressSet *pciaddrs; >>> virDomainUSBAddressSet *usbaddrs; >>> @@ -469,6 +470,7 @@ typedef enum { >>> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, >>> QEMU_PROCESS_EVENT_RESET, >>> QEMU_PROCESS_EVENT_NBDKIT_EXITED, >>> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, >>> >>> QEMU_PROCESS_EVENT_LAST >>> } qemuProcessEventType; >>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c >>> index 9f3013e231..6b1e4084f6 100644 >>> --- a/src/qemu/qemu_driver.c >>> +++ b/src/qemu/qemu_driver.c >>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, >>> } >>> >>> >>> +static void >>> +processShutdownCompletedEvent(virQEMUDriver *driver, >>> + virDomainObj *vm) >>> +{ >>> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) >>> + return; >> >> Shouldn't this be: >> >> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) >> return; >> >> Otherwise looking good. No need to resend, I can fix that before pushing. > > And followed by qemuProcessEndStopJob after calling qemuProcessStop. > > Jirka Just to make sure I retested successfully with the changes above. -- Mit freundlichen Grüßen/Kind regards Boris Fiuczynski IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Wolfgang Wendt Geschäftsführung: David Faller Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294
On 7/23/24 17:57, Boris Fiuczynski wrote: > On 7/22/24 2:01 PM, Jiri Denemark wrote: >> On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote: >>> On 7/19/24 17:44, Boris Fiuczynski wrote: >>>> In cases when a QEMU process takes longer than the time sigterm and >>>> sigkill are issued to kill the process do not simply fail and leave the >>>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up >>>> an fd on /proc/$pid and get notified when the QEMU process finally has >>>> terminated to cleanup the VM state. >>>> >>>> Resolves: https://issues.redhat.com/browse/RHEL-28819 >>>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> >>>> --- >>>> src/qemu/qemu_domain.c | 8 +++ >>>> src/qemu/qemu_domain.h | 2 + >>>> src/qemu/qemu_driver.c | 18 ++++++ >>>> src/qemu/qemu_process.c | 124 >>>> ++++++++++++++++++++++++++++++++++++++-- >>>> src/qemu/qemu_process.h | 1 + >>>> 5 files changed, 148 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c >>>> index 9f3013e231..6b1e4084f6 100644 >>>> --- a/src/qemu/qemu_driver.c >>>> +++ b/src/qemu/qemu_driver.c >>>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, >>>> } >>>> +static void >>>> +processShutdownCompletedEvent(virQEMUDriver *driver, >>>> + virDomainObj *vm) >>>> +{ >>>> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) >>>> + return; >>> >>> Shouldn't this be: >>> >>> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) >>> return; >>> >>> Otherwise looking good. No need to resend, I can fix that before >>> pushing. >> >> And followed by qemuProcessEndStopJob after calling qemuProcessStop. >> >> Jirka > > Just to make sure I retested successfully with the changes above. > Perfect! I'll do the change before merging (among with some code style adjustments like curly braces around multiline if-s, etc. Nothing too intrusive). Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal
On 7/22/24 10:55 AM, Michal Prívozník wrote: > On 7/19/24 17:44, Boris Fiuczynski wrote: >> In cases when a QEMU process takes longer than the time sigterm and >> sigkill are issued to kill the process do not simply fail and leave the >> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up >> an fd on /proc/$pid and get notified when the QEMU process finally has >> terminated to cleanup the VM state. >> >> Resolves: https://issues.redhat.com/browse/RHEL-28819 >> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com> >> --- >> src/qemu/qemu_domain.c | 8 +++ >> src/qemu/qemu_domain.h | 2 + >> src/qemu/qemu_driver.c | 18 ++++++ >> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- >> src/qemu/qemu_process.h | 1 + >> 5 files changed, 148 insertions(+), 5 deletions(-) >> >> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c >> index 2134b11038..8147ff02fd 100644 >> --- a/src/qemu/qemu_domain.c >> +++ b/src/qemu/qemu_domain.c >> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) >> >> virChrdevFree(priv->devs); >> >> + if (priv->pidMonitored >= 0) { >> + virEventRemoveHandle(priv->pidMonitored); >> + priv->pidMonitored = -1; >> + } >> + >> /* This should never be non-NULL if we get here, but just in case... */ >> if (priv->mon) { >> VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); >> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) >> priv->blockjobs = virHashNew(virObjectUnref); >> priv->fds = virHashNew(g_object_unref); >> >> + priv->pidMonitored = -1; >> + >> /* agent commands block by default, user can choose different behavior */ >> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; >> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; >> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) >> case QEMU_PROCESS_EVENT_RESET: >> case QEMU_PROCESS_EVENT_NBDKIT_EXITED: >> case QEMU_PROCESS_EVENT_MONITOR_EOF: >> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: >> case QEMU_PROCESS_EVENT_LAST: >> break; >> } >> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h >> index d777559119..a5092dd7f0 100644 >> --- a/src/qemu/qemu_domain.h >> +++ b/src/qemu/qemu_domain.h >> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { >> >> bool beingDestroyed; >> char *pidfile; >> + int pidMonitored; >> >> virDomainPCIAddressSet *pciaddrs; >> virDomainUSBAddressSet *usbaddrs; >> @@ -469,6 +470,7 @@ typedef enum { >> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, >> QEMU_PROCESS_EVENT_RESET, >> QEMU_PROCESS_EVENT_NBDKIT_EXITED, >> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, >> >> QEMU_PROCESS_EVENT_LAST >> } qemuProcessEventType; >> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c >> index 9f3013e231..6b1e4084f6 100644 >> --- a/src/qemu/qemu_driver.c >> +++ b/src/qemu/qemu_driver.c >> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, >> } >> >> >> +static void >> +processShutdownCompletedEvent(virQEMUDriver *driver, >> + virDomainObj *vm) >> +{ >> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) >> + return; > > Shouldn't this be: > > if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) > return; > > Otherwise looking good. No need to resend, I can fix that before pushing. > > Michal I was looking at qemuDomainSutdownFlagsMonitor but VIR_JOB_DESTROY seems to be the better match as it is also used in processMonitorEOFEvent and the should have been shutdown already. -- Mit freundlichen Grüßen/Kind regards Boris Fiuczynski IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Wolfgang Wendt Geschäftsführung: David Faller Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294
© 2016 - 2024 Red Hat, Inc.