src/qemu/qemu_domain.c | 8 +++ src/qemu/qemu_domain.h | 2 + src/qemu/qemu_driver.c | 18 ++++++ src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_process.h | 1 + 5 files changed, 148 insertions(+), 5 deletions(-)
In cases when a QEMU process takes longer than the time sigterm and
sigkill are issued to kill the process do not simply fail and leave the
VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
an fd on /proc/$pid and get notified when the QEMU process finally has
terminated to cleanup the VM state.
Resolves: https://issues.redhat.com/browse/RHEL-28819
Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
---
src/qemu/qemu_domain.c | 8 +++
src/qemu/qemu_domain.h | 2 +
src/qemu/qemu_driver.c | 18 ++++++
src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_process.h | 1 +
5 files changed, 148 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2134b11038..8147ff02fd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
virChrdevFree(priv->devs);
+ if (priv->pidMonitored >= 0) {
+ virEventRemoveHandle(priv->pidMonitored);
+ priv->pidMonitored = -1;
+ }
+
/* This should never be non-NULL if we get here, but just in case... */
if (priv->mon) {
VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
@@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
priv->blockjobs = virHashNew(virObjectUnref);
priv->fds = virHashNew(g_object_unref);
+ priv->pidMonitored = -1;
+
/* agent commands block by default, user can choose different behavior */
priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
@@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
case QEMU_PROCESS_EVENT_RESET:
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
case QEMU_PROCESS_EVENT_MONITOR_EOF:
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index d777559119..a5092dd7f0 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
bool beingDestroyed;
char *pidfile;
+ int pidMonitored;
virDomainPCIAddressSet *pciaddrs;
virDomainUSBAddressSet *usbaddrs;
@@ -469,6 +470,7 @@ typedef enum {
QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
QEMU_PROCESS_EVENT_RESET,
QEMU_PROCESS_EVENT_NBDKIT_EXITED,
+ QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
QEMU_PROCESS_EVENT_LAST
} qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9f3013e231..6b1e4084f6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
}
+static void
+processShutdownCompletedEvent(virQEMUDriver *driver,
+ virDomainObj *vm)
+{
+ if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+ return;
+
+ if (virDomainObjIsActive(vm))
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_UNKNOWN,
+ VIR_ASYNC_JOB_NONE, 0);
+
+ virDomainObjEndJob(vm);
+}
+
+
static void qemuProcessEventHandler(void *data, void *opaque)
{
struct qemuProcessEvent *processEvent = data;
@@ -4101,6 +4116,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
processNbdkitExitedEvent(vm, processEvent->data);
break;
+ case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
+ processShutdownCompletedEvent(driver, vm);
+ break;
case QEMU_PROCESS_EVENT_LAST:
break;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..c6f7d34168 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,9 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#if WITH_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
@@ -8387,9 +8390,114 @@ qemuProcessCreatePretendCmdBuild(virDomainObj *vm,
}
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+typedef struct {
+ virDomainObj *vm;
+ int pidfd;
+} qemuProcessInShutdownEventData;
+
+
+static qemuProcessInShutdownEventData*
+qemuProcessInShutdownEventDataNew(virDomainObj *vm, int pidfd)
+{
+ qemuProcessInShutdownEventData *d = g_new(qemuProcessInShutdownEventData, 1);
+ d->vm = virObjectRef(vm);
+ d->pidfd = pidfd;
+ return d;
+}
+
+
+static void
+qemuProcessInShutdownEventDataFree(qemuProcessInShutdownEventData *d)
+{
+ virObjectUnref(d->vm);
+ VIR_FORCE_CLOSE(d->pidfd);
+ g_free(d);
+}
+
+
+static void
+qemuProcessInShutdownPidfdCb(int watch,
+ int fd,
+ int events G_GNUC_UNUSED,
+ void *opaque)
+{
+ qemuProcessInShutdownEventData *data = opaque;
+ virDomainObj *vm = data->vm;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld fd=%d",
+ vm, vm->def->name, (long long)vm->pid, fd);
+
+ virEventRemoveHandle(watch);
+
+ virObjectLock(vm);
+
+ VIR_INFO("QEMU process %lld finally completed termination",
+ (long long)vm->pid);
+
+ QEMU_DOMAIN_PRIVATE(vm)->pidMonitored = -1;
+ qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
+ 0, 0, NULL);
+
+ virObjectUnlock(vm);
+}
+#endif /* WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open) */
+
+
+static int
+qemuProcessInShutdownStartMonitor(virDomainObj *vm)
+{
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+ qemuDomainObjPrivate *priv = vm->privateData;
+ qemuProcessInShutdownEventData *data;
+ int pidfd;
+ int ret = -1;
+
+ VIR_DEBUG("vm=%p name=%s pid=%lld pidMonitored=%d",
+ vm, vm->def->name, (long long)vm->pid,
+ priv->pidMonitored);
+
+ if (priv->pidMonitored >= 0) {
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i already set up", vm->pid);
+ goto cleanup;
+ }
+
+ pidfd = syscall(SYS_pidfd_open, vm->pid, 0);
+ if (pidfd < 0) {
+ if (errno == ESRCH) /* process has already terminated */
+ ret = 1;
+ goto cleanup;
+ }
+
+ data = qemuProcessInShutdownEventDataNew(vm, pidfd);
+ if ((priv->pidMonitored = virEventAddHandle(pidfd,
+ VIR_EVENT_HANDLE_READABLE,
+ qemuProcessInShutdownPidfdCb,
+ data,
+ (virFreeCallback)qemuProcessInShutdownEventDataFree)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to monitor qemu in-shutdown process %1$i"),
+ vm->pid);
+ qemuProcessInShutdownEventDataFree(data);
+ goto cleanup;
+ }
+ VIR_DEBUG("Monitoring qemu in-shutdown process %i for termination", vm->pid);
+ ret = 0;
+
+ cleanup:
+ return ret;
+#else /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+ VIR_DEBUG("Monitoring qemu process %i not implemented", vm->pid);
+ return -1;
+#endif /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+}
+
+
int
qemuProcessKill(virDomainObj *vm, unsigned int flags)
{
+ int ret = -1;
+
VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
vm, vm->def->name,
(long long)vm->pid, flags);
@@ -8410,10 +8518,16 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags)
/* Request an extra delay of two seconds per current nhostdevs
* to be safe against stalls by the kernel freeing up the resources */
- return virProcessKillPainfullyDelay(vm->pid,
- !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
- vm->def->nhostdevs * 2,
- false);
+ ret = virProcessKillPainfullyDelay(vm->pid,
+ !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
+ vm->def->nhostdevs * 2,
+ false);
+
+ if (ret < 0 && (flags & VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR))
+ if (qemuProcessInShutdownStartMonitor(vm) == 1)
+ ret = 0; /* process termination detected */
+
+ return ret;
}
@@ -8438,7 +8552,7 @@ qemuProcessBeginStopJob(virDomainObj *vm,
* cleared inside qemuProcessStop */
priv->beingDestroyed = true;
- if (qemuProcessKill(vm, killFlags) < 0)
+ if (qemuProcessKill(vm, killFlags|VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR) < 0)
goto error;
/* Wake up anything waiting on domain condition */
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index cb67bfcd2d..2324aeb7bd 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -180,6 +180,7 @@ typedef enum {
VIR_QEMU_PROCESS_KILL_FORCE = 1 << 0,
VIR_QEMU_PROCESS_KILL_NOWAIT = 1 << 1,
VIR_QEMU_PROCESS_KILL_NOCHECK = 1 << 2, /* bypass the running vm check */
+ VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR = 1 << 3, /* on error enable process monitor */
} virQemuProcessKillMode;
int qemuProcessKill(virDomainObj *vm, unsigned int flags);
--
2.45.0
On 7/19/24 17:44, Boris Fiuczynski wrote:
> In cases when a QEMU process takes longer than the time sigterm and
> sigkill are issued to kill the process do not simply fail and leave the
> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
> an fd on /proc/$pid and get notified when the QEMU process finally has
> terminated to cleanup the VM state.
>
> Resolves: https://issues.redhat.com/browse/RHEL-28819
> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
> ---
> src/qemu/qemu_domain.c | 8 +++
> src/qemu/qemu_domain.h | 2 +
> src/qemu/qemu_driver.c | 18 ++++++
> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
> src/qemu/qemu_process.h | 1 +
> 5 files changed, 148 insertions(+), 5 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 2134b11038..8147ff02fd 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>
> virChrdevFree(priv->devs);
>
> + if (priv->pidMonitored >= 0) {
> + virEventRemoveHandle(priv->pidMonitored);
> + priv->pidMonitored = -1;
> + }
> +
> /* This should never be non-NULL if we get here, but just in case... */
> if (priv->mon) {
> VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
> priv->blockjobs = virHashNew(virObjectUnref);
> priv->fds = virHashNew(g_object_unref);
>
> + priv->pidMonitored = -1;
> +
> /* agent commands block by default, user can choose different behavior */
> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> case QEMU_PROCESS_EVENT_RESET:
> case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
> case QEMU_PROCESS_EVENT_MONITOR_EOF:
> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
> case QEMU_PROCESS_EVENT_LAST:
> break;
> }
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index d777559119..a5092dd7f0 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>
> bool beingDestroyed;
> char *pidfile;
> + int pidMonitored;
>
> virDomainPCIAddressSet *pciaddrs;
> virDomainUSBAddressSet *usbaddrs;
> @@ -469,6 +470,7 @@ typedef enum {
> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
> QEMU_PROCESS_EVENT_RESET,
> QEMU_PROCESS_EVENT_NBDKIT_EXITED,
> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>
> QEMU_PROCESS_EVENT_LAST
> } qemuProcessEventType;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 9f3013e231..6b1e4084f6 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
> }
>
>
> +static void
> +processShutdownCompletedEvent(virQEMUDriver *driver,
> + virDomainObj *vm)
> +{
> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> + return;
Shouldn't this be:
if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
return;
Otherwise looking good. No need to resend, I can fix that before pushing.
Michal
On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
> On 7/19/24 17:44, Boris Fiuczynski wrote:
> > In cases when a QEMU process takes longer than the time sigterm and
> > sigkill are issued to kill the process do not simply fail and leave the
> > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
> > an fd on /proc/$pid and get notified when the QEMU process finally has
> > terminated to cleanup the VM state.
> >
> > Resolves: https://issues.redhat.com/browse/RHEL-28819
> > Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
> > ---
> > src/qemu/qemu_domain.c | 8 +++
> > src/qemu/qemu_domain.h | 2 +
> > src/qemu/qemu_driver.c | 18 ++++++
> > src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
> > src/qemu/qemu_process.h | 1 +
> > 5 files changed, 148 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> > index 2134b11038..8147ff02fd 100644
> > --- a/src/qemu/qemu_domain.c
> > +++ b/src/qemu/qemu_domain.c
> > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
> >
> > virChrdevFree(priv->devs);
> >
> > + if (priv->pidMonitored >= 0) {
> > + virEventRemoveHandle(priv->pidMonitored);
> > + priv->pidMonitored = -1;
> > + }
> > +
> > /* This should never be non-NULL if we get here, but just in case... */
> > if (priv->mon) {
> > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
> > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
> > priv->blockjobs = virHashNew(virObjectUnref);
> > priv->fds = virHashNew(g_object_unref);
> >
> > + priv->pidMonitored = -1;
> > +
> > /* agent commands block by default, user can choose different behavior */
> > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
> > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> > case QEMU_PROCESS_EVENT_RESET:
> > case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
> > case QEMU_PROCESS_EVENT_MONITOR_EOF:
> > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
> > case QEMU_PROCESS_EVENT_LAST:
> > break;
> > }
> > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> > index d777559119..a5092dd7f0 100644
> > --- a/src/qemu/qemu_domain.h
> > +++ b/src/qemu/qemu_domain.h
> > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
> >
> > bool beingDestroyed;
> > char *pidfile;
> > + int pidMonitored;
> >
> > virDomainPCIAddressSet *pciaddrs;
> > virDomainUSBAddressSet *usbaddrs;
> > @@ -469,6 +470,7 @@ typedef enum {
> > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
> > QEMU_PROCESS_EVENT_RESET,
> > QEMU_PROCESS_EVENT_NBDKIT_EXITED,
> > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
> >
> > QEMU_PROCESS_EVENT_LAST
> > } qemuProcessEventType;
> > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > index 9f3013e231..6b1e4084f6 100644
> > --- a/src/qemu/qemu_driver.c
> > +++ b/src/qemu/qemu_driver.c
> > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
> > }
> >
> >
> > +static void
> > +processShutdownCompletedEvent(virQEMUDriver *driver,
> > + virDomainObj *vm)
> > +{
> > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> > + return;
>
> Shouldn't this be:
>
> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
> return;
>
> Otherwise looking good. No need to resend, I can fix that before pushing.
And followed by qemuProcessEndStopJob after calling qemuProcessStop.
Jirka
On 7/22/24 2:01 PM, Jiri Denemark wrote:
> On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
>> On 7/19/24 17:44, Boris Fiuczynski wrote:
>>> In cases when a QEMU process takes longer than the time sigterm and
>>> sigkill are issued to kill the process do not simply fail and leave the
>>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>>> an fd on /proc/$pid and get notified when the QEMU process finally has
>>> terminated to cleanup the VM state.
>>>
>>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>>> ---
>>> src/qemu/qemu_domain.c | 8 +++
>>> src/qemu/qemu_domain.h | 2 +
>>> src/qemu/qemu_driver.c | 18 ++++++
>>> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
>>> src/qemu/qemu_process.h | 1 +
>>> 5 files changed, 148 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>>> index 2134b11038..8147ff02fd 100644
>>> --- a/src/qemu/qemu_domain.c
>>> +++ b/src/qemu/qemu_domain.c
>>> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>>>
>>> virChrdevFree(priv->devs);
>>>
>>> + if (priv->pidMonitored >= 0) {
>>> + virEventRemoveHandle(priv->pidMonitored);
>>> + priv->pidMonitored = -1;
>>> + }
>>> +
>>> /* This should never be non-NULL if we get here, but just in case... */
>>> if (priv->mon) {
>>> VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
>>> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
>>> priv->blockjobs = virHashNew(virObjectUnref);
>>> priv->fds = virHashNew(g_object_unref);
>>>
>>> + priv->pidMonitored = -1;
>>> +
>>> /* agent commands block by default, user can choose different behavior */
>>> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
>>> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
>>> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>>> case QEMU_PROCESS_EVENT_RESET:
>>> case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
>>> case QEMU_PROCESS_EVENT_MONITOR_EOF:
>>> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
>>> case QEMU_PROCESS_EVENT_LAST:
>>> break;
>>> }
>>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>>> index d777559119..a5092dd7f0 100644
>>> --- a/src/qemu/qemu_domain.h
>>> +++ b/src/qemu/qemu_domain.h
>>> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>>>
>>> bool beingDestroyed;
>>> char *pidfile;
>>> + int pidMonitored;
>>>
>>> virDomainPCIAddressSet *pciaddrs;
>>> virDomainUSBAddressSet *usbaddrs;
>>> @@ -469,6 +470,7 @@ typedef enum {
>>> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
>>> QEMU_PROCESS_EVENT_RESET,
>>> QEMU_PROCESS_EVENT_NBDKIT_EXITED,
>>> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>>>
>>> QEMU_PROCESS_EVENT_LAST
>>> } qemuProcessEventType;
>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>> index 9f3013e231..6b1e4084f6 100644
>>> --- a/src/qemu/qemu_driver.c
>>> +++ b/src/qemu/qemu_driver.c
>>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>>> }
>>>
>>>
>>> +static void
>>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>>> + virDomainObj *vm)
>>> +{
>>> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>>> + return;
>>
>> Shouldn't this be:
>>
>> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>> return;
>>
>> Otherwise looking good. No need to resend, I can fix that before pushing.
>
> And followed by qemuProcessEndStopJob after calling qemuProcessStop.
>
> Jirka
Just to make sure I retested successfully with the changes above.
--
Mit freundlichen Grüßen/Kind regards
Boris Fiuczynski
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294
On 7/23/24 17:57, Boris Fiuczynski wrote:
> On 7/22/24 2:01 PM, Jiri Denemark wrote:
>> On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
>>> On 7/19/24 17:44, Boris Fiuczynski wrote:
>>>> In cases when a QEMU process takes longer than the time sigterm and
>>>> sigkill are issued to kill the process do not simply fail and leave the
>>>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>>>> an fd on /proc/$pid and get notified when the QEMU process finally has
>>>> terminated to cleanup the VM state.
>>>>
>>>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>>>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>>>> ---
>>>> src/qemu/qemu_domain.c | 8 +++
>>>> src/qemu/qemu_domain.h | 2 +
>>>> src/qemu/qemu_driver.c | 18 ++++++
>>>> src/qemu/qemu_process.c | 124
>>>> ++++++++++++++++++++++++++++++++++++++--
>>>> src/qemu/qemu_process.h | 1 +
>>>> 5 files changed, 148 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>>> index 9f3013e231..6b1e4084f6 100644
>>>> --- a/src/qemu/qemu_driver.c
>>>> +++ b/src/qemu/qemu_driver.c
>>>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>>>> }
>>>> +static void
>>>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>>>> + virDomainObj *vm)
>>>> +{
>>>> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>>>> + return;
>>>
>>> Shouldn't this be:
>>>
>>> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>>> return;
>>>
>>> Otherwise looking good. No need to resend, I can fix that before
>>> pushing.
>>
>> And followed by qemuProcessEndStopJob after calling qemuProcessStop.
>>
>> Jirka
>
> Just to make sure I retested successfully with the changes above.
>
Perfect!
I'll do the change before merging (among with some code style
adjustments like curly braces around multiline if-s, etc. Nothing too
intrusive).
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Michal
On 7/22/24 10:55 AM, Michal Prívozník wrote:
> On 7/19/24 17:44, Boris Fiuczynski wrote:
>> In cases when a QEMU process takes longer than the time sigterm and
>> sigkill are issued to kill the process do not simply fail and leave the
>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>> an fd on /proc/$pid and get notified when the QEMU process finally has
>> terminated to cleanup the VM state.
>>
>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>> ---
>> src/qemu/qemu_domain.c | 8 +++
>> src/qemu/qemu_domain.h | 2 +
>> src/qemu/qemu_driver.c | 18 ++++++
>> src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
>> src/qemu/qemu_process.h | 1 +
>> 5 files changed, 148 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>> index 2134b11038..8147ff02fd 100644
>> --- a/src/qemu/qemu_domain.c
>> +++ b/src/qemu/qemu_domain.c
>> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>>
>> virChrdevFree(priv->devs);
>>
>> + if (priv->pidMonitored >= 0) {
>> + virEventRemoveHandle(priv->pidMonitored);
>> + priv->pidMonitored = -1;
>> + }
>> +
>> /* This should never be non-NULL if we get here, but just in case... */
>> if (priv->mon) {
>> VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
>> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
>> priv->blockjobs = virHashNew(virObjectUnref);
>> priv->fds = virHashNew(g_object_unref);
>>
>> + priv->pidMonitored = -1;
>> +
>> /* agent commands block by default, user can choose different behavior */
>> priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
>> priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
>> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>> case QEMU_PROCESS_EVENT_RESET:
>> case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
>> case QEMU_PROCESS_EVENT_MONITOR_EOF:
>> + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
>> case QEMU_PROCESS_EVENT_LAST:
>> break;
>> }
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index d777559119..a5092dd7f0 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>>
>> bool beingDestroyed;
>> char *pidfile;
>> + int pidMonitored;
>>
>> virDomainPCIAddressSet *pciaddrs;
>> virDomainUSBAddressSet *usbaddrs;
>> @@ -469,6 +470,7 @@ typedef enum {
>> QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
>> QEMU_PROCESS_EVENT_RESET,
>> QEMU_PROCESS_EVENT_NBDKIT_EXITED,
>> + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>>
>> QEMU_PROCESS_EVENT_LAST
>> } qemuProcessEventType;
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 9f3013e231..6b1e4084f6 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>> }
>>
>>
>> +static void
>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>> + virDomainObj *vm)
>> +{
>> + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>> + return;
>
> Shouldn't this be:
>
> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
> return;
>
> Otherwise looking good. No need to resend, I can fix that before pushing.
>
> Michal
I was looking at qemuDomainSutdownFlagsMonitor but VIR_JOB_DESTROY seems
to be the better match as it is also used in processMonitorEOFEvent and
the should have been shutdown already.
--
Mit freundlichen Grüßen/Kind regards
Boris Fiuczynski
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294
© 2016 - 2026 Red Hat, Inc.