[PATCH v3] qemu: add a monitor to /proc/$pid when killing times out

Boris Fiuczynski posted 1 patch 1 month, 2 weeks ago
src/qemu/qemu_domain.c  |   8 +++
src/qemu/qemu_domain.h  |   2 +
src/qemu/qemu_driver.c  |  18 ++++++
src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_process.h |   1 +
5 files changed, 148 insertions(+), 5 deletions(-)
[PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Boris Fiuczynski 1 month, 2 weeks ago
In cases when a QEMU process takes longer than the time sigterm and
sigkill are issued to kill the process do not simply fail and leave the
VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
an fd on /proc/$pid and get notified when the QEMU process finally has
terminated to cleanup the VM state.

Resolves: https://issues.redhat.com/browse/RHEL-28819
Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
---
 src/qemu/qemu_domain.c  |   8 +++
 src/qemu/qemu_domain.h  |   2 +
 src/qemu/qemu_driver.c  |  18 ++++++
 src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
 src/qemu/qemu_process.h |   1 +
 5 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2134b11038..8147ff02fd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
 
     virChrdevFree(priv->devs);
 
+    if (priv->pidMonitored >= 0) {
+        virEventRemoveHandle(priv->pidMonitored);
+        priv->pidMonitored = -1;
+    }
+
     /* This should never be non-NULL if we get here, but just in case... */
     if (priv->mon) {
         VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
@@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
     priv->blockjobs = virHashNew(virObjectUnref);
     priv->fds = virHashNew(g_object_unref);
 
+    priv->pidMonitored = -1;
+
     /* agent commands block by default, user can choose different behavior */
     priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
     priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
@@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
     case QEMU_PROCESS_EVENT_RESET:
     case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
     case QEMU_PROCESS_EVENT_MONITOR_EOF:
+    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
     case QEMU_PROCESS_EVENT_LAST:
         break;
     }
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index d777559119..a5092dd7f0 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
 
     bool beingDestroyed;
     char *pidfile;
+    int pidMonitored;
 
     virDomainPCIAddressSet *pciaddrs;
     virDomainUSBAddressSet *usbaddrs;
@@ -469,6 +470,7 @@ typedef enum {
     QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
     QEMU_PROCESS_EVENT_RESET,
     QEMU_PROCESS_EVENT_NBDKIT_EXITED,
+    QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
 
     QEMU_PROCESS_EVENT_LAST
 } qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9f3013e231..6b1e4084f6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
 }
 
 
+static void
+processShutdownCompletedEvent(virQEMUDriver *driver,
+                              virDomainObj *vm)
+{
+    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
+        return;
+
+    if (virDomainObjIsActive(vm))
+        qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_UNKNOWN,
+                        VIR_ASYNC_JOB_NONE, 0);
+
+    virDomainObjEndJob(vm);
+}
+
+
 static void qemuProcessEventHandler(void *data, void *opaque)
 {
     struct qemuProcessEvent *processEvent = data;
@@ -4101,6 +4116,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
     case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
         processNbdkitExitedEvent(vm, processEvent->data);
         break;
+    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
+        processShutdownCompletedEvent(driver, vm);
+        break;
     case QEMU_PROCESS_EVENT_LAST:
         break;
     }
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..c6f7d34168 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,9 @@
 #include <unistd.h>
 #include <signal.h>
 #include <sys/stat.h>
+#if WITH_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
 #if defined(__linux__)
 # include <linux/capability.h>
 #elif defined(__FreeBSD__)
@@ -8387,9 +8390,114 @@ qemuProcessCreatePretendCmdBuild(virDomainObj *vm,
 }
 
 
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+typedef struct {
+    virDomainObj *vm;
+    int pidfd;
+} qemuProcessInShutdownEventData;
+
+
+static qemuProcessInShutdownEventData*
+qemuProcessInShutdownEventDataNew(virDomainObj *vm, int pidfd)
+{
+    qemuProcessInShutdownEventData *d = g_new(qemuProcessInShutdownEventData, 1);
+    d->vm = virObjectRef(vm);
+    d->pidfd = pidfd;
+    return d;
+}
+
+
+static void
+qemuProcessInShutdownEventDataFree(qemuProcessInShutdownEventData *d)
+{
+    virObjectUnref(d->vm);
+    VIR_FORCE_CLOSE(d->pidfd);
+    g_free(d);
+}
+
+
+static void
+qemuProcessInShutdownPidfdCb(int watch,
+                             int fd,
+                             int events G_GNUC_UNUSED,
+                             void *opaque)
+{
+    qemuProcessInShutdownEventData *data = opaque;
+    virDomainObj *vm = data->vm;
+
+    VIR_DEBUG("vm=%p name=%s pid=%lld fd=%d",
+              vm, vm->def->name, (long long)vm->pid, fd);
+
+    virEventRemoveHandle(watch);
+
+    virObjectLock(vm);
+
+    VIR_INFO("QEMU process %lld finally completed termination",
+             (long long)vm->pid);
+
+    QEMU_DOMAIN_PRIVATE(vm)->pidMonitored = -1;
+    qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
+                           0, 0, NULL);
+
+    virObjectUnlock(vm);
+}
+#endif /* WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open) */
+
+
+static int
+qemuProcessInShutdownStartMonitor(virDomainObj *vm)
+{
+#if WITH_SYS_SYSCALL_H && defined(SYS_pidfd_open)
+    qemuDomainObjPrivate *priv = vm->privateData;
+    qemuProcessInShutdownEventData *data;
+    int pidfd;
+    int ret = -1;
+
+    VIR_DEBUG("vm=%p name=%s pid=%lld pidMonitored=%d",
+              vm, vm->def->name, (long long)vm->pid,
+              priv->pidMonitored);
+
+    if (priv->pidMonitored >= 0) {
+        VIR_DEBUG("Monitoring qemu in-shutdown process %i already set up", vm->pid);
+        goto cleanup;
+    }
+
+    pidfd = syscall(SYS_pidfd_open, vm->pid, 0);
+    if (pidfd < 0) {
+        if (errno == ESRCH) /* process has already terminated */
+            ret = 1;
+        goto cleanup;
+    }
+
+    data = qemuProcessInShutdownEventDataNew(vm, pidfd);
+    if ((priv->pidMonitored = virEventAddHandle(pidfd,
+                                                VIR_EVENT_HANDLE_READABLE,
+                                                qemuProcessInShutdownPidfdCb,
+                                                data,
+                                                (virFreeCallback)qemuProcessInShutdownEventDataFree)) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                    _("failed to monitor qemu in-shutdown process %1$i"),
+                    vm->pid);
+        qemuProcessInShutdownEventDataFree(data);
+        goto cleanup;
+    }
+    VIR_DEBUG("Monitoring qemu in-shutdown process %i for termination", vm->pid);
+    ret = 0;
+
+ cleanup:
+    return ret;
+#else /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+    VIR_DEBUG("Monitoring qemu process %i not implemented", vm->pid);
+    return -1;
+#endif /* !WITH_SYS_SYSCALL_H || !defined(SYS_pidfd_open) */
+}
+
+
 int
 qemuProcessKill(virDomainObj *vm, unsigned int flags)
 {
+    int ret = -1;
+
     VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x",
               vm, vm->def->name,
               (long long)vm->pid, flags);
@@ -8410,10 +8518,16 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags)
 
     /* Request an extra delay of two seconds per current nhostdevs
      * to be safe against stalls by the kernel freeing up the resources */
-    return virProcessKillPainfullyDelay(vm->pid,
-                                        !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
-                                        vm->def->nhostdevs * 2,
-                                        false);
+    ret = virProcessKillPainfullyDelay(vm->pid,
+                                       !!(flags & VIR_QEMU_PROCESS_KILL_FORCE),
+                                       vm->def->nhostdevs * 2,
+                                       false);
+
+    if (ret < 0 && (flags & VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR))
+        if (qemuProcessInShutdownStartMonitor(vm) == 1)
+            ret = 0; /* process termination detected */
+
+    return ret;
 }
 
 
@@ -8438,7 +8552,7 @@ qemuProcessBeginStopJob(virDomainObj *vm,
      * cleared inside qemuProcessStop */
     priv->beingDestroyed = true;
 
-    if (qemuProcessKill(vm, killFlags) < 0)
+    if (qemuProcessKill(vm, killFlags|VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR) < 0)
         goto error;
 
     /* Wake up anything waiting on domain condition */
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index cb67bfcd2d..2324aeb7bd 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -180,6 +180,7 @@ typedef enum {
    VIR_QEMU_PROCESS_KILL_FORCE  = 1 << 0,
    VIR_QEMU_PROCESS_KILL_NOWAIT = 1 << 1,
    VIR_QEMU_PROCESS_KILL_NOCHECK = 1 << 2, /* bypass the running vm check */
+   VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR = 1 << 3, /* on error enable process monitor */
 } virQemuProcessKillMode;
 
 int qemuProcessKill(virDomainObj *vm, unsigned int flags);
-- 
2.45.0
Re: [PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Michal Prívozník 1 month, 2 weeks ago
On 7/19/24 17:44, Boris Fiuczynski wrote:
> In cases when a QEMU process takes longer than the time sigterm and
> sigkill are issued to kill the process do not simply fail and leave the
> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
> an fd on /proc/$pid and get notified when the QEMU process finally has
> terminated to cleanup the VM state.
> 
> Resolves: https://issues.redhat.com/browse/RHEL-28819
> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
> ---
>  src/qemu/qemu_domain.c  |   8 +++
>  src/qemu/qemu_domain.h  |   2 +
>  src/qemu/qemu_driver.c  |  18 ++++++
>  src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
>  src/qemu/qemu_process.h |   1 +
>  5 files changed, 148 insertions(+), 5 deletions(-)
> 
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 2134b11038..8147ff02fd 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>  
>      virChrdevFree(priv->devs);
>  
> +    if (priv->pidMonitored >= 0) {
> +        virEventRemoveHandle(priv->pidMonitored);
> +        priv->pidMonitored = -1;
> +    }
> +
>      /* This should never be non-NULL if we get here, but just in case... */
>      if (priv->mon) {
>          VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
>      priv->blockjobs = virHashNew(virObjectUnref);
>      priv->fds = virHashNew(g_object_unref);
>  
> +    priv->pidMonitored = -1;
> +
>      /* agent commands block by default, user can choose different behavior */
>      priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
>      priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>      case QEMU_PROCESS_EVENT_RESET:
>      case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
>      case QEMU_PROCESS_EVENT_MONITOR_EOF:
> +    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
>      case QEMU_PROCESS_EVENT_LAST:
>          break;
>      }
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index d777559119..a5092dd7f0 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>  
>      bool beingDestroyed;
>      char *pidfile;
> +    int pidMonitored;
>  
>      virDomainPCIAddressSet *pciaddrs;
>      virDomainUSBAddressSet *usbaddrs;
> @@ -469,6 +470,7 @@ typedef enum {
>      QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
>      QEMU_PROCESS_EVENT_RESET,
>      QEMU_PROCESS_EVENT_NBDKIT_EXITED,
> +    QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>  
>      QEMU_PROCESS_EVENT_LAST
>  } qemuProcessEventType;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 9f3013e231..6b1e4084f6 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>  }
>  
>  
> +static void
> +processShutdownCompletedEvent(virQEMUDriver *driver,
> +                              virDomainObj *vm)
> +{
> +    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> +        return;

Shouldn't this be:

    if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
        return;

Otherwise looking good. No need to resend, I can fix that before pushing.

Michal
Re: [PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Jiri Denemark 1 month, 2 weeks ago
On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
> On 7/19/24 17:44, Boris Fiuczynski wrote:
> > In cases when a QEMU process takes longer than the time sigterm and
> > sigkill are issued to kill the process do not simply fail and leave the
> > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
> > an fd on /proc/$pid and get notified when the QEMU process finally has
> > terminated to cleanup the VM state.
> > 
> > Resolves: https://issues.redhat.com/browse/RHEL-28819
> > Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
> > ---
> >  src/qemu/qemu_domain.c  |   8 +++
> >  src/qemu/qemu_domain.h  |   2 +
> >  src/qemu/qemu_driver.c  |  18 ++++++
> >  src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
> >  src/qemu/qemu_process.h |   1 +
> >  5 files changed, 148 insertions(+), 5 deletions(-)
> > 
> > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> > index 2134b11038..8147ff02fd 100644
> > --- a/src/qemu/qemu_domain.c
> > +++ b/src/qemu/qemu_domain.c
> > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
> >  
> >      virChrdevFree(priv->devs);
> >  
> > +    if (priv->pidMonitored >= 0) {
> > +        virEventRemoveHandle(priv->pidMonitored);
> > +        priv->pidMonitored = -1;
> > +    }
> > +
> >      /* This should never be non-NULL if we get here, but just in case... */
> >      if (priv->mon) {
> >          VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
> > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
> >      priv->blockjobs = virHashNew(virObjectUnref);
> >      priv->fds = virHashNew(g_object_unref);
> >  
> > +    priv->pidMonitored = -1;
> > +
> >      /* agent commands block by default, user can choose different behavior */
> >      priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
> >      priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
> > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> >      case QEMU_PROCESS_EVENT_RESET:
> >      case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
> >      case QEMU_PROCESS_EVENT_MONITOR_EOF:
> > +    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
> >      case QEMU_PROCESS_EVENT_LAST:
> >          break;
> >      }
> > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> > index d777559119..a5092dd7f0 100644
> > --- a/src/qemu/qemu_domain.h
> > +++ b/src/qemu/qemu_domain.h
> > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
> >  
> >      bool beingDestroyed;
> >      char *pidfile;
> > +    int pidMonitored;
> >  
> >      virDomainPCIAddressSet *pciaddrs;
> >      virDomainUSBAddressSet *usbaddrs;
> > @@ -469,6 +470,7 @@ typedef enum {
> >      QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
> >      QEMU_PROCESS_EVENT_RESET,
> >      QEMU_PROCESS_EVENT_NBDKIT_EXITED,
> > +    QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
> >  
> >      QEMU_PROCESS_EVENT_LAST
> >  } qemuProcessEventType;
> > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > index 9f3013e231..6b1e4084f6 100644
> > --- a/src/qemu/qemu_driver.c
> > +++ b/src/qemu/qemu_driver.c
> > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
> >  }
> >  
> >  
> > +static void
> > +processShutdownCompletedEvent(virQEMUDriver *driver,
> > +                              virDomainObj *vm)
> > +{
> > +    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
> > +        return;
> 
> Shouldn't this be:
> 
>     if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>         return;
> 
> Otherwise looking good. No need to resend, I can fix that before pushing.

And followed by qemuProcessEndStopJob after calling qemuProcessStop.

Jirka
Re: [PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Boris Fiuczynski 1 month, 2 weeks ago
On 7/22/24 2:01 PM, Jiri Denemark wrote:
> On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
>> On 7/19/24 17:44, Boris Fiuczynski wrote:
>>> In cases when a QEMU process takes longer than the time sigterm and
>>> sigkill are issued to kill the process do not simply fail and leave the
>>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>>> an fd on /proc/$pid and get notified when the QEMU process finally has
>>> terminated to cleanup the VM state.
>>>
>>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>>> ---
>>>   src/qemu/qemu_domain.c  |   8 +++
>>>   src/qemu/qemu_domain.h  |   2 +
>>>   src/qemu/qemu_driver.c  |  18 ++++++
>>>   src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
>>>   src/qemu/qemu_process.h |   1 +
>>>   5 files changed, 148 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>>> index 2134b11038..8147ff02fd 100644
>>> --- a/src/qemu/qemu_domain.c
>>> +++ b/src/qemu/qemu_domain.c
>>> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>>>   
>>>       virChrdevFree(priv->devs);
>>>   
>>> +    if (priv->pidMonitored >= 0) {
>>> +        virEventRemoveHandle(priv->pidMonitored);
>>> +        priv->pidMonitored = -1;
>>> +    }
>>> +
>>>       /* This should never be non-NULL if we get here, but just in case... */
>>>       if (priv->mon) {
>>>           VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
>>> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
>>>       priv->blockjobs = virHashNew(virObjectUnref);
>>>       priv->fds = virHashNew(g_object_unref);
>>>   
>>> +    priv->pidMonitored = -1;
>>> +
>>>       /* agent commands block by default, user can choose different behavior */
>>>       priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
>>>       priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
>>> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>>>       case QEMU_PROCESS_EVENT_RESET:
>>>       case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
>>>       case QEMU_PROCESS_EVENT_MONITOR_EOF:
>>> +    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
>>>       case QEMU_PROCESS_EVENT_LAST:
>>>           break;
>>>       }
>>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>>> index d777559119..a5092dd7f0 100644
>>> --- a/src/qemu/qemu_domain.h
>>> +++ b/src/qemu/qemu_domain.h
>>> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>>>   
>>>       bool beingDestroyed;
>>>       char *pidfile;
>>> +    int pidMonitored;
>>>   
>>>       virDomainPCIAddressSet *pciaddrs;
>>>       virDomainUSBAddressSet *usbaddrs;
>>> @@ -469,6 +470,7 @@ typedef enum {
>>>       QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
>>>       QEMU_PROCESS_EVENT_RESET,
>>>       QEMU_PROCESS_EVENT_NBDKIT_EXITED,
>>> +    QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>>>   
>>>       QEMU_PROCESS_EVENT_LAST
>>>   } qemuProcessEventType;
>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>> index 9f3013e231..6b1e4084f6 100644
>>> --- a/src/qemu/qemu_driver.c
>>> +++ b/src/qemu/qemu_driver.c
>>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>>>   }
>>>   
>>>   
>>> +static void
>>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>>> +                              virDomainObj *vm)
>>> +{
>>> +    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>>> +        return;
>>
>> Shouldn't this be:
>>
>>      if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>>          return;
>>
>> Otherwise looking good. No need to resend, I can fix that before pushing.
> 
> And followed by qemuProcessEndStopJob after calling qemuProcessStop.
> 
> Jirka

Just to make sure I retested successfully with the changes above.

-- 
Mit freundlichen Grüßen/Kind regards
    Boris Fiuczynski

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294
Re: [PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Michal Prívozník 1 month, 2 weeks ago
On 7/23/24 17:57, Boris Fiuczynski wrote:
> On 7/22/24 2:01 PM, Jiri Denemark wrote:
>> On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote:
>>> On 7/19/24 17:44, Boris Fiuczynski wrote:
>>>> In cases when a QEMU process takes longer than the time sigterm and
>>>> sigkill are issued to kill the process do not simply fail and leave the
>>>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>>>> an fd on /proc/$pid and get notified when the QEMU process finally has
>>>> terminated to cleanup the VM state.
>>>>
>>>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>>>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>>>> ---
>>>>   src/qemu/qemu_domain.c  |   8 +++
>>>>   src/qemu/qemu_domain.h  |   2 +
>>>>   src/qemu/qemu_driver.c  |  18 ++++++
>>>>   src/qemu/qemu_process.c | 124
>>>> ++++++++++++++++++++++++++++++++++++++--
>>>>   src/qemu/qemu_process.h |   1 +
>>>>   5 files changed, 148 insertions(+), 5 deletions(-)
>>>>


>>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>>> index 9f3013e231..6b1e4084f6 100644
>>>> --- a/src/qemu/qemu_driver.c
>>>> +++ b/src/qemu/qemu_driver.c
>>>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>>>>   }
>>>>     +static void
>>>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>>>> +                              virDomainObj *vm)
>>>> +{
>>>> +    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>>>> +        return;
>>>
>>> Shouldn't this be:
>>>
>>>      if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>>>          return;
>>>
>>> Otherwise looking good. No need to resend, I can fix that before
>>> pushing.
>>
>> And followed by qemuProcessEndStopJob after calling qemuProcessStop.
>>
>> Jirka
> 
> Just to make sure I retested successfully with the changes above.
> 

Perfect!

I'll do the change before merging (among with some code style
adjustments like curly braces around multiline if-s, etc. Nothing too
intrusive).

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

Michal
Re: [PATCH v3] qemu: add a monitor to /proc/$pid when killing times out
Posted by Boris Fiuczynski 1 month, 2 weeks ago
On 7/22/24 10:55 AM, Michal Prívozník wrote:
> On 7/19/24 17:44, Boris Fiuczynski wrote:
>> In cases when a QEMU process takes longer than the time sigterm and
>> sigkill are issued to kill the process do not simply fail and leave the
>> VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up
>> an fd on /proc/$pid and get notified when the QEMU process finally has
>> terminated to cleanup the VM state.
>>
>> Resolves: https://issues.redhat.com/browse/RHEL-28819
>> Signed-off-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
>> ---
>>   src/qemu/qemu_domain.c  |   8 +++
>>   src/qemu/qemu_domain.h  |   2 +
>>   src/qemu/qemu_driver.c  |  18 ++++++
>>   src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++--
>>   src/qemu/qemu_process.h |   1 +
>>   5 files changed, 148 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>> index 2134b11038..8147ff02fd 100644
>> --- a/src/qemu/qemu_domain.c
>> +++ b/src/qemu/qemu_domain.c
>> @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data)
>>   
>>       virChrdevFree(priv->devs);
>>   
>> +    if (priv->pidMonitored >= 0) {
>> +        virEventRemoveHandle(priv->pidMonitored);
>> +        priv->pidMonitored = -1;
>> +    }
>> +
>>       /* This should never be non-NULL if we get here, but just in case... */
>>       if (priv->mon) {
>>           VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion"));
>> @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque)
>>       priv->blockjobs = virHashNew(virObjectUnref);
>>       priv->fds = virHashNew(g_object_unref);
>>   
>> +    priv->pidMonitored = -1;
>> +
>>       /* agent commands block by default, user can choose different behavior */
>>       priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK;
>>       priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX;
>> @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
>>       case QEMU_PROCESS_EVENT_RESET:
>>       case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
>>       case QEMU_PROCESS_EVENT_MONITOR_EOF:
>> +    case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED:
>>       case QEMU_PROCESS_EVENT_LAST:
>>           break;
>>       }
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index d777559119..a5092dd7f0 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate {
>>   
>>       bool beingDestroyed;
>>       char *pidfile;
>> +    int pidMonitored;
>>   
>>       virDomainPCIAddressSet *pciaddrs;
>>       virDomainUSBAddressSet *usbaddrs;
>> @@ -469,6 +470,7 @@ typedef enum {
>>       QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
>>       QEMU_PROCESS_EVENT_RESET,
>>       QEMU_PROCESS_EVENT_NBDKIT_EXITED,
>> +    QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED,
>>   
>>       QEMU_PROCESS_EVENT_LAST
>>   } qemuProcessEventType;
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 9f3013e231..6b1e4084f6 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm,
>>   }
>>   
>>   
>> +static void
>> +processShutdownCompletedEvent(virQEMUDriver *driver,
>> +                              virDomainObj *vm)
>> +{
>> +    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
>> +        return;
> 
> Shouldn't this be:
> 
>      if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
>          return;
> 
> Otherwise looking good. No need to resend, I can fix that before pushing.
> 
> Michal

I was looking at qemuDomainSutdownFlagsMonitor but VIR_JOB_DESTROY seems 
to be the better match as it is also used in processMonitorEOFEvent and 
the should have been shutdown already.

-- 
Mit freundlichen Grüßen/Kind regards
    Boris Fiuczynski

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294