src/qemu/qemu_driver.c | 4 ++-- src/qemu/qemu_process.c | 20 ++++++++++++++++---- src/qemu/qemu_process.h | 1 + 3 files changed, 19 insertions(+), 6 deletions(-)
Recent commit v10.4.0-87-gd9935a5c4f made a reasonable change to only
reset beingDestroyed back to false when vm->def->id is reset to make
sure other code can detect a domain is (about to become) inactive. It
even added a comment saying any caller of qemuProcessBeginStopJob is
supposed to call qemuProcessStop to clear beingDestroyed. But not every
caller really does so because they first call qemuProcessBeginStopJob
and then check whether a domain is still running. If not the
qemuProcessStop call is skipped leaving beingDestroyed=true. In case of
a persistent domain this may block incoming migrations of such domain as
the migration code would think the domain died unexpectedly (even though
it's still running).
The qemuProcessBeginStopJob function is a wrapper around
virDomainObjBeginJob, but virDomainObjEndJob was used directly for
cleanup. This patch introduces a new qemuProcessEndStopJob wrapper
around virDomainObjEndJob to properly undo everything
qemuProcessBeginStopJob did.
https://issues.redhat.com/browse/RHEL-43309
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
src/qemu/qemu_driver.c | 4 ++--
src/qemu/qemu_process.c | 20 ++++++++++++++++----
src/qemu/qemu_process.h | 1 +
3 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index f898d85667..9f3013e231 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2102,7 +2102,7 @@ qemuDomainDestroyFlags(virDomainPtr dom,
endjob:
if (ret == 0)
qemuDomainRemoveInactive(driver, vm, 0, false);
- virDomainObjEndJob(vm);
+ qemuProcessEndStopJob(vm);
cleanup:
virDomainObjEndAPI(&vm);
@@ -3888,7 +3888,7 @@ processMonitorEOFEvent(virQEMUDriver *driver,
endjob:
qemuDomainRemoveInactive(driver, vm, 0, false);
- virDomainObjEndJob(vm);
+ qemuProcessEndStopJob(vm);
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 7cbe521a6e..25dfd04272 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -8421,7 +8421,8 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags)
* qemuProcessBeginStopJob:
*
* Stop all current jobs by killing the domain and start a new one for
- * qemuProcessStop.
+ * qemuProcessStop. The caller has to make sure qemuProcessEndStopJob is
+ * called to properly cleanup the job.
*/
int
qemuProcessBeginStopJob(virDomainObj *vm,
@@ -8448,8 +8449,9 @@ qemuProcessBeginStopJob(virDomainObj *vm,
goto error;
/* priv->beingDestroyed is deliberately left set to 'true' here. Caller
- * is supposed to call qemuProcessStop, which will reset it after
- * 'vm->def->id' is set to -1 */
+ * is supposed to call qemuProcessStop (which will reset it after
+ * 'vm->def->id' is set to -1) and/or qemuProcessEndStopJob to do proper
+ * cleanup. */
return 0;
error:
@@ -8458,6 +8460,16 @@ qemuProcessBeginStopJob(virDomainObj *vm,
}
+void
+qemuProcessEndStopJob(virDomainObj *vm)
+{
+ if (!virDomainObjIsActive(vm))
+ QEMU_DOMAIN_PRIVATE(vm)->beingDestroyed = false;
+
+ virDomainObjEndJob(vm);
+}
+
+
void qemuProcessStop(virQEMUDriver *driver,
virDomainObj *vm,
virDomainShutoffReason reason,
@@ -8800,7 +8812,7 @@ qemuProcessAutoDestroy(virDomainObj *dom,
qemuDomainRemoveInactive(driver, dom, 0, false);
- virDomainObjEndJob(dom);
+ qemuProcessEndStopJob(dom);
virObjectEventStateQueue(driver->domainEventState, event);
}
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index c1ea949215..cb67bfcd2d 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -169,6 +169,7 @@ typedef enum {
int qemuProcessBeginStopJob(virDomainObj *vm,
virDomainJob job,
bool forceKill);
+void qemuProcessEndStopJob(virDomainObj *vm);
void qemuProcessStop(virQEMUDriver *driver,
virDomainObj *vm,
virDomainShutoffReason reason,
--
2.45.2
On Thu, Jul 11, 2024 at 14:55:17 +0200, Jiri Denemark wrote: > Recent commit v10.4.0-87-gd9935a5c4f made a reasonable change to only > reset beingDestroyed back to false when vm->def->id is reset to make > sure other code can detect a domain is (about to become) inactive. It > even added a comment saying any caller of qemuProcessBeginStopJob is > supposed to call qemuProcessStop to clear beingDestroyed. But not every > caller really does so because they first call qemuProcessBeginStopJob > and then check whether a domain is still running. If not the > qemuProcessStop call is skipped leaving beingDestroyed=true. In case of > a persistent domain this may block incoming migrations of such domain as > the migration code would think the domain died unexpectedly (even though > it's still running). > > The qemuProcessBeginStopJob function is a wrapper around > virDomainObjBeginJob, but virDomainObjEndJob was used directly for > cleanup. This patch introduces a new qemuProcessEndStopJob wrapper > around virDomainObjEndJob to properly undo everything > qemuProcessBeginStopJob did. > > https://issues.redhat.com/browse/RHEL-43309 > > Signed-off-by: Jiri Denemark <jdenemar@redhat.com> > --- > src/qemu/qemu_driver.c | 4 ++-- > src/qemu/qemu_process.c | 20 ++++++++++++++++---- > src/qemu/qemu_process.h | 1 + > 3 files changed, 19 insertions(+), 6 deletions(-) For consistency you should also fix qemuProcessAutoDestroy which uses qemuProcessBeginStopJob even when it doesn't jump out before calling qemuProcessStop. Reviewed-by: Peter Krempa <pkrempa@redhat.com>
© 2016 - 2024 Red Hat, Inc.