[libvirt PATCH v8 26/37] qemu: improve error handling when restarting nbdkit

Jonathon Jongsma posted 37 patches 2 years, 5 months ago
Only 36 patches received!
[libvirt PATCH v8 26/37] qemu: improve error handling when restarting nbdkit
Posted by Jonathon Jongsma 2 years, 5 months ago
Change the return value for qemuNbdkitProcessRestart() and
qemuNbdkitStorageSourceManageProcess() to return an error status. The
main effect of this change is that when libvirt starts up and reconnects
to an already-running domain with an nbdkit-backed disk, it will return
an error if it fails to restart any nbdkit processes that are not found
to be running or if it fails to monitor any running nbdkit processes.
These failures will result in the domain being killed.

Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
---
 src/qemu/qemu_driver.c  |  3 ++-
 src/qemu/qemu_nbdkit.c  | 34 ++++++++++++++++------------------
 src/qemu/qemu_nbdkit.h  |  4 ++--
 src/qemu/qemu_process.c |  6 ++++--
 4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c4236b872f..a469b1d04a 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4047,7 +4047,8 @@ processNbdkitExitedEvent(virDomainObj *vm,
     if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
         return;
 
-    qemuNbdkitProcessRestart(nbdkit, vm);
+    if (qemuNbdkitProcessRestart(nbdkit, vm) < 0)
+        virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART);
 
     virDomainObjEndJob(vm);
 }
diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
index b9ced1e5cf..2ad34d6484 100644
--- a/src/qemu/qemu_nbdkit.c
+++ b/src/qemu/qemu_nbdkit.c
@@ -619,7 +619,7 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
 }
 
 
-void
+int
 qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
                          virDomainObj *vm)
 {
@@ -629,10 +629,7 @@ qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
     /* clean up resources associated with process */
     qemuNbdkitProcessStop(proc);
 
-    if (qemuNbdkitProcessStart(proc, vm, driver) < 0) {
-        VIR_DEBUG("Unable to restart nbkdit process");
-        virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART);
-    }
+    return qemuNbdkitProcessStart(proc, vm, driver);
 }
 
 
@@ -775,7 +772,7 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source,
 }
 
 
-static void
+static int
 qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
                                         virDomainObj *vm)
 {
@@ -784,33 +781,31 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
     qemuNbdkitProcess *proc;
 
     if (!srcpriv)
-        return;
+        return 0;
 
     proc = srcpriv->nbdkitProcess;
 
     if (!proc)
-        return;
+        return 0;
 
     if (!proc->caps)
         proc->caps = qemuGetNbdkitCaps(vmpriv->driver);
 
     if (proc->pid <= 0) {
         if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0) {
-            VIR_WARN("Unable to read pidfile '%s'", proc->pidfile);
-            return;
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("Unable to read pidfile '%1$s'"),
+                           proc->pidfile);
+            return -1;
         }
     }
 
     if (virProcessKill(proc->pid, 0) < 0) {
         VIR_DEBUG("nbdkit process %i is not alive", proc->pid);
-        qemuNbdkitProcessRestart(proc, vm);
-        return;
+        return qemuNbdkitProcessRestart(proc, vm);
     }
 
-    if (qemuNbdkitProcessStartMonitor(proc, vm) < 0) {
-        VIR_DEBUG("unable monitor nbdkit process");
-        virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART);
-    }
+    return qemuNbdkitProcessStartMonitor(proc, vm);
 }
 
 /**
@@ -822,13 +817,16 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
  * @source. It is intended to be called after libvirt restarts and has loaded its current state from
  * disk and is attempting to re-connect to active domains.
  */
-void
+int
 qemuNbdkitStorageSourceManageProcess(virStorageSource *source,
                                      virDomainObj *vm)
 {
     virStorageSource *backing;
     for (backing = source; backing != NULL; backing = backing->backingStore)
-        qemuNbdkitStorageSourceManageProcessOne(backing, vm);
+        if (qemuNbdkitStorageSourceManageProcessOne(backing, vm) < 0)
+            return -1;
+
+    return 0;
 }
 
 
diff --git a/src/qemu/qemu_nbdkit.h b/src/qemu/qemu_nbdkit.h
index f33b049d38..a818ff65e1 100644
--- a/src/qemu/qemu_nbdkit.h
+++ b/src/qemu/qemu_nbdkit.h
@@ -68,7 +68,7 @@ qemuNbdkitStartStorageSource(virQEMUDriver *driver,
 void
 qemuNbdkitStopStorageSource(virStorageSource *src);
 
-void
+int
 qemuNbdkitStorageSourceManageProcess(virStorageSource *src,
                                      virDomainObj *vm);
 
@@ -100,7 +100,7 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
                        virDomainObj *vm,
                        virQEMUDriver *driver);
 
-void
+int
 qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
                          virDomainObj *vm);
 
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a1c6cfd91e..ddf95a064f 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -8993,10 +8993,12 @@ qemuProcessReconnect(void *opaque)
     }
 
     for (i = 0; i < obj->def->ndisks; i++)
-        qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src, obj);
+        if (qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src, obj) < 0)
+            goto error;
 
     if (obj->def->os.loader && obj->def->os.loader->nvram)
-        qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj);
+        if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj) < 0)
+            goto error;
 
     /* update domain state XML with possibly updated state in virDomainObj */
     if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0)
-- 
2.41.0
Re: [libvirt PATCH v8 26/37] qemu: improve error handling when restarting nbdkit
Posted by Peter Krempa 2 years, 5 months ago
On Thu, Aug 31, 2023 at 16:40:06 -0500, Jonathon Jongsma wrote:
> Change the return value for qemuNbdkitProcessRestart() and
> qemuNbdkitStorageSourceManageProcess() to return an error status. The
> main effect of this change is that when libvirt starts up and reconnects
> to an already-running domain with an nbdkit-backed disk, it will return
> an error if it fails to restart any nbdkit processes that are not found
> to be running or if it fails to monitor any running nbdkit processes.
> These failures will result in the domain being killed.

I don't quite understand why this is a separate patch, but ...

> Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
> ---
>  src/qemu/qemu_driver.c  |  3 ++-
>  src/qemu/qemu_nbdkit.c  | 34 ++++++++++++++++------------------
>  src/qemu/qemu_nbdkit.h  |  4 ++--
>  src/qemu/qemu_process.c |  6 ++++--
>  4 files changed, 24 insertions(+), 23 deletions(-)

Reviewed-by: Peter Krempa <pkrempa@redhat.com>
Re: [libvirt PATCH v8 26/37] qemu: improve error handling when restarting nbdkit
Posted by Jonathon Jongsma 2 years, 5 months ago
On 9/5/23 2:55 AM, Peter Krempa wrote:
> On Thu, Aug 31, 2023 at 16:40:06 -0500, Jonathon Jongsma wrote:
>> Change the return value for qemuNbdkitProcessRestart() and
>> qemuNbdkitStorageSourceManageProcess() to return an error status. The
>> main effect of this change is that when libvirt starts up and reconnects
>> to an already-running domain with an nbdkit-backed disk, it will return
>> an error if it fails to restart any nbdkit processes that are not found
>> to be running or if it fails to monitor any running nbdkit processes.
>> These failures will result in the domain being killed.
> 
> I don't quite understand why this is a separate patch, but ...
> 
>> Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
>> ---
>>   src/qemu/qemu_driver.c  |  3 ++-
>>   src/qemu/qemu_nbdkit.c  | 34 ++++++++++++++++------------------
>>   src/qemu/qemu_nbdkit.h  |  4 ++--
>>   src/qemu/qemu_process.c |  6 ++++--
>>   4 files changed, 24 insertions(+), 23 deletions(-)
> 
> Reviewed-by: Peter Krempa <pkrempa@redhat.com>
> 


Yes, I probably should have explained why it was separate. I initially 
made it separate in case you felt that the behavior change was not 
acceptable and wanted me to drop it. I will squash it into the previous 
patch.

Jonathon