[PATCH] libxl: Fix domain startup failure error reporting

Cole Robinson posted 1 patch 1 year, 10 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/e800e1a3edf585e01cdd637e8d31985780b22371.1655501391.git.crobinso@redhat.com
src/libxl/libxl_domain.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
[PATCH] libxl: Fix domain startup failure error reporting
Posted by Cole Robinson 1 year, 10 months ago
When domain startup fails, domain cleanup calls
libxlNetworkUnwindDevices, which calls virGetConnectNetwork. That is
a top level API entry point, so it resets the initial saved error,
leading to clients seeing:

  error: An error occurred, but the cause is unknown

This preserves the error from before virGetConnectNetwork is called.

Signed-off-by: Cole Robinson <crobinso@redhat.com>
---
 src/libxl/libxl_domain.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 17b347de4e..bda110e9e6 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -830,12 +830,17 @@ libxlNetworkUnwindDevices(virDomainDef *def)
             /* cleanup actual device */
             virDomainNetRemoveHostdev(def, net);
             if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
-                g_autoptr(virConnect) conn = virGetConnectNetwork();
+                g_autoptr(virConnect) conn = NULL;
+                virErrorPtr save_err;
+
+                virErrorPreserveLast(&save_err);
+                conn = virGetConnectNetwork();
 
                 if (conn)
                     virDomainNetReleaseActualDevice(conn, def, net);
                 else
                     VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname));
+                virErrorRestore(&save_err);
             }
         }
     }
-- 
2.36.1
Re: [PATCH] libxl: Fix domain startup failure error reporting
Posted by Michal Prívozník 1 year, 10 months ago
On 6/17/22 23:29, Cole Robinson wrote:
> When domain startup fails, domain cleanup calls
> libxlNetworkUnwindDevices, which calls virGetConnectNetwork, which
> is a top level API entry point, which resets the initial saved error,
> leading to clients seeing:
> 
>   error: An error occurred, but the cause is unknown
> 
> This preserves the error from before virGetConnectNetwork is called.
> 
> Signed-off-by: Cole Robinson <crobinso@redhat.com>
> ---
>  src/libxl/libxl_domain.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index 17b347de4e..bda110e9e6 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -830,12 +830,17 @@ libxlNetworkUnwindDevices(virDomainDef *def)
>              /* cleanup actual device */
>              virDomainNetRemoveHostdev(def, net);
>              if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
> -                g_autoptr(virConnect) conn = virGetConnectNetwork();
> +                g_autoptr(virConnect) conn = NULL;
> +                virErrorPtr save_err;
> +
> +                virErrorPreserveLast(&save_err);
> +                conn = virGetConnectNetwork();
>  
>                  if (conn)
>                      virDomainNetReleaseActualDevice(conn, def, net);
>                  else
>                      VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname));
> +                virErrorRestore(&save_err);
>              }
>          }
>      }

This fixes this particular function. I wonder whether we should mimic
what the QEMU driver does and wrap the whole qemuProcessShutdown() -- I
mean libxlDomainCleanup() -- in virErrorPreserveLast(). Something like this:

diff --git i/src/libxl/libxl_domain.c w/src/libxl/libxl_domain.c
index bda110e9e6..8e8ddd284a 100644
--- i/src/libxl/libxl_domain.c
+++ w/src/libxl/libxl_domain.c
@@ -908,10 +908,13 @@ libxlDomainCleanup(libxlDriverPrivate *driver,
     virHostdevManager *hostdev_mgr = driver->hostdevMgr;
     unsigned int hostdev_flags = VIR_HOSTDEV_SP_PCI;
     size_t i;
+    virErrorPtr save_err;
 
     VIR_DEBUG("Cleaning up domain with id '%d' and name '%s'",
               vm->def->id, vm->def->name);
 
+    virErrorPreserveLast(&save_err);
+
     hostdev_flags |= VIR_HOSTDEV_SP_USB;
 
     /* Call hook with stopped operation. Ignore error and continue with cleanup */
@@ -984,6 +987,7 @@ libxlDomainCleanup(libxlDriverPrivate *driver,
                                     VIR_HOOK_SUBOP_END, NULL));
 
     virDomainObjRemoveTransientDef(vm);
+    virErrorRestore(&save_err);
 }
 
 /*
@@ -1245,6 +1249,7 @@ libxlDomainStartPrepare(libxlDriverPrivate *driver,
 {
     virHostdevManager *hostdev_mgr = driver->hostdevMgr;
     unsigned int hostdev_flags = VIR_HOSTDEV_SP_PCI | VIR_HOSTDEV_SP_USB;
+    virErrorPtr save_err;
 
     if (virDomainObjSetDefTransient(driver->xmlopt, vm, NULL) < 0)
         return -1;
@@ -1272,10 +1277,12 @@ libxlDomainStartPrepare(libxlDriverPrivate *driver,
     return 0;
 
  error:
+    virErrorPreserveLast(&save_err);
     libxlNetworkUnwindDevices(vm->def);
     virHostdevReAttachDomainDevices(hostdev_mgr, LIBXL_DRIVER_INTERNAL_NAME,
                                     vm->def, hostdev_flags);
     virDomainObjRemoveTransientDef(vm);
+    virErrorRestore(&save_err);
     return -1;
 }
 

If this works, replace your patch with this diff, apply my:

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

and push.

Michal
Re: [PATCH] libxl: Fix domain startup failure error reporting
Posted by Cole Robinson 1 year, 10 months ago
On 6/21/22 3:55 AM, Michal Prívozník wrote:
> On 6/17/22 23:29, Cole Robinson wrote:
>> When domain startup fails, domain cleanup calls
>> libxlNetworkUnwindDevices, which calls virGetConnectNetwork, which
>> is a top level API entry point, which resets the initial saved error,
>> leading to clients seeing:
>>
>>   error: An error occurred, but the cause is unknown
>>
>> This preserves the error from before virGetConnectNetwork is called.
>>
>> Signed-off-by: Cole Robinson <crobinso@redhat.com>
>> ---
>>  src/libxl/libxl_domain.c | 7 ++++++-
>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
>> index 17b347de4e..bda110e9e6 100644
>> --- a/src/libxl/libxl_domain.c
>> +++ b/src/libxl/libxl_domain.c
>> @@ -830,12 +830,17 @@ libxlNetworkUnwindDevices(virDomainDef *def)
>>              /* cleanup actual device */
>>              virDomainNetRemoveHostdev(def, net);
>>              if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
>> -                g_autoptr(virConnect) conn = virGetConnectNetwork();
>> +                g_autoptr(virConnect) conn = NULL;
>> +                virErrorPtr save_err;
>> +
>> +                virErrorPreserveLast(&save_err);
>> +                conn = virGetConnectNetwork();
>>  
>>                  if (conn)
>>                      virDomainNetReleaseActualDevice(conn, def, net);
>>                  else
>>                      VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname));
>> +                virErrorRestore(&save_err);
>>              }
>>          }
>>      }
> 
> This fixes this particular function. I wonder whether we should mimic
> what QEMU driver does and wrap whole qemuProcessShutdown(), I mean
> libxlDomainCleanup() in virErrorPreserveLast(). Something like this:
> 
> diff --git i/src/libxl/libxl_domain.c w/src/libxl/libxl_domain.c
> index bda110e9e6..8e8ddd284a 100644
> --- i/src/libxl/libxl_domain.c
> +++ w/src/libxl/libxl_domain.c
> @@ -908,10 +908,13 @@ libxlDomainCleanup(libxlDriverPrivate *driver,
>      virHostdevManager *hostdev_mgr = driver->hostdevMgr;
>      unsigned int hostdev_flags = VIR_HOSTDEV_SP_PCI;
>      size_t i;
> +    virErrorPtr save_err;
>  
>      VIR_DEBUG("Cleaning up domain with id '%d' and name '%s'",
>                vm->def->id, vm->def->name);
>  
> +    virErrorPreserveLast(&save_err);
> +
>      hostdev_flags |= VIR_HOSTDEV_SP_USB;
>  
>      /* Call hook with stopped operation. Ignore error and continue with cleanup */
> @@ -984,6 +987,7 @@ libxlDomainCleanup(libxlDriverPrivate *driver,
>                                      VIR_HOOK_SUBOP_END, NULL));
>  
>      virDomainObjRemoveTransientDef(vm);
> +    virErrorRestore(&save_err);
>  }
>  
>  /*
> @@ -1245,6 +1249,7 @@ libxlDomainStartPrepare(libxlDriverPrivate *driver,
>  {
>      virHostdevManager *hostdev_mgr = driver->hostdevMgr;
>      unsigned int hostdev_flags = VIR_HOSTDEV_SP_PCI | VIR_HOSTDEV_SP_USB;
> +    virErrorPtr save_err;
>  
>      if (virDomainObjSetDefTransient(driver->xmlopt, vm, NULL) < 0)
>          return -1;
> @@ -1272,10 +1277,12 @@ libxlDomainStartPrepare(libxlDriverPrivate *driver,
>      return 0;
>  
>   error:
> +    virErrorPreserveLast(&save_err);
>      libxlNetworkUnwindDevices(vm->def);
>      virHostdevReAttachDomainDevices(hostdev_mgr, LIBXL_DRIVER_INTERNAL_NAME,
>                                      vm->def, hostdev_flags);
>      virDomainObjRemoveTransientDef(vm);
> +    virErrorRestore(&save_err);
>      return -1;
>  }
>  
> 
> If this works, replace your patch with this diff, apply my:
> 
> Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

Thanks, I have made that change and pushed it now.

- Cole