[libvirt] [PATCH] libxl: Fix lock manager lock ordering

Jim Fehlig posted 1 patch 4 years, 6 months ago
Test syntax-check passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20191014213910.25181-1-jfehlig@suse.com
src/libxl/libxl_domain.c    | 14 +++++++-------
src/libxl/libxl_migration.c | 14 +++++---------
2 files changed, 12 insertions(+), 16 deletions(-)
[libvirt] [PATCH] libxl: Fix lock manager lock ordering
Posted by Jim Fehlig 4 years, 6 months ago
The ordering of lock manager locks in the libxl driver has a flaw that was
uncovered by a migration error path. In the perform phase of migration, the
source host calls virDomainLockProcessPause to release the lock before
sending the VM to the destination host. If the send fails an attempt is made
to reacquire the lock with virDomainLockProcessResume, but that too can fail
if the destination host has not finished cleaning up the failed VM and
releasing the lock it acquired when starting to receive the VM.

This change delays calling virDomainLockProcessResume in libxlDomainStart
until the VM is successfully created, but before it is unpaused. A similar
approach is used by the qemu driver, avoiding the need to release the lock
if VM creation fails. In the migration perform phase, releasing the lock
with virDomainLockProcessPause is delayed until the VM is successfully
sent to the destination, which avoids reacquiring the lock if the send
fails.

Signed-off-by: Jim Fehlig <jfehlig@suse.com>
---
 src/libxl/libxl_domain.c    | 14 +++++++-------
 src/libxl/libxl_migration.c | 14 +++++---------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 4073bf8d46..a830a19b99 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -1364,13 +1364,6 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
                                   NULL) < 0)
         goto cleanup;
 
-    if (virDomainLockProcessResume(driver->lockManager,
-                                  "xen:///system",
-                                  vm,
-                                  priv->lockState) < 0)
-        goto cleanup;
-    VIR_FREE(priv->lockState);
-
     if (libxlNetworkPrepareDevices(vm->def) < 0)
         goto cleanup_dom;
 
@@ -1453,6 +1446,13 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
 
     libxlLoggerOpenFile(cfg->logger, domid, vm->def->name, config_json);
 
+    if (virDomainLockProcessResume(driver->lockManager,
+                                  "xen:///system",
+                                  vm,
+                                  priv->lockState) < 0)
+        goto destroy_dom;
+    VIR_FREE(priv->lockState);
+
     /* Always enable domain death events */
     if (libxl_evenable_domain_death(cfg->ctx, vm->def->id, 0, &priv->deathW))
         goto destroy_dom;
diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
index a1021d499b..8e64dc5d04 100644
--- a/src/libxl/libxl_migration.c
+++ b/src/libxl/libxl_migration.c
@@ -1253,20 +1253,16 @@ libxlDomainMigrationSrcPerform(libxlDriverPrivatePtr driver,
     sockfd = virNetSocketDupFD(sock, true);
     virObjectUnref(sock);
 
-    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
-        VIR_WARN("Unable to release lease on %s", vm->def->name);
-    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
-
     /* suspend vm and send saved data to dst through socket fd */
     virObjectUnlock(vm);
     ret = libxlDoMigrateSrcSend(driver, vm, flags, sockfd);
     virObjectLock(vm);
 
-    if (ret < 0) {
-        virDomainLockProcessResume(driver->lockManager,
-                                   "xen:///system",
-                                   vm,
-                                   priv->lockState);
+    if (ret == 0) {
+        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
+            VIR_WARN("Unable to release lease on %s", vm->def->name);
+        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
+    } else {
         /*
          * Confirm phase will not be executed if perform fails. End the
          * job started in begin phase.
-- 
2.23.0


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] libxl: Fix lock manager lock ordering
Posted by Cole Robinson 4 years, 5 months ago
On 10/14/19 5:39 PM, Jim Fehlig wrote:
> The ordering of lock manager locks in the libxl driver has a flaw that was
> uncovered by a migration error path. In the perform phase of migration, the
> source host calls virDomainLockProcessPause to release the lock before
> sending the VM to the destination host. If the send fails an attempt is made
> to reacquire the lock with virDomainLockProcessResume, but that too can fail
> if the destination host has not finished cleaning up the failed VM and
> releasing the lock it acquired when starting to receive the VM.
> 
> This change delays calling virDomainLockProcessResume in libxlDomainStart
> until the VM is successfully created, but before it is unpaused. A similar
> approach is used by the qemu driver, avoiding the need to release the lock
> if VM creation fails. In the migration perform phase, releasing the lock
> with virDomainLockProcessPause is delayed until the VM is successfully
> sent to the destination, which avoids reacquiring the lock if the send
> fails.
> 
> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
> ---
>  src/libxl/libxl_domain.c    | 14 +++++++-------
>  src/libxl/libxl_migration.c | 14 +++++---------
>  2 files changed, 12 insertions(+), 16 deletions(-)

Reviewed-by: Cole Robinson <crobinso@redhat.com>

- Cole

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] libxl: Fix lock manager lock ordering
Posted by Jim Fehlig 4 years, 5 months ago
Does anyone have a few moments to review this patch? Thanks!

Regards,
Jim

On 10/14/19 3:39 PM, Jim Fehlig wrote:
> The ordering of lock manager locks in the libxl driver has a flaw that was
> uncovered by a migration error path. In the perform phase of migration, the
> source host calls virDomainLockProcessPause to release the lock before
> sending the VM to the destination host. If the send fails an attempt is made
> to reacquire the lock with virDomainLockProcessResume, but that too can fail
> if the destination host has not finished cleaning up the failed VM and
> releasing the lock it acquired when starting to receive the VM.
> 
> This change delays calling virDomainLockProcessResume in libxlDomainStart
> until the VM is successfully created, but before it is unpaused. A similar
> approach is used by the qemu driver, avoiding the need to release the lock
> if VM creation fails. In the migration perform phase, releasing the lock
> with virDomainLockProcessPause is delayed until the VM is successfully
> sent to the destination, which avoids reacquiring the lock if the send
> fails.
> 
> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
> ---
>   src/libxl/libxl_domain.c    | 14 +++++++-------
>   src/libxl/libxl_migration.c | 14 +++++---------
>   2 files changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index 4073bf8d46..a830a19b99 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -1364,13 +1364,6 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
>                                     NULL) < 0)
>           goto cleanup;
>   
> -    if (virDomainLockProcessResume(driver->lockManager,
> -                                  "xen:///system",
> -                                  vm,
> -                                  priv->lockState) < 0)
> -        goto cleanup;
> -    VIR_FREE(priv->lockState);
> -
>       if (libxlNetworkPrepareDevices(vm->def) < 0)
>           goto cleanup_dom;
>   
> @@ -1453,6 +1446,13 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
>   
>       libxlLoggerOpenFile(cfg->logger, domid, vm->def->name, config_json);
>   
> +    if (virDomainLockProcessResume(driver->lockManager,
> +                                  "xen:///system",
> +                                  vm,
> +                                  priv->lockState) < 0)
> +        goto destroy_dom;
> +    VIR_FREE(priv->lockState);
> +
>       /* Always enable domain death events */
>       if (libxl_evenable_domain_death(cfg->ctx, vm->def->id, 0, &priv->deathW))
>           goto destroy_dom;
> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
> index a1021d499b..8e64dc5d04 100644
> --- a/src/libxl/libxl_migration.c
> +++ b/src/libxl/libxl_migration.c
> @@ -1253,20 +1253,16 @@ libxlDomainMigrationSrcPerform(libxlDriverPrivatePtr driver,
>       sockfd = virNetSocketDupFD(sock, true);
>       virObjectUnref(sock);
>   
> -    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
> -        VIR_WARN("Unable to release lease on %s", vm->def->name);
> -    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
> -
>       /* suspend vm and send saved data to dst through socket fd */
>       virObjectUnlock(vm);
>       ret = libxlDoMigrateSrcSend(driver, vm, flags, sockfd);
>       virObjectLock(vm);
>   
> -    if (ret < 0) {
> -        virDomainLockProcessResume(driver->lockManager,
> -                                   "xen:///system",
> -                                   vm,
> -                                   priv->lockState);
> +    if (ret == 0) {
> +        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
> +            VIR_WARN("Unable to release lease on %s", vm->def->name);
> +        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
> +    } else {
>           /*
>            * Confirm phase will not be executed if perform fails. End the
>            * job started in begin phase.
> 


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list