[PATCH v3 08/10] qemuSnapshotActiveInternalDeleteGetDevices: Add warning when deleting inconsistent snapshot

Peter Krempa posted 10 patches 1 year, 4 months ago
[PATCH v3 08/10] qemuSnapshotActiveInternalDeleteGetDevices: Add warning when deleting inconsistent snapshot
Posted by Peter Krempa 1 year, 4 months ago
As explained in the commit which added the new internal snapshot
deletion code we don't want to do any form of strict checking whether
the libvirt metadata is consistent with the on-disk state as we didn't
historically do that.

In order to be able to spot the cases add a warning into the logs if
such state is encountered. While warnings are easy to miss it's the only
reasonable way to do that. Users will be encouraged to file an issue
with the information, without requiring them to enable debug logs as
the reproduction of that issue may include very old historical state.

The checker is deliberately added separately so that it can be easily
reverted once it's no longer needed.

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
---
 src/qemu/qemu_snapshot.c | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index 4927ca0bfc..02c876c881 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -3731,6 +3731,14 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
     g_auto(GStrv) devices = g_new0(char *, vm->def->ndisks + 2);
     size_t ndevs = 0;
     size_t i = 0;
+    /* variables below are used for checking of corner cases */
+    g_autoptr(GHashTable) foundDisks = virHashNew(NULL);
+    g_auto(virBuffer) errMissingSnap = VIR_BUFFER_INITIALIZER;
+    g_auto(virBuffer) errUnexpectedSnap = VIR_BUFFER_INITIALIZER;
+    g_auto(virBuffer) errExtraDisks = VIR_BUFFER_INITIALIZER;
+    GHashTableIter htitr;
+    void *key;
+    bool warn = false;

     if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, VIR_ASYNC_JOB_SNAPSHOT)))
         return NULL;
@@ -3759,6 +3767,7 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
             continue;

         devices[ndevs++] = g_strdup(format_nodename);
+        g_hash_table_insert(foundDisks, g_strdup(domdisk->dst), NULL);
     }

     if (vm->def->os.loader &&
@@ -3775,6 +3784,51 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
         }
     }

+    /* We currently don't want this code to become stricter than what 'delvm'
+     * did thus we'll report if the on-disk state mismatches the snapshot
+     * definition only as a warning */
+    for (i = 0; i < snapdef->ndisks; i++) {
+        virDomainSnapshotDiskDef *snapdisk = snapdef->disks + i;
+
+        switch (snapdisk->snapshot) {
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_INTERNAL:
+            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
+                g_hash_table_remove(foundDisks, snapdisk->name);
+            } else {
+                virBufferAsprintf(&errMissingSnap, "%s ", snapdisk->name);
+                warn = true;
+            }
+            break;
+
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_EXTERNAL:
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_MANUAL:
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_NO:
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_DEFAULT:
+        case VIR_DOMAIN_SNAPSHOT_LOCATION_LAST:
+            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
+                virBufferAsprintf(&errUnexpectedSnap, "%s ", snapdisk->name);
+                warn = true;
+            } else {
+                g_hash_table_remove(foundDisks, snapdisk->name);
+            }
+        }
+    }
+
+    g_hash_table_iter_init(&htitr, foundDisks);
+
+    while (g_hash_table_iter_next(&htitr, &key, NULL)) {
+        warn = true;
+        virBufferAsprintf(&errExtraDisks, "%s ", (const char *) key);
+    }
+
+    if (warn) {
+        VIR_WARN("inconsistent internal snapshot state (deletion): VM='%s' snapshot='%s' missing='%s' unexpected='%s' extra='%s",
+                 vm->def->name, snapname,
+                 virBufferCurrentContent(&errMissingSnap),
+                 virBufferCurrentContent(&errUnexpectedSnap),
+                 virBufferCurrentContent(&errExtraDisks));
+    }
+
     return g_steal_pointer(&devices);
 }

-- 
2.46.0
Re: [PATCH v3 08/10] qemuSnapshotActiveInternalDeleteGetDevices: Add warning when deleting inconsistent snapshot
Posted by Nikolai Barybin via Devel 1 year, 4 months ago
On 10/3/24 15:46, Peter Krempa wrote:
> As explained in the commit which added the new internal snapshot
> deletion code we don't want to do any form of strict checking whether
> the libvirt metadata is consistent with the on-disk state as we didn't
> historically do that.
>
> In order to be able to spot the cases add a warning into the logs if
> such state is encountered. While warnings are easy to miss it's the only
> reasonable way to do that. Users will be encouraged to file an issue
> with the information, without requiring them to enable debug logs as
> the reproduction of that issue may include very old historical state.
>
> The checker is deliberately added separately so that it can be easily
> reverted once it's no longer needed.
>
> Signed-off-by: Peter Krempa<pkrempa@redhat.com>
> ---
>   src/qemu/qemu_snapshot.c | 54 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 54 insertions(+)
>
> diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
> index 4927ca0bfc..02c876c881 100644
> --- a/src/qemu/qemu_snapshot.c
> +++ b/src/qemu/qemu_snapshot.c
> @@ -3731,6 +3731,14 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
>       g_auto(GStrv) devices = g_new0(char *, vm->def->ndisks + 2);
>       size_t ndevs = 0;
>       size_t i = 0;
> +    /* variables below are used for checking of corner cases */
> +    g_autoptr(GHashTable) foundDisks = virHashNew(NULL);
> +    g_auto(virBuffer) errMissingSnap = VIR_BUFFER_INITIALIZER;
> +    g_auto(virBuffer) errUnexpectedSnap = VIR_BUFFER_INITIALIZER;
> +    g_auto(virBuffer) errExtraDisks = VIR_BUFFER_INITIALIZER;
> +    GHashTableIter htitr;
> +    void *key;
> +    bool warn = false;
>
>       if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, VIR_ASYNC_JOB_SNAPSHOT)))
>           return NULL;
> @@ -3759,6 +3767,7 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
>               continue;
>
>           devices[ndevs++] = g_strdup(format_nodename);
> +        g_hash_table_insert(foundDisks, g_strdup(domdisk->dst), NULL);
>       }
>
>       if (vm->def->os.loader &&
> @@ -3775,6 +3784,51 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
>           }
>       }
>
> +    /* We currently don't want this code to become stricter than what 'delvm'
> +     * did thus we'll report if the on-disk state mismatches the snapshot
> +     * definition only as a warning */
> +    for (i = 0; i < snapdef->ndisks; i++) {
> +        virDomainSnapshotDiskDef *snapdisk = snapdef->disks + i;
> +
> +        switch (snapdisk->snapshot) {
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_INTERNAL:
> +            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
> +                g_hash_table_remove(foundDisks, snapdisk->name);
> +            } else {
> +                virBufferAsprintf(&errMissingSnap, "%s ", snapdisk->name);
> +                warn = true;
> +            }
> +            break;
> +
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_EXTERNAL:
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_MANUAL:
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_NO:
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_DEFAULT:
> +        case VIR_DOMAIN_SNAPSHOT_LOCATION_LAST:
> +            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
> +                virBufferAsprintf(&errUnexpectedSnap, "%s ", snapdisk->name);
> +                warn = true;
> +            } else {
> +                g_hash_table_remove(foundDisks, snapdisk->name);
> +            }

This if-else statement looks illogical or I don't understand how it 
works. Probably you meant

  if (!g_hash_table_contains... ?

> +        }
> +    }
> +
> +    g_hash_table_iter_init(&htitr, foundDisks);
> +
> +    while (g_hash_table_iter_next(&htitr, &key, NULL)) {
> +        warn = true;
> +        virBufferAsprintf(&errExtraDisks, "%s ", (const char *) key);
> +    }
> +
> +    if (warn) {
> +        VIR_WARN("inconsistent internal snapshot state (deletion): VM='%s' snapshot='%s' missing='%s' unexpected='%s' extra='%s",
> +                 vm->def->name, snapname,
> +                 virBufferCurrentContent(&errMissingSnap),
> +                 virBufferCurrentContent(&errUnexpectedSnap),
> +                 virBufferCurrentContent(&errExtraDisks));
> +    }
> +
>       return g_steal_pointer(&devices);
>   }
>
Re: [PATCH v3 08/10] qemuSnapshotActiveInternalDeleteGetDevices: Add warning when deleting inconsistent snapshot
Posted by Peter Krempa 1 year, 4 months ago
On Fri, Oct 04, 2024 at 10:25:07 +0200, Nikolai Barybin wrote:
> 
> On 10/3/24 15:46, Peter Krempa wrote:
> > As explained in the commit which added the new internal snapshot
> > deletion code we don't want to do any form of strict checking whether
> > the libvirt metadata is consistent with the on-disk state as we didn't
> > historically do that.
> > 
> > In order to be able to spot the cases add a warning into the logs if
> > such state is encountered. While warnings are easy to miss it's the only
> > reasonable way to do that. Users will be encouraged to file an issue
> > with the information, without requiring them to enable debug logs as
> > the reproduction of that issue may include very old historical state.
> > 
> > The checker is deliberately added separately so that it can be easily
> > reverted once it's no longer needed.
> > 
> > Signed-off-by: Peter Krempa<pkrempa@redhat.com>
> > ---
> >   src/qemu/qemu_snapshot.c | 54 ++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 54 insertions(+)
> > 
> > diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
> > index 4927ca0bfc..02c876c881 100644
> > --- a/src/qemu/qemu_snapshot.c
> > +++ b/src/qemu/qemu_snapshot.c
> > @@ -3731,6 +3731,14 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
> >       g_auto(GStrv) devices = g_new0(char *, vm->def->ndisks + 2);
> >       size_t ndevs = 0;
> >       size_t i = 0;
> > +    /* variables below are used for checking of corner cases */
> > +    g_autoptr(GHashTable) foundDisks = virHashNew(NULL);
> > +    g_auto(virBuffer) errMissingSnap = VIR_BUFFER_INITIALIZER;
> > +    g_auto(virBuffer) errUnexpectedSnap = VIR_BUFFER_INITIALIZER;
> > +    g_auto(virBuffer) errExtraDisks = VIR_BUFFER_INITIALIZER;
> > +    GHashTableIter htitr;
> > +    void *key;
> > +    bool warn = false;
> > 
> >       if (!(blockNamedNodeData = qemuBlockGetNamedNodeData(vm, VIR_ASYNC_JOB_SNAPSHOT)))
> >           return NULL;
> > @@ -3759,6 +3767,7 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
> >               continue;
> > 
> >           devices[ndevs++] = g_strdup(format_nodename);
> > +        g_hash_table_insert(foundDisks, g_strdup(domdisk->dst), NULL);
> >       }
> > 
> >       if (vm->def->os.loader &&
> > @@ -3775,6 +3784,51 @@ qemuSnapshotActiveInternalDeleteGetDevices(virDomainObj *vm,
> >           }
> >       }
> > 
> > +    /* We currently don't want this code to become stricter than what 'delvm'
> > +     * did thus we'll report if the on-disk state mismatches the snapshot
> > +     * definition only as a warning */
> > +    for (i = 0; i < snapdef->ndisks; i++) {
> > +        virDomainSnapshotDiskDef *snapdisk = snapdef->disks + i;
> > +
> > +        switch (snapdisk->snapshot) {
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_INTERNAL:
> > +            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
> > +                g_hash_table_remove(foundDisks, snapdisk->name);
> > +            } else {
> > +                virBufferAsprintf(&errMissingSnap, "%s ", snapdisk->name);
> > +                warn = true;
> > +            }
> > +            break;
> > +
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_EXTERNAL:
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_MANUAL:
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_NO:
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_DEFAULT:
> > +        case VIR_DOMAIN_SNAPSHOT_LOCATION_LAST:
> > +            if (g_hash_table_contains(foundDisks, snapdisk->name)) {
> > +                virBufferAsprintf(&errUnexpectedSnap, "%s ", snapdisk->name);
> > +                warn = true;
> > +            } else {
> > +                g_hash_table_remove(foundDisks, snapdisk->name);
> > +            }
> 
> This if-else statement looks illogical or I don't understand how it works.
> Probably you meant
> 
>  if (!g_hash_table_contains... ?

Yes there is a problem but the condition is correct. The code is
supposted to prepare the warning if the snapshot metadata doesn't say
that a disk should have an intenal snapshot but the disk image does have
it.

What's incorrect is that it shouldn't have the " } else {" line and the
entry should be removed from the hash table when the warning is
prepared ...

> 
> > +        }
> > +    }
> > +
> > +    g_hash_table_iter_init(&htitr, foundDisks);
> > +
> > +    while (g_hash_table_iter_next(&htitr, &key, NULL)) {
> > +        warn = true;
> > +        virBufferAsprintf(&errExtraDisks, "%s ", (const char *) key);

... so that it doesn't trigger this.

> > +    }
> > +
> > +    if (warn) {
> > +        VIR_WARN("inconsistent internal snapshot state (deletion): VM='%s' snapshot='%s' missing='%s' unexpected='%s' extra='%s",
> > +                 vm->def->name, snapname,
> > +                 virBufferCurrentContent(&errMissingSnap),
> > +                 virBufferCurrentContent(&errUnexpectedSnap),
> > +                 virBufferCurrentContent(&errExtraDisks));
> > +    }
> > +
> >       return g_steal_pointer(&devices);
> >   }


I'll delete the '} else {' line on my local branch.