[libvirt PATCH 17/80] qemu: Restore async job start timestamp on reconnect

Jiri Denemark posted 80 patches 3 years, 9 months ago
There is a newer version of this series
[libvirt PATCH 17/80] qemu: Restore async job start timestamp on reconnect
Posted by Jiri Denemark 3 years, 9 months ago
Jobs that are supposed to remain active even when libvirt daemon
restarts were reported as started at the time the daemon was restarted.
This is not very helpful, we should restore the original timestamp.

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
 src/qemu/qemu_domainjob.c                     | 20 +++++++++++++------
 src/qemu/qemu_domainjob.h                     |  1 +
 src/qemu/qemu_process.c                       |  4 +++-
 .../migration-in-params-in.xml                |  2 +-
 .../migration-out-nbd-bitmaps-in.xml          |  2 +-
 .../migration-out-nbd-out.xml                 |  2 +-
 .../migration-out-nbd-tls-out.xml             |  2 +-
 .../migration-out-params-in.xml               |  2 +-
 8 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/qemu/qemu_domainjob.c b/src/qemu/qemu_domainjob.c
index 1f82457bd4..8e8d229afe 100644
--- a/src/qemu/qemu_domainjob.c
+++ b/src/qemu/qemu_domainjob.c
@@ -235,6 +235,7 @@ qemuDomainObjPreserveJob(virDomainObj *obj,
     job->owner = priv->job.owner;
     job->asyncJob = priv->job.asyncJob;
     job->asyncOwner = priv->job.asyncOwner;
+    job->asyncStarted = priv->job.asyncStarted;
     job->phase = priv->job.phase;
     job->privateData = g_steal_pointer(&priv->job.privateData);
     job->apiFlags = priv->job.apiFlags;
@@ -254,6 +255,7 @@ void
 qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
                              virDomainAsyncJob asyncJob,
                              int phase,
+                             unsigned long long started,
                              virDomainJobOperation operation,
                              qemuDomainJobStatsType statsType,
                              virDomainJobStatus status,
@@ -261,18 +263,15 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
 {
     qemuDomainObjPrivate *priv = vm->privateData;
     qemuDomainJobObj *job = &priv->job;
-    unsigned long long now;
 
     VIR_DEBUG("Restoring %s async job for domain %s",
               virDomainAsyncJobTypeToString(asyncJob), vm->def->name);
 
-    ignore_value(virTimeMillisNow(&now));
-
     priv->job.jobsQueued++;
     job->asyncJob = asyncJob;
     job->phase = phase;
     job->asyncOwnerAPI = g_strdup(virThreadJobGet());
-    job->asyncStarted = now;
+    job->asyncStarted = started;
 
     qemuDomainObjSetAsyncJobMask(vm, allowedJobs);
 
@@ -280,7 +279,7 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
     qemuDomainJobSetStatsType(priv->job.current, statsType);
     job->current->operation = operation;
     job->current->status = status;
-    job->current->started = now;
+    job->current->started = started;
 }
 
 
@@ -1250,8 +1249,10 @@ qemuDomainObjPrivateXMLFormatJob(virBuffer *buf,
                                                           priv->job.phase));
     }
 
-    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE)
+    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE) {
         virBufferAsprintf(&attrBuf, " flags='0x%lx'", priv->job.apiFlags);
+        virBufferAsprintf(&attrBuf, " asyncStarted='%llu'", priv->job.asyncStarted);
+    }
 
     if (priv->job.cb &&
         priv->job.cb->formatJob(&childBuf, &priv->job, vm) < 0)
@@ -1307,6 +1308,13 @@ qemuDomainObjPrivateXMLParseJob(virDomainObj *vm,
             }
             VIR_FREE(tmp);
         }
+
+        if (virXPathULongLong("string(@asyncStarted)", ctxt,
+                              &priv->job.asyncStarted) == -2) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("Invalid async job start"));
+            return -1;
+        }
     }
 
     if (virXPathULongHex("string(@flags)", ctxt, &priv->job.apiFlags) == -2) {
diff --git a/src/qemu/qemu_domainjob.h b/src/qemu/qemu_domainjob.h
index 069bb9f8cb..707d4e91ed 100644
--- a/src/qemu/qemu_domainjob.h
+++ b/src/qemu/qemu_domainjob.h
@@ -164,6 +164,7 @@ void
 qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
                              virDomainAsyncJob asyncJob,
                              int phase,
+                             unsigned long long started,
                              virDomainJobOperation operation,
                              qemuDomainJobStatsType statsType,
                              virDomainJobStatus status,
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 3d73c716f1..53221ea9ff 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3473,7 +3473,8 @@ qemuProcessRestoreMigrationJob(virDomainObj *vm,
         allowedJobs = VIR_JOB_DEFAULT_MASK | JOB_MASK(VIR_JOB_MIGRATION_OP);
     }
 
-    qemuDomainObjRestoreAsyncJob(vm, job->asyncJob, job->phase, op,
+    qemuDomainObjRestoreAsyncJob(vm, job->asyncJob, job->phase,
+                                 job->asyncStarted, op,
                                  QEMU_DOMAIN_JOB_STATS_TYPE_MIGRATION,
                                  VIR_DOMAIN_JOB_STATUS_PAUSED,
                                  allowedJobs);
@@ -3746,6 +3747,7 @@ qemuProcessRecoverJob(virQEMUDriver *driver,
     case VIR_ASYNC_JOB_BACKUP:
         /* Restore the config of the async job which is not persisted */
         qemuDomainObjRestoreAsyncJob(vm, VIR_ASYNC_JOB_BACKUP, 0,
+                                     job->asyncStarted,
                                      VIR_DOMAIN_JOB_OPERATION_BACKUP,
                                      QEMU_DOMAIN_JOB_STATS_TYPE_BACKUP,
                                      VIR_DOMAIN_JOB_STATUS_ACTIVE,
diff --git a/tests/qemustatusxml2xmldata/migration-in-params-in.xml b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
index f4bc5753c4..9e9c2deac6 100644
--- a/tests/qemustatusxml2xmldata/migration-in-params-in.xml
+++ b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
@@ -238,7 +238,7 @@
     <flag name='dump-completed'/>
     <flag name='hda-output'/>
   </qemuCaps>
-  <job type='none' async='migration in' phase='prepare' flags='0x900'>
+  <job type='none' async='migration in' phase='prepare' flags='0x900' asyncStarted='0'>
     <migParams>
       <param name='compress-level' value='1'/>
       <param name='compress-threads' value='8'/>
diff --git a/tests/qemustatusxml2xmldata/migration-out-nbd-bitmaps-in.xml b/tests/qemustatusxml2xmldata/migration-out-nbd-bitmaps-in.xml
index c88996f923..1e14efe79a 100644
--- a/tests/qemustatusxml2xmldata/migration-out-nbd-bitmaps-in.xml
+++ b/tests/qemustatusxml2xmldata/migration-out-nbd-bitmaps-in.xml
@@ -262,7 +262,7 @@
     <flag name='cpu-max'/>
     <flag name='migration-param.block-bitmap-mapping'/>
   </qemuCaps>
-  <job type='none' async='migration out' phase='perform3' flags='0x42'>
+  <job type='none' async='migration out' phase='perform3' flags='0x42' asyncStarted='0'>
     <disk dev='hda' migrating='no'/>
     <disk dev='vda' migrating='yes'>
       <migrationSource type='network' format='raw'>
diff --git a/tests/qemustatusxml2xmldata/migration-out-nbd-out.xml b/tests/qemustatusxml2xmldata/migration-out-nbd-out.xml
index 039dcacc58..12abd0c81a 100644
--- a/tests/qemustatusxml2xmldata/migration-out-nbd-out.xml
+++ b/tests/qemustatusxml2xmldata/migration-out-nbd-out.xml
@@ -231,7 +231,7 @@
     <flag name='dump-completed'/>
     <flag name='hda-output'/>
   </qemuCaps>
-  <job type='none' async='migration out' phase='perform3' flags='0x0'>
+  <job type='none' async='migration out' phase='perform3' flags='0x0' asyncStarted='0'>
     <disk dev='vdb' migrating='yes'/>
     <disk dev='hda' migrating='no'/>
   </job>
diff --git a/tests/qemustatusxml2xmldata/migration-out-nbd-tls-out.xml b/tests/qemustatusxml2xmldata/migration-out-nbd-tls-out.xml
index 3d1ddd5771..75c656d6c4 100644
--- a/tests/qemustatusxml2xmldata/migration-out-nbd-tls-out.xml
+++ b/tests/qemustatusxml2xmldata/migration-out-nbd-tls-out.xml
@@ -235,7 +235,7 @@
     <flag name='nbd-tls'/>
     <flag name='blockdev-del'/>
   </qemuCaps>
-  <job type='none' async='migration out' phase='perform3' flags='0x0'>
+  <job type='none' async='migration out' phase='perform3' flags='0x0' asyncStarted='0'>
     <disk dev='vdb' migrating='yes'>
       <migrationSource type='network' format='raw'>
         <source protocol='nbd' name='drive-virtio-disk1' tlsFromConfig='0'>
diff --git a/tests/qemustatusxml2xmldata/migration-out-params-in.xml b/tests/qemustatusxml2xmldata/migration-out-params-in.xml
index cd9dbccd3a..c23c3c3083 100644
--- a/tests/qemustatusxml2xmldata/migration-out-params-in.xml
+++ b/tests/qemustatusxml2xmldata/migration-out-params-in.xml
@@ -238,7 +238,7 @@
     <flag name='dump-completed'/>
     <flag name='hda-output'/>
   </qemuCaps>
-  <job type='none' async='migration out' phase='perform3' flags='0x802'>
+  <job type='none' async='migration out' phase='perform3' flags='0x802' asyncStarted='0'>
     <disk dev='vda' migrating='no'/>
     <migParams>
       <param name='compress-level' value='1'/>
-- 
2.35.1
Re: [libvirt PATCH 17/80] qemu: Restore async job start timestamp on reconnect
Posted by Peter Krempa 3 years, 9 months ago
On Tue, May 10, 2022 at 17:20:38 +0200, Jiri Denemark wrote:
> Jobs that are supposed to remain active even when libvirt daemon
> restarts were reported as started at the time the daemon was restarted.
> This is not very helpful, we should restore the original timestamp.
> 
> Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
> ---
>  src/qemu/qemu_domainjob.c                     | 20 +++++++++++++------
>  src/qemu/qemu_domainjob.h                     |  1 +
>  src/qemu/qemu_process.c                       |  4 +++-
>  .../migration-in-params-in.xml                |  2 +-
>  .../migration-out-nbd-bitmaps-in.xml          |  2 +-
>  .../migration-out-nbd-out.xml                 |  2 +-
>  .../migration-out-nbd-tls-out.xml             |  2 +-
>  .../migration-out-params-in.xml               |  2 +-
>  8 files changed, 23 insertions(+), 12 deletions(-)
> 
> diff --git a/src/qemu/qemu_domainjob.c b/src/qemu/qemu_domainjob.c
> index 1f82457bd4..8e8d229afe 100644
> --- a/src/qemu/qemu_domainjob.c
> +++ b/src/qemu/qemu_domainjob.c

[...]

> @@ -261,18 +263,15 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
>  {
>      qemuDomainObjPrivate *priv = vm->privateData;
>      qemuDomainJobObj *job = &priv->job;
> -    unsigned long long now;
>  
>      VIR_DEBUG("Restoring %s async job for domain %s",
>                virDomainAsyncJobTypeToString(asyncJob), vm->def->name);
>  
> -    ignore_value(virTimeMillisNow(&now));
> -
>      priv->job.jobsQueued++;
>      job->asyncJob = asyncJob;
>      job->phase = phase;
>      job->asyncOwnerAPI = g_strdup(virThreadJobGet());
> -    job->asyncStarted = now;
> +    job->asyncStarted = started;
>  
>      qemuDomainObjSetAsyncJobMask(vm, allowedJobs);
>  
> @@ -280,7 +279,7 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
>      qemuDomainJobSetStatsType(priv->job.current, statsType);
>      job->current->operation = operation;
>      job->current->status = status;
> -    job->current->started = now;
> +    job->current->started = started;
>  }

You don't seem to have any fallback when reading back older status XML
where ...

>  
>  
> @@ -1250,8 +1249,10 @@ qemuDomainObjPrivateXMLFormatJob(virBuffer *buf,
>                                                            priv->job.phase));
>      }
>  
> -    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE)
> +    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE) {
>          virBufferAsprintf(&attrBuf, " flags='0x%lx'", priv->job.apiFlags);
> +        virBufferAsprintf(&attrBuf, " asyncStarted='%llu'", priv->job.asyncStarted);

... the start timestamp is not printed, so ...


> +    }
>  
>      if (priv->job.cb &&
>          priv->job.cb->formatJob(&childBuf, &priv->job, vm) < 0)
> @@ -1307,6 +1308,13 @@ qemuDomainObjPrivateXMLParseJob(virDomainObj *vm,
>              }
>              VIR_FREE(tmp);
>          }
> +
> +        if (virXPathULongLong("string(@asyncStarted)", ctxt,
> +                              &priv->job.asyncStarted) == -2) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                           _("Invalid async job start"));

... this will keep it initialized to 0 ...

> +            return -1;
> +        }
>      }
>  
>      if (virXPathULongHex("string(@flags)", ctxt, &priv->job.apiFlags) == -2) {

[...]

> diff --git a/tests/qemustatusxml2xmldata/migration-in-params-in.xml b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> index f4bc5753c4..9e9c2deac6 100644
> --- a/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> +++ b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> @@ -238,7 +238,7 @@
>      <flag name='dump-completed'/>
>      <flag name='hda-output'/>
>    </qemuCaps>
> -  <job type='none' async='migration in' phase='prepare' flags='0x900'>

... so  existing backup jobs will seem to be started at the beginning of
epoch:

> +  <job type='none' async='migration in' phase='prepare' flags='0x900' asyncStarted='0'>
>      <migParams>
>        <param name='compress-level' value='1'/>
>        <param name='compress-threads' value='8'/>

Probably not a big problem but we IMO should initialize it to current
time. That will break the test though unless you mock the time getting
function.
Re: [libvirt PATCH 17/80] qemu: Restore async job start timestamp on reconnect
Posted by Jiri Denemark 3 years, 9 months ago
On Wed, May 11, 2022 at 15:04:53 +0200, Peter Krempa wrote:
> On Tue, May 10, 2022 at 17:20:38 +0200, Jiri Denemark wrote:
> > Jobs that are supposed to remain active even when libvirt daemon
> > restarts were reported as started at the time the daemon was restarted.
> > This is not very helpful, we should restore the original timestamp.
> > 
> > Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
> > ---
> >  src/qemu/qemu_domainjob.c                     | 20 +++++++++++++------
> >  src/qemu/qemu_domainjob.h                     |  1 +
> >  src/qemu/qemu_process.c                       |  4 +++-
> >  .../migration-in-params-in.xml                |  2 +-
> >  .../migration-out-nbd-bitmaps-in.xml          |  2 +-
> >  .../migration-out-nbd-out.xml                 |  2 +-
> >  .../migration-out-nbd-tls-out.xml             |  2 +-
> >  .../migration-out-params-in.xml               |  2 +-
> >  8 files changed, 23 insertions(+), 12 deletions(-)
> > 
> > diff --git a/src/qemu/qemu_domainjob.c b/src/qemu/qemu_domainjob.c
> > index 1f82457bd4..8e8d229afe 100644
> > --- a/src/qemu/qemu_domainjob.c
> > +++ b/src/qemu/qemu_domainjob.c
> 
> [...]
> 
> > @@ -261,18 +263,15 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
> >  {
> >      qemuDomainObjPrivate *priv = vm->privateData;
> >      qemuDomainJobObj *job = &priv->job;
> > -    unsigned long long now;
> >  
> >      VIR_DEBUG("Restoring %s async job for domain %s",
> >                virDomainAsyncJobTypeToString(asyncJob), vm->def->name);
> >  
> > -    ignore_value(virTimeMillisNow(&now));
> > -
> >      priv->job.jobsQueued++;
> >      job->asyncJob = asyncJob;
> >      job->phase = phase;
> >      job->asyncOwnerAPI = g_strdup(virThreadJobGet());
> > -    job->asyncStarted = now;
> > +    job->asyncStarted = started;
> >  
> >      qemuDomainObjSetAsyncJobMask(vm, allowedJobs);
> >  
> > @@ -280,7 +279,7 @@ qemuDomainObjRestoreAsyncJob(virDomainObj *vm,
> >      qemuDomainJobSetStatsType(priv->job.current, statsType);
> >      job->current->operation = operation;
> >      job->current->status = status;
> > -    job->current->started = now;
> > +    job->current->started = started;
> >  }
> 
> You don't seem to have any fallback when reading back older status XML
> where ...
> 
> >  
> >  
> > @@ -1250,8 +1249,10 @@ qemuDomainObjPrivateXMLFormatJob(virBuffer *buf,
> >                                                            priv->job.phase));
> >      }
> >  
> > -    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE)
> > +    if (priv->job.asyncJob != VIR_ASYNC_JOB_NONE) {
> >          virBufferAsprintf(&attrBuf, " flags='0x%lx'", priv->job.apiFlags);
> > +        virBufferAsprintf(&attrBuf, " asyncStarted='%llu'", priv->job.asyncStarted);
> 
> ... the start timestamp is not printed, so ...
> 
> 
> > +    }
> >  
> >      if (priv->job.cb &&
> >          priv->job.cb->formatJob(&childBuf, &priv->job, vm) < 0)
> > @@ -1307,6 +1308,13 @@ qemuDomainObjPrivateXMLParseJob(virDomainObj *vm,
> >              }
> >              VIR_FREE(tmp);
> >          }
> > +
> > +        if (virXPathULongLong("string(@asyncStarted)", ctxt,
> > +                              &priv->job.asyncStarted) == -2) {
> > +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                           _("Invalid async job start"));
> 
> ... this will keep it initialized to 0 ...
> 
> > +            return -1;
> > +        }
> >      }
> >  
> >      if (virXPathULongHex("string(@flags)", ctxt, &priv->job.apiFlags) == -2) {
> 
> [...]
> 
> > diff --git a/tests/qemustatusxml2xmldata/migration-in-params-in.xml b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> > index f4bc5753c4..9e9c2deac6 100644
> > --- a/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> > +++ b/tests/qemustatusxml2xmldata/migration-in-params-in.xml
> > @@ -238,7 +238,7 @@
> >      <flag name='dump-completed'/>
> >      <flag name='hda-output'/>
> >    </qemuCaps>
> > -  <job type='none' async='migration in' phase='prepare' flags='0x900'>
> 
> ... so  existing backup jobs will seem to be started at the beginning of
> epoch:
> 
> > +  <job type='none' async='migration in' phase='prepare' flags='0x900' asyncStarted='0'>
> >      <migParams>
> >        <param name='compress-level' value='1'/>
> >        <param name='compress-threads' value='8'/>
> 
> Probably not a big problem but we IMO should initialize it to current
> time. That will break the test though unless you mock the time getting
> function.

We can keep this fallback in qemuDomainObjRestoreAsyncJob instead of
putting it in the parser to avoid test issues.

Jirka