[PATCH] dump: introduce dump-cancel QMP command

Hogan Wang via posted 1 patch 1 year, 9 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20220721062118.2015-1-hogan.wang@huawei.com
Maintainers: "Marc-André Lureau" <marcandre.lureau@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
dump/dump.c               | 38 ++++++++++++++++++++++++++++++++++++--
include/sysemu/runstate.h |  1 +
qapi/dump.json            | 21 ++++++++++++++++++++-
3 files changed, 57 insertions(+), 3 deletions(-)
[PATCH] dump: introduce dump-cancel QMP command
Posted by Hogan Wang via 1 year, 9 months ago
There's no way to cancel the current executing dump process, lead to the
virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
job after daemon restart.

Add the 'cancelling' and 'cancelled' dump states.

Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
The dump process check the 'cancelling' state and break loops. 
The 'cancelled' state mark the dump process cancelled success.

---
 dump/dump.c               | 38 ++++++++++++++++++++++++++++++++++++--
 include/sysemu/runstate.h |  1 +
 qapi/dump.json            | 21 ++++++++++++++++++++-
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 4d9658ffa2..a0ac85aa02 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -118,6 +118,10 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
     DumpState *s = opaque;
     size_t written_size;
 
+    if (qemu_system_dump_cancelling()) {
+        return -ECANCELED;
+    }
+
     written_size = qemu_write_full(s->fd, buf, size);
     if (written_size != size) {
         return -errno;
@@ -627,6 +631,10 @@ static void dump_iterate(DumpState *s, Error **errp)
 
     do {
         block = s->next_block;
+        if (qemu_system_dump_cancelling()) {
+            error_setg(errp, "dump: job cancelled");
+            return;
+        }
 
         size = block->target_end - block->target_start;
         if (s->has_filter) {
@@ -1321,6 +1329,11 @@ static void write_dump_pages(DumpState *s, Error **errp)
      * first page of page section
      */
     while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
+        if (qemu_system_dump_cancelling()) {
+            error_setg(errp, "dump: job cancelled");
+            goto out;
+        }
+
         /* check zero page */
         if (buffer_is_zero(buf, s->dump_info.page_size)) {
             ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
@@ -1540,6 +1553,22 @@ bool qemu_system_dump_in_progress(void)
     return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
 }
 
+bool qemu_system_dump_cancelling(void)
+{
+    DumpState *state = &dump_state_global;
+    return (qatomic_read(&state->status) == DUMP_STATUS_CANCELLING);
+}
+
+void qmp_dump_cancel(Error **errp)
+{
+    DumpState *state = &dump_state_global;
+    if (!qemu_system_dump_in_progress()) {
+        return;
+    }
+    qatomic_set(&state->status, DUMP_STATUS_CANCELLING);
+}
+
+
 /* calculate total size of memory to be dumped (taking filter into
  * acoount.) */
 static int64_t dump_calculate_size(DumpState *s)
@@ -1838,8 +1867,13 @@ static void dump_process(DumpState *s, Error **errp)
 
     /* make sure status is written after written_size updates */
     smp_wmb();
-    qatomic_set(&s->status,
-               (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
+    if (qemu_system_dump_cancelling()) {
+        qatomic_set(&s->status, DUMP_STATUS_CANCELLED);
+    } else if (*errp) {
+        qatomic_set(&s->status, DUMP_STATUS_FAILED);
+    } else {
+        qatomic_set(&s->status, DUMP_STATUS_COMPLETED);
+    }
 
     /* send DUMP_COMPLETED message (unconditionally) */
     result = qmp_query_dump(NULL);
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index f3ed52548e..a36c1d43f6 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -76,6 +76,7 @@ void qemu_system_reset(ShutdownCause reason);
 void qemu_system_guest_panicked(GuestPanicInformation *info);
 void qemu_system_guest_crashloaded(GuestPanicInformation *info);
 bool qemu_system_dump_in_progress(void);
+bool qemu_system_dump_cancelling(void);
 
 #endif
 
diff --git a/qapi/dump.json b/qapi/dump.json
index 90859c5483..6dfbb6b7de 100644
--- a/qapi/dump.json
+++ b/qapi/dump.json
@@ -108,7 +108,7 @@
 # Since: 2.6
 ##
 { 'enum': 'DumpStatus',
-  'data': [ 'none', 'active', 'completed', 'failed' ] }
+  'data': [ 'none', 'active', 'completed', 'failed', 'cancelling', 'cancelled' ] }
 
 ##
 # @DumpQueryResult:
@@ -200,3 +200,22 @@
 ##
 { 'command': 'query-dump-guest-memory-capability',
   'returns': 'DumpGuestMemoryCapability' }
+
+##
+# @dump-cancel:
+#
+# Cancel the current executing dump process.
+#
+# Returns: nothing on success
+#
+# Notes: This command succeeds even if there is no dump process running.
+#
+# Since: 7.2
+#
+# Example:
+#
+# -> { "execute": "dump-cancel" }
+# <- { "return": {} }
+#
+##
+{ 'command': 'dump-cancel' }
-- 
2.33.0
Re: [PATCH] dump: introduce dump-cancel QMP command
Posted by Daniel P. Berrangé 1 year, 9 months ago
On Thu, Jul 21, 2022 at 02:21:18PM +0800, Hogan Wang via wrote:
> There's no way to cancel the current executing dump process, lead to the
> virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
> job after daemon restart.
> 
> Add the 'cancelling' and 'cancelled' dump states.
> 
> Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
> The dump process check the 'cancelling' state and break loops. 
> The 'cancelled' state mark the dump process cancelled success.

On the one hand this patch is fairly simple which is obviously
desirable.

On the other hand though, this feels like it is further re-inventing
the jobs concept.

IMHO ideally the 'dump' command probably ought to get a 'job-id'
parameter, and integrate with the generic background jobs  framework.
This would unlock the ability to use existing commands like
'job-cancel', 'job-pause', 'job-resume', 'queyr-jobs' to interact
with it.

> 
> ---
>  dump/dump.c               | 38 ++++++++++++++++++++++++++++++++++++--
>  include/sysemu/runstate.h |  1 +
>  qapi/dump.json            | 21 ++++++++++++++++++++-
>  3 files changed, 57 insertions(+), 3 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 4d9658ffa2..a0ac85aa02 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -118,6 +118,10 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
>      DumpState *s = opaque;
>      size_t written_size;
>  
> +    if (qemu_system_dump_cancelling()) {
> +        return -ECANCELED;
> +    }
> +
>      written_size = qemu_write_full(s->fd, buf, size);
>      if (written_size != size) {
>          return -errno;
> @@ -627,6 +631,10 @@ static void dump_iterate(DumpState *s, Error **errp)
>  
>      do {
>          block = s->next_block;
> +        if (qemu_system_dump_cancelling()) {
> +            error_setg(errp, "dump: job cancelled");
> +            return;
> +        }
>  
>          size = block->target_end - block->target_start;
>          if (s->has_filter) {
> @@ -1321,6 +1329,11 @@ static void write_dump_pages(DumpState *s, Error **errp)
>       * first page of page section
>       */
>      while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
> +        if (qemu_system_dump_cancelling()) {
> +            error_setg(errp, "dump: job cancelled");
> +            goto out;
> +        }
> +
>          /* check zero page */
>          if (buffer_is_zero(buf, s->dump_info.page_size)) {
>              ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
> @@ -1540,6 +1553,22 @@ bool qemu_system_dump_in_progress(void)
>      return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
>  }
>  
> +bool qemu_system_dump_cancelling(void)
> +{
> +    DumpState *state = &dump_state_global;
> +    return (qatomic_read(&state->status) == DUMP_STATUS_CANCELLING);
> +}
> +
> +void qmp_dump_cancel(Error **errp)
> +{
> +    DumpState *state = &dump_state_global;
> +    if (!qemu_system_dump_in_progress()) {
> +        return;
> +    }
> +    qatomic_set(&state->status, DUMP_STATUS_CANCELLING);
> +}
> +
> +
>  /* calculate total size of memory to be dumped (taking filter into
>   * acoount.) */
>  static int64_t dump_calculate_size(DumpState *s)
> @@ -1838,8 +1867,13 @@ static void dump_process(DumpState *s, Error **errp)
>  
>      /* make sure status is written after written_size updates */
>      smp_wmb();
> -    qatomic_set(&s->status,
> -               (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
> +    if (qemu_system_dump_cancelling()) {
> +        qatomic_set(&s->status, DUMP_STATUS_CANCELLED);
> +    } else if (*errp) {
> +        qatomic_set(&s->status, DUMP_STATUS_FAILED);
> +    } else {
> +        qatomic_set(&s->status, DUMP_STATUS_COMPLETED);
> +    }
>  
>      /* send DUMP_COMPLETED message (unconditionally) */
>      result = qmp_query_dump(NULL);
> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
> index f3ed52548e..a36c1d43f6 100644
> --- a/include/sysemu/runstate.h
> +++ b/include/sysemu/runstate.h
> @@ -76,6 +76,7 @@ void qemu_system_reset(ShutdownCause reason);
>  void qemu_system_guest_panicked(GuestPanicInformation *info);
>  void qemu_system_guest_crashloaded(GuestPanicInformation *info);
>  bool qemu_system_dump_in_progress(void);
> +bool qemu_system_dump_cancelling(void);
>  
>  #endif
>  
> diff --git a/qapi/dump.json b/qapi/dump.json
> index 90859c5483..6dfbb6b7de 100644
> --- a/qapi/dump.json
> +++ b/qapi/dump.json
> @@ -108,7 +108,7 @@
>  # Since: 2.6
>  ##
>  { 'enum': 'DumpStatus',
> -  'data': [ 'none', 'active', 'completed', 'failed' ] }
> +  'data': [ 'none', 'active', 'completed', 'failed', 'cancelling', 'cancelled' ] }
>  
>  ##
>  # @DumpQueryResult:
> @@ -200,3 +200,22 @@
>  ##
>  { 'command': 'query-dump-guest-memory-capability',
>    'returns': 'DumpGuestMemoryCapability' }
> +
> +##
> +# @dump-cancel:
> +#
> +# Cancel the current executing dump process.
> +#
> +# Returns: nothing on success
> +#
> +# Notes: This command succeeds even if there is no dump process running.
> +#
> +# Since: 7.2
> +#
> +# Example:
> +#
> +# -> { "execute": "dump-cancel" }
> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'dump-cancel' }
> -- 
> 2.33.0
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH] dump: introduce dump-cancel QMP command
Posted by Daniel P. Berrangé 1 year, 9 months ago
On Thu, Jul 21, 2022 at 09:13:53AM +0100, Daniel P. Berrangé wrote:
> On Thu, Jul 21, 2022 at 02:21:18PM +0800, Hogan Wang via wrote:
> > There's no way to cancel the current executing dump process, lead to the
> > virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
> > job after daemon restart.
> > 
> > Add the 'cancelling' and 'cancelled' dump states.
> > 
> > Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
> > The dump process check the 'cancelling' state and break loops. 
> > The 'cancelled' state mark the dump process cancelled success.
> 
> On the one hand this patch is fairly simple which is obviously
> desirable.
> 
> On the other hand though, this feels like it is further re-inventing
> the jobs concept.
> 
> IMHO ideally the 'dump' command probably ought to get a 'job-id'

I meant to say an *optional* job-id field, since we need to keep
back compat. Possibly we could secretly create a job anyway
internally if job-id is omitted, if it makes code easier.

> parameter, and integrate with the generic background jobs  framework.
> This would unlock the ability to use existing commands like
> 'job-cancel', 'job-pause', 'job-resume', 'queyr-jobs' to interact
> with it.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH] dump: introduce dump-cancel QMP command
Posted by Markus Armbruster 1 year, 9 months ago
Daniel P. Berrangé <berrange@redhat.com> writes:

> On Thu, Jul 21, 2022 at 02:21:18PM +0800, Hogan Wang via wrote:
>> There's no way to cancel the current executing dump process, lead to the
>> virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
>> job after daemon restart.
>> 
>> Add the 'cancelling' and 'cancelled' dump states.
>> 
>> Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
>> The dump process check the 'cancelling' state and break loops. 
>> The 'cancelled' state mark the dump process cancelled success.
>
> On the one hand this patch is fairly simple which is obviously
> desirable.
>
> On the other hand though, this feels like it is further re-inventing
> the jobs concept.
>
> IMHO ideally the 'dump' command probably ought to get a 'job-id'
> parameter, and integrate with the generic background jobs  framework.
> This would unlock the ability to use existing commands like
> 'job-cancel', 'job-pause', 'job-resume', 'queyr-jobs' to interact
> with it.

Seconded.

Hogan Wang, would you be interested in rebasing the dump feature onto
the jobs infrastructure?
Re: [PATCH] dump: introduce dump-cancel QMP command
Posted by Marc-André Lureau 1 year, 9 months ago
Hi

On Thu, Jul 21, 2022 at 10:24 AM Hogan Wang via <qemu-devel@nongnu.org>
wrote:

> There's no way to cancel the current executing dump process, lead to the
> virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
> job after daemon restart.
>
> Add the 'cancelling' and 'cancelled' dump states.
>
> Use 'dump-cancel' qmp command Set the dump state as 'cancelling'.
> The dump process check the 'cancelling' state and break loops.
> The 'cancelled' state mark the dump process cancelled success.
>

This will only really work if the dump was detached. You should explain
that in the commit message & in the documentation.

>
> ---
>  dump/dump.c               | 38 ++++++++++++++++++++++++++++++++++++--
>  include/sysemu/runstate.h |  1 +
>  qapi/dump.json            | 21 ++++++++++++++++++++-
>  3 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index 4d9658ffa2..a0ac85aa02 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -118,6 +118,10 @@ static int fd_write_vmcore(const void *buf, size_t
> size, void *opaque)
>      DumpState *s = opaque;
>      size_t written_size;
>
> +    if (qemu_system_dump_cancelling()) {
> +        return -ECANCELED;
> +    }
> +
>      written_size = qemu_write_full(s->fd, buf, size);
>      if (written_size != size) {
>          return -errno;
> @@ -627,6 +631,10 @@ static void dump_iterate(DumpState *s, Error **errp)
>
>      do {
>          block = s->next_block;
> +        if (qemu_system_dump_cancelling()) {
> +            error_setg(errp, "dump: job cancelled");
> +            return;
> +        }
>
>          size = block->target_end - block->target_start;
>          if (s->has_filter) {
> @@ -1321,6 +1329,11 @@ static void write_dump_pages(DumpState *s, Error
> **errp)
>       * first page of page section
>       */
>      while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
> +        if (qemu_system_dump_cancelling()) {
> +            error_setg(errp, "dump: job cancelled");
> +            goto out;
> +        }
> +
>          /* check zero page */
>          if (buffer_is_zero(buf, s->dump_info.page_size)) {
>              ret = write_cache(&page_desc, &pd_zero,
> sizeof(PageDescriptor),
> @@ -1540,6 +1553,22 @@ bool qemu_system_dump_in_progress(void)
>      return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
>  }
>
> +bool qemu_system_dump_cancelling(void)
> +{
> +    DumpState *state = &dump_state_global;
> +    return (qatomic_read(&state->status) == DUMP_STATUS_CANCELLING);
> +}
> +
> +void qmp_dump_cancel(Error **errp)
> +{
> +    DumpState *state = &dump_state_global;
> +    if (!qemu_system_dump_in_progress()) {
> +        return;
> +    }
> +    qatomic_set(&state->status, DUMP_STATUS_CANCELLING);
> +}
>

qemu_system_dump_in_progress() should probably be updated to take
CANCELLING state into account (and avoid another dump to be created
concurrently)



> +
> +
>  /* calculate total size of memory to be dumped (taking filter into
>   * acoount.) */
>  static int64_t dump_calculate_size(DumpState *s)
> @@ -1838,8 +1867,13 @@ static void dump_process(DumpState *s, Error **errp)
>
>      /* make sure status is written after written_size updates */
>      smp_wmb();
> -    qatomic_set(&s->status,
> -               (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
> +    if (qemu_system_dump_cancelling()) {
> +        qatomic_set(&s->status, DUMP_STATUS_CANCELLED);
> +    } else if (*errp) {
> +        qatomic_set(&s->status, DUMP_STATUS_FAILED);
> +    } else {
> +        qatomic_set(&s->status, DUMP_STATUS_COMPLETED);
> +    }
>
>      /* send DUMP_COMPLETED message (unconditionally) */
>      result = qmp_query_dump(NULL);
> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
> index f3ed52548e..a36c1d43f6 100644
> --- a/include/sysemu/runstate.h
> +++ b/include/sysemu/runstate.h
> @@ -76,6 +76,7 @@ void qemu_system_reset(ShutdownCause reason);
>  void qemu_system_guest_panicked(GuestPanicInformation *info);
>  void qemu_system_guest_crashloaded(GuestPanicInformation *info);
>  bool qemu_system_dump_in_progress(void);
> +bool qemu_system_dump_cancelling(void);
>
>  #endif
>
> diff --git a/qapi/dump.json b/qapi/dump.json
> index 90859c5483..6dfbb6b7de 100644
> --- a/qapi/dump.json
> +++ b/qapi/dump.json
> @@ -108,7 +108,7 @@
>  # Since: 2.6
>  ##
>  { 'enum': 'DumpStatus',
> -  'data': [ 'none', 'active', 'completed', 'failed' ] }
> +  'data': [ 'none', 'active', 'completed', 'failed', 'cancelling',
> 'cancelled' ] }
>
>  ##
>  # @DumpQueryResult:
> @@ -200,3 +200,22 @@
>  ##
>  { 'command': 'query-dump-guest-memory-capability',
>    'returns': 'DumpGuestMemoryCapability' }
> +
> +##
> +# @dump-cancel:
> +#
> +# Cancel the current executing dump process.
> +#
> +# Returns: nothing on success
> +#
> +# Notes: This command succeeds even if there is no dump process running.
> +#
> +# Since: 7.2
> +#
> +# Example:
> +#
> +# -> { "execute": "dump-cancel" }
> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'dump-cancel' }
> --
> 2.33.0
>

Looks good to me, but I wish there would be some tests. Could you try to
write some?


-- 
Marc-André Lureau