[v2] migration: discard non-migratable RAMBlocks

[Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Cédric Le Goater 7 years, 10 months ago

On the POWER9 processor, the XIVE interrupt controller can control
interrupt sources using MMIO to trigger events, to EOI or to turn off
the sources. Priority management and interrupt acknowledgment is also
controlled by MMIO in the presenter sub-engine.

These MMIO regions are exposed to guests in QEMU with a set of 'ram
device' memory mappings, similarly to VFIO, and the VMAs are populated
dynamically with the appropriate pages using a fault handler.

But, these regions are an issue for migration. We need to discard the
associated RAMBlocks from the RAM state on the source VM and let the
destination VM rebuild the memory mappings on the new host in the
post_load() operation just before resuming the system.

To achieve this goal, the following introduces a new RAMBlock flag
RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
vmstate_unregister_ram() routines. This flag is then used by the
migration to identify RAMBlocks to discard on the source. Some checks
are also performed on the destination to make sure nothing invalid was
sent.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---

 Changes since v1:

 - introduced a new RAMBlock flag RAM_MIGRATABLE
 - fixed RAMBLOCK_FOREACH_MIGRATABLE() macro to handle code that
   omitted {}. 

 exec.c                    | 10 ++++++++++
 include/exec/cpu-common.h |  1 +
 migration/ram.c           | 42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/exec.c b/exec.c
index 02b1efebb7c3..e9432c06294e 100644
--- a/exec.c
+++ b/exec.c
@@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
  * (Set during postcopy)
  */
 #define RAM_UF_ZEROPAGE (1 << 3)
+
+/* RAM can be migrated */
+#define RAM_MIGRATABLE (1 << 4)
 #endif
 
 #ifdef TARGET_PAGE_BITS_VARY
@@ -1807,6 +1810,11 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
     rb->flags |= RAM_UF_ZEROPAGE;
 }
 
+bool qemu_ram_is_migratable(RAMBlock *rb)
+{
+    return rb->flags & RAM_MIGRATABLE;
+}
+
 /* Called with iothread lock held.  */
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
 {
@@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
         }
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+    new_block->flags |= RAM_MIGRATABLE;
 
     rcu_read_lock();
     RAMBLOCK_FOREACH(block) {
@@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
      */
     if (block) {
         memset(block->idstr, 0, sizeof(block->idstr));
+        block->flags &= ~RAM_MIGRATABLE;
     }
 }
 
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 24d335f95d45..96854519b514 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -75,6 +75,7 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
+bool qemu_ram_is_migratable(RAMBlock *rb);
 
 size_t qemu_ram_pagesize(RAMBlock *block);
 size_t qemu_ram_pagesize_largest(void);
diff --git a/migration/ram.c b/migration/ram.c
index 0e90efa09236..89c3accc4e26 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -187,6 +187,11 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                       nr);
 }
 
+/* Should be holding either ram_list.mutex, or the RCU lock. */
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
+    RAMBLOCK_FOREACH(block)                            \
+        if (!qemu_ram_is_migratable(block)) {} else
+
 /*
  * An outstanding page request, on the source, having been received
  * and queued
@@ -780,6 +785,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
     unsigned long *bitmap = rb->bmap;
     unsigned long next;
 
+    if (!qemu_ram_is_migratable(rb)) {
+        return size;
+    }
+
     if (rs->ram_bulk_stage && start > 0) {
         next = start + 1;
     } else {
@@ -825,7 +834,7 @@ uint64_t ram_pagesize_summary(void)
     RAMBlock *block;
     uint64_t summary = 0;
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         summary |= block->page_size;
     }
 
@@ -849,7 +858,7 @@ static void migration_bitmap_sync(RAMState *rs)
 
     qemu_mutex_lock(&rs->bitmap_mutex);
     rcu_read_lock();
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
     }
     rcu_read_unlock();
@@ -1499,6 +1508,10 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
     size_t pagesize_bits =
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
 
+    if (!qemu_ram_is_migratable(pss->block)) {
+        return 0;
+    }
+
     do {
         tmppages = ram_save_target_page(rs, pss, last_stage);
         if (tmppages < 0) {
@@ -1587,7 +1600,7 @@ uint64_t ram_bytes_total(void)
     uint64_t total = 0;
 
     rcu_read_lock();
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         total += block->used_length;
     }
     rcu_read_unlock();
@@ -1642,7 +1655,7 @@ static void ram_save_cleanup(void *opaque)
      */
     memory_global_dirty_log_stop();
 
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         g_free(block->bmap);
         block->bmap = NULL;
         g_free(block->unsentmap);
@@ -1705,7 +1718,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
 {
     struct RAMBlock *block;
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         unsigned long *bitmap = block->bmap;
         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
@@ -1783,7 +1796,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
     struct RAMBlock *block;
     int ret;
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         PostcopyDiscardState *pds =
             postcopy_discard_send_init(ms, block->idstr);
 
@@ -1991,7 +2004,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     rs->last_sent_block = NULL;
     rs->last_page = 0;
 
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
         unsigned long *bitmap = block->bmap;
         unsigned long *unsentmap = block->unsentmap;
@@ -2150,7 +2163,7 @@ static void ram_list_init_bitmaps(void)
 
     /* Skip setting bitmap if there is no RAM */
     if (ram_bytes_total()) {
-        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
             block->bmap = bitmap_new(pages);
             bitmap_set(block->bmap, 0, pages);
@@ -2226,7 +2239,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-    RAMBLOCK_FOREACH(block) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->used_length);
@@ -2471,6 +2484,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
         return NULL;
     }
 
+    if (!qemu_ram_is_migratable(block)) {
+        error_report("block %s should not be migrated !", id);
+        return NULL;
+    }
+
     return block;
 }
 
@@ -2921,7 +2939,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 length = qemu_get_be64(f);
 
                 block = qemu_ram_block_by_name(id);
-                if (block) {
+                if (block && !qemu_ram_is_migratable(block)) {
+                    error_report("block %s should not be migrated !", id);
+                    ret = -EINVAL;
+
+                } else if (block) {
                     if (length != block->used_length) {
                         Error *local_err = NULL;
 
-- 
2.13.6

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by no-reply@patchew.org 7 years, 10 months ago

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180413075200.15217-1-clg@kaod.org
Subject: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
    echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
    if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
        failed=1
        echo
    fi
    n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 t [tag update]            patchew/1523372486-13190-1-git-send-email-walling@linux.ibm.com -> patchew/1523372486-13190-1-git-send-email-walling@linux.ibm.com
 * [new tag]               patchew/20180413075200.15217-1-clg@kaod.org -> patchew/20180413075200.15217-1-clg@kaod.org
Switched to a new branch 'test'
59f7dd4351 migration: discard non-migratable RAMBlocks

=== OUTPUT BEGIN ===
Checking PATCH 1/1: migration: discard non-migratable RAMBlocks...
ERROR: Macros with multiple statements should be enclosed in a do - while loop
#96: FILE: migration/ram.c:191:
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
+    RAMBLOCK_FOREACH(block)                            \
+        if (!qemu_ram_is_migratable(block)) {} else

ERROR: trailing statements should be on next line
#98: FILE: migration/ram.c:193:
+        if (!qemu_ram_is_migratable(block)) {} else

total: 2 errors, 0 warnings, 167 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@redhat.com

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Juan Quintela 7 years, 10 months ago

no-reply@patchew.org wrote:
> Hi,
>
> This series seems to have some coding style problems. See output below for
> more information:
>
> Type: series
> Message-id: 20180413075200.15217-1-clg@kaod.org
> Subject: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks
>

> === OUTPUT BEGIN ===
> Checking PATCH 1/1: migration: discard non-migratable RAMBlocks...
> ERROR: Macros with multiple statements should be enclosed in a do - while loop
> #96: FILE: migration/ram.c:191:
> +#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
> +    RAMBLOCK_FOREACH(block)                            \
> +        if (!qemu_ram_is_migratable(block)) {} else
>
> ERROR: trailing statements should be on next line
> #98: FILE: migration/ram.c:193:
> +        if (!qemu_ram_is_migratable(block)) {} else
>
> total: 2 errors, 0 warnings, 167 lines checked

To be fair, I don't know how to fix this outside of:

BEGIN_RAMBLOCK_FOREACH(...)

END_RAMBLOCK_FOREACH()

type of construct.  I was wondering about what could happen if the
RAMBLOCK_FOREACH was insied an if, but checkpatch beated me to it.

Later, Juan.

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 10 months ago

On 13 April 2018 at 12:18, Juan Quintela <quintela@redhat.com> wrote:
> no-reply@patchew.org wrote:
>> === OUTPUT BEGIN ===
>> Checking PATCH 1/1: migration: discard non-migratable RAMBlocks...
>> ERROR: Macros with multiple statements should be enclosed in a do - while loop
>> #96: FILE: migration/ram.c:191:
>> +#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
>> +    RAMBLOCK_FOREACH(block)                            \
>> +        if (!qemu_ram_is_migratable(block)) {} else
>>
>> ERROR: trailing statements should be on next line
>> #98: FILE: migration/ram.c:193:
>> +        if (!qemu_ram_is_migratable(block)) {} else
>>
>> total: 2 errors, 0 warnings, 167 lines checked
>
> To be fair, I don't know how to fix this outside of:
>
> BEGIN_RAMBLOCK_FOREACH(...)
>
> END_RAMBLOCK_FOREACH()
>
> type of construct.  I was wondering about what could happen if the
> RAMBLOCK_FOREACH was insied an if, but checkpatch beated me to it.

RAMBLOCK_FOREACH inside if() should work fine here. I don't
think we can expect checkpatch to be able to cope with all
of the complicated macro magic we do, so if we're satisfied
that the macro is doing what we want then we should just
ignore the checkpatch error.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Dr. David Alan Gilbert 7 years, 9 months ago

* Cédric Le Goater (clg@kaod.org) wrote:
> On the POWER9 processor, the XIVE interrupt controller can control
> interrupt sources using MMIO to trigger events, to EOI or to turn off
> the sources. Priority management and interrupt acknowledgment is also
> controlled by MMIO in the presenter sub-engine.
> 
> These MMIO regions are exposed to guests in QEMU with a set of 'ram
> device' memory mappings, similarly to VFIO, and the VMAs are populated
> dynamically with the appropriate pages using a fault handler.
> 
> But, these regions are an issue for migration. We need to discard the
> associated RAMBlocks from the RAM state on the source VM and let the
> destination VM rebuild the memory mappings on the new host in the
> post_load() operation just before resuming the system.
> 
> To achieve this goal, the following introduces a new RAMBlock flag
> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> vmstate_unregister_ram() routines. This flag is then used by the
> migration to identify RAMBlocks to discard on the source. Some checks
> are also performed on the destination to make sure nothing invalid was
> sent.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Hi Cédric,
  Yes, this is looking nicer; the macro makes the changes quite small.
A couple of questions:

> ---
> 
>  Changes since v1:
> 
>  - introduced a new RAMBlock flag RAM_MIGRATABLE
>  - fixed RAMBLOCK_FOREACH_MIGRATABLE() macro to handle code that
>    omitted {}. 
> 
>  exec.c                    | 10 ++++++++++
>  include/exec/cpu-common.h |  1 +
>  migration/ram.c           | 42 ++++++++++++++++++++++++++++++++----------
>  3 files changed, 43 insertions(+), 10 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 02b1efebb7c3..e9432c06294e 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
>   * (Set during postcopy)
>   */
>  #define RAM_UF_ZEROPAGE (1 << 3)
> +
> +/* RAM can be migrated */
> +#define RAM_MIGRATABLE (1 << 4)
>  #endif
>  
>  #ifdef TARGET_PAGE_BITS_VARY
> @@ -1807,6 +1810,11 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
>      rb->flags |= RAM_UF_ZEROPAGE;
>  }
>  
> +bool qemu_ram_is_migratable(RAMBlock *rb)
> +{
> +    return rb->flags & RAM_MIGRATABLE;
> +}
> +
>  /* Called with iothread lock held.  */
>  void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>  {
> @@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>          }
>      }
>      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
> +    new_block->flags |= RAM_MIGRATABLE;
>  
>      rcu_read_lock();
>      RAMBLOCK_FOREACH(block) {
> @@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
>       */
>      if (block) {
>          memset(block->idstr, 0, sizeof(block->idstr));
> +        block->flags &= ~RAM_MIGRATABLE;
>      }
>  }

Why in qemu_ram_set_idstr and qemu_ram_(un)set_idstr ?  It seems an
odd place to put them.


> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 24d335f95d45..96854519b514 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -75,6 +75,7 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
>  bool qemu_ram_is_shared(RAMBlock *rb);
>  bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
>  void qemu_ram_set_uf_zeroable(RAMBlock *rb);
> +bool qemu_ram_is_migratable(RAMBlock *rb);
>  
>  size_t qemu_ram_pagesize(RAMBlock *block);
>  size_t qemu_ram_pagesize_largest(void);
> diff --git a/migration/ram.c b/migration/ram.c
> index 0e90efa09236..89c3accc4e26 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -187,6 +187,11 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
>                        nr);
>  }
>  
> +/* Should be holding either ram_list.mutex, or the RCU lock. */
> +#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
> +    RAMBLOCK_FOREACH(block)                            \
> +        if (!qemu_ram_is_migratable(block)) {} else
> +
>  /*
>   * An outstanding page request, on the source, having been received
>   * and queued
> @@ -780,6 +785,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      unsigned long *bitmap = rb->bmap;
>      unsigned long next;
>  
> +    if (!qemu_ram_is_migratable(rb)) {
> +        return size;
> +    }
> +
>      if (rs->ram_bulk_stage && start > 0) {
>          next = start + 1;
>      } else {
> @@ -825,7 +834,7 @@ uint64_t ram_pagesize_summary(void)
>      RAMBlock *block;
>      uint64_t summary = 0;
>  
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          summary |= block->page_size;
>      }
>  
> @@ -849,7 +858,7 @@ static void migration_bitmap_sync(RAMState *rs)
>  
>      qemu_mutex_lock(&rs->bitmap_mutex);
>      rcu_read_lock();
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          migration_bitmap_sync_range(rs, block, 0, block->used_length);
>      }
>      rcu_read_unlock();
> @@ -1499,6 +1508,10 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>      size_t pagesize_bits =
>          qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>  
> +    if (!qemu_ram_is_migratable(pss->block)) {
> +        return 0;
> +    }
> +

We might want to print a diagnostic there - it shouldn't happen right?

>      do {
>          tmppages = ram_save_target_page(rs, pss, last_stage);
>          if (tmppages < 0) {
> @@ -1587,7 +1600,7 @@ uint64_t ram_bytes_total(void)
>      uint64_t total = 0;
>  
>      rcu_read_lock();
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          total += block->used_length;
>      }
>      rcu_read_unlock();
> @@ -1642,7 +1655,7 @@ static void ram_save_cleanup(void *opaque)
>       */
>      memory_global_dirty_log_stop();
>  
> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          g_free(block->bmap);
>          block->bmap = NULL;
>          g_free(block->unsentmap);
> @@ -1705,7 +1718,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
>  {
>      struct RAMBlock *block;
>  
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          unsigned long *bitmap = block->bmap;
>          unsigned long range = block->used_length >> TARGET_PAGE_BITS;
>          unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
> @@ -1783,7 +1796,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
>      struct RAMBlock *block;
>      int ret;
>  
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          PostcopyDiscardState *pds =
>              postcopy_discard_send_init(ms, block->idstr);
>  
> @@ -1991,7 +2004,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
>      rs->last_sent_block = NULL;
>      rs->last_page = 0;
>  
> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
>          unsigned long *bitmap = block->bmap;
>          unsigned long *unsentmap = block->unsentmap;
> @@ -2150,7 +2163,7 @@ static void ram_list_init_bitmaps(void)
>  
>      /* Skip setting bitmap if there is no RAM */
>      if (ram_bytes_total()) {
> -        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +        RAMBLOCK_FOREACH_MIGRATABLE(block) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
>              block->bmap = bitmap_new(pages);
>              bitmap_set(block->bmap, 0, pages);
> @@ -2226,7 +2239,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>  
>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>  
> -    RAMBLOCK_FOREACH(block) {
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>          qemu_put_byte(f, strlen(block->idstr));
>          qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
>          qemu_put_be64(f, block->used_length);

Good, that's all quite neat;  we've just got to watch out we don't
accidentally add any RAMBLOCK_FOREACH's back.

> @@ -2471,6 +2484,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
>          return NULL;
>      }
>  
> +    if (!qemu_ram_is_migratable(block)) {
> +        error_report("block %s should not be migrated !", id);
> +        return NULL;
> +    }
> +
>      return block;
>  }
>  
> @@ -2921,7 +2939,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  length = qemu_get_be64(f);
>  
>                  block = qemu_ram_block_by_name(id);
> -                if (block) {
> +                if (block && !qemu_ram_is_migratable(block)) {
> +                    error_report("block %s should not be migrated !", id);
> +                    ret = -EINVAL;
> +
> +                } else if (block) {
>                      if (length != block->used_length) {
>                          Error *local_err = NULL;
>  
> -- 
> 2.13.6

Dave

> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Cédric Le Goater 7 years, 9 months ago

Hello David,

On 04/19/2018 06:58 PM, Dr. David Alan Gilbert wrote:
> * Cédric Le Goater (clg@kaod.org) wrote:
>> On the POWER9 processor, the XIVE interrupt controller can control
>> interrupt sources using MMIO to trigger events, to EOI or to turn off
>> the sources. Priority management and interrupt acknowledgment is also
>> controlled by MMIO in the presenter sub-engine.
>>
>> These MMIO regions are exposed to guests in QEMU with a set of 'ram
>> device' memory mappings, similarly to VFIO, and the VMAs are populated
>> dynamically with the appropriate pages using a fault handler.
>>
>> But, these regions are an issue for migration. We need to discard the
>> associated RAMBlocks from the RAM state on the source VM and let the
>> destination VM rebuild the memory mappings on the new host in the
>> post_load() operation just before resuming the system.
>>
>> To achieve this goal, the following introduces a new RAMBlock flag
>> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
>> vmstate_unregister_ram() routines. This flag is then used by the
>> migration to identify RAMBlocks to discard on the source. Some checks
>> are also performed on the destination to make sure nothing invalid was
>> sent.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Hi Cédric,
>   Yes, this is looking nicer; the macro makes the changes quite small.
> A couple of questions:
> 
>> ---
>>
>>  Changes since v1:
>>
>>  - introduced a new RAMBlock flag RAM_MIGRATABLE
>>  - fixed RAMBLOCK_FOREACH_MIGRATABLE() macro to handle code that
>>    omitted {}. 
>>
>>  exec.c                    | 10 ++++++++++
>>  include/exec/cpu-common.h |  1 +
>>  migration/ram.c           | 42 ++++++++++++++++++++++++++++++++----------
>>  3 files changed, 43 insertions(+), 10 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 02b1efebb7c3..e9432c06294e 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
>>   * (Set during postcopy)
>>   */
>>  #define RAM_UF_ZEROPAGE (1 << 3)
>> +
>> +/* RAM can be migrated */
>> +#define RAM_MIGRATABLE (1 << 4)
>>  #endif
>>  
>>  #ifdef TARGET_PAGE_BITS_VARY
>> @@ -1807,6 +1810,11 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
>>      rb->flags |= RAM_UF_ZEROPAGE;
>>  }
>>  
>> +bool qemu_ram_is_migratable(RAMBlock *rb)
>> +{
>> +    return rb->flags & RAM_MIGRATABLE;
>> +}
>> +
>>  /* Called with iothread lock held.  */
>>  void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>>  {
>> @@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>>          }
>>      }
>>      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
>> +    new_block->flags |= RAM_MIGRATABLE;
>>  
>>      rcu_read_lock();
>>      RAMBLOCK_FOREACH(block) {
>> @@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
>>       */
>>      if (block) {
>>          memset(block->idstr, 0, sizeof(block->idstr));
>> +        block->flags &= ~RAM_MIGRATABLE;
>>      }
>>  }
> 
> Why in qemu_ram_set_idstr and qemu_ram_(un)set_idstr ?  It seems an
> odd place to put them.

The only place where this routines are called is from vmstate_un/register_ram()
It seemed unnecessary to add an extra interface qemu_ram_un/set_migratable().

May be should just rename qemu_ram_un/set_idstr() ?

> 
>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>> index 24d335f95d45..96854519b514 100644
>> --- a/include/exec/cpu-common.h
>> +++ b/include/exec/cpu-common.h
>> @@ -75,6 +75,7 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
>>  bool qemu_ram_is_shared(RAMBlock *rb);
>>  bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
>>  void qemu_ram_set_uf_zeroable(RAMBlock *rb);
>> +bool qemu_ram_is_migratable(RAMBlock *rb);
>>  
>>  size_t qemu_ram_pagesize(RAMBlock *block);
>>  size_t qemu_ram_pagesize_largest(void);
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 0e90efa09236..89c3accc4e26 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -187,6 +187,11 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
>>                        nr);
>>  }
>>  
>> +/* Should be holding either ram_list.mutex, or the RCU lock. */
>> +#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
>> +    RAMBLOCK_FOREACH(block)                            \
>> +        if (!qemu_ram_is_migratable(block)) {} else
>> +
>>  /*
>>   * An outstanding page request, on the source, having been received
>>   * and queued
>> @@ -780,6 +785,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>>      unsigned long *bitmap = rb->bmap;
>>      unsigned long next;
>>  
>> +    if (!qemu_ram_is_migratable(rb)) {
>> +        return size;
>> +    }
>> +
>>      if (rs->ram_bulk_stage && start > 0) {
>>          next = start + 1;
>>      } else {
>> @@ -825,7 +834,7 @@ uint64_t ram_pagesize_summary(void)
>>      RAMBlock *block;
>>      uint64_t summary = 0;
>>  
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          summary |= block->page_size;
>>      }
>>  
>> @@ -849,7 +858,7 @@ static void migration_bitmap_sync(RAMState *rs)
>>  
>>      qemu_mutex_lock(&rs->bitmap_mutex);
>>      rcu_read_lock();
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          migration_bitmap_sync_range(rs, block, 0, block->used_length);
>>      }
>>      rcu_read_unlock();
>> @@ -1499,6 +1508,10 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>      size_t pagesize_bits =
>>          qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>>  
>> +    if (!qemu_ram_is_migratable(pss->block)) {
>> +        return 0;
>> +    }
>> +
> 
> We might want to print a diagnostic there - it shouldn't happen right?

yes it should not. it should even be fatal. 

>>      do {
>>          tmppages = ram_save_target_page(rs, pss, last_stage);
>>          if (tmppages < 0) {
>> @@ -1587,7 +1600,7 @@ uint64_t ram_bytes_total(void)
>>      uint64_t total = 0;
>>  
>>      rcu_read_lock();
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          total += block->used_length;
>>      }
>>      rcu_read_unlock();
>> @@ -1642,7 +1655,7 @@ static void ram_save_cleanup(void *opaque)
>>       */
>>      memory_global_dirty_log_stop();
>>  
>> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          g_free(block->bmap);
>>          block->bmap = NULL;
>>          g_free(block->unsentmap);
>> @@ -1705,7 +1718,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
>>  {
>>      struct RAMBlock *block;
>>  
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          unsigned long *bitmap = block->bmap;
>>          unsigned long range = block->used_length >> TARGET_PAGE_BITS;
>>          unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
>> @@ -1783,7 +1796,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
>>      struct RAMBlock *block;
>>      int ret;
>>  
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          PostcopyDiscardState *pds =
>>              postcopy_discard_send_init(ms, block->idstr);
>>  
>> @@ -1991,7 +2004,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
>>      rs->last_sent_block = NULL;
>>      rs->last_page = 0;
>>  
>> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
>>          unsigned long *bitmap = block->bmap;
>>          unsigned long *unsentmap = block->unsentmap;
>> @@ -2150,7 +2163,7 @@ static void ram_list_init_bitmaps(void)
>>  
>>      /* Skip setting bitmap if there is no RAM */
>>      if (ram_bytes_total()) {
>> -        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>              pages = block->max_length >> TARGET_PAGE_BITS;
>>              block->bmap = bitmap_new(pages);
>>              bitmap_set(block->bmap, 0, pages);
>> @@ -2226,7 +2239,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>  
>>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>  
>> -    RAMBLOCK_FOREACH(block) {
>> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
>>          qemu_put_byte(f, strlen(block->idstr));
>>          qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
>>          qemu_put_be64(f, block->used_length);
> 
> Good, that's all quite neat;  we've just got to watch out we don't
> accidentally add any RAMBLOCK_FOREACH's back.

Thank,

C.

>> @@ -2471,6 +2484,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
>>          return NULL;
>>      }
>>  
>> +    if (!qemu_ram_is_migratable(block)) {
>> +        error_report("block %s should not be migrated !", id);
>> +        return NULL;
>> +    }
>> +
>>      return block;
>>  }
>>  
>> @@ -2921,7 +2939,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                  length = qemu_get_be64(f);
>>  
>>                  block = qemu_ram_block_by_name(id);
>> -                if (block) {
>> +                if (block && !qemu_ram_is_migratable(block)) {
>> +                    error_report("block %s should not be migrated !", id);
>> +                    ret = -EINVAL;
>> +
>> +                } else if (block) {
>>                      if (length != block->used_length) {
>>                          Error *local_err = NULL;
>>  
>> -- 
>> 2.13.6
> 
> Dave
> 
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 9 months ago

On 20 April 2018 at 07:59, Cédric Le Goater <clg@kaod.org> wrote:
> Hello David,
>
> On 04/19/2018 06:58 PM, Dr. David Alan Gilbert wrote:
>> * Cédric Le Goater (clg@kaod.org) wrote:
>>> @@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>>>          }
>>>      }
>>>      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
>>> +    new_block->flags |= RAM_MIGRATABLE;
>>>
>>>      rcu_read_lock();
>>>      RAMBLOCK_FOREACH(block) {
>>> @@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
>>>       */
>>>      if (block) {
>>>          memset(block->idstr, 0, sizeof(block->idstr));
>>> +        block->flags &= ~RAM_MIGRATABLE;
>>>      }
>>>  }
>>
>> Why in qemu_ram_set_idstr and qemu_ram_(un)set_idstr ?  It seems an
>> odd place to put them.
>
> The only place where this routines are called is from vmstate_un/register_ram()
> It seemed unnecessary to add an extra interface qemu_ram_un/set_migratable().

Why not set the flags in vmstate_{register,unregister}_ram() ?

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Cédric Le Goater 7 years, 9 months ago

On 04/20/2018 10:47 AM, Peter Maydell wrote:
> On 20 April 2018 at 07:59, Cédric Le Goater <clg@kaod.org> wrote:
>> Hello David,
>>
>> On 04/19/2018 06:58 PM, Dr. David Alan Gilbert wrote:
>>> * Cédric Le Goater (clg@kaod.org) wrote:
>>>> @@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
>>>>          }
>>>>      }
>>>>      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
>>>> +    new_block->flags |= RAM_MIGRATABLE;
>>>>
>>>>      rcu_read_lock();
>>>>      RAMBLOCK_FOREACH(block) {
>>>> @@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
>>>>       */
>>>>      if (block) {
>>>>          memset(block->idstr, 0, sizeof(block->idstr));
>>>> +        block->flags &= ~RAM_MIGRATABLE;
>>>>      }
>>>>  }
>>>
>>> Why in qemu_ram_set_idstr and qemu_ram_(un)set_idstr ?  It seems an
>>> odd place to put them.
>>
>> The only place where this routines are called is from vmstate_un/register_ram()
>> It seemed unnecessary to add an extra interface qemu_ram_un/set_migratable().
> 
> Why not set the flags in vmstate_{register,unregister}_ram() ?

because the RAM flags are not exposed outside exec.c so it would 
require some qemu_ram_un/set_migratable() helpers like this is done
for RAM_UF_ZEROPAGE. That is possible of course. 

C.

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Dr. David Alan Gilbert 7 years, 9 months ago

This thread seems to have stalled; we've got the list of potential
migration breaks that Peter found and only some minor comments on the
actual patch.

I'd like to get it going again sicne as well as Cédric, and Peter's
cases, there's Lai Jiangshan and now Eric Wheeler was asking for
something similar.

Anyone understand the way forward? Do we have to go and fix the
odd board failures first?

Dave

* Cédric Le Goater (clg@kaod.org) wrote:
> Hello David,
> 
> On 04/19/2018 06:58 PM, Dr. David Alan Gilbert wrote:
> > * Cédric Le Goater (clg@kaod.org) wrote:
> >> On the POWER9 processor, the XIVE interrupt controller can control
> >> interrupt sources using MMIO to trigger events, to EOI or to turn off
> >> the sources. Priority management and interrupt acknowledgment is also
> >> controlled by MMIO in the presenter sub-engine.
> >>
> >> These MMIO regions are exposed to guests in QEMU with a set of 'ram
> >> device' memory mappings, similarly to VFIO, and the VMAs are populated
> >> dynamically with the appropriate pages using a fault handler.
> >>
> >> But, these regions are an issue for migration. We need to discard the
> >> associated RAMBlocks from the RAM state on the source VM and let the
> >> destination VM rebuild the memory mappings on the new host in the
> >> post_load() operation just before resuming the system.
> >>
> >> To achieve this goal, the following introduces a new RAMBlock flag
> >> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> >> vmstate_unregister_ram() routines. This flag is then used by the
> >> migration to identify RAMBlocks to discard on the source. Some checks
> >> are also performed on the destination to make sure nothing invalid was
> >> sent.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > 
> > Hi Cédric,
> >   Yes, this is looking nicer; the macro makes the changes quite small.
> > A couple of questions:
> > 
> >> ---
> >>
> >>  Changes since v1:
> >>
> >>  - introduced a new RAMBlock flag RAM_MIGRATABLE
> >>  - fixed RAMBLOCK_FOREACH_MIGRATABLE() macro to handle code that
> >>    omitted {}. 
> >>
> >>  exec.c                    | 10 ++++++++++
> >>  include/exec/cpu-common.h |  1 +
> >>  migration/ram.c           | 42 ++++++++++++++++++++++++++++++++----------
> >>  3 files changed, 43 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/exec.c b/exec.c
> >> index 02b1efebb7c3..e9432c06294e 100644
> >> --- a/exec.c
> >> +++ b/exec.c
> >> @@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
> >>   * (Set during postcopy)
> >>   */
> >>  #define RAM_UF_ZEROPAGE (1 << 3)
> >> +
> >> +/* RAM can be migrated */
> >> +#define RAM_MIGRATABLE (1 << 4)
> >>  #endif
> >>  
> >>  #ifdef TARGET_PAGE_BITS_VARY
> >> @@ -1807,6 +1810,11 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
> >>      rb->flags |= RAM_UF_ZEROPAGE;
> >>  }
> >>  
> >> +bool qemu_ram_is_migratable(RAMBlock *rb)
> >> +{
> >> +    return rb->flags & RAM_MIGRATABLE;
> >> +}
> >> +
> >>  /* Called with iothread lock held.  */
> >>  void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
> >>  {
> >> @@ -1823,6 +1831,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
> >>          }
> >>      }
> >>      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
> >> +    new_block->flags |= RAM_MIGRATABLE;
> >>  
> >>      rcu_read_lock();
> >>      RAMBLOCK_FOREACH(block) {
> >> @@ -1845,6 +1854,7 @@ void qemu_ram_unset_idstr(RAMBlock *block)
> >>       */
> >>      if (block) {
> >>          memset(block->idstr, 0, sizeof(block->idstr));
> >> +        block->flags &= ~RAM_MIGRATABLE;
> >>      }
> >>  }
> > 
> > Why in qemu_ram_set_idstr and qemu_ram_(un)set_idstr ?  It seems an
> > odd place to put them.
> 
> The only place where this routines are called is from vmstate_un/register_ram()
> It seemed unnecessary to add an extra interface qemu_ram_un/set_migratable().
> 
> May be should just rename qemu_ram_un/set_idstr() ?
> 
> > 
> >> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> >> index 24d335f95d45..96854519b514 100644
> >> --- a/include/exec/cpu-common.h
> >> +++ b/include/exec/cpu-common.h
> >> @@ -75,6 +75,7 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
> >>  bool qemu_ram_is_shared(RAMBlock *rb);
> >>  bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
> >>  void qemu_ram_set_uf_zeroable(RAMBlock *rb);
> >> +bool qemu_ram_is_migratable(RAMBlock *rb);
> >>  
> >>  size_t qemu_ram_pagesize(RAMBlock *block);
> >>  size_t qemu_ram_pagesize_largest(void);
> >> diff --git a/migration/ram.c b/migration/ram.c
> >> index 0e90efa09236..89c3accc4e26 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -187,6 +187,11 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
> >>                        nr);
> >>  }
> >>  
> >> +/* Should be holding either ram_list.mutex, or the RCU lock. */
> >> +#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
> >> +    RAMBLOCK_FOREACH(block)                            \
> >> +        if (!qemu_ram_is_migratable(block)) {} else
> >> +
> >>  /*
> >>   * An outstanding page request, on the source, having been received
> >>   * and queued
> >> @@ -780,6 +785,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
> >>      unsigned long *bitmap = rb->bmap;
> >>      unsigned long next;
> >>  
> >> +    if (!qemu_ram_is_migratable(rb)) {
> >> +        return size;
> >> +    }
> >> +
> >>      if (rs->ram_bulk_stage && start > 0) {
> >>          next = start + 1;
> >>      } else {
> >> @@ -825,7 +834,7 @@ uint64_t ram_pagesize_summary(void)
> >>      RAMBlock *block;
> >>      uint64_t summary = 0;
> >>  
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          summary |= block->page_size;
> >>      }
> >>  
> >> @@ -849,7 +858,7 @@ static void migration_bitmap_sync(RAMState *rs)
> >>  
> >>      qemu_mutex_lock(&rs->bitmap_mutex);
> >>      rcu_read_lock();
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          migration_bitmap_sync_range(rs, block, 0, block->used_length);
> >>      }
> >>      rcu_read_unlock();
> >> @@ -1499,6 +1508,10 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
> >>      size_t pagesize_bits =
> >>          qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
> >>  
> >> +    if (!qemu_ram_is_migratable(pss->block)) {
> >> +        return 0;
> >> +    }
> >> +
> > 
> > We might want to print a diagnostic there - it shouldn't happen right?
> 
> yes it should not. it should even be fatal. 
> 
> >>      do {
> >>          tmppages = ram_save_target_page(rs, pss, last_stage);
> >>          if (tmppages < 0) {
> >> @@ -1587,7 +1600,7 @@ uint64_t ram_bytes_total(void)
> >>      uint64_t total = 0;
> >>  
> >>      rcu_read_lock();
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          total += block->used_length;
> >>      }
> >>      rcu_read_unlock();
> >> @@ -1642,7 +1655,7 @@ static void ram_save_cleanup(void *opaque)
> >>       */
> >>      memory_global_dirty_log_stop();
> >>  
> >> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          g_free(block->bmap);
> >>          block->bmap = NULL;
> >>          g_free(block->unsentmap);
> >> @@ -1705,7 +1718,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
> >>  {
> >>      struct RAMBlock *block;
> >>  
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          unsigned long *bitmap = block->bmap;
> >>          unsigned long range = block->used_length >> TARGET_PAGE_BITS;
> >>          unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
> >> @@ -1783,7 +1796,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
> >>      struct RAMBlock *block;
> >>      int ret;
> >>  
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          PostcopyDiscardState *pds =
> >>              postcopy_discard_send_init(ms, block->idstr);
> >>  
> >> @@ -1991,7 +2004,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
> >>      rs->last_sent_block = NULL;
> >>      rs->last_page = 0;
> >>  
> >> -    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
> >>          unsigned long *bitmap = block->bmap;
> >>          unsigned long *unsentmap = block->unsentmap;
> >> @@ -2150,7 +2163,7 @@ static void ram_list_init_bitmaps(void)
> >>  
> >>      /* Skip setting bitmap if there is no RAM */
> >>      if (ram_bytes_total()) {
> >> -        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> >> +        RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>              pages = block->max_length >> TARGET_PAGE_BITS;
> >>              block->bmap = bitmap_new(pages);
> >>              bitmap_set(block->bmap, 0, pages);
> >> @@ -2226,7 +2239,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >>  
> >>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> >>  
> >> -    RAMBLOCK_FOREACH(block) {
> >> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>          qemu_put_byte(f, strlen(block->idstr));
> >>          qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> >>          qemu_put_be64(f, block->used_length);
> > 
> > Good, that's all quite neat;  we've just got to watch out we don't
> > accidentally add any RAMBLOCK_FOREACH's back.
> 
> Thank,
> 
> C.
> 
> >> @@ -2471,6 +2484,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
> >>          return NULL;
> >>      }
> >>  
> >> +    if (!qemu_ram_is_migratable(block)) {
> >> +        error_report("block %s should not be migrated !", id);
> >> +        return NULL;
> >> +    }
> >> +
> >>      return block;
> >>  }
> >>  
> >> @@ -2921,7 +2939,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >>                  length = qemu_get_be64(f);
> >>  
> >>                  block = qemu_ram_block_by_name(id);
> >> -                if (block) {
> >> +                if (block && !qemu_ram_is_migratable(block)) {
> >> +                    error_report("block %s should not be migrated !", id);
> >> +                    ret = -EINVAL;
> >> +
> >> +                } else if (block) {
> >>                      if (length != block->used_length) {
> >>                          Error *local_err = NULL;
> >>  
> >> -- 
> >> 2.13.6
> > 
> > Dave
> > 
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 9 months ago

On 9 May 2018 at 12:08, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> This thread seems to have stalled; we've got the list of potential
> migration breaks that Peter found and only some minor comments on the
> actual patch.
>
> I'd like to get it going again sicne as well as Cédric, and Peter's
> cases, there's Lai Jiangshan and now Eric Wheeler was asking for
> something similar.
>
> Anyone understand the way forward? Do we have to go and fix the
> odd board failures first?

aspeed and highbank I already changed in master. That leaves

hw/mips/boston.c:    memory_region_init_rom_nomigrate(flash, NULL,
hw/mips/mips_malta.c:    memory_region_init_ram_nomigrate(bios_copy,
NULL, "bios.1fc", BIOS_SIZE,
hw/net/dp8393x.c:    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
  [device only used on mips jazz board]
hw/pci-host/xilinx-pcie.c:    memory_region_init_ram_nomigrate(&s->io,
OBJECT(s), "io", 16, NULL);
  [device only used on mips boston board]

which affect mips boston, malta and jazz boards.

I don't think we need to fix those first, but:

 * we should note in the commit message for this patch that
   it is a de-facto migration break for those boards
 * we should fix those devices/boards in this release cycle,
   since we've broken migration compat anyway
 * we should check with the MIPS maintainers that they're ok
   with a cross-version migration compat break for those boards
   (I've cc'd them)

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Paolo Bonzini 7 years, 9 months ago

On 09/05/2018 13:33, Peter Maydell wrote:
> 
> I don't think we need to fix those first, but:
> 
>  * we should note in the commit message for this patch that
>    it is a de-facto migration break for those boards
>  * we should fix those devices/boards in this release cycle,
>    since we've broken migration compat anyway
>  * we should check with the MIPS maintainers that they're ok
>    with a cross-version migration compat break for those boards
>    (I've cc'd them)

Isn't that implicitly the case for non-versioned machine types?...

Paolo

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Dr. David Alan Gilbert 7 years, 9 months ago

* Peter Maydell (peter.maydell@linaro.org) wrote:
> On 9 May 2018 at 12:08, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> > This thread seems to have stalled; we've got the list of potential
> > migration breaks that Peter found and only some minor comments on the
> > actual patch.
> >
> > I'd like to get it going again sicne as well as Cédric, and Peter's
> > cases, there's Lai Jiangshan and now Eric Wheeler was asking for
> > something similar.
> >
> > Anyone understand the way forward? Do we have to go and fix the
> > odd board failures first?
> 
> aspeed and highbank I already changed in master. That leaves
> 
> hw/mips/boston.c:    memory_region_init_rom_nomigrate(flash, NULL,
> hw/mips/mips_malta.c:    memory_region_init_ram_nomigrate(bios_copy,
> NULL, "bios.1fc", BIOS_SIZE,
> hw/net/dp8393x.c:    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
>   [device only used on mips jazz board]
> hw/pci-host/xilinx-pcie.c:    memory_region_init_ram_nomigrate(&s->io,
> OBJECT(s), "io", 16, NULL);
>   [device only used on mips boston board]
> 
> which affect mips boston, malta and jazz boards.
> 
> I don't think we need to fix those first, but:
> 
>  * we should note in the commit message for this patch that
>    it is a de-facto migration break for those boards
>  * we should fix those devices/boards in this release cycle,
>    since we've broken migration compat anyway
>  * we should check with the MIPS maintainers that they're ok
>    with a cross-version migration compat break for those boards
>    (I've cc'd them)

OK, in that case, as long as they're OK, then I think
we should go for it.

Cédric: Were you going to post a version that didn't
do it in set_idstr?

Dave

> thanks
> -- PMM
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Cédric Le Goater 7 years, 9 months ago

Hello David,

On 05/10/2018 09:15 PM, Dr. David Alan Gilbert wrote:
> * Peter Maydell (peter.maydell@linaro.org) wrote:
>> On 9 May 2018 at 12:08, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
>>> This thread seems to have stalled; we've got the list of potential
>>> migration breaks that Peter found and only some minor comments on the
>>> actual patch.
>>>
>>> I'd like to get it going again sicne as well as Cédric, and Peter's
>>> cases, there's Lai Jiangshan and now Eric Wheeler was asking for
>>> something similar.
>>>
>>> Anyone understand the way forward? Do we have to go and fix the
>>> odd board failures first?
>>
>> aspeed and highbank I already changed in master. That leaves
>>
>> hw/mips/boston.c:    memory_region_init_rom_nomigrate(flash, NULL,
>> hw/mips/mips_malta.c:    memory_region_init_ram_nomigrate(bios_copy,
>> NULL, "bios.1fc", BIOS_SIZE,
>> hw/net/dp8393x.c:    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
>>   [device only used on mips jazz board]
>> hw/pci-host/xilinx-pcie.c:    memory_region_init_ram_nomigrate(&s->io,
>> OBJECT(s), "io", 16, NULL);
>>   [device only used on mips boston board]
>>
>> which affect mips boston, malta and jazz boards.
>>
>> I don't think we need to fix those first, but:
>>
>>  * we should note in the commit message for this patch that
>>    it is a de-facto migration break for those boards
>>  * we should fix those devices/boards in this release cycle,
>>    since we've broken migration compat anyway
>>  * we should check with the MIPS maintainers that they're ok
>>    with a cross-version migration compat break for those boards
>>    (I've cc'd them)
> 
> OK, in that case, as long as they're OK, then I think
> we should go for it.
> 
> Cédric: Were you going to post a version that didn't
> do it in set_idstr?

I have a v3, ready to send, adding an error_report() in ram_save_host_page() 
for consistency with the rest of the code. It really should not happen as 
the 'found' bool in ram_find_and_save_block() is protecting the call.

As for setting RAM_MIGRATABLE in vmstate_un/register_ram() instead of 
qemu_ram_un/set_idstr(), yes, I can add this change in v3 also. 
This is not a problem.

C.

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 9 months ago

On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
> On the POWER9 processor, the XIVE interrupt controller can control
> interrupt sources using MMIO to trigger events, to EOI or to turn off
> the sources. Priority management and interrupt acknowledgment is also
> controlled by MMIO in the presenter sub-engine.
>
> These MMIO regions are exposed to guests in QEMU with a set of 'ram
> device' memory mappings, similarly to VFIO, and the VMAs are populated
> dynamically with the appropriate pages using a fault handler.
>
> But, these regions are an issue for migration. We need to discard the
> associated RAMBlocks from the RAM state on the source VM and let the
> destination VM rebuild the memory mappings on the new host in the
> post_load() operation just before resuming the system.
>
> To achieve this goal, the following introduces a new RAMBlock flag
> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> vmstate_unregister_ram() routines. This flag is then used by the
> migration to identify RAMBlocks to discard on the source. Some checks
> are also performed on the destination to make sure nothing invalid was
> sent.

I think in theory this should Just Work for allowing us to
enable migration when the xilinx-spips device is using its
execute-from-mmio feature (we would just need to remove the
code that puts in the migration blocker).

Xilinx folks -- do you have a test image for a board that uses
xilinx-spips and exercises the execute-from-mmio code, that
we can use to test migration/vmsave with?

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Edgar E. Iglesias 7 years, 9 months ago

On Thu, Apr 19, 2018 at 06:32:07PM +0100, Peter Maydell wrote:
> On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
> > On the POWER9 processor, the XIVE interrupt controller can control
> > interrupt sources using MMIO to trigger events, to EOI or to turn off
> > the sources. Priority management and interrupt acknowledgment is also
> > controlled by MMIO in the presenter sub-engine.
> >
> > These MMIO regions are exposed to guests in QEMU with a set of 'ram
> > device' memory mappings, similarly to VFIO, and the VMAs are populated
> > dynamically with the appropriate pages using a fault handler.
> >
> > But, these regions are an issue for migration. We need to discard the
> > associated RAMBlocks from the RAM state on the source VM and let the
> > destination VM rebuild the memory mappings on the new host in the
> > post_load() operation just before resuming the system.
> >
> > To achieve this goal, the following introduces a new RAMBlock flag
> > RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> > vmstate_unregister_ram() routines. This flag is then used by the
> > migration to identify RAMBlocks to discard on the source. Some checks
> > are also performed on the destination to make sure nothing invalid was
> > sent.
> 
> I think in theory this should Just Work for allowing us to
> enable migration when the xilinx-spips device is using its
> execute-from-mmio feature (we would just need to remove the
> code that puts in the migration blocker).
> 
> Xilinx folks -- do you have a test image for a board that uses
> xilinx-spips and exercises the execute-from-mmio code, that
> we can use to test migration/vmsave with?

CC:ing Fred

I got an image from Fred a while back ago, I probably still have it
somewhere but I've totally forgotten the name for it.

Fred, do you have the image or roughly remember the filename?

Cheers,
Edgar

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by KONRAD Frederic 7 years, 9 months ago


On 04/19/2018 07:45 PM, Edgar E. Iglesias wrote:
> On Thu, Apr 19, 2018 at 06:32:07PM +0100, Peter Maydell wrote:
>> On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
>>> On the POWER9 processor, the XIVE interrupt controller can control
>>> interrupt sources using MMIO to trigger events, to EOI or to turn off
>>> the sources. Priority management and interrupt acknowledgment is also
>>> controlled by MMIO in the presenter sub-engine.
>>>
>>> These MMIO regions are exposed to guests in QEMU with a set of 'ram
>>> device' memory mappings, similarly to VFIO, and the VMAs are populated
>>> dynamically with the appropriate pages using a fault handler.
>>>
>>> But, these regions are an issue for migration. We need to discard the
>>> associated RAMBlocks from the RAM state on the source VM and let the
>>> destination VM rebuild the memory mappings on the new host in the
>>> post_load() operation just before resuming the system.
>>>
>>> To achieve this goal, the following introduces a new RAMBlock flag
>>> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
>>> vmstate_unregister_ram() routines. This flag is then used by the
>>> migration to identify RAMBlocks to discard on the source. Some checks
>>> are also performed on the destination to make sure nothing invalid was
>>> sent.
>>
>> I think in theory this should Just Work for allowing us to
>> enable migration when the xilinx-spips device is using its
>> execute-from-mmio feature (we would just need to remove the
>> code that puts in the migration blocker).
>>
>> Xilinx folks -- do you have a test image for a board that uses
>> xilinx-spips and exercises the execute-from-mmio code, that
>> we can use to test migration/vmsave with?
> 
> CC:ing Fred
> 
> I got an image from Fred a while back ago, I probably still have it
> somewhere but I've totally forgotten the name for it.
> 
> Fred, do you have the image or roughly remember the filename?
> 
> Cheers,
> Edgar
> 

Edgar,

That was a while ago! I don't recall the name sorry for that!

In the worse case maybe building and putting an u-boot binary for
zynq in the SPIs will do the job?

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Edgar E. Iglesias 7 years, 9 months ago

On Fri, Apr 20, 2018 at 10:14:15AM +0200, KONRAD Frederic wrote:
> 
> 
> On 04/19/2018 07:45 PM, Edgar E. Iglesias wrote:
> > On Thu, Apr 19, 2018 at 06:32:07PM +0100, Peter Maydell wrote:
> > > On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
> > > > On the POWER9 processor, the XIVE interrupt controller can control
> > > > interrupt sources using MMIO to trigger events, to EOI or to turn off
> > > > the sources. Priority management and interrupt acknowledgment is also
> > > > controlled by MMIO in the presenter sub-engine.
> > > > 
> > > > These MMIO regions are exposed to guests in QEMU with a set of 'ram
> > > > device' memory mappings, similarly to VFIO, and the VMAs are populated
> > > > dynamically with the appropriate pages using a fault handler.
> > > > 
> > > > But, these regions are an issue for migration. We need to discard the
> > > > associated RAMBlocks from the RAM state on the source VM and let the
> > > > destination VM rebuild the memory mappings on the new host in the
> > > > post_load() operation just before resuming the system.
> > > > 
> > > > To achieve this goal, the following introduces a new RAMBlock flag
> > > > RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> > > > vmstate_unregister_ram() routines. This flag is then used by the
> > > > migration to identify RAMBlocks to discard on the source. Some checks
> > > > are also performed on the destination to make sure nothing invalid was
> > > > sent.
> > > 
> > > I think in theory this should Just Work for allowing us to
> > > enable migration when the xilinx-spips device is using its
> > > execute-from-mmio feature (we would just need to remove the
> > > code that puts in the migration blocker).
> > > 
> > > Xilinx folks -- do you have a test image for a board that uses
> > > xilinx-spips and exercises the execute-from-mmio code, that
> > > we can use to test migration/vmsave with?
> > 
> > CC:ing Fred
> > 
> > I got an image from Fred a while back ago, I probably still have it
> > somewhere but I've totally forgotten the name for it.
> > 
> > Fred, do you have the image or roughly remember the filename?
> > 
> > Cheers,
> > Edgar
> > 
> 
> Edgar,
> 
> That was a while ago! I don't recall the name sorry for that!
> 
> In the worse case maybe building and putting an u-boot binary for
> zynq in the SPIs will do the job?
> 

Hi again,

I've found the test-case and it still runs OK. Who should I send the
tarball to? 

Cédric Le Goater <clg@kaod.org>
Peter Maydell <peter.maydell@linaro.org>

Anyone else who needs it?
Just let me know and I'll email it outside of the mailing list.

Cheers,
Edgar

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 9 months ago

On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
> On the POWER9 processor, the XIVE interrupt controller can control
> interrupt sources using MMIO to trigger events, to EOI or to turn off
> the sources. Priority management and interrupt acknowledgment is also
> controlled by MMIO in the presenter sub-engine.
>
> These MMIO regions are exposed to guests in QEMU with a set of 'ram
> device' memory mappings, similarly to VFIO, and the VMAs are populated
> dynamically with the appropriate pages using a fault handler.
>
> But, these regions are an issue for migration. We need to discard the
> associated RAMBlocks from the RAM state on the source VM and let the
> destination VM rebuild the memory mappings on the new host in the
> post_load() operation just before resuming the system.
>
> To achieve this goal, the following introduces a new RAMBlock flag
> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
> vmstate_unregister_ram() routines. This flag is then used by the
> migration to identify RAMBlocks to discard on the source. Some checks
> are also performed on the destination to make sure nothing invalid was
> sent.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

So, this is conceptually the right thing, but it is a migration
compat break for any board which has a ram block which it doesn't
call vmstate_register_ram() for. These are mostly going to be
places that call one of the _nomigrate() functions to create an
MMIO region, and then don't register the ram by hand either.
Those are bugs, in my view, but we should consider fixing
them while we're here.

Here's the results of a grep and eyeballing of the results:

These places are OK, because they register the memory region
by hand one way or another:

numa stuff, registers it when the backend is used:
backends/hostmem-ram.c:
memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend),
path,

registers it globally by hand:
numa.c:            memory_region_init_ram_nomigrate(mr, owner, name,
ram_size, &error_fatal);
numa.c:        memory_region_init_ram_nomigrate(mr, owner, name,
ram_size, &error_fatal);
hw/arm/aspeed_soc.c:    memory_region_init_ram_nomigrate(&s->sram,
OBJECT(dev), "aspeed.sram",
hw/block/onenand.c:    memory_region_init_ram_nomigrate(&s->ram,
OBJECT(s), "onenand.ram",
hw/display/cg3.c:    memory_region_init_ram_nomigrate(&s->rom, obj,
"cg3.prom", FCODE_MAX_ROM_SIZE,
hw/display/tcx.c:    memory_region_init_ram_nomigrate(&s->rom, obj,
"tcx.prom", FCODE_MAX_ROM_SIZE,
hw/display/tcx.c:    memory_region_init_ram_nomigrate(&s->vram_mem,
OBJECT(s), "tcx.vram",
hw/display/vga.c:    memory_region_init_ram_nomigrate(&s->vram, obj,
"vga.vram", s->vram_size,
hw/input/milkymist-softusb.c:
memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s),
"milkymist-softusb.pmem",
hw/input/milkymist-softusb.c:
memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s),
"milkymist-softusb.dmem",
hw/net/milkymist-minimac2.c:
memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev),
"milkymist-minimac2.buffers",
hw/pci-host/prep.c:    memory_region_init_ram_nomigrate(&s->bios,
OBJECT(s), "bios", BIOS_SIZE,
hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->mem, obj,
hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->mem, obj,
"sun4m.afx", 4, &error_fatal);
hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->prom, obj,
"sun4m.prom", PROM_SIZE_MAX,
hw/sparc64/sun4u.c:    memory_region_init_ram_nomigrate(&s->prom, obj,
"sun4u.prom", PROM_SIZE_MAX,
hw/sparc64/sun4u.c:    memory_region_init_ram_nomigrate(&d->ram,
OBJECT(d), "sun4u.ram", d->size,
hw/xtensa/xtfpga.c:    memory_region_init_ram_nomigrate(ram,
OBJECT(s), "open_eth.ram", 16384,

registers it non-globally by hand:
hw/xen/xen_pt_load_rom.c:
memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size,
&error_abort);

These callsites are not OK, because they don't register the ram:

hw/arm/aspeed.c:        memory_region_init_rom_nomigrate(boot_rom,
OBJECT(bmc), "aspeed.boot_rom",
hw/arm/highbank.c:    memory_region_init_ram_nomigrate(sysram, NULL,
"highbank.sysram", 0x8000,
hw/mips/boston.c:    memory_region_init_rom_nomigrate(flash, NULL,
hw/mips/mips_malta.c:    memory_region_init_ram_nomigrate(bios_copy,
NULL, "bios.1fc", BIOS_SIZE,
hw/net/dp8393x.c:    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
  [device only used on mips jazz board]
hw/pci-host/xilinx-pcie.c:    memory_region_init_ram_nomigrate(&s->io,
OBJECT(s), "io", 16, NULL);
  [device only used on mips boston board]

Notes:
 * any device that registers a ramblock globally is a bit dodgy, because
it means you can't have more than one of it (the ramblock names would
clash). We should fix those devices for the cases where we're willing
to take the migration compat break.
 * the xen_pt_load_rom.c case could be cleaned up to use memory_region_init_ram
   rather than the _nomigrate function without breaking compat

NB: I haven't actually tested whether this is a migration compat break,
I merely believe it to be so :-)

I think we can take the compat break on aspeed, highbank, malta and jazz.
Not sure about boston, but I guess so given the board doesn't have
per-QEMU-version machine models.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Cédric Le Goater 7 years, 9 months ago

On 04/20/2018 02:09 PM, Peter Maydell wrote:
> On 13 April 2018 at 08:52, Cédric Le Goater <clg@kaod.org> wrote:
>> On the POWER9 processor, the XIVE interrupt controller can control
>> interrupt sources using MMIO to trigger events, to EOI or to turn off
>> the sources. Priority management and interrupt acknowledgment is also
>> controlled by MMIO in the presenter sub-engine.
>>
>> These MMIO regions are exposed to guests in QEMU with a set of 'ram
>> device' memory mappings, similarly to VFIO, and the VMAs are populated
>> dynamically with the appropriate pages using a fault handler.
>>
>> But, these regions are an issue for migration. We need to discard the
>> associated RAMBlocks from the RAM state on the source VM and let the
>> destination VM rebuild the memory mappings on the new host in the
>> post_load() operation just before resuming the system.
>>
>> To achieve this goal, the following introduces a new RAMBlock flag
>> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
>> vmstate_unregister_ram() routines. This flag is then used by the
>> migration to identify RAMBlocks to discard on the source. Some checks
>> are also performed on the destination to make sure nothing invalid was
>> sent.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> So, this is conceptually the right thing, but it is a migration
> compat break for any board which has a ram block which it doesn't
> call vmstate_register_ram() for. These are mostly going to be
> places that call one of the _nomigrate() functions to create an
> MMIO region, and then don't register the ram by hand either.> Those are bugs, in my view, but we should consider fixing
> them while we're here.
>
> Here's the results of a grep and eyeballing of the results:
> 
> These places are OK, because they register the memory region
> by hand one way or another:
> 
> numa stuff, registers it when the backend is used:
> backends/hostmem-ram.c:
> memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend),
> path,
> 
> registers it globally by hand:
> numa.c:            memory_region_init_ram_nomigrate(mr, owner, name,
> ram_size, &error_fatal);
> numa.c:        memory_region_init_ram_nomigrate(mr, owner, name,
> ram_size, &error_fatal);
> hw/arm/aspeed_soc.c:    memory_region_init_ram_nomigrate(&s->sram,
> OBJECT(dev), "aspeed.sram",
> hw/block/onenand.c:    memory_region_init_ram_nomigrate(&s->ram,
> OBJECT(s), "onenand.ram",
> hw/display/cg3.c:    memory_region_init_ram_nomigrate(&s->rom, obj,
> "cg3.prom", FCODE_MAX_ROM_SIZE,
> hw/display/tcx.c:    memory_region_init_ram_nomigrate(&s->rom, obj,
> "tcx.prom", FCODE_MAX_ROM_SIZE,
> hw/display/tcx.c:    memory_region_init_ram_nomigrate(&s->vram_mem,
> OBJECT(s), "tcx.vram",
> hw/display/vga.c:    memory_region_init_ram_nomigrate(&s->vram, obj,
> "vga.vram", s->vram_size,
> hw/input/milkymist-softusb.c:
> memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s),
> "milkymist-softusb.pmem",
> hw/input/milkymist-softusb.c:
> memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s),
> "milkymist-softusb.dmem",
> hw/net/milkymist-minimac2.c:
> memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev),
> "milkymist-minimac2.buffers",
> hw/pci-host/prep.c:    memory_region_init_ram_nomigrate(&s->bios,
> OBJECT(s), "bios", BIOS_SIZE,
> hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->mem, obj,
> hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->mem, obj,
> "sun4m.afx", 4, &error_fatal);
> hw/sparc/sun4m.c:    memory_region_init_ram_nomigrate(&s->prom, obj,
> "sun4m.prom", PROM_SIZE_MAX,
> hw/sparc64/sun4u.c:    memory_region_init_ram_nomigrate(&s->prom, obj,
> "sun4u.prom", PROM_SIZE_MAX,
> hw/sparc64/sun4u.c:    memory_region_init_ram_nomigrate(&d->ram,
> OBJECT(d), "sun4u.ram", d->size,
> hw/xtensa/xtfpga.c:    memory_region_init_ram_nomigrate(ram,
> OBJECT(s), "open_eth.ram", 16384,
> 
> registers it non-globally by hand:
> hw/xen/xen_pt_load_rom.c:
> memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size,
> &error_abort);
> 
> These callsites are not OK, because they don't register the ram:
> 
> hw/arm/aspeed.c:        memory_region_init_rom_nomigrate(boot_rom,
> OBJECT(bmc), "aspeed.boot_rom",
> hw/arm/highbank.c:    memory_region_init_ram_nomigrate(sysram, NULL,
> "highbank.sysram", 0x8000,
> hw/mips/boston.c:    memory_region_init_rom_nomigrate(flash, NULL,
> hw/mips/mips_malta.c:    memory_region_init_ram_nomigrate(bios_copy,
> NULL, "bios.1fc", BIOS_SIZE,
> hw/net/dp8393x.c:    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
>   [device only used on mips jazz board]
> hw/pci-host/xilinx-pcie.c:    memory_region_init_ram_nomigrate(&s->io,
> OBJECT(s), "io", 16, NULL);
>   [device only used on mips boston board]
> 
> Notes:
>  * any device that registers a ramblock globally is a bit dodgy, because
> it means you can't have more than one of it (the ramblock names would
> clash). We should fix those devices for the cases where we're willing
> to take the migration compat break.

There are quite a few models calling memory_region_init_ram() with 
a NULL owner, see below. Should we fix all these ? 

Thanks,

C. 

>  * the xen_pt_load_rom.c case could be cleaned up to use memory_region_init_ram
>    rather than the _nomigrate function without breaking compat
> 
> NB: I haven't actually tested whether this is a migration compat break,
> I merely believe it to be so :-)
> 
> I think we can take the compat break on aspeed, highbank, malta and jazz.
> Not sure about boston, but I guess so given the board doesn't have
> per-QEMU-version machine models.
> 
> thanks
> -- PMM
> 


hw/arm/msf2-soc.c:    memory_region_init_ram(sram, NULL, "MSF2.eSRAM", s->esram_size,
hw/arm/vexpress.c:    memory_region_init_ram(sram, NULL, "vexpress.a15sram", 0x10000,
hw/arm/vexpress.c:    memory_region_init_ram(sram, NULL, "vexpress.sram", sram_size,
hw/arm/vexpress.c:    memory_region_init_ram(vram, NULL, "vexpress.vram", vram_size,
hw/arm/palm.c:    memory_region_init_ram(flash, NULL, "palmte.flash", flash_size,
hw/arm/omap1.c:    memory_region_init_ram(&s->imif_ram, NULL, "omap1.sram", s->sram_size,
hw/arm/omap2.c:    memory_region_init_ram(&s->sram, NULL, "omap2.sram", s->sram_size,
hw/arm/fsl-imx6.c:    memory_region_init_ram(&s->ocram, NULL, "imx6.ocram", FSL_IMX6_OCRAM_SIZE,
hw/arm/mps2-tz.c:    memory_region_init_ram(mr, NULL, name, size, &error_fatal);
hw/arm/mainstone.c:    memory_region_init_ram(rom, NULL, "mainstone.rom", MAINSTONE_ROM,
hw/arm/tosa.c:    memory_region_init_ram(rom, NULL, "tosa.rom", TOSA_ROM, &error_fatal);
hw/arm/spitz.c:    memory_region_init_ram(rom, NULL, "spitz.rom", SPITZ_ROM, &error_fatal);
hw/arm/exynos4210.c:    memory_region_init_ram(&s->irom_mem, NULL, "exynos4210.irom",
hw/arm/exynos4210.c:    memory_region_init_ram(&s->iram_mem, NULL, "exynos4210.iram",
hw/arm/omap_sx1.c:    memory_region_init_ram(flash, NULL, "omap_sx1.flash0-0", flash_size,
hw/arm/omap_sx1.c:        memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0",
hw/arm/stm32f205_soc.c:    memory_region_init_ram(flash, NULL, "STM32F205.flash", FLASH_SIZE,
hw/arm/stm32f205_soc.c:    memory_region_init_ram(sram, NULL, "STM32F205.sram", SRAM_SIZE,
hw/arm/iotkit.c:    memory_region_init_ram(&s->sram0, NULL, "iotkit.sram0", 0x00008000, &err);
hw/arm/fsl-imx31.c:    memory_region_init_ram(&s->iram, NULL, "imx31.iram", FSL_IMX31_IRAM_SIZE,
hw/arm/xilinx_zynq.c:    memory_region_init_ram(ocm_ram, NULL, "zynq.ocm_ram", 256 << 10,
hw/arm/xlnx-zynqmp.c:        memory_region_init_ram(&s->ocm_ram[i], NULL, ocm_name,
hw/arm/musicpal.c:    memory_region_init_ram(sram, NULL, "musicpal.sram", MP_SRAM_SIZE,
hw/arm/virt.c:    memory_region_init_ram(secram, NULL, "virt.secure-ram", size,
hw/arm/exynos4_boards.c:        memory_region_init_ram(&s->dram1_mem, NULL, "exynos4210.dram1",
hw/arm/exynos4_boards.c:    memory_region_init_ram(&s->dram0_mem, NULL, "exynos4210.dram0", mem_size,
hw/arm/stellaris.c:    memory_region_init_ram(flash, NULL, "stellaris.flash", flash_size,
hw/arm/stellaris.c:    memory_region_init_ram(sram, NULL, "stellaris.sram", sram_size,
hw/arm/pxa2xx.c:    memory_region_init_ram(&s->sdram, NULL, "pxa270.sdram", sdram_size,
hw/arm/pxa2xx.c:    memory_region_init_ram(&s->internal, NULL, "pxa270.internal", 0x40000,
hw/arm/pxa2xx.c:    memory_region_init_ram(&s->sdram, NULL, "pxa255.sdram", sdram_size,
hw/arm/pxa2xx.c:    memory_region_init_ram(&s->internal, NULL, "pxa255.internal",
hw/arm/fsl-imx25.c:    memory_region_init_ram(&s->iram, NULL, "imx25.iram", FSL_IMX25_IRAM_SIZE,
hw/arm/mps2.c:    memory_region_init_ram(mr, NULL, name, size, &error_fatal);
hw/arm/msf2-som.c:    memory_region_init_ram(ddr, NULL, "ddr-ram", DDR_SIZE,
hw/arm/realview.c:        memory_region_init_ram(ram_lo, NULL, "realview.lowmem", low_ram_size,
hw/arm/realview.c:    memory_region_init_ram(ram_hi, NULL, "realview.highmem", ram_size,
hw/arm/realview.c:    memory_region_init_ram(ram_hack, NULL, "realview.hack", 0x1000,
hw/tricore/tricore_testboard.c:    memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram",
hw/tricore/tricore_testboard.c:    memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram",
hw/tricore/tricore_testboard.c:    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48 * 1024,
hw/tricore/tricore_testboard.c:    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48 * 1024,
hw/tricore/tricore_testboard.c:    memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram",
hw/tricore/tricore_testboard.c:    memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram",
hw/nios2/10m50_devboard.c:    memory_region_init_ram(phys_tcm, NULL, "nios2.tcm", tcm_size,
hw/nios2/10m50_devboard.c:    memory_region_init_ram(phys_ram, NULL, "nios2.ram", ram_size,
hw/xtensa/xtensa_memory.c:        memory_region_init_ram(m, NULL, num_name->str,
hw/ppc/ppc405_boards.c:    memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size,
hw/ppc/ppc405_boards.c:        memory_region_init_ram(bios, NULL, "ef405ep.bios", BIOS_SIZE,
hw/ppc/ppc405_boards.c:        memory_region_init_ram(bios, NULL, "taihu_405ep.bios", BIOS_SIZE,
hw/ppc/mac_newworld.c:    memory_region_init_ram(bios, NULL, "ppc_core99.bios", BIOS_SIZE,
hw/ppc/mac_oldworld.c:    memory_region_init_ram(bios, NULL, "ppc_heathrow.bios", BIOS_SIZE,
hw/ppc/ppc440_uc.c:    memory_region_init_ram(&l2sram->bank[0], NULL, "ppc4xx.l2sram_bank0",
hw/ppc/ppc440_uc.c:    memory_region_init_ram(&l2sram->bank[1], NULL, "ppc4xx.l2sram_bank1",
hw/ppc/ppc440_uc.c:    memory_region_init_ram(&l2sram->bank[2], NULL, "ppc4xx.l2sram_bank2",
hw/ppc/ppc440_uc.c:    memory_region_init_ram(&l2sram->bank[3], NULL, "ppc4xx.l2sram_bank3",
hw/ppc/ppc405_uc.c:    memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4096,
hw/ppc/sam460ex.c:    memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 << 10,
hw/mips/mips_mipssim.c:    memory_region_init_ram(bios, NULL, "mips_mipssim.bios", BIOS_SIZE,
hw/mips/mips_fulong2e.c:    memory_region_init_ram(bios, NULL, "fulong2e.bios", bios_size,
hw/mips/mips_jazz.c:    memory_region_init_ram(bios, NULL, "mips_jazz.bios", MAGNUM_BIOS_SIZE,
hw/mips/mips_jazz.c:            memory_region_init_ram(rom_mr, NULL, "g364fb.rom", 0x80000,
hw/mips/mips_r4k.c:        memory_region_init_ram(bios, NULL, "mips_r4k.bios", BIOS_SIZE,
hw/sparc/leon3.c:    memory_region_init_ram(prom, NULL, "Leon3.bios", prom_size, &error_fatal);
hw/microblaze/petalogix_ml605_mmu.c:    memory_region_init_ram(phys_lmb_bram, NULL, "petalogix_ml605.lmb_bram",
hw/microblaze/petalogix_ml605_mmu.c:    memory_region_init_ram(phys_ram, NULL, "petalogix_ml605.ram", ram_size,
hw/microblaze/petalogix_s3adsp1800_mmu.c:    memory_region_init_ram(phys_lmb_bram, NULL,
hw/microblaze/petalogix_s3adsp1800_mmu.c:    memory_region_init_ram(phys_ram, NULL, "petalogix_s3adsp1800.ram",
hw/microblaze/xlnx-zynqmp-pmu.c:    memory_region_init_ram(pmu_ram, NULL, "xlnx-zynqmp-pmu.ram",
hw/cris/axis_dev88.c:    memory_region_init_ram(phys_intmem, NULL, "axisdev88.chipram",
hw/riscv/sifive_u.c:    memory_region_init_ram(main_mem, NULL, "riscv.sifive.u.ram",
hw/riscv/sifive_u.c:    memory_region_init_ram(boot_rom, NULL, "riscv.sifive.u.mrom",
hw/riscv/sifive_e.c:    memory_region_init_ram(mock_mmio, NULL, name, length, &error_fatal);
hw/riscv/sifive_e.c:    memory_region_init_ram(main_mem, NULL, "riscv.sifive.e.ram",
hw/riscv/sifive_e.c:    memory_region_init_ram(mask_rom, NULL, "riscv.sifive.e.mrom",
hw/riscv/sifive_e.c:    memory_region_init_ram(xip_mem, NULL, "riscv.sifive.e.xip",
hw/riscv/spike.c:    memory_region_init_ram(main_mem, NULL, "riscv.spike.ram",
hw/riscv/spike.c:    memory_region_init_ram(boot_rom, NULL, "riscv.spike.bootrom",
hw/riscv/spike.c:    memory_region_init_ram(main_mem, NULL, "riscv.spike.ram",
hw/riscv/spike.c:    memory_region_init_ram(boot_rom, NULL, "riscv.spike.bootrom",
hw/riscv/virt.c:    memory_region_init_ram(main_mem, NULL, "riscv_virt_board.ram",
hw/riscv/virt.c:    memory_region_init_ram(boot_rom, NULL, "riscv_virt_board.bootrom",
hw/sh4/shix.c:    memory_region_init_ram(rom, NULL, "shix.rom", 0x4000, &error_fatal);
hw/sh4/shix.c:    memory_region_init_ram(&sdram[0], NULL, "shix.sdram1", 0x01000000,
hw/sh4/shix.c:    memory_region_init_ram(&sdram[1], NULL, "shix.sdram2", 0x01000000,
hw/sh4/r2d.c:    memory_region_init_ram(sdram, NULL, "r2d.sdram", SDRAM_SIZE, &error_fatal);
hw/moxie/moxiesim.c:    memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal);
hw/moxie/moxiesim.c:    memory_region_init_ram(rom, NULL, "moxie.rom", FIRMWARE_SIZE, &error_fatal);
hw/m68k/mcf5208.c:    memory_region_init_ram(sram, NULL, "mcf5208.sram", 16384, &error_fatal);
hw/m68k/an5206.c:    memory_region_init_ram(sram, NULL, "an5206.sram", 512, &error_fatal);
hw/i386/pc_sysfw.c:    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
hw/i386/pc_sysfw.c:    memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
hw/i386/xen/xen-hvm.c:    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
hw/i386/pc.c:    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
hw/display/tc6393xb.c:    memory_region_init_ram(&s->vram, NULL, "tc6393xb.vram", 0x100000,
hw/display/vmware_vga.c:    memory_region_init_ram(&s->fifo_ram, NULL, "vmsvga.fifo", s->fifo_size,
hw/display/cg3.c:    memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size,
hw/openrisc/openrisc_sim.c:    memory_region_init_ram(ram, NULL, "openrisc.ram", ram_size, &error_fatal);
hw/unicore32/puv3.c:    memory_region_init_ram(ram_memory, NULL, "puv3.ram", ram_size,

Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

Posted by Peter Maydell 7 years, 9 months ago

On 21 April 2018 at 09:52, Cédric Le Goater <clg@kaod.org> wrote:
> On 04/20/2018 02:09 PM, Peter Maydell wrote:
>> Notes:
>>  * any device that registers a ramblock globally is a bit dodgy, because
>> it means you can't have more than one of it (the ramblock names would
>> clash). We should fix those devices for the cases where we're willing
>> to take the migration compat break.
>
> There are quite a few models calling memory_region_init_ram() with
> a NULL owner, see below. Should we fix all these ?

Note that I said "any device". A board model that uses a NULL
owner (or does a register_global) is fine, because by definition
there is only one board. (The "null owner" case is really intended
for this -- traditionally there was no useful owner object available
to board code, though these days MachineState is an object
so it could be used.) Most of the examples you cite are in
board code and are fine. Where they are in devices we should
indeed consider making them pass in the device pointer as
the owner, though.

thanks
-- PMM