exec.c | 4 ++++ include/exec/cpu-common.h | 1 + include/qemu/option.h | 1 + migration/ram.c | 2 +- vl.c | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-)
Commit 18269069c310 ("migration: Introduce ignore-shared capability")
addes a ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.
This commit expected that QEMU doesn't write to guest RAM until
VM starts, but it does on aarch64 qemu:
Backtrace:
1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458
2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479
3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675
6 0x000055f4a2c9851d in main () at vl.c:4552
In address_space_write_rom_internal, if the ramblock is RAM of
ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend
file corruption.
But in normal case (!in_incoming_migration), this logic should be reserved.
Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability")
Signed-off-by: Catherine Ho <catherine.hecx@gmail.com>
Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru>
---
exec.c | 4 ++++
include/exec/cpu-common.h | 1 +
include/qemu/option.h | 1 +
migration/ram.c | 2 +-
vl.c | 2 ++
5 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/exec.c b/exec.c
index 86a38d3b3b..f08321fb7a 100644
--- a/exec.c
+++ b/exec.c
@@ -47,6 +47,7 @@
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
+#include "qemu/option.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
@@ -3455,6 +3456,9 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
l = memory_access_size(mr, l, addr1);
} else {
/* ROM/RAM case */
+ if (in_incoming_migration && ramblock_is_ignored(mr->ram_block)) {
+ break;
+ }
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (type) {
case WRITE_DATA:
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index cef8b88a2a..c80b7248a6 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb);
ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
+bool ramblock_is_ignored(RAMBlock *block);
bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
void qemu_ram_set_uf_zeroable(RAMBlock *rb);
bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/include/qemu/option.h b/include/qemu/option.h
index 844587cab3..7509435164 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -139,4 +139,5 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list);
QDict *keyval_parse(const char *params, const char *implied_key,
Error **errp);
+extern bool in_incoming_migration;
#endif
diff --git a/migration/ram.c b/migration/ram.c
index 35bd6213e9..d6de9d335d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,7 +159,7 @@ out:
return ret;
}
-static bool ramblock_is_ignored(RAMBlock *block)
+bool ramblock_is_ignored(RAMBlock *block)
{
return !qemu_ram_is_migratable(block) ||
(migrate_ignore_shared() && qemu_ram_is_shared(block));
diff --git a/vl.c b/vl.c
index c1d5484e12..b79aec9ac3 100644
--- a/vl.c
+++ b/vl.c
@@ -145,6 +145,7 @@ bool enable_cpu_pm = false;
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
+bool in_incoming_migration;
static enum {
RTC_BASE_UTC,
RTC_BASE_LOCALTIME,
@@ -3797,6 +3798,7 @@ int main(int argc, char **argv, char **envp)
runstate_set(RUN_STATE_INMIGRATE);
}
incoming = optarg;
+ in_incoming_migration = true;
break;
case QEMU_OPTION_only_migratable:
/*
--
2.17.1
On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote: > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > addes a ignore-shared capability to bypass the shared ramblock (e,g, > membackend + numa node). It does good to live migration. > > This commit expected that QEMU doesn't write to guest RAM until > VM starts, but it does on aarch64 qemu: > Backtrace: > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > In address_space_write_rom_internal, if the ramblock is RAM of > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > file corruption. > But in normal case (!in_incoming_migration), this logic should be reserved. (I am sorry if these questions are silly...) Could I ask when a ROM will be changed during execution of the guest, and why? I can understand that a rom_reset() should feed the real ROM buffer with the ROM data that provided, but I don't understand why it needs to be changed later on and why data could corrupt (in that case it's not only changed but shared with other processes with reasons). > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability") > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > --- > exec.c | 4 ++++ > include/exec/cpu-common.h | 1 + > include/qemu/option.h | 1 + > migration/ram.c | 2 +- > vl.c | 2 ++ > 5 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/exec.c b/exec.c > index 86a38d3b3b..f08321fb7a 100644 > --- a/exec.c > +++ b/exec.c > @@ -47,6 +47,7 @@ > #include "exec/address-spaces.h" > #include "sysemu/xen-mapcache.h" > #include "trace-root.h" > +#include "qemu/option.h" > > #ifdef CONFIG_FALLOCATE_PUNCH_HOLE > #include <linux/falloc.h> > @@ -3455,6 +3456,9 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, > l = memory_access_size(mr, l, addr1); > } else { > /* ROM/RAM case */ > + if (in_incoming_migration && ramblock_is_ignored(mr->ram_block)) { It's hackish IMHO... and this will be true not only during the incoming migration, but also for the rest lifecycle of the VM after migration finished. Is that really what you want (since after in_incoming_migration is set, it's never cleared)? Even if the answer is yes, you can also consider to reuse the "incoming" variable right above it and export that? Though maybe it needs a rename. Thanks, > @@ -3797,6 +3798,7 @@ int main(int argc, char **argv, char **envp) > runstate_set(RUN_STATE_INMIGRATE); > } > incoming = optarg; > + in_incoming_migration = true; > break; > case QEMU_OPTION_only_migratable: > /* > -- > 2.17.1 > > Regards, -- Peter Xu
On Wed, 20 Mar 2019 at 13:07, Peter Xu <peterx@redhat.com> wrote: > > On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote: > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > addes a ignore-shared capability to bypass the shared ramblock (e,g, > > membackend + numa node). It does good to live migration. > > > > This commit expected that QEMU doesn't write to guest RAM until > > VM starts, but it does on aarch64 qemu: > > Backtrace: > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > In address_space_write_rom_internal, if the ramblock is RAM of > > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > > file corruption. > > But in normal case (!in_incoming_migration), this logic should be reserved. > > (I am sorry if these questions are silly...) > > Could I ask when a ROM will be changed during execution of the guest, > and why? Sure :), as told by Peter "as part of reset, we write the contents of ROM blobs, ELF files loaded through -kernel, etc, to the guest memory." > > I can understand that a rom_reset() should feed the real ROM buffer > with the ROM data that provided, but I don't understand why it needs > to be changed later on and why data could corrupt (in that case it's > not only changed but shared with other processes with reasons). I guess (sorry about that), rom data should be restore on reset. I watched about several mega bytes passed to memcpy in my debuggin. But if the ram is memory-backend-file and shared, all the neccessry data had been reserved and unchanged in memory-backend file. Best regard, Catherine > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability") > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > --- > > exec.c | 4 ++++ > > include/exec/cpu-common.h | 1 + > > include/qemu/option.h | 1 + > > migration/ram.c | 2 +- > > vl.c | 2 ++ > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > diff --git a/exec.c b/exec.c > > index 86a38d3b3b..f08321fb7a 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -47,6 +47,7 @@ > > #include "exec/address-spaces.h" > > #include "sysemu/xen-mapcache.h" > > #include "trace-root.h" > > +#include "qemu/option.h" > > > > #ifdef CONFIG_FALLOCATE_PUNCH_HOLE > > #include <linux/falloc.h> > > @@ -3455,6 +3456,9 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, > > l = memory_access_size(mr, l, addr1); > > } else { > > /* ROM/RAM case */ > > + if (in_incoming_migration && ramblock_is_ignored(mr->ram_block)) { > > It's hackish IMHO... and this will be true not only during the > incoming migration, but also for the rest lifecycle of the VM after > migration finished. Is that really what you want (since after > in_incoming_migration is set, it's never cleared)? Even if the answer > is yes, you can also consider to reuse the "incoming" variable right > above it and export that? Though maybe it needs a rename. Thanks, I am considerring clear it after process_incoming_migration_co->qemu_loadvm_state > > Thanks, > > > @@ -3797,6 +3798,7 @@ int main(int argc, char **argv, char **envp) > > runstate_set(RUN_STATE_INMIGRATE); > > } > > incoming = optarg; > > + in_incoming_migration = true; > > break; > > case QEMU_OPTION_only_migratable: > > /* > > -- > > 2.17.1 > > > > > > Regards, > > -- > Peter Xu
On Wed, Mar 20, 2019 at 04:05:58PM +0800, chenxi He wrote: > On Wed, 20 Mar 2019 at 13:07, Peter Xu <peterx@redhat.com> wrote: > > > > On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote: > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > addes a ignore-shared capability to bypass the shared ramblock (e,g, > > > membackend + numa node). It does good to live migration. > > > > > > This commit expected that QEMU doesn't write to guest RAM until > > > VM starts, but it does on aarch64 qemu: > > > Backtrace: > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > In address_space_write_rom_internal, if the ramblock is RAM of > > > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > > > file corruption. > > > But in normal case (!in_incoming_migration), this logic should be reserved. > > > > (I am sorry if these questions are silly...) > > > > Could I ask when a ROM will be changed during execution of the guest, > > and why? > Sure :), as told by Peter > "as part of reset, we write the contents of ROM blobs, ELF files loaded through > -kernel, etc, to the guest memory." I see... I tried to dig on how x86 implemented the "-kernel" parameter and I noticed that it's dumping the kernel image into fw_cfg and probably that's also the reason why this should not be a problem for x86 because rom reset on x86 won't overwrite the image. Meanwhile, it seems totally different comparing to what has been done by ARM, which should probably be using rom_add_elf_program() to load the image if my reading is correct, so the kernel image is a ROM on ARM even if it can be written... Is my understanding correct? With that, I still feel that hacking into the memory write functions are tricky and I feel like it could bring hard to debug issues. Would it be possible that we identify somehow that this ROM is a fake one (since it can actually be written) and we ignore the reset of it when proper (e.g., the initial reset on destination)? Thanks, -- Peter Xu
Thanks, Peter See my comments below, please On Thu, 21 Mar 2019 at 14:10, Peter Xu <peterx@redhat.com> wrote: > > On Wed, Mar 20, 2019 at 04:05:58PM +0800, chenxi He wrote: > > On Wed, 20 Mar 2019 at 13:07, Peter Xu <peterx@redhat.com> wrote: > > > > > > On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote: > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > addes a ignore-shared capability to bypass the shared ramblock (e,g, > > > > membackend + numa node). It does good to live migration. > > > > > > > > This commit expected that QEMU doesn't write to guest RAM until > > > > VM starts, but it does on aarch64 qemu: > > > > Backtrace: > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > In address_space_write_rom_internal, if the ramblock is RAM of > > > > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > > > > file corruption. > > > > But in normal case (!in_incoming_migration), this logic should be reserved. > > > > > > (I am sorry if these questions are silly...) > > > > > > Could I ask when a ROM will be changed during execution of the guest, > > > and why? > > Sure :), as told by Peter > > "as part of reset, we write the contents of ROM blobs, ELF files loaded through > > -kernel, etc, to the guest memory." > > I see... > > I tried to dig on how x86 implemented the "-kernel" parameter and I > noticed that it's dumping the kernel image into fw_cfg and probably > that's also the reason why this should not be a problem for x86 > because rom reset on x86 won't overwrite the image. Meanwhile, it > seems totally different comparing to what has been done by ARM, which > should probably be using rom_add_elf_program() to load the image if my > reading is correct, so the kernel image is a ROM on ARM even if it can > be written... Is my understanding correct? > > With that, I still feel that hacking into the memory write functions > are tricky and I feel like it could bring hard to debug issues. Would > it be possible that we identify somehow that this ROM is a fake one > (since it can actually be written) and we ignore the reset of it when > proper (e.g., the initial reset on destination)? I found that special rom block is "dtb" See commit 4c4bf654746eae5a042bde6c150d534d8849b762 Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Fri Sep 12 14:06:50 2014 +0100 hw/arm/boot: load DTB as a ROM image In order to make the device tree blob (DTB) available in memory not only at first boot, but also after system reset, use rom_blob_add_fixed() to install it into memory. So in igore-shared case, dtb is not required here ? Maybe I could add a flag in struct Rom to set it when the rom is created by rom_add_blob_fixed_as()? And in rom_reset, just bypass this rom when in_incoming_migration && ignore_shared [+ Ard Biesheuvel] Best Regard, Catherine > > Thanks, > > -- > Peter Xu
On Thu, Mar 21, 2019 at 11:31:30PM +0800, Catherine Ho wrote: > Thanks, Peter > See my comments below, please > > On Thu, 21 Mar 2019 at 14:10, Peter Xu <peterx@redhat.com> wrote: > > > > On Wed, Mar 20, 2019 at 04:05:58PM +0800, chenxi He wrote: > > > On Wed, 20 Mar 2019 at 13:07, Peter Xu <peterx@redhat.com> wrote: > > > > > > > > On Tue, Mar 19, 2019 at 11:49:22AM -0400, Catherine Ho wrote: > > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > > addes a ignore-shared capability to bypass the shared ramblock (e,g, > > > > > membackend + numa node). It does good to live migration. > > > > > > > > > > This commit expected that QEMU doesn't write to guest RAM until > > > > > VM starts, but it does on aarch64 qemu: > > > > > Backtrace: > > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > > > In address_space_write_rom_internal, if the ramblock is RAM of > > > > > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > > > > > file corruption. > > > > > But in normal case (!in_incoming_migration), this logic should be reserved. > > > > > > > > (I am sorry if these questions are silly...) > > > > > > > > Could I ask when a ROM will be changed during execution of the guest, > > > > and why? > > > Sure :), as told by Peter > > > "as part of reset, we write the contents of ROM blobs, ELF files loaded through > > > -kernel, etc, to the guest memory." > > > > I see... > > > > I tried to dig on how x86 implemented the "-kernel" parameter and I > > noticed that it's dumping the kernel image into fw_cfg and probably > > that's also the reason why this should not be a problem for x86 > > because rom reset on x86 won't overwrite the image. Meanwhile, it > > seems totally different comparing to what has been done by ARM, which > > should probably be using rom_add_elf_program() to load the image if my > > reading is correct, so the kernel image is a ROM on ARM even if it can > > be written... Is my understanding correct? > > > > With that, I still feel that hacking into the memory write functions > > are tricky and I feel like it could bring hard to debug issues. Would > > it be possible that we identify somehow that this ROM is a fake one > > (since it can actually be written) and we ignore the reset of it when > > proper (e.g., the initial reset on destination)? > I found that special rom block is "dtb" > See > commit 4c4bf654746eae5a042bde6c150d534d8849b762 > Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> > Date: Fri Sep 12 14:06:50 2014 +0100 > > hw/arm/boot: load DTB as a ROM image > > In order to make the device tree blob (DTB) available in memory not only at > first boot, but also after system reset, use rom_blob_add_fixed() to install > it into memory. Ah ok. So "-kernel" is not the problem. > > So in igore-shared case, dtb is not required here ? I don't know ARM much, so I think I'll just leave it to others (I'm CCing Peter Maydell and Eric here since they're the ARM experts at least I know of)... > Maybe I could add a flag in struct Rom to set it when the rom is created by > rom_add_blob_fixed_as()? And in rom_reset, just bypass this rom when > in_incoming_migration && ignore_shared Yeah it sounds better than the original proposal at least to me. Now rom_add_blob() has a "read_only" flag that is always equals to "true" afaict. Maybe you can make the dtb the first one with read_only=false (the point where arm_load_dtb calls rom_add_blob_fixed_as but maybe you can directly call rom_add_blob) and store it in struct Rom too. Regards, -- Peter Xu
On Thu, 21 Mar 2019 at 15:32, Catherine Ho <catherine.hecx@gmail.com> wrote: > So in igore-shared case, dtb is not required here ? Can you explain what the "ignore-shared" case is? In general, when we reset the system, we want to bring it back to exactly the state that it was in when QEMU was first started. That means we need to reload all the rom blob data into memory (because the guest might have modified that memory while it was running). If I understand correctly from other threads, the idea is that some of the RAM is shared between source and destination so it does not need to be manually copied during migration. If that is correct, then perhaps the right thing is that in the rom_reset code: * if this rom blob is being loaded into a "shared" ram block * and this reset is happening specifically before an inbound migration * then skip loading the rom blob data ? I don't know the right way to determine either the "is this a shared ram area" or "is this the reset prior to inbound migration", but perhaps you can fill that in. > Maybe I could add a flag in struct Rom to set it when the rom is created by > rom_add_blob_fixed_as()? And in rom_reset, just bypass this rom when > in_incoming_migration && ignore_shared No, I think this is wrong -- the particular function used to create the rom blob data should not affect how we are treating it. thanks -- PMM
* Peter Maydell (peter.maydell@linaro.org) wrote: > On Thu, 21 Mar 2019 at 15:32, Catherine Ho <catherine.hecx@gmail.com> wrote: > > So in igore-shared case, dtb is not required here ? > > Can you explain what the "ignore-shared" case is? It's a migration capability added recently by Yury Kotov, which as you say below, skips migration of shared ram blocks. The definition of 'shared' is that the RAMBlock rb->flags has RAM_SHARED set, and that normally corresponds to when we call mmap with MAP_SHARED. The user visible version of that is normally if you set up memory with a hostmem backend it has a 'share' property. So 'ignore-shared' corresponds to 'share=on' on the memory backend. > In general, when we reset the system, we want to bring it > back to exactly the state that it was in when QEMU was > first started. That means we need to reload all the rom blob > data into memory (because the guest might have modified > that memory while it was running). > > If I understand correctly from other threads, the idea is > that some of the RAM is shared between source and destination > so it does not need to be manually copied during migration. > If that is correct, then perhaps the right thing is that > in the rom_reset code: > * if this rom blob is being loaded into a "shared" ram block > * and this reset is happening specifically before an > inbound migration > * then skip loading the rom blob data > > ? > > I don't know the right way to determine either the "is this > a shared ram area" or "is this the reset prior to inbound > migration", but perhaps you can fill that in. Right, so in Catherine's patch there's a simple in_incoming_migration and checking ramblock_is_ignored; it might be better to use the qemu_ram_is_shared() call and I wonder if we can just check for being in RUN_STATE_INMIGRATE - if that's early enough. Dave > > Maybe I could add a flag in struct Rom to set it when the rom is created by > > rom_add_blob_fixed_as()? And in rom_reset, just bypass this rom when > > in_incoming_migration && ignore_shared > > No, I think this is wrong -- the particular function used > to create the rom blob data should not affect how we are > treating it. > > thanks > -- PMM -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Fri, Mar 22, 2019 at 10:12:11AM +0000, Dr. David Alan Gilbert wrote: [...] > > In general, when we reset the system, we want to bring it > > back to exactly the state that it was in when QEMU was > > first started. That means we need to reload all the rom blob > > data into memory (because the guest might have modified > > that memory while it was running). > > > > If I understand correctly from other threads, the idea is > > that some of the RAM is shared between source and destination > > so it does not need to be manually copied during migration. > > If that is correct, then perhaps the right thing is that > > in the rom_reset code: > > * if this rom blob is being loaded into a "shared" ram block > > * and this reset is happening specifically before an > > inbound migration > > * then skip loading the rom blob data > > > > ? > > > > I don't know the right way to determine either the "is this > > a shared ram area" or "is this the reset prior to inbound > > migration", but perhaps you can fill that in. > > Right, so in Catherine's patch there's a simple in_incoming_migration > and checking ramblock_is_ignored; it might be better to use the > qemu_ram_is_shared() call and I wonder if we can just check for being in > RUN_STATE_INMIGRATE - if that's early enough. Yes I feel like runstate_check(RUN_STATE_INMIGRATE) should be enough to replace the new variable. And I'm even thinking whether we need to check qemu_ram_is_shared() at all... if rom_reset() simply refills the ROM data into the RAMs, then IIUC that operation could be skipped for all incoming migrations because all those ROM data (no matter they are filled into shared RAM or not) should already have been filled on the source side and: - if the ROM data's RAMBlock is shared backend, the latest data should already been there on the shared backend files, or, - if the ROM data's RAMBlock is not shared backend, they'll eventually be migrated to the destination later on after this rom_reset() on destination by the general RAM migration code. Regards, -- Peter Xu
Hi all, thanks for the comments, I am digging into another potential error in the ignore-shared live migration. After that, I will send out the v2 asap B.R. Catherine On Mon, 25 Mar 2019 at 11:39, Peter Xu <peterx@redhat.com> wrote: > > On Fri, Mar 22, 2019 at 10:12:11AM +0000, Dr. David Alan Gilbert wrote: > > [...] > > > > In general, when we reset the system, we want to bring it > > > back to exactly the state that it was in when QEMU was > > > first started. That means we need to reload all the rom blob > > > data into memory (because the guest might have modified > > > that memory while it was running). > > > > > > If I understand correctly from other threads, the idea is > > > that some of the RAM is shared between source and destination > > > so it does not need to be manually copied during migration. > > > If that is correct, then perhaps the right thing is that > > > in the rom_reset code: > > > * if this rom blob is being loaded into a "shared" ram block > > > * and this reset is happening specifically before an > > > inbound migration > > > * then skip loading the rom blob data > > > > > > ? > > > > > > I don't know the right way to determine either the "is this > > > a shared ram area" or "is this the reset prior to inbound > > > migration", but perhaps you can fill that in. > > > > Right, so in Catherine's patch there's a simple in_incoming_migration > > and checking ramblock_is_ignored; it might be better to use the > > qemu_ram_is_shared() call and I wonder if we can just check for being in > > RUN_STATE_INMIGRATE - if that's early enough. > > Yes I feel like runstate_check(RUN_STATE_INMIGRATE) should be enough > to replace the new variable. And I'm even thinking whether we need to > check qemu_ram_is_shared() at all... if rom_reset() simply refills the > ROM data into the RAMs, then IIUC that operation could be skipped for > all incoming migrations because all those ROM data (no matter they are > filled into shared RAM or not) should already have been filled on the > source side and: > > - if the ROM data's RAMBlock is shared backend, the latest data should > already been there on the shared backend files, or, > > - if the ROM data's RAMBlock is not shared backend, they'll eventually > be migrated to the destination later on after this rom_reset() on > destination by the general RAM migration code. > > Regards, > > -- > Peter Xu
Hi all, I found an insterested issue here besides writting "dtb" rom into ram. That is, should qemu support incoming from the ignore-shared memory backend file repeatedly? After I resolve the issue of writting "dtb" rom into ram, the incoming from the ignore-shared memory backend file works fine at the *first* time, but failed in the 2nd time. It will report: VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 Failed to load virtio-scsi:virtio error while loading state for instance 0x0 of device 'virtio-mmio@000000000a003e00/virtio-scsi' load of migration failed: Operation not permitted The root cause is the used idx is moved forward after 1st time incoming, and in 2nd time incoming, the last_avail_idx will be incorrectly restored from the saved device state file(not in the ram). I watched this even on x86 for a virtio-scsi disk Any ideas for supporting 2nd time, 3rd time... incoming restoring? B.R. Catherine On Wed, 27 Mar 2019 at 08:45, Catherine Ho <catherine.hecx@gmail.com> wrote: > Hi all, thanks for the comments, I am digging into another potential > error in the > ignore-shared live migration. After that, I will send out the v2 asap > B.R. > Catherine > > > On Mon, 25 Mar 2019 at 11:39, Peter Xu <peterx@redhat.com> wrote: > > > > On Fri, Mar 22, 2019 at 10:12:11AM +0000, Dr. David Alan Gilbert wrote: > > > > [...] > > > > > > In general, when we reset the system, we want to bring it > > > > back to exactly the state that it was in when QEMU was > > > > first started. That means we need to reload all the rom blob > > > > data into memory (because the guest might have modified > > > > that memory while it was running). > > > > > > > > If I understand correctly from other threads, the idea is > > > > that some of the RAM is shared between source and destination > > > > so it does not need to be manually copied during migration. > > > > If that is correct, then perhaps the right thing is that > > > > in the rom_reset code: > > > > * if this rom blob is being loaded into a "shared" ram block > > > > * and this reset is happening specifically before an > > > > inbound migration > > > > * then skip loading the rom blob data > > > > > > > > ? > > > > > > > > I don't know the right way to determine either the "is this > > > > a shared ram area" or "is this the reset prior to inbound > > > > migration", but perhaps you can fill that in. > > > > > > Right, so in Catherine's patch there's a simple in_incoming_migration > > > and checking ramblock_is_ignored; it might be better to use the > > > qemu_ram_is_shared() call and I wonder if we can just check for being > in > > > RUN_STATE_INMIGRATE - if that's early enough. > > > > Yes I feel like runstate_check(RUN_STATE_INMIGRATE) should be enough > > to replace the new variable. And I'm even thinking whether we need to > > check qemu_ram_is_shared() at all... if rom_reset() simply refills the > > ROM data into the RAMs, then IIUC that operation could be skipped for > > all incoming migrations because all those ROM data (no matter they are > > filled into shared RAM or not) should already have been filled on the > > source side and: > > > > - if the ROM data's RAMBlock is shared backend, the latest data should > > already been there on the shared backend files, or, > > > > - if the ROM data's RAMBlock is not shared backend, they'll eventually > > be migrated to the destination later on after this rom_reset() on > > destination by the general RAM migration code. > > > > Regards, > > > > -- > > Peter Xu >
On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> wrote: > The root cause is the used idx is moved forward after 1st time incoming, and in 2nd time incoming, > the last_avail_idx will be incorrectly restored from the saved device state file(not in the ram). > > I watched this even on x86 for a virtio-scsi disk > > Any ideas for supporting 2nd time, 3rd time... incoming restoring? Does the destination end go through reset between the 1st and 2nd incoming attempts? I'm not a migration expert, but I thought that devices were allowed to assume that their state is "state of the device following QEMU reset" before the start of an incoming migration attempt. thanks -- PMM
Hi Peter Maydell On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org> wrote: > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> > wrote: > > The root cause is the used idx is moved forward after 1st time incoming, > and in 2nd time incoming, > > the last_avail_idx will be incorrectly restored from the saved device > state file(not in the ram). > > > > I watched this even on x86 for a virtio-scsi disk > > > > Any ideas for supporting 2nd time, 3rd time... incoming restoring? > > Does the destination end go through reset between the 1st and 2nd > seems not, please see my step below > incoming attempts? I'm not a migration expert, but I thought that > devices were allowed to assume that their state is "state of the > device following QEMU reset" before the start of an incoming > migration attempt. > Here is my step: 1. start guest normal by qemu with shared memory-backend file 2. stop the vm. save the device state to another file via monitor migrate "exec: cat>..." 3. quit the vm 4. retore the vm by qemu -incoming "exec:cat ..." 5. continue the vm via monito, the 1st incoming works fine 6. quit the vm 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time 8. continue -> error happened Actually, this can be fixed by forcely restore the idx by virtio_queue_restore_last_avail_idx() But I am sure whether it is reasonable. B.R. > > thanks > -- PMM >
On Tue, 2 Apr 2019 at 15:47, Catherine Ho <catherine.hecx@gmail.com> wrote: > Hi Peter Maydell > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org> > wrote: > >> On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> >> wrote: >> > The root cause is the used idx is moved forward after 1st time >> incoming, and in 2nd time incoming, >> > the last_avail_idx will be incorrectly restored from the saved device >> state file(not in the ram). >> > >> > I watched this even on x86 for a virtio-scsi disk >> > >> > Any ideas for supporting 2nd time, 3rd time... incoming restoring? >> >> Does the destination end go through reset between the 1st and 2nd >> > seems not, please see my step below > >> incoming attempts? I'm not a migration expert, but I thought that >> devices were allowed to assume that their state is "state of the >> device following QEMU reset" before the start of an incoming >> migration attempt. >> > > Here is my step: > 1. start guest normal by qemu with shared memory-backend file > 2. stop the vm. save the device state to another file via monitor migrate > "exec: cat>..." > 3. quit the vm > via "quit" command of monitor > 4. retore the vm by qemu -incoming "exec:cat ..." > 5. continue the vm via monito, the 1st incoming works fine > 6. quit the vm > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > 8. continue -> error happened > Actually, this can be fixed by forcely restore the idx by > virtio_queue_restore_last_avail_idx() > But I am sure whether it is reasonable. > s/sure/not sure > > B.R. > >> >> thanks >> -- PMM >> >
On Tue, 2 Apr 2019 at 14:47, Catherine Ho <catherine.hecx@gmail.com> wrote: > > Here is my step: > 1. start guest normal by qemu with shared memory-backend file > 2. stop the vm. save the device state to another file via monitor migrate "exec: cat>..." > 3. quit the vm > 4. retore the vm by qemu -incoming "exec:cat ..." > 5. continue the vm via monito, the 1st incoming works fine > 6. quit the vm > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > 8. continue -> error happened I think I'm confused. If you're killing the QEMU process and then running qemu again, then we must have done a full system reset as part of the work we do creating the VM for the second time we start the qemu binary. thanks -- PMM
On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: > Hi Peter Maydell > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org> wrote: > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> > > wrote: > > > The root cause is the used idx is moved forward after 1st time incoming, > > and in 2nd time incoming, > > > the last_avail_idx will be incorrectly restored from the saved device > > state file(not in the ram). > > > > > > I watched this even on x86 for a virtio-scsi disk > > > > > > Any ideas for supporting 2nd time, 3rd time... incoming restoring? > > > > Does the destination end go through reset between the 1st and 2nd > > > seems not, please see my step below > > > incoming attempts? I'm not a migration expert, but I thought that > > devices were allowed to assume that their state is "state of the > > device following QEMU reset" before the start of an incoming > > migration attempt. > > > > Here is my step: > 1. start guest normal by qemu with shared memory-backend file > 2. stop the vm. save the device state to another file via monitor migrate > "exec: cat>..." > 3. quit the vm > 4. retore the vm by qemu -incoming "exec:cat ..." > 5. continue the vm via monito, the 1st incoming works fine > 6. quit the vm > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > 8. continue -> error happened > Actually, this can be fixed by forcely restore the idx by > virtio_queue_restore_last_avail_idx() > But I am sure whether it is reasonable. Yeah I really suspect its validity. IMHO normal migration streams keep the device state and RAM data together in the dumped file, so they always match. In your shared case, the device states are in the dumped file however the RAM data is located somewhere else. After you quit the VM from the 1st incoming migration the RAM is new (because that's a shared memory file) and the device data is still old. They do not match already, then I'd say you can't migrate with that any more. If you want to do that, you'd better take snapshot of the RAM backend file if your filesystem supports (or even simpler, to back it up before hand) before you start any incoming migration. Then with the dumped file (which contains the device states) and that snapshot file (which contains the exact RAM data that matches the device states) you'll alway be able to migrate for as many times as you want. Regards, -- Peter Xu
On Tue, 2 Apr 2019 at 15:58, Peter Xu <peterx@redhat.com> wrote: > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: > > Hi Peter Maydell > > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org> > wrote: > > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> > > > wrote: > > > > The root cause is the used idx is moved forward after 1st time > incoming, > > > and in 2nd time incoming, > > > > the last_avail_idx will be incorrectly restored from the saved device > > > state file(not in the ram). > > > > > > > > I watched this even on x86 for a virtio-scsi disk > > > > > > > > Any ideas for supporting 2nd time, 3rd time... incoming restoring? > > > > > > Does the destination end go through reset between the 1st and 2nd > > > > > seems not, please see my step below > > > > > incoming attempts? I'm not a migration expert, but I thought that > > > devices were allowed to assume that their state is "state of the > > > device following QEMU reset" before the start of an incoming > > > migration attempt. > > > > > > > Here is my step: > > 1. start guest normal by qemu with shared memory-backend file > > 2. stop the vm. save the device state to another file via monitor migrate > > "exec: cat>..." > > 3. quit the vm > > 4. retore the vm by qemu -incoming "exec:cat ..." > > 5. continue the vm via monito, the 1st incoming works fine > > 6. quit the vm > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > > 8. continue -> error happened > > Actually, this can be fixed by forcely restore the idx by > > virtio_queue_restore_last_avail_idx() > > But I am sure whether it is reasonable. > > Yeah I really suspect its validity. > > IMHO normal migration streams keep the device state and RAM data > together in the dumped file, so they always match. > > In your shared case, the device states are in the dumped file however > the RAM data is located somewhere else. After you quit the VM from > the 1st incoming migration the RAM is new (because that's a shared > memory file) and the device data is still old. They do not match > already, then I'd say you can't migrate with that any more. > > If you want to do that, you'd better take snapshot of the RAM backend > file if your filesystem supports (or even simpler, to back it up > before hand) before you start any incoming migration. Then with the > dumped file (which contains the device states) and that snapshot file > (which contains the exact RAM data that matches the device states) > you'll alway be able to migrate for as many times as you want. > Understood, thanks Peter Xu Is there any feasible way to indicate the snapshot of the RAM backend file is matched with the device data? >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 >Failed to load virtio-scsi:virtio Because I thought reporting above error is not so friendly. Could we add a version id in both RAM backend file and device date file? B.R. Catherine > > > Regards, > > -- > Peter Xu >
On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote: > On Tue, 2 Apr 2019 at 15:58, Peter Xu <peterx@redhat.com> wrote: > > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: > > > Hi Peter Maydell > > > > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org> > > wrote: > > > > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho <catherine.hecx@gmail.com> > > > > wrote: > > > > > The root cause is the used idx is moved forward after 1st time > > incoming, > > > > and in 2nd time incoming, > > > > > the last_avail_idx will be incorrectly restored from the saved device > > > > state file(not in the ram). > > > > > > > > > > I watched this even on x86 for a virtio-scsi disk > > > > > > > > > > Any ideas for supporting 2nd time, 3rd time... incoming restoring? > > > > > > > > Does the destination end go through reset between the 1st and 2nd > > > > > > > seems not, please see my step below > > > > > > > incoming attempts? I'm not a migration expert, but I thought that > > > > devices were allowed to assume that their state is "state of the > > > > device following QEMU reset" before the start of an incoming > > > > migration attempt. > > > > > > > > > > Here is my step: > > > 1. start guest normal by qemu with shared memory-backend file > > > 2. stop the vm. save the device state to another file via monitor migrate > > > "exec: cat>..." > > > 3. quit the vm > > > 4. retore the vm by qemu -incoming "exec:cat ..." > > > 5. continue the vm via monito, the 1st incoming works fine > > > 6. quit the vm > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > > > 8. continue -> error happened > > > Actually, this can be fixed by forcely restore the idx by > > > virtio_queue_restore_last_avail_idx() > > > But I am sure whether it is reasonable. > > > > Yeah I really suspect its validity. > > > > IMHO normal migration streams keep the device state and RAM data > > together in the dumped file, so they always match. > > > > In your shared case, the device states are in the dumped file however > > the RAM data is located somewhere else. After you quit the VM from > > the 1st incoming migration the RAM is new (because that's a shared > > memory file) and the device data is still old. They do not match > > already, then I'd say you can't migrate with that any more. > > > > If you want to do that, you'd better take snapshot of the RAM backend > > file if your filesystem supports (or even simpler, to back it up > > before hand) before you start any incoming migration. Then with the > > dumped file (which contains the device states) and that snapshot file > > (which contains the exact RAM data that matches the device states) > > you'll alway be able to migrate for as many times as you want. > > > > Understood, thanks Peter Xu > Is there any feasible way to indicate the snapshot of the RAM backend file > is > matched with the device data? > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 > >Failed to load virtio-scsi:virtio > > Because I thought reporting above error is not so friendly. Could we add a > version id in both RAM backend file and device date file? It would be non-trivial I'd say - AFAIK we don't have an existing way to tag the memory-backend-file content (IIUC that's what you use). And since you mentioned about versioning of these states, I just remembered that even with this you may not be able to get a complete matched state of the VM, because AFAICT actually besides RAM state & device state, you probably also need to consider the disk state as well. After you started the VM of the 1st incoming, there could be data flushed to the VM backend disk and then that state is changed as well. So here even if you snapshot the RAM file you'll still lose the disk state IIUC so it could still be broken. In other words, to make a migration/snapshot to work you'll need to make all these three states to match. Before we discuss further on the topic... could you share me with your requirement first? I started to get a bit confused now since when I thought about shared mem I was thinking about migrating within the same host to e.g. upgrade the hypervisor but that obviously does not need you to do incoming migration for multiple times. Then what do you finally want to achieve? Regards, -- Peter Xu
On Tue, 2 Apr 2019 at 20:37, Peter Xu <peterx@redhat.com> wrote: > On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote: > > On Tue, 2 Apr 2019 at 15:58, Peter Xu <peterx@redhat.com> wrote: > > > > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: > > > > Hi Peter Maydell > > > > > > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org > > > > > wrote: > > > > > > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho < > catherine.hecx@gmail.com> > > > > > wrote: > > > > > > The root cause is the used idx is moved forward after 1st time > > > incoming, > > > > > and in 2nd time incoming, > > > > > > the last_avail_idx will be incorrectly restored from the saved > device > > > > > state file(not in the ram). > > > > > > > > > > > > I watched this even on x86 for a virtio-scsi disk > > > > > > > > > > > > Any ideas for supporting 2nd time, 3rd time... incoming > restoring? > > > > > > > > > > Does the destination end go through reset between the 1st and 2nd > > > > > > > > > seems not, please see my step below > > > > > > > > > incoming attempts? I'm not a migration expert, but I thought that > > > > > devices were allowed to assume that their state is "state of the > > > > > device following QEMU reset" before the start of an incoming > > > > > migration attempt. > > > > > > > > > > > > > Here is my step: > > > > 1. start guest normal by qemu with shared memory-backend file > > > > 2. stop the vm. save the device state to another file via monitor > migrate > > > > "exec: cat>..." > > > > 3. quit the vm > > > > 4. retore the vm by qemu -incoming "exec:cat ..." > > > > 5. continue the vm via monito, the 1st incoming works fine > > > > 6. quit the vm > > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > > > > 8. continue -> error happened > > > > Actually, this can be fixed by forcely restore the idx by > > > > virtio_queue_restore_last_avail_idx() > > > > But I am sure whether it is reasonable. > > > > > > Yeah I really suspect its validity. > > > > > > IMHO normal migration streams keep the device state and RAM data > > > together in the dumped file, so they always match. > > > > > > In your shared case, the device states are in the dumped file however > > > the RAM data is located somewhere else. After you quit the VM from > > > the 1st incoming migration the RAM is new (because that's a shared > > > memory file) and the device data is still old. They do not match > > > already, then I'd say you can't migrate with that any more. > > > > > > If you want to do that, you'd better take snapshot of the RAM backend > > > file if your filesystem supports (or even simpler, to back it up > > > before hand) before you start any incoming migration. Then with the > > > dumped file (which contains the device states) and that snapshot file > > > (which contains the exact RAM data that matches the device states) > > > you'll alway be able to migrate for as many times as you want. > > > > > > > Understood, thanks Peter Xu > > Is there any feasible way to indicate the snapshot of the RAM backend > file > > is > > matched with the device data? > > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 > > >Failed to load virtio-scsi:virtio > > > > Because I thought reporting above error is not so friendly. Could we add > a > > version id in both RAM backend file and device date file? > > It would be non-trivial I'd say - AFAIK we don't have an existing way > to tag the memory-backend-file content (IIUC that's what you use). > > And since you mentioned about versioning of these states, I just > remembered that even with this you may not be able to get a complete > matched state of the VM, because AFAICT actually besides RAM state & > device state, you probably also need to consider the disk state as > well. After you started the VM of the 1st incoming, there could be > data flushed to the VM backend disk and then that state is changed as > well. So here even if you snapshot the RAM file you'll still lose the > disk state IIUC so it could still be broken. In other words, to make > a migration/snapshot to work you'll need to make all these three > states to match. > Yes, thanks > > Before we discuss further on the topic... could you share me with your > requirement first? I started to get a bit confused now since when I > thought about shared mem I was thinking about migrating within the > same host to e.g. upgrade the hypervisor but that obviously does not > need you to do incoming migration for multiple times. Then what do > you finally want to achieve? > Actually, I am investigating the ignore-shared capability case support on arm64. This feature is used in Kata containers project as "vm template" The rom reset failure is the first bug. Ok, now I can confirm that doing incoming migration for multiple times is supported. Thanks for the detailed explanation :) B.R. Catherine
On Tue, 2 Apr 2019 at 22:17, Catherine Ho <catherine.hecx@gmail.com> wrote: > > > On Tue, 2 Apr 2019 at 20:37, Peter Xu <peterx@redhat.com> wrote: > >> On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote: >> > On Tue, 2 Apr 2019 at 15:58, Peter Xu <peterx@redhat.com> wrote: >> > >> > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: >> > > > Hi Peter Maydell >> > > > >> > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell < >> peter.maydell@linaro.org> >> > > wrote: >> > > > >> > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho < >> catherine.hecx@gmail.com> >> > > > > wrote: >> > > > > > The root cause is the used idx is moved forward after 1st time >> > > incoming, >> > > > > and in 2nd time incoming, >> > > > > > the last_avail_idx will be incorrectly restored from the saved >> device >> > > > > state file(not in the ram). >> > > > > > >> > > > > > I watched this even on x86 for a virtio-scsi disk >> > > > > > >> > > > > > Any ideas for supporting 2nd time, 3rd time... incoming >> restoring? >> > > > > >> > > > > Does the destination end go through reset between the 1st and 2nd >> > > > > >> > > > seems not, please see my step below >> > > > >> > > > > incoming attempts? I'm not a migration expert, but I thought that >> > > > > devices were allowed to assume that their state is "state of the >> > > > > device following QEMU reset" before the start of an incoming >> > > > > migration attempt. >> > > > > >> > > > >> > > > Here is my step: >> > > > 1. start guest normal by qemu with shared memory-backend file >> > > > 2. stop the vm. save the device state to another file via monitor >> migrate >> > > > "exec: cat>..." >> > > > 3. quit the vm >> > > > 4. retore the vm by qemu -incoming "exec:cat ..." >> > > > 5. continue the vm via monito, the 1st incoming works fine >> > > > 6. quit the vm >> > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time >> > > > 8. continue -> error happened >> > > > Actually, this can be fixed by forcely restore the idx by >> > > > virtio_queue_restore_last_avail_idx() >> > > > But I am sure whether it is reasonable. >> > > >> > > Yeah I really suspect its validity. >> > > >> > > IMHO normal migration streams keep the device state and RAM data >> > > together in the dumped file, so they always match. >> > > >> > > In your shared case, the device states are in the dumped file however >> > > the RAM data is located somewhere else. After you quit the VM from >> > > the 1st incoming migration the RAM is new (because that's a shared >> > > memory file) and the device data is still old. They do not match >> > > already, then I'd say you can't migrate with that any more. >> > > >> > > If you want to do that, you'd better take snapshot of the RAM backend >> > > file if your filesystem supports (or even simpler, to back it up >> > > before hand) before you start any incoming migration. Then with the >> > > dumped file (which contains the device states) and that snapshot file >> > > (which contains the exact RAM data that matches the device states) >> > > you'll alway be able to migrate for as many times as you want. >> > > >> > >> > Understood, thanks Peter Xu >> > Is there any feasible way to indicate the snapshot of the RAM backend >> file >> > is >> > matched with the device data? >> > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 >> > >Failed to load virtio-scsi:virtio >> > >> > Because I thought reporting above error is not so friendly. Could we >> add a >> > version id in both RAM backend file and device date file? >> >> It would be non-trivial I'd say - AFAIK we don't have an existing way >> to tag the memory-backend-file content (IIUC that's what you use). >> >> And since you mentioned about versioning of these states, I just >> remembered that even with this you may not be able to get a complete >> matched state of the VM, because AFAICT actually besides RAM state & >> device state, you probably also need to consider the disk state as >> well. After you started the VM of the 1st incoming, there could be >> data flushed to the VM backend disk and then that state is changed as >> well. So here even if you snapshot the RAM file you'll still lose the >> disk state IIUC so it could still be broken. In other words, to make >> a migration/snapshot to work you'll need to make all these three >> states to match. >> > > Yes, thanks > >> >> Before we discuss further on the topic... could you share me with your >> requirement first? I started to get a bit confused now since when I >> thought about shared mem I was thinking about migrating within the >> same host to e.g. upgrade the hypervisor but that obviously does not >> need you to do incoming migration for multiple times. Then what do >> you finally want to achieve? >> > Actually, I am investigating the ignore-shared capability case support on > arm64. This feature is used in Kata containers project as "vm template" > The rom reset failure is the first bug. > Ok, now I can confirm that doing incoming migration for multiple times is > s/is /isn't, sorry ~ > supported. Thanks for the detailed explanation :) > B.R. > Catherine >
* Catherine Ho (catherine.hecx@gmail.com) wrote: > On Tue, 2 Apr 2019 at 20:37, Peter Xu <peterx@redhat.com> wrote: > > > On Tue, Apr 02, 2019 at 05:06:15PM +0800, Catherine Ho wrote: > > > On Tue, 2 Apr 2019 at 15:58, Peter Xu <peterx@redhat.com> wrote: > > > > > > > On Tue, Apr 02, 2019 at 03:47:16PM +0800, Catherine Ho wrote: > > > > > Hi Peter Maydell > > > > > > > > > > On Tue, 2 Apr 2019 at 11:05, Peter Maydell <peter.maydell@linaro.org > > > > > > > wrote: > > > > > > > > > > > On Tue, 2 Apr 2019 at 09:57, Catherine Ho < > > catherine.hecx@gmail.com> > > > > > > wrote: > > > > > > > The root cause is the used idx is moved forward after 1st time > > > > incoming, > > > > > > and in 2nd time incoming, > > > > > > > the last_avail_idx will be incorrectly restored from the saved > > device > > > > > > state file(not in the ram). > > > > > > > > > > > > > > I watched this even on x86 for a virtio-scsi disk > > > > > > > > > > > > > > Any ideas for supporting 2nd time, 3rd time... incoming > > restoring? > > > > > > > > > > > > Does the destination end go through reset between the 1st and 2nd > > > > > > > > > > > seems not, please see my step below > > > > > > > > > > > incoming attempts? I'm not a migration expert, but I thought that > > > > > > devices were allowed to assume that their state is "state of the > > > > > > device following QEMU reset" before the start of an incoming > > > > > > migration attempt. > > > > > > > > > > > > > > > > Here is my step: > > > > > 1. start guest normal by qemu with shared memory-backend file > > > > > 2. stop the vm. save the device state to another file via monitor > > migrate > > > > > "exec: cat>..." > > > > > 3. quit the vm > > > > > 4. retore the vm by qemu -incoming "exec:cat ..." > > > > > 5. continue the vm via monito, the 1st incoming works fine > > > > > 6. quit the vm > > > > > 7. retore the vm by qemu -incoming "exec:cat ..." for 2nd time > > > > > 8. continue -> error happened > > > > > Actually, this can be fixed by forcely restore the idx by > > > > > virtio_queue_restore_last_avail_idx() > > > > > But I am sure whether it is reasonable. > > > > > > > > Yeah I really suspect its validity. > > > > > > > > IMHO normal migration streams keep the device state and RAM data > > > > together in the dumped file, so they always match. > > > > > > > > In your shared case, the device states are in the dumped file however > > > > the RAM data is located somewhere else. After you quit the VM from > > > > the 1st incoming migration the RAM is new (because that's a shared > > > > memory file) and the device data is still old. They do not match > > > > already, then I'd say you can't migrate with that any more. > > > > > > > > If you want to do that, you'd better take snapshot of the RAM backend > > > > file if your filesystem supports (or even simpler, to back it up > > > > before hand) before you start any incoming migration. Then with the > > > > dumped file (which contains the device states) and that snapshot file > > > > (which contains the exact RAM data that matches the device states) > > > > you'll alway be able to migrate for as many times as you want. > > > > > > > > > > Understood, thanks Peter Xu > > > Is there any feasible way to indicate the snapshot of the RAM backend > > file > > > is > > > matched with the device data? > > > >VQ 2 size 0x400 < last_avail_idx 0x1639 - used_idx 0x2688 > > > >Failed to load virtio-scsi:virtio > > > > > > Because I thought reporting above error is not so friendly. Could we add > > a > > > version id in both RAM backend file and device date file? > > > > It would be non-trivial I'd say - AFAIK we don't have an existing way > > to tag the memory-backend-file content (IIUC that's what you use). > > > > And since you mentioned about versioning of these states, I just > > remembered that even with this you may not be able to get a complete > > matched state of the VM, because AFAICT actually besides RAM state & > > device state, you probably also need to consider the disk state as > > well. After you started the VM of the 1st incoming, there could be > > data flushed to the VM backend disk and then that state is changed as > > well. So here even if you snapshot the RAM file you'll still lose the > > disk state IIUC so it could still be broken. In other words, to make > > a migration/snapshot to work you'll need to make all these three > > states to match. > > > > Yes, thanks > > > > > Before we discuss further on the topic... could you share me with your > > requirement first? I started to get a bit confused now since when I > > thought about shared mem I was thinking about migrating within the > > same host to e.g. upgrade the hypervisor but that obviously does not > > need you to do incoming migration for multiple times. Then what do > > you finally want to achieve? > > > Actually, I am investigating the ignore-shared capability case support on > arm64. This feature is used in Kata containers project as "vm template" > The rom reset failure is the first bug. > Ok, now I can confirm that doing incoming migration for multiple times is > supported. Thanks for the detailed explanation :) I think the kata case should be OK; it's just you've got to be careful of what you're doing once you start splitting the state into separate chunks. That's always been true with the disk/ram split and it just gets a bit hairier when you split the RAM/devices. Dave > B.R. > Catherine -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Fri, 22 Mar 2019 at 10:12, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > Right, so in Catherine's patch there's a simple in_incoming_migration > and checking ramblock_is_ignored Mmm, but I think it is in the wrong place. It is being checked in address_space_write_rom_internal(). Either we want to suppress any and all writes to these RAM blocks, in which case I don't think that function covers all the ways that code can get hold of a RAM block and write to it; or we are confident that only the ROM blobs are an issue, in which case it is too low in the call stack and we should do the check in rom_reset(). Are there any other cases where we might write to RAM during reset/migration ? I thought of "user write via the debug stub or monitor", but perhaps those either can't happen or we define them as user error. But I there might be some other obscure cases, which perhaps argues for doing this at the lowest level possible. thanks -- PMM
* Peter Maydell (peter.maydell@linaro.org) wrote: > On Fri, 22 Mar 2019 at 10:12, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > Right, so in Catherine's patch there's a simple in_incoming_migration > > and checking ramblock_is_ignored > > Mmm, but I think it is in the wrong place. It is being checked > in address_space_write_rom_internal(). Either we want to > suppress any and all writes to these RAM blocks, in which > case I don't think that function covers all the ways that > code can get hold of a RAM block and write to it; or we are > confident that only the ROM blobs are an issue, in which > case it is too low in the call stack and we should do the > check in rom_reset(). > > Are there any other cases where we might write to RAM > during reset/migration ? I thought of "user write via > the debug stub or monitor", but perhaps those either > can't happen or we define them as user error. But I > there might be some other obscure cases, which perhaps > argues for doing this at the lowest level possible. Right, the thought of the 'might be other obscure cases' is why in Yury's 'QEMU may write to system_memory before guest starts' patch he marks all shared regions as read-only to see what hits it. I'm not sure; tbh inserting this type of check at the lowest level seems a bit invasive so I'd prefer doing it at the ROM blocks level; but we are bound to hit those obscure cases and then the failure is a real pain to debug when you find something has overwritten some of the RAM. Dave > thanks > -- PMM -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Thu, 21 Mar 2019 at 06:20, Peter Xu <peterx@redhat.com> wrote: > I tried to dig on how x86 implemented the "-kernel" parameter and I > noticed that it's dumping the kernel image into fw_cfg and probably > that's also the reason why this should not be a problem for x86 > because rom reset on x86 won't overwrite the image. Meanwhile, it > seems totally different comparing to what has been done by ARM, which > should probably be using rom_add_elf_program() to load the image if my > reading is correct, so the kernel image is a ROM on ARM even if it can > be written... Is my understanding correct? The "usual" case for x86 passes the kernel through the fw_cfg device, whereas the "usual" case for arm has rom-blobs that are written directly to the backing RAMBlock for the flash device or for real RAM. But it's possible also to have x86 configurations and command lines which write to the real RAM. For instance if you use the 'generic loader' device (documented in docs/generic-loader.txt) to load auxiliary data files into RAM on x86 you will hit the same problem. > With that, I still feel that hacking into the memory write functions > are tricky and I feel like it could bring hard to debug issues. Would > it be possible that we identify somehow that this ROM is a fake one > (since it can actually be written) and we ignore the reset of it when > proper (e.g., the initial reset on destination)? I don't know what you mean by 'fake ROM' and anyway this all applies to RAM as well... thanks -- PMM
On 2019/3/19 23:49, Catherine Ho wrote: > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > addes a ignore-shared capability to bypass the shared ramblock (e,g, > membackend + numa node). It does good to live migration. > > This commit expected that QEMU doesn't write to guest RAM until > VM starts, but it does on aarch64 qemu: > Backtrace: > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > In address_space_write_rom_internal, if the ramblock is RAM of > ramblock_is_ignored(), memcpy this ramblock will cause the memory-backend > file corruption. > But in normal case (!in_incoming_migration), this logic should be reserved. > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability") > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> thanks, Catherine Tested on aarch64 kvm/tcg mode --- Cheers, Justin (Jia He) > --- > exec.c | 4 ++++ > include/exec/cpu-common.h | 1 + > include/qemu/option.h | 1 + > migration/ram.c | 2 +- > vl.c | 2 ++ > 5 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/exec.c b/exec.c > index 86a38d3b3b..f08321fb7a 100644 > --- a/exec.c > +++ b/exec.c > @@ -47,6 +47,7 @@ > #include "exec/address-spaces.h" > #include "sysemu/xen-mapcache.h" > #include "trace-root.h" > +#include "qemu/option.h" > > #ifdef CONFIG_FALLOCATE_PUNCH_HOLE > #include <linux/falloc.h> > @@ -3455,6 +3456,9 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, > l = memory_access_size(mr, l, addr1); > } else { > /* ROM/RAM case */ > + if (in_incoming_migration && ramblock_is_ignored(mr->ram_block)) { > + break; > + } > ptr = qemu_map_ram_ptr(mr->ram_block, addr1); > switch (type) { > case WRITE_DATA: > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index cef8b88a2a..c80b7248a6 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb); > ram_addr_t qemu_ram_get_offset(RAMBlock *rb); > ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); > bool qemu_ram_is_shared(RAMBlock *rb); > +bool ramblock_is_ignored(RAMBlock *block); > bool qemu_ram_is_uf_zeroable(RAMBlock *rb); > void qemu_ram_set_uf_zeroable(RAMBlock *rb); > bool qemu_ram_is_migratable(RAMBlock *rb); > diff --git a/include/qemu/option.h b/include/qemu/option.h > index 844587cab3..7509435164 100644 > --- a/include/qemu/option.h > +++ b/include/qemu/option.h > @@ -139,4 +139,5 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list); > QDict *keyval_parse(const char *params, const char *implied_key, > Error **errp); > > +extern bool in_incoming_migration; > #endif > diff --git a/migration/ram.c b/migration/ram.c > index 35bd6213e9..d6de9d335d 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -159,7 +159,7 @@ out: > return ret; > } > > -static bool ramblock_is_ignored(RAMBlock *block) > +bool ramblock_is_ignored(RAMBlock *block) > { > return !qemu_ram_is_migratable(block) || > (migrate_ignore_shared() && qemu_ram_is_shared(block)); > diff --git a/vl.c b/vl.c > index c1d5484e12..b79aec9ac3 100644 > --- a/vl.c > +++ b/vl.c > @@ -145,6 +145,7 @@ bool enable_cpu_pm = false; > int nb_nics; > NICInfo nd_table[MAX_NICS]; > int autostart; > +bool in_incoming_migration; > static enum { > RTC_BASE_UTC, > RTC_BASE_LOCALTIME, > @@ -3797,6 +3798,7 @@ int main(int argc, char **argv, char **envp) > runstate_set(RUN_STATE_INMIGRATE); > } > incoming = optarg; > + in_incoming_migration = true; > break; > case QEMU_OPTION_only_migratable: > /* --
Commit 18269069c310 ("migration: Introduce ignore-shared capability")
addes ignore-shared capability to bypass the shared ramblock (e,g,
membackend + numa node). It does good to live migration.
This commit expectes that QEMU doesn't write to guest RAM until
VM starts, but it does on aarch64 qemu:
Backtrace:
1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458
2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479
3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101
4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69
5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675
6 0x000055f4a2c9851d in main () at vl.c:4552
Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram
during rom_reset. In ignore-shared incoming case, this rom filling
is not required since all the data has been stored in memory backend file.
Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability")
Signed-off-by: Catherine Ho <catherine.hecx@gmail.com>
Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru>
---
hw/core/loader.c | 15 +++++++++++++++
include/exec/cpu-common.h | 1 +
migration/ram.c | 2 +-
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/hw/core/loader.c b/hw/core/loader.c
index fe5cb24122..861a03335b 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -53,6 +53,7 @@
#include "hw/nvram/fw_cfg.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
+#include "exec/cpu-common.h"
#include "hw/boards.h"
#include "qemu/cutils.h"
@@ -1086,6 +1087,9 @@ int rom_add_option(const char *file, int32_t bootindex)
static void rom_reset(void *unused)
{
Rom *rom;
+ MemoryRegion *mr;
+ hwaddr hw_addr;
+ hwaddr l;
QTAILQ_FOREACH(rom, &roms, next) {
if (rom->fw_file) {
@@ -1094,6 +1098,17 @@ static void rom_reset(void *unused)
if (rom->data == NULL) {
continue;
}
+
+ /* bypass the rom blob in ignore-shared migration case*/
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ rcu_read_lock();
+ mr = address_space_translate(rom->as, rom->addr, &hw_addr, &l,
+ true, MEMTXATTRS_UNSPECIFIED);
+ rcu_read_unlock();
+ if (mr->ram_block != NULL && ramblock_is_ignored(mr->ram_block))
+ continue;
+ }
+
if (rom->mr) {
void *host = memory_region_get_ram_ptr(rom->mr);
memcpy(host, rom->data, rom->datasize);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index cef8b88a2a..c80b7248a6 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb);
ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
+bool ramblock_is_ignored(RAMBlock *block);
bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
void qemu_ram_set_uf_zeroable(RAMBlock *rb);
bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/migration/ram.c b/migration/ram.c
index 35bd6213e9..d6de9d335d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,7 +159,7 @@ out:
return ret;
}
-static bool ramblock_is_ignored(RAMBlock *block)
+bool ramblock_is_ignored(RAMBlock *block)
{
return !qemu_ram_is_migratable(block) ||
(migrate_ignore_shared() && qemu_ram_is_shared(block));
--
2.17.1
On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > addes ignore-shared capability to bypass the shared ramblock (e,g, > membackend + numa node). It does good to live migration. > > This commit expectes that QEMU doesn't write to guest RAM until > VM starts, but it does on aarch64 qemu: > Backtrace: > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at exec.c:3458 > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram > during rom_reset. In ignore-shared incoming case, this rom filling > is not required since all the data has been stored in memory backend file. > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared capability") > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> (note: IIUC normally you should have your signed-off to be the last line before the suggested-by :) About the patch content, I have had a question on whether we should need to check ignore-shared at all... That question lies in: https://patchwork.kernel.org/patch/10859889/#22546487 And if my understanding was correct above, IMHO the patch could be as simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] below. Thanks, > --- > hw/core/loader.c | 15 +++++++++++++++ > include/exec/cpu-common.h | 1 + > migration/ram.c | 2 +- > 3 files changed, 17 insertions(+), 1 deletion(-) > > diff --git a/hw/core/loader.c b/hw/core/loader.c > index fe5cb24122..861a03335b 100644 > --- a/hw/core/loader.c > +++ b/hw/core/loader.c > @@ -53,6 +53,7 @@ > #include "hw/nvram/fw_cfg.h" > #include "exec/memory.h" > #include "exec/address-spaces.h" > +#include "exec/cpu-common.h" > #include "hw/boards.h" > #include "qemu/cutils.h" > > @@ -1086,6 +1087,9 @@ int rom_add_option(const char *file, int32_t bootindex) > static void rom_reset(void *unused) > { > Rom *rom; > + MemoryRegion *mr; > + hwaddr hw_addr; > + hwaddr l; [1] > > QTAILQ_FOREACH(rom, &roms, next) { > if (rom->fw_file) { > @@ -1094,6 +1098,17 @@ static void rom_reset(void *unused) > if (rom->data == NULL) { > continue; > } > + > + /* bypass the rom blob in ignore-shared migration case*/ > + if (runstate_check(RUN_STATE_INMIGRATE)) { > + rcu_read_lock(); > + mr = address_space_translate(rom->as, rom->addr, &hw_addr, &l, > + true, MEMTXATTRS_UNSPECIFIED); > + rcu_read_unlock(); > + if (mr->ram_block != NULL && ramblock_is_ignored(mr->ram_block)) > + continue; > + } > + > if (rom->mr) { > void *host = memory_region_get_ram_ptr(rom->mr); > memcpy(host, rom->data, rom->datasize); > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index cef8b88a2a..c80b7248a6 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb); > ram_addr_t qemu_ram_get_offset(RAMBlock *rb); > ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); > bool qemu_ram_is_shared(RAMBlock *rb); > +bool ramblock_is_ignored(RAMBlock *block); > bool qemu_ram_is_uf_zeroable(RAMBlock *rb); > void qemu_ram_set_uf_zeroable(RAMBlock *rb); > bool qemu_ram_is_migratable(RAMBlock *rb); > diff --git a/migration/ram.c b/migration/ram.c > index 35bd6213e9..d6de9d335d 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -159,7 +159,7 @@ out: > return ret; > } > > -static bool ramblock_is_ignored(RAMBlock *block) > +bool ramblock_is_ignored(RAMBlock *block) > { > return !qemu_ram_is_migratable(block) || > (migrate_ignore_shared() && qemu_ram_is_shared(block)); > -- > 2.17.1 > Regards, -- Peter Xu
Hi Peter Xu On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > membackend + numa node). It does good to live migration. > > > > This commit expectes that QEMU doesn't write to guest RAM until > > VM starts, but it does on aarch64 qemu: > > Backtrace: > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > exec.c:3458 > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram > > during rom_reset. In ignore-shared incoming case, this rom filling > > is not required since all the data has been stored in memory backend > file. > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > capability") > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > (note: IIUC normally you should have your signed-off to be the last > line before the suggested-by :) > > About the patch content, I have had a question on whether we should > need to check ignore-shared at all... That question lies in: > > https://patchwork.kernel.org/patch/10859889/#22546487 > > And if my understanding was correct above, IMHO the patch could be as > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > below. > > Thanks, but I thought this method would break the x86 rom_reset logic during RUN_STATE_INMIGRATE. Please see the debugging patch and log lines below: diff --git a/hw/core/loader.c b/hw/core/loader.c index fe5cb24122..b0c871af26 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t bootindex) static void rom_reset(void *unused) { Rom *rom; - QTAILQ_FOREACH(rom, &roms, next) { + if (runstate_check(RUN_STATE_INMIGRATE)) + printf("rom name=%s\n",rom->name); if (rom->fw_file) { continue; } rom name=kvmvapic.bin rom name=linuxboot_dma.bin rom name=bios-256k.bin rom name=etc/acpi/tables rom name=etc/table-loader rom name=etc/acpi/rsdp B.R. Catherine > Thanks, > > > --- > > hw/core/loader.c | 15 +++++++++++++++ > > include/exec/cpu-common.h | 1 + > > migration/ram.c | 2 +- > > 3 files changed, 17 insertions(+), 1 deletion(-) > > > > diff --git a/hw/core/loader.c b/hw/core/loader.c > > index fe5cb24122..861a03335b 100644 > > --- a/hw/core/loader.c > > +++ b/hw/core/loader.c > > @@ -53,6 +53,7 @@ > > #include "hw/nvram/fw_cfg.h" > > #include "exec/memory.h" > > #include "exec/address-spaces.h" > > +#include "exec/cpu-common.h" > > #include "hw/boards.h" > > #include "qemu/cutils.h" > > > > @@ -1086,6 +1087,9 @@ int rom_add_option(const char *file, int32_t > bootindex) > > static void rom_reset(void *unused) > > { > > Rom *rom; > > + MemoryRegion *mr; > > + hwaddr hw_addr; > > + hwaddr l; > > [1] > > > > > QTAILQ_FOREACH(rom, &roms, next) { > > if (rom->fw_file) { > > @@ -1094,6 +1098,17 @@ static void rom_reset(void *unused) > > if (rom->data == NULL) { > > continue; > > } > > + > > + /* bypass the rom blob in ignore-shared migration case*/ > > + if (runstate_check(RUN_STATE_INMIGRATE)) { > > + rcu_read_lock(); > > + mr = address_space_translate(rom->as, rom->addr, &hw_addr, > &l, > > + true, MEMTXATTRS_UNSPECIFIED); > > + rcu_read_unlock(); > > + if (mr->ram_block != NULL && > ramblock_is_ignored(mr->ram_block)) > > + continue; > > + } > > + > > if (rom->mr) { > > void *host = memory_region_get_ram_ptr(rom->mr); > > memcpy(host, rom->data, rom->datasize); > > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > > index cef8b88a2a..c80b7248a6 100644 > > --- a/include/exec/cpu-common.h > > +++ b/include/exec/cpu-common.h > > @@ -76,6 +76,7 @@ void *qemu_ram_get_host_addr(RAMBlock *rb); > > ram_addr_t qemu_ram_get_offset(RAMBlock *rb); > > ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); > > bool qemu_ram_is_shared(RAMBlock *rb); > > +bool ramblock_is_ignored(RAMBlock *block); > > bool qemu_ram_is_uf_zeroable(RAMBlock *rb); > > void qemu_ram_set_uf_zeroable(RAMBlock *rb); > > bool qemu_ram_is_migratable(RAMBlock *rb); > > diff --git a/migration/ram.c b/migration/ram.c > > index 35bd6213e9..d6de9d335d 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -159,7 +159,7 @@ out: > > return ret; > > } > > > > -static bool ramblock_is_ignored(RAMBlock *block) > > +bool ramblock_is_ignored(RAMBlock *block) > > { > > return !qemu_ram_is_migratable(block) || > > (migrate_ignore_shared() && qemu_ram_is_shared(block)); > > -- > > 2.17.1 > > > > Regards, > > -- > Peter Xu >
On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote: > Hi Peter Xu > > On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > > membackend + numa node). It does good to live migration. > > > > > > This commit expectes that QEMU doesn't write to guest RAM until > > > VM starts, but it does on aarch64 qemu: > > > Backtrace: > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > > exec.c:3458 > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into ram > > > during rom_reset. In ignore-shared incoming case, this rom filling > > > is not required since all the data has been stored in memory backend > > file. > > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > > capability") > > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > > > (note: IIUC normally you should have your signed-off to be the last > > line before the suggested-by :) > > > > About the patch content, I have had a question on whether we should > > need to check ignore-shared at all... That question lies in: > > > > https://patchwork.kernel.org/patch/10859889/#22546487 > > > > And if my understanding was correct above, IMHO the patch could be as > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > > below. > > > > > Thanks, but I thought this method would break the x86 rom_reset logic during > RUN_STATE_INMIGRATE. > Please see the debugging patch and log lines below: > diff --git a/hw/core/loader.c b/hw/core/loader.c > index fe5cb24122..b0c871af26 100644 > --- a/hw/core/loader.c > +++ b/hw/core/loader.c > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t > bootindex) > static void rom_reset(void *unused) > { > Rom *rom; > - > QTAILQ_FOREACH(rom, &roms, next) { > + if (runstate_check(RUN_STATE_INMIGRATE)) > + printf("rom name=%s\n",rom->name); > if (rom->fw_file) { > continue; > } > > rom name=kvmvapic.bin > rom name=linuxboot_dma.bin > rom name=bios-256k.bin > rom name=etc/acpi/tables > rom name=etc/table-loader > rom name=etc/acpi/rsdp Hi, Catherine, I only see that rom names were dumped. Could you help explain what is broken? Thanks, -- Peter Xu
Hi Peter Xu On Thu, 4 Apr 2019 at 12:25, Peter Xu <peterx@redhat.com> wrote: > On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote: > > Hi Peter Xu > > > > On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > > > > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > > > membackend + numa node). It does good to live migration. > > > > > > > > This commit expectes that QEMU doesn't write to guest RAM until > > > > VM starts, but it does on aarch64 qemu: > > > > Backtrace: > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > > > exec.c:3458 > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into > ram > > > > during rom_reset. In ignore-shared incoming case, this rom filling > > > > is not required since all the data has been stored in memory backend > > > file. > > > > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > > > capability") > > > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > > > > > (note: IIUC normally you should have your signed-off to be the last > > > line before the suggested-by :) > > > > > > About the patch content, I have had a question on whether we should > > > need to check ignore-shared at all... That question lies in: > > > > > > https://patchwork.kernel.org/patch/10859889/#22546487 > > > > > > And if my understanding was correct above, IMHO the patch could be as > > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > > > below. > > > > > > > > Thanks, but I thought this method would break the x86 rom_reset logic > during > > RUN_STATE_INMIGRATE. > > Please see the debugging patch and log lines below: > > diff --git a/hw/core/loader.c b/hw/core/loader.c > > index fe5cb24122..b0c871af26 100644 > > --- a/hw/core/loader.c > > +++ b/hw/core/loader.c > > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t > > bootindex) > > static void rom_reset(void *unused) > > { > > Rom *rom; > > - > > QTAILQ_FOREACH(rom, &roms, next) { > > + if (runstate_check(RUN_STATE_INMIGRATE)) > > + printf("rom name=%s\n",rom->name); > > if (rom->fw_file) { > > continue; > > } > > > > rom name=kvmvapic.bin > > rom name=linuxboot_dma.bin > > rom name=bios-256k.bin > > rom name=etc/acpi/tables > > rom name=etc/table-loader > > rom name=etc/acpi/rsdp > > Hi, Catherine, > > I only see that rom names were dumped. Could you help explain what is > broken? Thanks, > Thanks for the suggestion. I haven't seen any obvious errors on x86 with this change. I merely consider not to change the old code logic too much. Ok, I will change it as you suggested if no more comments. B.R. Catherine > > -- > Peter Xu >
On Thu, Apr 04, 2019 at 03:17:41PM +0800, Catherine Ho wrote: > Hi Peter Xu > > On Thu, 4 Apr 2019 at 12:25, Peter Xu <peterx@redhat.com> wrote: > > > On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote: > > > Hi Peter Xu > > > > > > On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > > > > > > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > > > > membackend + numa node). It does good to live migration. > > > > > > > > > > This commit expectes that QEMU doesn't write to guest RAM until > > > > > VM starts, but it does on aarch64 qemu: > > > > > Backtrace: > > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > > > > exec.c:3458 > > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into > > ram > > > > > during rom_reset. In ignore-shared incoming case, this rom filling > > > > > is not required since all the data has been stored in memory backend > > > > file. > > > > > > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > > > > capability") > > > > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > > > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > > > > > > > (note: IIUC normally you should have your signed-off to be the last > > > > line before the suggested-by :) > > > > > > > > About the patch content, I have had a question on whether we should > > > > need to check ignore-shared at all... That question lies in: > > > > > > > > https://patchwork.kernel.org/patch/10859889/#22546487 > > > > > > > > And if my understanding was correct above, IMHO the patch could be as > > > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > > > > below. > > > > > > > > > > > Thanks, but I thought this method would break the x86 rom_reset logic > > during > > > RUN_STATE_INMIGRATE. > > > Please see the debugging patch and log lines below: > > > diff --git a/hw/core/loader.c b/hw/core/loader.c > > > index fe5cb24122..b0c871af26 100644 > > > --- a/hw/core/loader.c > > > +++ b/hw/core/loader.c > > > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t > > > bootindex) > > > static void rom_reset(void *unused) > > > { > > > Rom *rom; > > > - > > > QTAILQ_FOREACH(rom, &roms, next) { > > > + if (runstate_check(RUN_STATE_INMIGRATE)) > > > + printf("rom name=%s\n",rom->name); > > > if (rom->fw_file) { > > > continue; > > > } > > > > > > rom name=kvmvapic.bin > > > rom name=linuxboot_dma.bin > > > rom name=bios-256k.bin > > > rom name=etc/acpi/tables > > > rom name=etc/table-loader > > > rom name=etc/acpi/rsdp > > > > Hi, Catherine, > > > > I only see that rom names were dumped. Could you help explain what is > > broken? Thanks, > > > Thanks for the suggestion. I haven't seen any obvious errors on x86 with > this change. > I merely consider not to change the old code logic too much. > Ok, I will change it as you suggested if no more comments. Yeah let's see whether others would have any opinion on that since so far I don't see a problem (after all those ROM data should be migrated from source later on no matter whether they were modified on source). Regards, -- Peter Xu
Hi Peter Xu On Thu, 4 Apr 2019 at 12:25, Peter Xu <peterx@redhat.com> wrote: > On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote: > > Hi Peter Xu > > > > On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > > > > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > > > membackend + numa node). It does good to live migration. > > > > > > > > This commit expectes that QEMU doesn't write to guest RAM until > > > > VM starts, but it does on aarch64 qemu: > > > > Backtrace: > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > > > exec.c:3458 > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into > ram > > > > during rom_reset. In ignore-shared incoming case, this rom filling > > > > is not required since all the data has been stored in memory backend > > > file. > > > > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > > > capability") > > > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > > > > > (note: IIUC normally you should have your signed-off to be the last > > > line before the suggested-by :) > > > > > > About the patch content, I have had a question on whether we should > > > need to check ignore-shared at all... That question lies in: > > > > > > https://patchwork.kernel.org/patch/10859889/#22546487 > > > > > > And if my understanding was correct above, IMHO the patch could be as > > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > > > below. > > > > > > > > Thanks, but I thought this method would break the x86 rom_reset logic > during > > RUN_STATE_INMIGRATE. > > Please see the debugging patch and log lines below: > > diff --git a/hw/core/loader.c b/hw/core/loader.c > > index fe5cb24122..b0c871af26 100644 > > --- a/hw/core/loader.c > > +++ b/hw/core/loader.c > > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t > > bootindex) > > static void rom_reset(void *unused) > > { > > Rom *rom; > > - > > QTAILQ_FOREACH(rom, &roms, next) { > > + if (runstate_check(RUN_STATE_INMIGRATE)) > > + printf("rom name=%s\n",rom->name); > > if (rom->fw_file) { > > continue; > > } > > > > rom name=kvmvapic.bin > > rom name=linuxboot_dma.bin > > rom name=bios-256k.bin > > rom name=etc/acpi/tables > > rom name=etc/table-loader > > rom name=etc/acpi/rsdp > > Hi, Catherine, > > I only see that rom names were dumped. Could you help explain what is > broken? Thanks, > > Sorry, I have another concern here. What if there is no memory_backend file? If there is no memory backend file (i.e. without -object memory-backend-file,id=dimm1,size=512M,mem-path=/path/memory) Should the rom blobs still be written into ram in such case? B.R. Catherine > -- > Peter Xu >
On Thu, Apr 04, 2019 at 03:33:20PM +0800, Catherine Ho wrote: > Hi Peter Xu > > On Thu, 4 Apr 2019 at 12:25, Peter Xu <peterx@redhat.com> wrote: > > > On Wed, Apr 03, 2019 at 11:21:47PM +0800, Catherine Ho wrote: > > > Hi Peter Xu > > > > > > On Wed, 3 Apr 2019 at 10:25, Peter Xu <peterx@redhat.com> wrote: > > > > > > > On Tue, Apr 02, 2019 at 11:30:01AM -0400, Catherine Ho wrote: > > > > > Commit 18269069c310 ("migration: Introduce ignore-shared capability") > > > > > addes ignore-shared capability to bypass the shared ramblock (e,g, > > > > > membackend + numa node). It does good to live migration. > > > > > > > > > > This commit expectes that QEMU doesn't write to guest RAM until > > > > > VM starts, but it does on aarch64 qemu: > > > > > Backtrace: > > > > > 1 0x000055f4a296dd84 in address_space_write_rom_internal () at > > > > exec.c:3458 > > > > > 2 0x000055f4a296de3a in address_space_write_rom () at exec.c:3479 > > > > > 3 0x000055f4a2d519ff in rom_reset () at hw/core/loader.c:1101 > > > > > 4 0x000055f4a2d475ec in qemu_devices_reset () at hw/core/reset.c:69 > > > > > 5 0x000055f4a2c90a28 in qemu_system_reset () at vl.c:1675 > > > > > 6 0x000055f4a2c9851d in main () at vl.c:4552 > > > > > > > > > > Actually, on arm64 virt marchine, ramblock "dtb" will be filled into > > ram > > > > > during rom_reset. In ignore-shared incoming case, this rom filling > > > > > is not required since all the data has been stored in memory backend > > > > file. > > > > > > > > > > Fixes: commit 18269069c310 ("migration: Introduce ignore-shared > > > > capability") > > > > > Signed-off-by: Catherine Ho <catherine.hecx@gmail.com> > > > > > Suggested-by: Yury Kotov <yury-kotov@yandex-team.ru> > > > > > > > > (note: IIUC normally you should have your signed-off to be the last > > > > line before the suggested-by :) > > > > > > > > About the patch content, I have had a question on whether we should > > > > need to check ignore-shared at all... That question lies in: > > > > > > > > https://patchwork.kernel.org/patch/10859889/#22546487 > > > > > > > > And if my understanding was correct above, IMHO the patch could be as > > > > simply be as "if (runstate_check(RUN_STATE_INMIGRATE)) return;" at [1] > > > > below. > > > > > > > > > > > Thanks, but I thought this method would break the x86 rom_reset logic > > during > > > RUN_STATE_INMIGRATE. > > > Please see the debugging patch and log lines below: > > > diff --git a/hw/core/loader.c b/hw/core/loader.c > > > index fe5cb24122..b0c871af26 100644 > > > --- a/hw/core/loader.c > > > +++ b/hw/core/loader.c > > > @@ -1086,8 +1086,9 @@ int rom_add_option(const char *file, int32_t > > > bootindex) > > > static void rom_reset(void *unused) > > > { > > > Rom *rom; > > > - > > > QTAILQ_FOREACH(rom, &roms, next) { > > > + if (runstate_check(RUN_STATE_INMIGRATE)) > > > + printf("rom name=%s\n",rom->name); > > > if (rom->fw_file) { > > > continue; > > > } > > > > > > rom name=kvmvapic.bin > > > rom name=linuxboot_dma.bin > > > rom name=bios-256k.bin > > > rom name=etc/acpi/tables > > > rom name=etc/table-loader > > > rom name=etc/acpi/rsdp > > > > Hi, Catherine, > > > > I only see that rom names were dumped. Could you help explain what is > > broken? Thanks, > > > > Sorry, I have another concern here. What if there is no memory_backend > file? > If there is no memory backend file (i.e. without -object > memory-backend-file,id=dimm1,size=512M,mem-path=/path/memory) > > Should the rom blobs still be written into ram in such case? Please see my previous reply - I think it shouldn't be needed because we should migrate very soon after this point for those RAM. In other words, IIUC even if we do rom_reset() now with these ROMs then the RAM data should be re-filled again too with the migration stream coming in. Regards, -- Peter Xu
© 2016 - 2024 Red Hat, Inc.