Currently, when the number of KVM registers exposed by the source is
larger than the number exposed on the destination, the migration fails
with: "failed to load cpu:cpreg_vmstate_array_len"
This gives no information about which registers are causing the trouble.
This patch reworks the target/arm/machine code so that it becomes
able to handle an input stream with a larger set of registers than
the destination and print useful information about which registers
are causing the trouble. The migration outcome is unchanged:
- unexpected registers will still fail the migration
- missing ones are printed but will not fail the migration, as done today.
The input stream can contain MAX_CPREG_VMSTATE_ANOMALIES(10) extra
registers compared to what exists on the target.
If there are more registers we will still hit the previous
"load cpu:cpreg_vmstate_array_len" error.
At most, MAX_CPREG_VMSTATE_ANOMALIES missing registers
and MAX_CPREG_VMSTATE_ANOMALIES unexpected registers are printed.
Example:
qemu-system-aarch64: kvm_arm_cpu_post_load Missing register in input stream: 0 0x6030000000160003 fw feat reg 3
qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 0 0x603000000013c103 op0:3 op1:0 crn:2 crm:0 op2:3
qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 1 0x603000000013c512 op0:3 op1:0 crn:10 crm:2 op2:2
qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 2 0x603000000013c513 op0:3 op1:0 crn:10 crm:2 op2:3
qemu-system-aarch64: error while loading state for instance 0x0 of device 'cpu'
qemu-system-aarch64: load of migration failed: Operation not permitted
With TCG there is no user-friendly formatting of the faulting
register indexes as with KVM. However, the 2 added trace points
help to identify the culprit indexes.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
---
v2 -> v3:
- some extra typos (Connie)
- collected Connie's R-b
v1 -> v2:
- fixed some typos in the commit msg
---
target/arm/cpu.h | 6 +++++
target/arm/kvm.c | 23 ++++++++++++++++
target/arm/machine.c | 58 ++++++++++++++++++++++++++++++++++++-----
target/arm/trace-events | 7 +++++
4 files changed, 88 insertions(+), 6 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1eaf5a3fddf..e900ef7937b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -939,6 +939,12 @@ struct ArchCPU {
uint64_t *cpreg_vmstate_values;
int32_t cpreg_vmstate_array_len;
+ #define MAX_CPREG_VMSTATE_ANOMALIES 10
+ uint64_t cpreg_vmstate_missing_indexes[MAX_CPREG_VMSTATE_ANOMALIES];
+ int32_t cpreg_vmstate_missing_indexes_array_len;
+ uint64_t cpreg_vmstate_unexpected_indexes[MAX_CPREG_VMSTATE_ANOMALIES];
+ int32_t cpreg_vmstate_unexpected_indexes_array_len;
+
DynamicGDBFeatureInfo dyn_sysreg_feature;
DynamicGDBFeatureInfo dyn_svereg_feature;
DynamicGDBFeatureInfo dyn_smereg_feature;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 48f853fff80..c6f0d0fc4e1 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1024,6 +1024,29 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu)
bool kvm_arm_cpu_post_load(ARMCPU *cpu)
{
+ int i;
+
+ for (i = 0; i < cpu->cpreg_vmstate_missing_indexes_array_len; i++) {
+ gchar *name;
+
+ name = kvm_print_register_name(cpu->cpreg_vmstate_missing_indexes[i]);
+ trace_kvm_arm_cpu_post_load_missing_reg(name);
+ g_free(name);
+ }
+
+ for (i = 0; i < cpu->cpreg_vmstate_unexpected_indexes_array_len; i++) {
+ gchar *name;
+
+ name = kvm_print_register_name(cpu->cpreg_vmstate_unexpected_indexes[i]);
+ error_report("%s Unexpected register in input stream: %i 0x%"PRIx64" %s",
+ __func__, i, cpu->cpreg_vmstate_unexpected_indexes[i], name);
+ g_free(name);
+ }
+ /* Fail the migration if we detect unexpected registers */
+ if (cpu->cpreg_vmstate_unexpected_indexes_array_len) {
+ return false;
+ }
+
if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) {
return false;
}
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 0befdb0b28a..f06a920aba1 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -10,6 +10,7 @@
#include "migration/vmstate.h"
#include "target/arm/gtimer.h"
#include "hw/arm/machines-qom.h"
+#include "trace.h"
static bool vfp_needed(void *opaque)
{
@@ -990,7 +991,13 @@ static int cpu_pre_load(void *opaque)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
+ int arraylen = cpu->cpreg_vmstate_array_len + MAX_CPREG_VMSTATE_ANOMALIES;
+ cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
+ arraylen);
+ cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
+ arraylen);
+ cpu->cpreg_vmstate_array_len = arraylen;
/*
* In an inbound migration where on the source FPSCR/FPSR/FPCR are 0,
* there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr()
@@ -1023,7 +1030,7 @@ static int cpu_post_load(void *opaque, int version_id)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
- int i, v;
+ int i = 0, j = 0, k = 0, v = 0;
/*
* Handle migration compatibility from old QEMU which didn't
@@ -1051,27 +1058,66 @@ static int cpu_post_load(void *opaque, int version_id)
* entries with the right slots in our own values array.
*/
- for (i = 0, v = 0; i < cpu->cpreg_array_len
- && v < cpu->cpreg_vmstate_array_len; i++) {
+ trace_cpu_post_load_len(cpu->cpreg_array_len, cpu->cpreg_vmstate_array_len);
+ for (; i < cpu->cpreg_array_len && v < cpu->cpreg_vmstate_array_len;) {
+ trace_cpu_post_load(i, v , cpu->cpreg_indexes[i]);
if (cpu->cpreg_vmstate_indexes[v] > cpu->cpreg_indexes[i]) {
/* register in our list but not incoming : skip it */
+ trace_cpu_post_load_missing(i, cpu->cpreg_indexes[i], v);
+ if (j < MAX_CPREG_VMSTATE_ANOMALIES) {
+ cpu->cpreg_vmstate_missing_indexes[j++] = cpu->cpreg_indexes[i];
+ }
+ i++;
continue;
}
if (cpu->cpreg_vmstate_indexes[v] < cpu->cpreg_indexes[i]) {
- /* register in their list but not ours: fail migration */
- return -1;
+ /* register in their list but not ours: those will fail migration */
+ trace_cpu_post_load_unexpected(v, cpu->cpreg_vmstate_indexes[v], i);
+ if (k < MAX_CPREG_VMSTATE_ANOMALIES) {
+ cpu->cpreg_vmstate_unexpected_indexes[k++] =
+ cpu->cpreg_vmstate_indexes[v];
+ }
+ v++;
+ continue;
}
/* matching register, copy the value over */
cpu->cpreg_values[i] = cpu->cpreg_vmstate_values[v];
v++;
+ i++;
}
+ /*
+ * if we have reached the end of the incoming array but there are
+ * still regs in cpreg, continue parsing the regs which are missing
+ * in the input stream
+ */
+ for ( ; i < cpu->cpreg_array_len; i++) {
+ if (j < MAX_CPREG_VMSTATE_ANOMALIES) {
+ trace_cpu_post_load_missing(i, cpu->cpreg_indexes[i], v);
+ cpu->cpreg_vmstate_missing_indexes[j++] = cpu->cpreg_indexes[i];
+ }
+ }
+ /*
+ * if we have reached the end of the cpreg array but there are
+ * still regs in the input stream, continue parsing the vmstate array
+ */
+ for ( ; v < cpu->cpreg_vmstate_array_len; v++) {
+ if (k < MAX_CPREG_VMSTATE_ANOMALIES) {
+ trace_cpu_post_load_unexpected(v, cpu->cpreg_vmstate_indexes[v], i);
+ cpu->cpreg_vmstate_unexpected_indexes[k++] =
+ cpu->cpreg_vmstate_indexes[v];
+ }
+ }
+
+ cpu->cpreg_vmstate_missing_indexes_array_len = j;
+ cpu->cpreg_vmstate_unexpected_indexes_array_len = k;
if (kvm_enabled()) {
if (!kvm_arm_cpu_post_load(cpu)) {
return -1;
}
} else {
- if (!write_list_to_cpustate(cpu)) {
+ if (cpu->cpreg_vmstate_unexpected_indexes_array_len ||
+ !write_list_to_cpustate(cpu)) {
return -1;
}
}
diff --git a/target/arm/trace-events b/target/arm/trace-events
index 676d29fe516..0a5ed3e69d5 100644
--- a/target/arm/trace-events
+++ b/target/arm/trace-events
@@ -13,6 +13,7 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d"
# kvm.c
kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64
+kvm_arm_cpu_post_load_missing_reg(char *name) "Missing register in input stream: %s"
# cpu.c
arm_cpu_reset(uint64_t mp_aff) "cpu %" PRIu64
@@ -26,3 +27,9 @@ arm_powerctl_reset_cpu(uint64_t mp_aff) "cpu %" PRIu64
# tcg/psci.c and hvf/hvf.c
arm_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpuid=0x%x"
+
+# machine.c
+cpu_post_load_len(int cpreg_array_len, int cpreg_vmstate_array_len) "cpreg_array_len=%d cpreg_vmstate_array_len=%d"
+cpu_post_load(int i, int v, uint64_t regidx) "i=%d v=%d regidx=0x%"PRIx64
+cpu_post_load_missing(int i, uint64_t regidx, int v) "missing register in input stream: i=%d index=0x%"PRIx64" (v=%d)"
+cpu_post_load_unexpected(int v, uint64_t regidx, int i) "unexpected register in input stream: v=%d index=0x%"PRIx64" (i=%d)"
--
2.52.0
On Mon, 26 Jan 2026 at 16:55, Eric Auger <eric.auger@redhat.com> wrote:
>
> Currently when the number of KVM registers exposed by the source is
> larger than the one exposed on the destination, the migration fails
> with: "failed to load cpu:cpreg_vmstate_array_len"
>
> This gives no information about which registers are causing the trouble.
>
> This patch reworks the target/arm/machine code so that it becomes
> able to handle an input stream with a larger set of registers than
> the destination and print useful information about which registers
> are causing the trouble. The migration outcome is unchanged:
> - unexpected registers still will fail the migration
> - missing ones are printed but will not fail the migration, as done today.
Improving the diagnostics here is a great idea.
> The input stream can contain MAX_CPREG_VMSTATE_ANOMALIES(10) extra
> registers compared to what exists on the target.
>
> If there are more registers we will still hit the previous
> "load cpu:cpreg_vmstate_array_len" error.
>
> At most, MAX_CPREG_VMSTATE_ANOMALIES missing registers
> and MAX_CPREG_VMSTATE_ANOMALIES unexpected registers are printed.
>
> Example:
>
> qemu-system-aarch64: kvm_arm_cpu_post_load Missing register in input stream: 0 0x6030000000160003 fw feat reg 3
> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 0 0x603000000013c103 op0:3 op1:0 crn:2 crm:0 op2:3
> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 1 0x603000000013c512 op0:3 op1:0 crn:10 crm:2 op2:2
> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input stream: 2 0x603000000013c513 op0:3 op1:0 crn:10 crm:2 op2:3
> qemu-system-aarch64: error while loading state for instance 0x0 of device 'cpu'
> qemu-system-aarch64: load of migration failed: Operation not permitted
>
> With TCG there is no user friendly formatting of the faulting
> register indexes as with KVM. However the 2 added trace points
> help to identify the culprit indexes.
Could we move kvm_print_register_name() out of kvm.c and into
somewhere that the TCG code can use it? (I did think when I
was reviewing the patch that added that that we might want it
for TCG too eventually.)
> @@ -990,7 +991,13 @@ static int cpu_pre_load(void *opaque)
> {
> ARMCPU *cpu = opaque;
> CPUARMState *env = &cpu->env;
> + int arraylen = cpu->cpreg_vmstate_array_len + MAX_CPREG_VMSTATE_ANOMALIES;
>
> + cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
> + arraylen);
> + cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
> + arraylen);
> + cpu->cpreg_vmstate_array_len = arraylen;
It seems a bit odd to extend these on cpu_pre_load, especially
since it means we'll do so on every cpu_pre_load call, which I
think can happen if you try an inbound migration, it fails, and
then you retry it.
I think it ought to be possible to both avoid this reallocation
and the problem noted in the commit message where more than 10
extra registers results in an unhelpful message, if we can
convert the vmstate fields from VMSTATE_VARRAY_INT32 to
VMSTATE_VARRAY_INT32_ALLOC. (That latter doesn't exist yet but
will be the INT32 equivalent of VMSTATE_VARRAY_UINT32_ALLOC.)
If I have read the code correctly, these should work by
having the inbound migration code allocate the buffer for the
array data instead of expecting it to be pre-allocated -- that
means our post_load function can look at all the data it got
without imposing a length limitation.
I think (but we should check :-)) that the data in the migration
stream is the same in both cases, so this will not be a compat break.
(Some existing code will need adjustment to avoid a memory leak,
e.g. g_free any existing array in pre_load.)
thanks
-- PMM
© 2016 - 2026 Red Hat, Inc.