Newer kernels sometimes revoke the exposure of KVM registers to
userspace. This can happen if some registers were unconditionally
exposed, by mistake, in previous versions and are now only conditionally exposed.
Recently this happened with 3 registers: TCR2_EL1, PIRE0_EL1, PIR_EL1.
The associated kernel commits were:
0fcb4eea5345 KVM: arm64: Hide TCR2_EL1 from userspace when disabled for guests
a68cddbe47ef KVM: arm64: Hide S1PIE registers from userspace when disabled for guests
Those commits were actual fixes, but the downside is that they break forward
migration from old to new host kernels, as the number of KVM regs exposed
to userspace decreases and this leads to "failed to load cpu:cpreg_vmstate_array_len"
when migrating the CPU state.
This patch adds a new CPU property, in the form of an array of reg
indices, that teaches (destination) QEMU that some registers must exist.
If they don't (because KVM does not expose them anymore), QEMU fakes them but
does not care about their state.
This is meant to be applied as a machine type compat like:
static GlobalProperty arm_virt_kernel_compat_10_1[] = {
{ TYPE_ARM_CPU, "kvm-enforced-regs",
/* TCR2_EL1, PIRE0_EL1, PIR_EL1 */
"0x603000000013c103, 0x603000000013c512, 0x603000000013c513" },
};
In case we migrate from a source where the KVM regs are not exposed and
where regs are faked to a destination where KVM exposes them, this means that
anyway FEAT_TCR2 and S1PIE are not supported by the guest so it should
not matter that dummy values are written on the destination.
But obviously introducing fake registers is a last resort solution.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
target/arm/cpu.h | 11 +++++++
target/arm/kvm.c | 63 +++++++++++++++++++++++++++++++++++++++--
target/arm/trace-events | 4 +++
3 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 121b4372b2..ea9045e5ff 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1035,6 +1035,17 @@ struct ArchCPU {
uint64_t *kvm_hidden_regs;
uint32_t nr_kvm_hidden_regs;
+ /*
+ * KVM registers whose presence must be enforced
+ * Either they must be exposed to user space by KVM or
+ * they must be faked for migration sake
+ */
+ uint64_t *kvm_enforced_regs;
+ uint32_t nr_kvm_enforced_regs;
+
+ /* registers among those to be enforced that are faked */
+ uint64_t *kvm_fake_regs;
+
/* Uniprocessor system with MP extensions */
bool mp_is_up;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 67675781f4..f8c36ffa2f 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -489,6 +489,11 @@ static const Property arm_cpu_kvm_compat_hidden_regs_property =
DEFINE_PROP_ARRAY("kvm-hidden-regs", ARMCPU,
nr_kvm_hidden_regs, kvm_hidden_regs, qdev_prop_uint64, uint64_t);
+/* KVM reg indices that must exist: exposed by KVM or faked by QEMU */
+static const Property arm_cpu_kvm_compat_enforced_regs_property =
+    DEFINE_PROP_ARRAY("kvm-enforced-regs", ARMCPU, nr_kvm_enforced_regs,
+                      kvm_enforced_regs, qdev_prop_uint64, uint64_t);
+
/* KVM VCPU properties should be prefixed with "kvm-". */
void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
{
@@ -512,6 +517,7 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
"Set off to disable KVM steal time.");
qdev_property_add_static(DEVICE(obj), &arm_cpu_kvm_compat_hidden_regs_property);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_kvm_compat_enforced_regs_property);
}
bool kvm_arm_pmu_supported(void)
@@ -772,6 +778,27 @@ static bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
}
}
+/**
+ * kvm_vcpu_compat_fake_reg:
+ * @cpu: ARMCPU
+ * @regidx: index of the register to check
+ *
+ * Returns: true if @regidx is an enforced register flagged as fake (it
+ * needs no sync), false otherwise or if the flags are not allocated yet.
+ */
+static inline bool
+kvm_vcpu_compat_fake_reg(ARMCPU *cpu, uint64_t regidx)
+{
+    uint32_t n = cpu->kvm_fake_regs ? cpu->nr_kvm_enforced_regs : 0;
+    for (uint32_t i = 0; i < n; i++) {
+        if (cpu->kvm_fake_regs[i] && cpu->kvm_enforced_regs[i] == regidx) {
+            trace_kvm_vcpu_compat_fake_reg(regidx);
+            return true;
+        }
+    }
+    return false;
+}
+
/**
* kvm_vcpu_compat_hidden_reg:
* @cpu: ARMCPU
@@ -806,7 +833,8 @@ static int kvm_arm_init_cpreg_list(ARMCPU *cpu)
{
struct kvm_reg_list rl;
struct kvm_reg_list *rlp;
- int i, ret, arraylen;
+ int i, ret, arraylen, rln;
+ int nr_fake_regs = 0;
CPUState *cs = CPU(cpu);
rl.n = 0;
@@ -814,12 +842,34 @@ static int kvm_arm_init_cpreg_list(ARMCPU *cpu)
if (ret != -E2BIG) {
return ret;
}
- rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
+ rln = rl.n + cpu->nr_kvm_enforced_regs;
+ rlp = g_malloc(sizeof(struct kvm_reg_list) + rln * sizeof(uint64_t));
rlp->n = rl.n;
ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
if (ret) {
goto out;
}
+
+ trace_kvm_arm_init_cpreg_list(rlp->n, cpu->nr_kvm_enforced_regs);
+
+ cpu->kvm_fake_regs = g_new0(uint64_t, cpu->nr_kvm_enforced_regs);
+
+    for (uint32_t j = 0; j < cpu->nr_kvm_enforced_regs; j++) {
+        uint64_t v64 = 0;
+        int res = kvm_get_one_reg(cs, cpu->kvm_enforced_regs[j], &v64);
+
+        /* Only -ENOENT is taken as "not exposed by KVM"; else nothing to fake */
+        if (res != -ENOENT) {
+            trace_kvm_arm_init_cpreg_exposed(cpu->kvm_enforced_regs[j], v64, res);
+            continue;
+        }
+        /* Append densely: indexing by j leaves a hole after a skipped reg */
+        rlp->reg[rl.n + nr_fake_regs] = cpu->kvm_enforced_regs[j];
+        cpu->kvm_fake_regs[j] = true;
+        nr_fake_regs++;
+    }
+    rlp->n = rl.n + nr_fake_regs;
+
/* Sort the list we get back from the kernel, since cpreg_tuples
* must be in strictly ascending order.
*/
@@ -912,6 +962,10 @@ bool write_kvmstate_to_list(ARMCPU *cpu)
uint32_t v32;
int ret;
+ if (kvm_vcpu_compat_fake_reg(cpu, regidx)) {
+ continue;
+ }
+
switch (regidx & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U32:
ret = kvm_get_one_reg(cs, regidx, &v32);
@@ -947,6 +1001,10 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
continue;
}
+ if (kvm_vcpu_compat_fake_reg(cpu, regidx)) {
+ continue;
+ }
+
switch (regidx & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U32:
v32 = cpu->cpreg_values[i];
@@ -1654,6 +1712,7 @@ static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v,
s->kvm_eager_split_size = value;
}
+
void kvm_arch_accel_class_init(ObjectClass *oc)
{
object_class_property_add(oc, "eager-split-size", "size",
diff --git a/target/arm/trace-events b/target/arm/trace-events
index 1b4ab0c683..57d2b6afd4 100644
--- a/target/arm/trace-events
+++ b/target/arm/trace-events
@@ -15,3 +15,7 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d"
kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64
kvm_arm_init_cpreg_list_arraylen(uint32_t arraylen) "arraylen=%d"
kvm_vcpu_compat_hidden_reg(uint64_t regidx) "0x%"PRIx64" is hidden"
+kvm_arm_init_cpreg_list(uint32_t kvm_regs, uint32_t fake_regs) "%d regs exposed by KVM, %d enforced regs"
+kvm_vcpu_compat_fake_reg(uint64_t regidx) "0x%"PRIx64" is fake"
+kvm_arm_init_cpreg_exposed(uint64_t regidx, uint64_t val, int ret) "enforced reg 0x%"PRIx64" is already exposed by KVM: value=0x%"PRIx64 " ret=%d: nothing to do"
+
--
2.49.0
© 2016 - 2025 Red Hat, Inc.