[PULL 11/16] hw/intc/arm_gicv3_kvm: Avoid reading ICC_CTLR_EL1 from kernel in cpuif reset

Maintainers: Warner Losh <imp@bsdimp.com>, Kyle Evans <kevans@freebsd.org>, Laurent Vivier <laurent@vivier.eu>, Peter Maydell <peter.maydell@linaro.org>, "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Jiri Pirko <jiri@resnulli.us>, Jason Wang <jasowang@redhat.com>
There is a newer version of this series
[PULL 11/16] hw/intc/arm_gicv3_kvm: Avoid reading ICC_CTLR_EL1 from kernel in cpuif reset
Posted by Peter Maydell 3 days, 20 hours ago
Currently in arm_gicv3_icc_reset() we read the kernel's value of
ICC_CTLR_EL1 as part of resetting the CPU interface.  This mostly
works, but we're actually breaking an assumption the kernel makes
that userspace only accesses the in-kernel GIC data when the VM is
totally paused, which may not be the case if a single vCPU is being
reset.  The effect is that it's possible that the read attempt
returns EBUSY.

Avoid this by reading the kernel's value of the reset ICC_CTLR_EL1
once in device realize. This brings ICC_CTLR_EL1 into line with
the other cpuif registers, where we assume we know what the kernel
is resetting them to and just update QEMU's data structures in
arm_gicv3_icc_reset().

Reviewed-by: Salil Mehta <salil.mehta@huawei.com>
Tested-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20251014102439.319915-1-peter.maydell@linaro.org
---
 include/hw/intc/arm_gicv3_common.h |  3 ++
 hw/intc/arm_gicv3_kvm.c            | 49 +++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index 38aa1961c50..61d51915e07 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -166,6 +166,9 @@ struct GICv3CPUState {
     uint64_t icc_igrpen[3];
     uint64_t icc_ctlr_el3;
 
+    /* For KVM, cached copy of the kernel reset value of ICC_CTLR_EL1 */
+    uint64_t kvm_reset_icc_ctlr_el1;
+
     /* Virtualization control interface */
     uint64_t ich_apr[3][4]; /* ich_apr[GICV3_G1][x] never used */
     uint64_t ich_hcr_el2;
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 405496c7bb3..66b0dddfd4b 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -658,11 +658,24 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 
 static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    GICv3State *s;
-    GICv3CPUState *c;
+    GICv3CPUState *c = (GICv3CPUState *)env->gicv3state;
 
-    c = (GICv3CPUState *)env->gicv3state;
-    s = c->gic;
+    /*
+     * This function is called when each vcpu resets. The kernel
+     * API for the GIC assumes that it is only to be used when the
+     * whole VM is paused, so if we attempt to read the kernel's
+     * reset values here we might get EBUSY failures.
+     * So instead we assume we know what the kernel's reset values
+     * are (mostly zeroes) and only update the QEMU state struct
+     * fields. The exception is that we do need to know the kernel's
+     * idea of the ICC_CTLR_EL1 reset value, so we cache that at
+     * device realize time.
+     *
+     * This makes these sysregs different from the usual CPU ones,
+     * which can be validly read and written when only the single
+     * vcpu they apply to is paused, and where (in target/arm code)
+     * we read the reset values out of the kernel on every reset.
+     */
 
     c->icc_pmr_el1 = 0;
     /*
@@ -683,16 +696,8 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     memset(c->icc_apr, 0, sizeof(c->icc_apr));
     memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
 
-    if (s->migration_blocker) {
-        return;
-    }
-
-    /* Initialize to actual HW supported configuration */
-    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
-                      KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer),
-                      &c->icc_ctlr_el1[GICV3_NS], false, &error_abort);
-
-    c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+    c->icc_ctlr_el1[GICV3_NS] = c->kvm_reset_icc_ctlr_el1;
+    c->icc_ctlr_el1[GICV3_S] = c->kvm_reset_icc_ctlr_el1;
 }
 
 static void kvm_arm_gicv3_reset_hold(Object *obj, ResetType type)
@@ -926,6 +931,22 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
                                     kvm_arm_gicv3_notifier,
                                     MIG_MODE_CPR_TRANSFER);
     }
+
+    /*
+     * Now we can read the kernel's initial value of ICC_CTLR_EL1, which
+     * we will need if a CPU interface is reset. If the kernel is ancient
+     * and doesn't support writing the GIC state then we don't need to
+     * care what reset does to QEMU's data structures.
+     */
+    if (!s->migration_blocker) {
+        for (i = 0; i < s->num_cpu; i++) {
+            GICv3CPUState *c = &s->cpu[i];
+
+            kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                              KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer),
+                              &c->kvm_reset_icc_ctlr_el1, false, &error_abort);
+        }
+    }
 }
 
 static void kvm_arm_gicv3_class_init(ObjectClass *klass, const void *data)
-- 
2.43.0