i386, misc changes for QEMU 8.0 soft freeze

[PULL 32/62] hw/xen: Implement EVTCHNOP_bind_virq

Posted by Paolo Bonzini 2 years, 11 months ago

From: David Woodhouse <dwmw@amazon.co.uk>

Add the array of virq ports to each vCPU so that we can deliver timers,
debug ports, etc. Global virqs are allocated against vCPU 0 initially,
but can be migrated to other vCPUs (when we implement that).

The kernel needs to know about VIRQ_TIMER in order to accelerate timers,
so tell it via KVM_XEN_VCPU_ATTR_TYPE_TIMER. Also save/restore the value
of the singleshot timer across migration, as the kernel will handle the
hypercalls automatically now.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 hw/i386/kvm/xen_evtchn.c  | 85 ++++++++++++++++++++++++++++++++++++
 hw/i386/kvm/xen_evtchn.h  |  2 +
 include/sysemu/kvm_xen.h  |  1 +
 target/i386/cpu.h         |  4 ++
 target/i386/kvm/xen-emu.c | 91 +++++++++++++++++++++++++++++++++++++++
 target/i386/machine.c     |  2 +
 6 files changed, 185 insertions(+)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 54707f1f9fe1..a3202d39abca 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -244,6 +244,11 @@ static bool valid_port(evtchn_port_t port)
     }
 }
 
+static bool valid_vcpu(uint32_t vcpu)
+{
+    return !!qemu_get_cpu(vcpu);
+}
+
 int xen_evtchn_status_op(struct evtchn_status *status)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
@@ -496,6 +501,43 @@ static void free_port(XenEvtchnState *s, evtchn_port_t port)
     clear_port_pending(s, port);
 }
 
+static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
+                         uint16_t val, evtchn_port_t *port)
+{
+    evtchn_port_t p = 1;
+
+    for (p = 1; valid_port(p); p++) {
+        if (s->port_table[p].type == EVTCHNSTAT_closed) {
+            s->port_table[p].vcpu = vcpu;
+            s->port_table[p].type = type;
+            s->port_table[p].type_val = val;
+
+            *port = p;
+
+            if (s->nr_ports < p + 1) {
+                s->nr_ports = p + 1;
+            }
+
+            return 0;
+        }
+    }
+    return -ENOSPC;
+}
+
+static bool virq_is_global(uint32_t virq)
+{
+    switch (virq) {
+    case VIRQ_TIMER:
+    case VIRQ_DEBUG:
+    case VIRQ_XENOPROF:
+    case VIRQ_XENPMU:
+        return false;
+
+    default:
+        return true;
+    }
+}
+
 static int close_port(XenEvtchnState *s, evtchn_port_t port)
 {
     XenEvtchnPort *p = &s->port_table[port];
@@ -504,6 +546,11 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port)
     case EVTCHNSTAT_closed:
         return -ENOENT;
 
+    case EVTCHNSTAT_virq:
+        kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
+                              p->type_val, 0);
+        break;
+
     default:
         break;
     }
@@ -555,3 +602,41 @@ int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
 
     return ret;
 }
+
+int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int ret;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    if (virq->virq >= NR_VIRQS) {
+        return -EINVAL;
+    }
+
+    /* Global VIRQ must be allocated on vCPU0 first */
+    if (virq_is_global(virq->virq) && virq->vcpu != 0) {
+        return -EINVAL;
+    }
+
+    if (!valid_vcpu(virq->vcpu)) {
+        return -ENOENT;
+    }
+
+    qemu_mutex_lock(&s->port_lock);
+
+    ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
+                        &virq->port);
+    if (!ret) {
+        ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
+        if (ret) {
+            free_port(s, virq->port);
+        }
+    }
+
+    qemu_mutex_unlock(&s->port_lock);
+
+    return ret;
+}
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index 69c6b0d743ae..0ea13dda3a25 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -18,8 +18,10 @@ int xen_evtchn_set_callback_param(uint64_t param);
 struct evtchn_status;
 struct evtchn_close;
 struct evtchn_unmask;
+struct evtchn_bind_virq;
 int xen_evtchn_status_op(struct evtchn_status *status);
 int xen_evtchn_close_op(struct evtchn_close *close);
 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask);
+int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq);
 
 #endif /* QEMU_XEN_EVTCHN_H */
diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
index 644c7d889c6b..fbb7414eb797 100644
--- a/include/sysemu/kvm_xen.h
+++ b/include/sysemu/kvm_xen.h
@@ -23,6 +23,7 @@ int kvm_xen_soft_reset(void);
 uint32_t kvm_xen_get_caps(void);
 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
+int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
 
 #define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() &           \
                                  KVM_XEN_HVM_CONFIG_ ## cap))
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1c7603221dc6..4b70257db5dd 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -27,6 +27,8 @@
 #include "qapi/qapi-types-common.h"
 #include "qemu/cpu-float.h"
 
+#define XEN_NR_VIRQS 24
+
 /* The x86 has a strong memory model with some store-after-load re-ordering */
 #define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
@@ -1806,6 +1808,8 @@ typedef struct CPUArchState {
     uint64_t xen_vcpu_time_info_gpa;
     uint64_t xen_vcpu_runstate_gpa;
     uint8_t xen_vcpu_callback_vector;
+    uint16_t xen_virq[XEN_NR_VIRQS];
+    uint64_t xen_singleshot_timer_ns;
 #endif
 #if defined(CONFIG_HVF)
     HVFX86LazyFlags hvf_lflags;
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 884e1c9c11d1..e0417f3d1359 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -355,6 +355,53 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
     }
 }
 
+static int kvm_xen_set_vcpu_timer(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    struct kvm_xen_vcpu_attr va = {
+        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+        .u.timer.port = env->xen_virq[VIRQ_TIMER],
+        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
+    };
+
+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
+}
+
+static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
+{
+    kvm_xen_set_vcpu_timer(cs);
+}
+
+int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
+{
+    CPUState *cs = qemu_get_cpu(vcpu_id);
+
+    if (!cs) {
+        return -ENOENT;
+    }
+
+    /* cpu.h doesn't include the actual Xen header. */
+    qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
+
+    if (virq >= NR_VIRQS) {
+        return -EINVAL;
+    }
+
+    if (port && X86_CPU(cs)->env.xen_virq[virq]) {
+        return -EEXIST;
+    }
+
+    X86_CPU(cs)->env.xen_virq[virq] = port;
+    if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
+        async_run_on_cpu(cs, do_set_vcpu_timer_virq,
+                         RUN_ON_CPU_HOST_INT(port));
+    }
+    return 0;
+}
+
 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -387,6 +434,8 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
     env->xen_vcpu_time_info_gpa = INVALID_GPA;
     env->xen_vcpu_runstate_gpa = INVALID_GPA;
     env->xen_vcpu_callback_vector = 0;
+    env->xen_singleshot_timer_ns = 0;
+    memset(env->xen_virq, 0, sizeof(env->xen_virq));
 
     set_vcpu_info(cs, INVALID_GPA);
     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
@@ -395,6 +444,7 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
                           INVALID_GPA);
     if (kvm_xen_has_cap(EVTCHN_SEND)) {
         kvm_xen_set_vcpu_callback_vector(cs);
+        kvm_xen_set_vcpu_timer(cs);
     }
 
 }
@@ -829,6 +879,21 @@ static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
         err = xen_evtchn_unmask_op(&unmask);
         break;
     }
+    case EVTCHNOP_bind_virq: {
+        struct evtchn_bind_virq virq;
+
+        qemu_build_assert(sizeof(virq) == 12);
+        if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_evtchn_bind_virq_op(&virq);
+        if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
+            err = -EFAULT;
+        }
+        break;
+    }
     default:
         return false;
     }
@@ -1060,6 +1125,12 @@ int kvm_put_xen_state(CPUState *cs)
         }
     }
 
+    if (env->xen_virq[VIRQ_TIMER]) {
+        ret = kvm_xen_set_vcpu_timer(cs);
+        if (ret < 0) {
+            return ret;
+        }
+    }
     return 0;
 }
 
@@ -1068,6 +1139,7 @@ int kvm_get_xen_state(CPUState *cs)
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
     uint64_t gpa;
+    int ret;
 
     /*
      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
@@ -1090,5 +1162,24 @@ int kvm_get_xen_state(CPUState *cs)
         }
     }
 
+    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
+        return 0;
+    }
+
+    /*
+     * If the kernel is accelerating timers, read out the current value of the
+     * singleshot timer deadline.
+     */
+    if (env->xen_virq[VIRQ_TIMER]) {
+        struct kvm_xen_vcpu_attr va = {
+            .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+        };
+        ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
+        if (ret < 0) {
+            return ret;
+        }
+        env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
+    }
+
     return 0;
 }
diff --git a/target/i386/machine.c b/target/i386/machine.c
index a4874eda9041..603a1077e34a 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1275,6 +1275,8 @@ static const VMStateDescription vmstate_xen_vcpu = {
         VMSTATE_UINT64(env.xen_vcpu_time_info_gpa, X86CPU),
         VMSTATE_UINT64(env.xen_vcpu_runstate_gpa, X86CPU),
         VMSTATE_UINT8(env.xen_vcpu_callback_vector, X86CPU),
+        VMSTATE_UINT16_ARRAY(env.xen_virq, X86CPU, XEN_NR_VIRQS),
+        VMSTATE_UINT64(env.xen_singleshot_timer_ns, X86CPU),
         VMSTATE_END_OF_LIST()
     }
 };
-- 
2.39.1

Re: [PULL 32/62] hw/xen: Implement EVTCHNOP_bind_virq

Posted by Peter Maydell 2 years, 9 months ago

On Thu, 2 Mar 2023 at 12:39, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Add the array of virq ports to each vCPU so that we can deliver timers,
> debug ports, etc. Global virqs are allocated against vCPU 0 initially,
> but can be migrated to other vCPUs (when we implement that).
>
> The kernel needs to know about VIRQ_TIMER in order to accelerate timers,
> so tell it via KVM_XEN_VCPU_ATTR_TYPE_TIMER. Also save/restore the value
> of the singleshot timer across migration, as the kernel will handle the
> hypercalls automatically now.
>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Reviewed-by: Paul Durrant <paul@xen.org>

Hi; Coverity points out (CID 1507534) that we seem to sometimes
access env->xen_singleshot_timer_ns under the protection of
env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event())
and sometimes not (the specific case Coverity complains about is
in do_vcpu_soft_reset()). Is this a false positive, or is there
missing locking here ?

>  /* The x86 has a strong memory model with some store-after-load re-ordering */
>  #define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL & ~TCG_MO_ST_LD)
>
> @@ -1806,6 +1808,8 @@ typedef struct CPUArchState {
>      uint64_t xen_vcpu_time_info_gpa;
>      uint64_t xen_vcpu_runstate_gpa;
>      uint8_t xen_vcpu_callback_vector;
> +    uint16_t xen_virq[XEN_NR_VIRQS];
> +    uint64_t xen_singleshot_timer_ns;
>  #endif
>  #if defined(CONFIG_HVF)
>      HVFX86LazyFlags hvf_lflags;

> @@ -387,6 +434,8 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
>      env->xen_vcpu_time_info_gpa = INVALID_GPA;
>      env->xen_vcpu_runstate_gpa = INVALID_GPA;
>      env->xen_vcpu_callback_vector = 0;
> +    env->xen_singleshot_timer_ns = 0;
> +    memset(env->xen_virq, 0, sizeof(env->xen_virq));
>
>      set_vcpu_info(cs, INVALID_GPA);
>      kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
> @@ -395,6 +444,7 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
>                            INVALID_GPA);
>      if (kvm_xen_has_cap(EVTCHN_SEND)) {
>          kvm_xen_set_vcpu_callback_vector(cs);
> +        kvm_xen_set_vcpu_timer(cs);
>      }
>
>  }

thanks
-- PMM

[PATCH] i386/xen: consistent locking around Xen singleshot timers

Posted by David Woodhouse 2 years, 8 months ago

From: David Woodhouse <dwmw@amazon.co.uk>

Coverity points out (CID 1507534) that we sometimes access
env->xen_singleshot_timer_ns under the protection of
env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event()) and
sometimes not (the specific case Coverity complains about is in
do_vcpu_soft_reset()).

This isn't strictly an issue. There are two modes for the timers; if
the kernel supports the EVTCHN_SEND capability then it handles all the
timer hypercalls and delivery internally, and all we need to do is an
ioctl to get/set the next timer as part of the vCPU state. If the
kernel doesn't have that support, then we do all the emulation within
qemu, and *those* are the code paths where we actually care about the
locking.

But it doesn't hurt to be a little bit more consistent and avoid having
to explain *why* it's OK.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---

On Tue, 2023-05-09 at 15:55 +0100, Peter Maydell wrote:
> Hi; Coverity points out (CID 1507534) that we seem to sometimes
> access env->xen_singleshot_timer_ns under the protection of
> env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event())
> and sometimes not (the specific case Coverity complains about is
> in do_vcpu_soft_reset()). Is this a false positive, or is there

Thanks. As noted, I think it's harmless but it doesn't hurt to clean it
up a bit. I've pushed this to my tree at
https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv
on top of the other fixes that didn't make 8.0.

 target/i386/kvm/xen-emu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index d7c7eb8d9c..36d2dd8d02 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -43,6 +43,7 @@
 
 static void xen_vcpu_singleshot_timer_event(void *opaque);
 static void xen_vcpu_periodic_timer_event(void *opaque);
+static int vcpuop_stop_singleshot_timer(CPUState *cs);
 
 #ifdef TARGET_X86_64
 #define hypercall_compat32(longmode) (!(longmode))
@@ -483,6 +484,7 @@ static int kvm_xen_set_vcpu_timer(CPUState *cs)
 
 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
 {
+    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
     kvm_xen_set_vcpu_timer(cs);
 }
 
@@ -545,7 +547,6 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
     env->xen_vcpu_time_info_gpa = INVALID_GPA;
     env->xen_vcpu_runstate_gpa = INVALID_GPA;
     env->xen_vcpu_callback_vector = 0;
-    env->xen_singleshot_timer_ns = 0;
     memset(env->xen_virq, 0, sizeof(env->xen_virq));
 
     set_vcpu_info(cs, INVALID_GPA);
@@ -555,8 +556,13 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
                           INVALID_GPA);
     if (kvm_xen_has_cap(EVTCHN_SEND)) {
         kvm_xen_set_vcpu_callback_vector(cs);
+
+        QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
+        env->xen_singleshot_timer_ns = 0;
         kvm_xen_set_vcpu_timer(cs);
-    }
+    } else {
+        vcpuop_stop_singleshot_timer(cs);
+    };
 
 }
 
@@ -1844,10 +1850,7 @@ int kvm_put_xen_state(CPUState *cs)
     }
 
     if (env->xen_virq[VIRQ_TIMER]) {
-        ret = kvm_xen_set_vcpu_timer(cs);
-        if (ret < 0) {
-            return ret;
-        }
+        do_set_vcpu_timer_virq(cs, RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
     }
     return 0;
 }
-- 
2.34.1

Re: [PATCH] i386/xen: consistent locking around Xen singleshot timers

Posted by Peter Maydell 2 years, 8 months ago

On Mon, 22 May 2023 at 19:52, David Woodhouse <dwmw2@infradead.org> wrote:
>
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Coverity points out (CID 1507534) that we sometimes access
> env->xen_singleshot_timer_ns under the protection of
> env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event()) and
> sometimes not (the specific case Coverity complains about is in
> do_vcpu_soft_reset()).
>
> This isn't strictly an issue. There are two modes for the timers; if
> the kernel supports the EVTCHN_SEND capability then it handles all the
> timer hypercalls and delivery internally, and all we need to do is an
> ioctl to get/set the next timer as part of the vCPU state. If the
> kernel doesn't have that support, then we do all the emulation within
> qemu, and *those* are the code paths where we actually care about the
> locking.
>
> But it doesn't hurt to be a little bit more consistent and avoid having
> to explain *why* it's OK.
>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>

Looking a bit more closely at the Coverity lists, there's also
CID 1507968 which is for the access to env->xen_singleshot_timer_ns
in kvm_get_xen_state(), which this patch doesn't touch, I think ?

thanks
-- PMM

Re: [PATCH] i386/xen: consistent locking around Xen singleshot timers

Posted by David Woodhouse 2 years, 7 months ago

On Fri, 2023-06-02 at 17:58 +0100, Peter Maydell wrote:
> On Mon, 22 May 2023 at 19:52, David Woodhouse <dwmw2@infradead.org> wrote:
> > 
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > Coverity points out (CID 1507534) that we sometimes access
> > env->xen_singleshot_timer_ns under the protection of
> > env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event()) and
> > sometimes not (the specific case Coverity complains about is in
> > do_vcpu_soft_reset()).
> > 
> > This isn't strictly an issue. There are two modes for the timers; if
> > the kernel supports the EVTCHN_SEND capability then it handles all the
> > timer hypercalls and delivery internally, and all we need to do is an
> > ioctl to get/set the next timer as part of the vCPU state. If the
> > kernel doesn't have that support, then we do all the emulation within
> > qemu, and *those* are the code paths where we actually care about the
> > locking.
> > 
> > But it doesn't hurt to be a little bit more consistent and avoid having
> > to explain *why* it's OK.
> > 
> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> 
> Looking a bit more closely at the Coverity lists, there's also
> CID 1507968 which is for the access to env->xen_singleshot_timer_ns
> in kvm_get_xen_state(), which this patch doesn't touch, I think ?

That one is basically a false positive since it's for the case where
the kernel is handling the timers and all we do is an ioctl to read the
state. Which is *inherently* racy if any other vCPU can be running and
modifying them at the time. And doesn't have anything to race *with* if
not :)

On the other hand, the one in kvm_put_xen_state() which calls
do_set_singleshot_timer() is for the userspace case, and *that* one
probably wants locking. Again in practice it's probably never going to
have anything to race with, but that's a poor excuse. New patch
follows.

[PATCH v2] i386/xen: consistent locking around Xen singleshot timers

Posted by David Woodhouse 2 years, 7 months ago

From: David Woodhouse <dwmw@amazon.co.uk>

Coverity points out (CID 1507534, 1507968) that we sometimes access
env->xen_singleshot_timer_ns under the protection of
env->xen_timers_lock and sometimes not.

This isn't always an issue. There are two modes for the timers; if the
kernel supports the EVTCHN_SEND capability then it handles all the timer
hypercalls and delivery internally, and all we use the field for is to
get/set the timer as part of the vCPU state via an ioctl(). If the
kernel doesn't have that support, then we do all the emulation within
qemu, and *those* are the code paths where we actually care about the
locking.

But it doesn't hurt to be a little bit more consistent and avoid having
to explain *why* it's OK.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 target/i386/kvm/xen-emu.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index d7c7eb8d9c..9946ff0905 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -43,6 +43,7 @@
 
 static void xen_vcpu_singleshot_timer_event(void *opaque);
 static void xen_vcpu_periodic_timer_event(void *opaque);
+static int vcpuop_stop_singleshot_timer(CPUState *cs);
 
 #ifdef TARGET_X86_64
 #define hypercall_compat32(longmode) (!(longmode))
@@ -466,6 +467,7 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
     }
 }
 
+/* Must always be called with xen_timers_lock held */
 static int kvm_xen_set_vcpu_timer(CPUState *cs)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -483,6 +485,7 @@ static int kvm_xen_set_vcpu_timer(CPUState *cs)
 
 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
 {
+    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
     kvm_xen_set_vcpu_timer(cs);
 }
 
@@ -545,7 +548,6 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
     env->xen_vcpu_time_info_gpa = INVALID_GPA;
     env->xen_vcpu_runstate_gpa = INVALID_GPA;
     env->xen_vcpu_callback_vector = 0;
-    env->xen_singleshot_timer_ns = 0;
     memset(env->xen_virq, 0, sizeof(env->xen_virq));
 
     set_vcpu_info(cs, INVALID_GPA);
@@ -555,8 +557,13 @@ static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
                           INVALID_GPA);
     if (kvm_xen_has_cap(EVTCHN_SEND)) {
         kvm_xen_set_vcpu_callback_vector(cs);
+
+        QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
+        env->xen_singleshot_timer_ns = 0;
         kvm_xen_set_vcpu_timer(cs);
-    }
+    } else {
+        vcpuop_stop_singleshot_timer(cs);
+    };
 
 }
 
@@ -1059,6 +1066,10 @@ static int vcpuop_stop_periodic_timer(CPUState *target)
     return 0;
 }
 
+/*
+ * Userspace handling of timer, for older kernels.
+ * Must always be called with xen_timers_lock held.
+ */
 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
                                    bool future, bool linux_wa)
 {
@@ -1086,12 +1097,8 @@ static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
         timeout_abs = now + delta;
     }
 
-    qemu_mutex_lock(&env->xen_timers_lock);
-
     timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
     env->xen_singleshot_timer_ns = now + delta;
-
-    qemu_mutex_unlock(&env->xen_timers_lock);
     return 0;
 }
 
@@ -1115,6 +1122,7 @@ static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
         return -EFAULT;
     }
 
+    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
     return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
                                    !!(sst.flags & VCPU_SSHOTTMR_future),
                                    false);
@@ -1141,6 +1149,7 @@ static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
     if (unlikely(timeout == 0)) {
         err = vcpuop_stop_singleshot_timer(CPU(cpu));
     } else {
+        QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
         err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
     }
     exit->u.hcall.result = err;
@@ -1826,6 +1835,7 @@ int kvm_put_xen_state(CPUState *cs)
          * If the kernel has EVTCHN_SEND support then it handles timers too,
          * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
          */
+        QEMU_LOCK_GUARD(&env->xen_timers_lock);
         if (env->xen_singleshot_timer_ns) {
             ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
                                     false, false);
@@ -1844,10 +1854,7 @@ int kvm_put_xen_state(CPUState *cs)
     }
 
     if (env->xen_virq[VIRQ_TIMER]) {
-        ret = kvm_xen_set_vcpu_timer(cs);
-        if (ret < 0) {
-            return ret;
-        }
+        do_set_vcpu_timer_virq(cs, RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
     }
     return 0;
 }
@@ -1896,6 +1903,15 @@ int kvm_get_xen_state(CPUState *cs)
         if (ret < 0) {
             return ret;
         }
+
+        /*
+         * This locking is fairly pointless, and is here to appease Coverity.
+         * There is an unavoidable race condition if a different vCPU sets a
+         * timer for this vCPU after the value has been read out. But that's
+         * OK in practice because *all* the vCPUs need to be stopped before
+         * we set about migrating their state.
+         */
+        QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
         env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
     }
 
-- 
2.34.1

Re: [PATCH v2] i386/xen: consistent locking around Xen singleshot timers

Posted by Paul Durrant 2 years, 7 months ago

On 04/07/2023 16:51, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> Coverity points out (CID 1507534, 1507968) that we sometimes access
> env->xen_singleshot_timer_ns under the protection of
> env->xen_timers_lock and sometimes not.
> 
> This isn't always an issue. There are two modes for the timers; if the
> kernel supports the EVTCHN_SEND capability then it handles all the timer
> hypercalls and delivery internally, and all we use the field for is to
> get/set the timer as part of the vCPU state via an ioctl(). If the
> kernel doesn't have that support, then we do all the emulation within
> qemu, and *those* are the code paths where we actually care about the
> locking.
> 
> But it doesn't hurt to be a little bit more consistent and avoid having
> to explain *why* it's OK.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   target/i386/kvm/xen-emu.c | 36 ++++++++++++++++++++++++++----------
>   1 file changed, 26 insertions(+), 10 deletions(-)
>

Reviewed-by: Paul Durrant <paul@xen.org>

Re: [PATCH] i386/xen: consistent locking around Xen singleshot timers

Posted by Paul Durrant 2 years, 8 months ago

On 22/05/2023 19:52, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> Coverity points out (CID 1507534) that we sometimes access
> env->xen_singleshot_timer_ns under the protection of
> env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event()) and
> sometimes not (the specific case Coverity complains about is in
> do_vcpu_soft_reset()).
> 
> This isn't strictly an issue. There are two modes for the timers; if
> the kernel supports the EVTCHN_SEND capability then it handles all the
> timer hypercalls and delivery internally, and all we need to do is an
> ioctl to get/set the next timer as part of the vCPU state. If the
> kernel doesn't have that support, then we do all the emulation within
> qemu, and *those* are the code paths where we actually care about the
> locking.
> 
> But it doesn't hurt to be a little bit more consistent and avoid having
> to explain *why* it's OK.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
> 
> On Tue, 2023-05-09 at 15:55 +0100, Peter Maydell wrote:
>> Hi; Coverity points out (CID 1507534) that we seem to sometimes
>> access env->xen_singleshot_timer_ns under the protection of
>> env->xen_timers_lock (eg in xen_vcpu_singleshot_timer_event())
>> and sometimes not (the specific case Coverity complains about is
>> in do_vcpu_soft_reset()). Is this a false positive, or is there
> 
> Thanks. As noted, I think it's harmless but it doesn't hurt to clean it
> up a bit. I've pushed this to my tree at
> https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv
> on top of the other fixes that didn't make 8.0.
> 
>   target/i386/kvm/xen-emu.c | 15 +++++++++------
>   1 file changed, 9 insertions(+), 6 deletions(-)
> 

Reviewed-by: Paul Durrant <paul@xen.org>