[PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand

Greg Kurz posted 1 patch 3 years, 4 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/160528045027.804522.6161091782230763832.stgit@bahia.lan
Maintainers: "Cédric Le Goater" <clg@kaod.org>, David Gibson <david@gibson.dropbear.id.au>
hw/intc/spapr_xive_kvm.c |  141 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 127 insertions(+), 14 deletions(-)
[PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand
Posted by Greg Kurz 3 years, 4 months ago
Recent commit acbdb9956fe9 introduced a dedicated path to create
IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with
the assumption that the IPI number is equal to the vCPU id. The
latter is wrong: the guest chooses an arbitrary LISN from the
"ibm,xive-lisn-ranges" and assigns it to a target vCPU with the
H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because
IPI numbers and vCPU ids happen to match by default. This is no
longer the case though when setting the VSMT machine property to
a value that is different from (ie. bigger than) the number of
vCPUs per core (ie. -smp threads). Wrong IPIs end up being created
in KVM but the guest still uses the ones it has allocated and gets
very confused (see BugLink below).

Fix this by creating the IPI at the only place where we have
its appropriate number : when the guest allocates it with the
H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because
it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall
arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus
list. It is now used instead of vcpu_id to filter unallocated IPIs
out in xive_source_is_valid(). It also allows to only reset the
IPI on the first call to H_INT_SET_SOURCE_CONFIG.

Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load().
Masked ones will be created when the guests eventually unmask them
with H_INT_SET_SOURCE_CONFIG.

Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the other sources")
BugLink: https://bugs.launchpad.net/qemu/+bug/1900241
Cc: clg@kaod.org
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/intc/spapr_xive_kvm.c |  141 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 127 insertions(+), 14 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 66bf4c06fe55..4e5871c3aac2 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -30,6 +30,7 @@
  */
 typedef struct KVMEnabledCPU {
     unsigned long vcpu_id;
+    XiveEAS *ipi_eas;
     QLIST_ENTRY(KVMEnabledCPU) node;
 } KVMEnabledCPU;
 
@@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs)
 
     enabled_cpu = g_malloc(sizeof(*enabled_cpu));
     enabled_cpu->vcpu_id = vcpu_id;
+    enabled_cpu->ipi_eas = NULL;
     QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
 }
 
@@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
  */
 typedef struct {
     SpaprXive *xive;
+    uint32_t lisn;
     Error *err;
     int rc;
 } XiveInitIPI;
 
 static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
 {
-    unsigned long ipi = kvm_arch_vcpu_id(cs);
     XiveInitIPI *s = arg.host_ptr;
+    unsigned long ipi = s->lisn;
     uint64_t state = 0;
 
     s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
                               &state, true, &s->err);
 }
 
-static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
+static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t lisn,
+                                 Error **errp)
 {
     XiveInitIPI s = {
         .xive = xive,
         .err  = NULL,
         .rc   = 0,
+        .lisn = lisn,
     };
 
     run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
@@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
         return ret;
     }
 
-    /* Create/reset the vCPU IPI */
-    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
-    if (ret < 0) {
-        return ret;
-    }
-
     kvm_cpu_enable(tctx->cs);
     return 0;
 }
@@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
  * XIVE Interrupt Source (KVM)
  */
 
+static bool spapr_xive_is_ipi(uint32_t lisn)
+{
+    return lisn < SPAPR_XIRQ_BASE;
+}
+
+static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn)
+{
+    KVMEnabledCPU *enabled_cpu;
+
+    g_assert(spapr_xive_is_ipi(lisn));
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->ipi_eas == &xive->eat[lisn]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id,
+                          Error **errp)
+{
+    KVMEnabledCPU *enabled_cpu;
+
+    g_assert(spapr_xive_is_ipi(lisn));
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->vcpu_id == vcpu_id) {
+            CPUState *cs = CPU(spapr_find_cpu(vcpu_id));
+            XiveEAS *eas = &xive->eat[lisn];
+
+            /* No change ? */
+            if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) {
+                return 0;
+            }
+
+            /* XXX: abort ? */
+            if (!cs) {
+                break;
+            }
+
+            /* Create/reset the vCPU IPI */
+            int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp);
+            if (ret < 0) {
+                return ret;
+            }
+
+            enabled_cpu->ipi_eas = eas;
+            return 0;
+        }
+    }
+
+    error_setg(errp, "vCPU #%d not found", vcpu_id);
+    return -ESRCH;
+}
+
 int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                   Error **errp)
 {
@@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
 
     spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
 
+    if (spapr_xive_is_ipi(lisn)) {
+        /* Create the vCPU IPI */
+        int ret = kvm_ipi_enable(xive, lisn, server, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
         KVM_XIVE_SOURCE_PRIORITY_MASK;
     kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
@@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
     assert(xive->fd != -1);
 
     /*
-     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
+     * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config()
      * and not with all sources in kvmppc_xive_source_reset()
      */
     assert(srcno >= SPAPR_XIRQ_BASE);
@@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
  * To be valid, a source must have been claimed by the machine (valid
  * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
  * have been enabled, which means the IPI has been allocated in
- * kvmppc_xive_cpu_connect().
+ * kvmppc_xive_set_source_config().
  */
 static bool xive_source_is_valid(SpaprXive *xive, int i)
 {
     return xive_eas_is_valid(&xive->eat[i]) &&
-        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
+        (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i));
 }
 
 static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
@@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
     int i;
 
     /*
-     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
-     * connected in kvmppc_xive_cpu_connect()
+     * Skip the vCPU IPIs. These are created/reset on-demand in
+     * kvmppc_xive_set_source_config().
      */
     for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
         int ret;
@@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     }
 
     /* Restore the EAT */
-    for (i = 0; i < xive->nr_irqs; i++) {
+
+    /* IPIs are restored from the appropriate vCPU context */
+    CPU_FOREACH(cs) {
+        /*
+         * The EAT has valid entries to accomodate all possible vCPUs,
+         * but we only want to allocate in KVM the IPIs that were
+         * actually allocated before migration. Let's consider the full
+         * list of IPIs to find an EAS that matches the vCPU id.
+         *
+         * If an IPI appears unmasked in the EAT, it is a proof that the
+         * guest did successfully call H_INT_SET_SOURCE_CONFIG and we
+         * should thus create the IPI at the KVM level if the END index
+         * matches the vCPU id.
+         *
+         * If an IPI appears masked in the EAT, then we don't know exactly
+         * what happened before migration but we don't care. The IPI will
+         * be created when the guest eventually unmasks it with a subsequent
+         * call to H_INT_SET_SOURCE_CONFIG.
+         */
+        for (i = 0; i < SPAPR_XIRQ_BASE; i++) {
+            XiveEAS *eas = &xive->eat[i];
+            uint32_t end_idx;
+            uint32_t end_blk;
+            uint8_t priority;
+            uint32_t server;
+
+            if (!xive_eas_is_valid(eas)) {
+                continue;
+            }
+
+            if (xive_eas_is_masked(eas)) {
+                continue;
+            }
+
+            end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+            end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+            spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+            if (server != kvm_arch_vcpu_id(cs)) {
+                continue;
+            }
+
+            ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err);
+            if (ret < 0) {
+                goto fail;
+            }
+            break;
+        }
+    }
+
+    /* Now restore non-IPIs */
+    for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) {
         if (!xive_source_is_valid(xive, i)) {
             continue;
         }



Re: [PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand
Posted by Cédric Le Goater 3 years, 4 months ago
On 11/13/20 4:14 PM, Greg Kurz wrote:
> Recent commit acbdb9956fe9 introduced a dedicated path to create
> IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with
> the assumption that the IPI number is equal to the vCPU id. The
> latter is wrong: the guest chooses an arbitrary LISN from the
> "ibm,xive-lisn-ranges" and assigns it to a target vCPU with the
> H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because
> IPI numbers and vCPU ids happen to match by default. This is no
> longer the case though when setting the VSMT machine property to
> a value that is different from (ie. bigger than) the number of
> vCPUs per core (ie. -smp threads). Wrong IPIs end up being created
> in KVM but the guest still uses the ones it has allocated and gets
> very confused (see BugLink below).
> 
> Fix this by creating the IPI at the only place where we have
> its appropriate number : when the guest allocates it with the
> H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because
> it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall
> arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus
> list. It is now used instead of vcpu_id to filter unallocated IPIs
> out in xive_source_is_valid(). It also allows to only reset the
> IPI on the first call to H_INT_SET_SOURCE_CONFIG.
> 
> Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load().
> Masked ones will be created when the guests eventually unmask them
> with H_INT_SET_SOURCE_CONFIG.
> 
> Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
> Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the other sources")
> BugLink: https://bugs.launchpad.net/qemu/+bug/1900241
> Cc: clg@kaod.org
> Signed-off-by: Greg Kurz <groug@kaod.org>


I am quite certain this is correct but the complexity looks like a 
potential source of bugs. So I think it is simpler to revert the 
optimization introduced by acbdb9956fe9 and find a better solution 
covering SMT also. 

Thanks,

C.



> ---
>  hw/intc/spapr_xive_kvm.c |  141 +++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 127 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 66bf4c06fe55..4e5871c3aac2 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -30,6 +30,7 @@
>   */
>  typedef struct KVMEnabledCPU {
>      unsigned long vcpu_id;
> +    XiveEAS *ipi_eas;
>      QLIST_ENTRY(KVMEnabledCPU) node;
>  } KVMEnabledCPU;
>  
> @@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs)
>  
>      enabled_cpu = g_malloc(sizeof(*enabled_cpu));
>      enabled_cpu->vcpu_id = vcpu_id;
> +    enabled_cpu->ipi_eas = NULL;
>      QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
>  }
>  
> @@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>   */
>  typedef struct {
>      SpaprXive *xive;
> +    uint32_t lisn;
>      Error *err;
>      int rc;
>  } XiveInitIPI;
>  
>  static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
>  {
> -    unsigned long ipi = kvm_arch_vcpu_id(cs);
>      XiveInitIPI *s = arg.host_ptr;
> +    unsigned long ipi = s->lisn;
>      uint64_t state = 0;
>  
>      s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
>                                &state, true, &s->err);
>  }
>  
> -static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
> +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t lisn,
> +                                 Error **errp)
>  {
>      XiveInitIPI s = {
>          .xive = xive,
>          .err  = NULL,
>          .rc   = 0,
> +        .lisn = lisn,
>      };
>  
>      run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
> @@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>          return ret;
>      }
>  
> -    /* Create/reset the vCPU IPI */
> -    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
> -    if (ret < 0) {
> -        return ret;
> -    }
> -
>      kvm_cpu_enable(tctx->cs);
>      return 0;
>  }
> @@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>   * XIVE Interrupt Source (KVM)
>   */
>  
> +static bool spapr_xive_is_ipi(uint32_t lisn)
> +{
> +    return lisn < SPAPR_XIRQ_BASE;
> +}
> +
> +static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn)
> +{
> +    KVMEnabledCPU *enabled_cpu;
> +
> +    g_assert(spapr_xive_is_ipi(lisn));
> +
> +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> +        if (enabled_cpu->ipi_eas == &xive->eat[lisn]) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id,
> +                          Error **errp)
> +{
> +    KVMEnabledCPU *enabled_cpu;
> +
> +    g_assert(spapr_xive_is_ipi(lisn));
> +
> +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> +        if (enabled_cpu->vcpu_id == vcpu_id) {
> +            CPUState *cs = CPU(spapr_find_cpu(vcpu_id));
> +            XiveEAS *eas = &xive->eat[lisn];
> +
> +            /* No change ? */
> +            if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) {
> +                return 0;
> +            }
> +
> +            /* XXX: abort ? */
> +            if (!cs) {
> +                break;
> +            }
> +
> +            /* Create/reset the vCPU IPI */
> +            int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +
> +            enabled_cpu->ipi_eas = eas;
> +            return 0;
> +        }
> +    }
> +
> +    error_setg(errp, "vCPU #%d not found", vcpu_id);
> +    return -ESRCH;
> +}
> +
>  int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
>                                    Error **errp)
>  {
> @@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
>  
>      spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
>  
> +    if (spapr_xive_is_ipi(lisn)) {
> +        /* Create the vCPU IPI */
> +        int ret = kvm_ipi_enable(xive, lisn, server, errp);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
>      kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
>          KVM_XIVE_SOURCE_PRIORITY_MASK;
>      kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
> @@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
>      assert(xive->fd != -1);
>  
>      /*
> -     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
> +     * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config()
>       * and not with all sources in kvmppc_xive_source_reset()
>       */
>      assert(srcno >= SPAPR_XIRQ_BASE);
> @@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
>   * To be valid, a source must have been claimed by the machine (valid
>   * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
>   * have been enabled, which means the IPI has been allocated in
> - * kvmppc_xive_cpu_connect().
> + * kvmppc_xive_set_source_config().
>   */
>  static bool xive_source_is_valid(SpaprXive *xive, int i)
>  {
>      return xive_eas_is_valid(&xive->eat[i]) &&
> -        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
> +        (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i));
>  }
>  
>  static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> @@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>      int i;
>  
>      /*
> -     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
> -     * connected in kvmppc_xive_cpu_connect()
> +     * Skip the vCPU IPIs. These are created/reset on-demand in
> +     * kvmppc_xive_set_source_config().
>       */
>      for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
>          int ret;
> @@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
>      }
>  
>      /* Restore the EAT */
> -    for (i = 0; i < xive->nr_irqs; i++) {
> +
> +    /* IPIs are restored from the appropriate vCPU context */
> +    CPU_FOREACH(cs) {
> +        /*
> +         * The EAT has valid entries to accomodate all possible vCPUs,
> +         * but we only want to allocate in KVM the IPIs that were
> +         * actually allocated before migration. Let's consider the full
> +         * list of IPIs to find an EAS that matches the vCPU id.
> +         *
> +         * If an IPI appears unmasked in the EAT, it is a proof that the
> +         * guest did successfully call H_INT_SET_SOURCE_CONFIG and we
> +         * should thus create the IPI at the KVM level if the END index
> +         * matches the vCPU id.
> +         *
> +         * If an IPI appears masked in the EAT, then we don't know exactly
> +         * what happened before migration but we don't care. The IPI will
> +         * be created when the guest eventually unmasks it with a subsequent
> +         * call to H_INT_SET_SOURCE_CONFIG.
> +         */
> +        for (i = 0; i < SPAPR_XIRQ_BASE; i++) {
> +            XiveEAS *eas = &xive->eat[i];
> +            uint32_t end_idx;
> +            uint32_t end_blk;
> +            uint8_t priority;
> +            uint32_t server;
> +
> +            if (!xive_eas_is_valid(eas)) {
> +                continue;
> +            }
> +
> +            if (xive_eas_is_masked(eas)) {
> +                continue;
> +            }
> +
> +            end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> +            end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
> +            spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> +            if (server != kvm_arch_vcpu_id(cs)) {
> +                continue;
> +            }
> +
> +            ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err);
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +            break;
> +        }
> +    }
> +
> +    /* Now restore non-IPIs */
> +    for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) {
>          if (!xive_source_is_valid(xive, i)) {
>              continue;
>          }
> 
> 


Re: [PATCH for-5.2] spapr/xive: Create IPIs in KVM on demand
Posted by Greg Kurz 3 years, 4 months ago
On Mon, 16 Nov 2020 14:43:12 +0100
Cédric Le Goater <clg@kaod.org> wrote:

> On 11/13/20 4:14 PM, Greg Kurz wrote:
> > Recent commit acbdb9956fe9 introduced a dedicated path to create
> > IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with
> > the assumption that the IPI number is equal to the vCPU id. The
> > latter is wrong: the guest chooses an arbitrary LISN from the
> > "ibm,xive-lisn-ranges" and assigns it to a target vCPU with the
> > H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because
> > IPI numbers and vCPU ids happen to match by default. This is no
> > longer the case though when setting the VSMT machine property to
> > a value that is different from (ie. bigger than) the number of
> > vCPUs per core (ie. -smp threads). Wrong IPIs end up being created
> > in KVM but the guest still uses the ones it has allocated and gets
> > very confused (see BugLink below).
> > 
> > Fix this by creating the IPI at the only place where we have
> > its appropriate number : when the guest allocates it with the
> > H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because
> > it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall
> > arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus
> > list. It is now used instead of vcpu_id to filter unallocated IPIs
> > out in xive_source_is_valid(). It also allows to only reset the
> > IPI on the first call to H_INT_SET_SOURCE_CONFIG.
> > 
> > Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load().
> > Masked ones will be created when the guests eventually unmask them
> > with H_INT_SET_SOURCE_CONFIG.
> > 
> > Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
> > Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the other sources")
> > BugLink: https://bugs.launchpad.net/qemu/+bug/1900241
> > Cc: clg@kaod.org
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> 
> 
> I am quite certain this is correct but the complexity looks like a 
> potential source of bugs. So I think it is simpler to revert the 
> optimization introduced by acbdb9956fe9 and find a better solution 
> covering SMT also. 
> 

I tend to agree. Even if I could successfully test various scenarios
around hotplug and migration, I'm really not super confident to merge
this in an RC. I'll post a series that reverts acbdb9956fe9 ASAP.

> Thanks,
> 
> C.
> 
> 
> 
> > ---
> >  hw/intc/spapr_xive_kvm.c |  141 +++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 127 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > index 66bf4c06fe55..4e5871c3aac2 100644
> > --- a/hw/intc/spapr_xive_kvm.c
> > +++ b/hw/intc/spapr_xive_kvm.c
> > @@ -30,6 +30,7 @@
> >   */
> >  typedef struct KVMEnabledCPU {
> >      unsigned long vcpu_id;
> > +    XiveEAS *ipi_eas;
> >      QLIST_ENTRY(KVMEnabledCPU) node;
> >  } KVMEnabledCPU;
> >  
> > @@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs)
> >  
> >      enabled_cpu = g_malloc(sizeof(*enabled_cpu));
> >      enabled_cpu->vcpu_id = vcpu_id;
> > +    enabled_cpu->ipi_eas = NULL;
> >      QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
> >  }
> >  
> > @@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> >   */
> >  typedef struct {
> >      SpaprXive *xive;
> > +    uint32_t lisn;
> >      Error *err;
> >      int rc;
> >  } XiveInitIPI;
> >  
> >  static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
> >  {
> > -    unsigned long ipi = kvm_arch_vcpu_id(cs);
> >      XiveInitIPI *s = arg.host_ptr;
> > +    unsigned long ipi = s->lisn;
> >      uint64_t state = 0;
> >  
> >      s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
> >                                &state, true, &s->err);
> >  }
> >  
> > -static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
> > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t lisn,
> > +                                 Error **errp)
> >  {
> >      XiveInitIPI s = {
> >          .xive = xive,
> >          .err  = NULL,
> >          .rc   = 0,
> > +        .lisn = lisn,
> >      };
> >  
> >      run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
> > @@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
> >          return ret;
> >      }
> >  
> > -    /* Create/reset the vCPU IPI */
> > -    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
> > -    if (ret < 0) {
> > -        return ret;
> > -    }
> > -
> >      kvm_cpu_enable(tctx->cs);
> >      return 0;
> >  }
> > @@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
> >   * XIVE Interrupt Source (KVM)
> >   */
> >  
> > +static bool spapr_xive_is_ipi(uint32_t lisn)
> > +{
> > +    return lisn < SPAPR_XIRQ_BASE;
> > +}
> > +
> > +static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn)
> > +{
> > +    KVMEnabledCPU *enabled_cpu;
> > +
> > +    g_assert(spapr_xive_is_ipi(lisn));
> > +
> > +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> > +        if (enabled_cpu->ipi_eas == &xive->eat[lisn]) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> > +static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id,
> > +                          Error **errp)
> > +{
> > +    KVMEnabledCPU *enabled_cpu;
> > +
> > +    g_assert(spapr_xive_is_ipi(lisn));
> > +
> > +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> > +        if (enabled_cpu->vcpu_id == vcpu_id) {
> > +            CPUState *cs = CPU(spapr_find_cpu(vcpu_id));
> > +            XiveEAS *eas = &xive->eat[lisn];
> > +
> > +            /* No change ? */
> > +            if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) {
> > +                return 0;
> > +            }
> > +
> > +            /* XXX: abort ? */
> > +            if (!cs) {
> > +                break;
> > +            }
> > +
> > +            /* Create/reset the vCPU IPI */
> > +            int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp);
> > +            if (ret < 0) {
> > +                return ret;
> > +            }
> > +
> > +            enabled_cpu->ipi_eas = eas;
> > +            return 0;
> > +        }
> > +    }
> > +
> > +    error_setg(errp, "vCPU #%d not found", vcpu_id);
> > +    return -ESRCH;
> > +}
> > +
> >  int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
> >                                    Error **errp)
> >  {
> > @@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
> >  
> >      spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> >  
> > +    if (spapr_xive_is_ipi(lisn)) {
> > +        /* Create the vCPU IPI */
> > +        int ret = kvm_ipi_enable(xive, lisn, server, errp);
> > +        if (ret < 0) {
> > +            return ret;
> > +        }
> > +    }
> > +
> >      kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
> >          KVM_XIVE_SOURCE_PRIORITY_MASK;
> >      kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
> > @@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
> >      assert(xive->fd != -1);
> >  
> >      /*
> > -     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
> > +     * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config()
> >       * and not with all sources in kvmppc_xive_source_reset()
> >       */
> >      assert(srcno >= SPAPR_XIRQ_BASE);
> > @@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
> >   * To be valid, a source must have been claimed by the machine (valid
> >   * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
> >   * have been enabled, which means the IPI has been allocated in
> > - * kvmppc_xive_cpu_connect().
> > + * kvmppc_xive_set_source_config().
> >   */
> >  static bool xive_source_is_valid(SpaprXive *xive, int i)
> >  {
> >      return xive_eas_is_valid(&xive->eat[i]) &&
> > -        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
> > +        (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i));
> >  }
> >  
> >  static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> > @@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> >      int i;
> >  
> >      /*
> > -     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
> > -     * connected in kvmppc_xive_cpu_connect()
> > +     * Skip the vCPU IPIs. These are created/reset on-demand in
> > +     * kvmppc_xive_set_source_config().
> >       */
> >      for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
> >          int ret;
> > @@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
> >      }
> >  
> >      /* Restore the EAT */
> > -    for (i = 0; i < xive->nr_irqs; i++) {
> > +
> > +    /* IPIs are restored from the appropriate vCPU context */
> > +    CPU_FOREACH(cs) {
> > +        /*
> > +         * The EAT has valid entries to accomodate all possible vCPUs,
> > +         * but we only want to allocate in KVM the IPIs that were
> > +         * actually allocated before migration. Let's consider the full
> > +         * list of IPIs to find an EAS that matches the vCPU id.
> > +         *
> > +         * If an IPI appears unmasked in the EAT, it is a proof that the
> > +         * guest did successfully call H_INT_SET_SOURCE_CONFIG and we
> > +         * should thus create the IPI at the KVM level if the END index
> > +         * matches the vCPU id.
> > +         *
> > +         * If an IPI appears masked in the EAT, then we don't know exactly
> > +         * what happened before migration but we don't care. The IPI will
> > +         * be created when the guest eventually unmasks it with a subsequent
> > +         * call to H_INT_SET_SOURCE_CONFIG.
> > +         */
> > +        for (i = 0; i < SPAPR_XIRQ_BASE; i++) {
> > +            XiveEAS *eas = &xive->eat[i];
> > +            uint32_t end_idx;
> > +            uint32_t end_blk;
> > +            uint8_t priority;
> > +            uint32_t server;
> > +
> > +            if (!xive_eas_is_valid(eas)) {
> > +                continue;
> > +            }
> > +
> > +            if (xive_eas_is_masked(eas)) {
> > +                continue;
> > +            }
> > +
> > +            end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> > +            end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
> > +            spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> > +            if (server != kvm_arch_vcpu_id(cs)) {
> > +                continue;
> > +            }
> > +
> > +            ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err);
> > +            if (ret < 0) {
> > +                goto fail;
> > +            }
> > +            break;
> > +        }
> > +    }
> > +
> > +    /* Now restore non-IPIs */
> > +    for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) {
> >          if (!xive_source_is_valid(xive, i)) {
> >              continue;
> >          }
> > 
> > 
>