[PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug

Salil Mehta via posted 29 patches 5 months, 2 weeks ago
There is a newer version of this series
[PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 5 months, 2 weeks ago
vCPU Hot-unplug will result in QOM CPU object unrealization which will do away
with all the vCPU thread creations, allocations, registrations that happened
as part of the realization process. This change introduces the ARM CPU unrealize
function taking care of exactly that.

Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context
is parked at the QEMU KVM layer.

Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
[VP: Identified CPU stall issue & suggested probable fix]
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
 target/arm/cpu.c       | 101 +++++++++++++++++++++++++++++++++++++++++
 target/arm/cpu.h       |  14 ++++++
 target/arm/gdbstub.c   |   6 +++
 target/arm/helper.c    |  25 ++++++++++
 target/arm/internals.h |   3 ++
 target/arm/kvm.c       |   5 ++
 6 files changed, 154 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index c92162fa97..a3dc669309 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -157,6 +157,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
     QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);
 }
 
+void arm_unregister_pre_el_change_hooks(ARMCPU *cpu)
+{
+    ARMELChangeHook *entry, *next;
+
+    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) {
+        QLIST_REMOVE(entry, node);
+        g_free(entry);
+    }
+}
+
 void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
                                  void *opaque)
 {
@@ -168,6 +178,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
     QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
 }
 
+void arm_unregister_el_change_hooks(ARMCPU *cpu)
+{
+    ARMELChangeHook *entry, *next;
+
+    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
+        QLIST_REMOVE(entry, node);
+        g_free(entry);
+    }
+}
+
 static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
 {
     /* Reset a single ARMCPRegInfo register */
@@ -2552,6 +2572,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     acc->parent_realize(dev, errp);
 }
 
+static void arm_cpu_unrealizefn(DeviceState *dev)
+{
+    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
+    ARMCPU *cpu = ARM_CPU(dev);
+    CPUARMState *env = &cpu->env;
+    CPUState *cs = CPU(dev);
+    bool has_secure;
+
+    has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
+
+    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */
+    cpu_address_space_destroy(cs, ARMASIdx_NS);
+
+    if (cpu->tag_memory != NULL) {
+        cpu_address_space_destroy(cs, ARMASIdx_TagNS);
+        if (has_secure) {
+            cpu_address_space_destroy(cs, ARMASIdx_TagS);
+        }
+    }
+
+    if (has_secure) {
+        cpu_address_space_destroy(cs, ARMASIdx_S);
+    }
+
+    destroy_cpreg_list(cpu);
+    arm_cpu_unregister_gdb_regs(cpu);
+    unregister_cp_regs_for_features(cpu);
+
+    if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+        g_free(env->sau.rbar);
+        g_free(env->sau.rlar);
+    }
+
+    if (arm_feature(env, ARM_FEATURE_PMSA) &&
+        arm_feature(env, ARM_FEATURE_V7) &&
+        cpu->pmsav7_dregion) {
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            g_free(env->pmsav8.rbar[M_REG_NS]);
+            g_free(env->pmsav8.rlar[M_REG_NS]);
+            if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+                g_free(env->pmsav8.rbar[M_REG_S]);
+                g_free(env->pmsav8.rlar[M_REG_S]);
+            }
+        } else {
+            g_free(env->pmsav7.drbar);
+            g_free(env->pmsav7.drsr);
+            g_free(env->pmsav7.dracr);
+        }
+        if (cpu->pmsav8r_hdregion) {
+            g_free(env->pmsav8.hprbar);
+            g_free(env->pmsav8.hprlar);
+        }
+    }
+
+    if (arm_feature(env, ARM_FEATURE_PMU)) {
+        if (!kvm_enabled()) {
+            arm_unregister_pre_el_change_hooks(cpu);
+            arm_unregister_el_change_hooks(cpu);
+        }
+
+#ifndef CONFIG_USER_ONLY
+        if (cpu->pmu_timer) {
+            timer_del(cpu->pmu_timer);
+        }
+#endif
+    }
+
+    cpu_remove_sync(CPU(dev));
+    acc->parent_unrealize(dev);
+
+#ifndef CONFIG_USER_ONLY
+    timer_del(cpu->gt_timer[GTIMER_PHYS]);
+    timer_del(cpu->gt_timer[GTIMER_VIRT]);
+    timer_del(cpu->gt_timer[GTIMER_HYP]);
+    timer_del(cpu->gt_timer[GTIMER_SEC]);
+    timer_del(cpu->gt_timer[GTIMER_HYPVIRT]);
+#endif
+}
+
 static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
 {
     ObjectClass *oc;
@@ -2654,6 +2753,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
 
     device_class_set_parent_realize(dc, arm_cpu_realizefn,
                                     &acc->parent_realize);
+    device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn,
+                                      &acc->parent_unrealize);
 
     device_class_set_props(dc, arm_cpu_properties);
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 208c719db3..a4a7555f7e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1118,6 +1118,7 @@ struct ARMCPUClass {
 
     const ARMCPUInfo *info;
     DeviceRealize parent_realize;
+    DeviceUnrealize parent_unrealize;
     ResettablePhases parent_phases;
 };
 
@@ -3228,6 +3229,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs)
  */
 void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
                                  void *opaque);
+/**
+ * arm_unregister_pre_el_change_hook:
+ * unregister all pre EL change hook functions. Generally called during
+ * unrealize'ing leg
+ */
+void arm_unregister_pre_el_change_hooks(ARMCPU *cpu);
+
 /**
  * arm_register_el_change_hook:
  * Register a hook function which will be called immediately after this
@@ -3240,6 +3248,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
  */
 void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
         *opaque);
+/**
+ * arm_unregister_el_change_hook:
+ * unregister all EL change hook functions.  Generally called during
+ * unrealize'ing leg
+ */
+void arm_unregister_el_change_hooks(ARMCPU *cpu);
 
 /**
  * arm_rebuild_hflags:
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index a3bb73cfa7..948e40b981 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -555,3 +555,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
     }
 #endif /* CONFIG_TCG */
 }
+
+void arm_cpu_unregister_gdb_regs(ARMCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    gdb_unregister_coprocessor_all(cs);
+}
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7587635960..9a2468347a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -264,6 +264,19 @@ void init_cpreg_list(ARMCPU *cpu)
     g_list_free(keys);
 }
 
+void destroy_cpreg_list(ARMCPU *cpu)
+{
+    assert(cpu->cpreg_indexes);
+    assert(cpu->cpreg_values);
+    assert(cpu->cpreg_vmstate_indexes);
+    assert(cpu->cpreg_vmstate_values);
+
+    g_free(cpu->cpreg_indexes);
+    g_free(cpu->cpreg_values);
+    g_free(cpu->cpreg_vmstate_indexes);
+    g_free(cpu->cpreg_vmstate_values);
+}
+
 static bool arm_pan_enabled(CPUARMState *env)
 {
     if (is_a64(env)) {
@@ -9987,6 +10000,18 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 #endif
 }
 
+void unregister_cp_regs_for_features(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        /* M profile has no coprocessor registers */
+        return;
+    }
+
+    /* empty it all. unregister all the coprocessor registers */
+    g_hash_table_remove_all(cpu->cp_regs);
+}
+
 /*
  * Private utility function for define_one_arm_cp_reg_with_opaque():
  * add a single reginfo struct to the hash table.
diff --git a/target/arm/internals.h b/target/arm/internals.h
index ee3ebd383e..34dab0bb02 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -353,9 +353,12 @@ void arm_cpu_register(const ARMCPUInfo *info);
 void aarch64_cpu_register(const ARMCPUInfo *info);
 
 void register_cp_regs_for_features(ARMCPU *cpu);
+void unregister_cp_regs_for_features(ARMCPU *cpu);
 void init_cpreg_list(ARMCPU *cpu);
+void destroy_cpreg_list(ARMCPU *cpu);
 
 void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
+void arm_cpu_unregister_gdb_regs(ARMCPU *cpu);
 void arm_translate_init(void);
 
 void arm_restore_state_to_opc(CPUState *cs,
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 01c83c1994..1121771c4a 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1988,6 +1988,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
 int kvm_arch_destroy_vcpu(CPUState *cs)
 {
+    /* vCPUs which are yet to be realized will not have handler */
+    if (cs->thread_id) {
+        qemu_del_vm_change_state_handler(cs->vmcse);
+    }
+
     return 0;
 }
 
-- 
2.34.1
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Alex Bennée 3 months, 1 week ago
Salil Mehta <salil.mehta@huawei.com> writes:

> vCPU Hot-unplug will result in QOM CPU object unrealization which will do away
> with all the vCPU thread creations, allocations, registrations that happened
> as part of the realization process. This change introduces the ARM CPU unrealize
> function taking care of exactly that.
>
> Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context
> is parked at the QEMU KVM layer.
>
> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> [VP: Identified CPU stall issue & suggested probable fix]
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> ---
>  target/arm/cpu.c       | 101 +++++++++++++++++++++++++++++++++++++++++
>  target/arm/cpu.h       |  14 ++++++
>  target/arm/gdbstub.c   |   6 +++
>  target/arm/helper.c    |  25 ++++++++++
>  target/arm/internals.h |   3 ++
>  target/arm/kvm.c       |   5 ++
>  6 files changed, 154 insertions(+)
>
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index c92162fa97..a3dc669309 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -157,6 +157,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
>      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);
>  }
>  
> +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu)
> +{
> +    ARMELChangeHook *entry, *next;
> +
> +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) {
> +        QLIST_REMOVE(entry, node);
> +        g_free(entry);
> +    }
> +}
> +
>  void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
>                                   void *opaque)
>  {
> @@ -168,6 +178,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
>      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
>  }
>  
> +void arm_unregister_el_change_hooks(ARMCPU *cpu)
> +{
> +    ARMELChangeHook *entry, *next;
> +
> +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
> +        QLIST_REMOVE(entry, node);
> +        g_free(entry);
> +    }
> +}
> +
>  static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
>  {
>      /* Reset a single ARMCPRegInfo register */
> @@ -2552,6 +2572,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>      acc->parent_realize(dev, errp);
>  }
>  
> +static void arm_cpu_unrealizefn(DeviceState *dev)
> +{
> +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
> +    ARMCPU *cpu = ARM_CPU(dev);
> +    CPUARMState *env = &cpu->env;
> +    CPUState *cs = CPU(dev);
> +    bool has_secure;
> +
> +    has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
> +
> +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */
> +    cpu_address_space_destroy(cs, ARMASIdx_NS);

On current master this will fail:

../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
../../target/arm/cpu.c:2626:5: error: implicit declaration of function ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
 2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~
../../target/arm/cpu.c:2626:5: error: nested extern declaration of ‘cpu_address_space_destroy’ [-Werror=nested-externs]
cc1: all warnings being treated as errors

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
RE: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 3 months, 1 week ago
Hi Alex,

>  From: Alex Bennée <alex.bennee@linaro.org>
>  Sent: Friday, August 16, 2024 4:37 PM
>  To: Salil Mehta <salil.mehta@huawei.com>
>  
>  Salil Mehta <salil.mehta@huawei.com> writes:
>  
>  > vCPU Hot-unplug will result in QOM CPU object unrealization which will
>  > do away with all the vCPU thread creations, allocations, registrations
>  > that happened as part of the realization process. This change
>  > introduces the ARM CPU unrealize function taking care of exactly that.
>  >
>  > Note, initialized KVM vCPUs are not destroyed in host KVM but their
>  > Qemu context is parked at the QEMU KVM layer.
>  >
>  > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>  > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>  > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > [VP: Identified CPU stall issue & suggested probable fix]
>  > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > ---
>  >  target/arm/cpu.c       | 101
>  +++++++++++++++++++++++++++++++++++++++++
>  >  target/arm/cpu.h       |  14 ++++++
>  >  target/arm/gdbstub.c   |   6 +++
>  >  target/arm/helper.c    |  25 ++++++++++
>  >  target/arm/internals.h |   3 ++
>  >  target/arm/kvm.c       |   5 ++
>  >  6 files changed, 154 insertions(+)
>  >
>  > diff --git a/target/arm/cpu.c b/target/arm/cpu.c index
>  > c92162fa97..a3dc669309 100644
>  > --- a/target/arm/cpu.c
>  > +++ b/target/arm/cpu.c
>  > @@ -157,6 +157,16 @@ void
>  arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn
>  *hook,
>  >      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);  }
>  >
>  > +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) {
>  > +    ARMELChangeHook *entry, *next;
>  > +
>  > +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node,
>  next) {
>  > +        QLIST_REMOVE(entry, node);
>  > +        g_free(entry);
>  > +    }
>  > +}
>  > +
>  >  void arm_register_el_change_hook(ARMCPU *cpu,
>  ARMELChangeHookFn *hook,
>  >                                   void *opaque)  { @@ -168,6 +178,16
>  > @@ void arm_register_el_change_hook(ARMCPU *cpu,
>  ARMELChangeHookFn *hook,
>  >      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);  }
>  >
>  > +void arm_unregister_el_change_hooks(ARMCPU *cpu) {
>  > +    ARMELChangeHook *entry, *next;
>  > +
>  > +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
>  > +        QLIST_REMOVE(entry, node);
>  > +        g_free(entry);
>  > +    }
>  > +}
>  > +
>  >  static void cp_reg_reset(gpointer key, gpointer value, gpointer
>  > opaque)  {
>  >      /* Reset a single ARMCPRegInfo register */ @@ -2552,6 +2572,85 @@
>  > static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>  >      acc->parent_realize(dev, errp);
>  >  }
>  >
>  > +static void arm_cpu_unrealizefn(DeviceState *dev) {
>  > +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
>  > +    ARMCPU *cpu = ARM_CPU(dev);
>  > +    CPUARMState *env = &cpu->env;
>  > +    CPUState *cs = CPU(dev);
>  > +    bool has_secure;
>  > +
>  > +    has_secure = cpu->has_el3 || arm_feature(env,
>  > + ARM_FEATURE_M_SECURITY);
>  > +
>  > +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn
>  cleanly */
>  > +    cpu_address_space_destroy(cs, ARMASIdx_NS);
>  
>  On current master this will fail:
>  
>  ../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
>  ../../target/arm/cpu.c:2626:5: error: implicit declaration of function
>  ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
>   2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
>        |     ^~~~~~~~~~~~~~~~~~~~~~~~~
>  ../../target/arm/cpu.c:2626:5: error: nested extern declaration of
>  ‘cpu_address_space_destroy’ [-Werror=nested-externs]
>  cc1: all warnings being treated as errors


The current master already has arch-agnostic patch-set. I've applied the
RFC V3 to the latest and complied. I did not see this issue?

I've create a new branch for your reference.

https://github.com/salil-mehta/qemu/tree/virt-cpuhp-armv8/rfc-v4-rc4

Please let me know if this works for you?


Thanks
Salil.



>  
>  --
>  Alex Bennée
>  Virtualisation Tech Lead @ Linaro
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Gustavo Romero 2 months, 4 weeks ago
Hi Salil,

On 8/19/24 9:35 AM, Salil Mehta via wrote:
> Hi Alex,
> 
>>   From: Alex Bennée <alex.bennee@linaro.org>
>>   Sent: Friday, August 16, 2024 4:37 PM
>>   To: Salil Mehta <salil.mehta@huawei.com>
>>   
>>   Salil Mehta <salil.mehta@huawei.com> writes:
>>   
>>   > vCPU Hot-unplug will result in QOM CPU object unrealization which will
>>   > do away with all the vCPU thread creations, allocations, registrations
>>   > that happened as part of the realization process. This change
>>   > introduces the ARM CPU unrealize function taking care of exactly that.
>>   >
>>   > Note, initialized KVM vCPUs are not destroyed in host KVM but their
>>   > Qemu context is parked at the QEMU KVM layer.
>>   >
>>   > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>>   > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>>   > Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>>   > [VP: Identified CPU stall issue & suggested probable fix]
>>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>>   > ---
>>   >  target/arm/cpu.c       | 101
>>   +++++++++++++++++++++++++++++++++++++++++
>>   >  target/arm/cpu.h       |  14 ++++++
>>   >  target/arm/gdbstub.c   |   6 +++
>>   >  target/arm/helper.c    |  25 ++++++++++
>>   >  target/arm/internals.h |   3 ++
>>   >  target/arm/kvm.c       |   5 ++
>>   >  6 files changed, 154 insertions(+)
>>   >
>>   > diff --git a/target/arm/cpu.c b/target/arm/cpu.c index
>>   > c92162fa97..a3dc669309 100644
>>   > --- a/target/arm/cpu.c
>>   > +++ b/target/arm/cpu.c
>>   > @@ -157,6 +157,16 @@ void
>>   arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn
>>   *hook,
>>   >      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);  }
>>   >
>>   > +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) {
>>   > +    ARMELChangeHook *entry, *next;
>>   > +
>>   > +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node,
>>   next) {
>>   > +        QLIST_REMOVE(entry, node);
>>   > +        g_free(entry);
>>   > +    }
>>   > +}
>>   > +
>>   >  void arm_register_el_change_hook(ARMCPU *cpu,
>>   ARMELChangeHookFn *hook,
>>   >                                   void *opaque)  { @@ -168,6 +178,16
>>   > @@ void arm_register_el_change_hook(ARMCPU *cpu,
>>   ARMELChangeHookFn *hook,
>>   >      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);  }
>>   >
>>   > +void arm_unregister_el_change_hooks(ARMCPU *cpu) {
>>   > +    ARMELChangeHook *entry, *next;
>>   > +
>>   > +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
>>   > +        QLIST_REMOVE(entry, node);
>>   > +        g_free(entry);
>>   > +    }
>>   > +}
>>   > +
>>   >  static void cp_reg_reset(gpointer key, gpointer value, gpointer
>>   > opaque)  {
>>   >      /* Reset a single ARMCPRegInfo register */ @@ -2552,6 +2572,85 @@
>>   > static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>>   >      acc->parent_realize(dev, errp);
>>   >  }
>>   >
>>   > +static void arm_cpu_unrealizefn(DeviceState *dev) {
>>   > +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
>>   > +    ARMCPU *cpu = ARM_CPU(dev);
>>   > +    CPUARMState *env = &cpu->env;
>>   > +    CPUState *cs = CPU(dev);
>>   > +    bool has_secure;
>>   > +
>>   > +    has_secure = cpu->has_el3 || arm_feature(env,
>>   > + ARM_FEATURE_M_SECURITY);
>>   > +
>>   > +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn
>>   cleanly */
>>   > +    cpu_address_space_destroy(cs, ARMASIdx_NS);
>>   
>>   On current master this will fail:
>>   
>>   ../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
>>   ../../target/arm/cpu.c:2626:5: error: implicit declaration of function
>>   ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
>>    2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
>>         |     ^~~~~~~~~~~~~~~~~~~~~~~~~
>>   ../../target/arm/cpu.c:2626:5: error: nested extern declaration of
>>   ‘cpu_address_space_destroy’ [-Werror=nested-externs]
>>   cc1: all warnings being treated as errors
> 
> 
> The current master already has arch-agnostic patch-set. I've applied the
> RFC V3 to the latest and complied. I did not see this issue?
> 
> I've create a new branch for your reference.
> 
> https://github.com/salil-mehta/qemu/tree/virt-cpuhp-armv8/rfc-v4-rc4
> 
> Please let me know if this works for you?

It still happens on the new branch. You need to configure Linux user mode
to reproduce it, e.g.:

$ ../configure --target-list=aarch64-linux-user,aarch64-softmmu [...]

If you just configure the 'aarch64-softmmu' target it doesn't happen.


Cheers,
Gustavo

RE: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 2 months, 3 weeks ago
Hi Gustavo,

Sorry for the delay in reply...got pulled into something else.

>  From: Gustavo Romero <gustavo.romero@linaro.org>
>  Sent: Wednesday, August 28, 2024 9:24 PM
>  To: Salil Mehta <salil.mehta@huawei.com>; Alex Bennée
>  <alex.bennee@linaro.org>
>  
>  Hi Salil,
>  
>  On 8/19/24 9:35 AM, Salil Mehta via wrote:
>  > Hi Alex,
>  >
>  >>   From: Alex Bennée <alex.bennee@linaro.org>
>  >>   Sent: Friday, August 16, 2024 4:37 PM
>  >>   To: Salil Mehta <salil.mehta@huawei.com>
>  >>
>  >>   Salil Mehta <salil.mehta@huawei.com> writes:
>  >>
>  >>   > vCPU Hot-unplug will result in QOM CPU object unrealization which will
>  >>   > do away with all the vCPU thread creations, allocations, registrations
>  >>   > that happened as part of the realization process. This change
>  >>   > introduces the ARM CPU unrealize function taking care of exactly that.
>  >>   >
>  >>   > Note, initialized KVM vCPUs are not destroyed in host KVM but their
>  >>   > Qemu context is parked at the QEMU KVM layer.
>  >>   >
>  >>   > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>  >>   > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>  >>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  >>   > Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  >>   > [VP: Identified CPU stall issue & suggested probable fix]
>  >>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  >>   > ---
>  >>   >  target/arm/cpu.c       | 101
>  >>   +++++++++++++++++++++++++++++++++++++++++
>  >>   >  target/arm/cpu.h       |  14 ++++++
>  >>   >  target/arm/gdbstub.c   |   6 +++
>  >>   >  target/arm/helper.c    |  25 ++++++++++
>  >>   >  target/arm/internals.h |   3 ++
>  >>   >  target/arm/kvm.c       |   5 ++
>  >>   >  6 files changed, 154 insertions(+)
>  >>   >
>  >>   > diff --git a/target/arm/cpu.c b/target/arm/cpu.c index
>  >>   > c92162fa97..a3dc669309 100644
>  >>   > --- a/target/arm/cpu.c
>  >>   > +++ b/target/arm/cpu.c
>  >>   > @@ -157,6 +157,16 @@ void
>  >>   arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn
>  >>   *hook,
>  >>   >      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);  }
>  >>   >
>  >>   > +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) {
>  >>   > +    ARMELChangeHook *entry, *next;
>  >>   > +
>  >>   > +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node,
>  >>   next) {
>  >>   > +        QLIST_REMOVE(entry, node);
>  >>   > +        g_free(entry);
>  >>   > +    }
>  >>   > +}
>  >>   > +
>  >>   >  void arm_register_el_change_hook(ARMCPU *cpu,
>  >>   ARMELChangeHookFn *hook,
>  >>   >                                   void *opaque)  { @@ -168,6 +178,16
>  >>   > @@ void arm_register_el_change_hook(ARMCPU *cpu,
>  >>   ARMELChangeHookFn *hook,
>  >>   >      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);  }
>  >>   >
>  >>   > +void arm_unregister_el_change_hooks(ARMCPU *cpu) {
>  >>   > +    ARMELChangeHook *entry, *next;
>  >>   > +
>  >>   > +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node,
>  next) {
>  >>   > +        QLIST_REMOVE(entry, node);
>  >>   > +        g_free(entry);
>  >>   > +    }
>  >>   > +}
>  >>   > +
>  >>   >  static void cp_reg_reset(gpointer key, gpointer value, gpointer
>  >>   > opaque)  {
>  >>   >      /* Reset a single ARMCPRegInfo register */ @@ -2552,6 +2572,85
>  @@
>  >>   > static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>  >>   >      acc->parent_realize(dev, errp);
>  >>   >  }
>  >>   >
>  >>   > +static void arm_cpu_unrealizefn(DeviceState *dev) {
>  >>   > +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
>  >>   > +    ARMCPU *cpu = ARM_CPU(dev);
>  >>   > +    CPUARMState *env = &cpu->env;
>  >>   > +    CPUState *cs = CPU(dev);
>  >>   > +    bool has_secure;
>  >>   > +
>  >>   > +    has_secure = cpu->has_el3 || arm_feature(env,
>  >>   > + ARM_FEATURE_M_SECURITY);
>  >>   > +
>  >>   > +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn
>  >>   cleanly */
>  >>   > +    cpu_address_space_destroy(cs, ARMASIdx_NS);
>  >>
>  >>   On current master this will fail:
>  >>
>  >>   ../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
>  >>   ../../target/arm/cpu.c:2626:5: error: implicit declaration of function
>  >>   ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
>  >>    2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
>  >>         |     ^~~~~~~~~~~~~~~~~~~~~~~~~
>  >>   ../../target/arm/cpu.c:2626:5: error: nested extern declaration of
>  >>   ‘cpu_address_space_destroy’ [-Werror=nested-externs]
>  >>   cc1: all warnings being treated as errors
>  >
>  >
>  > The current master already has arch-agnostic patch-set. I've applied
>  > the RFC V3 to the latest and complied. I did not see this issue?
>  >
>  > I've create a new branch for your reference.
>  >
>  > https://github.com/salil-mehta/qemu/tree/virt-cpuhp-armv8/rfc-v4-rc4
>  >
>  > Please let me know if this works for you?
>  
>  It still happens on the new branch. You need to configure Linux user mode
>  to reproduce it, e.g.:
>  
>  $ ../configure --target-list=aarch64-linux-user,aarch64-softmmu [...]
>  
>  If you just configure the 'aarch64-softmmu' target it doesn't happen.


Aah, I see. I'll check it today. As vCPU Hotplug does not makes sense in
Qemu user-mode emulation. I think we might need to conditionally
compile certain code using !CONFIG_USER_ONLY switch.

Thanks for the clarification.

Cheers

>  
>  
>  Cheers,
>  Gustavo
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Peter Maydell 3 months, 1 week ago
On Fri, 16 Aug 2024 at 16:37, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Salil Mehta <salil.mehta@huawei.com> writes:
>
> > vCPU Hot-unplug will result in QOM CPU object unrealization which will do away
> > with all the vCPU thread creations, allocations, registrations that happened
> > as part of the realization process. This change introduces the ARM CPU unrealize
> > function taking care of exactly that.
> >
> > Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context
> > is parked at the QEMU KVM layer.
> >
> > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> > Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> > [VP: Identified CPU stall issue & suggested probable fix]
> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> > ---
> >  target/arm/cpu.c       | 101 +++++++++++++++++++++++++++++++++++++++++
> >  target/arm/cpu.h       |  14 ++++++
> >  target/arm/gdbstub.c   |   6 +++
> >  target/arm/helper.c    |  25 ++++++++++
> >  target/arm/internals.h |   3 ++
> >  target/arm/kvm.c       |   5 ++
> >  6 files changed, 154 insertions(+)
> >
> > diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> > index c92162fa97..a3dc669309 100644
> > --- a/target/arm/cpu.c
> > +++ b/target/arm/cpu.c
> > @@ -157,6 +157,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
> >      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);
> >  }
> >
> > +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu)
> > +{
> > +    ARMELChangeHook *entry, *next;
> > +
> > +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) {
> > +        QLIST_REMOVE(entry, node);
> > +        g_free(entry);
> > +    }
> > +}
> > +
> >  void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
> >                                   void *opaque)
> >  {
> > @@ -168,6 +178,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
> >      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
> >  }
> >
> > +void arm_unregister_el_change_hooks(ARMCPU *cpu)
> > +{
> > +    ARMELChangeHook *entry, *next;
> > +
> > +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) {
> > +        QLIST_REMOVE(entry, node);
> > +        g_free(entry);
> > +    }
> > +}
> > +
> >  static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
> >  {
> >      /* Reset a single ARMCPRegInfo register */
> > @@ -2552,6 +2572,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
> >      acc->parent_realize(dev, errp);
> >  }
> >
> > +static void arm_cpu_unrealizefn(DeviceState *dev)
> > +{
> > +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
> > +    ARMCPU *cpu = ARM_CPU(dev);
> > +    CPUARMState *env = &cpu->env;
> > +    CPUState *cs = CPU(dev);
> > +    bool has_secure;
> > +
> > +    has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
> > +
> > +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */
> > +    cpu_address_space_destroy(cs, ARMASIdx_NS);
>
> On current master this will fail:
>
> ../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
> ../../target/arm/cpu.c:2626:5: error: implicit declaration of function ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
>  2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
>       |     ^~~~~~~~~~~~~~~~~~~~~~~~~
> ../../target/arm/cpu.c:2626:5: error: nested extern declaration of ‘cpu_address_space_destroy’ [-Werror=nested-externs]
> cc1: all warnings being treated as errors

We shouldn't need to explicitly call cpu_address_space_destroy()
from a target-specific unrealize anyway: we can do it all
from the base class (and I think this would fix some
leaks in current code for targets that hot-unplug, though
I should check that). Otherwise you need to duplicate all
the logic for figuring out which address spaces we created
in realize, which is fragile and not necessary when all we
want to do is "delete every address space the CPU object has"
and we want to do that for every target architecture always.

-- PMM
RE: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 3 months, 1 week ago
Hi Peter,

>  From: Peter Maydell <peter.maydell@linaro.org>
>  Sent: Friday, August 16, 2024 4:51 PM
>  To: Alex Bennée <alex.bennee@linaro.org>
>  
>  On Fri, 16 Aug 2024 at 16:37, Alex Bennée <alex.bennee@linaro.org> wrote:
>  >
>  > Salil Mehta <salil.mehta@huawei.com> writes:
>  >
>  > > vCPU Hot-unplug will result in QOM CPU object unrealization which
>  > > will do away with all the vCPU thread creations, allocations,
>  > > registrations that happened as part of the realization process. This
>  > > change introduces the ARM CPU unrealize function taking care of exactly
>  that.
>  > >
>  > > Note, initialized KVM vCPUs are not destroyed in host KVM but their
>  > > Qemu context is parked at the QEMU KVM layer.
>  > >
>  > > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>  > > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>  > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > > Reported-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > > [VP: Identified CPU stall issue & suggested probable fix]
>  > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > > ---
>  > >  target/arm/cpu.c       | 101
>  +++++++++++++++++++++++++++++++++++++++++
>  > >  target/arm/cpu.h       |  14 ++++++
>  > >  target/arm/gdbstub.c   |   6 +++
>  > >  target/arm/helper.c    |  25 ++++++++++
>  > >  target/arm/internals.h |   3 ++
>  > >  target/arm/kvm.c       |   5 ++
>  > >  6 files changed, 154 insertions(+)
>  > >
>  > > diff --git a/target/arm/cpu.c b/target/arm/cpu.c index
>  > > c92162fa97..a3dc669309 100644
>  > > --- a/target/arm/cpu.c
>  > > +++ b/target/arm/cpu.c
>  > > @@ -157,6 +157,16 @@ void
>  arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn
>  *hook,
>  > >      QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node);  }
>  > >
>  > > +void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) {
>  > > +    ARMELChangeHook *entry, *next;
>  > > +
>  > > +    QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node,
>  next) {
>  > > +        QLIST_REMOVE(entry, node);
>  > > +        g_free(entry);
>  > > +    }
>  > > +}
>  > > +
>  > >  void arm_register_el_change_hook(ARMCPU *cpu,
>  ARMELChangeHookFn *hook,
>  > >                                   void *opaque)  { @@ -168,6 +178,16
>  > > @@ void arm_register_el_change_hook(ARMCPU *cpu,
>  ARMELChangeHookFn *hook,
>  > >      QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);  }
>  > >
>  > > +void arm_unregister_el_change_hooks(ARMCPU *cpu) {
>  > > +    ARMELChangeHook *entry, *next;
>  > > +
>  > > +    QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next)
>  {
>  > > +        QLIST_REMOVE(entry, node);
>  > > +        g_free(entry);
>  > > +    }
>  > > +}
>  > > +
>  > >  static void cp_reg_reset(gpointer key, gpointer value, gpointer
>  > > opaque)  {
>  > >      /* Reset a single ARMCPRegInfo register */ @@ -2552,6 +2572,85
>  > > @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>  > >      acc->parent_realize(dev, errp);  }
>  > >
>  > > +static void arm_cpu_unrealizefn(DeviceState *dev) {
>  > > +    ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
>  > > +    ARMCPU *cpu = ARM_CPU(dev);
>  > > +    CPUARMState *env = &cpu->env;
>  > > +    CPUState *cs = CPU(dev);
>  > > +    bool has_secure;
>  > > +
>  > > +    has_secure = cpu->has_el3 || arm_feature(env,
>  > > + ARM_FEATURE_M_SECURITY);
>  > > +
>  > > +    /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn
>  cleanly */
>  > > +    cpu_address_space_destroy(cs, ARMASIdx_NS);
>  >
>  > On current master this will fail:
>  >
>  > ../../target/arm/cpu.c: In function ‘arm_cpu_unrealizefn’:
>  > ../../target/arm/cpu.c:2626:5: error: implicit declaration of function
>  ‘cpu_address_space_destroy’ [-Werror=implicit-function-declaration]
>  >  2626 |     cpu_address_space_destroy(cs, ARMASIdx_NS);
>  >       |     ^~~~~~~~~~~~~~~~~~~~~~~~~
>  > ../../target/arm/cpu.c:2626:5: error: nested extern declaration of
>  > ‘cpu_address_space_destroy’ [-Werror=nested-externs]
>  > cc1: all warnings being treated as errors
>  
>  We shouldn't need to explicitly call cpu_address_space_destroy() from a
>  target-specific unrealize anyway: we can do it all from the base class (and I
>  think this would fix some leaks in current code for targets that hot-unplug,
>  though I should check that). Otherwise you need to duplicate all the logic for
>  figuring out which address spaces we created in realize, which is fragile and
>  not necessary when all we want to do is "delete every address space the
>  CPU object has"
>  and we want to do that for every target architecture always.


Agreed but I would suggest to make it optional i.e. in case architecture want
to release to from its code. It should be allowed.  This also ensures clarity of the
flows,

https://lore.kernel.org/qemu-devel/a308e1f4f06f4e3ab6ab51f353601f43@huawei.com/


Thanks
Salil.



>  
>  -- PMM
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Peter Maydell 3 months, 1 week ago
On Mon, 19 Aug 2024 at 13:58, Salil Mehta <salil.mehta@huawei.com> wrote:
>
> Hi Peter,
>
> >  From: Peter Maydell <peter.maydell@linaro.org>
> >
> >  We shouldn't need to explicitly call cpu_address_space_destroy() from a
> >  target-specific unrealize anyway: we can do it all from the base class (and I
> >  think this would fix some leaks in current code for targets that hot-unplug,
> >  though I should check that). Otherwise you need to duplicate all the logic for
> >  figuring out which address spaces we created in realize, which is fragile and
> >  not necessary when all we want to do is "delete every address space the
> >  CPU object has"
> >  and we want to do that for every target architecture always.
>
>
> Agreed but I would suggest to make it optional i.e. in case architecture want
> to release to from its code. It should be allowed.  This also ensures clarity of the
> flows,
>
> https://lore.kernel.org/qemu-devel/a308e1f4f06f4e3ab6ab51f353601f43@huawei.com/

Do you have any concrete examples where a target arch would want to
explicitly release an AS from its own code? Unless there's a
real use case for doing that, I think that "common code always
does the cleanup of the ASes, nothing else ever does" is a
simple design rule that avoids the need for target-specific code
and means we don't need complicated handling for "some of the
ASes in cpu->cpu_ases are live and some have been released":
either the CPU is realized and they're all valid, or else
we're in the process of unrealizing the CPU and we get rid of
them all at once.

thanks
-- PMM
RE: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 3 months ago
HI Peter,

>  From: Peter Maydell <peter.maydell@linaro.org>
>  Sent: Monday, August 19, 2024 2:47 PM
>  To: Salil Mehta <salil.mehta@huawei.com>
>  
>  On Mon, 19 Aug 2024 at 13:58, Salil Mehta <salil.mehta@huawei.com>
>  wrote:
>  >
>  > Hi Peter,
>  >
>  > >  From: Peter Maydell <peter.maydell@linaro.org>
>  > >
>  > >  We shouldn't need to explicitly call cpu_address_space_destroy()
>  > > from a  target-specific unrealize anyway: we can do it all from the
>  > > base class (and I  think this would fix some leaks in current code
>  > > for targets that hot-unplug,  though I should check that). Otherwise
>  > > you need to duplicate all the logic for  figuring out which address
>  > > spaces we created in realize, which is fragile and  not necessary
>  > > when all we want to do is "delete every address space the  CPU object
>  has"
>  > >  and we want to do that for every target architecture always.
>  >
>  >
>  > Agreed but I would suggest to make it optional i.e. in case
>  > architecture want to release to from its code. It should be allowed.
>  > This also ensures clarity of the flows,
>  >
>  > https://lore.kernel.org/qemu-
>  devel/a308e1f4f06f4e3ab6ab51f353601f43@hu
>  > awei.com/
>  
>  Do you have any concrete examples where a target arch would want to
>  explicitly release an AS from its own code? 


No, I don’t have but some of the reasons I thought were:

1. Order of destruction of address space. Can it be different than what will; be assumed in the loop?
2. What if something needs to be done or handled before destroying each address space?
3. the flow


Unless there's a real use case for
>  doing that, I think that "common code always does the cleanup of the ASes,
>  nothing else ever does" is a simple design rule that avoids the need for
>  target-specific code and means we don't need complicated handling for
>  "some of the ASes in cpu->cpu_ases are live and some have been
>  released":
>  either the CPU is realized and they're all valid, or else we're in the process of
>  unrealizing the CPU and we get rid of them all at once.

I don’t have hard opinions on this. You can share the patch. I'll test with my branch
of vCPU hotplug

Thanks
Salil.

>  
>  thanks
>  -- PMM
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Peter Maydell 3 months, 1 week ago
On Fri, 16 Aug 2024 at 16:50, Peter Maydell <peter.maydell@linaro.org> wrote:
> We shouldn't need to explicitly call cpu_address_space_destroy()
> from a target-specific unrealize anyway: we can do it all
> from the base class (and I think this would fix some
> leaks in current code for targets that hot-unplug, though
> I should check that). Otherwise you need to duplicate all
> the logic for figuring out which address spaces we created
> in realize, which is fragile and not necessary when all we
> want to do is "delete every address space the CPU object has"
> and we want to do that for every target architecture always.

I have a patch to do this now, but I need to test it a bit more
and confirm (or disprove) my hypothesis that we're currently
leaking memory on existing architectures with vCPU
hot-unplug before I send it out.

-- PMM
RE: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Salil Mehta via 3 months, 1 week ago
>  From: Peter Maydell <peter.maydell@linaro.org>
>  Sent: Friday, August 16, 2024 6:00 PM
>  To: Alex Bennée <alex.bennee@linaro.org>
>  
>  On Fri, 16 Aug 2024 at 16:50, Peter Maydell <peter.maydell@linaro.org>
>  wrote:
>  > We shouldn't need to explicitly call cpu_address_space_destroy() from
>  > a target-specific unrealize anyway: we can do it all from the base
>  > class (and I think this would fix some leaks in current code for
>  > targets that hot-unplug, though I should check that). Otherwise you
>  > need to duplicate all the logic for figuring out which address spaces
>  > we created in realize, which is fragile and not necessary when all we
>  > want to do is "delete every address space the CPU object has"
>  > and we want to do that for every target architecture always.
>  
>  I have a patch to do this now, but I need to test it a bit more and confirm (or
>  disprove) my hypothesis that we're currently leaking memory on existing
>  architectures with vCPU hot-unplug before I send it out.

I think you are referring to this patch?

https://lore.kernel.org/qemu-devel/20230918160257.30127-9-philmd@linaro.org/


>  
>  -- PMM
Re: [PATCH RFC V3 24/29] target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug
Posted by Peter Maydell 3 months, 1 week ago
On Mon, 19 Aug 2024 at 13:59, Salil Mehta <salil.mehta@huawei.com> wrote:
>
> >  From: Peter Maydell <peter.maydell@linaro.org>
> >  Sent: Friday, August 16, 2024 6:00 PM
> >  To: Alex Bennée <alex.bennee@linaro.org>
> >
> >  On Fri, 16 Aug 2024 at 16:50, Peter Maydell <peter.maydell@linaro.org>
> >  wrote:
> >  > We shouldn't need to explicitly call cpu_address_space_destroy() from
> >  > a target-specific unrealize anyway: we can do it all from the base
> >  > class (and I think this would fix some leaks in current code for
> >  > targets that hot-unplug, though I should check that). Otherwise you
> >  > need to duplicate all the logic for figuring out which address spaces
> >  > we created in realize, which is fragile and not necessary when all we
> >  > want to do is "delete every address space the CPU object has"
> >  > and we want to do that for every target architecture always.
> >
> >  I have a patch to do this now, but I need to test it a bit more and confirm (or
> >  disprove) my hypothesis that we're currently leaking memory on existing
> >  architectures with vCPU hot-unplug before I send it out.
>
> I think you are referring to this patch?
>
> https://lore.kernel.org/qemu-devel/20230918160257.30127-9-philmd@linaro.org/

I'd forgotten that Phil had sent that patch out. My patch
is a bit different because it refactors cpu_address_space_destroy()
into a single function that destroys all the ASes (and so we
don't for instance need cpu->cpu_ases_count any more).

-- PMM