From: Cédric Le Goater <clg@kaod.org>
Today, all the ICPs are created before the CPUs, stored in an array
under the sPAPR machine and linked to the CPU when the core threads
are realized. This modeling brings some complexity when a lookup in
the array is required and it can be simplified by allocating the ICPs
when the CPUs are.
This is the purpose of this proposal which introduces a new 'icp_type'
field under the machine and creates the ICP objects of the right type
(KVM or not) before the PowerPCCPU object are.
This change allows more cleanups : the removal of the icps array under
the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
helper.
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
hw/intc/xics.c | 11 -----------
hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
include/hw/ppc/spapr.h | 2 +-
include/hw/ppc/xics.h | 2 --
5 files changed, 29 insertions(+), 51 deletions(-)
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 56fe70c..d4428b4 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -38,17 +38,6 @@
#include "monitor/monitor.h"
#include "hw/intc/intc.h"
-int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
-{
- PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id);
-
- if (cpu) {
- return cpu->parent_obj.cpu_index;
- }
-
- return -1;
-}
-
void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 08f8615..703b14a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -104,7 +104,6 @@ static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
XICSFabric *xi = XICS_FABRIC(spapr);
Error *err = NULL, *local_err = NULL;
ICSState *ics = NULL;
- int i;
ics = ICS_SIMPLE(object_new(type_ics));
object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
@@ -113,34 +112,14 @@ static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
error_propagate(&err, local_err);
if (err) {
- goto error;
+ error_propagate(errp, err);
+ return -1;
}
- spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
spapr->nr_servers = nr_servers;
-
- for (i = 0; i < nr_servers; i++) {
- ICPState *icp = &spapr->icps[i];
-
- object_initialize(icp, sizeof(*icp), type_icp);
- object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
- object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
- object_property_set_bool(OBJECT(icp), true, "realized", &err);
- if (err) {
- goto error;
- }
- object_unref(OBJECT(icp));
- }
-
spapr->ics = ics;
+ spapr->icp_type = type_icp;
return 0;
-
-error:
- error_propagate(errp, err);
- if (ics) {
- object_unparent(OBJECT(ics));
- }
- return -1;
}
static int xics_system_init(MachineState *machine,
@@ -1441,9 +1420,10 @@ static int spapr_post_load(void *opaque, int version_id)
int err = 0;
if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
- int i;
- for (i = 0; i < spapr->nr_servers; i++) {
- icp_resend(&spapr->icps[i]);
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ icp_resend(ICP(cpu->intc));
}
}
@@ -3114,20 +3094,21 @@ static void spapr_ics_resend(XICSFabric *dev)
static ICPState *spapr_icp_get(XICSFabric *xi, int cpu_dt_id)
{
- sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
- int server = xics_get_cpu_index_by_dt_id(cpu_dt_id);
+ PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id);
- return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+ return cpu ? ICP(cpu->intc) : NULL;
}
static void spapr_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- int i;
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
- for (i = 0; i < spapr->nr_servers; i++) {
- icp_pic_print_info(&spapr->icps[i], mon);
+ icp_pic_print_info(ICP(cpu->intc), mon);
}
ics_pic_print_info(spapr->ics, mon);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4e1a995..2e689b5 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -63,8 +63,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
Error **errp)
{
CPUPPCState *env = &cpu->env;
- XICSFabric *xi = XICS_FABRIC(spapr);
- ICPState *icp = xics_icp_get(xi, cpu->cpu_dt_id);
/* Set time-base frequency to 512 MHz */
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
@@ -82,8 +80,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
}
}
- xics_cpu_setup(xi, cpu, icp);
-
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
}
@@ -143,18 +139,32 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp)
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
CPUState *cs = CPU(child);
PowerPCCPU *cpu = POWERPC_CPU(cs);
+ Object *obj;
+
+ obj = object_new(spapr->icp_type);
+ object_property_add_child(OBJECT(cpu), "icp", obj, NULL);
+ object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
object_property_set_bool(child, true, "realized", &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
spapr_cpu_init(spapr, cpu, &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
+
+ xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
}
static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e27de64..7dbba57 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -109,7 +109,7 @@ struct sPAPRMachineState {
MemoryHotplugState hotplug_memory;
uint32_t nr_servers;
- ICPState *icps;
+ const char *icp_type;
};
#define H_SUCCESS 0
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index b97d30b..b07f56f 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -172,8 +172,6 @@ void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu, ICPState *icp);
void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu);
/* Internal XICS interfaces */
-int xics_get_cpu_index_by_dt_id(int cpu_dt_id);
-
void icp_set_cppr(ICPState *icp, uint8_t cppr);
void icp_set_mfrr(ICPState *icp, uint8_t mfrr);
uint32_t icp_accept(ICPState *ss);
--
2.9.3
On 26/04/2017 09:00, David Gibson wrote:
> From: Cédric Le Goater <clg@kaod.org>
>
> Today, all the ICPs are created before the CPUs, stored in an array
> under the sPAPR machine and linked to the CPU when the core threads
> are realized. This modeling brings some complexity when a lookup in
> the array is required and it can be simplified by allocating the ICPs
> when the CPUs are.
>
> This is the purpose of this proposal which introduces a new 'icp_type'
> field under the machine and creates the ICP objects of the right type
> (KVM or not) before the PowerPCCPU object are.
>
> This change allows more cleanups : the removal of the icps array under
> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
> helper.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> hw/intc/xics.c | 11 -----------
> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
> include/hw/ppc/spapr.h | 2 +-
> include/hw/ppc/xics.h | 2 --
> 5 files changed, 29 insertions(+), 51 deletions(-)
>
This commit breaks CPU re-hotplugging with KVM
the sequence "device_add, device_del, device_add" brings to the
following error message:
Unable to connect CPUx to kernel XICS: Device or resource busy
It comes from icp_kvm_cpu_setup():
...
ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
kvm_arch_vcpu_id(cs));
if (ret < 0) {
error_report("Unable to connect CPU%ld to kernel XICS: %s",
kvm_arch_vcpu_id(cs), strerror(errno));
exit(1);
}
..
It should be protected by cap_irq_xics_enabled:
...
/*
* If we are reusing a parked vCPU fd corresponding to the CPU
* which was hot-removed earlier we don't have to renable
* KVM_CAP_IRQ_XICS capability again.
*/
if (icp->cap_irq_xics_enabled) {
return;
}
...
ret = kvm_vcpu_enable_cap(...);
...
icp->cap_irq_xics_enabled = true;
...
But since this commit, "icp" is a new object on each call:
spapr_cpu_core_realize_child()
...
obj = object_new(spapr->icp_type);
...
xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
...
icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
...
...
and "cap_irq_xics_enabled" is reinitialized.
Any idea how to fix that?
Thanks,
Laurent
On 05/16/2017 02:03 PM, Laurent Vivier wrote:
> On 26/04/2017 09:00, David Gibson wrote:
>> From: Cédric Le Goater <clg@kaod.org>
>>
>> Today, all the ICPs are created before the CPUs, stored in an array
>> under the sPAPR machine and linked to the CPU when the core threads
>> are realized. This modeling brings some complexity when a lookup in
>> the array is required and it can be simplified by allocating the ICPs
>> when the CPUs are.
>>
>> This is the purpose of this proposal which introduces a new 'icp_type'
>> field under the machine and creates the ICP objects of the right type
>> (KVM or not) before the PowerPCCPU object are.
>>
>> This change allows more cleanups : the removal of the icps array under
>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
>> helper.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> ---
>> hw/intc/xics.c | 11 -----------
>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
>> include/hw/ppc/spapr.h | 2 +-
>> include/hw/ppc/xics.h | 2 --
>> 5 files changed, 29 insertions(+), 51 deletions(-)
>>
>
> This commit breaks CPU re-hotplugging with KVM
>
> the sequence "device_add, device_del, device_add" brings to the
> following error message:
>
> Unable to connect CPUx to kernel XICS: Device or resource busy
>
> It comes from icp_kvm_cpu_setup():
>
> ...
> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
> kvm_arch_vcpu_id(cs));
> if (ret < 0) {
> error_report("Unable to connect CPU%ld to kernel XICS: %s",
> kvm_arch_vcpu_id(cs), strerror(errno));
> exit(1);
> }
> ..
>
> It should be protected by cap_irq_xics_enabled:
>
> ...
> /*
> * If we are reusing a parked vCPU fd corresponding to the CPU
> * which was hot-removed earlier we don't have to renable
> * KVM_CAP_IRQ_XICS capability again.
> */
> if (icp->cap_irq_xics_enabled) {
> return;
> }
>
> ...
> ret = kvm_vcpu_enable_cap(...);
> ...
> icp->cap_irq_xics_enabled = true;
> ...
>
> But since this commit, "icp" is a new object on each call:
>
> spapr_cpu_core_realize_child()
> ...
> obj = object_new(spapr->icp_type);
> ...
> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
> ...
> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
> ...
> ...
>
> and "cap_irq_xics_enabled" is reinitialized.
>
> Any idea how to fix that?
it seems that a cleanup is not done in the kernel. We are missing
a way to call kvmppc_xics_free_icp() from QEMU. Today the only
way is to destroy the vcpu.
Else we need to reintroduce the array of icps (again) to keep some
xics state ... but that just sucks :/ Let me think about it.
C.
On 16/05/2017 14:50, Cédric Le Goater wrote:
> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
>> On 26/04/2017 09:00, David Gibson wrote:
>>> From: Cédric Le Goater <clg@kaod.org>
>>>
>>> Today, all the ICPs are created before the CPUs, stored in an array
>>> under the sPAPR machine and linked to the CPU when the core threads
>>> are realized. This modeling brings some complexity when a lookup in
>>> the array is required and it can be simplified by allocating the ICPs
>>> when the CPUs are.
>>>
>>> This is the purpose of this proposal which introduces a new 'icp_type'
>>> field under the machine and creates the ICP objects of the right type
>>> (KVM or not) before the PowerPCCPU object are.
>>>
>>> This change allows more cleanups : the removal of the icps array under
>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
>>> helper.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> ---
>>> hw/intc/xics.c | 11 -----------
>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
>>> include/hw/ppc/spapr.h | 2 +-
>>> include/hw/ppc/xics.h | 2 --
>>> 5 files changed, 29 insertions(+), 51 deletions(-)
>>>
>>
>> This commit breaks CPU re-hotplugging with KVM
>>
>> the sequence "device_add, device_del, device_add" brings to the
>> following error message:
>>
>> Unable to connect CPUx to kernel XICS: Device or resource busy
>>
>> It comes from icp_kvm_cpu_setup():
>>
>> ...
>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
>> kvm_arch_vcpu_id(cs));
>> if (ret < 0) {
>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
>> kvm_arch_vcpu_id(cs), strerror(errno));
>> exit(1);
>> }
>> ..
>>
>> It should be protected by cap_irq_xics_enabled:
>>
>> ...
>> /*
>> * If we are reusing a parked vCPU fd corresponding to the CPU
>> * which was hot-removed earlier we don't have to renable
>> * KVM_CAP_IRQ_XICS capability again.
>> */
>> if (icp->cap_irq_xics_enabled) {
>> return;
>> }
>>
>> ...
>> ret = kvm_vcpu_enable_cap(...);
>> ...
>> icp->cap_irq_xics_enabled = true;
>> ...
>>
>> But since this commit, "icp" is a new object on each call:
>>
>> spapr_cpu_core_realize_child()
>> ...
>> obj = object_new(spapr->icp_type);
>> ...
>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
>> ...
>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
>> ...
>> ...
>>
>> and "cap_irq_xics_enabled" is reinitialized.
>>
>> Any idea how to fix that?
>
> it seems that a cleanup is not done in the kernel. We are missing
> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
> way is to destroy the vcpu.
The commit introducing this hack, for reference:
commit a45863bda90daa8ec39e5a312b9734fd4665b016
Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
Date: Thu Jul 2 16:23:20 2015 +1000
xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
When supporting CPU hot removal by parking the vCPU fd and reusing
it during hotplug again, there can be cases where we try to reenable
KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
Introduce a boolean member in ICPState to track this and don't
reenable the CAP if it was already enabled earlier.
Re-enabling this CAP should ideally work, but currently it results in
kernel trying to create and associate ICP with this vCPU and that
fails since there is already an ICP associated with it. Hence this
patch is needed to work around this problem in the kernel.
This change allows CPU hot removal to work for sPAPR.
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
> Else we need to reintroduce the array of icps (again) to keep some
> xics state ... but that just sucks :/ Let me think about it.
>
Thanks,
Laurent
> C.
>
On 05/16/2017 02:55 PM, Laurent Vivier wrote:
> On 16/05/2017 14:50, Cédric Le Goater wrote:
>> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
>>> On 26/04/2017 09:00, David Gibson wrote:
>>>> From: Cédric Le Goater <clg@kaod.org>
>>>>
>>>> Today, all the ICPs are created before the CPUs, stored in an array
>>>> under the sPAPR machine and linked to the CPU when the core threads
>>>> are realized. This modeling brings some complexity when a lookup in
>>>> the array is required and it can be simplified by allocating the ICPs
>>>> when the CPUs are.
>>>>
>>>> This is the purpose of this proposal which introduces a new 'icp_type'
>>>> field under the machine and creates the ICP objects of the right type
>>>> (KVM or not) before the PowerPCCPU object are.
>>>>
>>>> This change allows more cleanups : the removal of the icps array under
>>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
>>>> helper.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>>> ---
>>>> hw/intc/xics.c | 11 -----------
>>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
>>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
>>>> include/hw/ppc/spapr.h | 2 +-
>>>> include/hw/ppc/xics.h | 2 --
>>>> 5 files changed, 29 insertions(+), 51 deletions(-)
>>>>
>>>
>>> This commit breaks CPU re-hotplugging with KVM
>>>
>>> the sequence "device_add, device_del, device_add" brings to the
>>> following error message:
>>>
>>> Unable to connect CPUx to kernel XICS: Device or resource busy
>>>
>>> It comes from icp_kvm_cpu_setup():
>>>
>>> ...
>>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
>>> kvm_arch_vcpu_id(cs));
>>> if (ret < 0) {
>>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
>>> kvm_arch_vcpu_id(cs), strerror(errno));
>>> exit(1);
>>> }
>>> ..
>>>
>>> It should be protected by cap_irq_xics_enabled:
>>>
>>> ...
>>> /*
>>> * If we are reusing a parked vCPU fd corresponding to the CPU
>>> * which was hot-removed earlier we don't have to renable
>>> * KVM_CAP_IRQ_XICS capability again.
>>> */
>>> if (icp->cap_irq_xics_enabled) {
>>> return;
>>> }
>>>
>>> ...
>>> ret = kvm_vcpu_enable_cap(...);
>>> ...
>>> icp->cap_irq_xics_enabled = true;
>>> ...
>>>
>>> But since this commit, "icp" is a new object on each call:
>>>
>>> spapr_cpu_core_realize_child()
>>> ...
>>> obj = object_new(spapr->icp_type);
>>> ...
>>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
>>> ...
>>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
>>> ...
>>> ...
>>>
>>> and "cap_irq_xics_enabled" is reinitialized.
>>>
>>> Any idea how to fix that?
>>
>> it seems that a cleanup is not done in the kernel. We are missing
>> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
>> way is to destroy the vcpu.
>
> The commit introducing this hack, for reference:
>
> commit a45863bda90daa8ec39e5a312b9734fd4665b016
> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Date: Thu Jul 2 16:23:20 2015 +1000
>
> xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
>
> When supporting CPU hot removal by parking the vCPU fd and reusing
> it during hotplug again, there can be cases where we try to reenable
> KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
> Introduce a boolean member in ICPState to track this and don't
> reenable the CAP if it was already enabled earlier.
>
> Re-enabling this CAP should ideally work, but currently it results in
> kernel trying to create and associate ICP with this vCPU and that
> fails since there is already an ICP associated with it. Hence this
> patch is needed to work around this problem in the kernel.
>
> This change allows CPU hot removal to work for sPAPR.
>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexander Graf <agraf@suse.de>
OK.
Greg is looking at re-adding the ICPState array because of a
migration issue with older machines. We might need to do so
unconditionally ...
But for that specific issue, I think it would have been better
to clean up the kernel state. Is that possible ?
Thanks,
C.
>> Else we need to reintroduce the array of icps (again) to keep some
>> xics state ... but that just sucks :/ Let me think about it.
>>
>
> Thanks,
> Laurent
>> C.
>>
>
On Tue, 16 May 2017 17:18:27 +0200
Cédric Le Goater <clg@kaod.org> wrote:
> On 05/16/2017 02:55 PM, Laurent Vivier wrote:
> > On 16/05/2017 14:50, Cédric Le Goater wrote:
> >> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
> >>> On 26/04/2017 09:00, David Gibson wrote:
> >>>> From: Cédric Le Goater <clg@kaod.org>
> >>>>
> >>>> Today, all the ICPs are created before the CPUs, stored in an array
> >>>> under the sPAPR machine and linked to the CPU when the core threads
> >>>> are realized. This modeling brings some complexity when a lookup in
> >>>> the array is required and it can be simplified by allocating the ICPs
> >>>> when the CPUs are.
> >>>>
> >>>> This is the purpose of this proposal which introduces a new 'icp_type'
> >>>> field under the machine and creates the ICP objects of the right type
> >>>> (KVM or not) before the PowerPCCPU object are.
> >>>>
> >>>> This change allows more cleanups : the removal of the icps array under
> >>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
> >>>> helper.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>>> ---
> >>>> hw/intc/xics.c | 11 -----------
> >>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
> >>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
> >>>> include/hw/ppc/spapr.h | 2 +-
> >>>> include/hw/ppc/xics.h | 2 --
> >>>> 5 files changed, 29 insertions(+), 51 deletions(-)
> >>>>
> >>>
> >>> This commit breaks CPU re-hotplugging with KVM
> >>>
> >>> the sequence "device_add, device_del, device_add" brings to the
> >>> following error message:
> >>>
> >>> Unable to connect CPUx to kernel XICS: Device or resource busy
> >>>
> >>> It comes from icp_kvm_cpu_setup():
> >>>
> >>> ...
> >>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
> >>> kvm_arch_vcpu_id(cs));
> >>> if (ret < 0) {
> >>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
> >>> kvm_arch_vcpu_id(cs), strerror(errno));
> >>> exit(1);
> >>> }
> >>> ..
> >>>
> >>> It should be protected by cap_irq_xics_enabled:
> >>>
> >>> ...
> >>> /*
> >>> * If we are reusing a parked vCPU fd corresponding to the CPU
> >>> * which was hot-removed earlier we don't have to renable
> >>> * KVM_CAP_IRQ_XICS capability again.
> >>> */
> >>> if (icp->cap_irq_xics_enabled) {
> >>> return;
> >>> }
> >>>
> >>> ...
> >>> ret = kvm_vcpu_enable_cap(...);
> >>> ...
> >>> icp->cap_irq_xics_enabled = true;
> >>> ...
> >>>
> >>> But since this commit, "icp" is a new object on each call:
> >>>
> >>> spapr_cpu_core_realize_child()
> >>> ...
> >>> obj = object_new(spapr->icp_type);
> >>> ...
> >>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
> >>> ...
> >>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
> >>> ...
> >>> ...
> >>>
> >>> and "cap_irq_xics_enabled" is reinitialized.
> >>>
> >>> Any idea how to fix that?
> >>
> >> it seems that a cleanup is not done in the kernel. We are missing
> >> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
> >> way is to destroy the vcpu.
> >
> > The commit introducing this hack, for reference:
> >
> > commit a45863bda90daa8ec39e5a312b9734fd4665b016
> > Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Date: Thu Jul 2 16:23:20 2015 +1000
> >
> > xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
> >
> > When supporting CPU hot removal by parking the vCPU fd and reusing
> > it during hotplug again, there can be cases where we try to reenable
> > KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
> > Introduce a boolean member in ICPState to track this and don't
> > reenable the CAP if it was already enabled earlier.
> >
> > Re-enabling this CAP should ideally work, but currently it results in
> > kernel trying to create and associate ICP with this vCPU and that
> > fails since there is already an ICP associated with it. Hence this
> > patch is needed to work around this problem in the kernel.
> >
> > This change allows CPU hot removal to work for sPAPR.
> >
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: Alexander Graf <agraf@suse.de>
>
> OK.
>
> Greg is looking at re-adding the ICPState array because of a
> migration issue with older machines. We might need to do so
> unconditionally ...
>
That would be a pity to carry on with the pre-allocated ICPStates for
new machine types just because of that... What about keeping track
of all the cap_irq_xics_enabled flags in a separate max_cpus sized
static array ?
> But for that specific issue, I think it would have been better
> to clean up the kernel state. Is that possible ?
>
Commit 4c055ab54fae ("cpu: Reclaim vCPU objects") gives some more details
on why we don't destroy the vCPU in KVM on unplug, but rather park the vCPU
fd for later use... so I'm not sure we can clean up the kernel state.
But since the vCPU is still present, maybe we can find a way to tell KVM
that we want to reuse an already present ICP ?
> Thanks,
>
> C.
>
>
> >> Else we need to reintroduce the array of icps (again) to keep some
> >> xics state ... but that just sucks :/ Let me think about it.
> >>
> >
> > Thanks,
> > Laurent
> >> C.
> >>
> >
>
On 05/16/2017 06:10 PM, Greg Kurz wrote:
> On Tue, 16 May 2017 17:18:27 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
>
>> On 05/16/2017 02:55 PM, Laurent Vivier wrote:
>>> On 16/05/2017 14:50, Cédric Le Goater wrote:
>>>> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
>>>>> On 26/04/2017 09:00, David Gibson wrote:
>>>>>> From: Cédric Le Goater <clg@kaod.org>
>>>>>>
>>>>>> Today, all the ICPs are created before the CPUs, stored in an array
>>>>>> under the sPAPR machine and linked to the CPU when the core threads
>>>>>> are realized. This modeling brings some complexity when a lookup in
>>>>>> the array is required and it can be simplified by allocating the ICPs
>>>>>> when the CPUs are.
>>>>>>
>>>>>> This is the purpose of this proposal which introduces a new 'icp_type'
>>>>>> field under the machine and creates the ICP objects of the right type
>>>>>> (KVM or not) before the PowerPCCPU object are.
>>>>>>
>>>>>> This change allows more cleanups : the removal of the icps array under
>>>>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
>>>>>> helper.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>>>>> ---
>>>>>> hw/intc/xics.c | 11 -----------
>>>>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
>>>>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
>>>>>> include/hw/ppc/spapr.h | 2 +-
>>>>>> include/hw/ppc/xics.h | 2 --
>>>>>> 5 files changed, 29 insertions(+), 51 deletions(-)
>>>>>>
>>>>>
>>>>> This commit breaks CPU re-hotplugging with KVM
>>>>>
>>>>> the sequence "device_add, device_del, device_add" brings to the
>>>>> following error message:
>>>>>
>>>>> Unable to connect CPUx to kernel XICS: Device or resource busy
>>>>>
>>>>> It comes from icp_kvm_cpu_setup():
>>>>>
>>>>> ...
>>>>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
>>>>> kvm_arch_vcpu_id(cs));
>>>>> if (ret < 0) {
>>>>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
>>>>> kvm_arch_vcpu_id(cs), strerror(errno));
>>>>> exit(1);
>>>>> }
>>>>> ..
>>>>>
>>>>> It should be protected by cap_irq_xics_enabled:
>>>>>
>>>>> ...
>>>>> /*
>>>>> * If we are reusing a parked vCPU fd corresponding to the CPU
>>>>> * which was hot-removed earlier we don't have to renable
>>>>> * KVM_CAP_IRQ_XICS capability again.
>>>>> */
>>>>> if (icp->cap_irq_xics_enabled) {
>>>>> return;
>>>>> }
>>>>>
>>>>> ...
>>>>> ret = kvm_vcpu_enable_cap(...);
>>>>> ...
>>>>> icp->cap_irq_xics_enabled = true;
>>>>> ...
>>>>>
>>>>> But since this commit, "icp" is a new object on each call:
>>>>>
>>>>> spapr_cpu_core_realize_child()
>>>>> ...
>>>>> obj = object_new(spapr->icp_type);
>>>>> ...
>>>>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
>>>>> ...
>>>>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
>>>>> ...
>>>>> ...
>>>>>
>>>>> and "cap_irq_xics_enabled" is reinitialized.
>>>>>
>>>>> Any idea how to fix that?
>>>>
>>>> it seems that a cleanup is not done in the kernel. We are missing
>>>> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
>>>> way is to destroy the vcpu.
>>>
>>> The commit introducing this hack, for reference:
>>>
>>> commit a45863bda90daa8ec39e5a312b9734fd4665b016
>>> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
>>> Date: Thu Jul 2 16:23:20 2015 +1000
>>>
>>> xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
>>>
>>> When supporting CPU hot removal by parking the vCPU fd and reusing
>>> it during hotplug again, there can be cases where we try to reenable
>>> KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
>>> Introduce a boolean member in ICPState to track this and don't
>>> reenable the CAP if it was already enabled earlier.
>>>
>>> Re-enabling this CAP should ideally work, but currently it results in
>>> kernel trying to create and associate ICP with this vCPU and that
>>> fails since there is already an ICP associated with it. Hence this
>>> patch is needed to work around this problem in the kernel.
>>>
>>> This change allows CPU hot removal to work for sPAPR.
>>>
>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>
>> OK.
>>
>> Greg is looking at re-adding the ICPState array because of a
>> migration issue with older machines. We might need to do so
>> unconditionally ...
>>
>
> That would be a pity to carry on with the pre-allocated ICPStates for
> new machine types just because of that... What about keeping track
> of all the cap_irq_xics_enabled flags in a separate max_cpus sized
> static array ?
Could we use 'cpu->unplug' instead ?
C.
>> But for that specific issue, I think it would have been better
>> to clean up the kernel state. Is that possible ?
>>
>
> Commit 4c055ab54fae ("cpu: Reclaim vCPU objects") gives some more details
> on why we don't destroy the vCPU in KVM on unplug, but rather park the vCPU
> fd for later use... so I'm not sure we can clean up the kernel state.
>
> But since the vCPU is still present, maybe we can find a way to tell KVM
> that we want to reuse an already present ICP ?
>
>> Thanks,
>>
>> C.
>>
>>
>>>> Else we need to reintroduce the array of icps (again) to keep some
>>>> xics state ... but that just sucks :/ Let me think about it.
>>>>
>>>
>>> Thanks,
>>> Laurent
>>>> C.
>>>>
>>>
>>
>
On Wed, May 17, 2017 at 07:50:42AM +0200, Cédric Le Goater wrote:
> On 05/16/2017 06:10 PM, Greg Kurz wrote:
> > On Tue, 16 May 2017 17:18:27 +0200
> > Cédric Le Goater <clg@kaod.org> wrote:
> >
> >> On 05/16/2017 02:55 PM, Laurent Vivier wrote:
> >>> On 16/05/2017 14:50, Cédric Le Goater wrote:
> >>>> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
> >>>>> On 26/04/2017 09:00, David Gibson wrote:
> >>>>>> From: Cédric Le Goater <clg@kaod.org>
> >>>>>>
> >>>>>> Today, all the ICPs are created before the CPUs, stored in an array
> >>>>>> under the sPAPR machine and linked to the CPU when the core threads
> >>>>>> are realized. This modeling brings some complexity when a lookup in
> >>>>>> the array is required and it can be simplified by allocating the ICPs
> >>>>>> when the CPUs are.
> >>>>>>
> >>>>>> This is the purpose of this proposal which introduces a new 'icp_type'
> >>>>>> field under the machine and creates the ICP objects of the right type
> >>>>>> (KVM or not) before the PowerPCCPU object are.
> >>>>>>
> >>>>>> This change allows more cleanups : the removal of the icps array under
> >>>>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
> >>>>>> helper.
> >>>>>>
> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>> ---
> >>>>>> hw/intc/xics.c | 11 -----------
> >>>>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
> >>>>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
> >>>>>> include/hw/ppc/spapr.h | 2 +-
> >>>>>> include/hw/ppc/xics.h | 2 --
> >>>>>> 5 files changed, 29 insertions(+), 51 deletions(-)
> >>>>>>
> >>>>>
> >>>>> This commit breaks CPU re-hotplugging with KVM
> >>>>>
> >>>>> the sequence "device_add, device_del, device_add" brings to the
> >>>>> following error message:
> >>>>>
> >>>>> Unable to connect CPUx to kernel XICS: Device or resource busy
> >>>>>
> >>>>> It comes from icp_kvm_cpu_setup():
> >>>>>
> >>>>> ...
> >>>>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
> >>>>> kvm_arch_vcpu_id(cs));
> >>>>> if (ret < 0) {
> >>>>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
> >>>>> kvm_arch_vcpu_id(cs), strerror(errno));
> >>>>> exit(1);
> >>>>> }
> >>>>> ..
> >>>>>
> >>>>> It should be protected by cap_irq_xics_enabled:
> >>>>>
> >>>>> ...
> >>>>> /*
> >>>>> * If we are reusing a parked vCPU fd corresponding to the CPU
> >>>>> * which was hot-removed earlier we don't have to renable
> >>>>> * KVM_CAP_IRQ_XICS capability again.
> >>>>> */
> >>>>> if (icp->cap_irq_xics_enabled) {
> >>>>> return;
> >>>>> }
> >>>>>
> >>>>> ...
> >>>>> ret = kvm_vcpu_enable_cap(...);
> >>>>> ...
> >>>>> icp->cap_irq_xics_enabled = true;
> >>>>> ...
> >>>>>
> >>>>> But since this commit, "icp" is a new object on each call:
> >>>>>
> >>>>> spapr_cpu_core_realize_child()
> >>>>> ...
> >>>>> obj = object_new(spapr->icp_type);
> >>>>> ...
> >>>>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
> >>>>> ...
> >>>>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
> >>>>> ...
> >>>>> ...
> >>>>>
> >>>>> and "cap_irq_xics_enabled" is reinitialized.
> >>>>>
> >>>>> Any idea how to fix that?
> >>>>
> >>>> it seems that a cleanup is not done in the kernel. We are missing
> >>>> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
> >>>> way is to destroy the vcpu.
> >>>
> >>> The commit introducing this hack, for reference:
> >>>
> >>> commit a45863bda90daa8ec39e5a312b9734fd4665b016
> >>> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>> Date: Thu Jul 2 16:23:20 2015 +1000
> >>>
> >>> xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
> >>>
> >>> When supporting CPU hot removal by parking the vCPU fd and reusing
> >>> it during hotplug again, there can be cases where we try to reenable
> >>> KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
> >>> Introduce a boolean member in ICPState to track this and don't
> >>> reenable the CAP if it was already enabled earlier.
> >>>
> >>> Re-enabling this CAP should ideally work, but currently it results in
> >>> kernel trying to create and associate ICP with this vCPU and that
> >>> fails since there is already an ICP associated with it. Hence this
> >>> patch is needed to work around this problem in the kernel.
> >>>
> >>> This change allows CPU hot removal to work for sPAPR.
> >>>
> >>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>> Signed-off-by: Alexander Graf <agraf@suse.de>
> >>
> >> OK.
> >>
> >> Greg is looking at re-adding the ICPState array because of a
> >> migration issue with older machines. We might need to do so
> >> unconditionally ...
> >>
> >
> > That would be a pity to carry on with the pre-allocated ICPStates for
> > new machine types just because of that... What about keeping track
> > of all the cap_irq_xics_enabled flags in a separate max_cpus sized
> > static array ?
>
> Could we use 'cpu->unplug' instead ?
I've only half followed this discussion, but fwiw I prefer the idea of
"parking" in-kernel ICP objects, similarly to the way we do for
removed VCPUs, rather than going back to keeping ICP objects around
indefinitely and unconditionally.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
On Wed, 17 May 2017 16:37:52 +1000 David Gibson <david@gibson.dropbear.id.au> wrote: [...] > > >> > > >> Greg is looking at re-adding the ICPState array because of a > > >> migration issue with older machines. We might need to do so > > >> unconditionally ... > > >> > > > > > > That would be a pity to carry on with the pre-allocated ICPStates for > > > new machine types just because of that... What about keeping track > > > of all the cap_irq_xics_enabled flags in a separate max_cpus sized > > > static array ? > > > > Could we use 'cpu->unplug' instead ? > > I've only half followed this discussion, but fwiw I prefer the idea of > "parking" in-kernel ICP objects, similarly to the way we do for > removed VCPUs, rather than going back to keeping ICP objects around > indefinitely and unconditionally. > I'll give a try. Cheers, -- Greg
On Wed, 17 May 2017 07:50:42 +0200
Cédric Le Goater <clg@kaod.org> wrote:
> On 05/16/2017 06:10 PM, Greg Kurz wrote:
> > On Tue, 16 May 2017 17:18:27 +0200
> > Cédric Le Goater <clg@kaod.org> wrote:
> >
> >> On 05/16/2017 02:55 PM, Laurent Vivier wrote:
> >>> On 16/05/2017 14:50, Cédric Le Goater wrote:
> >>>> On 05/16/2017 02:03 PM, Laurent Vivier wrote:
> >>>>> On 26/04/2017 09:00, David Gibson wrote:
> >>>>>> From: Cédric Le Goater <clg@kaod.org>
> >>>>>>
> >>>>>> Today, all the ICPs are created before the CPUs, stored in an array
> >>>>>> under the sPAPR machine and linked to the CPU when the core threads
> >>>>>> are realized. This modeling brings some complexity when a lookup in
> >>>>>> the array is required and it can be simplified by allocating the ICPs
> >>>>>> when the CPUs are.
> >>>>>>
> >>>>>> This is the purpose of this proposal which introduces a new 'icp_type'
> >>>>>> field under the machine and creates the ICP objects of the right type
> >>>>>> (KVM or not) before the PowerPCCPU object are.
> >>>>>>
> >>>>>> This change allows more cleanups : the removal of the icps array under
> >>>>>> the sPAPR machine and the removal of the xics_get_cpu_index_by_dt_id()
> >>>>>> helper.
> >>>>>>
> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>> ---
> >>>>>> hw/intc/xics.c | 11 -----------
> >>>>>> hw/ppc/spapr.c | 47 ++++++++++++++---------------------------------
> >>>>>> hw/ppc/spapr_cpu_core.c | 18 ++++++++++++++----
> >>>>>> include/hw/ppc/spapr.h | 2 +-
> >>>>>> include/hw/ppc/xics.h | 2 --
> >>>>>> 5 files changed, 29 insertions(+), 51 deletions(-)
> >>>>>>
> >>>>>
> >>>>> This commit breaks CPU re-hotplugging with KVM
> >>>>>
> >>>>> the sequence "device_add, device_del, device_add" brings to the
> >>>>> following error message:
> >>>>>
> >>>>> Unable to connect CPUx to kernel XICS: Device or resource busy
> >>>>>
> >>>>> It comes from icp_kvm_cpu_setup():
> >>>>>
> >>>>> ...
> >>>>> ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
> >>>>> kvm_arch_vcpu_id(cs));
> >>>>> if (ret < 0) {
> >>>>> error_report("Unable to connect CPU%ld to kernel XICS: %s",
> >>>>> kvm_arch_vcpu_id(cs), strerror(errno));
> >>>>> exit(1);
> >>>>> }
> >>>>> ..
> >>>>>
> >>>>> It should be protected by cap_irq_xics_enabled:
> >>>>>
> >>>>> ...
> >>>>> /*
> >>>>> * If we are reusing a parked vCPU fd corresponding to the CPU
> >>>>> * which was hot-removed earlier we don't have to renable
> >>>>> * KVM_CAP_IRQ_XICS capability again.
> >>>>> */
> >>>>> if (icp->cap_irq_xics_enabled) {
> >>>>> return;
> >>>>> }
> >>>>>
> >>>>> ...
> >>>>> ret = kvm_vcpu_enable_cap(...);
> >>>>> ...
> >>>>> icp->cap_irq_xics_enabled = true;
> >>>>> ...
> >>>>>
> >>>>> But since this commit, "icp" is a new object on each call:
> >>>>>
> >>>>> spapr_cpu_core_realize_child()
> >>>>> ...
> >>>>> obj = object_new(spapr->icp_type);
> >>>>> ...
> >>>>> xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
> >>>>> ...
> >>>>> icpc->cpu_setup(icp, cpu); -> icp_kvm_cpu_setup()
> >>>>> ...
> >>>>> ...
> >>>>>
> >>>>> and "cap_irq_xics_enabled" is reinitialized.
> >>>>>
> >>>>> Any idea how to fix that?
> >>>>
> >>>> it seems that a cleanup is not done in the kernel. We are missing
> >>>> a way to call kvmppc_xics_free_icp() from QEMU. Today the only
> >>>> way is to destroy the vcpu.
> >>>
> >>> The commit introducing this hack, for reference:
> >>>
> >>> commit a45863bda90daa8ec39e5a312b9734fd4665b016
> >>> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>> Date: Thu Jul 2 16:23:20 2015 +1000
> >>>
> >>> xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
> >>>
> >>> When supporting CPU hot removal by parking the vCPU fd and reusing
> >>> it during hotplug again, there can be cases where we try to reenable
> >>> KVM_CAP_IRQ_XICS CAP for the vCPU for which it was already enabled.
> >>> Introduce a boolean member in ICPState to track this and don't
> >>> reenable the CAP if it was already enabled earlier.
> >>>
> >>> Re-enabling this CAP should ideally work, but currently it results in
> >>> kernel trying to create and associate ICP with this vCPU and that
> >>> fails since there is already an ICP associated with it. Hence this
> >>> patch is needed to work around this problem in the kernel.
> >>>
> >>> This change allows CPU hot removal to work for sPAPR.
> >>>
> >>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>> Signed-off-by: Alexander Graf <agraf@suse.de>
> >>
> >> OK.
> >>
> >> Greg is looking at re-adding the ICPState array because of a
> >> migration issue with older machines. We might need to do so
> >> unconditionally ...
> >>
> >
> > That would be a pity to carry on with the pre-allocated ICPStates for
> > new machine types just because of that... What about keeping track
> > of all the cap_irq_xics_enabled flags in a separate max_cpus sized
> > static array ?
>
> Could we use 'cpu->unplug' instead ?
>
No we can't because we only park the vCPU fd actually and we always pass
a freshly allocated CPUState with @unplug == false to xics_cpu_setup().
> C.
>
>
> >> But for that specific issue, I think it would have been better
> >> to clean up the kernel state. Is that possible ?
> >>
> >
> > Commit 4c055ab54fae ("cpu: Reclaim vCPU objects") gives some more details
> > on why we don't destroy the vCPU in KVM on unplug, but rather park the vCPU
> > fd for later use... so I'm not sure we can clean up the kernel state.
> >
> > But since the vCPU is still present, maybe we can find a way to tell KVM
> > that we want to reuse an already present ICP ?
> >
> >> Thanks,
> >>
> >> C.
> >>
> >>
> >>>> Else we need to reintroduce the array of icps (again) to keep some
> >>>> xics state ... but that just sucks :/ Let me think about it.
> >>>>
> >>>
> >>> Thanks,
> >>> Laurent
> >>>> C.
> >>>>
> >>>
> >>
> >
>
© 2016 - 2026 Red Hat, Inc.