This is needed for riscv based CPUs by MIPS since those may have
sparse hart-ID layouts. ACLINT and APLIC still assume a dense
range, and if a hart is missing, this causes NULL derefs.
Signed-off-by: Chao-ying Fu <cfu@mips.com>
Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
---
hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
hw/intc/riscv_aplic.c | 11 ++++++++---
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index b0139f03f5..22ac4133d5 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -292,7 +292,13 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
s->timecmp = g_new0(uint64_t, s->num_harts);
/* Claim timer interrupt bits */
for (i = 0; i < s->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
+ CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
+ if (cpu_by_hartid == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid hartid: %u",
+ s->hartid_base + i);
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
error_report("MTIP already claimed");
exit(1);
@@ -481,7 +487,13 @@ static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp)
/* Claim software interrupt bits */
for (i = 0; i < swi->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i));
+ CPUState *cpu_by_hartid = cpu_by_arch_id(swi->hartid_base + i);
+ if (cpu_by_hartid == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-swi: invalid hartid: %u",
+ swi->hartid_base + i);
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
/* We don't claim mip.SSIP because it is writable by software */
if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) {
error_report("MSIP already claimed");
@@ -545,6 +557,11 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base,
for (i = 0; i < num_harts; i++) {
CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+ if (cpu == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-swi: invalid hartid: %u",
+ hartid_base + i);
+ continue;
+ }
RISCVCPU *rvcpu = RISCV_CPU(cpu);
qdev_connect_gpio_out(dev, i,
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 8bcd9f4697..c7846753fe 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -899,9 +899,11 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
if (!aplic->msimode) {
/* Claim the CPU interrupt to be triggered by this APLIC */
for (i = 0; i < aplic->num_harts; i++) {
- RISCVCPU *cpu;
-
- cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
+ CPUState *temp = cpu_by_arch_id(aplic->hartid_base + i);
+ if (temp == NULL) {
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(temp);
if (riscv_cpu_claim_interrupts(cpu,
(aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
error_report("%s already claimed",
@@ -1076,6 +1078,9 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
if (!msimode) {
for (i = 0; i < num_harts; i++) {
CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+ if (cpu == NULL) {
+ continue;
+ }
qdev_connect_gpio_out_named(dev, NULL, i,
qdev_get_gpio_in(DEVICE(cpu),
--
2.34.1
On 17/7/25 11:38, Djordje Todorovic wrote:
> This is needed for riscv based CPUs by MIPS since those may have
> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
> range, and if a hart is missing, this causes NULL derefs.
>
> Signed-off-by: Chao-ying Fu <cfu@mips.com>
> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
> ---
> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
> hw/intc/riscv_aplic.c | 11 ++++++++---
> 2 files changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
> index b0139f03f5..22ac4133d5 100644
> --- a/hw/intc/riscv_aclint.c
> +++ b/hw/intc/riscv_aclint.c
> @@ -292,7 +292,13 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
> s->timecmp = g_new0(uint64_t, s->num_harts);
> /* Claim timer interrupt bits */
> for (i = 0; i < s->num_harts; i++) {
> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
> + if (cpu_by_hartid == NULL) {
> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid hartid: %u",
> + s->hartid_base + i);
DeviceRealize() handlers are part of machine modelling, not guest uses.
IOW, triggering this is a programming mistake, so we should just
abort() here.
On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
> CAUTION: This email originated from outside of the organization. Do
> not click links or open attachments unless you recognize the sender
> and know the content is safe.
>
>
> On 17/7/25 11:38, Djordje Todorovic wrote:
>> This is needed for riscv based CPUs by MIPS since those may have
>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>> range, and if a hart is missing, this causes NULL derefs.
>>
>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>> ---
>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>> hw/intc/riscv_aplic.c | 11 ++++++++---
>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
>> index b0139f03f5..22ac4133d5 100644
>> --- a/hw/intc/riscv_aclint.c
>> +++ b/hw/intc/riscv_aclint.c
>> @@ -292,7 +292,13 @@ static void
>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>> s->timecmp = g_new0(uint64_t, s->num_harts);
>> /* Claim timer interrupt bits */
>> for (i = 0; i < s->num_harts; i++) {
>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
>> + if (cpu_by_hartid == NULL) {
>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>> hartid: %u",
>> + s->hartid_base + i);
>
> DeviceRealize() handlers are part of machine modelling, not guest uses.
>
> IOW, triggering this is a programming mistake, so we should just
> abort() here.
Well, if we do it that way, our Boston board target for P8700 cannot run.
On 1/9/25 10:17, Djordje Todorovic wrote:
> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
>
>> CAUTION: This email originated from outside of the organization. Do
>> not click links or open attachments unless you recognize the sender
>> and know the content is safe.
>>
>>
>> On 17/7/25 11:38, Djordje Todorovic wrote:
>>> This is needed for riscv based CPUs by MIPS since those may have
>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>>> range, and if a hart is missing, this causes NULL derefs.
>>>
>>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>>> ---
>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>>> hw/intc/riscv_aplic.c | 11 ++++++++---
>>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
>>> index b0139f03f5..22ac4133d5 100644
>>> --- a/hw/intc/riscv_aclint.c
>>> +++ b/hw/intc/riscv_aclint.c
>>> @@ -292,7 +292,13 @@ static void
>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>>> s->timecmp = g_new0(uint64_t, s->num_harts);
>>> /* Claim timer interrupt bits */
>>> for (i = 0; i < s->num_harts; i++) {
>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
>>> + if (cpu_by_hartid == NULL) {
>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>>> hartid: %u",
>>> + s->hartid_base + i);
>>
>> DeviceRealize() handlers are part of machine modelling, not guest uses.
>>
>> IOW, triggering this is a programming mistake, so we should just
>> abort() here.
>
> Well, if we do it that way, our Boston board target for P8700 cannot run.
So the problem is elsewhere :)
On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote:
> CAUTION: This email originated from outside of the organization. Do
> not click links or open attachments unless you recognize the sender
> and know the content is safe.
>
>
> On 1/9/25 10:17, Djordje Todorovic wrote:
>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
>>
>>> CAUTION: This email originated from outside of the organization. Do
>>> not click links or open attachments unless you recognize the sender
>>> and know the content is safe.
>>>
>>>
>>> On 17/7/25 11:38, Djordje Todorovic wrote:
>>>> This is needed for riscv based CPUs by MIPS since those may have
>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>>>> range, and if a hart is missing, this causes NULL derefs.
>>>>
>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>>>> ---
>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>>>> hw/intc/riscv_aplic.c | 11 ++++++++---
>>>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
>>>> index b0139f03f5..22ac4133d5 100644
>>>> --- a/hw/intc/riscv_aclint.c
>>>> +++ b/hw/intc/riscv_aclint.c
>>>> @@ -292,7 +292,13 @@ static void
>>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>>>> s->timecmp = g_new0(uint64_t, s->num_harts);
>>>> /* Claim timer interrupt bits */
>>>> for (i = 0; i < s->num_harts; i++) {
>>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base +
>>>> i));
>>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
>>>> + if (cpu_by_hartid == NULL) {
>>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>>>> hartid: %u",
>>>> + s->hartid_base + i);
>>>
>>> DeviceRealize() handlers are part of machine modelling, not guest uses.
>>>
>>> IOW, triggering this is a programming mistake, so we should just
>>> abort() here.
>>
>> Well, if we do it that way, our Boston board target for P8700 cannot
>> run.
>
> So the problem is elsewhere :)
I see. Would something like this be acceptable:
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -279,7 +279,7 @@ static const Property
riscv_aclint_mtimer_properties[] = {
static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
{
RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev);
- int i;
+ CPUState *cpu;
memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops,
s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size);
@@ -291,18 +291,15 @@ static void
riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
s->timers = g_new0(QEMUTimer *, s->num_harts);
s->timecmp = g_new0(uint64_t, s->num_harts);
/* Claim timer interrupt bits */
- for (i = 0; i < s->num_harts; i++) {
- CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
- if (cpu_by_hartid == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
hartid: %u",
- s->hartid_base + i);
- continue;
- }
- RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
- if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
- error_report("MTIP already claimed");
- exit(1);
- }
+ CPU_FOREACH(cpu) {
+ if (cpu == NULL)
+ abort();
+
+ RISCVCPU *cpu_riscv = RISCV_CPU(cpu);
+ if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) {
+ error_report("MTIP already claimed");
+ exit(1);
+ }
}
}
Thanks,
Djordje
On 3/9/25 14:35, Djordje Todorovic wrote:
>
> On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote:
>> CAUTION: This email originated from outside of the organization. Do
>> not click links or open attachments unless you recognize the sender
>> and know the content is safe.
>>
>>
>> On 1/9/25 10:17, Djordje Todorovic wrote:
>>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
>>>
>>>> CAUTION: This email originated from outside of the organization. Do
>>>> not click links or open attachments unless you recognize the sender
>>>> and know the content is safe.
>>>>
>>>>
>>>> On 17/7/25 11:38, Djordje Todorovic wrote:
>>>>> This is needed for riscv based CPUs by MIPS since those may have
>>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>>>>> range, and if a hart is missing, this causes NULL derefs.
>>>>>
>>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>>>>> ---
>>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>>>>> hw/intc/riscv_aplic.c | 11 ++++++++---
>>>>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
>>>>> index b0139f03f5..22ac4133d5 100644
>>>>> --- a/hw/intc/riscv_aclint.c
>>>>> +++ b/hw/intc/riscv_aclint.c
>>>>> @@ -292,7 +292,13 @@ static void
>>>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>>>>> s->timecmp = g_new0(uint64_t, s->num_harts);
>>>>> /* Claim timer interrupt bits */
>>>>> for (i = 0; i < s->num_harts; i++) {
>>>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base +
>>>>> i));
>>>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
>>>>> + if (cpu_by_hartid == NULL) {
>>>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>>>>> hartid: %u",
>>>>> + s->hartid_base + i);
>>>>
>>>> DeviceRealize() handlers are part of machine modelling, not guest uses.
>>>>
>>>> IOW, triggering this is a programming mistake, so we should just
>>>> abort() here.
>>>
>>> Well, if we do it that way, our Boston board target for P8700 cannot
>>> run.
>>
>> So the problem is elsewhere :)
>
>
> I see. Would something like this be acceptable:
>
> --- a/hw/intc/riscv_aclint.c
> +++ b/hw/intc/riscv_aclint.c
> @@ -279,7 +279,7 @@ static const Property
> riscv_aclint_mtimer_properties[] = {
> static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
> {
> RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev);
> - int i;
> + CPUState *cpu;
>
> memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops,
> s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size);
> @@ -291,18 +291,15 @@ static void
> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
> s->timers = g_new0(QEMUTimer *, s->num_harts);
> s->timecmp = g_new0(uint64_t, s->num_harts);
> /* Claim timer interrupt bits */
> - for (i = 0; i < s->num_harts; i++) {
> - CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
> - if (cpu_by_hartid == NULL) {
> - qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
> hartid: %u",
> - s->hartid_base + i);
> - continue;
> - }
> - RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
> - if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
> - error_report("MTIP already claimed");
> - exit(1);
> - }
> + CPU_FOREACH(cpu) {
> + if (cpu == NULL)
> + abort();
Why do you end having a NULL vcpu in the global cpus_queue?
(this is the 'elsewhere problem').
> +
> + RISCVCPU *cpu_riscv = RISCV_CPU(cpu);
> + if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) {
> + error_report("MTIP already claimed");
> + exit(1);
> + }
> }
> }
>
>
> Thanks,
>
> Djordje
>
>
On 22. 9. 25. 08:52, Philippe Mathieu-Daudé wrote:
> CAUTION: This email originated from outside of the organization. Do
> not click links or open attachments unless you recognize the sender
> and know the content is safe.
>
>
> On 3/9/25 14:35, Djordje Todorovic wrote:
>>
>> On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote:
>>> CAUTION: This email originated from outside of the organization. Do
>>> not click links or open attachments unless you recognize the sender
>>> and know the content is safe.
>>>
>>>
>>> On 1/9/25 10:17, Djordje Todorovic wrote:
>>>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
>>>>
>>>>> CAUTION: This email originated from outside of the organization. Do
>>>>> not click links or open attachments unless you recognize the sender
>>>>> and know the content is safe.
>>>>>
>>>>>
>>>>> On 17/7/25 11:38, Djordje Todorovic wrote:
>>>>>> This is needed for riscv based CPUs by MIPS since those may have
>>>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>>>>>> range, and if a hart is missing, this causes NULL derefs.
>>>>>>
>>>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>>>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>>>>>> ---
>>>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>>>>>> hw/intc/riscv_aplic.c | 11 ++++++++---
>>>>>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
>>>>>> index b0139f03f5..22ac4133d5 100644
>>>>>> --- a/hw/intc/riscv_aclint.c
>>>>>> +++ b/hw/intc/riscv_aclint.c
>>>>>> @@ -292,7 +292,13 @@ static void
>>>>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>>>>>> s->timecmp = g_new0(uint64_t, s->num_harts);
>>>>>> /* Claim timer interrupt bits */
>>>>>> for (i = 0; i < s->num_harts; i++) {
>>>>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base +
>>>>>> i));
>>>>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base
>>>>>> + i);
>>>>>> + if (cpu_by_hartid == NULL) {
>>>>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>>>>>> hartid: %u",
>>>>>> + s->hartid_base + i);
>>>>>
>>>>> DeviceRealize() handlers are part of machine modelling, not guest
>>>>> uses.
>>>>>
>>>>> IOW, triggering this is a programming mistake, so we should just
>>>>> abort() here.
>>>>
>>>> Well, if we do it that way, our Boston board target for P8700 cannot
>>>> run.
>>>
>>> So the problem is elsewhere :)
>>
>>
>> I see. Would something like this be acceptable:
>>
>> --- a/hw/intc/riscv_aclint.c
>> +++ b/hw/intc/riscv_aclint.c
>> @@ -279,7 +279,7 @@ static const Property
>> riscv_aclint_mtimer_properties[] = {
>> static void riscv_aclint_mtimer_realize(DeviceState *dev, Error
>> **errp)
>> {
>> RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev);
>> - int i;
>> + CPUState *cpu;
>>
>> memory_region_init_io(&s->mmio, OBJECT(dev),
>> &riscv_aclint_mtimer_ops,
>> s, TYPE_RISCV_ACLINT_MTIMER,
>> s->aperture_size);
>> @@ -291,18 +291,15 @@ static void
>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
>> s->timers = g_new0(QEMUTimer *, s->num_harts);
>> s->timecmp = g_new0(uint64_t, s->num_harts);
>> /* Claim timer interrupt bits */
>> - for (i = 0; i < s->num_harts; i++) {
>> - CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
>> - if (cpu_by_hartid == NULL) {
>> - qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid
>> hartid: %u",
>> - s->hartid_base + i);
>> - continue;
>> - }
>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
>> - if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
>> - error_report("MTIP already claimed");
>> - exit(1);
>> - }
>> + CPU_FOREACH(cpu) {
>> + if (cpu == NULL)
>> + abort();
>
> Why do you end having a NULL vcpu in the global cpus_queue?
> (this is the 'elsewhere problem').
>
Well, it is true, for our case, we would never get into vcpu == NULL case.
After several attempts to come up with a better solution for this, I think
we are back to the existing one. I will try to elaborate why.
The sparse hart-ID layout in this case is not a programming mistake but
an intentional hardware design characteristic of the P8700. The P8700
RISC-V implementation has a sparse hart-ID layout where not all hart IDs
in a range are populated. This is explicitly supported by the RISC-V APLIC
specification. The current ACLINT/APLIC implementation assumes a dense
range of hart IDs (from hartid_base to hartid_base + num_harts - 1).
For the P8700 board:
- We iterate through the theoretical hart ID range for a cluster
- Some hart IDs legitimately don't have corresponding CPUs (sparse
layout)
- We need to skip these without failing
The CPU_FOREACH approach doesn't work here because:
- The cpu==NULL will never happen
- It iterates over all CPUs system-wide, not just those in the current
cluster
- The hartid_base parameter defines which cluster's interrupt controller
we're initializing
- We need to maintain the index-based register array mapping even for
non-existent harts
I agree the LOG_GUEST_ERROR was inappropriate since this isn't a guest
error, so I will add a comment in v8 to explain why we are skipping.
Thanks a lot,
Djordje
>> +
>> + RISCVCPU *cpu_riscv = RISCV_CPU(cpu);
>> + if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) {
>> + error_report("MTIP already claimed");
>> + exit(1);
>> + }
>> }
>> }
>>
>>
>> Thanks,
>>
>> Djordje
>>
>>
On 24/9/25 10:00, Djordje Todorovic wrote:
>
> On 22. 9. 25. 08:52, Philippe Mathieu-Daudé wrote:
>> CAUTION: This email originated from outside of the organization. Do
>> not click links or open attachments unless you recognize the sender
>> and know the content is safe.
>>
>>
>> On 3/9/25 14:35, Djordje Todorovic wrote:
>>>
>>> On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote:
>>>> CAUTION: This email originated from outside of the organization. Do
>>>> not click links or open attachments unless you recognize the sender
>>>> and know the content is safe.
>>>>
>>>>
>>>> On 1/9/25 10:17, Djordje Todorovic wrote:
>>>>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote:
>>>>>
>>>>>> CAUTION: This email originated from outside of the organization. Do
>>>>>> not click links or open attachments unless you recognize the sender
>>>>>> and know the content is safe.
>>>>>>
>>>>>>
>>>>>> On 17/7/25 11:38, Djordje Todorovic wrote:
>>>>>>> This is needed for riscv based CPUs by MIPS since those may have
>>>>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense
>>>>>>> range, and if a hart is missing, this causes NULL derefs.
>>>>>>>
>>>>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com>
>>>>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
>>>>>>> ---
>>>>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
>>>>>>> hw/intc/riscv_aplic.c | 11 ++++++++---
>>>>>>> 2 files changed, 27 insertions(+), 5 deletions(-)
>>> + CPU_FOREACH(cpu) {
>>> + if (cpu == NULL)
>>> + abort();
>>
>> Why do you end having a NULL vcpu in the global cpus_queue?
>> (this is the 'elsewhere problem').
>>
> Well, it is true, for our case, we would never get into vcpu == NULL case.
>
>
> After several attempts to come up with a better solution for this, I think
>
> we are back to the existing one. I will try to elaborate why.
>
> The sparse hart-ID layout in this case is not a programming mistake but
>
> an intentional hardware design characteristic of the P8700. The P8700
>
> RISC-V implementation has a sparse hart-ID layout where not all hart IDs
>
> in a range are populated. This is explicitly supported by the RISC-V APLIC
>
> specification. The current ACLINT/APLIC implementation assumes a dense
>
> range of hart IDs (from hartid_base to hartid_base + num_harts - 1).
>
> For the P8700 board:
>
> - We iterate through the theoretical hart ID range for a cluster
>
> - Some hart IDs legitimately don't have corresponding CPUs (sparse
> layout)
>
> - We need to skip these without failing
>
> The CPU_FOREACH approach doesn't work here because:
>
> - The cpu==NULL will never happen
>
> - It iterates over all CPUs system-wide, not just those in the current
>
> cluster
Correct, we simply need to iterate over the CPUs in the cluster, which
IFAICT this device model doesn't use (TYPE_CPU_CLUSTER).
© 2016 - 2026 Red Hat, Inc.