This is needed for riscv based CPUs by MIPS since those may have
sparse hart-ID layouts. ACLINT and APLIC still assume a dense
range, and if a hart is missing, this causes NULL derefs.
Signed-off-by: Chao-ying Fu <cfu@mips.com>
Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com>
---
hw/intc/riscv_aclint.c | 21 +++++++++++++++++++--
hw/intc/riscv_aplic.c | 11 ++++++++---
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index b0139f03f5..22ac4133d5 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -292,7 +292,13 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
s->timecmp = g_new0(uint64_t, s->num_harts);
/* Claim timer interrupt bits */
for (i = 0; i < s->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
+ CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i);
+ if (cpu_by_hartid == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid hartid: %u",
+ s->hartid_base + i);
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
error_report("MTIP already claimed");
exit(1);
@@ -481,7 +487,13 @@ static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp)
/* Claim software interrupt bits */
for (i = 0; i < swi->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i));
+ CPUState *cpu_by_hartid = cpu_by_arch_id(swi->hartid_base + i);
+ if (cpu_by_hartid == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-swi: invalid hartid: %u",
+ swi->hartid_base + i);
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid);
/* We don't claim mip.SSIP because it is writable by software */
if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) {
error_report("MSIP already claimed");
@@ -545,6 +557,11 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base,
for (i = 0; i < num_harts; i++) {
CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+ if (cpu == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "aclint-swi: invalid hartid: %u",
+ hartid_base + i);
+ continue;
+ }
RISCVCPU *rvcpu = RISCV_CPU(cpu);
qdev_connect_gpio_out(dev, i,
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 8bcd9f4697..c7846753fe 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -899,9 +899,11 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
if (!aplic->msimode) {
/* Claim the CPU interrupt to be triggered by this APLIC */
for (i = 0; i < aplic->num_harts; i++) {
- RISCVCPU *cpu;
-
- cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
+ CPUState *temp = cpu_by_arch_id(aplic->hartid_base + i);
+ if (temp == NULL) {
+ continue;
+ }
+ RISCVCPU *cpu = RISCV_CPU(temp);
if (riscv_cpu_claim_interrupts(cpu,
(aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
error_report("%s already claimed",
@@ -1076,6 +1078,9 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
if (!msimode) {
for (i = 0; i < num_harts; i++) {
CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+ if (cpu == NULL) {
+ continue;
+ }
qdev_connect_gpio_out_named(dev, NULL, i,
qdev_get_gpio_in(DEVICE(cpu),
--
2.34.1
On 17/7/25 11:38, Djordje Todorovic wrote: > This is needed for riscv based CPUs by MIPS since those may have > sparse hart-ID layouts. ACLINT and APLIC still assume a dense > range, and if a hart is missing, this causes NULL derefs. > > Signed-off-by: Chao-ying Fu <cfu@mips.com> > Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> > --- > hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- > hw/intc/riscv_aplic.c | 11 ++++++++--- > 2 files changed, 27 insertions(+), 5 deletions(-) > > diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c > index b0139f03f5..22ac4133d5 100644 > --- a/hw/intc/riscv_aclint.c > +++ b/hw/intc/riscv_aclint.c > @@ -292,7 +292,13 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) > s->timecmp = g_new0(uint64_t, s->num_harts); > /* Claim timer interrupt bits */ > for (i = 0; i < s->num_harts; i++) { > - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i)); > + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); > + if (cpu_by_hartid == NULL) { > + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid hartid: %u", > + s->hartid_base + i); DeviceRealize() handlers are part of machine modelling, not guest uses. IOW, triggering this is a programming mistake, so we should just abort() here.
On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: > CAUTION: This email originated from outside of the organization. Do > not click links or open attachments unless you recognize the sender > and know the content is safe. > > > On 17/7/25 11:38, Djordje Todorovic wrote: >> This is needed for riscv based CPUs by MIPS since those may have >> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >> range, and if a hart is missing, this causes NULL derefs. >> >> Signed-off-by: Chao-ying Fu <cfu@mips.com> >> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >> --- >> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >> hw/intc/riscv_aplic.c | 11 ++++++++--- >> 2 files changed, 27 insertions(+), 5 deletions(-) >> >> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c >> index b0139f03f5..22ac4133d5 100644 >> --- a/hw/intc/riscv_aclint.c >> +++ b/hw/intc/riscv_aclint.c >> @@ -292,7 +292,13 @@ static void >> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >> s->timecmp = g_new0(uint64_t, s->num_harts); >> /* Claim timer interrupt bits */ >> for (i = 0; i < s->num_harts; i++) { >> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i)); >> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); >> + if (cpu_by_hartid == NULL) { >> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >> hartid: %u", >> + s->hartid_base + i); > > DeviceRealize() handlers are part of machine modelling, not guest uses. > > IOW, triggering this is a programming mistake, so we should just > abort() here. Well, if we do it that way, our Boston board target for P8700 cannot run.
On 1/9/25 10:17, Djordje Todorovic wrote: > On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: > >> CAUTION: This email originated from outside of the organization. Do >> not click links or open attachments unless you recognize the sender >> and know the content is safe. >> >> >> On 17/7/25 11:38, Djordje Todorovic wrote: >>> This is needed for riscv based CPUs by MIPS since those may have >>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >>> range, and if a hart is missing, this causes NULL derefs. >>> >>> Signed-off-by: Chao-ying Fu <cfu@mips.com> >>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >>> --- >>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >>> hw/intc/riscv_aplic.c | 11 ++++++++--- >>> 2 files changed, 27 insertions(+), 5 deletions(-) >>> >>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c >>> index b0139f03f5..22ac4133d5 100644 >>> --- a/hw/intc/riscv_aclint.c >>> +++ b/hw/intc/riscv_aclint.c >>> @@ -292,7 +292,13 @@ static void >>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >>> s->timecmp = g_new0(uint64_t, s->num_harts); >>> /* Claim timer interrupt bits */ >>> for (i = 0; i < s->num_harts; i++) { >>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i)); >>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); >>> + if (cpu_by_hartid == NULL) { >>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >>> hartid: %u", >>> + s->hartid_base + i); >> >> DeviceRealize() handlers are part of machine modelling, not guest uses. >> >> IOW, triggering this is a programming mistake, so we should just >> abort() here. > > Well, if we do it that way, our Boston board target for P8700 cannot run. So the problem is elsewhere :)
On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote: > CAUTION: This email originated from outside of the organization. Do > not click links or open attachments unless you recognize the sender > and know the content is safe. > > > On 1/9/25 10:17, Djordje Todorovic wrote: >> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: >> >>> CAUTION: This email originated from outside of the organization. Do >>> not click links or open attachments unless you recognize the sender >>> and know the content is safe. >>> >>> >>> On 17/7/25 11:38, Djordje Todorovic wrote: >>>> This is needed for riscv based CPUs by MIPS since those may have >>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >>>> range, and if a hart is missing, this causes NULL derefs. >>>> >>>> Signed-off-by: Chao-ying Fu <cfu@mips.com> >>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >>>> --- >>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >>>> hw/intc/riscv_aplic.c | 11 ++++++++--- >>>> 2 files changed, 27 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c >>>> index b0139f03f5..22ac4133d5 100644 >>>> --- a/hw/intc/riscv_aclint.c >>>> +++ b/hw/intc/riscv_aclint.c >>>> @@ -292,7 +292,13 @@ static void >>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >>>> s->timecmp = g_new0(uint64_t, s->num_harts); >>>> /* Claim timer interrupt bits */ >>>> for (i = 0; i < s->num_harts; i++) { >>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + >>>> i)); >>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); >>>> + if (cpu_by_hartid == NULL) { >>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >>>> hartid: %u", >>>> + s->hartid_base + i); >>> >>> DeviceRealize() handlers are part of machine modelling, not guest uses. >>> >>> IOW, triggering this is a programming mistake, so we should just >>> abort() here. >> >> Well, if we do it that way, our Boston board target for P8700 cannot >> run. > > So the problem is elsewhere :) I see. Would something like this be acceptable: --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -279,7 +279,7 @@ static const Property riscv_aclint_mtimer_properties[] = { static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) { RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev); - int i; + CPUState *cpu; memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops, s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size); @@ -291,18 +291,15 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) s->timers = g_new0(QEMUTimer *, s->num_harts); s->timecmp = g_new0(uint64_t, s->num_harts); /* Claim timer interrupt bits */ - for (i = 0; i < s->num_harts; i++) { - CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); - if (cpu_by_hartid == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid hartid: %u", - s->hartid_base + i); - continue; - } - RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid); - if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { - error_report("MTIP already claimed"); - exit(1); - } + CPU_FOREACH(cpu) { + if (cpu == NULL) + abort(); + + RISCVCPU *cpu_riscv = RISCV_CPU(cpu); + if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) { + error_report("MTIP already claimed"); + exit(1); + } } } Thanks, Djordje
On 3/9/25 14:35, Djordje Todorovic wrote: > > On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote: >> CAUTION: This email originated from outside of the organization. Do >> not click links or open attachments unless you recognize the sender >> and know the content is safe. >> >> >> On 1/9/25 10:17, Djordje Todorovic wrote: >>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: >>> >>>> CAUTION: This email originated from outside of the organization. Do >>>> not click links or open attachments unless you recognize the sender >>>> and know the content is safe. >>>> >>>> >>>> On 17/7/25 11:38, Djordje Todorovic wrote: >>>>> This is needed for riscv based CPUs by MIPS since those may have >>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >>>>> range, and if a hart is missing, this causes NULL derefs. >>>>> >>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com> >>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >>>>> --- >>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >>>>> hw/intc/riscv_aplic.c | 11 ++++++++--- >>>>> 2 files changed, 27 insertions(+), 5 deletions(-) >>>>> >>>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c >>>>> index b0139f03f5..22ac4133d5 100644 >>>>> --- a/hw/intc/riscv_aclint.c >>>>> +++ b/hw/intc/riscv_aclint.c >>>>> @@ -292,7 +292,13 @@ static void >>>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >>>>> s->timecmp = g_new0(uint64_t, s->num_harts); >>>>> /* Claim timer interrupt bits */ >>>>> for (i = 0; i < s->num_harts; i++) { >>>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + >>>>> i)); >>>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); >>>>> + if (cpu_by_hartid == NULL) { >>>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >>>>> hartid: %u", >>>>> + s->hartid_base + i); >>>> >>>> DeviceRealize() handlers are part of machine modelling, not guest uses. >>>> >>>> IOW, triggering this is a programming mistake, so we should just >>>> abort() here. >>> >>> Well, if we do it that way, our Boston board target for P8700 cannot >>> run. >> >> So the problem is elsewhere :) > > > I see. Would something like this be acceptable: > > --- a/hw/intc/riscv_aclint.c > +++ b/hw/intc/riscv_aclint.c > @@ -279,7 +279,7 @@ static const Property > riscv_aclint_mtimer_properties[] = { > static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) > { > RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev); > - int i; > + CPUState *cpu; > > memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops, > s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size); > @@ -291,18 +291,15 @@ static void > riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) > s->timers = g_new0(QEMUTimer *, s->num_harts); > s->timecmp = g_new0(uint64_t, s->num_harts); > /* Claim timer interrupt bits */ > - for (i = 0; i < s->num_harts; i++) { > - CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); > - if (cpu_by_hartid == NULL) { > - qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid > hartid: %u", > - s->hartid_base + i); > - continue; > - } > - RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid); > - if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { > - error_report("MTIP already claimed"); > - exit(1); > - } > + CPU_FOREACH(cpu) { > + if (cpu == NULL) > + abort(); Why do you end having a NULL vcpu in the global cpus_queue? (this is the 'elsewhere problem'). > + > + RISCVCPU *cpu_riscv = RISCV_CPU(cpu); > + if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) { > + error_report("MTIP already claimed"); > + exit(1); > + } > } > } > > > Thanks, > > Djordje > >
On 22. 9. 25. 08:52, Philippe Mathieu-Daudé wrote: > CAUTION: This email originated from outside of the organization. Do > not click links or open attachments unless you recognize the sender > and know the content is safe. > > > On 3/9/25 14:35, Djordje Todorovic wrote: >> >> On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote: >>> CAUTION: This email originated from outside of the organization. Do >>> not click links or open attachments unless you recognize the sender >>> and know the content is safe. >>> >>> >>> On 1/9/25 10:17, Djordje Todorovic wrote: >>>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: >>>> >>>>> CAUTION: This email originated from outside of the organization. Do >>>>> not click links or open attachments unless you recognize the sender >>>>> and know the content is safe. >>>>> >>>>> >>>>> On 17/7/25 11:38, Djordje Todorovic wrote: >>>>>> This is needed for riscv based CPUs by MIPS since those may have >>>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >>>>>> range, and if a hart is missing, this causes NULL derefs. >>>>>> >>>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com> >>>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >>>>>> --- >>>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >>>>>> hw/intc/riscv_aplic.c | 11 ++++++++--- >>>>>> 2 files changed, 27 insertions(+), 5 deletions(-) >>>>>> >>>>>> diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c >>>>>> index b0139f03f5..22ac4133d5 100644 >>>>>> --- a/hw/intc/riscv_aclint.c >>>>>> +++ b/hw/intc/riscv_aclint.c >>>>>> @@ -292,7 +292,13 @@ static void >>>>>> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >>>>>> s->timecmp = g_new0(uint64_t, s->num_harts); >>>>>> /* Claim timer interrupt bits */ >>>>>> for (i = 0; i < s->num_harts; i++) { >>>>>> - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + >>>>>> i)); >>>>>> + CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base >>>>>> + i); >>>>>> + if (cpu_by_hartid == NULL) { >>>>>> + qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >>>>>> hartid: %u", >>>>>> + s->hartid_base + i); >>>>> >>>>> DeviceRealize() handlers are part of machine modelling, not guest >>>>> uses. >>>>> >>>>> IOW, triggering this is a programming mistake, so we should just >>>>> abort() here. >>>> >>>> Well, if we do it that way, our Boston board target for P8700 cannot >>>> run. >>> >>> So the problem is elsewhere :) >> >> >> I see. Would something like this be acceptable: >> >> --- a/hw/intc/riscv_aclint.c >> +++ b/hw/intc/riscv_aclint.c >> @@ -279,7 +279,7 @@ static const Property >> riscv_aclint_mtimer_properties[] = { >> static void riscv_aclint_mtimer_realize(DeviceState *dev, Error >> **errp) >> { >> RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev); >> - int i; >> + CPUState *cpu; >> >> memory_region_init_io(&s->mmio, OBJECT(dev), >> &riscv_aclint_mtimer_ops, >> s, TYPE_RISCV_ACLINT_MTIMER, >> s->aperture_size); >> @@ -291,18 +291,15 @@ static void >> riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) >> s->timers = g_new0(QEMUTimer *, s->num_harts); >> s->timecmp = g_new0(uint64_t, s->num_harts); >> /* Claim timer interrupt bits */ >> - for (i = 0; i < s->num_harts; i++) { >> - CPUState *cpu_by_hartid = cpu_by_arch_id(s->hartid_base + i); >> - if (cpu_by_hartid == NULL) { >> - qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid >> hartid: %u", >> - s->hartid_base + i); >> - continue; >> - } >> - RISCVCPU *cpu = RISCV_CPU(cpu_by_hartid); >> - if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { >> - error_report("MTIP already claimed"); >> - exit(1); >> - } >> + CPU_FOREACH(cpu) { >> + if (cpu == NULL) >> + abort(); > > Why do you end having a NULL vcpu in the global cpus_queue? > (this is the 'elsewhere problem'). > Well, it is true, for our case, we would never get into vcpu == NULL case. After several attempts to come up with a better solution for this, I think we are back to the existing one. I will try to elaborate why. The sparse hart-ID layout in this case is not a programming mistake but an intentional hardware design characteristic of the P8700. The P8700 RISC-V implementation has a sparse hart-ID layout where not all hart IDs in a range are populated. This is explicitly supported by the RISC-V APLIC specification. The current ACLINT/APLIC implementation assumes a dense range of hart IDs (from hartid_base to hartid_base + num_harts - 1). For the P8700 board: - We iterate through the theoretical hart ID range for a cluster - Some hart IDs legitimately don't have corresponding CPUs (sparse layout) - We need to skip these without failing The CPU_FOREACH approach doesn't work here because: - The cpu==NULL will never happen - It iterates over all CPUs system-wide, not just those in the current cluster - The hartid_base parameter defines which cluster's interrupt controller we're initializing - We need to maintain the index-based register array mapping even for non-existent harts I agree the LOG_GUEST_ERROR was inappropriate since this isn't a guest error, so I will add a comment in v8 to explain why we are skipping. Thanks a lot, Djordje >> + >> + RISCVCPU *cpu_riscv = RISCV_CPU(cpu); >> + if (riscv_cpu_claim_interrupts(cpu_riscv, MIP_MTIP) < 0) { >> + error_report("MTIP already claimed"); >> + exit(1); >> + } >> } >> } >> >> >> Thanks, >> >> Djordje >> >>
On 24/9/25 10:00, Djordje Todorovic wrote: > > On 22. 9. 25. 08:52, Philippe Mathieu-Daudé wrote: >> CAUTION: This email originated from outside of the organization. Do >> not click links or open attachments unless you recognize the sender >> and know the content is safe. >> >> >> On 3/9/25 14:35, Djordje Todorovic wrote: >>> >>> On 1. 9. 25. 13:05, Philippe Mathieu-Daudé wrote: >>>> CAUTION: This email originated from outside of the organization. Do >>>> not click links or open attachments unless you recognize the sender >>>> and know the content is safe. >>>> >>>> >>>> On 1/9/25 10:17, Djordje Todorovic wrote: >>>>> On 8. 8. 25. 17:52, Philippe Mathieu-Daudé wrote: >>>>> >>>>>> CAUTION: This email originated from outside of the organization. Do >>>>>> not click links or open attachments unless you recognize the sender >>>>>> and know the content is safe. >>>>>> >>>>>> >>>>>> On 17/7/25 11:38, Djordje Todorovic wrote: >>>>>>> This is needed for riscv based CPUs by MIPS since those may have >>>>>>> sparse hart-ID layouts. ACLINT and APLIC still assume a dense >>>>>>> range, and if a hart is missing, this causes NULL derefs. >>>>>>> >>>>>>> Signed-off-by: Chao-ying Fu <cfu@mips.com> >>>>>>> Signed-off-by: Djordje Todorovic <djordje.todorovic@htecgroup.com> >>>>>>> --- >>>>>>> hw/intc/riscv_aclint.c | 21 +++++++++++++++++++-- >>>>>>> hw/intc/riscv_aplic.c | 11 ++++++++--- >>>>>>> 2 files changed, 27 insertions(+), 5 deletions(-) >>> + CPU_FOREACH(cpu) { >>> + if (cpu == NULL) >>> + abort(); >> >> Why do you end having a NULL vcpu in the global cpus_queue? >> (this is the 'elsewhere problem'). >> > Well, it is true, for our case, we would never get into vcpu == NULL case. > > > After several attempts to come up with a better solution for this, I think > > we are back to the existing one. I will try to elaborate why. > > The sparse hart-ID layout in this case is not a programming mistake but > > an intentional hardware design characteristic of the P8700. The P8700 > > RISC-V implementation has a sparse hart-ID layout where not all hart IDs > > in a range are populated. This is explicitly supported by the RISC-V APLIC > > specification. The current ACLINT/APLIC implementation assumes a dense > > range of hart IDs (from hartid_base to hartid_base + num_harts - 1). > > For the P8700 board: > > - We iterate through the theoretical hart ID range for a cluster > > - Some hart IDs legitimately don't have corresponding CPUs (sparse > layout) > > - We need to skip these without failing > > The CPU_FOREACH approach doesn't work here because: > > - The cpu==NULL will never happen > > - It iterates over all CPUs system-wide, not just those in the current > > cluster Correct, we simply need to iterate over the CPUs in the cluster, which IFAICT this device model doesn't use (TYPE_CPU_CLUSTER).
© 2016 - 2025 Red Hat, Inc.