[v1] riscv: introduce basic UART support and interrupts for hypervisor mode

[PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 10 months ago

Introduce interrupt controller descriptor for host APLIC to describe
the low-lovel hardare. It includes implementation of the following functions:
 - aplic_irq_startup()
 - aplic_irq_shutdown()
 - aplic_irq_enable()
 - aplic_irq_disable()
 - aplic_irq_ack()
 - aplic_host_irq_end()
 - aplic_set_irq_affinity()

As APLIC is used in MSI mode it requires to enable/disable interrupts not
only for APLIC but also for IMSIC. Thereby for the purpose of
aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().

For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
introduced to get hart id.

Also, introduce additional interrupt controller h/w operations and
host_irq_type for APLIC:
 - aplic_host_irq_type
 - aplic_set_irq_priority()
 - aplic_set_irq_type()

Patch is based on the code from [1].

[1] https://gitlab.com/xen-project/people/olkur/xen/-/commit/7390e2365828b83e27ead56b03114a56e3699dd5

Co-developed-by: Romain Caritey <Romain.Caritey@microchip.com>
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
 xen/arch/riscv/aplic.c             | 169 ++++++++++++++++++++++++++++-
 xen/arch/riscv/imsic.c             |  63 +++++++++++
 xen/arch/riscv/include/asm/aplic.h |  12 ++
 xen/arch/riscv/include/asm/imsic.h |  15 +++
 4 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/xen/arch/riscv/aplic.c b/xen/arch/riscv/aplic.c
index d1aa835c3e..4b60cb9a77 100644
--- a/xen/arch/riscv/aplic.c
+++ b/xen/arch/riscv/aplic.c
@@ -15,6 +15,7 @@
 #include <xen/irq.h>
 #include <xen/mm.h>
 #include <xen/sections.h>
+#include <xen/spinlock.h>
 #include <xen/types.h>
 #include <xen/vmap.h>
 
@@ -110,9 +111,173 @@ static int __init aplic_init(void)
     return 0;
 }
 
-static const struct intc_hw_operations __ro_after_init aplic_ops = {
+static void aplic_set_irq_type(struct irq_desc *desc, unsigned int type)
+{
+    unsigned int irq = desc->irq - 1;
+
+    spin_lock(&aplic.lock);
+    switch(type) {
+        case IRQ_TYPE_EDGE_RISING:
+            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_RISE;
+            break;
+        case IRQ_TYPE_EDGE_FALLING:
+            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_FALL;
+            break;
+        case IRQ_TYPE_LEVEL_HIGH:
+            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_HIGH;
+            break;
+        case IRQ_TYPE_LEVEL_LOW:
+            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_LOW;
+            break;
+        default:
+            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_INACTIVE;
+            break;
+    }
+    spin_unlock(&aplic.lock);
+}
+
+static void aplic_set_irq_priority(struct irq_desc *desc,
+                                   unsigned int priority)
+{
+    /* No priority, do nothing */
+}
+
+static void aplic_irq_enable(struct irq_desc *desc)
+{
+    unsigned long flags;
+
+    /*
+     * TODO: Currently, APLIC is supported only with MSI interrupts.
+     *       If APLIC without MSI interrupts is required in the future,
+     *       this function will need to be updated accordingly.
+     */
+    ASSERT(aplic.imsic_cfg->is_used);
+
+    ASSERT(spin_is_locked(&desc->lock));
+
+    spin_lock_irqsave(&aplic.lock, flags);
+
+    clear_bit(_IRQ_DISABLED, &desc->status);
+
+    /* enable interrupt in IMSIC */
+    imsic_irq_enable(desc->irq);
+
+    /* enable interrupt in APLIC */
+    aplic.regs->setienum = desc->irq;
+
+    spin_unlock_irqrestore(&aplic.lock, flags);
+}
+
+static void aplic_irq_disable(struct irq_desc *desc)
+{
+    unsigned long flags;
+
+    /*
+     * TODO: Currently, APLIC is supported only with MSI interrupts.
+     *       If APLIC without MSI interrupts is required in the future,
+     *       this function will need to be updated accordingly.
+     */
+    ASSERT(aplic.imsic_cfg->is_used);
+
+    ASSERT(spin_is_locked(&desc->lock));
+
+    spin_lock_irqsave(&aplic.lock, flags);
+
+    set_bit(_IRQ_DISABLED, &desc->status);
+
+    /* disable interrupt in APLIC */
+    aplic.regs->clrienum = desc->irq;
+
+    /* disable interrupt in IMSIC */
+    imsic_irq_disable(desc->irq);
+
+    spin_unlock_irqrestore(&aplic.lock, flags);
+}
+
+static unsigned int aplic_irq_startup(struct irq_desc *desc)
+{
+    aplic_irq_enable(desc);
+
+    return 0;
+}
+
+static void aplic_irq_shutdown(struct irq_desc *desc)
+{
+    aplic_irq_disable(desc);
+}
+
+static void aplic_irq_ack(struct irq_desc *desc)
+{
+    /* nothing to do */
+}
+
+static void aplic_host_irq_end(struct irq_desc *desc)
+{
+    /* nothing to do */
+}
+
+static unsigned int aplic_get_cpu_from_mask(const cpumask_t *cpumask)
+{
+    unsigned int cpu;
+    cpumask_t possible_mask;
+
+    cpumask_and(&possible_mask, cpumask, &cpu_possible_map);
+    cpu = cpumask_any(&possible_mask);
+
+    return cpu;
+}
+
+static void aplic_set_irq_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
+    unsigned int cpu;
+    uint64_t group_index, base_ppn;
+    uint32_t hhxw, lhxw ,hhxs, value;
+    const struct imsic_config *imsic = aplic.imsic_cfg;
+
+    /*
+     * TODO: Currently, APLIC is supported only with MSI interrupts.
+     *       If APLIC without MSI interrupts is required in the future,
+     *       this function will need to be updated accordingly.
+     */
+    ASSERT(aplic.imsic_cfg->is_used);
+
+    ASSERT(!cpumask_empty(mask));
+
+    spin_lock(&aplic.lock);
+
+    cpu = cpuid_to_hartid(aplic_get_cpu_from_mask(mask));
+    hhxw = imsic->group_index_bits;
+    lhxw = imsic->hart_index_bits;
+    hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
+    base_ppn = imsic->msi[cpu].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
+
+    /* update hart and EEID in the target register */
+    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);
+    value = desc->irq;
+    value |= cpu << APLIC_TARGET_HART_IDX_SHIFT;
+    value |= group_index << (lhxw + APLIC_TARGET_HART_IDX_SHIFT) ;
+    aplic.regs->target[desc->irq - 1] = value;
+
+    spin_unlock(&aplic.lock);
+}
+
+static hw_irq_controller aplic_host_irq_type = {
+    .typename     = "aplic",
+    .startup      = aplic_irq_startup,
+    .shutdown     = aplic_irq_shutdown,
+    .enable       = aplic_irq_enable,
+    .disable      = aplic_irq_disable,
+    .ack          = aplic_irq_ack,
+    .end          = aplic_host_irq_end,
+    .set_affinity = aplic_set_irq_affinity,
+};
+
+static const struct intc_hw_operations aplic_ops = {
     .info                = &aplic_info,
     .init                = aplic_init,
+    .host_irq_type       = &aplic_host_irq_type,
+    .set_irq_priority    = aplic_set_irq_priority,
+    .set_irq_type        = aplic_set_irq_type,
 };
 
 static int aplic_irq_xlate(const uint32_t *intspec, unsigned int intsize,
@@ -149,6 +314,8 @@ static int __init aplic_preinit(struct dt_device_node *node, const void *dat)
 
     dt_irq_xlate = aplic_irq_xlate;
 
+    spin_lock_init(&aplic.lock);
+
     register_intc_ops(&aplic_ops);
 
     return 0;
diff --git a/xen/arch/riscv/imsic.c b/xen/arch/riscv/imsic.c
index 99def9af2d..8198d008ef 100644
--- a/xen/arch/riscv/imsic.c
+++ b/xen/arch/riscv/imsic.c
@@ -14,12 +14,68 @@
 #include <xen/errno.h>
 #include <xen/init.h>
 #include <xen/macros.h>
+#include <xen/spinlock.h>
 #include <xen/xmalloc.h>
 
 #include <asm/imsic.h>
 
 static struct imsic_config imsic_cfg;
 
+#define imsic_csr_set(c, v)     \
+do {                            \
+    csr_write(CSR_SISELECT, c); \
+    csr_set(CSR_SIREG, v);      \
+} while (0)
+
+#define imsic_csr_clear(c, v)   \
+do {                            \
+    csr_write(CSR_SISELECT, c); \
+    csr_clear(CSR_SIREG, v);    \
+} while (0)
+
+static void imsic_local_eix_update(unsigned long base_id, unsigned long num_id,
+                                   bool pend, bool val)
+{
+    unsigned long i, isel, ireg;
+    unsigned long id = base_id, last_id = base_id + num_id;
+
+    while ( id < last_id )
+    {
+        isel = id / __riscv_xlen;
+        isel *= __riscv_xlen / IMSIC_EIPx_BITS;
+        isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;
+
+        ireg = 0;
+        for ( i = id & (__riscv_xlen - 1);
+              (id < last_id) && (i < __riscv_xlen);
+              i++, id++ )
+            ireg |= (1 << i);
+
+        if ( val )
+            imsic_csr_set(isel, ireg);
+        else
+            imsic_csr_clear(isel, ireg);
+    }
+}
+
+void imsic_irq_enable(unsigned int hwirq)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&imsic_cfg.lock, flags);
+    imsic_local_eix_update(hwirq, 1, false, true);
+    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
+}
+
+void imsic_irq_disable(unsigned int hwirq)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&imsic_cfg.lock, flags);
+    imsic_local_eix_update(hwirq, 1, false, false);
+    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
+}
+
 const struct imsic_config *imsic_get_config(void)
 {
     return &imsic_cfg;
@@ -277,6 +333,13 @@ int __init imsic_init(struct dt_device_node *node)
         goto imsic_init_err;
     }
 
+    spin_lock_init(&imsic_cfg.lock);
+
+    /* Enable local interrupt delivery */
+    imsic_ids_local_delivery(true);
+
+    imsic_cfg.is_used = true;
+
     return 0;
 
 imsic_init_err:
diff --git a/xen/arch/riscv/include/asm/aplic.h b/xen/arch/riscv/include/asm/aplic.h
index 94b3d0b616..ce858663a9 100644
--- a/xen/arch/riscv/include/asm/aplic.h
+++ b/xen/arch/riscv/include/asm/aplic.h
@@ -18,6 +18,15 @@
 #define APLIC_DOMAINCFG_IE      BIT(8, UL)
 #define APLIC_DOMAINCFG_DM      BIT(2, UL)
 
+#define APLIC_SOURCECFG_SM_INACTIVE     0x0
+#define APLIC_SOURCECFG_SM_DETACH       0x1
+#define APLIC_SOURCECFG_SM_EDGE_RISE    0x4
+#define APLIC_SOURCECFG_SM_EDGE_FALL    0x5
+#define APLIC_SOURCECFG_SM_LEVEL_HIGH   0x6
+#define APLIC_SOURCECFG_SM_LEVEL_LOW    0x7
+
+#define APLIC_TARGET_HART_IDX_SHIFT 18
+
 struct aplic_regs {
     uint32_t domaincfg;
     uint32_t sourcecfg[1023];
@@ -70,6 +79,9 @@ struct aplic_priv {
     /* registers */
     volatile struct aplic_regs *regs;
 
+    /* lock */
+    spinlock_t lock;
+
     /* imsic configuration */
     const struct imsic_config *imsic_cfg;
 };
diff --git a/xen/arch/riscv/include/asm/imsic.h b/xen/arch/riscv/include/asm/imsic.h
index 126e651863..d2c0178529 100644
--- a/xen/arch/riscv/include/asm/imsic.h
+++ b/xen/arch/riscv/include/asm/imsic.h
@@ -11,6 +11,7 @@
 #ifndef ASM__RISCV__IMSIC_H
 #define ASM__RISCV__IMSIC_H
 
+#include <xen/spinlock.h>
 #include <xen/types.h>
 
 #define IMSIC_MMIO_PAGE_SHIFT   12
@@ -19,6 +20,11 @@
 #define IMSIC_MIN_ID            63
 #define IMSIC_MAX_ID            2048
 
+#define IMSIC_EIP0              0x80
+#define IMSIC_EIPx_BITS         32
+
+#define IMSIC_EIE0              0xC0
+
 struct imsic_msi {
     paddr_t base_addr;
     unsigned long offset;
@@ -55,6 +61,12 @@ struct imsic_config {
 
     /* MSI */
     struct imsic_msi msi[NR_CPUS];
+
+    /* a check that IMSIC is used */
+    bool is_used;
+
+    /* lock */
+    spinlock_t lock;
 };
 
 struct dt_device_node;
@@ -63,4 +75,7 @@ int imsic_init(struct dt_device_node *n);
 struct imsic_config;
 const struct imsic_config *imsic_get_config(void);
 
+void imsic_irq_enable(unsigned int hwirq);
+void imsic_irq_disable(unsigned int hwirq);
+
 #endif /* ASM__RISCV__IMSIC_H */
-- 
2.49.0

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 4 weeks ago

On 08.04.2025 17:57, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/imsic.c
> +++ b/xen/arch/riscv/imsic.c
> @@ -14,12 +14,68 @@
>  #include <xen/errno.h>
>  #include <xen/init.h>
>  #include <xen/macros.h>
> +#include <xen/spinlock.h>
>  #include <xen/xmalloc.h>
>  
>  #include <asm/imsic.h>
>  
>  static struct imsic_config imsic_cfg;
>  
> +#define imsic_csr_set(c, v)     \
> +do {                            \
> +    csr_write(CSR_SISELECT, c); \
> +    csr_set(CSR_SIREG, v);      \
> +} while (0)
> +
> +#define imsic_csr_clear(c, v)   \
> +do {                            \
> +    csr_write(CSR_SISELECT, c); \
> +    csr_clear(CSR_SIREG, v);    \
> +} while (0)

Coming back to these (the later patch adds one more here): How expensive are
these CSR writes? IOW would it perhaps make sense to maintain a local cache
of the last written SISELECT value, to avoid writing the same one again if
the same windowed register needs accessing twice in a row?

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 9 months, 3 weeks ago

On 4/15/25 4:53 PM, Jan Beulich wrote:
> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>> --- a/xen/arch/riscv/imsic.c
>> +++ b/xen/arch/riscv/imsic.c
>> @@ -14,12 +14,68 @@
>>   #include <xen/errno.h>
>>   #include <xen/init.h>
>>   #include <xen/macros.h>
>> +#include <xen/spinlock.h>
>>   #include <xen/xmalloc.h>
>>   
>>   #include <asm/imsic.h>
>>   
>>   static struct imsic_config imsic_cfg;
>>   
>> +#define imsic_csr_set(c, v)     \
>> +do {                            \
>> +    csr_write(CSR_SISELECT, c); \
>> +    csr_set(CSR_SIREG, v);      \
>> +} while (0)
>> +
>> +#define imsic_csr_clear(c, v)   \
>> +do {                            \
>> +    csr_write(CSR_SISELECT, c); \
>> +    csr_clear(CSR_SIREG, v);    \
>> +} while (0)
> Coming back to these (the later patch adds one more here): How expensive are
> these CSR writes? IOW would it perhaps make sense to maintain a local cache
> of the last written SISELECT value, to avoid writing the same one again if
> the same windowed register needs accessing twice in a row?

CSRs belong to the HART, so access to them is very fast.

~ Oleksii

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 3 weeks ago

On 18.04.2025 12:43, Oleksii Kurochko wrote:
> 
> On 4/15/25 4:53 PM, Jan Beulich wrote:
>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>> --- a/xen/arch/riscv/imsic.c
>>> +++ b/xen/arch/riscv/imsic.c
>>> @@ -14,12 +14,68 @@
>>>   #include <xen/errno.h>
>>>   #include <xen/init.h>
>>>   #include <xen/macros.h>
>>> +#include <xen/spinlock.h>
>>>   #include <xen/xmalloc.h>
>>>   
>>>   #include <asm/imsic.h>
>>>   
>>>   static struct imsic_config imsic_cfg;
>>>   
>>> +#define imsic_csr_set(c, v)     \
>>> +do {                            \
>>> +    csr_write(CSR_SISELECT, c); \
>>> +    csr_set(CSR_SIREG, v);      \
>>> +} while (0)
>>> +
>>> +#define imsic_csr_clear(c, v)   \
>>> +do {                            \
>>> +    csr_write(CSR_SISELECT, c); \
>>> +    csr_clear(CSR_SIREG, v);    \
>>> +} while (0)
>> Coming back to these (the later patch adds one more here): How expensive are
>> these CSR writes? IOW would it perhaps make sense to maintain a local cache
>> of the last written SISELECT value, to avoid writing the same one again if
>> the same windowed register needs accessing twice in a row?
> 
> CSRs belong to the HART, so access to them is very fast.

Can you back this by any data? I view CSRs as somewhat similar to x86'es MSRs,
and access (writes in particular) to some of them is rather slow.

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 9 months, 2 weeks ago

On 4/22/25 9:02 AM, Jan Beulich wrote:
> On 18.04.2025 12:43, Oleksii Kurochko wrote:
>> On 4/15/25 4:53 PM, Jan Beulich wrote:
>>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>>> --- a/xen/arch/riscv/imsic.c
>>>> +++ b/xen/arch/riscv/imsic.c
>>>> @@ -14,12 +14,68 @@
>>>>    #include <xen/errno.h>
>>>>    #include <xen/init.h>
>>>>    #include <xen/macros.h>
>>>> +#include <xen/spinlock.h>
>>>>    #include <xen/xmalloc.h>
>>>>    
>>>>    #include <asm/imsic.h>
>>>>    
>>>>    static struct imsic_config imsic_cfg;
>>>>    
>>>> +#define imsic_csr_set(c, v)     \
>>>> +do {                            \
>>>> +    csr_write(CSR_SISELECT, c); \
>>>> +    csr_set(CSR_SIREG, v);      \
>>>> +} while (0)
>>>> +
>>>> +#define imsic_csr_clear(c, v)   \
>>>> +do {                            \
>>>> +    csr_write(CSR_SISELECT, c); \
>>>> +    csr_clear(CSR_SIREG, v);    \
>>>> +} while (0)
>>> Coming back to these (the later patch adds one more here): How expensive are
>>> these CSR writes? IOW would it perhaps make sense to maintain a local cache
>>> of the last written SISELECT value, to avoid writing the same one again if
>>> the same windowed register needs accessing twice in a row?
>> CSRs belong to the HART, so access to them is very fast.
> Can you back this by any data? I view CSRs as somewhat similar to x86'es MSRs,
> and access (writes in particular) to some of them is rather slow.

CSR read 1 cycle, CSR write 7 cycles on Microchip platform. ~ Oleksii

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 2 weeks ago

On 25.04.2025 21:31, Oleksii Kurochko wrote:
> 
> On 4/22/25 9:02 AM, Jan Beulich wrote:
>> On 18.04.2025 12:43, Oleksii Kurochko wrote:
>>> On 4/15/25 4:53 PM, Jan Beulich wrote:
>>>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>>>> --- a/xen/arch/riscv/imsic.c
>>>>> +++ b/xen/arch/riscv/imsic.c
>>>>> @@ -14,12 +14,68 @@
>>>>>    #include <xen/errno.h>
>>>>>    #include <xen/init.h>
>>>>>    #include <xen/macros.h>
>>>>> +#include <xen/spinlock.h>
>>>>>    #include <xen/xmalloc.h>
>>>>>    
>>>>>    #include <asm/imsic.h>
>>>>>    
>>>>>    static struct imsic_config imsic_cfg;
>>>>>    
>>>>> +#define imsic_csr_set(c, v)     \
>>>>> +do {                            \
>>>>> +    csr_write(CSR_SISELECT, c); \
>>>>> +    csr_set(CSR_SIREG, v);      \
>>>>> +} while (0)
>>>>> +
>>>>> +#define imsic_csr_clear(c, v)   \
>>>>> +do {                            \
>>>>> +    csr_write(CSR_SISELECT, c); \
>>>>> +    csr_clear(CSR_SIREG, v);    \
>>>>> +} while (0)
>>>> Coming back to these (the later patch adds one more here): How expensive are
>>>> these CSR writes? IOW would it perhaps make sense to maintain a local cache
>>>> of the last written SISELECT value, to avoid writing the same one again if
>>>> the same windowed register needs accessing twice in a row?
>>> CSRs belong to the HART, so access to them is very fast.
>> Can you back this by any data? I view CSRs as somewhat similar to x86'es MSRs,
>> and access (writes in particular) to some of them is rather slow.
> 
> CSR read 1 cycle, CSR write 7 cycles on Microchip platform. ~ Oleksii

And that's an in-order platform, i.e. cycle count being all that matters for
performance? No other (e.g. latency) effect on subsequent insns?

Further, how does this compare to the outlined alternative, especially if we
assumed that the respective cacheline would be hot and hence usually in L1
cache?

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 4 weeks ago

On 08.04.2025 17:57, Oleksii Kurochko wrote:
> Introduce interrupt controller descriptor for host APLIC to describe
> the low-lovel hardare. It includes implementation of the following functions:
>  - aplic_irq_startup()
>  - aplic_irq_shutdown()
>  - aplic_irq_enable()
>  - aplic_irq_disable()
>  - aplic_irq_ack()
>  - aplic_host_irq_end()
>  - aplic_set_irq_affinity()
> 
> As APLIC is used in MSI mode it requires to enable/disable interrupts not
> only for APLIC but also for IMSIC. Thereby for the purpose of
> aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().
> 
> For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
> introduced to get hart id.
> 
> Also, introduce additional interrupt controller h/w operations and
> host_irq_type for APLIC:
>  - aplic_host_irq_type
>  - aplic_set_irq_priority()
>  - aplic_set_irq_type()

Yet these two functions nor the hooks they're used to populate are entirely
unused here. Since they're also outside of the common IRQ handling machinery,
it's unclear how one would sanely ack such a change.

> --- a/xen/arch/riscv/aplic.c
> +++ b/xen/arch/riscv/aplic.c
> @@ -15,6 +15,7 @@
>  #include <xen/irq.h>
>  #include <xen/mm.h>
>  #include <xen/sections.h>
> +#include <xen/spinlock.h>
>  #include <xen/types.h>
>  #include <xen/vmap.h>
>  
> @@ -110,9 +111,173 @@ static int __init aplic_init(void)
>      return 0;
>  }
>  
> -static const struct intc_hw_operations __ro_after_init aplic_ops = {
> +static void aplic_set_irq_type(struct irq_desc *desc, unsigned int type)
> +{
> +    unsigned int irq = desc->irq - 1;

Why this adjustment by 1 (and yet both items being named "irq")?

> +    spin_lock(&aplic.lock);
> +    switch(type) {
> +        case IRQ_TYPE_EDGE_RISING:

Nit (style): Missing blanks, brace on its own line, case labels indented
like their containing switch().

> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_RISE;
> +            break;
> +        case IRQ_TYPE_EDGE_FALLING:

Blank lines please between non-fall-through case blocks.

> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_FALL;
> +            break;
> +        case IRQ_TYPE_LEVEL_HIGH:
> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_HIGH;
> +            break;
> +        case IRQ_TYPE_LEVEL_LOW:
> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_LOW;
> +            break;
> +        default:
> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_INACTIVE;
> +            break;

Is the default: label legitimate to be reached?

> +    }
> +    spin_unlock(&aplic.lock);
> +}
> +
> +static void aplic_set_irq_priority(struct irq_desc *desc,
> +                                   unsigned int priority)
> +{
> +    /* No priority, do nothing */
> +}

Since the function dopes nothing, wouldn't it be better to omit it and have
the (future) caller check for a NULL pointer ahead of making the (indirect)
call? Same remark for other handlers (below) which also do nothing.

> +static void aplic_irq_enable(struct irq_desc *desc)
> +{
> +    unsigned long flags;
> +
> +    /*
> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
> +     *       If APLIC without MSI interrupts is required in the future,
> +     *       this function will need to be updated accordingly.
> +     */
> +    ASSERT(aplic.imsic_cfg->is_used);

Such an extra field, used only for assertions, is pretty odd. Can't you
use any of the other fields to achieve the same effect?

> +    ASSERT(spin_is_locked(&desc->lock));

If this lock (which is an IRQ-safe one) is necessarily held, ...

> +    spin_lock_irqsave(&aplic.lock, flags);

... you can use just spin_lock() here.

> +    clear_bit(_IRQ_DISABLED, &desc->status);

Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
question on Arm code also doing so.)

I'm uncertain about this bit setting anyway - on x86 we would only fiddle
with it for IRQs not in use, not while enabling/disabling one.

In any event this can be done outside of the APLIC-locked region, I think.

> +    /* enable interrupt in IMSIC */

May I remind you of Xen comment style?

> +    imsic_irq_enable(desc->irq);
> +
> +    /* enable interrupt in APLIC */
> +    aplic.regs->setienum = desc->irq;

Are you sure you want to use plain assignments for MMIO accesses? I'd have
expected writel() to be used here. (And only later I realized that I didn't
spot the same already higher up from here.)

From the vague understanding I've gained so far: Isn't the APLIC closer to
the CPU and the IMSIC closer to the device? If so, wouldn't you want to
enable at the APLIC before enabling at the IMSIC? But of course that also
depends on what exactly happens in the window while one is already enabled
and the other is still disabled. (Later) From the code you add to imsic.c
it looks like it's the other way around, as the IMSIC is accessed through
CSRs.

> +    spin_unlock_irqrestore(&aplic.lock, flags);
> +}
> +
> +static void aplic_irq_disable(struct irq_desc *desc)
> +{
> +    unsigned long flags;
> +
> +    /*
> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
> +     *       If APLIC without MSI interrupts is required in the future,
> +     *       this function will need to be updated accordingly.
> +     */
> +    ASSERT(aplic.imsic_cfg->is_used);
> +
> +    ASSERT(spin_is_locked(&desc->lock));
> +
> +    spin_lock_irqsave(&aplic.lock, flags);
> +
> +    set_bit(_IRQ_DISABLED, &desc->status);
> +
> +    /* disable interrupt in APLIC */
> +    aplic.regs->clrienum = desc->irq;
> +
> +    /* disable interrupt in IMSIC */
> +    imsic_irq_disable(desc->irq);
> +
> +    spin_unlock_irqrestore(&aplic.lock, flags);
> +}
> +
> +static unsigned int aplic_irq_startup(struct irq_desc *desc)
> +{
> +    aplic_irq_enable(desc);
> +
> +    return 0;
> +}
> +
> +static void aplic_irq_shutdown(struct irq_desc *desc)
> +{
> +    aplic_irq_disable(desc);
> +}

You don't really need a separate hook function here, do you?

> +static void aplic_irq_ack(struct irq_desc *desc)
> +{
> +    /* nothing to do */
> +}
> +
> +static void aplic_host_irq_end(struct irq_desc *desc)

What's the "host" in the identifier about?

> +{
> +    /* nothing to do */
> +}
> +
> +static unsigned int aplic_get_cpu_from_mask(const cpumask_t *cpumask)
> +{
> +    unsigned int cpu;

No real need for this variable?

> +    cpumask_t possible_mask;
> +
> +    cpumask_and(&possible_mask, cpumask, &cpu_possible_map);
> +    cpu = cpumask_any(&possible_mask);

Why would you use cpu_possible_map here? That includes any offline CPUs.
I think you need to use cpu_online_map here.

> +    return cpu;
> +}
> +
> +static void aplic_set_irq_affinity(struct irq_desc *desc, const cpumask_t *mask)
> +{
> +    unsigned int cpu;
> +    uint64_t group_index, base_ppn;
> +    uint32_t hhxw, lhxw ,hhxs, value;
> +    const struct imsic_config *imsic = aplic.imsic_cfg;
> +
> +    /*
> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
> +     *       If APLIC without MSI interrupts is required in the future,
> +     *       this function will need to be updated accordingly.
> +     */
> +    ASSERT(aplic.imsic_cfg->is_used);

Use the local variable you have made yourself?

> +    ASSERT(!cpumask_empty(mask));
> +
> +    spin_lock(&aplic.lock);

Aiui the lock can be acquired quite a bit later. It ought to be needed only
around the actual write to the hardware register.

> +    cpu = cpuid_to_hartid(aplic_get_cpu_from_mask(mask));
> +    hhxw = imsic->group_index_bits;
> +    lhxw = imsic->hart_index_bits;
> +    hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
> +    base_ppn = imsic->msi[cpu].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
> +
> +    /* update hart and EEID in the target register */
> +    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);

What's this magic 12 in here? Not IMSIC_MMIO_PAGE_SHIFT I suppose?

> +    value = desc->irq;
> +    value |= cpu << APLIC_TARGET_HART_IDX_SHIFT;
> +    value |= group_index << (lhxw + APLIC_TARGET_HART_IDX_SHIFT) ;
> +    aplic.regs->target[desc->irq - 1] = value;
> +
> +    spin_unlock(&aplic.lock);
> +}
> +
> +static hw_irq_controller aplic_host_irq_type = {

const?

> --- a/xen/arch/riscv/imsic.c
> +++ b/xen/arch/riscv/imsic.c
> @@ -14,12 +14,68 @@
>  #include <xen/errno.h>
>  #include <xen/init.h>
>  #include <xen/macros.h>
> +#include <xen/spinlock.h>
>  #include <xen/xmalloc.h>
>  
>  #include <asm/imsic.h>
>  
>  static struct imsic_config imsic_cfg;
>  
> +#define imsic_csr_set(c, v)     \
> +do {                            \
> +    csr_write(CSR_SISELECT, c); \
> +    csr_set(CSR_SIREG, v);      \
> +} while (0)
> +
> +#define imsic_csr_clear(c, v)   \
> +do {                            \
> +    csr_write(CSR_SISELECT, c); \
> +    csr_clear(CSR_SIREG, v);    \
> +} while (0)
> +
> +static void imsic_local_eix_update(unsigned long base_id, unsigned long num_id,
> +                                   bool pend, bool val)
> +{
> +    unsigned long i, isel, ireg;

These can be constrained to inside the outer loop below.

> +    unsigned long id = base_id, last_id = base_id + num_id;
> +
> +    while ( id < last_id )
> +    {
> +        isel = id / __riscv_xlen;
> +        isel *= __riscv_xlen / IMSIC_EIPx_BITS;
> +        isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;

Nit: Why the parentheses?

> +        ireg = 0;
> +        for ( i = id & (__riscv_xlen - 1);
> +              (id < last_id) && (i < __riscv_xlen);
> +              i++, id++ )
> +            ireg |= (1 << i);

I wonder if this calculation really needs a loop. Afaict it's just a
consecutive set of bits you mean to set.

> +        if ( val )
> +            imsic_csr_set(isel, ireg);
> +        else
> +            imsic_csr_clear(isel, ireg);
> +    }
> +}
> +
> +void imsic_irq_enable(unsigned int hwirq)
> +{
> +    unsigned long flags;
> +
> +    spin_lock_irqsave(&imsic_cfg.lock, flags);
> +    imsic_local_eix_update(hwirq, 1, false, true);

No subtraction of 1 here? Also, why "hwirq" and not just "irq"?

> +    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
> +}
> +
> +void imsic_irq_disable(unsigned int hwirq)
> +{
> +    unsigned long flags;
> +
> +    spin_lock_irqsave(&imsic_cfg.lock, flags);
> +    imsic_local_eix_update(hwirq, 1, false, false);
> +    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
> +}
> +
>  const struct imsic_config *imsic_get_config(void)
>  {
>      return &imsic_cfg;
> @@ -277,6 +333,13 @@ int __init imsic_init(struct dt_device_node *node)
>          goto imsic_init_err;
>      }
>  
> +    spin_lock_init(&imsic_cfg.lock);
> +
> +    /* Enable local interrupt delivery */
> +    imsic_ids_local_delivery(true);

What's this? I can't find the function/macro here, nor in patch 08, nor in
staging.

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 9 months, 4 weeks ago

On 4/15/25 2:46 PM, Jan Beulich wrote:
> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>> Introduce interrupt controller descriptor for host APLIC to describe
>> the low-lovel hardare. It includes implementation of the following functions:
>>   - aplic_irq_startup()
>>   - aplic_irq_shutdown()
>>   - aplic_irq_enable()
>>   - aplic_irq_disable()
>>   - aplic_irq_ack()
>>   - aplic_host_irq_end()
>>   - aplic_set_irq_affinity()
>>
>> As APLIC is used in MSI mode it requires to enable/disable interrupts not
>> only for APLIC but also for IMSIC. Thereby for the purpose of
>> aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().
>>
>> For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
>> introduced to get hart id.
>>
>> Also, introduce additional interrupt controller h/w operations and
>> host_irq_type for APLIC:
>>   - aplic_host_irq_type
>>   - aplic_set_irq_priority()
>>   - aplic_set_irq_type()
> Yet these two functions nor the hooks they're used to populate are entirely
> unused here. Since they're also outside of the common IRQ handling machinery,
> it's unclear how one would sanely ack such a change.

They will be called by intc_route_irq_to_xen() from setup_irq() during firt time
the IRQ is setup.

>
>> --- a/xen/arch/riscv/aplic.c
>> +++ b/xen/arch/riscv/aplic.c
>> @@ -15,6 +15,7 @@
>>   #include <xen/irq.h>
>>   #include <xen/mm.h>
>>   #include <xen/sections.h>
>> +#include <xen/spinlock.h>
>>   #include <xen/types.h>
>>   #include <xen/vmap.h>
>>   
>> @@ -110,9 +111,173 @@ static int __init aplic_init(void)
>>       return 0;
>>   }
>>   
>> -static const struct intc_hw_operations __ro_after_init aplic_ops = {
>> +static void aplic_set_irq_type(struct irq_desc *desc, unsigned int type)
>> +{
>> +    unsigned int irq = desc->irq - 1;
> Why this adjustment by 1 (and yet both items being named "irq")?

Interrupt 0 isn't possible based on the spec:
```
Each of an APLIC’s interrupt sources has a fixed unique identity number 
in the range 1 to N, where N is the total number of sources at the 
APLIC. The number zero is not a valid interrupt identity number at an 
APLIC. The maximum number of interrupt sources an APLIC may support is 
1023. ``` and interrupt 1 will correspond to bit 0 in sourcecfg[] register, interrupt 
2 ->sourcecfg[1] and so on. And that is the reason why we need -1.

>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_FALL;
>> +            break;
>> +        case IRQ_TYPE_LEVEL_HIGH:
>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_HIGH;
>> +            break;
>> +        case IRQ_TYPE_LEVEL_LOW:
>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_LOW;
>> +            break;
>> +        default:
>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_INACTIVE;
>> +            break;
> Is the default: label legitimate to be reached?

 From the spec:
```
0 Inactive Inactive in this domain (and not delegated) 1 Detached 
Active, detached from the source wire 2–3 — Reserved 4 Edge1 Active, 
edge-sensitive; interrupt asserted on rising edge 5 Edge0 Active, 
edge-sensitive; interrupt asserted on falling edge 6 Level1 Active, 
level-sensitive; interrupt asserted when high 7 Level0 Active, 
level-sensitive; interrupt asserted when low ``` It seems to me like 
APLIC_SOURCECFG_SM_INACTIVE just covers cases (0-3) and inactive IRQ 
pretty safe to as a default value.

>
>> +    }
>> +    spin_unlock(&aplic.lock);
>> +}
>> +
>> +static void aplic_set_irq_priority(struct irq_desc *desc,
>> +                                   unsigned int priority)
>> +{
>> +    /* No priority, do nothing */
>> +}
> Since the function dopes nothing, wouldn't it be better to omit it and have
> the (future) caller check for a NULL pointer ahead of making the (indirect)
> call? Same remark for other handlers (below) which also do nothing.

I thought about that too, but it could be some cases when the stub is introduced
with temporary BUG_ON("unimplemented") inside just to not miss to implement it
when it will be necessary.
If we will have only the caller check then we could miss to implement such stubs.

>
>> +static void aplic_irq_enable(struct irq_desc *desc)
>> +{
>> +    unsigned long flags;
>> +
>> +    /*
>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>> +     *       If APLIC without MSI interrupts is required in the future,
>> +     *       this function will need to be updated accordingly.
>> +     */
>> +    ASSERT(aplic.imsic_cfg->is_used);
> Such an extra field, used only for assertions, is pretty odd. Can't you
> use any of the other fields to achieve the same effect?

in aplic_init() there is:
     /* check for associated imsic node */
     rc = dt_property_read_u32(node, "msi-parent", &imsic_phandle);
     if ( !rc )
         panic("%s: IDC mode not supported\n", node->full_name);

So we will have panic() anyway if MSI mode isn't supported. As an option we
can just drop the ASSERT.

Or introduce static variable in aplic.c `aplic_mode`, init it in aplic_init()
and use it in ASSERT().

>
>> +    ASSERT(spin_is_locked(&desc->lock));
> If this lock (which is an IRQ-safe one) is necessarily held, ...
>
>> +    spin_lock_irqsave(&aplic.lock, flags);
> ... you can use just spin_lock() here.
>
>> +    clear_bit(_IRQ_DISABLED, &desc->status);
> Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
> question on Arm code also doing so.)

I haven't thought about that. Likely non-atomic bitop could be used here.

>
> I'm uncertain about this bit setting anyway - on x86 we would only fiddle
> with it for IRQs not in use, not while enabling/disabling one.
>
> In any event this can be done outside of the APLIC-locked region, I think.

Considering that we are doing that under desc->lock, agree we can move that outside
the APLIC-locked region.

>> +    imsic_irq_enable(desc->irq);
>> +
>> +    /* enable interrupt in APLIC */
>> +    aplic.regs->setienum = desc->irq;
> Are you sure you want to use plain assignments for MMIO accesses? I'd have
> expected writel() to be used here. (And only later I realized that I didn't
> spot the same already higher up from here.)

Good point. I have to update that with writel()...

>
>  From the vague understanding I've gained so far: Isn't the APLIC closer to
> the CPU and the IMSIC closer to the device? If so, wouldn't you want to
> enable at the APLIC before enabling at the IMSIC? But of course that also
> depends on what exactly happens in the window while one is already enabled
> and the other is still disabled. (Later) From the code you add to imsic.c
> it looks like it's the other way around, as the IMSIC is accessed through
> CSRs.

 From the AIA spec:
```
An Incoming MSI Controller (IMSIC) is an optional RISC-V hardware component
that is closely coupled with a hart, one IMSIC per hart. An IMSIC receives
and records incoming message-signaled interrupts (MSIs) for a hart, and
signals to the hart when there are pending and enabled interrupts to be
serviced.
```

Based on the figure 2 (Interrupt delivery by MSIs when harts have IMSICs for receiving them)
of AIA spechttps://github.com/riscv/riscv-aia/blob/main/src/intrsWithIMSICs.png
IMSIC is more close to CPU and APLIC is more close to the device. The external interrupt
controller is APLIC and it only sends a MSI message for a CPU.

The logical flow of an interrupt to a hart with an IMSIC would be:
1. A physical interrupt signal arrives at the APLIC.
2. The APLIC, if configured for MSI delivery mode (domaincfg.DM = 1) and if the specific
    interrupt source is active and enabled within its domain (controlled by sourcecfg[i]
    and the global Interrupt Enable bit IE in domaincfg), will generate an MSI.
3. This MSI is then sent to the target hart's IMSIC. The APLIC needs to know the MSI
    target address for each hart, which can be hardwired or configured through registers
    like mmsiaddrcfg and mmsiaddrcfgh.
4. The receiving hart's IMSIC records this MSI as a pending interrupt.
5. If the corresponding interrupt identity is enabled within the IMSIC's interrupt file,
    the IMSIC will signal the hart, typically by setting the MEIP or SEIP bit in the mip
    CSR (or sip CSR).

Generally, I think that the order in which enable interrupts doesn't really matter as
if you were to enable the IMSIC to receive a certain interrupt before the APLIC was
configured to send it (or had a pending interrupt from the device), the IMSIC would
simply be waiting for an MSI that wouldn't arrive.
Similarly, if the APLIC sends an MSI for an interrupt that is not enabled in the IMSIC,
the interrupt would remain pending in the IMSIC but wouldn't trigger an interrupt at
the hart.

IMO, the order which is used now in the code is pretty logical.

Does it make sense?

>
>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>> +}
>> +
>> +static void aplic_irq_disable(struct irq_desc *desc)
>> +{
>> +    unsigned long flags;
>> +
>> +    /*
>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>> +     *       If APLIC without MSI interrupts is required in the future,
>> +     *       this function will need to be updated accordingly.
>> +     */
>> +    ASSERT(aplic.imsic_cfg->is_used);
>> +
>> +    ASSERT(spin_is_locked(&desc->lock));
>> +
>> +    spin_lock_irqsave(&aplic.lock, flags);
>> +
>> +    set_bit(_IRQ_DISABLED, &desc->status);
>> +
>> +    /* disable interrupt in APLIC */
>> +    aplic.regs->clrienum = desc->irq;
>> +
>> +    /* disable interrupt in IMSIC */
>> +    imsic_irq_disable(desc->irq);
>> +
>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>> +}
>> +
>> +static unsigned int aplic_irq_startup(struct irq_desc *desc)
>> +{
>> +    aplic_irq_enable(desc);
>> +
>> +    return 0;
>> +}
>> +
>> +static void aplic_irq_shutdown(struct irq_desc *desc)
>> +{
>> +    aplic_irq_disable(desc);
>> +}
> You don't really need a separate hook function here, do you?

With such implementation it is really not needed to have a hook so
I will drop it.

>> +static void aplic_irq_ack(struct irq_desc *desc)
>> +{
>> +    /* nothing to do */
>> +}
>> +
>> +static void aplic_host_irq_end(struct irq_desc *desc)
> What's the "host" in the identifier about?

It was copied that from Arm and my understanding that it means
Xen-related IRQ as they also have:
```
/* XXX different for level vs edge */
static hw_irq_controller gicv2_host_irq_type = {
...
     .end          = gicv2_host_irq_end,
...
};

static hw_irq_controller gicv2_guest_irq_type = {
...
     .end          = gicv2_guest_irq_end,
...
};
```


>
>> +{
>> +    /* nothing to do */
>> +}
>> +
>> +static unsigned int aplic_get_cpu_from_mask(const cpumask_t *cpumask)
>> +{
>> +    unsigned int cpu;
> No real need for this variable?

Yes, it could be dropped.

>
>> +    cpumask_t possible_mask;
>> +
>> +    cpumask_and(&possible_mask, cpumask, &cpu_possible_map);
>> +    cpu = cpumask_any(&possible_mask);
> Why would you use cpu_possible_map here? That includes any offline CPUs.
> I think you need to use cpu_online_map here.

It makes sense, Ill switch to cpu_online_map.

>
>> +    return cpu;
>> +}
>> +
>> +static void aplic_set_irq_affinity(struct irq_desc *desc, const cpumask_t *mask)
>> +{
>> +    unsigned int cpu;
>> +    uint64_t group_index, base_ppn;
>> +    uint32_t hhxw, lhxw ,hhxs, value;
>> +    const struct imsic_config *imsic = aplic.imsic_cfg;
>> +
>> +    /*
>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>> +     *       If APLIC without MSI interrupts is required in the future,
>> +     *       this function will need to be updated accordingly.
>> +     */
>> +    ASSERT(aplic.imsic_cfg->is_used);
> Use the local variable you have made yourself?

What do you mean by local here?

>
>> +    ASSERT(!cpumask_empty(mask));
>> +
>> +    spin_lock(&aplic.lock);
> Aiui the lock can be acquired quite a bit later. It ought to be needed only
> around the actual write to the hardware register.
>
>> +    cpu = cpuid_to_hartid(aplic_get_cpu_from_mask(mask));
>> +    hhxw = imsic->group_index_bits;
>> +    lhxw = imsic->hart_index_bits;
>> +    hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
>> +    base_ppn = imsic->msi[cpu].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
>> +
>> +    /* update hart and EEID in the target register */
>> +    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);
> What's this magic 12 in here? Not IMSIC_MMIO_PAGE_SHIFT I suppose?

In the AIA spec they are using 12 explicitly:https://github.com/riscv/riscv-aia/blob/main/src/AdvPLIC.adoc#AdvPLIC-MSIAddrs

>> +    unsigned long id = base_id, last_id = base_id + num_id;
>> +
>> +    while ( id < last_id )
>> +    {
>> +        isel = id / __riscv_xlen;
>> +        isel *= __riscv_xlen / IMSIC_EIPx_BITS;
>> +        isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;
> Nit: Why the parentheses?
>
>> +        ireg = 0;
>> +        for ( i = id & (__riscv_xlen - 1);
>> +              (id < last_id) && (i < __riscv_xlen);
>> +              i++, id++ )
>> +            ireg |= (1 << i);
> I wonder if this calculation really needs a loop. Afaict it's just a
> consecutive set of bits you mean to set.

Good point. I will double-check.

>
>> +        if ( val )
>> +            imsic_csr_set(isel, ireg);
>> +        else
>> +            imsic_csr_clear(isel, ireg);
>> +    }
>> +}
>> +
>> +void imsic_irq_enable(unsigned int hwirq)
>> +{
>> +    unsigned long flags;
>> +
>> +    spin_lock_irqsave(&imsic_cfg.lock, flags);
>> +    imsic_local_eix_update(hwirq, 1, false, true);
> No subtraction of 1 here? Also, why "hwirq" and not just "irq"?

 From the spec:
```

When an interrupt file supports distinct interrupt identities, valid identity numbers are between 1
and inclusive. The identity numbers within this range are said to be implemented by the interrupt
file; numbers outside this range are not implemented. The number zero is never a valid interrupt
identity.
...

Bit positions in a valid eiek register that don’t correspond to a 
supported interrupt identity (such as bit 0 of eie0) are read-only zeros.


```

So in EIx registers interrupt i corresponds to bit i in comparison wiht APLIC's sourcecfg which starts from 0.

>
>> +    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
>> +}
>> +
>> +void imsic_irq_disable(unsigned int hwirq)
>> +{
>> +    unsigned long flags;
>> +
>> +    spin_lock_irqsave(&imsic_cfg.lock, flags);
>> +    imsic_local_eix_update(hwirq, 1, false, false);
>> +    spin_unlock_irqrestore(&imsic_cfg.lock, flags);
>> +}
>> +
>>   const struct imsic_config *imsic_get_config(void)
>>   {
>>       return &imsic_cfg;
>> @@ -277,6 +333,13 @@ int __init imsic_init(struct dt_device_node *node)
>>           goto imsic_init_err;
>>       }
>>   
>> +    spin_lock_init(&imsic_cfg.lock);
>> +
>> +    /* Enable local interrupt delivery */
>> +    imsic_ids_local_delivery(true);
> What's this? I can't find the function/macro here, nor in patch 08, nor in
> staging.

It is defined in imsic.c:
```
void imsic_ids_local_delivery(bool enable)
{
     if ( enable )
     {
         imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
         imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
     }
     else
     {
         imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
         imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
     }
}
```

Thanks for review.

~ Oleksii

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 3 weeks ago

On 16.04.2025 21:05, Oleksii Kurochko wrote:
> On 4/15/25 2:46 PM, Jan Beulich wrote:
>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>> Introduce interrupt controller descriptor for host APLIC to describe
>>> the low-lovel hardare. It includes implementation of the following functions:
>>>   - aplic_irq_startup()
>>>   - aplic_irq_shutdown()
>>>   - aplic_irq_enable()
>>>   - aplic_irq_disable()
>>>   - aplic_irq_ack()
>>>   - aplic_host_irq_end()
>>>   - aplic_set_irq_affinity()
>>>
>>> As APLIC is used in MSI mode it requires to enable/disable interrupts not
>>> only for APLIC but also for IMSIC. Thereby for the purpose of
>>> aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().
>>>
>>> For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
>>> introduced to get hart id.
>>>
>>> Also, introduce additional interrupt controller h/w operations and
>>> host_irq_type for APLIC:
>>>   - aplic_host_irq_type
>>>   - aplic_set_irq_priority()
>>>   - aplic_set_irq_type()
>> Yet these two functions nor the hooks they're used to populate are entirely
>> unused here. Since they're also outside of the common IRQ handling machinery,
>> it's unclear how one would sanely ack such a change.
> 
> They will be called by intc_route_irq_to_xen() from setup_irq() during firt time
> the IRQ is setup.

Perhaps move their introduction to there then? We don't do any Misra checking
yet lon RISC-V, but imo it's still good practice to avoid introducing new
violations, even if only temporarily.

>>> --- a/xen/arch/riscv/aplic.c
>>> +++ b/xen/arch/riscv/aplic.c
>>> @@ -15,6 +15,7 @@
>>>   #include <xen/irq.h>
>>>   #include <xen/mm.h>
>>>   #include <xen/sections.h>
>>> +#include <xen/spinlock.h>
>>>   #include <xen/types.h>
>>>   #include <xen/vmap.h>
>>>   
>>> @@ -110,9 +111,173 @@ static int __init aplic_init(void)
>>>       return 0;
>>>   }
>>>   
>>> -static const struct intc_hw_operations __ro_after_init aplic_ops = {
>>> +static void aplic_set_irq_type(struct irq_desc *desc, unsigned int type)
>>> +{
>>> +    unsigned int irq = desc->irq - 1;
>> Why this adjustment by 1 (and yet both items being named "irq")?
> 
> Interrupt 0 isn't possible based on the spec:
> ```
> Each of an APLIC’s interrupt sources has a fixed unique identity number 
> in the range 1 to N, where N is the total number of sources at the 
> APLIC. The number zero is not a valid interrupt identity number at an 
> APLIC. The maximum number of interrupt sources an APLIC may support is 
> 1023. ``` and interrupt 1 will correspond to bit 0 in sourcecfg[] register, interrupt 
> 2 ->sourcecfg[1] and so on. And that is the reason why we need -1.

Okay, fine. But what about the part of the question in parentheses?

>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_FALL;
>>> +            break;
>>> +        case IRQ_TYPE_LEVEL_HIGH:
>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_HIGH;
>>> +            break;
>>> +        case IRQ_TYPE_LEVEL_LOW:
>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_LOW;
>>> +            break;
>>> +        default:
>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_INACTIVE;
>>> +            break;
>> Is the default: label legitimate to be reached?
> 
>  From the spec:
> ```
> 0 Inactive Inactive in this domain (and not delegated) 1 Detached 
> Active, detached from the source wire 2–3 — Reserved 4 Edge1 Active, 
> edge-sensitive; interrupt asserted on rising edge 5 Edge0 Active, 
> edge-sensitive; interrupt asserted on falling edge 6 Level1 Active, 
> level-sensitive; interrupt asserted when high 7 Level0 Active, 
> level-sensitive; interrupt asserted when low ``` It seems to me like 
> APLIC_SOURCECFG_SM_INACTIVE just covers cases (0-3) and inactive IRQ 
> pretty safe to as a default value.

I fear this doesn't answer my question, which is to a large part related
to the Xen code, and only to some degree to the spec.

>>> +static void aplic_set_irq_priority(struct irq_desc *desc,
>>> +                                   unsigned int priority)
>>> +{
>>> +    /* No priority, do nothing */
>>> +}
>> Since the function dopes nothing, wouldn't it be better to omit it and have
>> the (future) caller check for a NULL pointer ahead of making the (indirect)
>> call? Same remark for other handlers (below) which also do nothing.
> 
> I thought about that too, but it could be some cases when the stub is introduced
> with temporary BUG_ON("unimplemented") inside just to not miss to implement it
> when it will be necessary.
> If we will have only the caller check then we could miss to implement such stubs.

I guess I don't understand the concern.

>>> +static void aplic_irq_enable(struct irq_desc *desc)
>>> +{
>>> +    unsigned long flags;
>>> +
>>> +    /*
>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>> +     *       If APLIC without MSI interrupts is required in the future,
>>> +     *       this function will need to be updated accordingly.
>>> +     */
>>> +    ASSERT(aplic.imsic_cfg->is_used);
>> Such an extra field, used only for assertions, is pretty odd. Can't you
>> use any of the other fields to achieve the same effect?
> 
> in aplic_init() there is:
>      /* check for associated imsic node */
>      rc = dt_property_read_u32(node, "msi-parent", &imsic_phandle);
>      if ( !rc )
>          panic("%s: IDC mode not supported\n", node->full_name);
> 
> So we will have panic() anyway if MSI mode isn't supported. As an option we
> can just drop the ASSERT.

Since they serve primarily as a reminder where changes would need making,
I'd prefer if they could be kept.

> Or introduce static variable in aplic.c `aplic_mode`, init it in aplic_init()
> and use it in ASSERT().

This would then again be used solely for assertions, aiui? As said, I
think it would be preferable if some already existing indicator could be
used for this purpose.

>>> +    ASSERT(spin_is_locked(&desc->lock));
>> If this lock (which is an IRQ-safe one) is necessarily held, ...
>>
>>> +    spin_lock_irqsave(&aplic.lock, flags);
>> ... you can use just spin_lock() here.
>>
>>> +    clear_bit(_IRQ_DISABLED, &desc->status);
>> Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
>> question on Arm code also doing so.)
> 
> I haven't thought about that. Likely non-atomic bitop could be used here.

And then - does it need to be a bitop? Aiui that's what Arm uses, while x86
doesn't. And I see no reason to use other than plain C operators here. If
Arm was switched, presumably all the redundant (and misnamed) _IRQ_*
constants could go away, with just the IRQ_* ones left.

>> I'm uncertain about this bit setting anyway - on x86 we would only fiddle
>> with it for IRQs not in use, not while enabling/disabling one.

What about this part?

>> In any event this can be done outside of the APLIC-locked region, I think.
> 
> Considering that we are doing that under desc->lock, agree we can move that outside
> the APLIC-locked region.
> 
>>> +    imsic_irq_enable(desc->irq);
>>> +
>>> +    /* enable interrupt in APLIC */
>>> +    aplic.regs->setienum = desc->irq;
>> Are you sure you want to use plain assignments for MMIO accesses? I'd have
>> expected writel() to be used here. (And only later I realized that I didn't
>> spot the same already higher up from here.)
> 
> Good point. I have to update that with writel()...
> 
>>
>>  From the vague understanding I've gained so far: Isn't the APLIC closer to
>> the CPU and the IMSIC closer to the device? If so, wouldn't you want to
>> enable at the APLIC before enabling at the IMSIC? But of course that also
>> depends on what exactly happens in the window while one is already enabled
>> and the other is still disabled. (Later) From the code you add to imsic.c
>> it looks like it's the other way around, as the IMSIC is accessed through
>> CSRs.
> 
>  From the AIA spec:
> ```
> An Incoming MSI Controller (IMSIC) is an optional RISC-V hardware component
> that is closely coupled with a hart, one IMSIC per hart. An IMSIC receives
> and records incoming message-signaled interrupts (MSIs) for a hart, and
> signals to the hart when there are pending and enabled interrupts to be
> serviced.
> ```
> 
> Based on the figure 2 (Interrupt delivery by MSIs when harts have IMSICs for receiving them)
> of AIA spechttps://github.com/riscv/riscv-aia/blob/main/src/intrsWithIMSICs.png
> IMSIC is more close to CPU and APLIC is more close to the device. The external interrupt
> controller is APLIC and it only sends a MSI message for a CPU.
> 
> The logical flow of an interrupt to a hart with an IMSIC would be:
> 1. A physical interrupt signal arrives at the APLIC.
> 2. The APLIC, if configured for MSI delivery mode (domaincfg.DM = 1) and if the specific
>     interrupt source is active and enabled within its domain (controlled by sourcecfg[i]
>     and the global Interrupt Enable bit IE in domaincfg), will generate an MSI.
> 3. This MSI is then sent to the target hart's IMSIC. The APLIC needs to know the MSI
>     target address for each hart, which can be hardwired or configured through registers
>     like mmsiaddrcfg and mmsiaddrcfgh.
> 4. The receiving hart's IMSIC records this MSI as a pending interrupt.
> 5. If the corresponding interrupt identity is enabled within the IMSIC's interrupt file,
>     the IMSIC will signal the hart, typically by setting the MEIP or SEIP bit in the mip
>     CSR (or sip CSR).
> 
> Generally, I think that the order in which enable interrupts doesn't really matter as
> if you were to enable the IMSIC to receive a certain interrupt before the APLIC was
> configured to send it (or had a pending interrupt from the device), the IMSIC would
> simply be waiting for an MSI that wouldn't arrive.
> Similarly, if the APLIC sends an MSI for an interrupt that is not enabled in the IMSIC,
> the interrupt would remain pending in the IMSIC but wouldn't trigger an interrupt at
> the hart.
> 
> IMO, the order which is used now in the code is pretty logical.
> 
> Does it make sense?

Except for the "doesn't really matter" - yes. In a reply to a later patch I
indicated I realized that IMSIC is what's closer to the CPU (and hence later
in the chain of interrupt delivery actions).

>>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>>> +}
>>> +
>>> +static void aplic_irq_disable(struct irq_desc *desc)
>>> +{
>>> +    unsigned long flags;
>>> +
>>> +    /*
>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>> +     *       If APLIC without MSI interrupts is required in the future,
>>> +     *       this function will need to be updated accordingly.
>>> +     */
>>> +    ASSERT(aplic.imsic_cfg->is_used);
>>> +
>>> +    ASSERT(spin_is_locked(&desc->lock));
>>> +
>>> +    spin_lock_irqsave(&aplic.lock, flags);
>>> +
>>> +    set_bit(_IRQ_DISABLED, &desc->status);
>>> +
>>> +    /* disable interrupt in APLIC */
>>> +    aplic.regs->clrienum = desc->irq;
>>> +
>>> +    /* disable interrupt in IMSIC */
>>> +    imsic_irq_disable(desc->irq);
>>> +
>>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>>> +}
>>> +
>>> +static unsigned int aplic_irq_startup(struct irq_desc *desc)
>>> +{
>>> +    aplic_irq_enable(desc);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void aplic_irq_shutdown(struct irq_desc *desc)
>>> +{
>>> +    aplic_irq_disable(desc);
>>> +}
>> You don't really need a separate hook function here, do you?
> 
> With such implementation it is really not needed to have a hook so
> I will drop it.
> 
>>> +static void aplic_irq_ack(struct irq_desc *desc)
>>> +{
>>> +    /* nothing to do */
>>> +}
>>> +
>>> +static void aplic_host_irq_end(struct irq_desc *desc)
>> What's the "host" in the identifier about?
> 
> It was copied that from Arm and my understanding that it means
> Xen-related IRQ as they also have:
> ```
> /* XXX different for level vs edge */
> static hw_irq_controller gicv2_host_irq_type = {
> ...
>      .end          = gicv2_host_irq_end,
> ...
> };
> 
> static hw_irq_controller gicv2_guest_irq_type = {
> ...
>      .end          = gicv2_guest_irq_end,
> ...
> };
> ```

And you expect to end up with a similar distinction on RISC-V? There's
nothing like that on x86, just to mention it.

>>> +static void aplic_set_irq_affinity(struct irq_desc *desc, const cpumask_t *mask)
>>> +{
>>> +    unsigned int cpu;
>>> +    uint64_t group_index, base_ppn;
>>> +    uint32_t hhxw, lhxw ,hhxs, value;
>>> +    const struct imsic_config *imsic = aplic.imsic_cfg;
>>> +
>>> +    /*
>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>> +     *       If APLIC without MSI interrupts is required in the future,
>>> +     *       this function will need to be updated accordingly.
>>> +     */
>>> +    ASSERT(aplic.imsic_cfg->is_used);
>> Use the local variable you have made yourself?
> 
> What do you mean by local here?

Just a few lines up you latch aplic.imsic_cfg into the local "imsic".

>>> +    ASSERT(!cpumask_empty(mask));
>>> +
>>> +    spin_lock(&aplic.lock);
>> Aiui the lock can be acquired quite a bit later. It ought to be needed only
>> around the actual write to the hardware register.
>>
>>> +    cpu = cpuid_to_hartid(aplic_get_cpu_from_mask(mask));
>>> +    hhxw = imsic->group_index_bits;
>>> +    lhxw = imsic->hart_index_bits;
>>> +    hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
>>> +    base_ppn = imsic->msi[cpu].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
>>> +
>>> +    /* update hart and EEID in the target register */
>>> +    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);
>> What's this magic 12 in here? Not IMSIC_MMIO_PAGE_SHIFT I suppose?
> 
> In the AIA spec they are using 12 explicitly:https://github.com/riscv/riscv-aia/blob/main/src/AdvPLIC.adoc#AdvPLIC-MSIAddrs

In the spec that's fine, but please make yourself a constant with a suitable
name then, to be used here. Just consider what would happen if we used literal
12 everywhere PAGE_SHIFT was meant.

>>> +void imsic_irq_enable(unsigned int hwirq)
>>> +{
>>> +    unsigned long flags;
>>> +
>>> +    spin_lock_irqsave(&imsic_cfg.lock, flags);
>>> +    imsic_local_eix_update(hwirq, 1, false, true);
>> No subtraction of 1 here? Also, why "hwirq" and not just "irq"?
> 
>  From the spec:
> ```
> 
> When an interrupt file supports distinct interrupt identities, valid identity numbers are between 1
> and inclusive. The identity numbers within this range are said to be implemented by the interrupt
> file; numbers outside this range are not implemented. The number zero is never a valid interrupt
> identity.
> ...
> 
> Bit positions in a valid eiek register that don’t correspond to a 
> supported interrupt identity (such as bit 0 of eie0) are read-only zeros.
> 
> 
> ```
> 
> So in EIx registers interrupt i corresponds to bit i in comparison wiht APLIC's sourcecfg which starts from 0.

Confusing, but what do you do.

>>> @@ -277,6 +333,13 @@ int __init imsic_init(struct dt_device_node *node)
>>>           goto imsic_init_err;
>>>       }
>>>   
>>> +    spin_lock_init(&imsic_cfg.lock);
>>> +
>>> +    /* Enable local interrupt delivery */
>>> +    imsic_ids_local_delivery(true);
>> What's this? I can't find the function/macro here, nor in patch 08, nor in
>> staging.
> 
> It is defined in imsic.c:
> ```
> void imsic_ids_local_delivery(bool enable)
> {
>      if ( enable )
>      {
>          imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
>          imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
>      }
>      else
>      {
>          imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
>          imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
>      }
> }
> ```

No, it's not. As noted in the reply to a later patch, it's only introduced
there. Hence the build will break between the two patches.

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 9 months, 2 weeks ago

On 4/17/25 8:25 AM, Jan Beulich wrote:
> On 16.04.2025 21:05, Oleksii Kurochko wrote:
>> On 4/15/25 2:46 PM, Jan Beulich wrote:
>>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>>> Introduce interrupt controller descriptor for host APLIC to describe
>>>> the low-lovel hardare. It includes implementation of the following functions:
>>>>    - aplic_irq_startup()
>>>>    - aplic_irq_shutdown()
>>>>    - aplic_irq_enable()
>>>>    - aplic_irq_disable()
>>>>    - aplic_irq_ack()
>>>>    - aplic_host_irq_end()
>>>>    - aplic_set_irq_affinity()
>>>>
>>>> As APLIC is used in MSI mode it requires to enable/disable interrupts not
>>>> only for APLIC but also for IMSIC. Thereby for the purpose of
>>>> aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().
>>>>
>>>> For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
>>>> introduced to get hart id.
>>>>
>>>> Also, introduce additional interrupt controller h/w operations and
>>>> host_irq_type for APLIC:
>>>>    - aplic_host_irq_type
>>>>    - aplic_set_irq_priority()
>>>>    - aplic_set_irq_type()
>>> Yet these two functions nor the hooks they're used to populate are entirely
>>> unused here. Since they're also outside of the common IRQ handling machinery,
>>> it's unclear how one would sanely ack such a change.
>> They will be called by intc_route_irq_to_xen() from setup_irq() during firt time
>> the IRQ is setup.
> Perhaps move their introduction to there then? We don't do any Misra checking
> yet lon RISC-V, but imo it's still good practice to avoid introducing new
> violations, even if only temporarily.

Okay, I will move their introduction to there.

Btw, what is needed to add Misra checking for RISC-V? I started to think that, probably,
it will make sense to do that from the start.

>>>> --- a/xen/arch/riscv/aplic.c
>>>> +++ b/xen/arch/riscv/aplic.c
>>>> @@ -15,6 +15,7 @@
>>>>    #include <xen/irq.h>
>>>>    #include <xen/mm.h>
>>>>    #include <xen/sections.h>
>>>> +#include <xen/spinlock.h>
>>>>    #include <xen/types.h>
>>>>    #include <xen/vmap.h>
>>>>    
>>>> @@ -110,9 +111,173 @@ static int __init aplic_init(void)
>>>>        return 0;
>>>>    }
>>>>    
>>>> -static const struct intc_hw_operations __ro_after_init aplic_ops = {
>>>> +static void aplic_set_irq_type(struct irq_desc *desc, unsigned int type)
>>>> +{
>>>> +    unsigned int irq = desc->irq - 1;
>>> Why this adjustment by 1 (and yet both items being named "irq")?
>> Interrupt 0 isn't possible based on the spec:
>> ```
>> Each of an APLIC’s interrupt sources has a fixed unique identity number
>> in the range 1 to N, where N is the total number of sources at the
>> APLIC. The number zero is not a valid interrupt identity number at an
>> APLIC. The maximum number of interrupt sources an APLIC may support is
>> 1023. ``` and interrupt 1 will correspond to bit 0 in sourcecfg[] register, interrupt
>> 2 ->sourcecfg[1] and so on. And that is the reason why we need -1.
> Okay, fine. But what about the part of the question in parentheses?

Sorry, missed to write that I'll change irq to irq_bit or something like that.

>>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_EDGE_FALL;
>>>> +            break;
>>>> +        case IRQ_TYPE_LEVEL_HIGH:
>>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_HIGH;
>>>> +            break;
>>>> +        case IRQ_TYPE_LEVEL_LOW:
>>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_LEVEL_LOW;
>>>> +            break;
>>>> +        default:
>>>> +            aplic.regs->sourcecfg[irq] = APLIC_SOURCECFG_SM_INACTIVE;
>>>> +            break;
>>> Is the default: label legitimate to be reached?
>>   From the spec:
>> ```
>> 0 Inactive Inactive in this domain (and not delegated) 1 Detached
>> Active, detached from the source wire 2–3 — Reserved 4 Edge1 Active,
>> edge-sensitive; interrupt asserted on rising edge 5 Edge0 Active,
>> edge-sensitive; interrupt asserted on falling edge 6 Level1 Active,
>> level-sensitive; interrupt asserted when high 7 Level0 Active,
>> level-sensitive; interrupt asserted when low ``` It seems to me like
>> APLIC_SOURCECFG_SM_INACTIVE just covers cases (0-3) and inactive IRQ
>> pretty safe to as a default value.
> I fear this doesn't answer my question, which is to a large part related
> to the Xen code, and only to some degree to the spec.

 From Xen code point of view, I am not sure if it legitimate or not. I've not any
issue, at the moment, with such implementation, but to be on a safe side I'll
implement default case as panic("...").

>>>> +static void aplic_set_irq_priority(struct irq_desc *desc,
>>>> +                                   unsigned int priority)
>>>> +{
>>>> +    /* No priority, do nothing */
>>>> +}
>>> Since the function dopes nothing, wouldn't it be better to omit it and have
>>> the (future) caller check for a NULL pointer ahead of making the (indirect)
>>> call? Same remark for other handlers (below) which also do nothing.
>> I thought about that too, but it could be some cases when the stub is introduced
>> with temporary BUG_ON("unimplemented") inside just to not miss to implement it
>> when it will be necessary.
>> If we will have only the caller check then we could miss to implement such stubs.
> I guess I don't understand the concern.

for example, if we will have the following code:
   void some_caller(struct irq_desc *desc)
   {
       if ( desc->handler->set_affinity )
           desc->handler->set_affinity(desc, cpu_mask);
   }

Then we will skip the call of handler->set_affinity() (if it was just initialized with
.set_affinity = NULL) without any notification. And it is fine specifically in this
case as aplic_set_irq_priority() does nothing.

But what about the cases if it is a function which will have some implementation in the
future but doesn't have implementation for now. Then without notification that this
function is unimplemented we could skip something what really matters.

But I think that your initial comment was just about the function which basically
does nothing. Am i right?

>>>> +static void aplic_irq_enable(struct irq_desc *desc)
>>>> +{
>>>> +    unsigned long flags;
>>>> +
>>>> +    /*
>>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>>> +     *       If APLIC without MSI interrupts is required in the future,
>>>> +     *       this function will need to be updated accordingly.
>>>> +     */
>>>> +    ASSERT(aplic.imsic_cfg->is_used);
>>> Such an extra field, used only for assertions, is pretty odd. Can't you
>>> use any of the other fields to achieve the same effect?
>> in aplic_init() there is:
>>       /* check for associated imsic node */
>>       rc = dt_property_read_u32(node, "msi-parent", &imsic_phandle);
>>       if ( !rc )
>>           panic("%s: IDC mode not supported\n", node->full_name);
>>
>> So we will have panic() anyway if MSI mode isn't supported. As an option we
>> can just drop the ASSERT.
> Since they serve primarily as a reminder where changes would need making,
> I'd prefer if they could be kept.
>
>> Or introduce static variable in aplic.c `aplic_mode`, init it in aplic_init()
>> and use it in ASSERT().
> This would then again be used solely for assertions, aiui? As said, I
> think it would be preferable if some already existing indicator could be
> used for this purpose.

I think that not solely, for example, if IMSIC isn't available then we should skip
the calls of imsic_irq_enable(), at least, and this variable could be used for that
purpose.

But I will double check if we have better indicator. At the moment, I don't think
we have better, probably, except checking of aplic.regs->domaincfg if bit APLIC_DOMAINCFG_DM
is set.

>>>> +    ASSERT(spin_is_locked(&desc->lock));
>>> If this lock (which is an IRQ-safe one) is necessarily held, ...
>>>
>>>> +    spin_lock_irqsave(&aplic.lock, flags);
>>> ... you can use just spin_lock() here.
>>>
>>>> +    clear_bit(_IRQ_DISABLED, &desc->status);
>>> Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
>>> question on Arm code also doing so.)
>> I haven't thought about that. Likely non-atomic bitop could be used here.
> And then - does it need to be a bitop? Aiui that's what Arm uses, while x86
> doesn't. And I see no reason to use other than plain C operators here. If
> Arm was switched, presumably all the redundant (and misnamed) _IRQ_*
> constants could go away, with just the IRQ_* ones left.

The reason for a bitop in Arm is explained in this commithttps://gitlab.com/xen-project/xen/-/commit/50d8fe8fcbab2440cfeeb65c4765868398652473
but all the places where plain C operators were changed to bitops are actually executed under|spin_lock_irqsave(&desc->lock, flags). By quick look I found only two 
places one in __setup_irq() but it is called by the functions which do ||spin_lock_irqsave(&desc->lock, flags) and in vgic_v2_fold_lr_state(). 
Maybe, I'm missing something.|
|RISC-V won't have something similar to ||vgic_v2_fold_lr_state|(), but __setup_irq() is used in a similar way. It can be added ASSERT(spin_is_lock(&desc->lock))
and then it will also safe to use non-bitop function.
Probably, it is a little bit safer to use always bitops for desc->status.
||

>>> I'm uncertain about this bit setting anyway - on x86 we would only fiddle
>>> with it for IRQs not in use, not while enabling/disabling one.
> What about this part?

As I understand, based on Arm, code then Xen enables interrupts corresponding to devices assigned
to dom0/domU before booting dom0/domU, resulting in the possibility of receiving an interrupt
and not knowing what to do with it. So it is needed for enablement of IRQs when the guest
requests it and not unconditionally at boot time.

>>>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>>>> +}
>>>> +
>>>> +static void aplic_irq_disable(struct irq_desc *desc)
>>>> +{
>>>> +    unsigned long flags;
>>>> +
>>>> +    /*
>>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>>> +     *       If APLIC without MSI interrupts is required in the future,
>>>> +     *       this function will need to be updated accordingly.
>>>> +     */
>>>> +    ASSERT(aplic.imsic_cfg->is_used);
>>>> +
>>>> +    ASSERT(spin_is_locked(&desc->lock));
>>>> +
>>>> +    spin_lock_irqsave(&aplic.lock, flags);
>>>> +
>>>> +    set_bit(_IRQ_DISABLED, &desc->status);
>>>> +
>>>> +    /* disable interrupt in APLIC */
>>>> +    aplic.regs->clrienum = desc->irq;
>>>> +
>>>> +    /* disable interrupt in IMSIC */
>>>> +    imsic_irq_disable(desc->irq);
>>>> +
>>>> +    spin_unlock_irqrestore(&aplic.lock, flags);
>>>> +}
>>>> +
>>>> +static unsigned int aplic_irq_startup(struct irq_desc *desc)
>>>> +{
>>>> +    aplic_irq_enable(desc);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void aplic_irq_shutdown(struct irq_desc *desc)
>>>> +{
>>>> +    aplic_irq_disable(desc);
>>>> +}
>>> You don't really need a separate hook function here, do you?
>> With such implementation it is really not needed to have a hook so
>> I will drop it.
>>
>>>> +static void aplic_irq_ack(struct irq_desc *desc)
>>>> +{
>>>> +    /* nothing to do */
>>>> +}
>>>> +
>>>> +static void aplic_host_irq_end(struct irq_desc *desc)
>>> What's the "host" in the identifier about?
>> It was copied that from Arm and my understanding that it means
>> Xen-related IRQ as they also have:
>> ```
>> /* XXX different for level vs edge */
>> static hw_irq_controller gicv2_host_irq_type = {
>> ...
>>       .end          = gicv2_host_irq_end,
>> ...
>> };
>>
>> static hw_irq_controller gicv2_guest_irq_type = {
>> ...
>>       .end          = gicv2_guest_irq_end,
>> ...
>> };
>> ```
> And you expect to end up with a similar distinction on RISC-V? There's
> nothing like that on x86, just to mention it.

Yes, if one day support for guest interrupts without IMSIC support will be added for APLIC.
(at the moment, we are planning only to have APLIC+IMSIC support as this way is hypervisor-aware)

>>>> +static void aplic_set_irq_affinity(struct irq_desc *desc, const cpumask_t *mask)
>>>> +{
>>>> +    unsigned int cpu;
>>>> +    uint64_t group_index, base_ppn;
>>>> +    uint32_t hhxw, lhxw ,hhxs, value;
>>>> +    const struct imsic_config *imsic = aplic.imsic_cfg;
>>>> +
>>>> +    /*
>>>> +     * TODO: Currently, APLIC is supported only with MSI interrupts.
>>>> +     *       If APLIC without MSI interrupts is required in the future,
>>>> +     *       this function will need to be updated accordingly.
>>>> +     */
>>>> +    ASSERT(aplic.imsic_cfg->is_used);
>>> Use the local variable you have made yourself?
>> What do you mean by local here?
> Just a few lines up you latch aplic.imsic_cfg into the local "imsic".

Oh, sure, if *->is_used will still present in the next patch series then I'll re-use here "imsic".

~ Oleksii

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Jan Beulich 9 months, 2 weeks ago

On 28.04.2025 10:12, Oleksii Kurochko wrote:
> On 4/17/25 8:25 AM, Jan Beulich wrote:
>> On 16.04.2025 21:05, Oleksii Kurochko wrote:
>>> On 4/15/25 2:46 PM, Jan Beulich wrote:
>>>> On 08.04.2025 17:57, Oleksii Kurochko wrote:
>>>>> Introduce interrupt controller descriptor for host APLIC to describe
>>>>> the low-lovel hardare. It includes implementation of the following functions:
>>>>>    - aplic_irq_startup()
>>>>>    - aplic_irq_shutdown()
>>>>>    - aplic_irq_enable()
>>>>>    - aplic_irq_disable()
>>>>>    - aplic_irq_ack()
>>>>>    - aplic_host_irq_end()
>>>>>    - aplic_set_irq_affinity()
>>>>>
>>>>> As APLIC is used in MSI mode it requires to enable/disable interrupts not
>>>>> only for APLIC but also for IMSIC. Thereby for the purpose of
>>>>> aplic_irq_{enable,disable}() it is introduced imsic_irq_{enable,disable)().
>>>>>
>>>>> For the purpose of aplic_set_irq_affinity() aplic_get_cpu_from_mask() is
>>>>> introduced to get hart id.
>>>>>
>>>>> Also, introduce additional interrupt controller h/w operations and
>>>>> host_irq_type for APLIC:
>>>>>    - aplic_host_irq_type
>>>>>    - aplic_set_irq_priority()
>>>>>    - aplic_set_irq_type()
>>>> Yet these two functions nor the hooks they're used to populate are entirely
>>>> unused here. Since they're also outside of the common IRQ handling machinery,
>>>> it's unclear how one would sanely ack such a change.
>>> They will be called by intc_route_irq_to_xen() from setup_irq() during firt time
>>> the IRQ is setup.
>> Perhaps move their introduction to there then? We don't do any Misra checking
>> yet lon RISC-V, but imo it's still good practice to avoid introducing new
>> violations, even if only temporarily.
> 
> Okay, I will move their introduction to there.
> 
> Btw, what is needed to add Misra checking for RISC-V? I started to think that, probably,
> it will make sense to do that from the start.

Best I can do is point you at what is done for Arm and x86. You may want to
ask people more familiar with the CI aspects involved here.

>>>>> +static void aplic_set_irq_priority(struct irq_desc *desc,
>>>>> +                                   unsigned int priority)
>>>>> +{
>>>>> +    /* No priority, do nothing */
>>>>> +}
>>>> Since the function dopes nothing, wouldn't it be better to omit it and have
>>>> the (future) caller check for a NULL pointer ahead of making the (indirect)
>>>> call? Same remark for other handlers (below) which also do nothing.
>>> I thought about that too, but it could be some cases when the stub is introduced
>>> with temporary BUG_ON("unimplemented") inside just to not miss to implement it
>>> when it will be necessary.
>>> If we will have only the caller check then we could miss to implement such stubs.
>> I guess I don't understand the concern.
> 
> for example, if we will have the following code:
>    void some_caller(struct irq_desc *desc)
>    {
>        if ( desc->handler->set_affinity )
>            desc->handler->set_affinity(desc, cpu_mask);
>    }
> 
> Then we will skip the call of handler->set_affinity() (if it was just initialized with
> .set_affinity = NULL) without any notification. And it is fine specifically in this
> case as aplic_set_irq_priority() does nothing.
> 
> But what about the cases if it is a function which will have some implementation in the
> future but doesn't have implementation for now. Then without notification that this
> function is unimplemented we could skip something what really matters.
> 
> But I think that your initial comment was just about the function which basically
> does nothing. Am i right?

Since indirect calls are not only more expensive (often; not sure about
RISC-V) but also pose speculative concerns, having such just to do nothing
simply seems like moving in the wrong direction.

>>>>> +    ASSERT(spin_is_locked(&desc->lock));
>>>> If this lock (which is an IRQ-safe one) is necessarily held, ...
>>>>
>>>>> +    spin_lock_irqsave(&aplic.lock, flags);
>>>> ... you can use just spin_lock() here.
>>>>
>>>>> +    clear_bit(_IRQ_DISABLED, &desc->status);
>>>> Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
>>>> question on Arm code also doing so.)
>>> I haven't thought about that. Likely non-atomic bitop could be used here.
>> And then - does it need to be a bitop? Aiui that's what Arm uses, while x86
>> doesn't. And I see no reason to use other than plain C operators here. If
>> Arm was switched, presumably all the redundant (and misnamed) _IRQ_*
>> constants could go away, with just the IRQ_* ones left.
> 
> The reason for a bitop in Arm is explained in this commithttps://gitlab.com/xen-project/xen/-/commit/50d8fe8fcbab2440cfeeb65c4765868398652473
> but all the places where plain C operators were changed to bitops are actually executed under|spin_lock_irqsave(&desc->lock, flags). By quick look I found only two 
> places one in __setup_irq() but it is called by the functions which do ||spin_lock_irqsave(&desc->lock, flags) and in vgic_v2_fold_lr_state(). 
> Maybe, I'm missing something.|
> |RISC-V won't have something similar to ||vgic_v2_fold_lr_state|(), but __setup_irq() is used in a similar way. It can be added ASSERT(spin_is_lock(&desc->lock))
> and then it will also safe to use non-bitop function.
> Probably, it is a little bit safer to use always bitops for desc->status.
> ||

I question that. If any accesses outside of locked regions were needed (as the
description of that commit suggests), then the situation would be different.

Btw, you not wrapping lines and you adding strange | instances doesn't help
readability of your replies.

>>>> I'm uncertain about this bit setting anyway - on x86 we would only fiddle
>>>> with it for IRQs not in use, not while enabling/disabling one.
>> What about this part?
> 
> As I understand, based on Arm, code then Xen enables interrupts corresponding to devices assigned
> to dom0/domU before booting dom0/domU, resulting in the possibility of receiving an interrupt
> and not knowing what to do with it. So it is needed for enablement of IRQs when the guest
> requests it and not unconditionally at boot time.

I fear I don't understand this. The way we do things on x86 doesn't leave us
in such a situation.

Jan

Re: [PATCH v1 10/14] xen/riscv: implementation of aplic and imsic operations

Posted by Oleksii Kurochko 9 months, 2 weeks ago

On 4/28/25 10:54 AM, Jan Beulich wrote:
>>>>>> +    ASSERT(spin_is_locked(&desc->lock));
>>>>> If this lock (which is an IRQ-safe one) is necessarily held, ...
>>>>>
>>>>>> +    spin_lock_irqsave(&aplic.lock, flags);
>>>>> ... you can use just spin_lock() here.
>>>>>
>>>>>> +    clear_bit(_IRQ_DISABLED, &desc->status);
>>>>> Why an atomic bitop when desc is locked? (And yes, I ought to raise the same
>>>>> question on Arm code also doing so.)
>>>> I haven't thought about that. Likely non-atomic bitop could be used here.
>>> And then - does it need to be a bitop? Aiui that's what Arm uses, while x86
>>> doesn't. And I see no reason to use other than plain C operators here. If
>>> Arm was switched, presumably all the redundant (and misnamed) _IRQ_*
>>> constants could go away, with just the IRQ_* ones left.
>> The reason for a bitop in Arm is explained in this commithttps://gitlab.com/xen-project/xen/-/commit/50d8fe8fcbab2440cfeeb65c4765868398652473
>> but all the places where plain C operators were changed to bitops are actually executed under|spin_lock_irqsave(&desc->lock, flags). By quick look I found only two
>> places one in __setup_irq() but it is called by the functions which do ||spin_lock_irqsave(&desc->lock, flags) and in vgic_v2_fold_lr_state().
>> Maybe, I'm missing something.|
>> |RISC-V won't have something similar to ||vgic_v2_fold_lr_state|(), but __setup_irq() is used in a similar way. It can be added ASSERT(spin_is_lock(&desc->lock))
>> and then it will also safe to use non-bitop function.
>> Probably, it is a little bit safer to use always bitops for desc->status.
>> ||
> I question that. If any accesses outside of locked regions were needed (as the
> description of that commit suggests), then the situation would be different.

Okay, then at the moment there is no such cases and I'll use plain C operator instead of
clear/set_bit().

>
> Btw, you not wrapping lines and you adding strange | instances doesn't help
> readability of your replies.
>
>>>>> I'm uncertain about this bit setting anyway - on x86 we would only fiddle
>>>>> with it for IRQs not in use, not while enabling/disabling one.
>>> What about this part?
>> As I understand, based on Arm, code then Xen enables interrupts corresponding to devices assigned
>> to dom0/domU before booting dom0/domU, resulting in the possibility of receiving an interrupt
>> and not knowing what to do with it. So it is needed for enablement of IRQs when the guest
>> requests it and not unconditionally at boot time.
> I fear I don't understand this. The way we do things on x86 doesn't leave us
> in such a situation.

On Arm, the physical interrupts would be enabled when the interrupt is initially routed and in case guest
is booting with interrupt disabled, it could introduce a problem when guest enabled interrupts it will
already have a pending interrupt for which it isn't ready.

How is it handled the case when a device isn't quiescing at the boot time in x86?

But I just realized the way how interrupts are enabled in RISC-V for guest won't lead to such case. The interrupt
will be enabled only when guest's device driver will request that. So this setting/clearing of IRQ_DISABLED could
be dropped for RISC-V.

~ Oleksii