[PATCH v1 19/27] xen/riscv: emulate guest writes to virtual APLIC MMIO

Oleksii Kurochko posted 27 patches 4 weeks ago
[PATCH v1 19/27] xen/riscv: emulate guest writes to virtual APLIC MMIO
Posted by Oleksii Kurochko 4 weeks ago
RISC-V guests program interrupt routing by writing to APLIC registers.
When APLIC is virtualised, Xen must intercept these MMIO writes and
translate them into updates of the virtual interrupt controller state.

Introduce vaplic_emulate_store() to validate guest APLIC accesses and
apply only those changes that are permitted by the domain’s authorised
interrupt bitmap. Writes targeting unauthorised interrupts are ignored,
ensuring that guests cannot affect interrupts they do not own.

APLIC TARGET writes are rewritten to reflect the effective interrupt
delivery configuration, depending on whether the domain operates in
direct (isn't supported by Xen) or MSI mode. This ensures that
guest-visible routing requests are correctly mapped onto the underlying
virtual interrupt topology.

Unexpected or unsupported register accesses are treated as fatal, as
they indicate a emulation bug.

Co-developed-by: Romain Caritey <Romain.Caritey@microchip.com>
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
 xen/arch/riscv/include/asm/intc.h |   4 +
 xen/arch/riscv/vaplic.c           | 169 ++++++++++++++++++++++++++++++
 2 files changed, 173 insertions(+)

diff --git a/xen/arch/riscv/include/asm/intc.h b/xen/arch/riscv/include/asm/intc.h
index e6b6c179415a..92a74eede4a0 100644
--- a/xen/arch/riscv/include/asm/intc.h
+++ b/xen/arch/riscv/include/asm/intc.h
@@ -56,6 +56,10 @@ struct vintc_ops {
     /* Initialize some vINTC-related stuff for a vCPU */
     int (*vcpu_init)(struct vcpu *vcpu);
 
+    /* Emulate store to virtual interrupt controller MMIOs */
+    int (*emulate_store)(const struct vcpu *vcpu, unsigned long addr,
+                         uint32_t in);
+
     /* Check if a register is virtual interrupt controller MMIO */
     int (*is_access)(const struct vcpu *vcpu, const unsigned long addr);
 
diff --git a/xen/arch/riscv/vaplic.c b/xen/arch/riscv/vaplic.c
index 82e74a609ee6..5540b4884179 100644
--- a/xen/arch/riscv/vaplic.c
+++ b/xen/arch/riscv/vaplic.c
@@ -20,6 +20,16 @@
 
 #include "aplic-priv.h"
 
+#define APLIC_REG_GET(addr, offset) \
+    readl((void *)((vaddr_t)(addr) + offset))
+#define APLIC_REG_SET(addr, offset, value) \
+    writel(value, (void *)((vaddr_t)(addr) + offset))
+
+#define AUTH_IRQ_BIT(irqnum) (auth_irq_bmp[(irqnum) / APLIC_NUM_REGS] & \
+                              BIT((irqnum) % APLIC_NUM_REGS, U))
+
+#define regval_to_irqn(reg_val) ((reg_val) / sizeof(uint32_t))
+
 struct vaplic_priv {
     /* Contains a legal interrupts for a domain */
     uint32_t auth_irq_bmp[APLIC_NUM_REGS];
@@ -127,6 +137,164 @@ int vaplic_map_device_irqs_to_domain(struct domain *d,
     return 0;
 }
 
+static void vaplic_dm_update_target(const unsigned long hart_id, uint32_t *iprio)
+{
+    *iprio &= APLIC_TARGET_IPRIO_MASK;
+    *iprio |= (hart_id << APLIC_TARGET_HART_IDX_SHIFT);
+}
+
+static void vaplic_update_target(const struct imsic_config *imsic,
+                                 const int guest_id,
+                                 const unsigned long hart_id, uint32_t *value)
+{
+    unsigned long group_index;
+    unsigned int hhxw = imsic->group_index_bits;
+    unsigned int lhxw = imsic->hart_index_bits;
+    unsigned int hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
+    unsigned long base_ppn = imsic->msi[hart_id].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
+
+    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);
+
+    *value &= APLIC_TARGET_EIID_MASK;
+    *value |= guest_id << APLIC_TARGET_GUEST_IDX_SHIFT;
+    *value |= hart_id << APLIC_TARGET_HART_IDX_SHIFT;
+    *value |= group_index << (lhxw + APLIC_TARGET_HART_IDX_SHIFT) ;
+}
+
+#define CALC_REG_VALUE(base) \
+{ \
+    uint32_t index; \
+    uint32_t tmp_val; \
+    index = regval_to_irqn(offset - base); \
+    tmp_val = APLIC_REG_GET(priv->regs, aplic_addr) & ~auth_irq_bmp[index]; \
+    value &= auth_irq_bmp[index]; \
+    value |= tmp_val; \
+}
+
+static int cf_check vaplic_emulate_store(const struct vcpu *vcpu,
+                                         unsigned long addr, uint32_t value)
+{
+    struct vaplic *vaplic = to_vaplic(vcpu->domain->arch.vintc);
+    struct aplic_priv *priv = vaplic->base.info->private;
+    uint32_t offset = addr & APLIC_REG_OFFSET_MASK;
+    unsigned long aplic_addr = addr - priv->paddr_start;
+    const uint32_t *auth_irq_bmp = vcpu->domain->arch.vintc->private;
+
+    switch ( offset )
+    {
+    case APLIC_SETIP_BASE ... APLIC_SETIP_LAST:
+        CALC_REG_VALUE(APLIC_SETIP_BASE);
+        break;
+
+    case APLIC_CLRIP_BASE ... APLIC_CLRIP_LAST:
+        CALC_REG_VALUE(APLIC_CLRIP_BASE);
+        break;
+
+    case APLIC_SETIE_BASE ... APLIC_SETIE_LAST:
+        CALC_REG_VALUE(APLIC_SETIE_BASE);
+        break;
+
+    case APLIC_CLRIE_BASE ... APLIC_CLRIE_LAST:
+        CALC_REG_VALUE(APLIC_CLRIE_BASE);
+        break;
+
+    case APLIC_SOURCECFG_BASE ... APLIC_SOURCECFG_LAST:
+        /* We don't suppert delagation, so bit10 if sourcecfg should be 0 */
+        ASSERT(!(value & BIT(10, U)));
+
+        /*
+         * As sourcecfg register starts from 1:
+         *   0x0000 domaincfg
+         *   0x0004 sourcecfg[1]
+         *   0x0008 sourcecfg[2]
+         *    ...
+         *   0x0FFC sourcecfg[1023]
+         * It is necessary to calculate an interrupt number by substracting
+         * of APLIC_DOMAINCFG instead of APLIC_SOURCECFG_BASE.
+         */
+        if ( !AUTH_IRQ_BIT(regval_to_irqn(offset - APLIC_DOMAINCFG)) )
+            /* interrupt not enabled, ignore it */
+            return 0;
+
+        break;
+
+    case APLIC_TARGET_BASE ... APLIC_TARGET_LAST:
+        struct vcpu *target_vcpu = NULL;
+
+        /*
+         * Look at vaplic_emulate_load() for explanation why
+         * APLIC_GENMSI is substracted.
+         */
+        if ( !AUTH_IRQ_BIT(regval_to_irqn(offset - APLIC_GENMSI)) )
+            /* interrupt not enabled, ignore it */
+            return 0;
+
+        for ( int i = 0; i < vcpu->domain->max_vcpus; i++ )
+        {
+            struct vcpu *v = vcpu->domain->vcpu[i];
+
+            if ( v->vcpu_id == (value >> APLIC_TARGET_HART_IDX_SHIFT) )
+            {
+                target_vcpu = v;
+                break;
+            }
+        }
+
+        ASSERT(target_vcpu);
+
+        if ( !(vaplic->regs.domaincfg & APLIC_DOMAINCFG_DM) )
+        {
+            vaplic_dm_update_target(cpuid_to_hartid(target_vcpu->processor),
+                                    &value);
+        }
+        else
+            vaplic_update_target(priv->imsic_cfg,
+                                 vcpu_guest_file_id(target_vcpu),
+                                 cpuid_to_hartid(target_vcpu->processor),
+                                 &value);
+
+        break;
+
+    case APLIC_SETIPNUM:
+    case APLIC_SETIPNUM_LE:
+    case APLIC_CLRIPNUM:
+    case APLIC_SETIENUM:
+    case APLIC_CLRIENUM:
+        if ( AUTH_IRQ_BIT(value) )
+            break;
+
+        return 0;
+
+    case APLIC_DOMAINCFG:
+        /*
+         * TODO:
+         * The domaincfg register has this format:
+         * bits 31:24 read-only 0x80
+         * bit 8      IE
+         * bit 7      read-only 0
+         * bit 2      DM (WARL)
+         * bit 0      BE (WARL)
+         *
+         * The most interesting bit for us is IE(Interrupt Enable) bit.
+         * At the moment, at least, Linux doesn't use domaincfg.IE bit to
+         * disable interrupts globally, but if one day someone will use it
+         * then extra actions should be done.
+         */
+
+        printk_once("%s: Nothing to do, domaincfg is set by aplic during "
+                    "initialization in Xen\n", __func__);
+
+        return 0;
+
+    default:
+        panic("%s: unsupported register offset: %#x\n", __func__, offset);
+    }
+
+    APLIC_REG_SET(priv->regs, aplic_addr, value);
+
+    return 0;
+}
+
 static int cf_check vaplic_is_access(const struct vcpu *vcpu,
                                      const unsigned long addr)
 {
@@ -158,6 +326,7 @@ static const struct vintc_ops vaplic_ops = {
     .vcpu_init = vcpu_vaplic_init,
     .map_device_irqs_to_domain = vaplic_map_device_irqs_to_domain,
     .is_access = vaplic_is_access,
+    .emulate_store = vaplic_emulate_store,
 };
 
 static struct vintc * __init vaplic_alloc(void)
-- 
2.53.0


Re: [PATCH v1 19/27] xen/riscv: emulate guest writes to virtual APLIC MMIO
Posted by Jan Beulich 5 days, 12 hours ago
On 10.03.2026 18:08, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/vaplic.c
> +++ b/xen/arch/riscv/vaplic.c
> @@ -20,6 +20,16 @@
>  
>  #include "aplic-priv.h"
>  
> +#define APLIC_REG_GET(addr, offset) \
> +    readl((void *)((vaddr_t)(addr) + offset))
> +#define APLIC_REG_SET(addr, offset, value) \
> +    writel(value, (void *)((vaddr_t)(addr) + offset))

Why is addr properly parenthesized, but offset isn't?

> +#define AUTH_IRQ_BIT(irqnum) (auth_irq_bmp[(irqnum) / APLIC_NUM_REGS] & \
> +                              BIT((irqnum) % APLIC_NUM_REGS, U))
> +
> +#define regval_to_irqn(reg_val) ((reg_val) / sizeof(uint32_t))

I'm trying to make sense of the division here, but I think the main issue
is with naming: It's not a "register value" which is passed into here, but
a register index (offset from a range's base register).

> @@ -127,6 +137,164 @@ int vaplic_map_device_irqs_to_domain(struct domain *d,
>      return 0;
>  }
>  
> +static void vaplic_dm_update_target(const unsigned long hart_id, uint32_t *iprio)
> +{
> +    *iprio &= APLIC_TARGET_IPRIO_MASK;
> +    *iprio |= (hart_id << APLIC_TARGET_HART_IDX_SHIFT);
> +}
> +
> +static void vaplic_update_target(const struct imsic_config *imsic,
> +                                 const int guest_id,
> +                                 const unsigned long hart_id, uint32_t *value)
> +{
> +    unsigned long group_index;
> +    unsigned int hhxw = imsic->group_index_bits;
> +    unsigned int lhxw = imsic->hart_index_bits;
> +    unsigned int hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
> +    unsigned long base_ppn = imsic->msi[hart_id].base_addr >> IMSIC_MMIO_PAGE_SHIFT;
> +
> +    group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);

And there's no constant available to make this literal 12 more descriptive?

> +    *value &= APLIC_TARGET_EIID_MASK;
> +    *value |= guest_id << APLIC_TARGET_GUEST_IDX_SHIFT;
> +    *value |= hart_id << APLIC_TARGET_HART_IDX_SHIFT;
> +    *value |= group_index << (lhxw + APLIC_TARGET_HART_IDX_SHIFT) ;
> +}

Both functions returning void right now, why would they need to return their
result via indirection?

> +#define CALC_REG_VALUE(base) \
> +{ \
> +    uint32_t index; \
> +    uint32_t tmp_val; \

Combine these two, or have the variables have initializers?

> +    index = regval_to_irqn(offset - base); \

There's no "offset" declared or passed into here, nor ...

> +    tmp_val = APLIC_REG_GET(priv->regs, aplic_addr) & ~auth_irq_bmp[index]; \

... "priv", nor ...

> +    value &= auth_irq_bmp[index]; \
> +    value |= tmp_val; \

... "value". It may remain like this, but then it wants putting inside the
sole function that uses it, and be #undef-ed at the end of the function.

> +}

Please wrap in do/while(0), for use sites to be required to have semicolons
(and hence look like normal statements). Or make it a statement expression
properly returning the calculated value.

> +static int cf_check vaplic_emulate_store(const struct vcpu *vcpu,
> +                                         unsigned long addr, uint32_t value)
> +{
> +    struct vaplic *vaplic = to_vaplic(vcpu->domain->arch.vintc);
> +    struct aplic_priv *priv = vaplic->base.info->private;
> +    uint32_t offset = addr & APLIC_REG_OFFSET_MASK;

See ./CODING_STYLE as to uses of fixed-width types.

> +    unsigned long aplic_addr = addr - priv->paddr_start;
> +    const uint32_t *auth_irq_bmp = vcpu->domain->arch.vintc->private;
> +
> +    switch ( offset )
> +    {
> +    case APLIC_SETIP_BASE ... APLIC_SETIP_LAST:

And (taking this just as example) any misaligned accesses falling in this range
are fine?

> +        CALC_REG_VALUE(APLIC_SETIP_BASE);
> +        break;
> +
> +    case APLIC_CLRIP_BASE ... APLIC_CLRIP_LAST:
> +        CALC_REG_VALUE(APLIC_CLRIP_BASE);
> +        break;
> +
> +    case APLIC_SETIE_BASE ... APLIC_SETIE_LAST:
> +        CALC_REG_VALUE(APLIC_SETIE_BASE);
> +        break;
> +
> +    case APLIC_CLRIE_BASE ... APLIC_CLRIE_LAST:
> +        CALC_REG_VALUE(APLIC_CLRIE_BASE);
> +        break;
> +
> +    case APLIC_SOURCECFG_BASE ... APLIC_SOURCECFG_LAST:
> +        /* We don't suppert delagation, so bit10 if sourcecfg should be 0 */
> +        ASSERT(!(value & BIT(10, U)));

And that bit doesn't have a proper #define?

> +        /*
> +         * As sourcecfg register starts from 1:
> +         *   0x0000 domaincfg
> +         *   0x0004 sourcecfg[1]
> +         *   0x0008 sourcecfg[2]
> +         *    ...
> +         *   0x0FFC sourcecfg[1023]
> +         * It is necessary to calculate an interrupt number by substracting

Nit: subtracting

> +         * of APLIC_DOMAINCFG instead of APLIC_SOURCECFG_BASE.
> +         */
> +        if ( !AUTH_IRQ_BIT(regval_to_irqn(offset - APLIC_DOMAINCFG)) )
> +            /* interrupt not enabled, ignore it */

Throughout the series: Please adhere to ./CODING_STYLE.

> +            return 0;
> +
> +        break;

And any value is okay to write?

> +    case APLIC_TARGET_BASE ... APLIC_TARGET_LAST:
> +        struct vcpu *target_vcpu = NULL;
> +
> +        /*
> +         * Look at vaplic_emulate_load() for explanation why
> +         * APLIC_GENMSI is substracted.
> +         */

There's no vaplic_emulate_load() - how can I go look there?

Also same typo again as above.

> +        if ( !AUTH_IRQ_BIT(regval_to_irqn(offset - APLIC_GENMSI)) )
> +            /* interrupt not enabled, ignore it */
> +            return 0;
> +
> +        for ( int i = 0; i < vcpu->domain->max_vcpus; i++ )

unsigned int

> +        {
> +            struct vcpu *v = vcpu->domain->vcpu[i];
> +
> +            if ( v->vcpu_id == (value >> APLIC_TARGET_HART_IDX_SHIFT) )
> +            {
> +                target_vcpu = v;
> +                break;
> +            }
> +        }
> +
> +        ASSERT(target_vcpu);

What guarantees the pointer to be non-NULL? The incoming value can be
arbitrary, afaict.

> +        if ( !(vaplic->regs.domaincfg & APLIC_DOMAINCFG_DM) )
> +        {
> +            vaplic_dm_update_target(cpuid_to_hartid(target_vcpu->processor),
> +                                    &value);
> +        }
> +        else
> +            vaplic_update_target(priv->imsic_cfg,
> +                                 vcpu_guest_file_id(target_vcpu),
> +                                 cpuid_to_hartid(target_vcpu->processor),
> +                                 &value);

I'm struggling with the naming here: When DM is clear, a function with "dm"
in the name is called.

For the latter one, unless other uses are intended speaking against that,
instead of the middle two arguments simply pass target_vcpu?

Also please omit the braces consistently from both branches.

> +        break;
> +
> +    case APLIC_SETIPNUM:
> +    case APLIC_SETIPNUM_LE:

What about APLIC_SETIPNUM_BE?

> +    case APLIC_CLRIPNUM:
> +    case APLIC_SETIENUM:
> +    case APLIC_CLRIENUM:
> +        if ( AUTH_IRQ_BIT(value) )
> +            break;

Aren't you easily overrunning auth_irq_bmp[] here?

> +        return 0;
> +
> +    case APLIC_DOMAINCFG:
> +        /*
> +         * TODO:
> +         * The domaincfg register has this format:
> +         * bits 31:24 read-only 0x80
> +         * bit 8      IE
> +         * bit 7      read-only 0
> +         * bit 2      DM (WARL)
> +         * bit 0      BE (WARL)
> +         *
> +         * The most interesting bit for us is IE(Interrupt Enable) bit.
> +         * At the moment, at least, Linux doesn't use domaincfg.IE bit to
> +         * disable interrupts globally, but if one day someone will use it
> +         * then extra actions should be done.
> +         */
> +
> +        printk_once("%s: Nothing to do, domaincfg is set by aplic during "
> +                    "initialization in Xen\n", __func__);

As per the comment it's not "nothing to do", but your choice to ignore writes
even if they may be relevant.

> +        return 0;
> +
> +    default:
> +        panic("%s: unsupported register offset: %#x\n", __func__, offset);

Crashing the host for the guest doing something odd? It's odd that the function
only ever returns 0 anyway - it could simply return an error here (if the
itention is to not ignore such writes).

As it's not clear what values other than zero such a function may return, I
also can't comment on its (and the hook's) return type (may want to be bool
instead of int).

Jan