In order to support IRQ domains which reside between the leaf domains
and IMSIC, put the IMSIC implementation of irq_set_affinity into its
chip.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
drivers/irqchip/irq-riscv-imsic-platform.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
index c708780e8760..5d7c30ad8855 100644
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
bool force)
{
struct imsic_vector *old_vec, *new_vec;
- struct irq_data *pd = d->parent_data;
- old_vec = irq_data_get_irq_chip_data(pd);
+ old_vec = irq_data_get_irq_chip_data(d);
if (WARN_ON(!old_vec))
return -ENOENT;
@@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
return -ENOSPC;
/* Point device to the new vector */
- imsic_msi_update_msg(d, new_vec);
+ imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
/* Update irq descriptors with the new vector */
- pd->chip_data = new_vec;
+ d->chip_data = new_vec;
- /* Update effective affinity of parent irq data */
- irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
+ /* Update effective affinity */
+ irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu));
/* Move state of the old vector to the new vector */
imsic_vector_move(old_vec, new_vec);
@@ -135,6 +134,9 @@ static struct irq_chip imsic_irq_base_chip = {
.name = "IMSIC",
.irq_mask = imsic_irq_mask,
.irq_unmask = imsic_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = imsic_irq_set_affinity,
+#endif
.irq_retrigger = imsic_irq_retrigger,
.irq_compose_msi_msg = imsic_irq_compose_msg,
.flags = IRQCHIP_SKIP_SET_WAKE |
@@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev,
if (WARN_ON_ONCE(domain != real_parent))
return false;
#ifdef CONFIG_SMP
- info->chip->irq_set_affinity = imsic_irq_set_affinity;
+ info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
#endif
break;
default:
--
2.47.0
On Thu, Nov 14 2024 at 17:18, Andrew Jones wrote:
> @@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> bool force)
> {
> struct imsic_vector *old_vec, *new_vec;
> - struct irq_data *pd = d->parent_data;
>
> - old_vec = irq_data_get_irq_chip_data(pd);
> + old_vec = irq_data_get_irq_chip_data(d);
> if (WARN_ON(!old_vec))
> return -ENOENT;
>
> @@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> return -ENOSPC;
>
> /* Point device to the new vector */
> - imsic_msi_update_msg(d, new_vec);
> + imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
This looks more than fishy. See below.
> @@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev,
> if (WARN_ON_ONCE(domain != real_parent))
> return false;
> #ifdef CONFIG_SMP
> - info->chip->irq_set_affinity = imsic_irq_set_affinity;
> + info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
This should use msi_domain_set_affinity(), which does the right thing:
1) It invokes the irq_set_affinity() callback of the parent domain
2) It composes the message via the hierarchy
3) It writes the message with the msi_write_msg() callback of the top
level domain
Sorry, I missed that when reviewing the original IMSIC MSI support.
The whole IMSIC MSI support can be moved over to MSI LIB which makes all
of this indirection go away and your intermediate domain will just fit
in.
Uncompiled patch below. If that works, it needs to be split up properly.
Note, this removes the setup of the irq_retrigger callback, but that's
fine because on hierarchical domains irq_chip_retrigger_hierarchy() is
invoked anyway. See try_retrigger().
Thanks,
tglx
---
drivers/irqchip/Kconfig | 1
drivers/irqchip/irq-gic-v2m.c | 1
drivers/irqchip/irq-imx-mu-msi.c | 1
drivers/irqchip/irq-msi-lib.c | 11 +-
drivers/irqchip/irq-mvebu-gicp.c | 1
drivers/irqchip/irq-mvebu-odmi.c | 1
drivers/irqchip/irq-mvebu-sei.c | 1
drivers/irqchip/irq-riscv-imsic-platform.c | 131 +----------------------------
include/linux/msi.h | 11 ++
9 files changed, 32 insertions(+), 127 deletions(-)
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -587,6 +587,7 @@ config RISCV_IMSIC
select IRQ_DOMAIN_HIERARCHY
select GENERIC_IRQ_MATRIX_ALLOCATOR
select GENERIC_MSI_IRQ
+ select IRQ_MSI_LIB
config RISCV_IMSIC_PCI
bool
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -255,6 +255,7 @@ static void __init gicv2m_teardown(void)
static struct msi_parent_ops gicv2m_msi_parent_ops = {
.supported_flags = GICV2M_MSI_FLAGS_SUPPORTED,
.required_flags = GICV2M_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
.prefix = "GICv2m-",
--- a/drivers/irqchip/irq-imx-mu-msi.c
+++ b/drivers/irqchip/irq-imx-mu-msi.c
@@ -214,6 +214,7 @@ static void imx_mu_msi_irq_handler(struc
static const struct msi_parent_ops imx_mu_msi_parent_ops = {
.supported_flags = IMX_MU_MSI_FLAGS_SUPPORTED,
.required_flags = IMX_MU_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "MU-MSI-",
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -28,6 +28,7 @@ bool msi_lib_init_dev_msi_info(struct de
struct msi_domain_info *info)
{
const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+ struct irq_chip *chip = info->chip;
u32 required_flags;
/* Parent ops available? */
@@ -92,10 +93,10 @@ bool msi_lib_init_dev_msi_info(struct de
info->flags |= required_flags;
/* Chip updates for all child bus types */
- if (!info->chip->irq_eoi)
- info->chip->irq_eoi = irq_chip_eoi_parent;
- if (!info->chip->irq_ack)
- info->chip->irq_ack = irq_chip_ack_parent;
+ if (!chip->irq_eoi && (pops->chip_flags & MSI_CHIP_FLAG_SET_EOI))
+ chip->irq_eoi = irq_chip_eoi_parent;
+ if (!chip->irq_ack && (pops->chip_flags & MSI_CHIP_FLAG_SET_ACK))
+ chip->irq_ack = irq_chip_ack_parent;
/*
* The device MSI domain can never have a set affinity callback. It
@@ -105,7 +106,7 @@ bool msi_lib_init_dev_msi_info(struct de
* device MSI domain aside of mask/unmask which is provided e.g. by
* PCI/MSI device domains.
*/
- info->chip->irq_set_affinity = msi_domain_set_affinity;
+ chip->irq_set_affinity = msi_domain_set_affinity;
return true;
}
EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -161,6 +161,7 @@ static const struct irq_domain_ops gicp_
static const struct msi_parent_ops gicp_msi_parent_ops = {
.supported_flags = GICP_MSI_FLAGS_SUPPORTED,
.required_flags = GICP_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "GICP-",
--- a/drivers/irqchip/irq-mvebu-odmi.c
+++ b/drivers/irqchip/irq-mvebu-odmi.c
@@ -157,6 +157,7 @@ static const struct irq_domain_ops odmi_
static const struct msi_parent_ops odmi_msi_parent_ops = {
.supported_flags = ODMI_MSI_FLAGS_SUPPORTED,
.required_flags = ODMI_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.bus_select_mask = MATCH_PLATFORM_MSI,
.prefix = "ODMI-",
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -356,6 +356,7 @@ static void mvebu_sei_reset(struct mvebu
static const struct msi_parent_ops sei_msi_parent_ops = {
.supported_flags = SEI_MSI_FLAGS_SUPPORTED,
.required_flags = SEI_MSI_FLAGS_REQUIRED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
.bus_select_mask = MATCH_PLATFORM_MSI,
.bus_select_token = DOMAIN_BUS_GENERIC_MSI,
.prefix = "SEI-",
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -21,6 +21,7 @@
#include <linux/smp.h>
#include "irq-riscv-imsic-state.h"
+#include "irq-msi-lib.h"
static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index,
phys_addr_t *out_msi_pa)
@@ -84,19 +85,10 @@ static void imsic_irq_compose_msg(struct
}
#ifdef CONFIG_SMP
-static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
-{
- struct msi_msg msg = { };
-
- imsic_irq_compose_vector_msg(vec, &msg);
- irq_data_get_irq_chip(d)->irq_write_msi_msg(d, &msg);
-}
-
static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
struct imsic_vector *old_vec, *new_vec;
- struct irq_data *pd = d->parent_data;
old_vec = irq_data_get_irq_chip_data(pd);
if (WARN_ON(!old_vec))
@@ -115,14 +107,11 @@ static int imsic_irq_set_affinity(struct
if (!new_vec)
return -ENOSPC;
- /* Point device to the new vector */
- imsic_msi_update_msg(d, new_vec);
-
/* Update irq descriptors with the new vector */
- pd->chip_data = new_vec;
+ d->chip_data = new_vec;
/* Update effective affinity of parent irq data */
- irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
+ irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu));
/* Move state of the old vector to the new vector */
imsic_vector_move(old_vec, new_vec);
@@ -137,6 +126,9 @@ static struct irq_chip imsic_irq_base_ch
.irq_unmask = imsic_irq_unmask,
.irq_retrigger = imsic_irq_retrigger,
.irq_compose_msi_msg = imsic_irq_compose_msg,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = imsic_irq_set_affinity,
+#endif
.flags = IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND,
};
@@ -172,22 +164,6 @@ static void imsic_irq_domain_free(struct
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
-static int imsic_irq_domain_select(struct irq_domain *domain, struct irq_fwspec *fwspec,
- enum irq_domain_bus_token bus_token)
-{
- const struct msi_parent_ops *ops = domain->msi_parent_ops;
- u32 busmask = BIT(bus_token);
-
- if (fwspec->fwnode != domain->fwnode || fwspec->param_count != 0)
- return 0;
-
- /* Handle pure domain searches */
- if (bus_token == ops->bus_select_token)
- return 1;
-
- return !!(ops->bus_select_mask & busmask);
-}
-
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
struct irq_data *irqd, int ind)
@@ -210,104 +186,15 @@ static const struct irq_domain_ops imsic
#endif
};
-#ifdef CONFIG_RISCV_IMSIC_PCI
-
-static void imsic_pci_mask_irq(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void imsic_pci_unmask_irq(struct irq_data *d)
-{
- irq_chip_unmask_parent(d);
- pci_msi_unmask_irq(d);
-}
-
-#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI)
-
-#else
-
-#define MATCH_PCI_MSI 0
-
-#endif
-
-static bool imsic_init_dev_msi_info(struct device *dev,
- struct irq_domain *domain,
- struct irq_domain *real_parent,
- struct msi_domain_info *info)
-{
- const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
-
- /* MSI parent domain specific settings */
- switch (real_parent->bus_token) {
- case DOMAIN_BUS_NEXUS:
- if (WARN_ON_ONCE(domain != real_parent))
- return false;
-#ifdef CONFIG_SMP
- info->chip->irq_set_affinity = imsic_irq_set_affinity;
-#endif
- break;
- default:
- WARN_ON_ONCE(1);
- return false;
- }
-
- /* Is the target supported? */
- switch (info->bus_token) {
-#ifdef CONFIG_RISCV_IMSIC_PCI
- case DOMAIN_BUS_PCI_DEVICE_MSI:
- case DOMAIN_BUS_PCI_DEVICE_MSIX:
- info->chip->irq_mask = imsic_pci_mask_irq;
- info->chip->irq_unmask = imsic_pci_unmask_irq;
- break;
-#endif
- case DOMAIN_BUS_DEVICE_MSI:
- /*
- * Per-device MSI should never have any MSI feature bits
- * set. It's sole purpose is to create a dumb interrupt
- * chip which has a device specific irq_write_msi_msg()
- * callback.
- */
- if (WARN_ON_ONCE(info->flags))
- return false;
-
- /* Core managed MSI descriptors */
- info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS |
- MSI_FLAG_FREE_MSI_DESCS;
- break;
- case DOMAIN_BUS_WIRED_TO_MSI:
- break;
- default:
- WARN_ON_ONCE(1);
- return false;
- }
-
- /* Use hierarchial chip operations re-trigger */
- info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
-
- /*
- * Mask out the domain specific MSI feature flags which are not
- * supported by the real parent.
- */
- info->flags &= pops->supported_flags;
-
- /* Enforce the required flags */
- info->flags |= pops->required_flags;
-
- return true;
-}
-
-#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
-
static const struct msi_parent_ops imsic_msi_parent_ops = {
.supported_flags = MSI_GENERIC_FLAGS_MASK |
MSI_FLAG_PCI_MSIX,
.required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
- MSI_FLAG_USE_DEF_CHIP_OPS,
+ MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSI_MASK_PARENT,
.bus_select_token = DOMAIN_BUS_NEXUS,
.bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
- .init_dev_msi_info = imsic_init_dev_msi_info,
+ .init_dev_msi_info = msi_lib_init_dev_msi_info,
};
int imsic_irqdomain_init(void)
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -558,11 +558,21 @@ enum {
MSI_FLAG_NO_AFFINITY = (1 << 21),
};
+/*
+ * Flags for msi_parent_ops::chip_flags
+ */
+enum {
+ MSI_CHIP_FLAG_SET_EOI = (1 << 0),
+ MSI_CHIP_FLAG_SET_ACK = (1 << 1),
+};
+
/**
* struct msi_parent_ops - MSI parent domain callbacks and configuration info
*
* @supported_flags: Required: The supported MSI flags of the parent domain
* @required_flags: Optional: The required MSI flags of the parent MSI domain
+ * @chip_flags: Optional: Select MSI chip callbacks to update with defaults
+ * in msi_lib_init_dev_msi_info().
* @bus_select_token: Optional: The bus token of the real parent domain for
* irq_domain::select()
* @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
@@ -575,6 +585,7 @@ enum {
struct msi_parent_ops {
u32 supported_flags;
u32 required_flags;
+ u32 chip_flags;
u32 bus_select_token;
u32 bus_select_mask;
const char *prefix;
On Tue, Dec 3, 2024 at 7:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Thu, Nov 14 2024 at 17:18, Andrew Jones wrote:
> > @@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> > bool force)
> > {
> > struct imsic_vector *old_vec, *new_vec;
> > - struct irq_data *pd = d->parent_data;
> >
> > - old_vec = irq_data_get_irq_chip_data(pd);
> > + old_vec = irq_data_get_irq_chip_data(d);
> > if (WARN_ON(!old_vec))
> > return -ENOENT;
> >
> > @@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> > return -ENOSPC;
> >
> > /* Point device to the new vector */
> > - imsic_msi_update_msg(d, new_vec);
> > + imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
>
> This looks more than fishy. See below.
>
> > @@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev,
> > if (WARN_ON_ONCE(domain != real_parent))
> > return false;
> > #ifdef CONFIG_SMP
> > - info->chip->irq_set_affinity = imsic_irq_set_affinity;
> > + info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
>
> This should use msi_domain_set_affinity(), which does the right thing:
>
> 1) It invokes the irq_set_affinity() callback of the parent domain
>
> 2) It composes the message via the hierarchy
>
> 3) It writes the message with the msi_write_msg() callback of the top
> level domain
>
> Sorry, I missed that when reviewing the original IMSIC MSI support.
>
> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
> of this indirection go away and your intermediate domain will just fit
> in.
>
> Uncompiled patch below. If that works, it needs to be split up properly.
>
> Note, this removes the setup of the irq_retrigger callback, but that's
> fine because on hierarchical domains irq_chip_retrigger_hierarchy() is
> invoked anyway. See try_retrigger().
The IMSIC driver was merged one kernel release before common
MSI LIB was merged.
We should definitely update the IMSIC driver to use MSI LIB, I will
try your suggested changes (below) and post a separate series.
Thanks,
Anup
>
> Thanks,
>
> tglx
> ---
> drivers/irqchip/Kconfig | 1
> drivers/irqchip/irq-gic-v2m.c | 1
> drivers/irqchip/irq-imx-mu-msi.c | 1
> drivers/irqchip/irq-msi-lib.c | 11 +-
> drivers/irqchip/irq-mvebu-gicp.c | 1
> drivers/irqchip/irq-mvebu-odmi.c | 1
> drivers/irqchip/irq-mvebu-sei.c | 1
> drivers/irqchip/irq-riscv-imsic-platform.c | 131 +----------------------------
> include/linux/msi.h | 11 ++
> 9 files changed, 32 insertions(+), 127 deletions(-)
>
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -587,6 +587,7 @@ config RISCV_IMSIC
> select IRQ_DOMAIN_HIERARCHY
> select GENERIC_IRQ_MATRIX_ALLOCATOR
> select GENERIC_MSI_IRQ
> + select IRQ_MSI_LIB
>
> config RISCV_IMSIC_PCI
> bool
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -255,6 +255,7 @@ static void __init gicv2m_teardown(void)
> static struct msi_parent_ops gicv2m_msi_parent_ops = {
> .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED,
> .required_flags = GICV2M_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
> .prefix = "GICv2m-",
> --- a/drivers/irqchip/irq-imx-mu-msi.c
> +++ b/drivers/irqchip/irq-imx-mu-msi.c
> @@ -214,6 +214,7 @@ static void imx_mu_msi_irq_handler(struc
> static const struct msi_parent_ops imx_mu_msi_parent_ops = {
> .supported_flags = IMX_MU_MSI_FLAGS_SUPPORTED,
> .required_flags = IMX_MU_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "MU-MSI-",
> --- a/drivers/irqchip/irq-msi-lib.c
> +++ b/drivers/irqchip/irq-msi-lib.c
> @@ -28,6 +28,7 @@ bool msi_lib_init_dev_msi_info(struct de
> struct msi_domain_info *info)
> {
> const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
> + struct irq_chip *chip = info->chip;
> u32 required_flags;
>
> /* Parent ops available? */
> @@ -92,10 +93,10 @@ bool msi_lib_init_dev_msi_info(struct de
> info->flags |= required_flags;
>
> /* Chip updates for all child bus types */
> - if (!info->chip->irq_eoi)
> - info->chip->irq_eoi = irq_chip_eoi_parent;
> - if (!info->chip->irq_ack)
> - info->chip->irq_ack = irq_chip_ack_parent;
> + if (!chip->irq_eoi && (pops->chip_flags & MSI_CHIP_FLAG_SET_EOI))
> + chip->irq_eoi = irq_chip_eoi_parent;
> + if (!chip->irq_ack && (pops->chip_flags & MSI_CHIP_FLAG_SET_ACK))
> + chip->irq_ack = irq_chip_ack_parent;
>
> /*
> * The device MSI domain can never have a set affinity callback. It
> @@ -105,7 +106,7 @@ bool msi_lib_init_dev_msi_info(struct de
> * device MSI domain aside of mask/unmask which is provided e.g. by
> * PCI/MSI device domains.
> */
> - info->chip->irq_set_affinity = msi_domain_set_affinity;
> + chip->irq_set_affinity = msi_domain_set_affinity;
> return true;
> }
> EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
> --- a/drivers/irqchip/irq-mvebu-gicp.c
> +++ b/drivers/irqchip/irq-mvebu-gicp.c
> @@ -161,6 +161,7 @@ static const struct irq_domain_ops gicp_
> static const struct msi_parent_ops gicp_msi_parent_ops = {
> .supported_flags = GICP_MSI_FLAGS_SUPPORTED,
> .required_flags = GICP_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "GICP-",
> --- a/drivers/irqchip/irq-mvebu-odmi.c
> +++ b/drivers/irqchip/irq-mvebu-odmi.c
> @@ -157,6 +157,7 @@ static const struct irq_domain_ops odmi_
> static const struct msi_parent_ops odmi_msi_parent_ops = {
> .supported_flags = ODMI_MSI_FLAGS_SUPPORTED,
> .required_flags = ODMI_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "ODMI-",
> --- a/drivers/irqchip/irq-mvebu-sei.c
> +++ b/drivers/irqchip/irq-mvebu-sei.c
> @@ -356,6 +356,7 @@ static void mvebu_sei_reset(struct mvebu
> static const struct msi_parent_ops sei_msi_parent_ops = {
> .supported_flags = SEI_MSI_FLAGS_SUPPORTED,
> .required_flags = SEI_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .prefix = "SEI-",
> --- a/drivers/irqchip/irq-riscv-imsic-platform.c
> +++ b/drivers/irqchip/irq-riscv-imsic-platform.c
> @@ -21,6 +21,7 @@
> #include <linux/smp.h>
>
> #include "irq-riscv-imsic-state.h"
> +#include "irq-msi-lib.h"
>
> static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index,
> phys_addr_t *out_msi_pa)
> @@ -84,19 +85,10 @@ static void imsic_irq_compose_msg(struct
> }
>
> #ifdef CONFIG_SMP
> -static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
> -{
> - struct msi_msg msg = { };
> -
> - imsic_irq_compose_vector_msg(vec, &msg);
> - irq_data_get_irq_chip(d)->irq_write_msi_msg(d, &msg);
> -}
> -
> static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
> bool force)
> {
> struct imsic_vector *old_vec, *new_vec;
> - struct irq_data *pd = d->parent_data;
>
> old_vec = irq_data_get_irq_chip_data(pd);
> if (WARN_ON(!old_vec))
> @@ -115,14 +107,11 @@ static int imsic_irq_set_affinity(struct
> if (!new_vec)
> return -ENOSPC;
>
> - /* Point device to the new vector */
> - imsic_msi_update_msg(d, new_vec);
> -
> /* Update irq descriptors with the new vector */
> - pd->chip_data = new_vec;
> + d->chip_data = new_vec;
>
> /* Update effective affinity of parent irq data */
> - irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
> + irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu));
>
> /* Move state of the old vector to the new vector */
> imsic_vector_move(old_vec, new_vec);
> @@ -137,6 +126,9 @@ static struct irq_chip imsic_irq_base_ch
> .irq_unmask = imsic_irq_unmask,
> .irq_retrigger = imsic_irq_retrigger,
> .irq_compose_msi_msg = imsic_irq_compose_msg,
> +#ifdef CONFIG_SMP
> + .irq_set_affinity = imsic_irq_set_affinity,
> +#endif
> .flags = IRQCHIP_SKIP_SET_WAKE |
> IRQCHIP_MASK_ON_SUSPEND,
> };
> @@ -172,22 +164,6 @@ static void imsic_irq_domain_free(struct
> irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> }
>
> -static int imsic_irq_domain_select(struct irq_domain *domain, struct irq_fwspec *fwspec,
> - enum irq_domain_bus_token bus_token)
> -{
> - const struct msi_parent_ops *ops = domain->msi_parent_ops;
> - u32 busmask = BIT(bus_token);
> -
> - if (fwspec->fwnode != domain->fwnode || fwspec->param_count != 0)
> - return 0;
> -
> - /* Handle pure domain searches */
> - if (bus_token == ops->bus_select_token)
> - return 1;
> -
> - return !!(ops->bus_select_mask & busmask);
> -}
> -
> #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
> struct irq_data *irqd, int ind)
> @@ -210,104 +186,15 @@ static const struct irq_domain_ops imsic
> #endif
> };
>
> -#ifdef CONFIG_RISCV_IMSIC_PCI
> -
> -static void imsic_pci_mask_irq(struct irq_data *d)
> -{
> - pci_msi_mask_irq(d);
> - irq_chip_mask_parent(d);
> -}
> -
> -static void imsic_pci_unmask_irq(struct irq_data *d)
> -{
> - irq_chip_unmask_parent(d);
> - pci_msi_unmask_irq(d);
> -}
> -
> -#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI)
> -
> -#else
> -
> -#define MATCH_PCI_MSI 0
> -
> -#endif
> -
> -static bool imsic_init_dev_msi_info(struct device *dev,
> - struct irq_domain *domain,
> - struct irq_domain *real_parent,
> - struct msi_domain_info *info)
> -{
> - const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
> -
> - /* MSI parent domain specific settings */
> - switch (real_parent->bus_token) {
> - case DOMAIN_BUS_NEXUS:
> - if (WARN_ON_ONCE(domain != real_parent))
> - return false;
> -#ifdef CONFIG_SMP
> - info->chip->irq_set_affinity = imsic_irq_set_affinity;
> -#endif
> - break;
> - default:
> - WARN_ON_ONCE(1);
> - return false;
> - }
> -
> - /* Is the target supported? */
> - switch (info->bus_token) {
> -#ifdef CONFIG_RISCV_IMSIC_PCI
> - case DOMAIN_BUS_PCI_DEVICE_MSI:
> - case DOMAIN_BUS_PCI_DEVICE_MSIX:
> - info->chip->irq_mask = imsic_pci_mask_irq;
> - info->chip->irq_unmask = imsic_pci_unmask_irq;
> - break;
> -#endif
> - case DOMAIN_BUS_DEVICE_MSI:
> - /*
> - * Per-device MSI should never have any MSI feature bits
> - * set. It's sole purpose is to create a dumb interrupt
> - * chip which has a device specific irq_write_msi_msg()
> - * callback.
> - */
> - if (WARN_ON_ONCE(info->flags))
> - return false;
> -
> - /* Core managed MSI descriptors */
> - info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS |
> - MSI_FLAG_FREE_MSI_DESCS;
> - break;
> - case DOMAIN_BUS_WIRED_TO_MSI:
> - break;
> - default:
> - WARN_ON_ONCE(1);
> - return false;
> - }
> -
> - /* Use hierarchial chip operations re-trigger */
> - info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
> -
> - /*
> - * Mask out the domain specific MSI feature flags which are not
> - * supported by the real parent.
> - */
> - info->flags &= pops->supported_flags;
> -
> - /* Enforce the required flags */
> - info->flags |= pops->required_flags;
> -
> - return true;
> -}
> -
> -#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
> -
> static const struct msi_parent_ops imsic_msi_parent_ops = {
> .supported_flags = MSI_GENERIC_FLAGS_MASK |
> MSI_FLAG_PCI_MSIX,
> .required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
> - MSI_FLAG_USE_DEF_CHIP_OPS,
> + MSI_FLAG_USE_DEF_CHIP_OPS |
> + MSI_FLAG_PCI_MSI_MASK_PARENT,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
> - .init_dev_msi_info = imsic_init_dev_msi_info,
> + .init_dev_msi_info = msi_lib_init_dev_msi_info,
> };
>
> int imsic_irqdomain_init(void)
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -558,11 +558,21 @@ enum {
> MSI_FLAG_NO_AFFINITY = (1 << 21),
> };
>
> +/*
> + * Flags for msi_parent_ops::chip_flags
> + */
> +enum {
> + MSI_CHIP_FLAG_SET_EOI = (1 << 0),
> + MSI_CHIP_FLAG_SET_ACK = (1 << 1),
> +};
> +
> /**
> * struct msi_parent_ops - MSI parent domain callbacks and configuration info
> *
> * @supported_flags: Required: The supported MSI flags of the parent domain
> * @required_flags: Optional: The required MSI flags of the parent MSI domain
> + * @chip_flags: Optional: Select MSI chip callbacks to update with defaults
> + * in msi_lib_init_dev_msi_info().
> * @bus_select_token: Optional: The bus token of the real parent domain for
> * irq_domain::select()
> * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
> @@ -575,6 +585,7 @@ enum {
> struct msi_parent_ops {
> u32 supported_flags;
> u32 required_flags;
> + u32 chip_flags;
> u32 bus_select_token;
> u32 bus_select_mask;
> const char *prefix;
On Tue, Dec 03 2024 at 22:07, Anup Patel wrote: > On Tue, Dec 3, 2024 at 7:23 PM Thomas Gleixner <tglx@linutronix.de> wrote: >> Sorry, I missed that when reviewing the original IMSIC MSI support. >> >> The whole IMSIC MSI support can be moved over to MSI LIB which makes all >> of this indirection go away and your intermediate domain will just fit >> in. >> >> Uncompiled patch below. If that works, it needs to be split up properly. >> >> Note, this removes the setup of the irq_retrigger callback, but that's >> fine because on hierarchical domains irq_chip_retrigger_hierarchy() is >> invoked anyway. See try_retrigger(). > > The IMSIC driver was merged one kernel release before common > MSI LIB was merged. Ah indeed. > We should definitely update the IMSIC driver to use MSI LIB, I will > try your suggested changes (below) and post a separate series. Pick up the delta patch I gave Andrew...
On Tue, Dec 03 2024 at 21:55, Thomas Gleixner wrote:
> On Tue, Dec 03 2024 at 22:07, Anup Patel wrote:
>> On Tue, Dec 3, 2024 at 7:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>>> Sorry, I missed that when reviewing the original IMSIC MSI support.
>>>
>>> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
>>> of this indirection go away and your intermediate domain will just fit
>>> in.
>>>
>>> Uncompiled patch below. If that works, it needs to be split up properly.
>>>
>>> Note, this removes the setup of the irq_retrigger callback, but that's
>>> fine because on hierarchical domains irq_chip_retrigger_hierarchy() is
>>> invoked anyway. See try_retrigger().
>>
>> The IMSIC driver was merged one kernel release before common
>> MSI LIB was merged.
>
> Ah indeed.
>
>> We should definitely update the IMSIC driver to use MSI LIB, I will
>> try your suggested changes (below) and post a separate series.
>
> Pick up the delta patch I gave Andrew...
As I was looking at something else MSI related I had a look at
imsic_irq_set_affinity() again.
It's actually required to have the message write in that function and
not afterwards as you invoke imsic_vector_move() from that function.
That's obviously not true for the remap case as that will not change the
message address/data pair because the remap table entry is immutable -
at least I assume so for my mental sanity sake :)
But that brings me to a related question. How is this supposed to work
with non-atomic message updates? PCI/MSI does not necessarily provide
masking, and the write of the address/data pair is done in bits and
pieces. So you can end up with an intermediate state seen by the device
which ends up somewhere in interrupt nirvana space.
See the dance in msi_set_affinity() and commit 6f1a4891a592
("x86/apic/msi: Plug non-maskable MSI affinity race") for further
explanation.
The way how the IMSIC driver works seems to be pretty much the same as
the x86 APIC mess:
@address is the physical address of the per CPU MSI target
address and @data is the vector ID on that CPU.
So the non-atomic update in case of non-maskable MSI suffers from the
same problem. It works most of the time, but if it doesn't you might
stare at the occasionally lost interrupt and the stale device in
disbelief for quite a while :)
I might be missing something which magically prevent that though :)
Thanks,
tglx
On Wed, Dec 4, 2024 at 4:29 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Dec 03 2024 at 21:55, Thomas Gleixner wrote:
> > On Tue, Dec 03 2024 at 22:07, Anup Patel wrote:
> >> On Tue, Dec 3, 2024 at 7:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
> >>> Sorry, I missed that when reviewing the original IMSIC MSI support.
> >>>
> >>> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
> >>> of this indirection go away and your intermediate domain will just fit
> >>> in.
> >>>
> >>> Uncompiled patch below. If that works, it needs to be split up properly.
> >>>
> >>> Note, this removes the setup of the irq_retrigger callback, but that's
> >>> fine because on hierarchical domains irq_chip_retrigger_hierarchy() is
> >>> invoked anyway. See try_retrigger().
> >>
> >> The IMSIC driver was merged one kernel release before common
> >> MSI LIB was merged.
> >
> > Ah indeed.
> >
> >> We should definitely update the IMSIC driver to use MSI LIB, I will
> >> try your suggested changes (below) and post a separate series.
> >
> > Pick up the delta patch I gave Andrew...
>
> As I was looking at something else MSI related I had a look at
> imsic_irq_set_affinity() again.
>
> It's actually required to have the message write in that function and
> not afterwards as you invoke imsic_vector_move() from that function.
>
> That's obviously not true for the remap case as that will not change the
> message address/data pair because the remap table entry is immutable -
> at least I assume so for my mental sanity sake :)
>
> But that brings me to a related question. How is this supposed to work
> with non-atomic message updates? PCI/MSI does not necessarily provide
> masking, and the write of the address/data pair is done in bits and
> pieces. So you can end up with an intermediate state seen by the device
> which ends up somewhere in interrupt nirvana space.
>
> See the dance in msi_set_affinity() and commit 6f1a4891a592
> ("x86/apic/msi: Plug non-maskable MSI affinity race") for further
> explanation.
>
> The way how the IMSIC driver works seems to be pretty much the same as
> the x86 APIC mess:
>
> @address is the physical address of the per CPU MSI target
> address and @data is the vector ID on that CPU.
>
> So the non-atomic update in case of non-maskable MSI suffers from the
> same problem. It works most of the time, but if it doesn't you might
> stare at the occasionally lost interrupt and the stale device in
> disbelief for quite a while :)
Yes, we have the same challenges as x86 APIC when changing
MSI affinity.
>
> I might be missing something which magically prevent that though :)
>
Your understanding is correct. In fact, the IMSIC msi_set_affinity()
handling is inspired from x86 APIC approach due to similarity in
the overall MSI controller.
The high-level idea of imsic_irq_set_affinity() is as follows:
1) Allocate new_vector (new CPU IMSIC address + new ID on that CPU)
2) Update the MSI address and data programmed in the device
based on new_vector (see imsic_msi_update_msg())
3) At this point the device points to the new_vector but old_vector
(old CPU IMSIC address + old ID on that CPU) is still enabled and
we might have received MSI on old_vector while we were busy
setting up a new_vector for the device. To address this, we call
imsic_vector_move().
4) The imsic_vector_move() marks the old_vector as being
moved and schedules a lazy timer on the old CPU.
5) The lazy timer expires on the old CPU and results in
__imsic_local_sync() being called on the old CPU.
6) If there was a pending MSI on the old vector then the
__imsic_local_sync() function injects an MSI to the
new_vector using an MMIO write.
It is very unlikely that an MSI from device will be dropped
(unless I am missing something) but the unsolved issue
is that handling of in-flight MSI received on the old_vector
during the MSI re-programming is delayed which may have
side effects on the device driver side.
I believe in the future RISC-V AIA v2.0 (whenever that
happens) will address the gaps in AIA v1.0 (like this one).
Regards,
Anup
On Wed, Dec 04 2024 at 09:13, Anup Patel wrote:
> On Wed, Dec 4, 2024 at 4:29 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>> As I was looking at something else MSI related I had a look at
>> imsic_irq_set_affinity() again.
>>
>> It's actually required to have the message write in that function and
>> not afterwards as you invoke imsic_vector_move() from that function.
>>
>> That's obviously not true for the remap case as that will not change the
>> message address/data pair because the remap table entry is immutable -
>> at least I assume so for my mental sanity sake :)
>>
>> But that brings me to a related question. How is this supposed to work
>> with non-atomic message updates? PCI/MSI does not necessarily provide
>> masking, and the write of the address/data pair is done in bits and
>> pieces. So you can end up with an intermediate state seen by the device
>> which ends up somewhere in interrupt nirvana space.
>>
>> See the dance in msi_set_affinity() and commit 6f1a4891a592
>> ("x86/apic/msi: Plug non-maskable MSI affinity race") for further
>> explanation.
>>
>> The way how the IMSIC driver works seems to be pretty much the same as
>> the x86 APIC mess:
>>
>> @address is the physical address of the per CPU MSI target
>> address and @data is the vector ID on that CPU.
>>
>> So the non-atomic update in case of non-maskable MSI suffers from the
>> same problem. It works most of the time, but if it doesn't you might
>> stare at the occasionally lost interrupt and the stale device in
>> disbelief for quite a while :)
>
> Yes, we have the same challenges as x86 APIC when changing
> MSI affinity.
>>
>> I might be missing something which magically prevent that though :)
>>
> Your understanding is correct. In fact, the IMSIC msi_set_affinity()
> handling is inspired from x86 APIC approach due to similarity in
> the overall MSI controller.
>
> The high-level idea of imsic_irq_set_affinity() is as follows:
>
> 1) Allocate new_vector (new CPU IMSIC address + new ID on that CPU)
>
> 2) Update the MSI address and data programmed in the device
> based on new_vector (see imsic_msi_update_msg())
>
> 3) At this point the device points to the new_vector but old_vector
> (old CPU IMSIC address + old ID on that CPU) is still enabled and
> we might have received MSI on old_vector while we were busy
> setting up a new_vector for the device. To address this, we call
> imsic_vector_move().
>
> 4) The imsic_vector_move() marks the old_vector as being
> moved and schedules a lazy timer on the old CPU.
>
> 5) The lazy timer expires on the old CPU and results in
> __imsic_local_sync() being called on the old CPU.
>
> 6) If there was a pending MSI on the old vector then the
> __imsic_local_sync() function injects an MSI to the
> new_vector using an MMIO write.
>
> It is very unlikely that an MSI from device will be dropped
> (unless I am missing something) but the unsolved issue
> is that handling of in-flight MSI received on the old_vector
> during the MSI re-programming is delayed which may have
> side effects on the device driver side.
Interrupt delivery can be delayed for tons of other reasons.
But yes, you are missing something which is worse than a jiffie delay:
CPU Device
msi_update_msg()
compose()
write()
write(msg->address_lo); [1]
write(msg->address_hi); [2]
Raises interrupt [3]
write(msg->data); [4]
[2] can be ignored as it should not change (otherwise 32bit only devices
would not work).
Lets assume that the original message was:
addr_lo = 0x1000, data = 0x10 (CPU1, vector 0x10)
The new message is
addr_lo = 0x2000, data = 0x20 (CPU2, vector 0x20)
After [2] the device sees:
addr_lo = 0x2000, data = 0x10 (CPU2, vector 0x10)
The interrupt raised in [3] will end up on CPU2 at vector 0x10 which
might be not in use or used by some other device. In any case the
interrupt is not reaching the real device handler and you can't see that
interrupt as pending in CPU1.
That's why x86 in case of non-remapped interrupts has this for the two
situations:
1) old->data == new->data
write_msg(new);
The next interrupt either arrives on the old CPU or on the new CPU
depending on timing. There is no intermediate state because the
vector (data) is the same on both CPUs.
2) old->data != new->data
tmp_msg.addr = old_msg.addr
tmp_msg.data = new_msg.data
write_msg(tmp_msg);
So after that write the device might raise the interrupt on CPU1
and vector 0x20.
The next step is
write_msg(new);
which changes the destination CPU to CPU2.
So depending what the device has observed the interrupt might end
up on
CPU1 vector 0x10 (old)
CPU1 vector 0x20 (tmp)
CPU2 vector 0x20 (new)
CPU1 vector 0x20 (tmp) is then checked in the pending register and
the interrupt is retriggered if pending.
That requires to move the interrupt from actual interrupt context on the
old target CPU. It allows to evaluate the old target CPUs pending
register with local interrupts disabled, which obviously does not work
remote.
I don't see a way how that can work remote with the IMSIC either even if
you can easily access the pending state of the remote CPU:
CPU0 CPU1 Device
set_affinity()
write_msg(tmp)
write(addr); // CPU1
write(data); // vector 0x20
raise IRQ
handle vector 0x20
(spurious or other device)
check_pending(CPU1, 0x20) == false -> Interrupt is lost
Remapped interrupts do not have that issue because the table update is
atomic vs. a concurrently raised interrupt, so that it either ends up on
the old or on the new destination. The device message does not change as
it always targets the table entry, which is immutable after setup.
That's why x86 has this special msi affinity setter for the non remapped
case. For the remapped case it's just using the hierarchy default which
ends up at the remap domain.
So the hierarchies look like this:
1) non-remap
PCI/MSI device domain
irq_chip.irq_set_affinity = msi_set_affinity;
VECTOR domain
2) remap
PCI/MSI device domain
irq_chip.irq_set_affinity = msi_domain_set_affinity;
REMAP domain
irq_chip.irq_set_affinity = remap_set_affinity;
VECTOR domain
The special case of msi_set_affinity() is not involved in the remap case
and solely utilized for the non-remap horrors. The remap case does not
require the interrupt to be moved in interrupt context either because
there is no intermediate step and pending register check on the old CPU
involved.
The vector cleanup of the old CPU always happens when the interrupt
arrives on the new target for the first time independent of remapping
mode. That's required for both cases to take care of the case where the
interrupt is raised on the old vector (cpu1, 0x10) before the write
happens and is pending in CPU1. So after desc::lock is released and
interrupts are reenabled the interrupt is handled on CPU1. The next one
is guaranteed to arrive on CPU2 and that triggers the clean up of the
CPU1 vector, which releases it for reuse in the matrix allocator.
I think you should model it in a similar way instead of trying to
artificially reuse imsic_irq_set_affinity() for the remap case,
especially when you decide to close the non-maskable MSI hole described
above, which I recommend to do :)
You still can utilize msi-lib and just have your private implementation
of init_dev_msi_info() as a wrapper around the library similar to
mbi_init_dev_msi_info() and its_init_dev_msi_info(). That wrapper would
just handle the non-remap case to set info::chip:irq_set_affinity to the
magic non-remap function.
Hope that helps.
> I believe in the future RISC-V AIA v2.0 (whenever that
> happens) will address the gaps in AIA v1.0 (like this one).
If that is strictly translation table based, yes.
Thanks,
tglx
On Tue, Dec 03, 2024 at 02:53:45PM +0100, Thomas Gleixner wrote:
> On Thu, Nov 14 2024 at 17:18, Andrew Jones wrote:
> > @@ -96,9 +96,8 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> > bool force)
> > {
> > struct imsic_vector *old_vec, *new_vec;
> > - struct irq_data *pd = d->parent_data;
> >
> > - old_vec = irq_data_get_irq_chip_data(pd);
> > + old_vec = irq_data_get_irq_chip_data(d);
> > if (WARN_ON(!old_vec))
> > return -ENOENT;
> >
> > @@ -116,13 +115,13 @@ static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask
> > return -ENOSPC;
> >
> > /* Point device to the new vector */
> > - imsic_msi_update_msg(d, new_vec);
> > + imsic_msi_update_msg(irq_get_irq_data(d->irq), new_vec);
>
> This looks more than fishy. See below.
>
> > @@ -245,7 +247,7 @@ static bool imsic_init_dev_msi_info(struct device *dev,
> > if (WARN_ON_ONCE(domain != real_parent))
> > return false;
> > #ifdef CONFIG_SMP
> > - info->chip->irq_set_affinity = imsic_irq_set_affinity;
> > + info->chip->irq_set_affinity = irq_chip_set_affinity_parent;
>
> This should use msi_domain_set_affinity(), which does the right thing:
>
> 1) It invokes the irq_set_affinity() callback of the parent domain
>
> 2) It composes the message via the hierarchy
>
> 3) It writes the message with the msi_write_msg() callback of the top
> level domain
>
> Sorry, I missed that when reviewing the original IMSIC MSI support.
>
> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
> of this indirection go away and your intermediate domain will just fit
> in.
>
> Uncompiled patch below. If that works, it needs to be split up properly.
Thanks Thomas. I gave your patch below a go, but we now fail to have an
msi domain set up when probing devices which go through aplic_msi_setup(),
resulting in an immediate NULL deference in
msi_create_device_irq_domain(). I'll look closer tomorrow.
Thanks,
drew
>
> Note, this removes the setup of the irq_retrigger callback, but that's
> fine because on hierarchical domains irq_chip_retrigger_hierarchy() is
> invoked anyway. See try_retrigger().
>
> Thanks,
>
> tglx
> ---
> drivers/irqchip/Kconfig | 1
> drivers/irqchip/irq-gic-v2m.c | 1
> drivers/irqchip/irq-imx-mu-msi.c | 1
> drivers/irqchip/irq-msi-lib.c | 11 +-
> drivers/irqchip/irq-mvebu-gicp.c | 1
> drivers/irqchip/irq-mvebu-odmi.c | 1
> drivers/irqchip/irq-mvebu-sei.c | 1
> drivers/irqchip/irq-riscv-imsic-platform.c | 131 +----------------------------
> include/linux/msi.h | 11 ++
> 9 files changed, 32 insertions(+), 127 deletions(-)
>
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -587,6 +587,7 @@ config RISCV_IMSIC
> select IRQ_DOMAIN_HIERARCHY
> select GENERIC_IRQ_MATRIX_ALLOCATOR
> select GENERIC_MSI_IRQ
> + select IRQ_MSI_LIB
>
> config RISCV_IMSIC_PCI
> bool
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -255,6 +255,7 @@ static void __init gicv2m_teardown(void)
> static struct msi_parent_ops gicv2m_msi_parent_ops = {
> .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED,
> .required_flags = GICV2M_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
> .prefix = "GICv2m-",
> --- a/drivers/irqchip/irq-imx-mu-msi.c
> +++ b/drivers/irqchip/irq-imx-mu-msi.c
> @@ -214,6 +214,7 @@ static void imx_mu_msi_irq_handler(struc
> static const struct msi_parent_ops imx_mu_msi_parent_ops = {
> .supported_flags = IMX_MU_MSI_FLAGS_SUPPORTED,
> .required_flags = IMX_MU_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "MU-MSI-",
> --- a/drivers/irqchip/irq-msi-lib.c
> +++ b/drivers/irqchip/irq-msi-lib.c
> @@ -28,6 +28,7 @@ bool msi_lib_init_dev_msi_info(struct de
> struct msi_domain_info *info)
> {
> const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
> + struct irq_chip *chip = info->chip;
> u32 required_flags;
>
> /* Parent ops available? */
> @@ -92,10 +93,10 @@ bool msi_lib_init_dev_msi_info(struct de
> info->flags |= required_flags;
>
> /* Chip updates for all child bus types */
> - if (!info->chip->irq_eoi)
> - info->chip->irq_eoi = irq_chip_eoi_parent;
> - if (!info->chip->irq_ack)
> - info->chip->irq_ack = irq_chip_ack_parent;
> + if (!chip->irq_eoi && (pops->chip_flags & MSI_CHIP_FLAG_SET_EOI))
> + chip->irq_eoi = irq_chip_eoi_parent;
> + if (!chip->irq_ack && (pops->chip_flags & MSI_CHIP_FLAG_SET_ACK))
> + chip->irq_ack = irq_chip_ack_parent;
>
> /*
> * The device MSI domain can never have a set affinity callback. It
> @@ -105,7 +106,7 @@ bool msi_lib_init_dev_msi_info(struct de
> * device MSI domain aside of mask/unmask which is provided e.g. by
> * PCI/MSI device domains.
> */
> - info->chip->irq_set_affinity = msi_domain_set_affinity;
> + chip->irq_set_affinity = msi_domain_set_affinity;
> return true;
> }
> EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
> --- a/drivers/irqchip/irq-mvebu-gicp.c
> +++ b/drivers/irqchip/irq-mvebu-gicp.c
> @@ -161,6 +161,7 @@ static const struct irq_domain_ops gicp_
> static const struct msi_parent_ops gicp_msi_parent_ops = {
> .supported_flags = GICP_MSI_FLAGS_SUPPORTED,
> .required_flags = GICP_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "GICP-",
> --- a/drivers/irqchip/irq-mvebu-odmi.c
> +++ b/drivers/irqchip/irq-mvebu-odmi.c
> @@ -157,6 +157,7 @@ static const struct irq_domain_ops odmi_
> static const struct msi_parent_ops odmi_msi_parent_ops = {
> .supported_flags = ODMI_MSI_FLAGS_SUPPORTED,
> .required_flags = ODMI_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .prefix = "ODMI-",
> --- a/drivers/irqchip/irq-mvebu-sei.c
> +++ b/drivers/irqchip/irq-mvebu-sei.c
> @@ -356,6 +356,7 @@ static void mvebu_sei_reset(struct mvebu
> static const struct msi_parent_ops sei_msi_parent_ops = {
> .supported_flags = SEI_MSI_FLAGS_SUPPORTED,
> .required_flags = SEI_MSI_FLAGS_REQUIRED,
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK,
> .bus_select_mask = MATCH_PLATFORM_MSI,
> .bus_select_token = DOMAIN_BUS_GENERIC_MSI,
> .prefix = "SEI-",
> --- a/drivers/irqchip/irq-riscv-imsic-platform.c
> +++ b/drivers/irqchip/irq-riscv-imsic-platform.c
> @@ -21,6 +21,7 @@
> #include <linux/smp.h>
>
> #include "irq-riscv-imsic-state.h"
> +#include "irq-msi-lib.h"
>
> static bool imsic_cpu_page_phys(unsigned int cpu, unsigned int guest_index,
> phys_addr_t *out_msi_pa)
> @@ -84,19 +85,10 @@ static void imsic_irq_compose_msg(struct
> }
>
> #ifdef CONFIG_SMP
> -static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
> -{
> - struct msi_msg msg = { };
> -
> - imsic_irq_compose_vector_msg(vec, &msg);
> - irq_data_get_irq_chip(d)->irq_write_msi_msg(d, &msg);
> -}
> -
> static int imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
> bool force)
> {
> struct imsic_vector *old_vec, *new_vec;
> - struct irq_data *pd = d->parent_data;
>
> old_vec = irq_data_get_irq_chip_data(pd);
> if (WARN_ON(!old_vec))
> @@ -115,14 +107,11 @@ static int imsic_irq_set_affinity(struct
> if (!new_vec)
> return -ENOSPC;
>
> - /* Point device to the new vector */
> - imsic_msi_update_msg(d, new_vec);
> -
> /* Update irq descriptors with the new vector */
> - pd->chip_data = new_vec;
> + d->chip_data = new_vec;
>
> /* Update effective affinity of parent irq data */
> - irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
> + irq_data_update_effective_affinity(d, cpumask_of(new_vec->cpu));
>
> /* Move state of the old vector to the new vector */
> imsic_vector_move(old_vec, new_vec);
> @@ -137,6 +126,9 @@ static struct irq_chip imsic_irq_base_ch
> .irq_unmask = imsic_irq_unmask,
> .irq_retrigger = imsic_irq_retrigger,
> .irq_compose_msi_msg = imsic_irq_compose_msg,
> +#ifdef CONFIG_SMP
> + .irq_set_affinity = imsic_irq_set_affinity,
> +#endif
> .flags = IRQCHIP_SKIP_SET_WAKE |
> IRQCHIP_MASK_ON_SUSPEND,
> };
> @@ -172,22 +164,6 @@ static void imsic_irq_domain_free(struct
> irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> }
>
> -static int imsic_irq_domain_select(struct irq_domain *domain, struct irq_fwspec *fwspec,
> - enum irq_domain_bus_token bus_token)
> -{
> - const struct msi_parent_ops *ops = domain->msi_parent_ops;
> - u32 busmask = BIT(bus_token);
> -
> - if (fwspec->fwnode != domain->fwnode || fwspec->param_count != 0)
> - return 0;
> -
> - /* Handle pure domain searches */
> - if (bus_token == ops->bus_select_token)
> - return 1;
> -
> - return !!(ops->bus_select_mask & busmask);
> -}
> -
> #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
> struct irq_data *irqd, int ind)
> @@ -210,104 +186,15 @@ static const struct irq_domain_ops imsic
> #endif
> };
>
> -#ifdef CONFIG_RISCV_IMSIC_PCI
> -
> -static void imsic_pci_mask_irq(struct irq_data *d)
> -{
> - pci_msi_mask_irq(d);
> - irq_chip_mask_parent(d);
> -}
> -
> -static void imsic_pci_unmask_irq(struct irq_data *d)
> -{
> - irq_chip_unmask_parent(d);
> - pci_msi_unmask_irq(d);
> -}
> -
> -#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI)
> -
> -#else
> -
> -#define MATCH_PCI_MSI 0
> -
> -#endif
> -
> -static bool imsic_init_dev_msi_info(struct device *dev,
> - struct irq_domain *domain,
> - struct irq_domain *real_parent,
> - struct msi_domain_info *info)
> -{
> - const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
> -
> - /* MSI parent domain specific settings */
> - switch (real_parent->bus_token) {
> - case DOMAIN_BUS_NEXUS:
> - if (WARN_ON_ONCE(domain != real_parent))
> - return false;
> -#ifdef CONFIG_SMP
> - info->chip->irq_set_affinity = imsic_irq_set_affinity;
> -#endif
> - break;
> - default:
> - WARN_ON_ONCE(1);
> - return false;
> - }
> -
> - /* Is the target supported? */
> - switch (info->bus_token) {
> -#ifdef CONFIG_RISCV_IMSIC_PCI
> - case DOMAIN_BUS_PCI_DEVICE_MSI:
> - case DOMAIN_BUS_PCI_DEVICE_MSIX:
> - info->chip->irq_mask = imsic_pci_mask_irq;
> - info->chip->irq_unmask = imsic_pci_unmask_irq;
> - break;
> -#endif
> - case DOMAIN_BUS_DEVICE_MSI:
> - /*
> - * Per-device MSI should never have any MSI feature bits
> - * set. It's sole purpose is to create a dumb interrupt
> - * chip which has a device specific irq_write_msi_msg()
> - * callback.
> - */
> - if (WARN_ON_ONCE(info->flags))
> - return false;
> -
> - /* Core managed MSI descriptors */
> - info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS |
> - MSI_FLAG_FREE_MSI_DESCS;
> - break;
> - case DOMAIN_BUS_WIRED_TO_MSI:
> - break;
> - default:
> - WARN_ON_ONCE(1);
> - return false;
> - }
> -
> - /* Use hierarchial chip operations re-trigger */
> - info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
> -
> - /*
> - * Mask out the domain specific MSI feature flags which are not
> - * supported by the real parent.
> - */
> - info->flags &= pops->supported_flags;
> -
> - /* Enforce the required flags */
> - info->flags |= pops->required_flags;
> -
> - return true;
> -}
> -
> -#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
> -
> static const struct msi_parent_ops imsic_msi_parent_ops = {
> .supported_flags = MSI_GENERIC_FLAGS_MASK |
> MSI_FLAG_PCI_MSIX,
> .required_flags = MSI_FLAG_USE_DEF_DOM_OPS |
> - MSI_FLAG_USE_DEF_CHIP_OPS,
> + MSI_FLAG_USE_DEF_CHIP_OPS |
> + MSI_FLAG_PCI_MSI_MASK_PARENT,
> .bus_select_token = DOMAIN_BUS_NEXUS,
> .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI,
> - .init_dev_msi_info = imsic_init_dev_msi_info,
> + .init_dev_msi_info = msi_lib_init_dev_msi_info,
> };
>
> int imsic_irqdomain_init(void)
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -558,11 +558,21 @@ enum {
> MSI_FLAG_NO_AFFINITY = (1 << 21),
> };
>
> +/*
> + * Flags for msi_parent_ops::chip_flags
> + */
> +enum {
> + MSI_CHIP_FLAG_SET_EOI = (1 << 0),
> + MSI_CHIP_FLAG_SET_ACK = (1 << 1),
> +};
> +
> /**
> * struct msi_parent_ops - MSI parent domain callbacks and configuration info
> *
> * @supported_flags: Required: The supported MSI flags of the parent domain
> * @required_flags: Optional: The required MSI flags of the parent MSI domain
> + * @chip_flags: Optional: Select MSI chip callbacks to update with defaults
> + * in msi_lib_init_dev_msi_info().
> * @bus_select_token: Optional: The bus token of the real parent domain for
> * irq_domain::select()
> * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
> @@ -575,6 +585,7 @@ enum {
> struct msi_parent_ops {
> u32 supported_flags;
> u32 required_flags;
> + u32 chip_flags;
> u32 bus_select_token;
> u32 bus_select_mask;
> const char *prefix;
On Tue, Dec 03 2024 at 17:27, Andrew Jones wrote:
> On Tue, Dec 03, 2024 at 02:53:45PM +0100, Thomas Gleixner wrote:
>> On Thu, Nov 14 2024 at 17:18, Andrew Jones wrote:
>> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
>> of this indirection go away and your intermediate domain will just fit
>> in.
>>
>> Uncompiled patch below. If that works, it needs to be split up properly.
>
> Thanks Thomas. I gave your patch below a go, but we now fail to have an
> msi domain set up when probing devices which go through aplic_msi_setup(),
> resulting in an immediate NULL deference in
> msi_create_device_irq_domain(). I'll look closer tomorrow.
Duh! I forgot to update the .select callback. I don't know how you fixed that
compile fail up. Delta patch below.
Thanks,
tglx
---
--- a/drivers/irqchip/irq-riscv-imsic-platform.c
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -180,7 +180,7 @@ static void imsic_irq_debug_show(struct
static const struct irq_domain_ops imsic_base_domain_ops = {
.alloc = imsic_irq_domain_alloc,
.free = imsic_irq_domain_free,
- .select = imsic_irq_domain_select,
+ .select = msi_lib_irq_domain_select,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
.debug_show = imsic_irq_debug_show,
#endif
On Tue, Dec 03, 2024 at 05:50:13PM +0100, Thomas Gleixner wrote:
> On Tue, Dec 03 2024 at 17:27, Andrew Jones wrote:
> > On Tue, Dec 03, 2024 at 02:53:45PM +0100, Thomas Gleixner wrote:
> >> On Thu, Nov 14 2024 at 17:18, Andrew Jones wrote:
> >> The whole IMSIC MSI support can be moved over to MSI LIB which makes all
> >> of this indirection go away and your intermediate domain will just fit
> >> in.
> >>
> >> Uncompiled patch below. If that works, it needs to be split up properly.
> >
> > Thanks Thomas. I gave your patch below a go, but we now fail to have an
> > msi domain set up when probing devices which go through aplic_msi_setup(),
> > resulting in an immediate NULL deference in
> > msi_create_device_irq_domain(). I'll look closer tomorrow.
>
> Duh! I forgot to update the .select callback. I don't know how you fixed that
> compile fail up. Delta patch below.
>
> Thanks,
>
> tglx
> ---
> --- a/drivers/irqchip/irq-riscv-imsic-platform.c
> +++ b/drivers/irqchip/irq-riscv-imsic-platform.c
> @@ -180,7 +180,7 @@ static void imsic_irq_debug_show(struct
> static const struct irq_domain_ops imsic_base_domain_ops = {
> .alloc = imsic_irq_domain_alloc,
> .free = imsic_irq_domain_free,
> - .select = imsic_irq_domain_select,
> + .select = msi_lib_irq_domain_select,
> #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> .debug_show = imsic_irq_debug_show,
> #endif
Hi Thomas,
With this select fix and replacing patch 03/15 of this series with the
following diff, this irqbypass PoC still works.
Based on what Anup said, I kept imsic_msi_update_msg(), which means I
kept this entire patch (01/15) as is. Anup is working on a series to fix
the non-atomic MSI message writes to the device and will likely pick this
patch up along with your changes to convert IMSIC to msi-lib.
I'd like to know your opinion on patch 02/15 of this series and the diff
below. afaict, x86 does something similar with the DOMAIN_BUS_DMAR and
DOMAIN_BUS_AMDVI tokens in x86_init_dev_msi_info().
Thanks,
drew
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index 51464c6257f3..cc18516a4e82 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -36,14 +36,14 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
return false;
/*
- * MSI parent domain specific settings. For now there is only the
- * root parent domain, e.g. NEXUS, acting as a MSI parent, but it is
- * possible to stack MSI parents. See x86 vector -> irq remapping
+ * MSI parent domain specific settings. There may be only the root
+ * parent domain, e.g. NEXUS, acting as a MSI parent, or there may
+ * be stacked MSI parents, typically used for remapping.
*/
if (domain->bus_token == pops->bus_select_token) {
if (WARN_ON_ONCE(domain != real_parent))
return false;
- } else {
+ } else if (real_parent->bus_token != DOMAIN_BUS_MSI_REMAP) {
WARN_ON_ONCE(1);
return false;
}
© 2016 - 2026 Red Hat, Inc.