[PATCH 15/25] genirq: Allow per-cpu interrupt sharing for non-overlapping affinities

Posted by Marc Zyngier 5 months ago
Interrupt sharing for percpu-devid interrupts is forbidden, and
for good reasons. These are interrupts generated *from* a CPU and
handled by itself (timer, for example). Nobody in their right mind
would put two devices on the same pin (and if they have, they get to
keep the pieces...).

But this also prevents more benign cases, where devices are connected
to groups of CPUs, and for which the affinities are not overlapping.
Effectively, the only thing they share is the interrupt number, and
nothing else.
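
As an illustration only (not something this patch adds), think of two
devices wired to the same per-cpu interrupt number but driven from
disjoint groups of CPUs (say, two clusters). The only property the
rest of the patch relies on is that their affinity masks never
intersect:

static bool percpu_sharing_is_benign(const struct cpumask *grp_a,
				     const struct cpumask *grp_b)
{
	/* e.g. grp_a = cluster 0 CPUs, grp_b = cluster 1 CPUs */
	return !cpumask_intersects(grp_a, grp_b);
}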

Let's tweak the definition of IRQF_SHARED applied to percpu_devid
interrupts to allow this particular case. This results in extra
validation at the point of the interrupt being set up and freed,
as well as a tiny bit of extra complexity at interrupt handling
time (to pick the correct irqaction).
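
To make the intent concrete, here is a purely hypothetical driver-side
sketch: request_percpu_irq_affinity(), handler_a/handler_b and the
"dev-a"/"dev-b" names are invented for illustration only, the real
request-side plumbing is not part of this patch:

static int demo_request_both(unsigned int irq,
			     void __percpu *pcpu_a, void __percpu *pcpu_b,
			     const struct cpumask *mask_a,
			     const struct cpumask *mask_b)
{
	int err;

	/* First group of CPUs */
	err = request_percpu_irq_affinity(irq, handler_a, "dev-a",
					  pcpu_a, mask_a);
	if (err)
		return err;

	/*
	 * Second group: only accepted because mask_b does not intersect
	 * mask_a (and pcpu_b != pcpu_a); any overlap now fails with
	 * -EINVAL at setup time.
	 */
	return request_percpu_irq_affinity(irq, handler_b, "dev-b",
					   pcpu_b, mask_b);
}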

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 kernel/irq/chip.c   |  8 ++++--
 kernel/irq/manage.c | 67 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0d0276378c707..af90dd440d5ee 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -897,8 +897,9 @@ void handle_percpu_irq(struct irq_desc *desc)
 void handle_percpu_devid_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	struct irqaction *action = desc->action;
 	unsigned int irq = irq_desc_get_irq(desc);
+	unsigned int cpu = smp_processor_id();
+	struct irqaction *action;
 	irqreturn_t res;
 
 	/*
@@ -910,12 +911,15 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
 
+	for (action = desc->action; action; action = action->next)
+		if (cpumask_test_cpu(cpu, action->affinity))
+			break;
+
 	if (likely(action)) {
 		trace_irq_handler_entry(irq, action);
 		res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
 		trace_irq_handler_exit(irq, action, res);
 	} else {
-		unsigned int cpu = smp_processor_id();
 		bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
 
 		if (enabled)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a17ac522622e7..1ac79df0e5f76 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1418,6 +1418,19 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 	return 0;
 }
 
+static bool valid_percpu_irqaction(struct irqaction *old, struct irqaction *new)
+{
+	do {
+		if (cpumask_intersects(old->affinity, new->affinity) ||
+		    old->percpu_dev_id == new->percpu_dev_id)
+			return false;
+
+		old = old->next;
+	} while (old);
+
+	return true;
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -1438,6 +1451,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	struct irqaction *old, **old_ptr;
 	unsigned long flags, thread_mask = 0;
 	int ret, nested, shared = 0;
+	bool per_cpu_devid;
 
 	if (!desc)
 		return -EINVAL;
@@ -1447,6 +1461,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (!try_module_get(desc->owner))
 		return -ENODEV;
 
+	per_cpu_devid = irq_settings_is_per_cpu_devid(desc);
+
 	new->irq = irq;
 
 	/*
@@ -1554,13 +1570,20 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 */
 		unsigned int oldtype;
 
-		if (irq_is_nmi(desc)) {
+		if (irq_is_nmi(desc) && !per_cpu_devid) {
 			pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n",
 				new->name, irq, desc->irq_data.chip->name);
 			ret = -EINVAL;
 			goto out_unlock;
 		}
 
+		if (per_cpu_devid && !valid_percpu_irqaction(old, new)) {
+			pr_err("Overlapping affinities for %s (irq %d) on irqchip %s.\n",
+				new->name, irq, desc->irq_data.chip->name);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
 		/*
 		 * If nobody did set the configuration before, inherit
 		 * the one provided by the requester.
@@ -1711,7 +1734,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (!(new->flags & IRQF_NO_AUTOEN) &&
 		    irq_settings_can_autoenable(desc)) {
 			irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
-		} else {
+		} else if (!per_cpu_devid) {
 			/*
 			 * Shared interrupts do not go well with disabling
 			 * auto enable. The sharing interrupt might request
@@ -2346,7 +2369,7 @@ void disable_percpu_nmi(unsigned int irq)
 static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	struct irqaction *action;
+	struct irqaction *action, **action_ptr;
 
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
 
@@ -2354,21 +2377,33 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
 		return NULL;
 
 	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
-		action = desc->action;
-		if (!action || action->percpu_dev_id != dev_id) {
-			WARN(1, "Trying to free already-free IRQ %d\n", irq);
-			return NULL;
+		action_ptr = &desc->action;
+		for (;;) {
+			action = *action_ptr;
+
+			if (!action) {
+				WARN(1, "Trying to free already-free IRQ %d\n", irq);
+				return NULL;
+			}
+
+			if (action->percpu_dev_id == dev_id)
+				break;
+
+			action_ptr = &action->next;
 		}
 
-		if (!cpumask_empty(desc->percpu_enabled)) {
-			WARN(1, "percpu IRQ %d still enabled on CPU%d!\n",
-			     irq, cpumask_first(desc->percpu_enabled));
+		if (cpumask_intersects(desc->percpu_enabled, action->affinity)) {
+			WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", irq,
+			     cpumask_first_and(desc->percpu_enabled, action->affinity));
 			return NULL;
 		}
 
 		/* Found it - now remove it from the list of entries: */
-		desc->action = NULL;
-		desc->istate &= ~IRQS_NMI;
+		*action_ptr = action->next;
+
+		/* Demote from NMI if we killed the last action */
+		if (!desc->action)
+			desc->istate &= ~IRQS_NMI;
 	}
 
 	unregister_handler_proc(irq, action);
@@ -2464,6 +2499,14 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler,
 	action->percpu_dev_id = dev_id;
 	action->affinity = affinity;
 
+	/*
+	 * We allow some form of sharing for non-overlapping affinity
+	 * masks. Obviously, covering all CPUs prevents any sharing in
+	 * the first place.
+	 */
+	if (!cpumask_equal(affinity, cpu_possible_mask))
+		action->flags |= IRQF_SHARED;
+
 	return action;
 }
 
-- 
2.39.2
Re: [PATCH 15/25] genirq: Allow per-cpu interrupt sharing for non-overlapping affinities
Posted by Marc Zyngier 5 months ago
On Mon, 08 Sep 2025 17:31:17 +0100,
Marc Zyngier <maz@kernel.org> wrote:
> 
> Interrupt sharing for percpu-devid interrupts is forbidden, and
> for good reasons. These are interrupts generated *from* a CPU and
> handled by itself (timer, for example). Nobody in their right mind
> would put two devices on the same pin (and if they have, they get to
> keep the pieces...).
> 
> But this also prevents more benign cases, where devices are connected
> to groups of CPUs, and for which the affinities are not overlapping.
> Effectively, the only thing they share is the interrupt number, and
> nothing else.
> 
> Let's tweak the definition of IRQF_SHARED applied to percpu_devid
> interrupts to allow this particular case. This results in extra
> validation at the point of the interrupt being set up and freed,
> as well as a tiny bit of extra complexity at interrupt handling
> time (to pick the correct irqaction).
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  kernel/irq/chip.c   |  8 ++++--
>  kernel/irq/manage.c | 67 +++++++++++++++++++++++++++++++++++++--------
>  2 files changed, 61 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
> index 0d0276378c707..af90dd440d5ee 100644
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -897,8 +897,9 @@ void handle_percpu_irq(struct irq_desc *desc)
>  void handle_percpu_devid_irq(struct irq_desc *desc)
>  {
>  	struct irq_chip *chip = irq_desc_get_chip(desc);
> -	struct irqaction *action = desc->action;
>  	unsigned int irq = irq_desc_get_irq(desc);
> +	unsigned int cpu = smp_processor_id();
> +	struct irqaction *action;
>  	irqreturn_t res;
>  
>  	/*
> @@ -910,12 +911,15 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
>  	if (chip->irq_ack)
>  		chip->irq_ack(&desc->irq_data);
>  
> +	for (action = desc->action; action; action = action->next)
> +		if (cpumask_test_cpu(cpu, action->affinity))
> +			break;
> +
>  	if (likely(action)) {
>  		trace_irq_handler_entry(irq, action);
>  		res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
>  		trace_irq_handler_exit(irq, action, res);
>  	} else {
> -		unsigned int cpu = smp_processor_id();
>  		bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
>  
>  		if (enabled)

As Will points out off the list, the above lacks similar
handling for percpu_devid NMIs, leading to NMIs that are only handled
on the first affinity group.

It's easy enough to move the above to common code and share it with
handle_percpu_devid_fasteoi_nmi(), but at this point there is hardly
any difference with handle_percpu_devid_irq().
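
Something along the lines of the sketch below (a rough cut, the name
is just a placeholder) could be shared by both flows, at which point
the NMI flavour brings nothing of its own:

static struct irqaction *percpu_devid_action(struct irq_desc *desc)
{
	unsigned int cpu = smp_processor_id();
	struct irqaction *action;

	/* Pick the irqaction whose affinity covers the current CPU */
	for (action = desc->action; action; action = action->next)
		if (cpumask_test_cpu(cpu, action->affinity))
			break;

	return action;
}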

Any objection to simply killing the NMI version?

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH 15/25] genirq: Allow per-cpu interrupt sharing for non-overlapping affinities
Posted by Thomas Gleixner 5 months ago
On Wed, Sep 10 2025 at 09:28, Marc Zyngier wrote:
> On Mon, 08 Sep 2025 17:31:17 +0100,
> As Will points out off the list, the above lacks similar
> handling for percpu_devid NMIs, leading to NMIs that are only handled
> on the first affinity group.
>
> It's easy enough to move the above to common code and share it with
> handle_percpu_devid_fasteoi_nmi(), but at this point there is hardly
> any difference with handle_percpu_devid_irq().
>
> Any objection to simply killing the NMI version?

Removing code is always appreciated :)
Re: [PATCH 15/25] genirq: Allow per-cpu interrupt sharing for non-overlapping affinities
Posted by Marc Zyngier 5 months ago
On Wed, 10 Sep 2025 15:47:01 +0100,
Thomas Gleixner <tglx@linutronix.de> wrote:
> 
> On Wed, Sep 10 2025 at 09:28, Marc Zyngier wrote:
> > On Mon, 08 Sep 2025 17:31:17 +0100,
> > As Will points out off the list, the above lacks similar
> > handling for percpu_devid NMIs, leading to NMIs that are only handled
> > on the first affinity group.
> >
> > It's easy enough to move the above to common code and share it with
> > handle_percpu_devid_fasteoi_nmi(), but at this point there is hardly
> > any difference with handle_percpu_devid_irq().
> >
> > Any objection to simply killing the NMI version?
> 
> Removing code is always appreciated :)
>

Works for me!

	M.

-- 
Without deviation from the norm, progress is not possible.