[PATCH v2] genirq: Fix nested thread vs synchronize_hardirq() deadlock

Vincent Whitchurch posted 1 patch 2 years, 7 months ago
There is a newer version of this series
kernel/irq/chip.c   |  5 +++--
kernel/irq/manage.c | 26 +++++++++++++++-----------
2 files changed, 18 insertions(+), 13 deletions(-)
[PATCH v2] genirq: Fix nested thread vs synchronize_hardirq() deadlock
Posted by Vincent Whitchurch 2 years, 7 months ago
There is a possibility of deadlock if synchronize_hardirq() is called
when the nested threaded interrupt is active.  The following scenario
was observed on a uniprocessor PREEMPT_NONE system:

 Thread 1                      Thread 2

 handle_nested_irq()
  Set INPROGRESS
  Call ->thread_fn()
   thread_fn goes to sleep

                              free_irq()
                               __synchronize_hardirq()
                                Busy-loop forever waiting for INPROGRESS
                                to be cleared

The INPROGRESS flag is only supposed to be used for hard interrupt
handlers.  Remove the incorrect usage in the nested threaded interrupt
case and instead re-use the threads_active / wait_for_threads mechanism
to wait for nested threaded interrupts to complete.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
Changes in v2:
- Reword commit message.
- Refactor and reuse synchronize_irq() instead of ending up open coding
  it.
- Link to v1: https://lore.kernel.org/r/20230613-genirq-nested-v1-1-289dc15b7669@axis.com
---
 kernel/irq/chip.c   |  5 +++--
 kernel/irq/manage.c | 26 +++++++++++++++-----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 49e7bc871fece..3e4b4c6de8195 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -476,7 +476,7 @@ void handle_nested_irq(unsigned int irq)
 	}
 
 	kstat_incr_irqs_this_cpu(desc);
-	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+	atomic_inc(&desc->threads_active);
 	raw_spin_unlock_irq(&desc->lock);
 
 	action_ret = IRQ_NONE;
@@ -487,7 +487,8 @@ void handle_nested_irq(unsigned int irq)
 		note_interrupt(desc, action_ret);
 
 	raw_spin_lock_irq(&desc->lock);
-	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+	if (atomic_dec_and_test(&desc->threads_active))
+		wake_up(&desc->wait_for_threads);
 
 out_unlock:
 	raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d2742af0f0fd8..b38c2c7c5c705 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -108,6 +108,18 @@ bool synchronize_hardirq(unsigned int irq)
 }
 EXPORT_SYMBOL(synchronize_hardirq);
 
+static void __synchronize_irq(struct irq_desc *desc)
+{
+	__synchronize_hardirq(desc, true);
+	/*
+	 * We made sure that no hardirq handler is
+	 * running. Now verify that no threaded handlers are
+	 * active.
+	 */
+	wait_event(desc->wait_for_threads,
+		   !atomic_read(&desc->threads_active));
+}
+
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *	@irq: interrupt number to wait for
@@ -127,16 +139,8 @@ void synchronize_irq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (desc) {
-		__synchronize_hardirq(desc, true);
-		/*
-		 * We made sure that no hardirq handler is
-		 * running. Now verify that no threaded handlers are
-		 * active.
-		 */
-		wait_event(desc->wait_for_threads,
-			   !atomic_read(&desc->threads_active));
-	}
+	if (desc)
+		__synchronize_irq(desc);
 }
 EXPORT_SYMBOL(synchronize_irq);
 
@@ -1944,7 +1948,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
 	 * supports it also make sure that there is no (not yet serviced)
 	 * interrupt in flight at the hardware level.
 	 */
-	__synchronize_hardirq(desc, true);
+	__synchronize_irq(desc);
 
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*

---
base-commit: 858fd168a95c5b9669aac8db6c14a9aeab446375
change-id: 20230613-genirq-nested-625612a6fa05

Best regards,
-- 
Vincent Whitchurch <vincent.whitchurch@axis.com>
Re: [PATCH v2] genirq: Fix nested thread vs synchronize_hardirq() deadlock
Posted by Thomas Gleixner 2 years, 7 months ago
On Tue, Jun 20 2023 at 13:16, Vincent Whitchurch wrote:
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -476,7 +476,7 @@ void handle_nested_irq(unsigned int irq)
>  	}
>  
>  	kstat_incr_irqs_this_cpu(desc);
> -	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
> +	atomic_inc(&desc->threads_active);
>  	raw_spin_unlock_irq(&desc->lock);
>  
>  	action_ret = IRQ_NONE;
> @@ -487,7 +487,8 @@ void handle_nested_irq(unsigned int irq)
>  		note_interrupt(desc, action_ret);
>  
>  	raw_spin_lock_irq(&desc->lock);
> -	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
> +	if (atomic_dec_and_test(&desc->threads_active))
> +		wake_up(&desc->wait_for_threads);

This breaks on RT. The wakeup cannot be inside the raw spin-locked
region.

Also this is open coding wake_threads_waitq().
  
> +static void __synchronize_irq(struct irq_desc *desc)
> +{
> +	__synchronize_hardirq(desc, true);
> +	/*
> +	 * We made sure that no hardirq handler is
> +	 * running. Now verify that no threaded handlers are
> +	 * active.
> +	 */
> +	wait_event(desc->wait_for_threads,
> +		   !atomic_read(&desc->threads_active));

Splitting this out is fine. Not reformatting it, not so much.

Thanks,

        tglx
Re: [PATCH v2] genirq: Fix nested thread vs synchronize_hardirq() deadlock
Posted by Vincent Whitchurch 2 years, 7 months ago
On Fri, 2023-06-30 at 11:07 +0200, Thomas Gleixner wrote:
> On Tue, Jun 20 2023 at 13:16, Vincent Whitchurch wrote:
> > --- a/kernel/irq/chip.c
> > +++ b/kernel/irq/chip.c
> > @@ -476,7 +476,7 @@ void handle_nested_irq(unsigned int irq)
> >  	}
> >  
> > 
> >  	kstat_incr_irqs_this_cpu(desc);
> > -	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
> > +	atomic_inc(&desc->threads_active);
> >  	raw_spin_unlock_irq(&desc->lock);
> >  
> > 
> >  	action_ret = IRQ_NONE;
> > @@ -487,7 +487,8 @@ void handle_nested_irq(unsigned int irq)
> >  		note_interrupt(desc, action_ret);
> >  
> > 
> >  	raw_spin_lock_irq(&desc->lock);
> > -	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
> > +	if (atomic_dec_and_test(&desc->threads_active))
> > +		wake_up(&desc->wait_for_threads);
> 
> This breaks on RT. The wakeup cannot be inside the raw spin-locked
> region.

OK.  I will remove the acquisition of the spin lock at the end of this
function since it was only used for protecting the irqd flags.

> Also this is open coding wake_threads_waitq().

OK, that's in manage.c so I'll make it non-static and add it to
internals.h and use it from here.

> > +static void __synchronize_irq(struct irq_desc *desc)
> > +{
> > +	__synchronize_hardirq(desc, true);
> > +	/*
> > +	 * We made sure that no hardirq handler is
> > +	 * running. Now verify that no threaded handlers are
> > +	 * active.
> > +	 */
> > +	wait_event(desc->wait_for_threads,
> > +		   !atomic_read(&desc->threads_active));
> 
> Splitting this out is fine. Not reformatting it, not so much.

Will fix.