[PATCH] irqdomain: Fix mapping-creation race

Johan Hovold posted 1 patch 3 years, 8 months ago
There is a newer version of this series
kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 12 deletions(-)
[PATCH] irqdomain: Fix mapping-creation race
Posted by Johan Hovold 3 years, 8 months ago
Parallel probing (e.g. due to asynchronous probing) of devices that share
interrupts can currently result in two mappings being created for the same
hardware interrupt.

Add a serialising mapping mutex so that looking for an existing mapping
before creating a new one is done atomically.

Note that serialising the lookup and creation in
irq_create_mapping_affinity() would have been enough to prevent the
duplicate mapping, but that could instead cause
irq_create_fwspec_mapping() to fail when there is a race.

Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: Dmitry Torokhov <dtor@chromium.org>
Cc: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
---
 kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fe1da9614ee..d263a7dd4170 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -22,6 +22,7 @@
 
 static LIST_HEAD(irq_domain_list);
 static DEFINE_MUTEX(irq_domain_mutex);
+static DEFINE_MUTEX(irq_mapping_mutex);
 
 static struct irq_domain *irq_default_domain;
 
@@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
 #endif
 
 /**
- * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
+ * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
  * @domain: domain owning this hardware interrupt or NULL for default domain
  * @hwirq: hardware irq number in that domain space
  * @affinity: irq affinity
@@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
  * If the sense/trigger is to be specified, set_irq_type() should be called
  * on the number returned from that call.
  */
-unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
-				       irq_hw_number_t hwirq,
-				       const struct irq_affinity_desc *affinity)
+static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+						  irq_hw_number_t hwirq,
+						  const struct irq_affinity_desc *affinity)
 {
 	struct device_node *of_node;
 	int virq;
@@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
 
 	return virq;
 }
+
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+					 irq_hw_number_t hwirq,
+					 const struct irq_affinity_desc *affinity)
+{
+	unsigned int virq;
+
+	mutex_lock(&irq_mapping_mutex);
+	virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
+	mutex_unlock(&irq_mapping_mutex);
+
+	return virq;
+}
 EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
 
 static int irq_domain_translate(struct irq_domain *d,
@@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 	if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
 		type &= IRQ_TYPE_SENSE_MASK;
 
+	mutex_lock(&irq_mapping_mutex);
+
 	/*
 	 * If we've already configured this interrupt,
 	 * don't do it again, or hell will break loose.
@@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 		 * interrupt number.
 		 */
 		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
-			return virq;
+			goto out;
 
 		/*
 		 * If the trigger type has not been set yet, then set
@@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 		if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
 			irq_data = irq_get_irq_data(virq);
 			if (!irq_data)
-				return 0;
+				goto err;
 
 			irqd_set_trigger_type(irq_data, type);
-			return virq;
+			goto out;
 		}
 
 		pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
 			hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
-		return 0;
+		goto err;
 	}
 
 	if (irq_domain_is_hierarchy(domain)) {
 		virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
 		if (virq <= 0)
-			return 0;
+			goto err;
 	} else {
 		/* Create mapping */
-		virq = irq_create_mapping(domain, hwirq);
+		virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
 		if (!virq)
-			return virq;
+			goto err;
 	}
 
 	irq_data = irq_get_irq_data(virq);
@@ -838,13 +854,19 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 			irq_domain_free_irqs(virq, 1);
 		else
 			irq_dispose_mapping(virq);
-		return 0;
+		goto err;
 	}
 
 	/* Store trigger type */
 	irqd_set_trigger_type(irq_data, type);
+out:
+	mutex_unlock(&irq_mapping_mutex);
 
 	return virq;
+err:
+	mutex_unlock(&irq_mapping_mutex);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping);
 
-- 
2.35.1
Re: [PATCH] irqdomain: Fix mapping-creation race
Posted by Marc Zyngier 3 years, 8 months ago
On Thu, 28 Jul 2022 10:27:10 +0100,
Johan Hovold <johan+linaro@kernel.org> wrote:
> 
> Parallel probing (e.g. due to asynchronous probing) of devices that share
> interrupts can currently result in two mappings for the same hardware
> interrupt to be created.

And I thought nobody would be using shared interrupts anymore. Turns
out people are still building braindead HW... :-/

> 
> Add a serialising mapping mutex so that looking for an existing mapping
> before creating a new one is done atomically.
> 
> Note that serialising the lookup and creation in
> irq_create_mapping_affinity() would have been enough to prevent the
> duplicate mapping, but that could instead cause
> irq_create_fwspec_mapping() to fail when there is a race.
> 
> Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> Cc: Dmitry Torokhov <dtor@chromium.org>
> Cc: Jon Hunter <jonathanh@nvidia.com>
> Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
> ---
>  kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index 8fe1da9614ee..d263a7dd4170 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -22,6 +22,7 @@
>  
>  static LIST_HEAD(irq_domain_list);
>  static DEFINE_MUTEX(irq_domain_mutex);
> +static DEFINE_MUTEX(irq_mapping_mutex);

I'd really like to avoid a global mutex. At the very least this should
be a per-domain mutex, otherwise this will serialise a lot more than
what is needed.

>  
>  static struct irq_domain *irq_default_domain;
>  
> @@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
>  #endif
>  
>  /**
> - * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> + * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
>   * @domain: domain owning this hardware interrupt or NULL for default domain
>   * @hwirq: hardware irq number in that domain space
>   * @affinity: irq affinity
> @@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
>   * If the sense/trigger is to be specified, set_irq_type() should be called
>   * on the number returned from that call.
>   */

This comment should be moved to the exported function, instead of
documenting something that nobody can call...

> -unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> -				       irq_hw_number_t hwirq,
> -				       const struct irq_affinity_desc *affinity)
> +static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
> +						  irq_hw_number_t hwirq,
> +						  const struct irq_affinity_desc *affinity)
>  {
>  	struct device_node *of_node;
>  	int virq;
> @@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
>  
>  	return virq;
>  }
> +
> +unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> +					 irq_hw_number_t hwirq,
> +					 const struct irq_affinity_desc *affinity)
> +{
> +	unsigned int virq;
> +
> +	mutex_lock(&irq_mapping_mutex);
> +	virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
> +	mutex_unlock(&irq_mapping_mutex);
> +
> +	return virq;
> +}
>  EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
>  
>  static int irq_domain_translate(struct irq_domain *d,
> @@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>  	if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
>  		type &= IRQ_TYPE_SENSE_MASK;
>  
> +	mutex_lock(&irq_mapping_mutex);
> +
>  	/*
>  	 * If we've already configured this interrupt,
>  	 * don't do it again, or hell will break loose.
> @@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>  		 * interrupt number.
>  		 */
>  		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> -			return virq;
> +			goto out;
>  
>  		/*
>  		 * If the trigger type has not been set yet, then set
> @@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>  		if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
>  			irq_data = irq_get_irq_data(virq);
>  			if (!irq_data)
> -				return 0;
> +				goto err;
>  
>  			irqd_set_trigger_type(irq_data, type);
> -			return virq;
> +			goto out;
>  		}
>  
>  		pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
>  			hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
> -		return 0;
> +		goto err;
>  	}
>  
>  	if (irq_domain_is_hierarchy(domain)) {
>  		virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
>  		if (virq <= 0)
> -			return 0;
> +			goto err;
>  	} else {
>  		/* Create mapping */
> -		virq = irq_create_mapping(domain, hwirq);
> +		virq = __irq_create_mapping_affinity(domain, hwirq, NULL);

This rechecks for the existence of the mapping. Surely we can do a bit
better by rejigging this (admittedly bitrotting) code.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH] irqdomain: Fix mapping-creation race
Posted by Johan Hovold 3 years, 8 months ago
On Thu, Jul 28, 2022 at 12:48:23PM +0100, Marc Zyngier wrote:
> On Thu, 28 Jul 2022 10:27:10 +0100,
> Johan Hovold <johan+linaro@kernel.org> wrote:
> > 
> > Parallel probing (e.g. due to asynchronous probing) of devices that share
> > interrupts can currently result in two mappings for the same hardware
> > interrupt to be created.
> 
> And I thought nobody would be using shared interrupts anymore. Turns
> out people are still building braindead HW... :-/
> 
> > 
> > Add a serialising mapping mutex so that looking for an existing mapping
> > before creating a new one is done atomically.
> > 
> > Note that serialising the lookup and creation in
> > irq_create_mapping_affinity() would have been enough to prevent the
> > duplicate mapping, but that could instead cause
> > irq_create_fwspec_mapping() to fail when there is a race.
> > 
> > Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> > Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> > Cc: Dmitry Torokhov <dtor@chromium.org>
> > Cc: Jon Hunter <jonathanh@nvidia.com>
> > Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
> > ---
> >  kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
> >  1 file changed, 34 insertions(+), 12 deletions(-)
> > 
> > diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> > index 8fe1da9614ee..d263a7dd4170 100644
> > --- a/kernel/irq/irqdomain.c
> > +++ b/kernel/irq/irqdomain.c
> > @@ -22,6 +22,7 @@
> >  
> >  static LIST_HEAD(irq_domain_list);
> >  static DEFINE_MUTEX(irq_domain_mutex);
> > +static DEFINE_MUTEX(irq_mapping_mutex);
> 
> I'd really like to avoid a global mutex. At the very least this should
> be a per-domain mutex, otherwise this will serialise a lot more than
> what is needed.

Yeah, I considered that too, but wanted to get your comments on this
first.

Also note that the likewise global irq_domain_mutex (and
sparse_irq_lock) are taken in some of these paths so perhaps using finer
locking won't actually matter that much as this is mostly for parallel
probing.

> >  
> >  static struct irq_domain *irq_default_domain;
> >  
> > @@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> >  #endif
> >  
> >  /**
> > - * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> > + * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> >   * @domain: domain owning this hardware interrupt or NULL for default domain
> >   * @hwirq: hardware irq number in that domain space
> >   * @affinity: irq affinity
> > @@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> >   * If the sense/trigger is to be specified, set_irq_type() should be called
> >   * on the number returned from that call.
> >   */
> 
> This comment should be moved to the exported function, instead of
> documenting something that nobody can call...

Yes, of course. I looked at the kernel doc for another
double-underscore-prefixed function, but those are all exported.
 
> > -unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> > -				       irq_hw_number_t hwirq,
> > -				       const struct irq_affinity_desc *affinity)
> > +static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
> > +						  irq_hw_number_t hwirq,
> > +						  const struct irq_affinity_desc *affinity)
> >  {
> >  	struct device_node *of_node;
> >  	int virq;
> > @@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> >  
> >  	return virq;
> >  }
> > +
> > +unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> > +					 irq_hw_number_t hwirq,
> > +					 const struct irq_affinity_desc *affinity)
> > +{
> > +	unsigned int virq;
> > +
> > +	mutex_lock(&irq_mapping_mutex);
> > +	virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
> > +	mutex_unlock(&irq_mapping_mutex);
> > +
> > +	return virq;
> > +}
> >  EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
> >  
> >  static int irq_domain_translate(struct irq_domain *d,
> > @@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> >  	if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
> >  		type &= IRQ_TYPE_SENSE_MASK;
> >  
> > +	mutex_lock(&irq_mapping_mutex);
> > +
> >  	/*
> >  	 * If we've already configured this interrupt,
> >  	 * don't do it again, or hell will break loose.
> > @@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> >  		 * interrupt number.
> >  		 */
> >  		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> > -			return virq;
> > +			goto out;
> >  
> >  		/*
> >  		 * If the trigger type has not been set yet, then set
> > @@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> >  		if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
> >  			irq_data = irq_get_irq_data(virq);
> >  			if (!irq_data)
> > -				return 0;
> > +				goto err;
> >  
> >  			irqd_set_trigger_type(irq_data, type);
> > -			return virq;
> > +			goto out;
> >  		}
> >  
> >  		pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
> >  			hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
> > -		return 0;
> > +		goto err;
> >  	}
> >  
> >  	if (irq_domain_is_hierarchy(domain)) {
> >  		virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
> >  		if (virq <= 0)
> > -			return 0;
> > +			goto err;
> >  	} else {
> >  		/* Create mapping */
> > -		virq = irq_create_mapping(domain, hwirq);
> > +		virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
> 
> This rechecks for the existence of the mapping. Surely we can do a bit
> better by rejigging this (admittedly bitrotting) code.

I'm sure we can. Should I try to fix the race first with a patch like
this one that can potentially be backported, and then see what I can do
about cleaning this up?

After all, it has looked like this for the past eight years, ever since
this code was first merged.

Johan
Re: [PATCH] irqdomain: Fix mapping-creation race
Posted by Marc Zyngier 3 years, 8 months ago
On Thu, 28 Jul 2022 13:56:41 +0100,
Johan Hovold <johan@kernel.org> wrote:
> 
> On Thu, Jul 28, 2022 at 12:48:23PM +0100, Marc Zyngier wrote:
> > On Thu, 28 Jul 2022 10:27:10 +0100,
> > Johan Hovold <johan+linaro@kernel.org> wrote:
> > > 
> > > Parallel probing (e.g. due to asynchronous probing) of devices that share
> > > interrupts can currently result in two mappings for the same hardware
> > > interrupt to be created.
> > 
> > And I thought nobody would be using shared interrupts anymore. Turns
> > out people are still building braindead HW... :-/
> > 
> > > 
> > > Add a serialising mapping mutex so that looking for an existing mapping
> > > before creating a new one is done atomically.
> > > 
> > > Note that serialising the lookup and creation in
> > > irq_create_mapping_affinity() would have been enough to prevent the
> > > duplicate mapping, but that could instead cause
> > > irq_create_fwspec_mapping() to fail when there is a race.
> > > 
> > > Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> > > Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> > > Cc: Dmitry Torokhov <dtor@chromium.org>
> > > Cc: Jon Hunter <jonathanh@nvidia.com>
> > > Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
> > > ---
> > >  kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
> > >  1 file changed, 34 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> > > index 8fe1da9614ee..d263a7dd4170 100644
> > > --- a/kernel/irq/irqdomain.c
> > > +++ b/kernel/irq/irqdomain.c
> > > @@ -22,6 +22,7 @@
> > >  
> > >  static LIST_HEAD(irq_domain_list);
> > >  static DEFINE_MUTEX(irq_domain_mutex);
> > > +static DEFINE_MUTEX(irq_mapping_mutex);
> > 
> > I'd really like to avoid a global mutex. At the very least this should
> > be a per-domain mutex, otherwise this will serialise a lot more than
> > what is needed.
> 
> Yeah, I considered that too, but wanted to get your comments on this
> first.
> 
> Also note that the likewise global irq_domain_mutex (and
> sparse_irq_lock) are taken in some of these paths so perhaps using finer
> locking won't actually matter that much as this is mostly for parallel
> probing.

It will be a good opportunity to make the locking suck a bit less,
like in irq_domain_associate().

> > >  	} else {
> > >  		/* Create mapping */
> > > -		virq = irq_create_mapping(domain, hwirq);
> > > +		virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
> > 
> > This rechecks for the existence of the mapping. Surely we can do a bit
> > better by rejigging this (admittedly bitrotting) code.
> 
> I'm sure we can. Should I try to fix the race first with a patch like
> this one that can potentially be backported, and then see what I can do
> about cleaning this up?
> 
> After all it has looked like this for the past eight years since when
> this code was first merged.

No, let's put the code in shape *first*, then work on the locking,
as that should make the patch simpler. Backports aren't my concern,
really.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH] irqdomain: Fix mapping-creation race
Posted by Johan Hovold 3 years, 8 months ago
On Thu, Jul 28, 2022 at 11:27:10AM +0200, Johan Hovold wrote:
> Parallel probing (e.g. due to asynchronous probing) of devices that share
> interrupts can currently result in two mappings for the same hardware
> interrupt to be created.
> 
> Add a serialising mapping mutex so that looking for an existing mapping
> before creating a new one is done atomically.
> 
> Note that serialising the lookup and creation in
> irq_create_mapping_affinity() would have been enough to prevent the
> duplicate mapping, but that could instead cause
> irq_create_fwspec_mapping() to fail when there is a race.
> 
> Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> Cc: Dmitry Torokhov <dtor@chromium.org>
> Cc: Jon Hunter <jonathanh@nvidia.com>
> Signed-off-by: Johan Hovold <johan+linaro@kernel.org>

Here's some more background on how I ran into this:

Link: https://lore.kernel.org/r/YuJXMHoT4ijUxnRb@hovoldconsulting.com

Johan