[PATCH net-next v3 4/6] net: mana: Use GIC functions to allocate global EQs

Long Li posted 6 patches 1 month ago
There is a newer version of this series
[PATCH net-next v3 4/6] net: mana: Use GIC functions to allocate global EQs
Posted by Long Li 1 month ago
Replace the GDMA global interrupt setup code with the new GIC allocation
and release functions for managing interrupt contexts.

Signed-off-by: Long Li <longli@microsoft.com>
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 83 +++----------------
 1 file changed, 10 insertions(+), 73 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index c43fd8089e77..bdc9dc437fb7 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1831,30 +1831,13 @@ static int mana_gd_setup_dyn_irqs(struct pci_dev *pdev, int nvec)
 	 * further used in irq_setup()
 	 */
 	for (i = 1; i <= nvec; i++) {
-		gic = kzalloc_obj(*gic);
+		gic = mana_gd_get_gic(gc, false, &i);
 		if (!gic) {
 			err = -ENOMEM;
 			goto free_irq;
 		}
-		gic->handler = mana_gd_process_eq_events;
-		INIT_LIST_HEAD(&gic->eq_list);
-		spin_lock_init(&gic->lock);
-
-		snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
-			 i - 1, pci_name(pdev));
-
-		/* one pci vector is already allocated for HWC */
-		irqs[i - 1] = pci_irq_vector(pdev, i);
-		if (irqs[i - 1] < 0) {
-			err = irqs[i - 1];
-			goto free_current_gic;
-		}
-
-		err = request_irq(irqs[i - 1], mana_gd_intr, 0, gic->name, gic);
-		if (err)
-			goto free_current_gic;
 
-		xa_store(&gc->irq_contexts, i, gic, GFP_KERNEL);
+		irqs[i - 1] = gic->irq;
 	}
 
 	/*
@@ -1876,19 +1859,11 @@ static int mana_gd_setup_dyn_irqs(struct pci_dev *pdev, int nvec)
 	kfree(irqs);
 	return 0;
 
-free_current_gic:
-	kfree(gic);
 free_irq:
 	for (i -= 1; i > 0; i--) {
 		irq = pci_irq_vector(pdev, i);
-		gic = xa_load(&gc->irq_contexts, i);
-		if (WARN_ON(!gic))
-			continue;
-
 		irq_update_affinity_hint(irq, NULL);
-		free_irq(irq, gic);
-		xa_erase(&gc->irq_contexts, i);
-		kfree(gic);
+		mana_gd_put_gic(gc, false, i);
 	}
 	kfree(irqs);
 	return err;
@@ -1909,34 +1884,13 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev, int nvec)
 	start_irqs = irqs;
 
 	for (i = 0; i < nvec; i++) {
-		gic = kzalloc_obj(*gic);
+		gic = mana_gd_get_gic(gc, false, &i);
 		if (!gic) {
 			err = -ENOMEM;
 			goto free_irq;
 		}
 
-		gic->handler = mana_gd_process_eq_events;
-		INIT_LIST_HEAD(&gic->eq_list);
-		spin_lock_init(&gic->lock);
-
-		if (!i)
-			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
-				 pci_name(pdev));
-		else
-			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
-				 i - 1, pci_name(pdev));
-
-		irqs[i] = pci_irq_vector(pdev, i);
-		if (irqs[i] < 0) {
-			err = irqs[i];
-			goto free_current_gic;
-		}
-
-		err = request_irq(irqs[i], mana_gd_intr, 0, gic->name, gic);
-		if (err)
-			goto free_current_gic;
-
-		xa_store(&gc->irq_contexts, i, gic, GFP_KERNEL);
+		irqs[i] = gic->irq;
 	}
 
 	/* If number of IRQ is one extra than number of online CPUs,
@@ -1965,19 +1919,11 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev, int nvec)
 	kfree(start_irqs);
 	return 0;
 
-free_current_gic:
-	kfree(gic);
 free_irq:
 	for (i -= 1; i >= 0; i--) {
 		irq = pci_irq_vector(pdev, i);
-		gic = xa_load(&gc->irq_contexts, i);
-		if (WARN_ON(!gic))
-			continue;
-
 		irq_update_affinity_hint(irq, NULL);
-		free_irq(irq, gic);
-		xa_erase(&gc->irq_contexts, i);
-		kfree(gic);
+		mana_gd_put_gic(gc, false, i);
 	}
 
 	kfree(start_irqs);
@@ -2052,26 +1998,17 @@ static int mana_gd_setup_remaining_irqs(struct pci_dev *pdev)
 static void mana_gd_remove_irqs(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
-	struct gdma_irq_context *gic;
 	int irq, i;
 
 	if (gc->max_num_msix < 1)
 		return;
 
-	for (i = 0; i < gc->max_num_msix; i++) {
-		irq = pci_irq_vector(pdev, i);
-		if (irq < 0)
-			continue;
-
-		gic = xa_load(&gc->irq_contexts, i);
-		if (WARN_ON(!gic))
-			continue;
-
+	for (i = 0; i < (gc->msi_sharing ? gc->max_num_msix : 1); i++) {
 		/* Need to clear the hint before free_irq */
+		irq = pci_irq_vector(pdev, i);
 		irq_update_affinity_hint(irq, NULL);
-		free_irq(irq, gic);
-		xa_erase(&gc->irq_contexts, i);
-		kfree(gic);
+
+		mana_gd_put_gic(gc, false, i);
 	}
 
 	pci_free_irq_vectors(pdev);
-- 
2.43.0
Re: [net-next,v3,4/6] net: mana: Use GIC functions to allocate global EQs
Posted by Paolo Abeni 1 month ago
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mana: Use GIC functions to allocate global EQs

Replace the GDMA global interrupt setup code with the new GIC allocation
and release functions for managing interrupt contexts.

> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index c43fd8089e77..bdc9dc437fb7 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c

[ ... ]

> @@ -2052,26 +1998,17 @@ static int mana_gd_setup_remaining_irqs(struct pci_dev *pdev)
>  static void mana_gd_remove_irqs(struct pci_dev *pdev)
>  {
>  	struct gdma_context *gc = pci_get_drvdata(pdev);
> -	struct gdma_irq_context *gic;
>  	int irq, i;
>
>  	if (gc->max_num_msix < 1)
>  		return;
>
> -	for (i = 0; i < gc->max_num_msix; i++) {
> -		irq = pci_irq_vector(pdev, i);
> -		if (irq < 0)
> -			continue;
> -
> -		gic = xa_load(&gc->irq_contexts, i);
> -		if (WARN_ON(!gic))
> -			continue;
> -
> +	for (i = 0; i < (gc->msi_sharing ? gc->max_num_msix : 1); i++) {
                                                               ^^^
Does this loop condition leak GIC references when msi_sharing is false?

When msi_sharing is false, the loop only releases the GIC at index 0.
However, mana_gd_setup_dyn_irqs allocates GICs at indices 1 through nvec:

drivers/net/ethernet/microsoft/mana/gdma_main.c:mana_gd_setup_dyn_irqs() {
    for (i = 1; i <= nvec; i++) {
        gic = mana_gd_get_gic(gc, false, &i);
        ...
    }
}

Each mana_gd_get_gic call initializes the GIC with refcount=1. These GICs
at indices 1..nvec are never released because mana_gd_remove_irqs only
processes index 0 when msi_sharing is false.

The old code iterated over all gc->max_num_msix entries unconditionally:

    for (i = 0; i < gc->max_num_msix; i++) {
        ...
        gic = xa_load(&gc->irq_contexts, i);
        ...
        free_irq(irq, gic);
        xa_erase(&gc->irq_contexts, i);
        kfree(gic);
    }

This ensured every allocated GIC was freed. The new code with the
conditional loop breaks this, resulting in:

1. Leaked IRQ handlers - request_irq without matching free_irq for indices
   1..nvec
2. Leaked gdma_irq_context memory - the GIC structures are never freed
3. Leaked xarray entries - the entries remain in gc->irq_contexts

Additionally, pci_free_irq_vectors is called immediately after the loop,
which attempts to tear down MSI-X vectors while IRQ handlers at indices
1..nvec are still registered. Can this trigger warnings or use-after-free
if an interrupt fires during teardown?

The next commit in the series (4475f13983ab "net: mana: Allocate interrupt
context for each EQ when creating vPort") adds per-EQ get/put of GICs,
but the EQ teardown only drops the EQ's reference (refcount goes from 2 to
1). The base reference from mana_gd_setup_dyn_irqs remains unreleased.

>  		/* Need to clear the hint before free_irq */
> +		irq = pci_irq_vector(pdev, i);
>  		irq_update_affinity_hint(irq, NULL);
> -		free_irq(irq, gic);
> -		xa_erase(&gc->irq_contexts, i);
> -		kfree(gic);
> +
> +		mana_gd_put_gic(gc, false, i);
>  	}
>
>  	pci_free_irq_vectors(pdev);