The stmmac driver was previously implementing a self-made IRQ
spreading mechanism based on num_online_cpus(). By migrating to
cpumask_local_spread() the spreading gets NUMA aware.
In addition, most drivers seem to use cpumask_local_spread(),
aligning / harmonizing a bit more.
Signed-off-by: Florian Bezdeka <florian.bezdeka@siemens.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 41 +++++++++++++++++++----
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 012b0a477255df73b2e145b62e09eeb2133e827a..52b75df2b71239aab5b1d5138b78d6c5310b9c5b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -329,6 +329,8 @@ struct stmmac_priv {
int sfty_ue_irq;
int rx_irq[MTL_MAX_RX_QUEUES];
int tx_irq[MTL_MAX_TX_QUEUES];
+ cpumask_var_t rx_affinity[MTL_MAX_RX_QUEUES];
+ cpumask_var_t tx_affinity[MTL_MAX_TX_QUEUES];
/*irq name */
char int_name_mac[IFNAMSIZ + 9];
char int_name_wol[IFNAMSIZ + 9];
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a379221b96a348e20f2afb0f44540cfba2f2477a..ddbfc0774143d0353c83988b2dfffa75132bd0ee 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3739,6 +3739,8 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
enum request_irq_err irq_err;
int irq_idx = 0;
char *int_name;
+ int numa_node;
+ int cpu;
int ret;
int i;
@@ -3845,6 +3847,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
}
/* Request Rx MSI irq */
+ numa_node = dev_to_node(&priv->dev->dev);
for (i = 0; i < priv->plat->rx_queues_to_use; i++) {
if (i >= MTL_MAX_RX_QUEUES)
break;
@@ -3864,8 +3867,10 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
irq_idx = i;
goto irq_error;
}
- irq_set_affinity_hint(priv->rx_irq[i],
- cpumask_of(i % num_online_cpus()));
+
+ cpu = cpumask_local_spread(i, numa_node);
+ cpumask_set_cpu(cpu, priv->rx_affinity[i]);
+ irq_set_affinity_hint(priv->rx_irq[i], priv->rx_affinity[i]);
}
/* Request Tx MSI irq */
@@ -3888,8 +3893,10 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
irq_idx = i;
goto irq_error;
}
- irq_set_affinity_hint(priv->tx_irq[i],
- cpumask_of(i % num_online_cpus()));
+
+ cpu = cpumask_local_spread(i, numa_node);
+ cpumask_set_cpu(cpu, priv->tx_affinity[i]);
+ irq_set_affinity_hint(priv->tx_irq[i], priv->tx_affinity[i]);
}
return 0;
@@ -7653,6 +7660,14 @@ struct plat_stmmacenet_data *stmmac_plat_dat_alloc(struct device *dev)
}
EXPORT_SYMBOL_GPL(stmmac_plat_dat_alloc);
+static void stmmac_free_affinity(cpumask_var_t *m, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ free_cpumask_var(m[i]);
+}
+
static int __stmmac_dvr_probe(struct device *device,
struct plat_stmmacenet_data *plat_dat,
struct stmmac_resources *res)
@@ -7699,10 +7714,21 @@ static int __stmmac_dvr_probe(struct device *device,
priv->sfty_irq = res->sfty_irq;
priv->sfty_ce_irq = res->sfty_ce_irq;
priv->sfty_ue_irq = res->sfty_ue_irq;
- for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
+ for (i = 0; i < MTL_MAX_RX_QUEUES; i++) {
priv->rx_irq[i] = res->rx_irq[i];
- for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
+ if (!zalloc_cpumask_var(&priv->rx_affinity[i], GFP_KERNEL)) {
+ stmmac_free_affinity(priv->rx_affinity, i);
+ return -ENOMEM;
+ }
+ }
+ for (i = 0; i < MTL_MAX_TX_QUEUES; i++) {
priv->tx_irq[i] = res->tx_irq[i];
+ if (!zalloc_cpumask_var(&priv->tx_affinity[i], GFP_KERNEL)) {
+ stmmac_free_affinity(priv->rx_affinity, MTL_MAX_RX_QUEUES);
+ stmmac_free_affinity(priv->tx_affinity, i);
+ return -ENOMEM;
+ }
+ }
if (!is_zero_ether_addr(res->mac))
eth_hw_addr_set(priv->dev, res->mac);
@@ -8023,6 +8049,9 @@ void stmmac_dvr_remove(struct device *dev)
pm_runtime_disable(dev);
pm_runtime_put_noidle(dev);
+ stmmac_free_affinity(priv->rx_affinity, MTL_MAX_RX_QUEUES);
+ stmmac_free_affinity(priv->tx_affinity, MTL_MAX_TX_QUEUES);
+
if (priv->plat->exit)
priv->plat->exit(dev, priv->plat->bsp_priv);
}
--
2.53.0
On Tue, Feb 10, 2026 at 05:28:15PM +0100, Florian Bezdeka wrote:
> The stmmac driver was previously implementing a self-made IRQ
> spreading mechanism based on num_online_cpus(). By migrating to
> cpumask_local_spread() the spreading gets NUMA aware.
>
> In addition, most drivers seem to use cpumask_local_spread(),
> aligning / harmonizing a bit more.
Oh great... sizeof(struct stmmac_priv) is already large at 880 bytes,
and adding 16 pointers or CPU mask arrays for PCI MSI adds another
128 bytes on top, whether _this_ stmmac device is PCI or not.
A better solution needs to be found. Please consider what can be done
to make MSI (a) generic to stmmac so it can live in stmmac_libpci.c,
and (b) avoid adding overhead to platforms that don't use MSI.
As an example of an improvement, the int_name_*[] strings are only
used for MSI interrupts, and each one uses over 16 bytes. I calculate
the entire usage to be 665 bytes just for these strings which are
only ever used for MSI.
With the addition of the cpumasks, we're looking at getting on for
800 bytes of this structure which are only used for MSI.
We can surely do better than this.
So, how about moving the int_name_* members into their own separate struct::
struct stmmac_msi {
/*irq_name */
char int_name_mac[IFNAMSIZ + 9];
... other int_name_* ...
cpumask_var_t rx_affinity[MTL_MAX_RX_QUEUES];
cpumask_var_t tx_affinity[MTL_MAX_TX_QUEUES];
};
and replace the existing int_name_* members with a simple:
struct stmmac_msi *msi;
This struct would only be allocated when we need it for
stmmac_request_irq_multi_msi(), and can be requested using devm in
stmmac_dvr_probe() only when required.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
© 2016 - 2026 Red Hat, Inc.