[PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()

Théo Lebrun posted 2 patches 1 month ago
There is a newer version of this series
[PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
Posted by Théo Lebrun 1 month ago
bp->num_queues is the total number of queues and is constant from probe.
Introduce bp->max_num_queues which takes the current role of
bp->num_queues and allow `0 < bp->num_queues <= bp->max_num_queues`.
MACB/GEM does not know about rx/tx specific queues; it only has
combined queues.

Implement .set_channels() operation by wrapping ourselves in
macb_close() and macb_open() calls if interface is running. This
triggers reinit of buffers, which also includes the code that
enables/disables only the queues below bp->num_queues, in
macb_init_buffers().

If reopen fails, we make one last attempt: reset num_queues to the
previous value and try reopening. We still error out because the
.set_channels() operation failed.

.set_channels() is reserved to devices with MACB_CAPS_QUEUE_DISABLE.
The tieoff workaround would not work as packets would still be routed
into queues with a tieoff descriptor.

Nit: fix an alignment issue inside gem_ethtool_ops which does not
deserve its own patch.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |   1 +
 drivers/net/ethernet/cadence/macb_main.c | 100 ++++++++++++++++++++++++++-----
 2 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 87414a2ddf6e..30fa65e2bdf2 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1294,6 +1294,7 @@ struct macb {
 	unsigned int		tx_ring_size;
 
 	unsigned int		num_queues;
+	unsigned int		max_num_queues;
 	struct macb_queue	queues[MACB_MAX_QUEUES];
 
 	spinlock_t		lock;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 17f0ad3d7a09..bac83a2b4c4d 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -466,9 +466,24 @@ static void macb_init_buffers(struct macb *bp)
 			    upper_32_bits(bp->queues[0].tx_ring_dma));
 	}
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
-		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+	for (q = 0, queue = bp->queues; q < bp->max_num_queues; ++q, ++queue) {
+		if (q < bp->num_queues) {
+			queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
+			queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+		} else {
+			/*
+			 * macb_set_channels(), which is the only way of writing
+			 * to bp->num_queues, is only allowed if
+			 * MACB_CAPS_QUEUE_DISABLE.
+			 */
+			queue_writel(queue, RBQP, MACB_BIT(QUEUE_DISABLE));
+
+			/* Disable all interrupts */
+			queue_writel(queue, IDR, -1);
+			queue_readl(queue, ISR);
+			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+				queue_writel(queue, ISR, -1);
+		}
 	}
 }
 
@@ -3900,8 +3915,8 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXCLSRLINS:
-		if ((cmd->fs.location >= bp->max_tuples)
-				|| (cmd->fs.ring_cookie >= bp->num_queues)) {
+		if (cmd->fs.location >= bp->max_tuples ||
+		    cmd->fs.ring_cookie >= bp->max_num_queues) {
 			ret = -EINVAL;
 			break;
 		}
@@ -3919,6 +3934,54 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 	return ret;
 }
 
+static void macb_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct macb *bp = netdev_priv(netdev);
+
+	ch->max_combined = bp->max_num_queues;
+	ch->combined_count = bp->num_queues;
+}
+
+static int macb_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct macb *bp = netdev_priv(netdev);
+	unsigned int old_count = bp->num_queues;
+	unsigned int count = ch->combined_count;
+	bool running = netif_running(bp->dev);
+	int ret = 0;
+
+	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+		return -EOPNOTSUPP;
+
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	if (count > bp->max_num_queues)
+		return -EINVAL;
+
+	if (count == old_count)
+		return 0;
+
+	if (running)
+		macb_close(bp->dev);
+
+	bp->num_queues = count;
+	netif_set_real_num_queues(bp->dev, count, count);
+
+	if (running) {
+		ret = macb_open(bp->dev);
+		if (ret) {
+			bp->num_queues = old_count;
+			netif_set_real_num_queues(bp->dev, old_count, old_count);
+			macb_open(bp->dev);
+		}
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops macb_ethtool_ops = {
 	.get_regs_len		= macb_get_regs_len,
 	.get_regs		= macb_get_regs,
@@ -3934,6 +3997,8 @@ static const struct ethtool_ops macb_ethtool_ops = {
 	.set_link_ksettings     = macb_set_link_ksettings,
 	.get_ringparam		= macb_get_ringparam,
 	.set_ringparam		= macb_set_ringparam,
+	.get_channels		= macb_get_channels,
+	.set_channels		= macb_set_channels,
 };
 
 static const struct ethtool_ops gem_ethtool_ops = {
@@ -3954,10 +4019,12 @@ static const struct ethtool_ops gem_ethtool_ops = {
 	.set_link_ksettings     = macb_set_link_ksettings,
 	.get_ringparam		= macb_get_ringparam,
 	.set_ringparam		= macb_set_ringparam,
-	.get_rxnfc			= gem_get_rxnfc,
-	.set_rxnfc			= gem_set_rxnfc,
-	.get_rx_ring_count		= gem_get_rx_ring_count,
-	.nway_reset			= phy_ethtool_nway_reset,
+	.get_rxnfc		= gem_get_rxnfc,
+	.set_rxnfc		= gem_set_rxnfc,
+	.get_rx_ring_count	= gem_get_rx_ring_count,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_channels		= macb_get_channels,
+	.set_channels		= macb_set_channels,
 };
 
 static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -4098,9 +4165,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	size_t i;
 	int err;
 
-	if (conf->num_entries > bp->num_queues) {
+	if (conf->num_entries > bp->max_num_queues) {
 		netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
-			   conf->num_entries, bp->num_queues);
+			   conf->num_entries, bp->max_num_queues);
 		return -EINVAL;
 	}
 
@@ -4148,9 +4215,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* gate_mask must not select queues outside the valid queues */
 		queue_id = order_base_2(entry->gate_mask);
-		if (queue_id >= bp->num_queues) {
+		if (queue_id >= bp->max_num_queues) {
 			netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
-				   i, entry->gate_mask, bp->num_queues);
+				   i, entry->gate_mask, bp->max_num_queues);
 			err = -EINVAL;
 			goto cleanup;
 		}
@@ -4210,7 +4277,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	/* All validations passed - proceed with hardware configuration */
 	scoped_guard(spinlock_irqsave, &bp->lock) {
 		/* Disable ENST queues if running before configuring */
-		queue_mask = BIT_U32(bp->num_queues) - 1;
+		queue_mask = BIT_U32(bp->max_num_queues) - 1;
 		gem_writel(bp, ENST_CONTROL,
 			   queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
 
@@ -4245,7 +4312,7 @@ static void macb_taprio_destroy(struct net_device *ndev)
 	unsigned int q;
 
 	netdev_reset_tc(ndev);
-	queue_mask = BIT_U32(bp->num_queues) - 1;
+	queue_mask = BIT_U32(bp->max_num_queues) - 1;
 
 	scoped_guard(spinlock_irqsave, &bp->lock) {
 		/* Single disable command for all queues */
@@ -4253,7 +4320,7 @@ static void macb_taprio_destroy(struct net_device *ndev)
 			   queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
 
 		/* Clear all queue ENST registers in batch */
-		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		for (q = 0, queue = bp->queues; q < bp->max_num_queues; ++q, ++queue) {
 			queue_writel(queue, ENST_START_TIME, 0);
 			queue_writel(queue, ENST_ON_TIME, 0);
 			queue_writel(queue, ENST_OFF_TIME, 0);
@@ -5512,6 +5579,7 @@ static int macb_probe(struct platform_device *pdev)
 		bp->macb_reg_writel = hw_writel;
 	}
 	bp->num_queues = num_queues;
+	bp->max_num_queues = num_queues;
 	bp->dma_burst_length = macb_config->dma_burst_length;
 	bp->pclk = pclk;
 	bp->hclk = hclk;

-- 
2.53.0

Re: [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
Posted by Jakub Kicinski 1 month ago
On Thu, 05 Mar 2026 18:20:14 +0100 Théo Lebrun wrote:
> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> +		return -EOPNOTSUPP;

Why not set max to 1 in this case?

> +	if (!count || ch->rx_count || ch->tx_count)
> +		return -EINVAL;

Core should check this for you already

> +	if (count > bp->max_num_queues)
> +		return -EINVAL;

and this

> +	if (count == old_count)
> +		return 0;
> +
> +	if (running)
> +		macb_close(bp->dev);
> +
> +	bp->num_queues = count;
> +	netif_set_real_num_queues(bp->dev, count, count);
> +
> +	if (running) {
> +		ret = macb_open(bp->dev);
> +		if (ret) {
> +			bp->num_queues = old_count;
> +			netif_set_real_num_queues(bp->dev, old_count, old_count);
> +			macb_open(bp->dev);

both macb_open() calls may fail under memory pressure
For new functionality we ask drivers to allocate all necessary
resources upfront then just swap them in and reconfigure HW
-- 
pw-bot: cr
Re: [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
Posted by Théo Lebrun 1 month ago
On Sat Mar 7, 2026 at 4:09 AM CET, Jakub Kicinski wrote:
> On Thu, 05 Mar 2026 18:20:14 +0100 Théo Lebrun wrote:
>> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
>> +		return -EOPNOTSUPP;
>
> Why not set max to 1 in this case?

With !QUEUE_DISABLE, we only know how to run with all queues enabled.
It doesn't imply that max_num_queues == 1.

MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE (BIT0) in the
per-queue register RBQP disables queue Rx. If we don't have that
capability we can have multiple queues (if HW supports it) but we must
always run with all enabled.

The correct way to deal with `!(bp->caps & MACB_CAPS_QUEUE_DISABLE)`
would be something like:

static void macb_get_channels(struct net_device *netdev,
			      struct ethtool_channels *ch)
{
	struct macb *bp = netdev_priv(netdev);

	ch->max_combined = bp->max_num_queues;
	ch->combined_count = bp->num_queues;

	if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
		/* we know how to disable individual queues */
		ch->min_combined = 1;
	else
		/* we only support running with all queues active */
		ch->min_combined = bp->max_num_queues;
}

But ch->min_combined does not exist.

>
>> +	if (!count || ch->rx_count || ch->tx_count)
>> +		return -EINVAL;
>
> Core should check this for you already
>
>> +	if (count > bp->max_num_queues)
>> +		return -EINVAL;
>
> and this

Noted thanks!

>> +	if (count == old_count)
>> +		return 0;
>> +
>> +	if (running)
>> +		macb_close(bp->dev);
>> +
>> +	bp->num_queues = count;
>> +	netif_set_real_num_queues(bp->dev, count, count);
>> +
>> +	if (running) {
>> +		ret = macb_open(bp->dev);
>> +		if (ret) {
>> +			bp->num_queues = old_count;
>> +			netif_set_real_num_queues(bp->dev, old_count, old_count);
>> +			macb_open(bp->dev);
>
> both macb_open() calls may fail under memory pressure
> For new functionality we ask drivers to allocate all necessary
> resources upfront then just swap them in and reconfigure HW

The main reason we want to set queue count is memory savings. If we take
the Mobileye EyeQ5 SoC, it has a small 32MiB RAM alias usable for DMA.
If we waste it on networking we have less available for the remaining
peripherals. Is there some way we could avoid upfront allocations?

.set_ringparam() can help to avoid memory waste by using many small
queues. But our main target config is a single large queue (common with
AF_XDP zero-copy when userspace wants a single socket). In that case the
wasted fraction is `(max_num_queues - 1) / max_num_queues`: 75% with
max_num_queues=4 on Mobileye EyeQ5 & EyeQ6H, i.e. the Mobileye boards on
my desk at the moment.

I wonder if we'll see GEM IPs that have >4 queues. HW manual indicates
up to 16 is supported.

Thanks Jakub,

--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Re: [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
Posted by Jakub Kicinski 1 month ago
On Mon, 09 Mar 2026 18:04:06 +0100 Théo Lebrun wrote:
> On Sat Mar 7, 2026 at 4:09 AM CET, Jakub Kicinski wrote:
> > On Thu, 05 Mar 2026 18:20:14 +0100 Théo Lebrun wrote:  
> >> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> >> +		return -EOPNOTSUPP;  
> >
> > Why not set max to 1 in this case?  
> 
> With !QUEUE_DISABLE, we only know how to run with all queues enabled.
> It doesn't imply that max_num_queues == 1.
> 
> MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE (BIT0) in the
> per-queue register RBQP disables queue Rx. If we don't have that
> capability we can have multiple queues (if HW supports it) but we must
> always run with all enabled.

Oh, I see! Perhaps just a comment over the check to inform the reader
that the lack of capabilities means all rx queues must be enabled.

> >> +	if (running) {
> >> +		ret = macb_open(bp->dev);
> >> +		if (ret) {
> >> +			bp->num_queues = old_count;
> >> +			netif_set_real_num_queues(bp->dev, old_count, old_count);
> >> +			macb_open(bp->dev);  
> >
> > both macb_open() calls may fail under memory pressure
> > For new functionality we ask drivers to allocate all necessary
> > resources upfront then just swap them in and reconfigure HW  
> 
> The main reason we want to set queue count is memory savings. If we take
> the Mobileye EyeQ5 SoC, it has a small 32MiB RAM alias usable for DMA.
> If we waste it on networking we have less available for the remaining
> peripherals. Is there some way we could avoid upfront allocations?

We've been asking everyone to follow the "pre-allocate resources
paradigm" for a few years now. It has huge benefits for system
reliability. If you don't want to complicate code at this stage
you can support configuring queue count only when the device is down.
But the ask will keep coming back, any time you try to do the close+open
Re: [PATCH net-next 1/2] net: macb: implement ethtool_ops.get|set_channels()
Posted by Théo Lebrun 1 month ago
On Mon Mar 9, 2026 at 10:15 PM CET, Jakub Kicinski wrote:
> On Mon, 09 Mar 2026 18:04:06 +0100 Théo Lebrun wrote:
>> On Sat Mar 7, 2026 at 4:09 AM CET, Jakub Kicinski wrote:
>> > On Thu, 05 Mar 2026 18:20:14 +0100 Théo Lebrun wrote:  
>> >> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
>> >> +		return -EOPNOTSUPP;  
>> >
>> > Why not set max to 1 in this case?  
>> 
>> With !QUEUE_DISABLE, we only know how to run with all queues enabled.
>> It doesn't imply that max_num_queues == 1.
>> 
>> MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE (BIT0) in the
>> per-queue register RBQP disables queue Rx. If we don't have that
>> capability we can have multiple queues (if HW supports it) but we must
>> always run with all enabled.
>
> Oh, I see! Perhaps just a comment over the check to inform the reader
> that the lack of capabilities means all rx queues must be enabled.

ACK, will insert a comment.

>> >> +	if (running) {
>> >> +		ret = macb_open(bp->dev);
>> >> +		if (ret) {
>> >> +			bp->num_queues = old_count;
>> >> +			netif_set_real_num_queues(bp->dev, old_count, old_count);
>> >> +			macb_open(bp->dev);  
>> >
>> > both macb_open() calls may fail under memory pressure
>> > For new functionality we ask drivers to allocate all necessary
>> > resources upfront then just swap them in and reconfigure HW  
>> 
>> The main reason we want to set queue count is memory savings. If we take
>> the Mobileye EyeQ5 SoC, it has a small 32MiB RAM alias usable for DMA.
>> If we waste it on networking we have less available for the remaining
>> peripherals. Is there some way we could avoid upfront allocations?
>
> We've been asking everyone to follow the "pre-allocate resources
> paradigm" for a few years now. It has huge benefits for system
> reliability. If you don't want to complicate code at this stage
> you can support configuring queue count only when the device is down.
> But the ask will keep coming back, any time you try to do the close+open

I'll iterate with a .set_channels() implementation that returns -EBUSY
if netif_running(), making sure to leave a comment explaining the
tradeoff.

Thanks Jakub,

--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com