bp->num_queues is the total number of queues and is constant from probe.
Introduce bp->max_num_queues which takes the current role of
bp->num_queues and allow `0 < bp->num_queues <= bp->max_num_queues`.
MACB/GEM does not know about rx/tx specific queues; it only has
combined queues.
.set_channels() is reserved to devices with MACB_CAPS_QUEUE_DISABLE.
The tieoff workaround would not work as packets would still be routed
into queues with a tieoff descriptor.
Implement .set_channels() operation by refusing if netif_running().
The reason to implement .set_channels() is memory savings; we cannot
preallocate and swap at runtime.
Nit: fix an alignment issue inside gem_ethtool_ops which does not
deserve its own patch.
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
drivers/net/ethernet/cadence/macb.h | 1 +
drivers/net/ethernet/cadence/macb_main.c | 103 ++++++++++++++++++++++++++-----
2 files changed, 88 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index c69828b27dae..b08afe340996 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1309,6 +1309,7 @@ struct macb {
unsigned int tx_ring_size;
unsigned int num_queues;
+ unsigned int max_num_queues;
struct macb_queue queues[MACB_MAX_QUEUES];
spinlock_t lock;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 3dcae4d5f74c..8b2c77446dbd 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -467,9 +467,26 @@ static void macb_init_buffers(struct macb *bp)
upper_32_bits(bp->queues[0].tx_ring_dma));
}
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
- queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
- queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+ for (q = 0, queue = bp->queues; q < bp->max_num_queues; ++q, ++queue) {
+ if (q < bp->num_queues) {
+ queue_writel(queue, RBQP,
+ lower_32_bits(queue->rx_ring_dma));
+ queue_writel(queue, TBQP,
+ lower_32_bits(queue->tx_ring_dma));
+ } else {
+ /*
+ * macb_set_channels(), which is the only way of writing
+ * to bp->num_queues, is only allowed if
+ * MACB_CAPS_QUEUE_DISABLE.
+ */
+ queue_writel(queue, RBQP, MACB_BIT(QUEUE_DISABLE));
+
+ /* Disable all interrupts */
+ queue_writel(queue, IDR, -1);
+ queue_readl(queue, ISR);
+ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+ queue_writel(queue, ISR, -1);
+ }
}
}
@@ -4022,8 +4039,8 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
switch (cmd->cmd) {
case ETHTOOL_SRXCLSRLINS:
- if ((cmd->fs.location >= bp->max_tuples)
- || (cmd->fs.ring_cookie >= bp->num_queues)) {
+ if (cmd->fs.location >= bp->max_tuples ||
+ cmd->fs.ring_cookie >= bp->max_num_queues) {
ret = -EINVAL;
break;
}
@@ -4041,6 +4058,54 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
return ret;
}
+static void macb_get_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct macb *bp = netdev_priv(netdev);
+
+ ch->max_combined = bp->max_num_queues;
+ ch->combined_count = bp->num_queues;
+}
+
+static int macb_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct macb *bp = netdev_priv(netdev);
+ unsigned int old_count = bp->num_queues;
+ unsigned int count = ch->combined_count;
+ int ret = 0;
+
+ /*
+ * MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE/BIT0 in
+ * the per-queue RBQP register disables queue Rx. If we don't have that
+ * capability we can have multiple queues but we must always run with
+ * all enabled.
+ */
+ if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+ return -EOPNOTSUPP;
+
+ /*
+ * An ideal .set_channels() implementation uses upfront allocated
+ * resources and swaps them in, bringing reliability under memory
+ * pressure. However, here we implement it for memory savings in
+ * setups with less than max number of queues active.
+ *
+ * Signal it by refusing .set_channels() once interface is opened.
+ */
+ if (netif_running(bp->dev))
+ return -EBUSY;
+
+ if (count == old_count)
+ return 0;
+
+ ret = netif_set_real_num_queues(bp->dev, count, count);
+ if (ret)
+ return ret;
+
+ bp->num_queues = count;
+ return 0;
+}
+
static const struct ethtool_ops macb_ethtool_ops = {
.get_regs_len = macb_get_regs_len,
.get_regs = macb_get_regs,
@@ -4056,6 +4121,8 @@ static const struct ethtool_ops macb_ethtool_ops = {
.set_link_ksettings = macb_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
.set_ringparam = macb_set_ringparam,
+ .get_channels = macb_get_channels,
+ .set_channels = macb_set_channels,
};
static int macb_get_eee(struct net_device *dev, struct ethtool_keee *eee)
@@ -4090,12 +4157,14 @@ static const struct ethtool_ops gem_ethtool_ops = {
.set_link_ksettings = macb_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
.set_ringparam = macb_set_ringparam,
- .get_rxnfc = gem_get_rxnfc,
- .set_rxnfc = gem_set_rxnfc,
- .get_rx_ring_count = gem_get_rx_ring_count,
- .nway_reset = phy_ethtool_nway_reset,
+ .get_rxnfc = gem_get_rxnfc,
+ .set_rxnfc = gem_set_rxnfc,
+ .get_rx_ring_count = gem_get_rx_ring_count,
+ .nway_reset = phy_ethtool_nway_reset,
.get_eee = macb_get_eee,
.set_eee = macb_set_eee,
+ .get_channels = macb_get_channels,
+ .set_channels = macb_set_channels,
};
static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -4236,9 +4305,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
size_t i;
int err;
- if (conf->num_entries > bp->num_queues) {
+ if (conf->num_entries > bp->max_num_queues) {
netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
- conf->num_entries, bp->num_queues);
+ conf->num_entries, bp->max_num_queues);
return -EINVAL;
}
@@ -4286,9 +4355,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
/* gate_mask must not select queues outside the valid queues */
queue_id = order_base_2(entry->gate_mask);
- if (queue_id >= bp->num_queues) {
+ if (queue_id >= bp->max_num_queues) {
netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
- i, entry->gate_mask, bp->num_queues);
+ i, entry->gate_mask, bp->max_num_queues);
err = -EINVAL;
goto cleanup;
}
@@ -4348,7 +4417,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
/* All validations passed - proceed with hardware configuration */
scoped_guard(spinlock_irqsave, &bp->lock) {
/* Disable ENST queues if running before configuring */
- queue_mask = BIT_U32(bp->num_queues) - 1;
+ queue_mask = BIT_U32(bp->max_num_queues) - 1;
gem_writel(bp, ENST_CONTROL,
queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
@@ -4383,7 +4452,7 @@ static void macb_taprio_destroy(struct net_device *ndev)
unsigned int q;
netdev_reset_tc(ndev);
- queue_mask = BIT_U32(bp->num_queues) - 1;
+ queue_mask = BIT_U32(bp->max_num_queues) - 1;
scoped_guard(spinlock_irqsave, &bp->lock) {
/* Single disable command for all queues */
@@ -4391,7 +4460,8 @@ static void macb_taprio_destroy(struct net_device *ndev)
queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET);
/* Clear all queue ENST registers in batch */
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ for (q = 0, queue = bp->queues; q < bp->max_num_queues;
+ ++q, ++queue) {
queue_writel(queue, ENST_START_TIME, 0);
queue_writel(queue, ENST_ON_TIME, 0);
queue_writel(queue, ENST_OFF_TIME, 0);
@@ -5651,6 +5721,7 @@ static int macb_probe(struct platform_device *pdev)
bp->macb_reg_writel = hw_writel;
}
bp->num_queues = num_queues;
+ bp->max_num_queues = num_queues;
bp->dma_burst_length = macb_config->dma_burst_length;
bp->pclk = pclk;
bp->hclk = hclk;
--
2.53.0
On Wed, 11 Mar 2026 17:41:53 +0100 Théo Lebrun wrote:
> +	struct macb *bp = netdev_priv(netdev);
> +	unsigned int old_count = bp->num_queues;
> +	unsigned int count = ch->combined_count;
> +	int ret = 0;

unnecessary init

> +	/*
> +	 * MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE/BIT0 in
> +	 * the per-queue RBQP register disables queue Rx. If we don't have that
> +	 * capability we can have multiple queues but we must always run with
> +	 * all enabled.
> +	 */
> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> +		return -EOPNOTSUPP;
> +
> +	/*
> +	 * An ideal .set_channels() implementation uses upfront allocated
> +	 * resources and swaps them in, bringing reliability under memory
> +	 * pressure. However, here we implement it for memory savings in
> +	 * setups with less than max number of queues active.
> +	 *
> +	 * Signal it by refusing .set_channels() once interface is opened.
> +	 */
> +	if (netif_running(bp->dev))
> +		return -EBUSY;
> +
> +	if (count == old_count)
> +		return 0;

should we reorder this with the running() check?
Hello Jakub,

On Fri Mar 13, 2026 at 2:26 AM CET, Jakub Kicinski wrote:
> On Wed, 11 Mar 2026 17:41:53 +0100 Théo Lebrun wrote:
>> +	struct macb *bp = netdev_priv(netdev);
>> +	unsigned int old_count = bp->num_queues;
>> +	unsigned int count = ch->combined_count;
>> +	int ret = 0;
>
> unnecessary init

ack

>> +	/*
>> +	 * MACB_CAPS_QUEUE_DISABLE means that the field QUEUE_DISABLE/BIT0 in
>> +	 * the per-queue RBQP register disables queue Rx. If we don't have that
>> +	 * capability we can have multiple queues but we must always run with
>> +	 * all enabled.
>> +	 */
>> +	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
>> +		return -EOPNOTSUPP;
>> +
>> +	/*
>> +	 * An ideal .set_channels() implementation uses upfront allocated
>> +	 * resources and swaps them in, bringing reliability under memory
>> +	 * pressure. However, here we implement it for memory savings in
>> +	 * setups with less than max number of queues active.
>> +	 *
>> +	 * Signal it by refusing .set_channels() once interface is opened.
>> +	 */
>> +	if (netif_running(bp->dev))
>> +		return -EBUSY;
>> +
>> +	if (count == old_count)
>> +		return 0;
>
> should we reorder this with the running() check?

I don't agree. For example when an operation is not supported, we start
by checking that and returning EOPNOTSUPP. Then we validate the input
data. Then we act.

Here it is the same. When netif_running(), we never reply to any
request even if it happens to be a no-op.

I'll go ahead and send V3. Seeing how this was only a question I'll make
the guess you don't care much about it and are fine either way.
Same for me.

Thanks!

--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
On Fri, 13 Mar 2026 16:14:23 +0100 Théo Lebrun wrote:
> > should we reorder this with the running() check?
>
> I don't agree. For example when an operation is not supported, we start
> by checking that and returning EOPNOTSUPP. Then we validate the input
> data. Then we act.
>
> Here it is the same. When netif_running(), we never reply to any
> request even if it happens to be a no-op.
>
> I'll go ahead and send V3. Seeing how this was only a question I'll make
> the guess you don't care much about it and are fine either way.
> Same for me.

Sorry for the delay. This code can only be reached from the IOCTL path.
The Netlink path will check that params haven't changed in
ethnl_set_channels() (look at the @mod variable) and return 0 directly.
So you're basically adding a discrepancy between ioctl and Netlink.
Not a huge deal but I don't envy any user having to debug this..
On Sat Mar 14, 2026 at 3:54 PM CET, Jakub Kicinski wrote:
> On Fri, 13 Mar 2026 16:14:23 +0100 Théo Lebrun wrote:
>> > should we reorder this with the running() check?
>>
>> I don't agree. For example when an operation is not supported, we start
>> by checking that and returning EOPNOTSUPP. Then we validate the input
>> data. Then we act.
>>
>> Here it is the same. When netif_running(), we never reply to any
>> request even if it happens to be a no-op.
>>
>> I'll go ahead and send V3. Seeing how this was only a question I'll make
>> the guess you don't care much about it and are fine either way.
>> Same for me.
>
> Sorry for the delay. This code can only be reached from the IOCTL path.
> The Netlink path will check that params haven't changed in
> ethnl_set_channels() (look at the @mod variable) and return 0 directly.
> So you're basically adding a discrepancy between ioctl and Netlink.
> Not a huge deal but I don't envy any user having to debug this..
Actually, the IOCTL path also does the check (see below). So the
`count == old_count` check shall be dropped from the driver
.set_channels() callback because it is redundant. Agreed?
Extract below for the ioctl codepath:
static int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
// ...
if (copy_from_user(&channels, useraddr, sizeof(channels)))
return -EFAULT;
dev->ethtool_ops->get_channels(dev, &curr);
if (channels.rx_count == curr.rx_count &&
channels.tx_count == curr.tx_count &&
channels.combined_count == curr.combined_count &&
channels.other_count == curr.other_count)
return 0;
// ...
}
https://elixir.bootlin.com/linux/v6.19.8/source/net/ethtool/ioctl.c#L2263-L2267
Thanks,
--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
On Mon, 16 Mar 2026 17:24:28 +0100 Théo Lebrun wrote:
> > Sorry for the delay. This code can only be reached from the IOCTL path.
> > The Netlink path will check that params haven't changed in
> > ethnl_set_channels() (look at the @mod variable) and return 0 directly.
> > So you're basically adding a discrepancy between ioctl and Netlink.
> > Not a huge deal but I don't envy any user having to debug this..
>
> Actually, the IOCTL path also does the check (see below). So the
> `count == old_count` check shall be dropped from the driver
> .set_channels() callback because it is redundant. Agreed?

Oh, even better! Some of the ioctl paths don't validate the equivalence
of the config but clearly I don't remember which.
© 2016 - 2026 Red Hat, Inc.