[PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support

Nicolai Buchwitz posted 5 patches 1 month, 1 week ago
[PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support
Posted by Nicolai Buchwitz 1 month, 1 week ago
The GEM MAC has hardware LPI registers (NCR bit 19: TXLPIEN) but no
built-in idle timer, so asserting TXLPIEN blocks all TX immediately
with no automatic wake. A software idle timer is required, as noted
in Microchip documentation (section 40.6.19): "It is best to use
firmware to control LPI."

Implement phylink managed EEE using the mac_enable_tx_lpi and
mac_disable_tx_lpi callbacks:

- macb_tx_lpi_set(): atomically sets or clears TXLPIEN under the
  existing bp->lock spinlock; returns bool indicating whether the
  register actually changed, avoiding redundant writes.

- macb_tx_lpi_work_fn(): delayed_work handler that enters LPI if all
  TX queues are idle and EEE is still active.

- macb_tx_lpi_schedule(): arms the work timer using the LPI timer
  value provided by phylink (default 250 ms). Called from
  macb_tx_complete() after each TX drain so the idle countdown
  restarts whenever the ring goes quiet.

- macb_tx_lpi_wake(): called from macb_start_xmit() before TSTART.
  Clears TXLPIEN and applies a 50 us udelay for PHY wake (IEEE
  802.3az Tw_sys_tx is 16.5 us for 1000BASE-T / 30 us for
  100BASE-TX; GEM has no hardware enforcement). Only delays when
  TXLPIEN was actually set, avoiding overhead on the common path.
  The delay is placed after tx_head is advanced so the work_fn's
  queue-idle check sees a non-empty ring and cannot race back into
  LPI before the frame is transmitted.

- mac_enable_tx_lpi: stores the timer and sets eee_active, then
  defers the first LPI entry by 1 second per IEEE 802.3az section
  22.7a.

- mac_disable_tx_lpi: clears eee_active, cancels the work, and
  deasserts TXLPIEN.

Populate phylink_config lpi_interfaces (MII, GMII, RGMII variants)
and lpi_capabilities (MAC_100FD | MAC_1000FD) so phylink can
negotiate EEE with the PHY and call the callbacks appropriately.
Set lpi_timer_default to 250000 us and eee_enabled_default to true.

Reviewed-by: Théo Lebrun <theo.lebrun@bootlin.com>
Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
---
 drivers/net/ethernet/cadence/macb.h      |   8 ++
 drivers/net/ethernet/cadence/macb_main.c | 112 +++++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 19aa98d01c8c..c69828b27dae 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -309,6 +309,8 @@
 #define MACB_IRXFCS_SIZE	1
 
 /* GEM specific NCR bitfields. */
+#define GEM_TXLPIEN_OFFSET		19
+#define GEM_TXLPIEN_SIZE		1
 #define GEM_ENABLE_HS_MAC_OFFSET	31
 #define GEM_ENABLE_HS_MAC_SIZE		1
 
@@ -783,6 +785,7 @@
 #define MACB_CAPS_DMA_PTP			BIT(22)
 #define MACB_CAPS_RSC				BIT(23)
 #define MACB_CAPS_NO_LSO			BIT(24)
+#define MACB_CAPS_EEE				BIT(25)
 
 /* LSO settings */
 #define MACB_LSO_UFO_ENABLE			0x01
@@ -1369,6 +1372,11 @@ struct macb {
 
 	struct work_struct	hresp_err_bh_work;
 
+	/* EEE / LPI state */
+	bool			eee_active;
+	struct delayed_work	tx_lpi_work;
+	u32			tx_lpi_timer;
+
 	int	rx_bd_rd_prefetch;
 	int	tx_bd_rd_prefetch;
 
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 02eab26fd98b..c23485f049d3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -10,6 +10,7 @@
 #include <linux/clk-provider.h>
 #include <linux/clk.h>
 #include <linux/crc32.h>
+#include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/firmware/xlnx-zynqmp.h>
@@ -621,6 +622,94 @@ static const struct phylink_pcs_ops macb_phylink_pcs_ops = {
 	.pcs_config = macb_pcs_config,
 };
 
+static bool macb_tx_lpi_set(struct macb *bp, bool enable)
+{
+	unsigned long flags;
+	u32 old, ncr;
+
+	spin_lock_irqsave(&bp->lock, flags);
+	ncr = macb_readl(bp, NCR);
+	old = ncr;
+	if (enable)
+		ncr |= GEM_BIT(TXLPIEN);
+	else
+		ncr &= ~GEM_BIT(TXLPIEN);
+	if (old != ncr)
+		macb_writel(bp, NCR, ncr);
+	spin_unlock_irqrestore(&bp->lock, flags);
+
+	return old != ncr;
+}
+
+static bool macb_tx_all_queues_idle(struct macb *bp)
+{
+	unsigned int q;
+
+	for (q = 0; q < bp->num_queues; q++) {
+		struct macb_queue *queue = &bp->queues[q];
+
+		if (queue->tx_head != queue->tx_tail)
+			return false;
+	}
+	return true;
+}
+
+static void macb_tx_lpi_work_fn(struct work_struct *work)
+{
+	struct macb *bp = container_of(work, struct macb, tx_lpi_work.work);
+
+	if (bp->eee_active && macb_tx_all_queues_idle(bp))
+		macb_tx_lpi_set(bp, true);
+}
+
+static void macb_tx_lpi_schedule(struct macb *bp)
+{
+	if (bp->eee_active)
+		mod_delayed_work(system_wq, &bp->tx_lpi_work,
+				 usecs_to_jiffies(bp->tx_lpi_timer));
+}
+
+/* Wake from LPI before transmitting. The MAC must deassert TXLPIEN
+ * and wait for the PHY to exit LPI before any frame can be sent.
+ * IEEE 802.3az Tw_sys is ~17us for 1000BASE-T, ~30us for 100BASE-TX;
+ * we use a conservative 50us.
+ */
+static void macb_tx_lpi_wake(struct macb *bp)
+{
+	if (!macb_tx_lpi_set(bp, false))
+		return;
+
+	cancel_delayed_work(&bp->tx_lpi_work);
+	udelay(50);
+}
+
+static void macb_mac_disable_tx_lpi(struct phylink_config *config)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(ndev);
+
+	bp->eee_active = false;
+	cancel_delayed_work_sync(&bp->tx_lpi_work);
+	macb_tx_lpi_set(bp, false);
+}
+
+static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
+				  bool tx_clk_stop)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(ndev);
+
+	bp->tx_lpi_timer = timer;
+	bp->eee_active = true;
+
+	/* Defer initial LPI entry by 1 second after link-up per
+	 * IEEE 802.3az section 22.7a.
+	 */
+	mod_delayed_work(system_wq, &bp->tx_lpi_work, msecs_to_jiffies(1000));
+
+	return 0;
+}
+
 static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 			    const struct phylink_link_state *state)
 {
@@ -769,6 +858,8 @@ static const struct phylink_mac_ops macb_phylink_ops = {
 	.mac_config = macb_mac_config,
 	.mac_link_down = macb_mac_link_down,
 	.mac_link_up = macb_mac_link_up,
+	.mac_disable_tx_lpi = macb_mac_disable_tx_lpi,
+	.mac_enable_tx_lpi = macb_mac_enable_tx_lpi,
 };
 
 static bool macb_phy_handle_exists(struct device_node *dn)
@@ -864,6 +955,18 @@ static int macb_mii_probe(struct net_device *dev)
 		}
 	}
 
+	/* Configure EEE LPI if supported */
+	if (bp->caps & MACB_CAPS_EEE) {
+		__set_bit(PHY_INTERFACE_MODE_MII,
+			  bp->phylink_config.lpi_interfaces);
+		__set_bit(PHY_INTERFACE_MODE_GMII,
+			  bp->phylink_config.lpi_interfaces);
+		phy_interface_set_rgmii(bp->phylink_config.lpi_interfaces);
+		bp->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD;
+		bp->phylink_config.lpi_timer_default = 250000;
+		bp->phylink_config.eee_enabled_default = true;
+	}
+
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
@@ -1260,6 +1363,9 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		netif_wake_subqueue(bp->dev, queue_index);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
+	if (packets)
+		macb_tx_lpi_schedule(bp);
+
 	return packets;
 }
 
@@ -2365,6 +2471,8 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index),
 			     skb->len);
 
+	macb_tx_lpi_wake(bp);
+
 	spin_lock(&bp->lock);
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 	spin_unlock(&bp->lock);
@@ -3026,6 +3134,8 @@ static int macb_close(struct net_device *dev)
 		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
 	}
 
+	cancel_delayed_work_sync(&bp->tx_lpi_work);
+
 	phylink_stop(bp->phylink);
 	phylink_disconnect_phy(bp->phylink);
 
@@ -5633,6 +5743,7 @@ static int macb_probe(struct platform_device *pdev)
 	}
 
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
+	INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn);
 
 	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
 		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
@@ -5676,6 +5787,7 @@ static void macb_remove(struct platform_device *pdev)
 		mdiobus_free(bp->mii_bus);
 
 		device_set_wakeup_enable(&bp->pdev->dev, 0);
+		cancel_delayed_work_sync(&bp->tx_lpi_work);
 		cancel_work_sync(&bp->hresp_err_bh_work);
 		pm_runtime_disable(&pdev->dev);
 		pm_runtime_dont_use_autosuspend(&pdev->dev);
-- 
2.51.0

Re: [PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support
Posted by Jakub Kicinski 1 month ago
On Fri, 27 Feb 2026 16:06:07 +0100 Nicolai Buchwitz wrote:
> +static bool macb_tx_lpi_set(struct macb *bp, bool enable)
> +{
> +	unsigned long flags;
> +	u32 old, ncr;
> +
> +	spin_lock_irqsave(&bp->lock, flags);

We should optimize this function for the fast path caller.
xmit path does:

+	macb_tx_lpi_wake(bp);
+
 	spin_lock(&bp->lock);

So it immediately takes that lock again. Can we move the lpi_wake()
call under the spin_lock, and make sure other callers also take that
lock? I think you can add a lockdep assert to make sure the spin lock
is held.

> +	ncr = macb_readl(bp, NCR);
> +	old = ncr;
> +	if (enable)
> +		ncr |= GEM_BIT(TXLPIEN);
> +	else
> +		ncr &= ~GEM_BIT(TXLPIEN);
> +	if (old != ncr)
> +		macb_writel(bp, NCR, ncr);
> +	spin_unlock_irqrestore(&bp->lock, flags);
> +
> +	return old != ncr;
> +}
> +
> +static bool macb_tx_all_queues_idle(struct macb *bp)
> +{
> +	unsigned int q;
> +
> +	for (q = 0; q < bp->num_queues; q++) {
> +		struct macb_queue *queue = &bp->queues[q];
> +
> +		if (queue->tx_head != queue->tx_tail)

Does this not need tx_ptr_lock technically?

> +			return false;
> +	}
> +	return true;
> +}
> +
> +static void macb_tx_lpi_work_fn(struct work_struct *work)
> +{
> +	struct macb *bp = container_of(work, struct macb, tx_lpi_work.work);
> +
> +	if (bp->eee_active && macb_tx_all_queues_idle(bp))
> +		macb_tx_lpi_set(bp, true);
> +}
> +
> +static void macb_tx_lpi_schedule(struct macb *bp)
> +{
> +	if (bp->eee_active)
> +		mod_delayed_work(system_wq, &bp->tx_lpi_work,
> +				 usecs_to_jiffies(bp->tx_lpi_timer));
> +}
> +
> +/* Wake from LPI before transmitting. The MAC must deassert TXLPIEN
> + * and wait for the PHY to exit LPI before any frame can be sent.
> + * IEEE 802.3az Tw_sys is ~17us for 1000BASE-T, ~30us for 100BASE-TX;
> + * we use a conservative 50us.
> + */
> +static void macb_tx_lpi_wake(struct macb *bp)
> +{
> +	if (!macb_tx_lpi_set(bp, false))

Does this lpi_set() not have a relatively high cost, even if eee_active
is disabled? Reading registers is usually pretty slow. Can we add
an eee_active check here as well to shortcut the lpi check?
If we do we probably want to make sure that the code paths setting
eee_active are also under bp->lock, otherwise this new check will be
racy.
-- 
pw-bot: cr
Re: [PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support
Posted by Théo Lebrun 1 month ago
On Tue Mar 3, 2026 at 3:15 AM CET, Jakub Kicinski wrote:
> On Fri, 27 Feb 2026 16:06:07 +0100 Nicolai Buchwitz wrote:
>> +static bool macb_tx_lpi_set(struct macb *bp, bool enable)
>> +{
>> +	unsigned long flags;
>> +	u32 old, ncr;
>> +
>> +	spin_lock_irqsave(&bp->lock, flags);
>
> We should optimize this function for the fast path caller.
> xmit path does:
>
> +	macb_tx_lpi_wake(bp);
> +
>  	spin_lock(&bp->lock);
>
> So it immediately takes that lock again, can we move the lpi_wake()
> call under the spin_lock, and make sure other callers also take that
> lock? I think you can add a lockdep assert to make sure spin lock is
> held
>
>> +	ncr = macb_readl(bp, NCR);
>> +	old = ncr;
>> +	if (enable)
>> +		ncr |= GEM_BIT(TXLPIEN);
>> +	else
>> +		ncr &= ~GEM_BIT(TXLPIEN);
>> +	if (old != ncr)
>> +		macb_writel(bp, NCR, ncr);
>> +	spin_unlock_irqrestore(&bp->lock, flags);
>> +
>> +	return old != ncr;
>> +}
>> +
>> +static bool macb_tx_all_queues_idle(struct macb *bp)
>> +{
>> +	unsigned int q;
>> +
>> +	for (q = 0; q < bp->num_queues; q++) {
>> +		struct macb_queue *queue = &bp->queues[q];
>> +
>> +		if (queue->tx_head != queue->tx_tail)
>
> Does this not need tx_ptr_lock technically?
>
>> +			return false;
>> +	}
>> +	return true;
>> +}
>> +
>> +static void macb_tx_lpi_work_fn(struct work_struct *work)
>> +{
>> +	struct macb *bp = container_of(work, struct macb, tx_lpi_work.work);
>> +
>> +	if (bp->eee_active && macb_tx_all_queues_idle(bp))
>> +		macb_tx_lpi_set(bp, true);
>> +}
>> +
>> +static void macb_tx_lpi_schedule(struct macb *bp)
>> +{
>> +	if (bp->eee_active)
>> +		mod_delayed_work(system_wq, &bp->tx_lpi_work,
>> +				 usecs_to_jiffies(bp->tx_lpi_timer));
>> +}
>> +
>> +/* Wake from LPI before transmitting. The MAC must deassert TXLPIEN
>> + * and wait for the PHY to exit LPI before any frame can be sent.
>> + * IEEE 802.3az Tw_sys is ~17us for 1000BASE-T, ~30us for 100BASE-TX;
>> + * we use a conservative 50us.
>> + */
>> +static void macb_tx_lpi_wake(struct macb *bp)
>> +{
>> +	if (!macb_tx_lpi_set(bp, false))
>
> Does this lpi_set() not have a relatively high cost, even if eee_active
> is disabled? Reading registers is usually pretty slow. Can we add
> an eee_active check here as well to shortcut the lpi check?
> If we do we probably want to make sure that the code paths setting
> eee_active are also under bp->lock, otherwise this new check will be
> racy.

Funny how this discussion keeps coming up! I made the same remark on V3:
https://lore.kernel.org/netdev/DGOXXGNSSMYK.2XNU9AQ6E077P@bootlin.com/

And it had been discussed in a previous iteration before.

In theory I agree, in practice the optimization was statistically
insignificant on my platform. The total time spent in macb_start_xmit()
is tiny, so any optimization inside of it is even more so.

Thanks,

--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Re: [PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support
Posted by Jakub Kicinski 1 month ago
On Tue, 03 Mar 2026 09:14:41 +0100 Théo Lebrun wrote:
> >> +/* Wake from LPI before transmitting. The MAC must deassert TXLPIEN
> >> + * and wait for the PHY to exit LPI before any frame can be sent.
> >> + * IEEE 802.3az Tw_sys is ~17us for 1000BASE-T, ~30us for 100BASE-TX;
> >> + * we use a conservative 50us.
> >> + */
> >> +static void macb_tx_lpi_wake(struct macb *bp)
> >> +{
> >> +	if (!macb_tx_lpi_set(bp, false))  
> >
> > Does this lpi_set() not have a relatively high cost, even if eee_active
> > is disabled? Reading registers is usually pretty slow. Can we add
> > an eee_active check here as well to shortcut the lpi check?
> > If we do we probably want to make sure that the code paths setting
> > eee_active are also under bp->lock, otherwise this new check will be
> > racy.  
> 
> Funny how this discussion keeps coming up! I made the same remark on V3:
> https://lore.kernel.org/netdev/DGOXXGNSSMYK.2XNU9AQ6E077P@bootlin.com/
> 
> And it had been discussed in a previous iteration before.
> 
> In theory I agree, in practice the optimization was statistically
> insignificant on my platform. The total time spent in macb_start_xmit()
> is tiny, so any optimization inside of it is even more so.

TBH I started looking around because the v5 implementation seems racy.
Probably not in practice, but in theory on a multi-queue device you
clear LPI and release the lock, then another queue may schedule the LPI
work again; if the xmit path is delayed for a long time, the LPI work may
re-enable LPI before xmit rings the doorbell.

The rework I suggested is not only more optimal (dare I say logical to
an experienced developer) but also I think it'd be more correct.

macb has a crazy number of locks, so maybe I'm missing something.
But sooner or later someone will hopefully start removing those locks,
because this driver must be dog slow right now :/
Re: [PATCH net-next v5 2/5] net: cadence: macb: implement EEE TX LPI support
Posted by Claudiu Beznea 1 month, 1 week ago

On 2/27/26 17:06, Nicolai Buchwitz wrote:
> The GEM MAC has hardware LPI registers (NCR bit 19: TXLPIEN) but no
> built-in idle timer, so asserting TXLPIEN blocks all TX immediately
> with no automatic wake. A software idle timer is required, as noted
> in Microchip documentation (section 40.6.19): "It is best to use
> firmware to control LPI."
> 
> Implement phylink managed EEE using the mac_enable_tx_lpi and
> mac_disable_tx_lpi callbacks:
> 
> - macb_tx_lpi_set(): atomically sets or clears TXLPIEN under the
>    existing bp->lock spinlock; returns bool indicating whether the
>    register actually changed, avoiding redundant writes.
> 
> - macb_tx_lpi_work_fn(): delayed_work handler that enters LPI if all
>    TX queues are idle and EEE is still active.
> 
> - macb_tx_lpi_schedule(): arms the work timer using the LPI timer
>    value provided by phylink (default 250 ms). Called from
>    macb_tx_complete() after each TX drain so the idle countdown
>    restarts whenever the ring goes quiet.
> 
> - macb_tx_lpi_wake(): called from macb_start_xmit() before TSTART.
>    Clears TXLPIEN and applies a 50 us udelay for PHY wake (IEEE
>    802.3az Tw_sys_tx is 16.5 us for 1000BASE-T / 30 us for
>    100BASE-TX; GEM has no hardware enforcement). Only delays when
>    TXLPIEN was actually set, avoiding overhead on the common path.
>    The delay is placed after tx_head is advanced so the work_fn's
>    queue-idle check sees a non-empty ring and cannot race back into
>    LPI before the frame is transmitted.
> 
> - mac_enable_tx_lpi: stores the timer and sets eee_active, then
>    defers the first LPI entry by 1 second per IEEE 802.3az section
>    22.7a.
> 
> - mac_disable_tx_lpi: clears eee_active, cancels the work, and
>    deasserts TXLPIEN.
> 
> Populate phylink_config lpi_interfaces (MII, GMII, RGMII variants)
> and lpi_capabilities (MAC_100FD | MAC_1000FD) so phylink can
> negotiate EEE with the PHY and call the callbacks appropriately.
> Set lpi_timer_default to 250000 us and eee_enabled_default to true.
> 
> Reviewed-by: Théo Lebrun <theo.lebrun@bootlin.com>
> Signed-off-by: Nicolai Buchwitz <nb@tipi-net.de>
> ---
>   drivers/net/ethernet/cadence/macb.h      |   8 ++
>   drivers/net/ethernet/cadence/macb_main.c | 112 +++++++++++++++++++++++
>   2 files changed, 120 insertions(+)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index 19aa98d01c8c..c69828b27dae 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -309,6 +309,8 @@
>   #define MACB_IRXFCS_SIZE	1
>   
>   /* GEM specific NCR bitfields. */
> +#define GEM_TXLPIEN_OFFSET		19
> +#define GEM_TXLPIEN_SIZE		1
>   #define GEM_ENABLE_HS_MAC_OFFSET	31
>   #define GEM_ENABLE_HS_MAC_SIZE		1
>   
> @@ -783,6 +785,7 @@
>   #define MACB_CAPS_DMA_PTP			BIT(22)
>   #define MACB_CAPS_RSC				BIT(23)
>   #define MACB_CAPS_NO_LSO			BIT(24)
> +#define MACB_CAPS_EEE				BIT(25)
>   
>   /* LSO settings */
>   #define MACB_LSO_UFO_ENABLE			0x01
> @@ -1369,6 +1372,11 @@ struct macb {
>   
>   	struct work_struct	hresp_err_bh_work;
>   
> +	/* EEE / LPI state */
> +	bool			eee_active;
> +	struct delayed_work	tx_lpi_work;
> +	u32			tx_lpi_timer;
> +
>   	int	rx_bd_rd_prefetch;
>   	int	tx_bd_rd_prefetch;
>   
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index 02eab26fd98b..c23485f049d3 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -10,6 +10,7 @@
>   #include <linux/clk-provider.h>
>   #include <linux/clk.h>
>   #include <linux/crc32.h>
> +#include <linux/delay.h>
>   #include <linux/dma-mapping.h>
>   #include <linux/etherdevice.h>
>   #include <linux/firmware/xlnx-zynqmp.h>
> @@ -621,6 +622,94 @@ static const struct phylink_pcs_ops macb_phylink_pcs_ops = {
>   	.pcs_config = macb_pcs_config,
>   };
>   
> +static bool macb_tx_lpi_set(struct macb *bp, bool enable)
> +{
> +	unsigned long flags;
> +	u32 old, ncr;
> +
> +	spin_lock_irqsave(&bp->lock, flags);
> +	ncr = macb_readl(bp, NCR);
> +	old = ncr;
> +	if (enable)
> +		ncr |= GEM_BIT(TXLPIEN);
> +	else
> +		ncr &= ~GEM_BIT(TXLPIEN);
> +	if (old != ncr)
> +		macb_writel(bp, NCR, ncr);
> +	spin_unlock_irqrestore(&bp->lock, flags);
> +
> +	return old != ncr;
> +}
> +
> +static bool macb_tx_all_queues_idle(struct macb *bp)
> +{
> +	unsigned int q;
> +
> +	for (q = 0; q < bp->num_queues; q++) {
> +		struct macb_queue *queue = &bp->queues[q];

In case there will be another version: to keep a unified approach across the
driver, this loop can be written like all the other loops over queues in this driver:

     for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
         // ...
     }

Apart from that:

Reviewed-by: Claudiu Beznea <claudiu.beznea@tuxon.dev>

Thank you,
Claudiu