[PATCH net v4 2/5] net: macb: remove illusion about TBQPH/RBQPH being per-queue

Théo Lebrun posted 5 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH net v4 2/5] net: macb: remove illusion about TBQPH/RBQPH being per-queue
Posted by Théo Lebrun 1 month, 2 weeks ago
The MACB driver acts as if TBQPH/RBQPH are configurable on a per-queue
basis; this is a lie. A single register configures the upper 32 bits of
the DMA descriptor buffer addresses for all queues.

Concrete actions:

 - Drop GEM_TBQPH/GEM_RBQPH macros which have a queue index argument.
   Only use MACB_TBQPH/MACB_RBQPH constants.

 - Drop struct macb_queue->TBQPH/RBQPH fields.

 - In macb_init_buffers(): do a single write to TBQPH and RBQPH for all
   queues instead of a write per queue.

 - In macb_tx_error_task(): drop the write to TBQPH.

 - In macb_alloc_consistent(): if allocations give different upper
   32 bits, fail. Previously, it would have led to silent memory
   corruption as queues would have used the upper 32 bits of the alloc
   from queue 0 and their own low 32 bits.

 - In macb_suspend(): if we use the tie off descriptor for suspend, do
   the write once for all queues instead of once per queue.

Fixes: fff8019a08b6 ("net: macb: Add 64 bit addressing support for GEM")
Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues")
Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  4 ---
 drivers/net/ethernet/cadence/macb_main.c | 57 ++++++++++++++------------------
 2 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index c9a5c8beb2fa8166195d1d83f187d2d0c62668a8..a7e845fee4b3a2e3d14abb49abdbaf3e8e6ea02b 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -213,10 +213,8 @@
 
 #define GEM_ISR(hw_q)		(0x0400 + ((hw_q) << 2))
 #define GEM_TBQP(hw_q)		(0x0440 + ((hw_q) << 2))
-#define GEM_TBQPH(hw_q)		(0x04C8)
 #define GEM_RBQP(hw_q)		(0x0480 + ((hw_q) << 2))
 #define GEM_RBQS(hw_q)		(0x04A0 + ((hw_q) << 2))
-#define GEM_RBQPH(hw_q)		(0x04D4)
 #define GEM_IER(hw_q)		(0x0600 + ((hw_q) << 2))
 #define GEM_IDR(hw_q)		(0x0620 + ((hw_q) << 2))
 #define GEM_IMR(hw_q)		(0x0640 + ((hw_q) << 2))
@@ -1214,10 +1212,8 @@ struct macb_queue {
 	unsigned int		IDR;
 	unsigned int		IMR;
 	unsigned int		TBQP;
-	unsigned int		TBQPH;
 	unsigned int		RBQS;
 	unsigned int		RBQP;
-	unsigned int		RBQPH;
 
 	/* Lock to protect tx_head and tx_tail */
 	spinlock_t		tx_ptr_lock;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ce95fad8cedd7331d4818ba9f73fb6970249e85c..69325665c766927797ca2e1eb1384105bcde3cb5 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -495,19 +495,19 @@ static void macb_init_buffers(struct macb *bp)
 	struct macb_queue *queue;
 	unsigned int q;
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	/* Single register for all queues' high 32 bits. */
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
+		macb_writel(bp, RBQPH,
+			    upper_32_bits(bp->queues[0].rx_ring_dma));
+		macb_writel(bp, TBQPH,
+			    upper_32_bits(bp->queues[0].tx_ring_dma));
+	}
+#endif
+
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-			queue_writel(queue, RBQPH,
-				     upper_32_bits(queue->rx_ring_dma));
-#endif
 		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-			queue_writel(queue, TBQPH,
-				     upper_32_bits(queue->tx_ring_dma));
-#endif
 	}
 }
 
@@ -1166,10 +1166,6 @@ static void macb_tx_error_task(struct work_struct *work)
 
 	/* Reinitialize the TX desc queue */
 	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-		queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
-#endif
 	/* Make TX ring reflect state of hardware */
 	queue->tx_head = 0;
 	queue->tx_tail = 0;
@@ -2542,6 +2538,7 @@ static int macb_alloc_consistent(struct macb *bp)
 {
 	struct macb_queue *queue;
 	unsigned int q;
+	u32 upper;
 	int size;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -2549,7 +2546,9 @@ static int macb_alloc_consistent(struct macb *bp)
 		queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
 						    &queue->tx_ring_dma,
 						    GFP_KERNEL);
-		if (!queue->tx_ring)
+		upper = upper_32_bits(queue->tx_ring_dma);
+		if (!queue->tx_ring ||
+		    upper != upper_32_bits(bp->queues[0].tx_ring_dma))
 			goto out_err;
 		netdev_dbg(bp->dev,
 			   "Allocated TX ring for queue %u of %d bytes at %08lx (mapped %p)\n",
@@ -2563,8 +2562,11 @@ static int macb_alloc_consistent(struct macb *bp)
 
 		size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch;
 		queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
-						 &queue->rx_ring_dma, GFP_KERNEL);
-		if (!queue->rx_ring)
+						    &queue->rx_ring_dma,
+						    GFP_KERNEL);
+		upper = upper_32_bits(queue->rx_ring_dma);
+		if (!queue->rx_ring ||
+		    upper != upper_32_bits(bp->queues[0].rx_ring_dma))
 			goto out_err;
 		netdev_dbg(bp->dev,
 			   "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
@@ -4305,12 +4307,6 @@ static int macb_init(struct platform_device *pdev)
 			queue->TBQP = GEM_TBQP(hw_q - 1);
 			queue->RBQP = GEM_RBQP(hw_q - 1);
 			queue->RBQS = GEM_RBQS(hw_q - 1);
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
-				queue->TBQPH = GEM_TBQPH(hw_q - 1);
-				queue->RBQPH = GEM_RBQPH(hw_q - 1);
-			}
-#endif
 		} else {
 			/* queue0 uses legacy registers */
 			queue->ISR  = MACB_ISR;
@@ -4319,12 +4315,6 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR  = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
 			queue->RBQP = MACB_RBQP;
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
-				queue->TBQPH = MACB_TBQPH;
-				queue->RBQPH = MACB_RBQPH;
-			}
-#endif
 		}
 
 		/* get irq: here we use the linux queue index, not the hardware
@@ -5450,6 +5440,11 @@ static int __maybe_unused macb_suspend(struct device *dev)
 		 */
 		tmp = macb_readl(bp, NCR);
 		macb_writel(bp, NCR, tmp & ~(MACB_BIT(TE) | MACB_BIT(RE)));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+			macb_writel(bp, RBQPH,
+				    upper_32_bits(bp->rx_ring_tieoff_dma));
+#endif
 		for (q = 0, queue = bp->queues; q < bp->num_queues;
 		     ++q, ++queue) {
 			/* Disable RX queues */
@@ -5459,10 +5454,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
 				/* Tie off RX queues */
 				queue_writel(queue, RBQP,
 					     lower_32_bits(bp->rx_ring_tieoff_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-				queue_writel(queue, RBQPH,
-					     upper_32_bits(bp->rx_ring_tieoff_dma));
-#endif
 			}
 			/* Disable all interrupts */
 			queue_writel(queue, IDR, -1);

-- 
2.50.1

Re: [PATCH net v4 2/5] net: macb: remove illusion about TBQPH/RBQPH being per-queue
Posted by Nicolas Ferre 1 month, 1 week ago
On 20/08/2025 at 16:55, Théo Lebrun wrote:
> The MACB driver acts as if TBQPH/RBQPH are configurable on a per queue
> basis; this is a lie. A single register configures the upper 32 bits of
> each DMA descriptor buffers for all queues.
> 
> Concrete actions:
> 
>   - Drop GEM_TBQPH/GEM_RBQPH macros which have a queue index argument.
>     Only use MACB_TBQPH/MACB_RBQPH constants.
> 
>   - Drop struct macb_queue->TBQPH/RBQPH fields.
> 
>   - In macb_init_buffers(): do a single write to TBQPH and RBQPH for all
>     queues instead of a write per queue.
> 
>   - In macb_tx_error_task(): drop the write to TBQPH.
> 
>   - In macb_alloc_consistent(): if allocations give different upper
>     32-bits, fail. Previously, it would have lead to silent memory
>     corruption as queues would have used the upper 32 bits of the alloc
>     from queue 0 and their own low 32 bits.
> 
>   - In macb_suspend(): if we use the tie off descriptor for suspend, do
>     the write once for all queues instead of once per queue.

Indeed, agreed.

> Fixes: fff8019a08b6 ("net: macb: Add 64 bit addressing support for GEM")
> Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues")
> Reviewed-by: Sean Anderson <sean.anderson@linux.dev>
> Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>

Thanks Théo, best regards,
   Nicolas

> ---
>   drivers/net/ethernet/cadence/macb.h      |  4 ---
>   drivers/net/ethernet/cadence/macb_main.c | 57 ++++++++++++++------------------
>   2 files changed, 24 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index c9a5c8beb2fa8166195d1d83f187d2d0c62668a8..a7e845fee4b3a2e3d14abb49abdbaf3e8e6ea02b 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -213,10 +213,8 @@
> 
>   #define GEM_ISR(hw_q)          (0x0400 + ((hw_q) << 2))
>   #define GEM_TBQP(hw_q)         (0x0440 + ((hw_q) << 2))
> -#define GEM_TBQPH(hw_q)                (0x04C8)
>   #define GEM_RBQP(hw_q)         (0x0480 + ((hw_q) << 2))
>   #define GEM_RBQS(hw_q)         (0x04A0 + ((hw_q) << 2))
> -#define GEM_RBQPH(hw_q)                (0x04D4)
>   #define GEM_IER(hw_q)          (0x0600 + ((hw_q) << 2))
>   #define GEM_IDR(hw_q)          (0x0620 + ((hw_q) << 2))
>   #define GEM_IMR(hw_q)          (0x0640 + ((hw_q) << 2))
> @@ -1214,10 +1212,8 @@ struct macb_queue {
>          unsigned int            IDR;
>          unsigned int            IMR;
>          unsigned int            TBQP;
> -       unsigned int            TBQPH;
>          unsigned int            RBQS;
>          unsigned int            RBQP;
> -       unsigned int            RBQPH;
> 
>          /* Lock to protect tx_head and tx_tail */
>          spinlock_t              tx_ptr_lock;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index ce95fad8cedd7331d4818ba9f73fb6970249e85c..69325665c766927797ca2e1eb1384105bcde3cb5 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -495,19 +495,19 @@ static void macb_init_buffers(struct macb *bp)
>          struct macb_queue *queue;
>          unsigned int q;
> 
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +       /* Single register for all queues' high 32 bits. */
> +       if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
> +               macb_writel(bp, RBQPH,
> +                           upper_32_bits(bp->queues[0].rx_ring_dma));
> +               macb_writel(bp, TBQPH,
> +                           upper_32_bits(bp->queues[0].tx_ring_dma));
> +       }
> +#endif
> +
>          for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
>                  queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -               if (bp->hw_dma_cap & HW_DMA_CAP_64B)
> -                       queue_writel(queue, RBQPH,
> -                                    upper_32_bits(queue->rx_ring_dma));
> -#endif
>                  queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -               if (bp->hw_dma_cap & HW_DMA_CAP_64B)
> -                       queue_writel(queue, TBQPH,
> -                                    upper_32_bits(queue->tx_ring_dma));
> -#endif
>          }
>   }
> 
> @@ -1166,10 +1166,6 @@ static void macb_tx_error_task(struct work_struct *work)
> 
>          /* Reinitialize the TX desc queue */
>          queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
> -               queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
> -#endif
>          /* Make TX ring reflect state of hardware */
>          queue->tx_head = 0;
>          queue->tx_tail = 0;
> @@ -2542,6 +2538,7 @@ static int macb_alloc_consistent(struct macb *bp)
>   {
>          struct macb_queue *queue;
>          unsigned int q;
> +       u32 upper;
>          int size;
> 
>          for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
> @@ -2549,7 +2546,9 @@ static int macb_alloc_consistent(struct macb *bp)
>                  queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
>                                                      &queue->tx_ring_dma,
>                                                      GFP_KERNEL);
> -               if (!queue->tx_ring)
> +               upper = upper_32_bits(queue->tx_ring_dma);
> +               if (!queue->tx_ring ||
> +                   upper != upper_32_bits(bp->queues[0].tx_ring_dma))
>                          goto out_err;
>                  netdev_dbg(bp->dev,
>                             "Allocated TX ring for queue %u of %d bytes at %08lx (mapped %p)\n",
> @@ -2563,8 +2562,11 @@ static int macb_alloc_consistent(struct macb *bp)
> 
>                  size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch;
>                  queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
> -                                                &queue->rx_ring_dma, GFP_KERNEL);
> -               if (!queue->rx_ring)
> +                                                   &queue->rx_ring_dma,
> +                                                   GFP_KERNEL);
> +               upper = upper_32_bits(queue->rx_ring_dma);
> +               if (!queue->rx_ring ||
> +                   upper != upper_32_bits(bp->queues[0].rx_ring_dma))
>                          goto out_err;
>                  netdev_dbg(bp->dev,
>                             "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
> @@ -4305,12 +4307,6 @@ static int macb_init(struct platform_device *pdev)
>                          queue->TBQP = GEM_TBQP(hw_q - 1);
>                          queue->RBQP = GEM_RBQP(hw_q - 1);
>                          queue->RBQS = GEM_RBQS(hw_q - 1);
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -                       if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
> -                               queue->TBQPH = GEM_TBQPH(hw_q - 1);
> -                               queue->RBQPH = GEM_RBQPH(hw_q - 1);
> -                       }
> -#endif
>                  } else {
>                          /* queue0 uses legacy registers */
>                          queue->ISR  = MACB_ISR;
> @@ -4319,12 +4315,6 @@ static int macb_init(struct platform_device *pdev)
>                          queue->IMR  = MACB_IMR;
>                          queue->TBQP = MACB_TBQP;
>                          queue->RBQP = MACB_RBQP;
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -                       if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
> -                               queue->TBQPH = MACB_TBQPH;
> -                               queue->RBQPH = MACB_RBQPH;
> -                       }
> -#endif
>                  }
> 
>                  /* get irq: here we use the linux queue index, not the hardware
> @@ -5450,6 +5440,11 @@ static int __maybe_unused macb_suspend(struct device *dev)
>                   */
>                  tmp = macb_readl(bp, NCR);
>                  macb_writel(bp, NCR, tmp & ~(MACB_BIT(TE) | MACB_BIT(RE)));
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +               if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> +                       macb_writel(bp, RBQPH,
> +                                   upper_32_bits(bp->rx_ring_tieoff_dma));
> +#endif
>                  for (q = 0, queue = bp->queues; q < bp->num_queues;
>                       ++q, ++queue) {
>                          /* Disable RX queues */
> @@ -5459,10 +5454,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
>                                  /* Tie off RX queues */
>                                  queue_writel(queue, RBQP,
>                                               lower_32_bits(bp->rx_ring_tieoff_dma));
> -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> -                               queue_writel(queue, RBQPH,
> -                                            upper_32_bits(bp->rx_ring_tieoff_dma));
> -#endif
>                          }
>                          /* Disable all interrupts */
>                          queue_writel(queue, IDR, -1);
> 
> --
> 2.50.1
>