[PATCH 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll

Louis-Alexis Eyraud posted 2 patches 8 months ago
There is a newer version of this series
[PATCH 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll
Posted by Louis-Alexis Eyraud 8 months ago
Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock
and spin_unlock in mtk_star_emac driver to avoid spinlock recursion
occurrence that can happen when enabling the DMA interrupts again in
rx/tx poll.

```
BUG: spinlock recursion on CPU#0, swapper/0/0
 lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0,
    .owner_cpu: 0
CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
    6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT
Hardware name: MediaTek MT8365 Open Platform EVK (DT)
Call trace:
 show_stack+0x18/0x24 (C)
 dump_stack_lvl+0x60/0x80
 dump_stack+0x18/0x24
 spin_dump+0x78/0x88
 do_raw_spin_lock+0x11c/0x120
 _raw_spin_lock+0x20/0x2c
 mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac]
 __handle_irq_event_percpu+0x48/0x140
 handle_irq_event+0x4c/0xb0
 handle_fasteoi_irq+0xa0/0x1bc
 handle_irq_desc+0x34/0x58
 generic_handle_domain_irq+0x1c/0x28
 gic_handle_irq+0x4c/0x120
 do_interrupt_handler+0x50/0x84
 el1_interrupt+0x34/0x68
 el1h_64_irq_handler+0x18/0x24
 el1h_64_irq+0x6c/0x70
 regmap_mmio_read32le+0xc/0x20 (P)
 _regmap_bus_reg_read+0x6c/0xac
 _regmap_read+0x60/0xdc
 regmap_read+0x4c/0x80
 mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac]
 __napi_poll+0x38/0x188
 net_rx_action+0x164/0x2c0
 handle_softirqs+0x100/0x244
 __do_softirq+0x14/0x20
 ____do_softirq+0x10/0x20
 call_on_irq_stack+0x24/0x64
 do_softirq_own_stack+0x1c/0x40
 __irq_exit_rcu+0xd4/0x10c
 irq_exit_rcu+0x10/0x1c
 el1_interrupt+0x38/0x68
 el1h_64_irq_handler+0x18/0x24
 el1h_64_irq+0x6c/0x70
 cpuidle_enter_state+0xac/0x320 (P)
 cpuidle_enter+0x38/0x50
 do_idle+0x1e4/0x260
 cpu_startup_entry+0x34/0x3c
 rest_init+0xdc/0xe0
 console_on_rootfs+0x0/0x6c
 __primary_switched+0x88/0x90
```

Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs")
Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 76f202d7f05537642ec294811ace2ad4a7eae383..41d6af31027f4d827dbfdfecdb7de44326bb3de1 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget)
 	struct net_device *ndev = priv->ndev;
 	unsigned int head = ring->head;
 	unsigned int entry = ring->tail;
+	unsigned long flags = 0;
 
 	while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) {
 		ret = mtk_star_tx_complete_one(priv);
@@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget)
 		netif_wake_queue(ndev);
 
 	if (napi_complete(napi)) {
-		spin_lock(&priv->lock);
+		spin_lock_irqsave(&priv->lock, flags);
 		mtk_star_enable_dma_irq(priv, false, true);
-		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 	return 0;
@@ -1342,15 +1343,16 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget)
 {
 	struct mtk_star_priv *priv;
 	int work_done = 0;
+	unsigned long flags = 0;
 
 	priv = container_of(napi, struct mtk_star_priv, rx_napi);
 
 	work_done = mtk_star_rx(priv, budget);
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-		spin_lock(&priv->lock);
+		spin_lock_irqsave(&priv->lock, flags);
 		mtk_star_enable_dma_irq(priv, true, false);
-		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 	return work_done;

-- 
2.49.0
Re: [PATCH 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll
Posted by Maxime Chevallier 8 months ago
Hi Louis-Alexis :)

On Tue, 22 Apr 2025 15:03:38 +0200
Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com> wrote:

> Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock
> and spin_unlock in mtk_star_emac driver to avoid spinlock recursion
> occurrence that can happen when enabling the DMA interrupts again in
> rx/tx poll.
> 
> ```
> BUG: spinlock recursion on CPU#0, swapper/0/0
>  lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0,
>     .owner_cpu: 0
> CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
>     6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT
> Hardware name: MediaTek MT8365 Open Platform EVK (DT)
> Call trace:
>  show_stack+0x18/0x24 (C)
>  dump_stack_lvl+0x60/0x80
>  dump_stack+0x18/0x24
>  spin_dump+0x78/0x88
>  do_raw_spin_lock+0x11c/0x120
>  _raw_spin_lock+0x20/0x2c
>  mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac]
>  __handle_irq_event_percpu+0x48/0x140
>  handle_irq_event+0x4c/0xb0
>  handle_fasteoi_irq+0xa0/0x1bc
>  handle_irq_desc+0x34/0x58
>  generic_handle_domain_irq+0x1c/0x28
>  gic_handle_irq+0x4c/0x120
>  do_interrupt_handler+0x50/0x84
>  el1_interrupt+0x34/0x68
>  el1h_64_irq_handler+0x18/0x24
>  el1h_64_irq+0x6c/0x70
>  regmap_mmio_read32le+0xc/0x20 (P)
>  _regmap_bus_reg_read+0x6c/0xac
>  _regmap_read+0x60/0xdc
>  regmap_read+0x4c/0x80
>  mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac]
>  __napi_poll+0x38/0x188
>  net_rx_action+0x164/0x2c0
>  handle_softirqs+0x100/0x244
>  __do_softirq+0x14/0x20
>  ____do_softirq+0x10/0x20
>  call_on_irq_stack+0x24/0x64
>  do_softirq_own_stack+0x1c/0x40
>  __irq_exit_rcu+0xd4/0x10c
>  irq_exit_rcu+0x10/0x1c
>  el1_interrupt+0x38/0x68
>  el1h_64_irq_handler+0x18/0x24
>  el1h_64_irq+0x6c/0x70
>  cpuidle_enter_state+0xac/0x320 (P)
>  cpuidle_enter+0x38/0x50
>  do_idle+0x1e4/0x260
>  cpu_startup_entry+0x34/0x3c
>  rest_init+0xdc/0xe0
>  console_on_rootfs+0x0/0x6c
>  __primary_switched+0x88/0x90
> ```
> 
> Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs")
> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>

As this is a fix, you need to indicate in your subject that you're
targetting the "net" tree, something like :

[PATCH net 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll

> ---
>  drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
> index 76f202d7f05537642ec294811ace2ad4a7eae383..41d6af31027f4d827dbfdfecdb7de44326bb3de1 100644
> --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
> +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
> @@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget)
>  	struct net_device *ndev = priv->ndev;
>  	unsigned int head = ring->head;
>  	unsigned int entry = ring->tail;
> +	unsigned long flags = 0;

You don't need to init flags to 0

>  
>  	while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) {
>  		ret = mtk_star_tx_complete_one(priv);
> @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget)
>  		netif_wake_queue(ndev);
>  
>  	if (napi_complete(napi)) {
> -		spin_lock(&priv->lock);
> +		spin_lock_irqsave(&priv->lock, flags);
>  		mtk_star_enable_dma_irq(priv, false, true);
> -		spin_unlock(&priv->lock);
> +		spin_unlock_irqrestore(&priv->lock, flags);
>  	}
>  
>  	return 0;
> @@ -1342,15 +1343,16 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget)
>  {
>  	struct mtk_star_priv *priv;
>  	int work_done = 0;
> +	unsigned long flags = 0;

There's a rule in netdev that definitions must be ordered from longest
to shortest lines (reverse xmas tree, or RCT), so you should have in
the end :

struct mtk_star_priv *priv;
unsigned long flags;
int work_done = 0;

>  
>  	priv = container_of(napi, struct mtk_star_priv, rx_napi);
>  
>  	work_done = mtk_star_rx(priv, budget);
>  	if (work_done < budget) {
>  		napi_complete_done(napi, work_done);
> -		spin_lock(&priv->lock);
> +		spin_lock_irqsave(&priv->lock, flags);
>  		mtk_star_enable_dma_irq(priv, true, false);
> -		spin_unlock(&priv->lock);
> +		spin_unlock_irqrestore(&priv->lock, flags);
>  	}
>  
>  	return work_done;
> 

Besides these small comments, the patch looks correct to me :)

Regards,

Maxime
Re: [PATCH 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll
Posted by Louis-Alexis Eyraud 7 months, 4 weeks ago
Hi Maxime,

On Tue, 2025-04-22 at 16:00 +0200, Maxime Chevallier wrote:
> Hi Louis-Alexis :)
> 
> On Tue, 22 Apr 2025 15:03:38 +0200
> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com> wrote:
> 
> > Use spin_lock_irqsave and spin_unlock_irqrestore instead of
> > spin_lock
> > and spin_unlock in mtk_star_emac driver to avoid spinlock recursion
> > occurrence that can happen when enabling the DMA interrupts again
> > in
> > rx/tx poll.
> > 
> > ```
> > BUG: spinlock recursion on CPU#0, swapper/0/0
> >  lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0,
> >     .owner_cpu: 0
> > CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
> >     6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT
> > Hardware name: MediaTek MT8365 Open Platform EVK (DT)
> > Call trace:
> >  show_stack+0x18/0x24 (C)
> >  dump_stack_lvl+0x60/0x80
> >  dump_stack+0x18/0x24
> >  spin_dump+0x78/0x88
> >  do_raw_spin_lock+0x11c/0x120
> >  _raw_spin_lock+0x20/0x2c
> >  mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac]
> >  __handle_irq_event_percpu+0x48/0x140
> >  handle_irq_event+0x4c/0xb0
> >  handle_fasteoi_irq+0xa0/0x1bc
> >  handle_irq_desc+0x34/0x58
> >  generic_handle_domain_irq+0x1c/0x28
> >  gic_handle_irq+0x4c/0x120
> >  do_interrupt_handler+0x50/0x84
> >  el1_interrupt+0x34/0x68
> >  el1h_64_irq_handler+0x18/0x24
> >  el1h_64_irq+0x6c/0x70
> >  regmap_mmio_read32le+0xc/0x20 (P)
> >  _regmap_bus_reg_read+0x6c/0xac
> >  _regmap_read+0x60/0xdc
> >  regmap_read+0x4c/0x80
> >  mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac]
> >  __napi_poll+0x38/0x188
> >  net_rx_action+0x164/0x2c0
> >  handle_softirqs+0x100/0x244
> >  __do_softirq+0x14/0x20
> >  ____do_softirq+0x10/0x20
> >  call_on_irq_stack+0x24/0x64
> >  do_softirq_own_stack+0x1c/0x40
> >  __irq_exit_rcu+0xd4/0x10c
> >  irq_exit_rcu+0x10/0x1c
> >  el1_interrupt+0x38/0x68
> >  el1h_64_irq_handler+0x18/0x24
> >  el1h_64_irq+0x6c/0x70
> >  cpuidle_enter_state+0xac/0x320 (P)
> >  cpuidle_enter+0x38/0x50
> >  do_idle+0x1e4/0x260
> >  cpu_startup_entry+0x34/0x3c
> >  rest_init+0xdc/0xe0
> >  console_on_rootfs+0x0/0x6c
> >  __primary_switched+0x88/0x90
> > ```
> > 
> > Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx
> > handling with two NAPIs")
> > Signed-off-by: Louis-Alexis Eyraud
> > <louisalexis.eyraud@collabora.com>
> 
> As this is a fix, you need to indicate in your subject that you're
> targetting the "net" tree, something like :
> 
> [PATCH net 1/2] net: ethernet: mtk-star-emac: fix spinlock recursion
> issues on rx/tx poll
> 
> > ---
> >  drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++----
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c
> > b/drivers/net/ethernet/mediatek/mtk_star_emac.c
> > index
> > 76f202d7f05537642ec294811ace2ad4a7eae383..41d6af31027f4d827dbfdfecd
> > b7de44326bb3de1 100644
> > --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
> > +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
> > @@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct
> > napi_struct *napi, int budget)
> >  	struct net_device *ndev = priv->ndev;
> >  	unsigned int head = ring->head;
> >  	unsigned int entry = ring->tail;
> > +	unsigned long flags = 0;
> 
> You don't need to init flags to 0
> 
> >  
> >  	while (entry != head && count < (MTK_STAR_RING_NUM_DESCS -
> > 1)) {
> >  		ret = mtk_star_tx_complete_one(priv);
> > @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct
> > napi_struct *napi, int budget)
> >  		netif_wake_queue(ndev);
> >  
> >  	if (napi_complete(napi)) {
> > -		spin_lock(&priv->lock);
> > +		spin_lock_irqsave(&priv->lock, flags);
> >  		mtk_star_enable_dma_irq(priv, false, true);
> > -		spin_unlock(&priv->lock);
> > +		spin_unlock_irqrestore(&priv->lock, flags);
> >  	}
> >  
> >  	return 0;
> > @@ -1342,15 +1343,16 @@ static int mtk_star_rx_poll(struct
> > napi_struct *napi, int budget)
> >  {
> >  	struct mtk_star_priv *priv;
> >  	int work_done = 0;
> > +	unsigned long flags = 0;
> 
> There's a rule in netdev that definitions must be ordered from
> longest
> to shortest lines (reverse xmas tree, or RCT), so you should have in
> the end :
> 
> struct mtk_star_priv *priv;
> unsigned long flags;
> int work_done = 0;
> 
> >  
> >  	priv = container_of(napi, struct mtk_star_priv, rx_napi);
> >  
> >  	work_done = mtk_star_rx(priv, budget);
> >  	if (work_done < budget) {
> >  		napi_complete_done(napi, work_done);
> > -		spin_lock(&priv->lock);
> > +		spin_lock_irqsave(&priv->lock, flags);
> >  		mtk_star_enable_dma_irq(priv, true, false);
> > -		spin_unlock(&priv->lock);
> > +		spin_unlock_irqrestore(&priv->lock, flags);
> >  	}
> >  
> >  	return work_done;
> > 
> 
> Besides these small comments, the patch looks correct to me :)
> 
> Regards,
> 
> Maxime

Thanks for the review, Maxime.
I'll add the missing subject prefix and fix the other issues in the v2
patchset.

Regards,
Louis-Alexis