[net v3] net: wwan: t7xx: Fix napi rx poll issue

Jinjian Song posted 1 patch 6 months, 3 weeks ago
drivers/net/wwan/t7xx/t7xx_netdev.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
[net v3] net: wwan: t7xx: Fix napi rx poll issue
Posted by Jinjian Song 6 months, 3 weeks ago
When the driver handles the NAPI rx polling requests, the netdev might
have been released by the dellink logic triggered by the disconnect
operation on the user plane. However, the skb processing logic in the
polling path still uses the invalid netdev, which causes a panic.

BUG: kernel NULL pointer dereference, address: 00000000000000f1
Oops: 0000 [#1] PREEMPT SMP NOPTI
RIP: 0010:dev_gro_receive+0x3a/0x620
[...]
Call Trace:
 <IRQ>
 ? __die_body+0x68/0xb0
 ? page_fault_oops+0x379/0x3e0
 ? exc_page_fault+0x4f/0xa0
 ? asm_exc_page_fault+0x22/0x30
 ? __pfx_t7xx_ccmni_recv_skb+0x10/0x10 [mtk_t7xx (HASH:1400 7)]
 ? dev_gro_receive+0x3a/0x620
 napi_gro_receive+0xad/0x170
 t7xx_ccmni_recv_skb+0x48/0x70 [mtk_t7xx (HASH:1400 7)]
 t7xx_dpmaif_napi_rx_poll+0x590/0x800 [mtk_t7xx (HASH:1400 7)]
 net_rx_action+0x103/0x470
 irq_exit_rcu+0x13a/0x310
 sysvec_apic_timer_interrupt+0x56/0x90
 </IRQ>

Fixes: 5545b7b9f294 ("net: wwan: t7xx: Add NAPI support")
Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>
---
v3:
 * Only use READ_ONCE/WRITE_ONCE when the lock protecting ctlb->ccmni_inst
   is not held.
---
 drivers/net/wwan/t7xx/t7xx_netdev.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c
index 91fa082e9cab..fc0a7cb181df 100644
--- a/drivers/net/wwan/t7xx/t7xx_netdev.c
+++ b/drivers/net/wwan/t7xx/t7xx_netdev.c
@@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id
 	ccmni->ctlb = ctlb;
 	ccmni->dev = dev;
 	atomic_set(&ccmni->usage, 0);
-	ctlb->ccmni_inst[if_id] = ccmni;
+	WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni);
 
 	ret = register_netdevice(dev);
 	if (ret)
@@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l
 	if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))
 		return;
 
+	WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL);
 	unregister_netdevice(dev);
 }
 
@@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
 
 	skb_cb = T7XX_SKB_CB(skb);
 	netif_id = skb_cb->netif_idx;
-	ccmni = ccmni_ctlb->ccmni_inst[netif_id];
+	ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]);
 	if (!ccmni) {
 		dev_kfree_skb(skb);
 		return;
@@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
 
 static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
 {
-	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
 	struct netdev_queue *net_queue;
 
 	if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) {
@@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno
 
 static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
 {
-	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
+	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
 	struct netdev_queue *net_queue;
 
 	if (atomic_read(&ccmni->usage) > 0) {
@@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev,
 	if (ctlb->md_sta != MD_STATE_READY)
 		return;
 
-	if (!ctlb->ccmni_inst[0]) {
+	if (!READ_ONCE(ctlb->ccmni_inst[0])) {
 		dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
 		return;
 	}
-- 
2.34.1
Re: [net v3] net: wwan: t7xx: Fix napi rx poll issue
Posted by Larysa Zaremba 6 months, 2 weeks ago
On Fri, May 30, 2025 at 11:16:48AM +0800, Jinjian Song wrote:
> When driver handles the napi rx polling requests, the netdev might
> have been released by the dellink logic triggered by the disconnect
> operation on user plane. However, in the logic of processing skb in
> polling, an invalid netdev is still being used, which causes a panic.
> 
> BUG: kernel NULL pointer dereference, address: 00000000000000f1
> Oops: 0000 [#1] PREEMPT SMP NOPTI
> RIP: 0010:dev_gro_receive+0x3a/0x620
> [...]
> Call Trace:
>  <IRQ>
>  ? __die_body+0x68/0xb0
>  ? page_fault_oops+0x379/0x3e0
>  ? exc_page_fault+0x4f/0xa0
>  ? asm_exc_page_fault+0x22/0x30
>  ? __pfx_t7xx_ccmni_recv_skb+0x10/0x10 [mtk_t7xx (HASH:1400 7)]
>  ? dev_gro_receive+0x3a/0x620
>  napi_gro_receive+0xad/0x170
>  t7xx_ccmni_recv_skb+0x48/0x70 [mtk_t7xx (HASH:1400 7)]
>  t7xx_dpmaif_napi_rx_poll+0x590/0x800 [mtk_t7xx (HASH:1400 7)]
>  net_rx_action+0x103/0x470
>  irq_exit_rcu+0x13a/0x310
>  sysvec_apic_timer_interrupt+0x56/0x90
>  </IRQ>
> 
> Fixes: 5545b7b9f294 ("net: wwan: t7xx: Add NAPI support")
> Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>
> ---
> v3:
>  * Only Use READ_ONCE/WRITE_ONCE when the lock protecting ctlb->ccmni_inst
>    is not held.

What do you mean by "lock protecting ctlb->ccmni_inst"? Please specify.

> ---
>  drivers/net/wwan/t7xx/t7xx_netdev.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c
> index 91fa082e9cab..fc0a7cb181df 100644
> --- a/drivers/net/wwan/t7xx/t7xx_netdev.c
> +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c
> @@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id
>  	ccmni->ctlb = ctlb;
>  	ccmni->dev = dev;
>  	atomic_set(&ccmni->usage, 0);
> -	ctlb->ccmni_inst[if_id] = ccmni;
> +	WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni);
>  
>  	ret = register_netdevice(dev);
>  	if (ret)
> @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l
>  	if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))
>  		return;
>  
> +	WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL);
>  	unregister_netdevice(dev);
>  }
>  
> @@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
>  
>  	skb_cb = T7XX_SKB_CB(skb);
>  	netif_id = skb_cb->netif_idx;
> -	ccmni = ccmni_ctlb->ccmni_inst[netif_id];
> +	ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]);
>  	if (!ccmni) {
>  		dev_kfree_skb(skb);
>  		return;
> @@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
>  
>  static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
>  {
> -	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
> +	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
>  	struct netdev_queue *net_queue;
> 

You do not seem to check if ccmni is NULL here, so given ctlb->ccmni_inst[0] is 
not being hot-swapped, I guess that there are some guarantees of it not being 
NULL at this moment, so I would drop READ_ONCE here.

>  	if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) {
> @@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno
>  
>  static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
>  {
> -	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
> +	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
>  	struct netdev_queue *net_queue;
>

Same as above, either READ_ONCE is not needed or NULL check is required.

>  	if (atomic_read(&ccmni->usage) > 0) {
> @@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev,
>  	if (ctlb->md_sta != MD_STATE_READY)
>  		return;
>  
> -	if (!ctlb->ccmni_inst[0]) {
> +	if (!READ_ONCE(ctlb->ccmni_inst[0])) {
>  		dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
>  		return;
>  	}
> -- 
> 2.34.1
> 
>
Re: [net v3] net: wwan: t7xx: Fix napi rx poll issue
Posted by Larysa Zaremba 6 months, 2 weeks ago
On Wed, Jun 04, 2025 at 06:19:53PM +0800, Jinjian Song wrote:
> From: Larysa Zaremba <larysa.zaremba@intel.com>
> 
> >> Fixes: 5545b7b9f294 ("net: wwan: t7xx: Add NAPI support")
> >> Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>
> >> ---
> >> v3:
> >>  * Only Use READ_ONCE/WRITE_ONCE when the lock protecting ctlb->ccmni_inst
> >>    is not held.
> >
> >What do you mean by "lock protecting ctlb->ccmni_inst"? Please specify.
> 
> Hi Larysa,
> 
> This description might have been a bit oversimplified. The process is as follows:
> 
> In patch v1, I directly set ctlb->ccmni_inst. This may not be safe, as the NAPI
> processing and the driver's internal interface might not be synchronized. Therefore,
> following Jakub's suggestion, I added READ_ONCE/WRITE_ONCE in all places where this
> pointer is accessed.
> 
> In patch v2, Paolo suggested using READ_ONCE in places that are not protected by locks.
> Some interfaces are protected by synchronization mechanisms, so it's unnecessary to add them there.
> Therefore, I removed READ_ONCE from those interfaces.
>

I have seen the discussion on the previous version; I am asking you for the symbol 
name(s) of the locks that make READ_ONCE unnecessary in the places where it was removed.

> >> @@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu
> >>  
> >>  static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
> >>  {
> >> -	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
> >> +	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
> >>  	struct netdev_queue *net_queue;
> >> 
> >
> >You do not seem to check if ccmni is NULL here, so given ctlb->ccmni_inst[0] is 
> >not being hot-swapped, I guess that there are some guarantees of it not being 
> >NULL at this moment, so I would drop READ_ONCE here.
> 
> This ctlb->ccmni_inst[0] is checked in the upper-level interface:
> static void t7xx_ccmni_queue_state_notify([...]) {
> 	[...]
> 	if (!READ_ONCE(ctlb->ccmni_inst[0])) {
> 		return;
> 	}
> 
> 	if (state == DMPAIF_TXQ_STATE_IRQ)
> 		t7xx_ccmni_queue_tx_irq_notify(ctlb, qno);
> 	else if (state == DMPAIF_TXQ_STATE_FULL)
> 		t7xx_ccmni_queue_tx_full_notify(ctlb, qno);
> }
> 
> Since this is part of the driver's internal logic for handling queue events, would it be
> safer to add READ_ONCE here as well?
>

Well, I am not 100% sure.  What would make the code easier to reason about in 
terms of READ_ONCE/WRITE_ONCE is if you replaced struct t7xx_ccmni_ctrl *ctlb 
argument in t7xx_ccmni_queue_tx_irq_notify() and 
t7xx_ccmni_queue_tx_full_notify() with ctlb->ccmni_inst[0], the code would look 
like this:

	struct t7xx_ccmni *ccmni = 
		READ_ONCE(t7xx_dev->ccmni_ctlb->ccmni_inst[0]);

	if (!ccmni) {
		dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
		return;
	}

	if (state == DMPAIF_TXQ_STATE_IRQ)
		t7xx_ccmni_queue_tx_irq_notify(ccmni, qno);
	else if (state == DMPAIF_TXQ_STATE_FULL)
		t7xx_ccmni_queue_tx_full_notify(ccmni, qno);

This way atomic reads in notifiers would be dependent on a single READ_ONCE, 
which should prevent nasty reordering, as far as I am concerned.

The above holds if you think you do not need to check for NULL in the notifiers, 
but in such a case I would rather consider proper locking or RCU.

> >> @@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno
> >>  
> >>  static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno)
> >>  {
> >> -	struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0];
> >> +	struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]);
> >>  	struct netdev_queue *net_queue;
> >>
> >
> >Same as above, either READ_ONCE is not needed or NULL check is required.
> 
> Yes, this function is called from the same upper-level interface.
> 
> >  	if (atomic_read(&ccmni->usage) > 0) {
> > @@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev,
> >  	if (ctlb->md_sta != MD_STATE_READY)
> >  		return;
> >  
> > -	if (!ctlb->ccmni_inst[0]) {
> > +	if (!READ_ONCE(ctlb->ccmni_inst[0])) {
> >  		dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n");
> >  		return;
> >  	}
> > -- 
> > 2.34.1
> > 
> > 
> 
> Thanks.
> 
> Jinjian,
> Best Regards.