drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-)
Currently, napi_disable() gets called during rxq and txq cleanup
even before NAPI is enabled and the hrtimer is initialized. This causes
a kernel panic:
? page_fault_oops+0x136/0x2b0
? page_counter_cancel+0x2e/0x80
? do_user_addr_fault+0x2f2/0x640
? refill_obj_stock+0xc4/0x110
? exc_page_fault+0x71/0x160
? asm_exc_page_fault+0x27/0x30
? __mmdrop+0x10/0x180
? __mmdrop+0xec/0x180
? hrtimer_active+0xd/0x50
hrtimer_try_to_cancel+0x2c/0xf0
hrtimer_cancel+0x15/0x30
napi_disable+0x65/0x90
mana_destroy_rxq+0x4c/0x2f0
mana_create_rxq.isra.0+0x56c/0x6d0
? mana_uncfg_vport+0x50/0x50
mana_alloc_queues+0x21b/0x320
? skb_dequeue+0x5f/0x80
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------
1 file changed, 28 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..882b05e087b9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1862,7 +1862,7 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
}
-static void mana_destroy_txq(struct mana_port_context *apc)
+static void mana_cleanup_napi_txq(struct mana_port_context *apc)
{
struct napi_struct *napi;
int i;
@@ -1875,7 +1875,17 @@ static void mana_destroy_txq(struct mana_port_context *apc)
napi_synchronize(napi);
napi_disable(napi);
netif_napi_del(napi);
+ }
+}
+
+static void mana_destroy_txq(struct mana_port_context *apc)
+{
+ int i;
+
+ if (!apc->tx_qp)
+ return;
+ for (i = 0; i < apc->num_queues; i++) {
mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -2007,6 +2017,21 @@ static int mana_create_txq(struct mana_port_context *apc,
return err;
}
+static void mana_cleanup_napi_rxq(struct mana_port_context *apc,
+ struct mana_rxq *rxq, bool validate_state)
+{
+ struct napi_struct *napi;
+
+ if (!rxq)
+ return;
+
+ napi = &rxq->rx_cq.napi;
+ if (validate_state)
+ napi_synchronize(napi);
+ napi_disable(napi);
+ netif_napi_del(napi);
+}
+
static void mana_destroy_rxq(struct mana_port_context *apc,
struct mana_rxq *rxq, bool validate_state)
@@ -2014,24 +2039,14 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
- struct napi_struct *napi;
struct page *page;
int i;
if (!rxq)
return;
- napi = &rxq->rx_cq.napi;
-
- if (validate_state)
- napi_synchronize(napi);
-
- napi_disable(napi);
-
xdp_rxq_info_unreg(&rxq->xdp_rxq);
- netif_napi_del(napi);
-
mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
mana_deinit_cq(apc, &rxq->rx_cq);
@@ -2336,11 +2351,11 @@ static void mana_destroy_vport(struct mana_port_context *apc)
rxq = apc->rxqs[rxq_idx];
if (!rxq)
continue;
-
+ mana_cleanup_napi_rxq(apc, rxq, true);
mana_destroy_rxq(apc, rxq, true);
apc->rxqs[rxq_idx] = NULL;
}
-
+ mana_cleanup_napi_txq(apc);
mana_destroy_txq(apc);
mana_uncfg_vport(apc);
--
2.34.1
On Tue, Aug 20, 2024 at 03:22:27AM -0700, Souradeep Chakrabarti wrote:
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
>
> ? page_fault_oops+0x136/0x2b0
> ? page_counter_cancel+0x2e/0x80
> ? do_user_addr_fault+0x2f2/0x640
> ? refill_obj_stock+0xc4/0x110
> ? exc_page_fault+0x71/0x160
> ? asm_exc_page_fault+0x27/0x30
> ? __mmdrop+0x10/0x180
> ? __mmdrop+0xec/0x180
> ? hrtimer_active+0xd/0x50
> hrtimer_try_to_cancel+0x2c/0xf0
> hrtimer_cancel+0x15/0x30
> napi_disable+0x65/0x90
> mana_destroy_rxq+0x4c/0x2f0
> mana_create_rxq.isra.0+0x56c/0x6d0
> ? mana_uncfg_vport+0x50/0x50
> mana_alloc_queues+0x21b/0x320
> ? skb_dequeue+0x5f/0x80
>
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
>
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> ---
> drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------
> 1 file changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..882b05e087b9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1862,7 +1862,7 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
> mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
> }
>
> -static void mana_destroy_txq(struct mana_port_context *apc)
> +static void mana_cleanup_napi_txq(struct mana_port_context *apc)
> {
> struct napi_struct *napi;
> int i;
> @@ -1875,7 +1875,17 @@ static void mana_destroy_txq(struct mana_port_context *apc)
> napi_synchronize(napi);
> napi_disable(napi);
> netif_napi_del(napi);
> + }
> +}
> +
> +static void mana_destroy_txq(struct mana_port_context *apc)
> +{
> + int i;
> +
> + if (!apc->tx_qp)
> + return;
>
> + for (i = 0; i < apc->num_queues; i++) {
> mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
>
> mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -2007,6 +2017,21 @@ static int mana_create_txq(struct mana_port_context *apc,
> return err;
> }
I think the mana_cleanup_napi_txq() call should also be added to the out
(error) path of mana_create_txq(). Consider this case: napi enable succeeds
for the first few TX queues, but queue creation for a later SQ then fails.
In that case we don't clean up the NAPIs of the previously successful ones.
>
> +static void mana_cleanup_napi_rxq(struct mana_port_context *apc,
> + struct mana_rxq *rxq, bool validate_state)
> +{
> + struct napi_struct *napi;
> +
> + if (!rxq)
> + return;
> +
> + napi = &rxq->rx_cq.napi;
> + if (validate_state)
> + napi_synchronize(napi);
> + napi_disable(napi);
> + netif_napi_del(napi);
> +}
> +
> static void mana_destroy_rxq(struct mana_port_context *apc,
> struct mana_rxq *rxq, bool validate_state)
>
> @@ -2014,24 +2039,14 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
> struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
> struct mana_recv_buf_oob *rx_oob;
> struct device *dev = gc->dev;
> - struct napi_struct *napi;
> struct page *page;
> int i;
>
> if (!rxq)
> return;
>
> - napi = &rxq->rx_cq.napi;
> -
> - if (validate_state)
> - napi_synchronize(napi);
> -
> - napi_disable(napi);
> -
> xdp_rxq_info_unreg(&rxq->xdp_rxq);
>
> - netif_napi_del(napi);
> -
> mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
>
> mana_deinit_cq(apc, &rxq->rx_cq);
> @@ -2336,11 +2351,11 @@ static void mana_destroy_vport(struct mana_port_context *apc)
> rxq = apc->rxqs[rxq_idx];
> if (!rxq)
> continue;
> -
> + mana_cleanup_napi_rxq(apc, rxq, true);
> mana_destroy_rxq(apc, rxq, true);
> apc->rxqs[rxq_idx] = NULL;
> }
> -
> + mana_cleanup_napi_txq(apc);
> mana_destroy_txq(apc);
> mana_uncfg_vport(apc);
>
> --
> 2.34.1
>
>
© 2016 - 2026 Red Hat, Inc.