[PATCH net-next 1/7] bng_en: add per-PF workqueue, timer, and slow-path task

Bhargava Marreddy posted 7 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH net-next 1/7] bng_en: add per-PF workqueue, timer, and slow-path task
Posted by Bhargava Marreddy 1 month, 1 week ago
Add a dedicated single-threaded workqueue and a timer for each PF
to drive deferred slow-path work such as link event handling and
stats collection. An interrupt semaphore (intr_sem) keeps the
periodic timer work from running while interrupts are disabled.
The open path starts these resources and the close path drains
and stops them.

Signed-off-by: Bhargava Marreddy <bhargava.marreddy@broadcom.com>
Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
---
 .../net/ethernet/broadcom/bnge/bnge_netdev.c  | 88 ++++++++++++++++++-
 .../net/ethernet/broadcom/bnge/bnge_netdev.h  | 19 +++-
 2 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
index b8e258842c6..a1bb1f01246 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
@@ -101,6 +101,44 @@ static int bnge_alloc_ring_stats(struct bnge_net *bn)
 	return rc;
 }
 
+static void bnge_timer(struct timer_list *t)
+{
+	struct bnge_net *bn = timer_container_of(bn, t, timer);
+	struct bnge_dev *bd = bn->bd;
+
+	if (!netif_running(bn->netdev) ||
+	    !test_bit(BNGE_STATE_OPEN, &bd->state))
+		return;
+
+	if (atomic_read(&bn->intr_sem) != 0)
+		goto bnge_restart_timer;
+
+	/* Periodic work added by later patches */
+
+bnge_restart_timer:
+	mod_timer(&bn->timer, jiffies + bn->current_interval);
+}
+
+static void bnge_sp_task(struct work_struct *work)
+{
+	struct bnge_net *bn = container_of(work, struct bnge_net, sp_task);
+	struct bnge_dev *bd = bn->bd;
+
+	set_bit(BNGE_STATE_IN_SP_TASK, &bn->state);
+	/* Ensure sp_task state is visible before checking BNGE_STATE_OPEN */
+	smp_mb__after_atomic();
+	if (!test_bit(BNGE_STATE_OPEN, &bd->state)) {
+		clear_bit(BNGE_STATE_IN_SP_TASK, &bn->state);
+		return;
+	}
+
+	/* Event handling work added by later patches */
+
+	/* Ensure all sp_task work is done before clearing the state */
+	smp_mb__before_atomic();
+	clear_bit(BNGE_STATE_IN_SP_TASK, &bn->state);
+}
+
 static void bnge_free_nq_desc_arr(struct bnge_nq_ring_info *nqr)
 {
 	struct bnge_ring_struct *ring = &nqr->ring_struct;
@@ -1960,6 +1998,7 @@ static void bnge_disable_int_sync(struct bnge_net *bn)
 	struct bnge_dev *bd = bn->bd;
 	int i;
 
+	atomic_inc(&bn->intr_sem);
 	bnge_disable_int(bn);
 	for (i = 0; i < bd->nq_nr_rings; i++) {
 		int map_idx = bnge_cp_num_to_irq_num(bn, i);
@@ -1973,6 +2012,7 @@ static void bnge_enable_int(struct bnge_net *bn)
 	struct bnge_dev *bd = bn->bd;
 	int i;
 
+	atomic_set(&bn->intr_sem, 0);
 	for (i = 0; i < bd->nq_nr_rings; i++) {
 		struct bnge_napi *bnapi = bn->bnapi[i];
 		struct bnge_nq_ring_info *nqr;
@@ -2517,6 +2557,9 @@ static int bnge_open_core(struct bnge_net *bn)
 	bnge_enable_int(bn);
 
 	bnge_tx_enable(bn);
+
+	mod_timer(&bn->timer, jiffies + bn->current_interval);
+
 	return 0;
 
 err_free_irq:
@@ -2552,8 +2595,14 @@ static void bnge_close_core(struct bnge_net *bn)
 	bnge_tx_disable(bn);
 
 	clear_bit(BNGE_STATE_OPEN, &bd->state);
+	/* Ensure BNGE_STATE_OPEN is cleared before checking drv_busy */
+	smp_mb__after_atomic();
+	while (bnge_drv_busy(bn))
+		msleep(20);
+
 	bnge_shutdown_nic(bn);
 	bnge_disable_napi(bn);
+	timer_delete_sync(&bn->timer);
 	bnge_free_all_rings_bufs(bn);
 	bnge_free_irq(bn);
 	bnge_del_napi(bn);
@@ -2700,6 +2749,23 @@ static void bnge_init_ring_params(struct bnge_net *bn)
 	bn->netdev->cfg->hds_thresh = max(BNGE_DEFAULT_RX_COPYBREAK, rx_size);
 }
 
+static struct workqueue_struct *
+bnge_create_workqueue_thread(struct bnge_dev *bd, const char *thread_name)
+{
+	struct workqueue_struct *wq;
+	char *wq_name;
+
+	wq_name = kasprintf(GFP_KERNEL, "%s-%s", thread_name,
+			    dev_name(bd->dev));
+	if (!wq_name)
+		return NULL;
+
+	wq = create_singlethread_workqueue(wq_name);
+	kfree(wq_name);
+
+	return wq;
+}
+
 int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs)
 {
 	struct net_device *netdev;
@@ -2784,6 +2850,17 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs)
 	if (bd->tso_max_segs)
 		netif_set_tso_max_segs(netdev, bd->tso_max_segs);
 
+	INIT_WORK(&bn->sp_task, bnge_sp_task);
+	timer_setup(&bn->timer, bnge_timer, 0);
+	bn->current_interval = BNGE_TIMER_INTERVAL;
+
+	bn->bnge_pf_wq = bnge_create_workqueue_thread(bd, "bnge_pf_wq");
+	if (!bn->bnge_pf_wq) {
+		netdev_err(netdev, "Unable to create workqueue.\n");
+		rc = -ENOMEM;
+		goto err_netdev;
+	}
+
 	bn->rx_ring_size = BNGE_DEFAULT_RX_RING_SIZE;
 	bn->tx_ring_size = BNGE_DEFAULT_TX_RING_SIZE;
 	bn->rx_dir = DMA_FROM_DEVICE;
@@ -2799,11 +2876,13 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs)
 	rc = register_netdev(netdev);
 	if (rc) {
 		dev_err(bd->dev, "Register netdev failed rc: %d\n", rc);
-		goto err_netdev;
+		goto err_free_workq;
 	}
 
 	return 0;
 
+err_free_workq:
+	destroy_workqueue(bn->bnge_pf_wq);
 err_netdev:
 	free_netdev(netdev);
 	return rc;
@@ -2812,8 +2891,15 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs)
 void bnge_netdev_free(struct bnge_dev *bd)
 {
 	struct net_device *netdev = bd->netdev;
+	struct bnge_net *bn;
+
+	bn = netdev_priv(netdev);
 
 	unregister_netdev(netdev);
+
+	cancel_work_sync(&bn->sp_task);
+	destroy_workqueue(bn->bnge_pf_wq);
+
 	free_netdev(netdev);
 	bd->netdev = NULL;
 }
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h
index 70f1a7c2481..f7446b7c4ba 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h
@@ -224,6 +224,13 @@ struct bnge_tpa_info {
 #define BNGE_NQ_HDL_TYPE(hdl)	(((hdl) & BNGE_NQ_HDL_TYPE_MASK) >>	\
 				 BNGE_NQ_HDL_TYPE_SHIFT)
 
+enum bnge_net_state {
+	BNGE_STATE_NAPI_DISABLED,
+	BNGE_STATE_IN_SP_TASK,
+};
+
+#define BNGE_TIMER_INTERVAL	HZ
+
 struct bnge_net {
 	struct bnge_dev		*bd;
 	struct net_device	*netdev;
@@ -281,13 +288,18 @@ struct bnge_net {
 	u32			stats_coal_ticks;
 
 	unsigned long		state;
-#define BNGE_STATE_NAPI_DISABLED	0
 
 	u32			msg_enable;
 	u16			max_tpa;
 	__be16			vxlan_port;
 	__be16			nge_port;
 	__be16			vxlan_gpe_port;
+
+	atomic_t		intr_sem;
+	unsigned int		current_interval;
+	struct timer_list	timer;
+	struct workqueue_struct *bnge_pf_wq;
+	struct work_struct	sp_task;
 };
 
 #define BNGE_DEFAULT_RX_RING_SIZE	511
@@ -556,6 +568,11 @@ struct bnge_l2_filter {
 	refcount_t		refcnt;
 };
 
+static inline bool bnge_drv_busy(struct bnge_net *bn)
+{
+	return test_bit(BNGE_STATE_IN_SP_TASK, &bn->state);
+}
+
 u16 bnge_cp_ring_for_rx(struct bnge_rx_ring_info *rxr);
 u16 bnge_cp_ring_for_tx(struct bnge_tx_ring_info *txr);
 void bnge_fill_hw_rss_tbl(struct bnge_net *bn, struct bnge_vnic_info *vnic);
-- 
2.47.3
Re: [PATCH net-next 1/7] bng_en: add per-PF workqueue, timer, and slow-path task
Posted by Jakub Kicinski 1 month, 1 week ago
On Mon, 23 Feb 2026 22:06:35 +0530 Bhargava Marreddy wrote:
> +	if (atomic_read(&bn->intr_sem) != 0)
> +		goto bnge_restart_timer;
> +
> +	/* Periodic work added by later patches */
> +
> +bnge_restart_timer:
> +	mod_timer(&bn->timer, jiffies + bn->current_interval);

This intr_sem thing looks highly questionable.

It provides no synchronization guarantee since the state of intr_sem 
may change immediately after the read. Please use normal locking
primitives and/or cancel the timer _sync() if you want to make sure
it's not running.
Re: [PATCH net-next 1/7] bng_en: add per-PF workqueue, timer, and slow-path task
Posted by Bhargava Chenna Marreddy 1 month, 1 week ago
On Wed, Feb 25, 2026 at 8:02 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Mon, 23 Feb 2026 22:06:35 +0530 Bhargava Marreddy wrote:
> > +     if (atomic_read(&bn->intr_sem) != 0)
> > +             goto bnge_restart_timer;
> > +
> > +     /* Periodic work added by later patches */
> > +
> > +bnge_restart_timer:
> > +     mod_timer(&bn->timer, jiffies + bn->current_interval);
>
> This intr_sem thing looks highly questionable.
>
> It provides no synchronization guarantee since the state of intr_sem
> may change immediately after the read. Please use normal locking
> primitives and/or cancel the timer _sync() if you want to make sure
> it's not running.

Hi Jakub,

Agreed. The intr_sem will be removed in v2. The timer already checks
BNGE_STATE_OPEN, which is cleared before timer_delete_sync() in the
close path. This ensures the timer callback either exits early upon
seeing the cleared bit or is fully synchronized by the cancel call.

Thanks,
Bhargava Marreddy