From: Bhargava Marreddy <bhargava.marreddy@broadcom.com>
Implement netdev_stat_ops to provide standardized per-queue
statistics via the Netlink API.
Below is the description of the hardware drop counters:
rx-hw-drop-overruns: Packets dropped by HW due to resource limitations
(e.g., no BDs available in the host ring).
rx-hw-drops: Total packets dropped by HW (sum of overruns and error
drops).
tx-hw-drop-errors: Packets dropped by HW because they were invalid or
malformed.
tx-hw-drops: Total packets dropped by HW (sum of resource limitations
and error drops).
The implementation was verified using the ynl tool:
./tools/net/ynl/pyynl/cli.py --spec \
Documentation/netlink/specs/netdev.yaml --dump qstats-get --json \
'{"ifindex":14, "scope":"queue"}'
[{'ifindex': 14, 'queue-id': 0, 'queue-type': 'rx', 'rx-bytes': 758,
'rx-hw-drop-overruns': 0, 'rx-hw-drops': 0, 'rx-packets': 11},
{'ifindex': 14, 'queue-id': 1, 'queue-type': 'rx', 'rx-bytes': 0,
'rx-hw-drop-overruns': 0, 'rx-hw-drops': 0, 'rx-packets': 0},
{'ifindex': 14, 'queue-id': 0, 'queue-type': 'tx', 'tx-bytes': 0,
'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 0},
{'ifindex': 14, 'queue-id': 1, 'queue-type': 'tx', 'tx-bytes': 0,
'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 0},
{'ifindex': 14, 'queue-id': 2, 'queue-type': 'tx', 'tx-bytes': 810,
'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 10},]
Signed-off-by: Bhargava Marreddy <bhargava.marreddy@broadcom.com>
Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
---
.../net/ethernet/broadcom/bnge/bnge_netdev.c | 76 +++++++++++++++++++
1 file changed, 76 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
index 2513b0907693..fa732b99358e 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
@@ -3031,6 +3031,81 @@ static int bnge_close(struct net_device *dev)
return 0;
}
+static void bnge_get_queue_stats_rx(struct net_device *dev, int i,
+ struct netdev_queue_stats_rx *stats)
+{
+ struct bnge_net *bn = netdev_priv(dev);
+ struct bnge_nq_ring_info *nqr;
+ u64 *sw;
+
+ if (!bn->bnapi)
+ return;
+
+ nqr = &bn->bnapi[i]->nq_ring;
+ sw = nqr->stats.sw_stats;
+
+ stats->packets = 0;
+ stats->packets += BNGE_GET_RING_STATS64(sw, rx_ucast_pkts);
+ stats->packets += BNGE_GET_RING_STATS64(sw, rx_mcast_pkts);
+ stats->packets += BNGE_GET_RING_STATS64(sw, rx_bcast_pkts);
+
+ stats->bytes = 0;
+ stats->bytes += BNGE_GET_RING_STATS64(sw, rx_ucast_bytes);
+ stats->bytes += BNGE_GET_RING_STATS64(sw, rx_mcast_bytes);
+ stats->bytes += BNGE_GET_RING_STATS64(sw, rx_bcast_bytes);
+
+ stats->hw_drop_overruns = BNGE_GET_RING_STATS64(sw, rx_discard_pkts);
+ stats->hw_drops = BNGE_GET_RING_STATS64(sw, rx_error_pkts) +
+ stats->hw_drop_overruns;
+}
+
+static void bnge_get_queue_stats_tx(struct net_device *dev, int i,
+ struct netdev_queue_stats_tx *stats)
+{
+ struct bnge_net *bn = netdev_priv(dev);
+ struct bnge_napi *bnapi;
+ u64 *sw;
+
+ if (!bn->tx_ring)
+ return;
+
+ bnapi = bn->tx_ring[bn->tx_ring_map[i]].bnapi;
+ sw = bnapi->nq_ring.stats.sw_stats;
+
+ stats->packets = 0;
+ stats->packets += BNGE_GET_RING_STATS64(sw, tx_ucast_pkts);
+ stats->packets += BNGE_GET_RING_STATS64(sw, tx_mcast_pkts);
+ stats->packets += BNGE_GET_RING_STATS64(sw, tx_bcast_pkts);
+
+ stats->bytes = 0;
+ stats->bytes += BNGE_GET_RING_STATS64(sw, tx_ucast_bytes);
+ stats->bytes += BNGE_GET_RING_STATS64(sw, tx_mcast_bytes);
+ stats->bytes += BNGE_GET_RING_STATS64(sw, tx_bcast_bytes);
+
+ stats->hw_drop_errors = BNGE_GET_RING_STATS64(sw, tx_error_pkts);
+ stats->hw_drops = BNGE_GET_RING_STATS64(sw, tx_discard_pkts) +
+ stats->hw_drop_errors;
+}
+
+static void bnge_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ struct bnge_net *bn = netdev_priv(dev);
+
+ rx->packets = bn->net_stats_prev.rx_packets;
+ rx->bytes = bn->net_stats_prev.rx_bytes;
+
+ tx->packets = bn->net_stats_prev.tx_packets;
+ tx->bytes = bn->net_stats_prev.tx_bytes;
+}
+
+static const struct netdev_stat_ops bnge_stat_ops = {
+ .get_queue_stats_rx = bnge_get_queue_stats_rx,
+ .get_queue_stats_tx = bnge_get_queue_stats_tx,
+ .get_base_stats = bnge_get_base_stats,
+};
+
static const struct net_device_ops bnge_netdev_ops = {
.ndo_open = bnge_open,
.ndo_stop = bnge_close,
@@ -3194,6 +3269,7 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs)
bd->netdev = netdev;
netdev->netdev_ops = &bnge_netdev_ops;
+ netdev->stat_ops = &bnge_stat_ops;
bnge_set_ethtool_ops(netdev);
--
2.47.1
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
bng_en: implement netdev_stat_ops
This patch adds a netdev_stat_ops implementation to provide
standardized per-queue statistics via the Netlink API, including
hardware drop counters.
> diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
> index 2513b0907693..fa732b99358e 100644
> --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
> +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
> @@ -3031,6 +3031,81 @@ static int bnge_close(struct net_device *dev)
> return 0;
> }
>
> +static void bnge_get_queue_stats_rx(struct net_device *dev, int i,
> + struct netdev_queue_stats_rx *stats)
> +{
> + struct bnge_net *bn = netdev_priv(dev);
> + struct bnge_nq_ring_info *nqr;
> + u64 *sw;
> +
> + if (!bn->bnapi)
> + return;
> +
> + nqr = &bn->bnapi[i]->nq_ring;
^^^^
Can this race with bnge_close_core and dereference bn->bnapi after it's
freed? The NULL check at line 3041 doesn't prevent the race since
bnge_close_core can free bn->bnapi between the check and dereference.
Looking at the existing bnge_get_stats64, it uses rcu_read_lock() when
accessing bn->bnapi:
	bnge_get_stats64() {
		rcu_read_lock();
		if (test_bit(BNGE_STATE_OPEN, &bd->state)) {
			... access bn->bnapi ...
		}
		rcu_read_unlock();
	}
And bnge_close_core uses synchronize_rcu() to wait for readers before
freeing:
	bnge_close_core() {
		...
		synchronize_rcu();
		bnge_del_napi(bn);
		-> bnge_free_core(bn);
		   -> bn->bnapi = NULL;
	}
Without RCU protection, this sequence can occur:

	Thread A (stats)                Thread B (close)
	----------------                ----------------
	if (!bn->bnapi) [PASSES]
	                                bnge_close_core()
	                                  bnge_free_core()
	                                    bn->bnapi = NULL
	nqr = &bn->bnapi[i] [CRASH]
Should bnge_get_queue_stats_rx acquire rcu_read_lock() and check
test_bit(BNGE_STATE_OPEN) before accessing bn->bnapi?
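For illustration, a minimal sketch of that pattern, modelled on the
bnge_get_stats64 pseudo-code above (the bn->bd pointer is an assumption
based on the bd used there; this is a sketch of the suggestion, not a
tested fix):

	static void bnge_get_queue_stats_rx(struct net_device *dev, int i,
					    struct netdev_queue_stats_rx *stats)
	{
		struct bnge_net *bn = netdev_priv(dev);
		u64 *sw;

		rcu_read_lock();
		/* The OPEN check pairs with the synchronize_rcu() in
		 * bnge_close_core(), so bn->bnapi cannot be freed while
		 * we are inside the read-side critical section.
		 */
		if (!test_bit(BNGE_STATE_OPEN, &bn->bd->state)) {
			rcu_read_unlock();
			return;
		}
		sw = bn->bnapi[i]->nq_ring.stats.sw_stats;
		/* ... accumulate packets/bytes/drops as in the patch ... */
		rcu_read_unlock();
	}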
> + sw = nqr->stats.sw_stats;
> +
> + stats->packets = 0;
> + stats->packets += BNGE_GET_RING_STATS64(sw, rx_ucast_pkts);
> + stats->packets += BNGE_GET_RING_STATS64(sw, rx_mcast_pkts);
> + stats->packets += BNGE_GET_RING_STATS64(sw, rx_bcast_pkts);
> +
> + stats->bytes = 0;
> + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_ucast_bytes);
> + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_mcast_bytes);
> + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_bcast_bytes);
> +
> + stats->hw_drop_overruns = BNGE_GET_RING_STATS64(sw, rx_discard_pkts);
> + stats->hw_drops = BNGE_GET_RING_STATS64(sw, rx_error_pkts) +
> + stats->hw_drop_overruns;
> +}
> +
> +static void bnge_get_queue_stats_tx(struct net_device *dev, int i,
> + struct netdev_queue_stats_tx *stats)
> +{
> + struct bnge_net *bn = netdev_priv(dev);
> + struct bnge_napi *bnapi;
> + u64 *sw;
> +
> + if (!bn->tx_ring)
> + return;
> +
> + bnapi = bn->tx_ring[bn->tx_ring_map[i]].bnapi;
^^^^
Does this have the same race with bnge_close_core? The NULL check doesn't
prevent bnge_close_core from freeing bn->tx_ring between the check and
dereference.
Following the same pattern as bnge_get_stats64, should this also use
rcu_read_lock() and check BNGE_STATE_OPEN before accessing bn->tx_ring?
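The same guard would apply here; a short sketch, under the same
assumptions as the RX example above:

	rcu_read_lock();
	if (test_bit(BNGE_STATE_OPEN, &bn->bd->state)) {
		bnapi = bn->tx_ring[bn->tx_ring_map[i]].bnapi;
		sw = bnapi->nq_ring.stats.sw_stats;
		/* ... read the TX counters as in the patch ... */
	}
	rcu_read_unlock();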
> + sw = bnapi->nq_ring.stats.sw_stats;
> +
> + stats->packets = 0;
> + stats->packets += BNGE_GET_RING_STATS64(sw, tx_ucast_pkts);
> + stats->packets += BNGE_GET_RING_STATS64(sw, tx_mcast_pkts);
> + stats->packets += BNGE_GET_RING_STATS64(sw, tx_bcast_pkts);
> +
> + stats->bytes = 0;
> + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_ucast_bytes);
> + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_mcast_bytes);
> + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_bcast_bytes);
> +
> + stats->hw_drop_errors = BNGE_GET_RING_STATS64(sw, tx_error_pkts);
> + stats->hw_drops = BNGE_GET_RING_STATS64(sw, tx_discard_pkts) +
> + stats->hw_drop_errors;
> +}
> +
> +static void bnge_get_base_stats(struct net_device *dev,
> + struct netdev_queue_stats_rx *rx,
> + struct netdev_queue_stats_tx *tx)
> +{
> + struct bnge_net *bn = netdev_priv(dev);
> +
> + rx->packets = bn->net_stats_prev.rx_packets;
> + rx->bytes = bn->net_stats_prev.rx_bytes;
^^^^
Can this race with bnge_close_core when reading bn->net_stats_prev?
bnge_close_core writes to bn->net_stats_prev while this function reads
it without any locking.
Looking at bnge_get_stats64, it uses rcu_read_lock() before accessing
bn->net_stats_prev. While this won't cause a crash like the other two
issues, it could result in torn reads on 32-bit systems where reading
u64 is not atomic, potentially returning inconsistent statistics to
userspace.
Should bnge_get_base_stats also use rcu_read_lock() when reading
bn->net_stats_prev?
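A sketch of the guarded read (again mirroring bnge_get_stats64; note
this only helps if bnge_close_core updates net_stats_prev before its
synchronize_rcu(), which is assumed here):

	rcu_read_lock();
	rx->packets = bn->net_stats_prev.rx_packets;
	rx->bytes = bn->net_stats_prev.rx_bytes;
	tx->packets = bn->net_stats_prev.tx_packets;
	tx->bytes = bn->net_stats_prev.tx_bytes;
	rcu_read_unlock();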
> +
> + tx->packets = bn->net_stats_prev.tx_packets;
> + tx->bytes = bn->net_stats_prev.tx_bytes;
> +}
> +
> +static const struct netdev_stat_ops bnge_stat_ops = {
> + .get_queue_stats_rx = bnge_get_queue_stats_rx,
> + .get_queue_stats_tx = bnge_get_queue_stats_tx,
> + .get_base_stats = bnge_get_base_stats,
> +};
> +
[ ... ]
> Can this race with bnge_close_core when reading bn->net_stats_prev?
> bnge_close_core writes to bn->net_stats_prev while this function reads
> it without any locking.
>
> Looking at bnge_get_stats64, it uses rcu_read_lock() before accessing
> bn->net_stats_prev. While this won't cause a crash like the other two
> issues, it could result in torn reads on 32-bit systems where reading
> u64 is not atomic, potentially returning inconsistent statistics to
> userspace.
>
> Should bnge_get_base_stats also use rcu_read_lock() when reading
> bn->net_stats_prev?

Thanks, Simon.

Regarding the RCU suggestions: bnge_get_queue_stats_rx,
bnge_get_queue_stats_tx, and bnge_get_base_stats should not
require RCU protection here. Unlike ndo_get_stats64, these ops
are synchronized by the netdev instance lock. Since the writer path
(bnge_close_core via ndo_stop) also holds the netdev instance lock,
they are already mutually exclusive.

Do you agree, or am I missing a case where these ops can be called
without that lock?

Thanks,
Bhargava Marreddy
On Sat, Mar 21, 2026 at 01:57:12AM +0530, Bhargava Chenna Marreddy wrote:
> > Can this race with bnge_close_core when reading bn->net_stats_prev?
> > bnge_close_core writes to bn->net_stats_prev while this function reads
> > it without any locking.
> >
> > Looking at bnge_get_stats64, it uses rcu_read_lock() before accessing
> > bn->net_stats_prev. While this won't cause a crash like the other two
> > issues, it could result in torn reads on 32-bit systems where reading
> > u64 is not atomic, potentially returning inconsistent statistics to
> > userspace.
> >
> > Should bnge_get_base_stats also use rcu_read_lock() when reading
> > bn->net_stats_prev?
>
> Thanks, Simon.
>
> Regarding the RCU suggestions: bnge_get_queue_stats_rx,
> bnge_get_queue_stats_tx, and bnge_get_base_stats should not
> require RCU protection here. Unlike ndo_get_stats64, these ops
> are synchronized by the netdev instance lock. Since the writer path
> (bnge_close_core via ndo_stop) also holds the netdev instance lock,
> they are already mutually exclusive.
>
> Do you agree, or am I missing a case where these ops can be called
> without that lock?

Yes, I agree. And sorry for not thinking of that before forwarding this
review.