Move unicast promiscuity tracking into dev_rx_mode_work so it runs
under netdev_ops_lock instead of under the addr_lock spinlock. This
is required because __dev_set_promiscuity calls dev_change_rx_flags
and __dev_notify_flags, both of which may need to sleep.
Change ASSERT_RTNL() to netdev_ops_assert_locked() in
__dev_set_promiscuity, netif_set_allmulti and __dev_change_flags
since these are now called from the work queue under the ops lock.
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
Documentation/networking/netdevices.rst | 4 ++
net/core/dev.c | 79 +++++++++++++++++--------
2 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index dc83d78d3b27..5cdaa1a3dcc8 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -298,6 +298,10 @@ struct net_device synchronization rules
Notes: Sleepable version of ndo_set_rx_mode. Receives snapshots
of the unicast and multicast address lists.
+ndo_change_rx_flags:
+ Synchronization: rtnl_lock() semaphore. In addition, netdev instance
+ lock if the driver implements queue management or shaper API.
+
ndo_setup_tc:
``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks
(i.e. no ``rtnl_lock`` and no device instance lock). The rest of
diff --git a/net/core/dev.c b/net/core/dev.c
index fedc423306fc..fc5c9b14faa0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9574,7 +9574,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
kuid_t uid;
kgid_t gid;
- ASSERT_RTNL();
+ netdev_ops_assert_locked(dev);
promiscuity = dev->promiscuity + inc;
if (promiscuity == 0) {
@@ -9610,16 +9610,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
dev_change_rx_flags(dev, IFF_PROMISC);
}
- if (notify) {
- /* The ops lock is only required to ensure consistent locking
- * for `NETDEV_CHANGE` notifiers. This function is sometimes
- * called without the lock, even for devices that are ops
- * locked, such as in `dev_uc_sync_multiple` when using
- * bonding or teaming.
- */
- netdev_ops_assert_locked(dev);
+ if (notify)
__dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
- }
return 0;
}
@@ -9641,7 +9633,7 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
unsigned int allmulti, flags;
- ASSERT_RTNL();
+ netdev_ops_assert_locked(dev);
allmulti = dev->allmulti + inc;
if (allmulti == 0) {
@@ -9671,12 +9663,36 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
return 0;
}
+/**
+ * dev_uc_promisc_update() - evaluate whether uc_promisc should be toggled.
+ * @dev: device
+ *
+ * Must be called under netif_addr_lock_bh.
+ * Return: +1 to enter promisc, -1 to leave, 0 for no change.
+ */
+static int dev_uc_promisc_update(struct net_device *dev)
+{
+ if (dev->priv_flags & IFF_UNICAST_FLT)
+ return 0;
+
+ if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
+ dev->uc_promisc = true;
+ return 1;
+ }
+ if (netdev_uc_empty(dev) && dev->uc_promisc) {
+ dev->uc_promisc = false;
+ return -1;
+ }
+ return 0;
+}
+
static void dev_rx_mode_work(struct work_struct *work)
{
struct net_device *dev = container_of(work, struct net_device,
rx_mode_work);
struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref;
const struct net_device_ops *ops = dev->netdev_ops;
+ int promisc_inc;
int err;
__hw_addr_init(&uc_snap);
@@ -9704,16 +9720,29 @@ static void dev_rx_mode_work(struct work_struct *work)
if (!err)
err = __hw_addr_list_snapshot(&mc_ref, &dev->mc,
dev->addr_len);
- netif_addr_unlock_bh(dev);
if (err) {
netdev_WARN(dev, "failed to sync uc/mc addresses\n");
__hw_addr_flush(&uc_snap);
__hw_addr_flush(&uc_ref);
__hw_addr_flush(&mc_snap);
+ netif_addr_unlock_bh(dev);
goto out;
}
+ promisc_inc = dev_uc_promisc_update(dev);
+
+ netif_addr_unlock_bh(dev);
+ } else {
+ netif_addr_lock_bh(dev);
+ promisc_inc = dev_uc_promisc_update(dev);
+ netif_addr_unlock_bh(dev);
+ }
+
+ if (promisc_inc)
+ __dev_set_promiscuity(dev, promisc_inc, false);
+
+ if (ops->ndo_set_rx_mode_async) {
ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
netif_addr_lock_bh(dev);
@@ -9722,6 +9751,10 @@ static void dev_rx_mode_work(struct work_struct *work)
__hw_addr_list_reconcile(&dev->mc, &mc_snap,
&mc_ref, dev->addr_len);
netif_addr_unlock_bh(dev);
+ } else if (ops->ndo_set_rx_mode) {
+ netif_addr_lock_bh(dev);
+ ops->ndo_set_rx_mode(dev);
+ netif_addr_unlock_bh(dev);
}
out:
@@ -9740,28 +9773,22 @@ static void dev_rx_mode_work(struct work_struct *work)
void __dev_set_rx_mode(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int promisc_inc;
/* dev_open will call this function so the list will stay sane. */
if (!netif_up_and_present(dev))
return;
- if (ops->ndo_set_rx_mode_async) {
+ if (ops->ndo_set_rx_mode_async || ops->ndo_change_rx_flags) {
queue_work(rx_mode_wq, &dev->rx_mode_work);
return;
}
- if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
- /* Unicast addresses changes may only happen under the rtnl,
- * therefore calling __dev_set_promiscuity here is safe.
- */
- if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
- __dev_set_promiscuity(dev, 1, false);
- dev->uc_promisc = true;
- } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
- __dev_set_promiscuity(dev, -1, false);
- dev->uc_promisc = false;
- }
- }
+ /* Legacy path for non-ops locked HW devices. */
+
+ promisc_inc = dev_uc_promisc_update(dev);
+ if (promisc_inc)
+ __dev_set_promiscuity(dev, promisc_inc, false);
if (ops->ndo_set_rx_mode)
ops->ndo_set_rx_mode(dev);
@@ -9811,7 +9838,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags,
unsigned int old_flags = dev->flags;
int ret;
- ASSERT_RTNL();
+ netdev_ops_assert_locked(dev);
/*
* Set the flags on our device.
--
2.53.0
> -----Original Message-----
> From: Stanislav Fomichev <sdf@fomichev.me>
> Sent: Friday, March 20, 2026 2:25 AM
> To: netdev@vger.kernel.org
> Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; horms@kernel.org; corbet@lwn.net;
> skhan@linuxfoundation.org; andrew+netdev@lunn.ch;
> michael.chan@broadcom.com; pavan.chebbi@broadcom.com; Nguyen, Anthony
> L <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw
> <przemyslaw.kitszel@intel.com>; saeedm@nvidia.com; tariqt@nvidia.com;
> mbloch@nvidia.com; alexanderduyck@fb.com; kernel-team@meta.com;
> johannes@sipsolutions.net; sd@queasysnail.net; jianbol@nvidia.com;
> dtatulea@nvidia.com; sdf@fomichev.me; mohsin.bashr@gmail.com; Keller,
> Jacob E <jacob.e.keller@intel.com>; willemb@google.com;
> skhawaja@google.com; bestswngs@gmail.com; Loktionov, Aleksandr
> <aleksandr.loktionov@intel.com>; kees@kernel.org; linux-
> doc@vger.kernel.org; linux-kernel@vger.kernel.org; intel-wired-
> lan@lists.osuosl.org; linux-rdma@vger.kernel.org; linux-
> wireless@vger.kernel.org; linux-kselftest@vger.kernel.org;
> leon@kernel.org
> Subject: [PATCH net-next v3 04/13] net: move promiscuity handling into
> dev_rx_mode_work
>
> Move unicast promiscuity tracking into dev_rx_mode_work so it runs
> under netdev_ops_lock instead of under the addr_lock spinlock. This is
> required because __dev_set_promiscuity calls dev_change_rx_flags and
> __dev_notify_flags, both of which may need to sleep.
>
> Change ASSERT_RTNL() to netdev_ops_assert_locked() in
> __dev_set_promiscuity, netif_set_allmulti and __dev_change_flags since
> these are now called from the work queue under the ops lock.
>
> Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> ---
> Documentation/networking/netdevices.rst | 4 ++
> net/core/dev.c | 79 +++++++++++++++++-------
> -
> 2 files changed, 57 insertions(+), 26 deletions(-)
>
> diff --git a/Documentation/networking/netdevices.rst
> b/Documentation/networking/netdevices.rst
> index dc83d78d3b27..5cdaa1a3dcc8 100644
> --- a/Documentation/networking/netdevices.rst
> +++ b/Documentation/networking/netdevices.rst
> @@ -298,6 +298,10 @@ struct net_device synchronization rules
> Notes: Sleepable version of ndo_set_rx_mode. Receives snapshots
> of the unicast and multicast address lists.
>
> +ndo_change_rx_flags:
> + Synchronization: rtnl_lock() semaphore. In addition, netdev
> instance
> + lock if the driver implements queue management or shaper API.
> +
> ndo_setup_tc:
> ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT
> locks
> (i.e. no ``rtnl_lock`` and no device instance lock). The rest
> of diff --git a/net/core/dev.c b/net/core/dev.c index
> fedc423306fc..fc5c9b14faa0 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -9574,7 +9574,7 @@ static int __dev_set_promiscuity(struct
> net_device *dev, int inc, bool notify)
> kuid_t uid;
> kgid_t gid;
>
> - ASSERT_RTNL();
> + netdev_ops_assert_locked(dev);
Can you explain why do you add new hard precondition of ops lock must be held?
>
> promiscuity = dev->promiscuity + inc;
> if (promiscuity == 0) {
> @@ -9610,16 +9610,8 @@ static int __dev_set_promiscuity(struct
> net_device *dev, int inc, bool notify)
>
> dev_change_rx_flags(dev, IFF_PROMISC);
> }
...
> __hw_addr_init(&uc_snap);
> @@ -9704,16 +9720,29 @@ static void dev_rx_mode_work(struct
> work_struct *work)
> if (!err)
> err = __hw_addr_list_snapshot(&mc_ref, &dev->mc,
> dev->addr_len);
> - netif_addr_unlock_bh(dev);
>
> if (err) {
> netdev_WARN(dev, "failed to sync uc/mc
> addresses\n");
> __hw_addr_flush(&uc_snap);
> __hw_addr_flush(&uc_ref);
> __hw_addr_flush(&mc_snap);
> + netif_addr_unlock_bh(dev);
> goto out;
> }
>
> + promisc_inc = dev_uc_promisc_update(dev);
> +
> + netif_addr_unlock_bh(dev);
> + } else {
> + netif_addr_lock_bh(dev);
> + promisc_inc = dev_uc_promisc_update(dev);
> + netif_addr_unlock_bh(dev);
> + }
> +
> + if (promisc_inc)
> + __dev_set_promiscuity(dev, promisc_inc, false);
But it's being called here without any netdev_lock_ops(dev) ?
> +
> + if (ops->ndo_set_rx_mode_async) {
> ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
>
> netif_addr_lock_bh(dev);
> @@ -9722,6 +9751,10 @@ static void dev_rx_mode_work(struct work_struct
> *work)
> __hw_addr_list_reconcile(&dev->mc, &mc_snap,
> &mc_ref, dev->addr_len);
> netif_addr_unlock_bh(dev);
> + } else if (ops->ndo_set_rx_mode) {
> + netif_addr_lock_bh(dev);
> + ops->ndo_set_rx_mode(dev);
> + netif_addr_unlock_bh(dev);
> }
...
> --
> 2.53.0
On 03/20, Loktionov, Aleksandr wrote:
>
>
> > -----Original Message-----
> > From: Stanislav Fomichev <sdf@fomichev.me>
> > Sent: Friday, March 20, 2026 2:25 AM
> > To: netdev@vger.kernel.org
> > Cc: davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> > pabeni@redhat.com; horms@kernel.org; corbet@lwn.net;
> > skhan@linuxfoundation.org; andrew+netdev@lunn.ch;
> > michael.chan@broadcom.com; pavan.chebbi@broadcom.com; Nguyen, Anthony
> > L <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw
> > <przemyslaw.kitszel@intel.com>; saeedm@nvidia.com; tariqt@nvidia.com;
> > mbloch@nvidia.com; alexanderduyck@fb.com; kernel-team@meta.com;
> > johannes@sipsolutions.net; sd@queasysnail.net; jianbol@nvidia.com;
> > dtatulea@nvidia.com; sdf@fomichev.me; mohsin.bashr@gmail.com; Keller,
> > Jacob E <jacob.e.keller@intel.com>; willemb@google.com;
> > skhawaja@google.com; bestswngs@gmail.com; Loktionov, Aleksandr
> > <aleksandr.loktionov@intel.com>; kees@kernel.org; linux-
> > doc@vger.kernel.org; linux-kernel@vger.kernel.org; intel-wired-
> > lan@lists.osuosl.org; linux-rdma@vger.kernel.org; linux-
> > wireless@vger.kernel.org; linux-kselftest@vger.kernel.org;
> > leon@kernel.org
> > Subject: [PATCH net-next v3 04/13] net: move promiscuity handling into
> > dev_rx_mode_work
> >
> > Move unicast promiscuity tracking into dev_rx_mode_work so it runs
> > under netdev_ops_lock instead of under the addr_lock spinlock. This is
> > required because __dev_set_promiscuity calls dev_change_rx_flags and
> > __dev_notify_flags, both of which may need to sleep.
> >
> > Change ASSERT_RTNL() to netdev_ops_assert_locked() in
> > __dev_set_promiscuity, netif_set_allmulti and __dev_change_flags since
> > these are now called from the work queue under the ops lock.
> >
> > Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > ---
> > Documentation/networking/netdevices.rst | 4 ++
> > net/core/dev.c | 79 +++++++++++++++++-------
> > -
> > 2 files changed, 57 insertions(+), 26 deletions(-)
> >
> > diff --git a/Documentation/networking/netdevices.rst
> > b/Documentation/networking/netdevices.rst
> > index dc83d78d3b27..5cdaa1a3dcc8 100644
> > --- a/Documentation/networking/netdevices.rst
> > +++ b/Documentation/networking/netdevices.rst
> > @@ -298,6 +298,10 @@ struct net_device synchronization rules
> > Notes: Sleepable version of ndo_set_rx_mode. Receives snapshots
> > of the unicast and multicast address lists.
> >
> > +ndo_change_rx_flags:
> > + Synchronization: rtnl_lock() semaphore. In addition, netdev
> > instance
> > + lock if the driver implements queue management or shaper API.
> > +
> > ndo_setup_tc:
> > ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT
> > locks
> > (i.e. no ``rtnl_lock`` and no device instance lock). The rest
> > of diff --git a/net/core/dev.c b/net/core/dev.c index
> > fedc423306fc..fc5c9b14faa0 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -9574,7 +9574,7 @@ static int __dev_set_promiscuity(struct
> > net_device *dev, int inc, bool notify)
> > kuid_t uid;
> > kgid_t gid;
> >
> > - ASSERT_RTNL();
> > + netdev_ops_assert_locked(dev);
> Can you explain why do you add new hard precondition of ops lock must be held?
The context is that in f792709e0baa ("selftests: net: validate team flags
propagation") I had to add locking around NETDEV_CHANGE notifiers and
add that ugly `if (notify) netdev_ops_assert_locked` check. After this
patch I believe we are consistently calling __dev_set_promiscuity
with the ops lock (for ops locked netdev), so we can cleanup this enforcement
part.
> > promiscuity = dev->promiscuity + inc;
> > if (promiscuity == 0) {
> > @@ -9610,16 +9610,8 @@ static int __dev_set_promiscuity(struct
> > net_device *dev, int inc, bool notify)
> >
> > dev_change_rx_flags(dev, IFF_PROMISC);
> > }
>
> ...
>
> > __hw_addr_init(&uc_snap);
> > @@ -9704,16 +9720,29 @@ static void dev_rx_mode_work(struct
> > work_struct *work)
> > if (!err)
> > err = __hw_addr_list_snapshot(&mc_ref, &dev->mc,
> > dev->addr_len);
> > - netif_addr_unlock_bh(dev);
> >
> > if (err) {
> > netdev_WARN(dev, "failed to sync uc/mc
> > addresses\n");
> > __hw_addr_flush(&uc_snap);
> > __hw_addr_flush(&uc_ref);
> > __hw_addr_flush(&mc_snap);
> > + netif_addr_unlock_bh(dev);
> > goto out;
> > }
> >
> > + promisc_inc = dev_uc_promisc_update(dev);
> > +
> > + netif_addr_unlock_bh(dev);
> > + } else {
> > + netif_addr_lock_bh(dev);
> > + promisc_inc = dev_uc_promisc_update(dev);
> > + netif_addr_unlock_bh(dev);
> > + }
> > +
> > + if (promisc_inc)
> > + __dev_set_promiscuity(dev, promisc_inc, false);
> But it's being called here without any netdev_lock_ops(dev) ?
We have the following at the start of dev_rx_mode_work:
rtnl_lock();
netdev_lock_ops(dev);
Or am I looking at something else?
© 2016 - 2026 Red Hat, Inc.