Drivers that are using ops lock and don't depend on RTNL lock
still need to manage it because udp_tunnel's RTNL dependency.
Introduce new udp_tunnel_nic_lock and use it instead of
rtnl_lock. Drop non-UDP_TUNNEL_NIC_INFO_MAY_SLEEP mode from
udp_tunnel infra (udp_tunnel_nic_device_sync_work needs to
grab udp_tunnel_nic_lock mutex and might sleep).
Cc: Michael Chan <michael.chan@broadcom.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
---
.../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +-
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++--
drivers/net/ethernet/emulex/benet/be_main.c | 3 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 1 -
drivers/net/ethernet/intel/ice/ice_main.c | 1 -
.../net/ethernet/mellanox/mlx4/en_netdev.c | 3 +-
.../net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-
.../ethernet/netronome/nfp/nfp_net_common.c | 3 +-
.../net/ethernet/qlogic/qede/qede_filter.c | 3 --
.../net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 -
drivers/net/ethernet/sfc/ef10.c | 1 -
drivers/net/netdevsim/netdevsim.h | 1 -
drivers/net/netdevsim/udp_tunnels.c | 12 -------
include/net/udp_tunnel.h | 9 +++--
net/ipv4/udp_tunnel_nic.c | 33 ++++++++-----------
net/ipv4/udp_tunnel_stub.c | 2 ++
16 files changed, 27 insertions(+), 58 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index f522ca8ff66b..fa7e5adf9804 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10219,8 +10219,7 @@ static int bnx2x_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
static const struct udp_tunnel_nic_info bnx2x_udp_tunnels = {
.sync_table = bnx2x_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d5495762c945..a3dadde65b8d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -15573,8 +15573,7 @@ static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int ta
static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
.set_port = bnxt_udp_tunnel_set_port,
.unset_port = bnxt_udp_tunnel_unset_port,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
@@ -15582,8 +15581,7 @@ static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
}, bnxt_udp_tunnels_p7 = {
.set_port = bnxt_udp_tunnel_set_port,
.unset_port = bnxt_udp_tunnel_unset_port,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3d2e21592119..f49400ba9729 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4031,8 +4031,7 @@ static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
static const struct udp_tunnel_nic_info be_udp_tunnels = {
.set_port = be_vxlan_set_port,
.unset_port = be_vxlan_unset_port,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 120d68654e3f..3d3da9d15348 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15891,7 +15891,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
- pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 20d3baf955e3..5a60ede84091 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4745,7 +4745,6 @@ int ice_init_dev(struct ice_pf *pf)
pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
- pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
pf->hw.udp_tunnel_nic.tables[0].n_entries =
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 281b34af0bb4..d2071aff7b8f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2670,8 +2670,7 @@ static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info mlx4_udp_tunnels = {
.sync_table = mlx4_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9bd166f489e7..d5eff6c06b2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5292,8 +5292,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
priv->nic_info.set_port = mlx5e_vxlan_set_port;
priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
- priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
+ priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
/* Don't count the space hard-coded to the IANA port */
priv->nic_info.tables[0].n_entries =
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 28997ddab966..14ba38bcb07b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2394,8 +2394,7 @@ static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
static const struct udp_tunnel_nic_info nfp_udp_tunnels = {
.sync_table = nfp_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{
.n_entries = NFP_NET_N_VXLAN_PORTS,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 985026dd816f..7e341e026489 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -987,20 +987,17 @@ static int qede_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info qede_udp_tunnels_both = {
.sync_table = qede_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
},
}, qede_udp_tunnels_vxlan = {
.sync_table = qede_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},
}, qede_udp_tunnels_geneve = {
.sync_table = qede_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
},
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index eb69121df726..53cdd36c4123 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -486,7 +486,6 @@ static int qlcnic_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info qlcnic_udp_tunnels = {
.sync_table = qlcnic_udp_tunnel_sync,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 47349c148c0c..fcec81f862ec 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3985,7 +3985,6 @@ static int efx_ef10_udp_tnl_unset_port(struct net_device *dev,
static const struct udp_tunnel_nic_info efx_ef10_udp_tunnels = {
.set_port = efx_ef10_udp_tnl_set_port,
.unset_port = efx_ef10_udp_tnl_unset_port,
- .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{
.n_entries = 16,
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d04401f0bdf7..738da596f60a 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -131,7 +131,6 @@ struct netdevsim {
struct nsim_macsec macsec;
struct {
u32 inject_error;
- u32 sleep;
u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS];
struct dentry *ddir;
diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c
index 640b4983a9a0..89fff76e51cf 100644
--- a/drivers/net/netdevsim/udp_tunnels.c
+++ b/drivers/net/netdevsim/udp_tunnels.c
@@ -18,9 +18,6 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table,
ret = -ns->udp_ports.inject_error;
ns->udp_ports.inject_error = 0;
- if (ns->udp_ports.sleep)
- msleep(ns->udp_ports.sleep);
-
if (!ret) {
if (ns->udp_ports.ports[table][entry]) {
WARN(1, "entry already in use\n");
@@ -47,8 +44,6 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table,
ret = -ns->udp_ports.inject_error;
ns->udp_ports.inject_error = 0;
- if (ns->udp_ports.sleep)
- msleep(ns->udp_ports.sleep);
if (!ret) {
u32 val = be16_to_cpu(ti->port) << 16 | ti->type;
@@ -112,12 +107,10 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data,
struct net_device *dev = file->private_data;
struct netdevsim *ns = netdev_priv(dev);
- rtnl_lock();
if (dev->reg_state == NETREG_REGISTERED) {
memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
udp_tunnel_nic_reset_ntf(dev);
}
- rtnl_unlock();
return count;
}
@@ -172,7 +165,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
GFP_KERNEL);
if (!info)
return -ENOMEM;
- ns->udp_ports.sleep = nsim_dev->udp_ports.sleep;
if (nsim_dev->udp_ports.sync_all) {
info->set_port = NULL;
@@ -181,8 +173,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
info->sync_table = NULL;
}
- if (ns->udp_ports.sleep)
- info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
if (nsim_dev->udp_ports.open_only)
info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY;
if (nsim_dev->udp_ports.ipv4_only)
@@ -217,6 +207,4 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev)
&nsim_dev->udp_ports.shared);
debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.static_iana_vxlan);
- debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir,
- &nsim_dev->udp_ports.sleep);
}
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 2df3b8344eb5..7f5537fdf2c9 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -221,19 +221,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
#define UDP_TUNNEL_NIC_MAX_TABLES 4
enum udp_tunnel_nic_info_flags {
- /* Device callbacks may sleep */
- UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
/* Device only supports offloads when it's open, all ports
* will be removed before close and re-added after open.
*/
- UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1),
+ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0),
/* Device supports only IPv4 tunnels */
- UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2),
+ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1),
/* Device has hard-coded the IANA VXLAN port (4789) as VXLAN.
* This port must not be counted towards n_entries of any table.
* Driver will not receive any callback associated with port 4789.
*/
- UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2),
};
struct udp_tunnel_nic;
@@ -328,6 +326,7 @@ struct udp_tunnel_nic_ops {
#ifdef CONFIG_INET
extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops;
+extern struct mutex udp_tunnel_nic_lock;
#else
#define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL)
#endif
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b6d2d16189c0..4852397a595e 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -298,22 +298,11 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
- const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
- bool may_sleep;
-
if (!utn->need_sync)
return;
- /* Drivers which sleep in the callback need to update from
- * the workqueue, if we come from the tunnel driver's notification.
- */
- may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
- if (!may_sleep)
- __udp_tunnel_nic_device_sync(dev, utn);
- if (may_sleep || utn->need_replay) {
- queue_work(udp_tunnel_nic_workqueue, &utn->work);
- utn->work_pending = 1;
- }
+ queue_work(udp_tunnel_nic_workqueue, &utn->work);
+ utn->work_pending = 1;
}
static bool
@@ -554,11 +543,11 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
struct udp_tunnel_nic *utn;
unsigned int i, j;
- ASSERT_RTNL();
+ mutex_lock(&udp_tunnel_nic_lock);
utn = dev->udp_tunnel_nic;
if (!utn)
- return;
+ goto unlock;
utn->need_sync = false;
for (i = 0; i < utn->n_tables; i++)
@@ -569,7 +558,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
- /* We don't release rtnl across ops */
+ /* We don't release udp_tunnel_nic_lock across ops */
WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
if (!entry->use_cnt)
continue;
@@ -579,6 +568,9 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
}
__udp_tunnel_nic_device_sync(dev, utn);
+
+unlock:
+ mutex_unlock(&udp_tunnel_nic_lock);
}
static size_t
@@ -709,13 +701,16 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
struct udp_tunnel_nic *utn =
container_of(work, struct udp_tunnel_nic, work);
- rtnl_lock();
+ mutex_lock(&udp_tunnel_nic_lock);
utn->work_pending = 0;
__udp_tunnel_nic_device_sync(utn->dev, utn);
- if (utn->need_replay)
+ if (utn->need_replay) {
+ rtnl_lock();
udp_tunnel_nic_replay(utn->dev, utn);
- rtnl_unlock();
+ rtnl_unlock();
+ }
+ mutex_unlock(&udp_tunnel_nic_lock);
}
static struct udp_tunnel_nic *
diff --git a/net/ipv4/udp_tunnel_stub.c b/net/ipv4/udp_tunnel_stub.c
index c4b2888f5fef..d60b3262beb3 100644
--- a/net/ipv4/udp_tunnel_stub.c
+++ b/net/ipv4/udp_tunnel_stub.c
@@ -3,5 +3,7 @@
#include <net/udp_tunnel.h>
+DEFINE_MUTEX(udp_tunnel_nic_lock);
+EXPORT_SYMBOL_GPL(udp_tunnel_nic_lock);
const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops;
EXPORT_SYMBOL_GPL(udp_tunnel_nic_ops);
--
2.49.0
On Tue, 20 May 2025 13:36:13 -0700 Stanislav Fomichev wrote:
> Drivers that are using ops lock and don't depend on RTNL lock
> still need to manage it because udp_tunnel's RTNL dependency.
> Introduce new udp_tunnel_nic_lock and use it instead of
> rtnl_lock. Drop non-UDP_TUNNEL_NIC_INFO_MAY_SLEEP mode from
> udp_tunnel infra (udp_tunnel_nic_device_sync_work needs to
> grab udp_tunnel_nic_lock mutex and might sleep).
There is a netdevsim-based test for this that needs to be fixed up.
> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> index 2df3b8344eb5..7f5537fdf2c9 100644
> --- a/include/net/udp_tunnel.h
> +++ b/include/net/udp_tunnel.h
> @@ -221,19 +221,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
> #define UDP_TUNNEL_NIC_MAX_TABLES 4
>
> enum udp_tunnel_nic_info_flags {
> - /* Device callbacks may sleep */
> - UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
Could we use a different lock for sleeping and non-sleeping drivers?
> @@ -554,11 +543,11 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
> struct udp_tunnel_nic *utn;
> unsigned int i, j;
>
> - ASSERT_RTNL();
> + mutex_lock(&udp_tunnel_nic_lock);
>
> utn = dev->udp_tunnel_nic;
utn and info's lifetimes are tied to the lifetime of the device
I think their existence can remain protected by the external locks
> if (!utn)
> - return;
> + goto unlock;
>
> utn->need_sync = false;
> for (i = 0; i < utn->n_tables; i++)
> - rtnl_lock();
> + mutex_lock(&udp_tunnel_nic_lock);
> utn->work_pending = 0;
> __udp_tunnel_nic_device_sync(utn->dev, utn);
>
> - if (utn->need_replay)
> + if (utn->need_replay) {
> + rtnl_lock();
> udp_tunnel_nic_replay(utn->dev, utn);
> - rtnl_unlock();
> + rtnl_unlock();
> + }
> + mutex_unlock(&udp_tunnel_nic_lock);
> }
What's the lock ordering between the new lock and rtnl lock?
BTW the lock could live in utn, right? We can't use the instance
lock because of sharing, but we could put the lock in utn?
On 05/21, Jakub Kicinski wrote:
> On Tue, 20 May 2025 13:36:13 -0700 Stanislav Fomichev wrote:
> > Drivers that are using ops lock and don't depend on RTNL lock
> > still need to manage it because udp_tunnel's RTNL dependency.
> > Introduce new udp_tunnel_nic_lock and use it instead of
> > rtnl_lock. Drop non-UDP_TUNNEL_NIC_INFO_MAY_SLEEP mode from
> > udp_tunnel infra (udp_tunnel_nic_device_sync_work needs to
> > grab udp_tunnel_nic_lock mutex and might sleep).
>
> There is a netdevsim-based test for this that needs to be fixed up.
Oh, I did not see that one, let me try to find and run it.
> > diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> > index 2df3b8344eb5..7f5537fdf2c9 100644
> > --- a/include/net/udp_tunnel.h
> > +++ b/include/net/udp_tunnel.h
> > @@ -221,19 +221,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
> > #define UDP_TUNNEL_NIC_MAX_TABLES 4
> >
> > enum udp_tunnel_nic_info_flags {
> > - /* Device callbacks may sleep */
> > - UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
>
> Could we use a different lock for sleeping and non-sleeping drivers?
We can probably do it if we reorder the locks (as you ask/suggest
below). Overall, I'm not sure I understand why we want to have two
paths here. If we can do everything via work queue, why have a separate
path for the non-sleepable callback? (more code -> more bugs)
> > @@ -554,11 +543,11 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
> > struct udp_tunnel_nic *utn;
> > unsigned int i, j;
> >
> > - ASSERT_RTNL();
> > + mutex_lock(&udp_tunnel_nic_lock);
> >
> > utn = dev->udp_tunnel_nic;
>
> utn and info's lifetimes are tied to the lifetime of the device
> I think their existence can remain protected by the external locks
SG, will move the lock down a bit.
> > if (!utn)
> > - return;
> > + goto unlock;
> >
> > utn->need_sync = false;
> > for (i = 0; i < utn->n_tables; i++)
>
> > - rtnl_lock();
> > + mutex_lock(&udp_tunnel_nic_lock);
> > utn->work_pending = 0;
> > __udp_tunnel_nic_device_sync(utn->dev, utn);
> >
> > - if (utn->need_replay)
> > + if (utn->need_replay) {
> > + rtnl_lock();
> > udp_tunnel_nic_replay(utn->dev, utn);
> > - rtnl_unlock();
> > + rtnl_unlock();
> > + }
> > + mutex_unlock(&udp_tunnel_nic_lock);
> > }
>
> What's the lock ordering between the new lock and rtnl lock?
From ops-locked, we'll get: ops->tunnel_lock (__udp_tunnel_nic_reset_ntf)
From non-ops locked, we'll get: rtnl->tunnel_lock
I see your point, we need to do rtnl->tunnel_lock here as well.
> BTW the lock could live in utn, right? We can't use the instance
> lock because of sharing, but we could put the lock in utn?
I was thinking that there is some global state besides udp_tunnel_nic,
but I don't see any, will move the lock, thanks!
On Wed, 21 May 2025 09:54:03 -0700 Stanislav Fomichev wrote:
> > > enum udp_tunnel_nic_info_flags {
> > > - /* Device callbacks may sleep */
> > > - UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
> >
> > Could we use a different lock for sleeping and non-sleeping drivers?
>
> We can probably do it if we reorder the locks (as you ask/suggest
> below). Overall, I'm not sure I understand why we want to have two
> paths here. If we can do everything via work queue, why have a separate
> path for the non-sleepable callback? (more code -> more bugs)
I think when I was pulling this code out of the drivers I was trying
to preserve the fast path for drivers which don't have to sleep.
But if some drivers are okay with the wq then the mechanism must work,
so I guess you're right, it should be fine to make all go via wq.
© 2016 - 2025 Red Hat, Inc.