Bind VXLAN sockets to the local addresses if the IFLA_VXLAN_LOCALBIND
option is set. This is the new default.
Change vxlan_find_sock to search for the socket using the listening
address.
This is implemented by copying the VXLAN local address into the struct
udp_port_cfg passed to udp_sock_create(). The freebind option is set
because a VXLAN interface may be brought UP before its outgoing interface is.
This fixes multiple VXLAN selftests that fail because of that race, i.e. the VXLAN interface being brought UP before its outgoing interface.
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
drivers/net/vxlan/vxlan_core.c | 59 ++++++++++++++++++++++++++--------
1 file changed, 46 insertions(+), 13 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 15fe9d83c724..12da9595436e 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -78,18 +78,34 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
}
/* Find VXLAN socket based on network namespace, address family, UDP port,
- * enabled unshareable flags and socket device binding (see l3mdev with
- * non-default VRF).
+ * bound address, enabled unshareable flags and socket device binding
+ * (see l3mdev with non-default VRF).
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags, int ifindex)
+ __be16 port, u32 flags, int ifindex,
+ union vxlan_addr *saddr)
{
struct vxlan_sock *vs;
flags &= VXLAN_F_RCV_FLAGS;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sk(vs->sock->sk)->inet_sport == port &&
+ struct sock *sk = vs->sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (flags & VXLAN_F_LOCALBIND) {
+ if (family == AF_INET &&
+ inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
+ continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (family == AF_INET6 &&
+ ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
+ &saddr->sin6.sin6_addr) != 0)
+ continue;
+#endif
+ }
+
+ if (inet->inet_sport == port &&
vxlan_get_sk_family(vs) == family &&
vs->flags == flags &&
vs->sock->sk->sk_bound_dev_if == ifindex)
@@ -141,11 +157,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs,
/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
__be32 vni, sa_family_t family,
- __be16 port, u32 flags)
+ __be16 port, u32 flags,
+ union vxlan_addr *saddr)
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, family, port, flags, ifindex);
+ vs = vxlan_find_sock(net, family, port, flags, ifindex, saddr);
if (!vs)
return NULL;
@@ -2309,7 +2326,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
dst_release(dst);
dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
addr_family, dst_port,
- vxlan->cfg.flags);
+ vxlan->cfg.flags, &vxlan->cfg.saddr);
if (!dst_vxlan) {
DEV_STATS_INC(dev, tx_errors);
vxlan_vnifilter_count(vxlan, vni, NULL,
@@ -3508,8 +3525,9 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
.get_link_ksettings = vxlan_get_link_ksettings,
};
-static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags, int ifindex)
+static struct socket *vxlan_create_sock(struct net *net, bool ipv6, __be16 port,
+ u32 flags, int ifindex,
+ union vxlan_addr *addr)
{
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -3526,6 +3544,20 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
udp_conf.family = AF_INET;
}
+ if (flags & VXLAN_F_LOCALBIND) {
+ if (ipv6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ memcpy(&udp_conf.local_ip6.s6_addr32,
+ &addr->sin6.sin6_addr.s6_addr32,
+ sizeof(addr->sin6.sin6_addr.s6_addr32));
+#endif
+ } else {
+ udp_conf.local_ip.s_addr = addr->sin.sin_addr.s_addr;
+ }
+
+ udp_conf.freebind = 1;
+ }
+
udp_conf.local_udp_port = port;
udp_conf.bind_ifindex = ifindex;
@@ -3541,7 +3573,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
__be16 port, u32 flags,
- int ifindex)
+ int ifindex,
+ union vxlan_addr *addr)
{
struct vxlan_sock *vs;
struct socket *sock;
@@ -3557,7 +3590,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
+ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex, addr);
if (IS_ERR(sock)) {
kfree(vs);
return ERR_CAST(sock);
@@ -3610,7 +3643,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
rcu_read_lock();
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
vxlan->cfg.dst_port, vxlan->cfg.flags,
- l3mdev_index);
+ l3mdev_index, &vxlan->cfg.saddr);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
rcu_read_unlock();
return -EBUSY;
@@ -3620,7 +3653,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
vxlan->cfg.dst_port, vxlan->cfg.flags,
- l3mdev_index);
+ l3mdev_index, &vxlan->cfg.saddr);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
--
2.36.1
On Tue, Aug 12, 2025 at 02:51:53PM +0200, Richard Gobert wrote:
> Bind VXLAN sockets to the local addresses if the IFLA_VXLAN_LOCALBIND
> option is set. This is the new default.

Drop the last sentence?

>
> Change vxlan_find_sock to search for the socket using the listening
> address.
>
> This is implemented by copying the VXLAN local address to the udp_port_cfg
> passed to udp_sock_create. The freebind option is set because VXLAN
> interfaces may be UP before their outgoing interface is.
>
> This fixes multiple VXLAN selftests that fail because of that race.

This sentence is no longer relevant as well.

>
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> ---
> drivers/net/vxlan/vxlan_core.c | 59 ++++++++++++++++++++++++++--------
> 1 file changed, 46 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 15fe9d83c724..12da9595436e 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -78,18 +78,34 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
> }
>
> /* Find VXLAN socket based on network namespace, address family, UDP port,
> - * enabled unshareable flags and socket device binding (see l3mdev with
> - * non-default VRF).
> + * bound address, enabled unshareable flags and socket device binding
> + * (see l3mdev with non-default VRF).
> */
> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
> - __be16 port, u32 flags, int ifindex)
> + __be16 port, u32 flags, int ifindex,
> + union vxlan_addr *saddr)
> {
> struct vxlan_sock *vs;
>
> flags &= VXLAN_F_RCV_FLAGS;
>
> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> - if (inet_sk(vs->sock->sk)->inet_sport == port &&
> + struct sock *sk = vs->sock->sk;
> + struct inet_sock *inet = inet_sk(sk);

https://docs.kernel.org/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs

> +
> + if (flags & VXLAN_F_LOCALBIND) {
> + if (family == AF_INET &&
> + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
> + continue;
> +#if IS_ENABLED(CONFIG_IPV6)
> + else if (family == AF_INET6 &&
> + ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
> + &saddr->sin6.sin6_addr) != 0)
> + continue;
> +#endif
> + }
> +
> + if (inet->inet_sport == port &&
> vxlan_get_sk_family(vs) == family &&
> vs->flags == flags &&
> vs->sock->sk->sk_bound_dev_if == ifindex)
On 8/13/25 11:26, Ido Schimmel wrote:
> On Tue, Aug 12, 2025 at 02:51:53PM +0200, Richard Gobert wrote:
>> Bind VXLAN sockets to the local addresses if the IFLA_VXLAN_LOCALBIND
>> option is set. This is the new default.
>
> Drop the last sentence?
>
>>
>> Change vxlan_find_sock to search for the socket using the listening
>> address.
>>
>> This is implemented by copying the VXLAN local address to the udp_port_cfg
>> passed to udp_sock_create. The freebind option is set because VXLAN
>> interfaces may be UP before their outgoing interface is.
>>
>> This fixes multiple VXLAN selftests that fail because of that race.
>
> This sentence is no longer relevant as well.
>
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
>> ---
>> drivers/net/vxlan/vxlan_core.c | 59 ++++++++++++++++++++++++++--------
>> 1 file changed, 46 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
>> index 15fe9d83c724..12da9595436e 100644
>> --- a/drivers/net/vxlan/vxlan_core.c
>> +++ b/drivers/net/vxlan/vxlan_core.c
>> @@ -78,18 +78,34 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
>> }
>>
>> /* Find VXLAN socket based on network namespace, address family, UDP port,
>> - * enabled unshareable flags and socket device binding (see l3mdev with
>> - * non-default VRF).
>> + * bound address, enabled unshareable flags and socket device binding
>> + * (see l3mdev with non-default VRF).
>> */
>> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
>> - __be16 port, u32 flags, int ifindex)
>> + __be16 port, u32 flags, int ifindex,
>> + union vxlan_addr *saddr)
>> {
>> struct vxlan_sock *vs;
>>
>> flags &= VXLAN_F_RCV_FLAGS;
>>
>> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>> - if (inet_sk(vs->sock->sk)->inet_sport == port &&
>> + struct sock *sk = vs->sock->sk;
>> + struct inet_sock *inet = inet_sk(sk);
>
> https://docs.kernel.org/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs
>
>> +
>> + if (flags & VXLAN_F_LOCALBIND) {
>> + if (family == AF_INET &&
>> + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
>> + continue;
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + else if (family == AF_INET6 &&
>> + ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
>> + &saddr->sin6.sin6_addr) != 0)
>> + continue;
>> +#endif
>> + }
>> +
>> + if (inet->inet_sport == port &&
>> vxlan_get_sk_family(vs) == family &&
>> vs->flags == flags &&
>> vs->sock->sk->sk_bound_dev_if == ifindex)

My bad, will fix.
From: Richard Gobert <richardbgobert@gmail.com>
Date: Tue, 12 Aug 2025 14:51:53 +0200
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 15fe9d83c724..12da9595436e 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -78,18 +78,34 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
> }
>
> /* Find VXLAN socket based on network namespace, address family, UDP port,
> - * enabled unshareable flags and socket device binding (see l3mdev with
> - * non-default VRF).
> + * bound address, enabled unshareable flags and socket device binding
> + * (see l3mdev with non-default VRF).
> */
> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
> - __be16 port, u32 flags, int ifindex)
> + __be16 port, u32 flags, int ifindex,
> + union vxlan_addr *saddr)
> {
> struct vxlan_sock *vs;
>
> flags &= VXLAN_F_RCV_FLAGS;

VXLAN_F_LOCALBIND seems to be cleared ?

>
> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> - if (inet_sk(vs->sock->sk)->inet_sport == port &&
> + struct sock *sk = vs->sock->sk;
> + struct inet_sock *inet = inet_sk(sk);
> +
> + if (flags & VXLAN_F_LOCALBIND) {

Does selftest exercise this path ?

> + if (family == AF_INET &&
> + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
> + continue;
> +#if IS_ENABLED(CONFIG_IPV6)
> + else if (family == AF_INET6 &&
> + ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
> + &saddr->sin6.sin6_addr) != 0)
> + continue;
> +#endif
> + }
> +
> + if (inet->inet_sport == port &&
> vxlan_get_sk_family(vs) == family &&
> vs->flags == flags &&
> vs->sock->sk->sk_bound_dev_if == ifindex)
Kuniyuki Iwashima wrote:
> From: Richard Gobert <richardbgobert@gmail.com>
> Date: Tue, 12 Aug 2025 14:51:53 +0200
>> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
>> index 15fe9d83c724..12da9595436e 100644
>> --- a/drivers/net/vxlan/vxlan_core.c
>> +++ b/drivers/net/vxlan/vxlan_core.c
>> @@ -78,18 +78,34 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
>> }
>>
>> /* Find VXLAN socket based on network namespace, address family, UDP port,
>> - * enabled unshareable flags and socket device binding (see l3mdev with
>> - * non-default VRF).
>> + * bound address, enabled unshareable flags and socket device binding
>> + * (see l3mdev with non-default VRF).
>> */
>> static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
>> - __be16 port, u32 flags, int ifindex)
>> + __be16 port, u32 flags, int ifindex,
>> + union vxlan_addr *saddr)
>> {
>> struct vxlan_sock *vs;
>>
>> flags &= VXLAN_F_RCV_FLAGS;
>
> VXLAN_F_LOCALBIND seems to be cleared ?
>
>>
>> hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>> - if (inet_sk(vs->sock->sk)->inet_sport == port &&
>> + struct sock *sk = vs->sock->sk;
>> + struct inet_sock *inet = inet_sk(sk);
>> +
>> + if (flags & VXLAN_F_LOCALBIND) {
>
> Does selftest exercise this path ?
>
>
>> + if (family == AF_INET &&
>> + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
>> + continue;
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + else if (family == AF_INET6 &&
>> + ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
>> + &saddr->sin6.sin6_addr) != 0)
>> + continue;
>> +#endif
>> + }
>> +
>> + if (inet->inet_sport == port &&
>> vxlan_get_sk_family(vs) == family &&
>> vs->flags == flags &&
>> vs->sock->sk->sk_bound_dev_if == ifindex)

Nice catch. I don't think the new selftest exercises this path, but I'm running the other vxlan selftests with the localbind option enabled by default and ensuring that they pass.
© 2016 - 2025 Red Hat, Inc.