Allow binding geneve sockets to local addresses, similar to
the VXLAN "local" option. Add a netlink option to configure
the local address.
Like VXLAN, Geneve sockets can be bound to non-local addresses,
meaning they may be UP before their outgoing interfaces are.
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
drivers/net/geneve.c | 80 +++++++++++++++++++++++++++---
include/net/geneve.h | 6 +++
include/uapi/linux/if_link.h | 2 +
tools/include/uapi/linux/if_link.h | 2 +
4 files changed, 82 insertions(+), 8 deletions(-)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 54384f9b3872..bc88b9a52410 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -61,6 +61,7 @@ struct geneve_config {
bool inner_proto_inherit;
u16 port_min;
u16 port_max;
+ union geneve_addr saddr;
};
/* Pseudo network device */
@@ -465,7 +466,8 @@ static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
}
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
- __be16 port, bool ipv6_rx_csum)
+ __be16 port, bool ipv6_rx_csum,
+ union geneve_addr *local_addr)
{
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -477,11 +479,20 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
udp_conf.family = AF_INET6;
udp_conf.ipv6_v6only = 1;
udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
+#if IS_ENABLED(CONFIG_IPV6)
+ memcpy(&udp_conf.local_ip6,
+ &local_addr->sin6.sin6_addr,
+ sizeof(local_addr->sin6.sin6_addr));
+#endif
} else {
udp_conf.family = AF_INET;
udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+ memcpy(&udp_conf.local_ip,
+ &local_addr->sin.sin_addr,
+ sizeof(local_addr->sin.sin_addr));
}
+ udp_conf.freebind = 1;
udp_conf.local_udp_port = port;
/* Open UDP socket */
@@ -586,7 +597,8 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
/* Create new listen socket if needed */
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
- bool ipv6, bool ipv6_rx_csum)
+ bool ipv6, bool ipv6_rx_csum,
+ union geneve_addr *local_addr)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
@@ -598,7 +610,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
if (!gs)
return ERR_PTR(-ENOMEM);
- sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
+ sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum, local_addr);
if (IS_ERR(sock)) {
kfree(gs);
return ERR_CAST(sock);
@@ -657,12 +669,24 @@ static void geneve_sock_release(struct geneve_dev *geneve)
static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
sa_family_t family,
- __be16 dst_port)
+ __be16 dst_port,
+ union geneve_addr *saddr)
{
struct geneve_sock *gs;
list_for_each_entry(gs, &gn->sock_list, list) {
- if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
+ struct sock *sk = gs->sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (family == AF_INET &&
+ inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr)
+ continue;
+
+ else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr,
+ &saddr->sin6.sin6_addr) != 0)
+ continue;
+
+ if (inet->inet_sport == dst_port &&
geneve_get_sk_family(gs) == family) {
return gs;
}
@@ -679,14 +703,16 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
__u8 vni[3];
__u32 hash;
- gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst);
+ gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET,
+ geneve->cfg.info.key.tp_dst, &geneve->cfg.saddr);
if (gs) {
gs->refcnt++;
goto out;
}
gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
- geneve->cfg.use_udp6_rx_checksums);
+ geneve->cfg.use_udp6_rx_checksums,
+ &geneve->cfg.saddr);
if (IS_ERR(gs))
return PTR_ERR(gs);
@@ -1246,6 +1272,8 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
[IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
[IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
+ [IFLA_GENEVE_LOCAL] = NLA_POLICY_EXACT_LEN(sizeof_field(struct iphdr, saddr)),
+ [IFLA_GENEVE_LOCAL6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
};
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1596,6 +1624,32 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
cfg->inner_proto_inherit = true;
}
+ if (data[IFLA_GENEVE_LOCAL]) {
+ if (changelink && (ip_tunnel_info_af(info) != AF_INET)) {
+ attrtype = IFLA_GENEVE_LOCAL;
+ goto change_notsup;
+ }
+
+ cfg->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]);
+ cfg->saddr.sa.sa_family = AF_INET;
+ }
+
+ if (data[IFLA_GENEVE_LOCAL6]) {
+#if IS_ENABLED(CONFIG_IPV6)
+ if (changelink && (ip_tunnel_info_af(info) != AF_INET6)) {
+ attrtype = IFLA_GENEVE_LOCAL6;
+ goto change_notsup;
+ }
+
+ cfg->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]);
+ cfg->saddr.sa.sa_family = AF_INET6;
+#else
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6],
+ "IPv6 support not enabled in the kernel");
+ return -EPFNOSUPPORT;
+#endif
+ }
+
return 0;
change_notsup:
NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
@@ -1782,6 +1836,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
+ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_LOCAL{6} */
0;
}
@@ -1807,16 +1862,25 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
info->key.u.ipv4.dst))
goto nla_put_failure;
+
+ if (nla_put_in_addr(skb, IFLA_GENEVE_LOCAL,
+ info->key.u.ipv4.src))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
test_bit(IP_TUNNEL_CSUM_BIT,
info->key.tun_flags)))
goto nla_put_failure;
-
#if IS_ENABLED(CONFIG_IPV6)
} else if (!metadata) {
if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
&info->key.u.ipv6.dst))
goto nla_put_failure;
+
+ if (nla_put_in6_addr(skb, IFLA_GENEVE_LOCAL6,
+ &info->key.u.ipv6.src))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
!test_bit(IP_TUNNEL_CSUM_BIT,
info->key.tun_flags)))
diff --git a/include/net/geneve.h b/include/net/geneve.h
index 5c96827a487e..7b12c70db11f 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -62,6 +62,12 @@ struct genevehdr {
u8 options[];
};
+union geneve_addr {
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ struct sockaddr sa;
+};
+
static inline bool netif_is_geneve(const struct net_device *dev)
{
return dev->rtnl_link_ops &&
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7350129b1444..ff362d76a0d4 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1442,6 +1442,8 @@ enum {
IFLA_GENEVE_DF,
IFLA_GENEVE_INNER_PROTO_INHERIT,
IFLA_GENEVE_PORT_RANGE,
+ IFLA_GENEVE_LOCAL,
+ IFLA_GENEVE_LOCAL6,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index eee934cc2cf4..894a1aa91133 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1438,6 +1438,8 @@ enum {
IFLA_GENEVE_TTL_INHERIT,
IFLA_GENEVE_DF,
IFLA_GENEVE_INNER_PROTO_INHERIT,
+ IFLA_GENEVE_LOCAL,
+ IFLA_GENEVE_LOCAL6,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
--
2.36.1
On Thu, Jul 17, 2025 at 01:54:12PM +0200, Richard Gobert wrote: > @@ -1246,6 +1272,8 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { > [IFLA_GENEVE_DF] = { .type = NLA_U8 }, > [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, > [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), > + [IFLA_GENEVE_LOCAL] = NLA_POLICY_EXACT_LEN(sizeof_field(struct iphdr, saddr)), > + [IFLA_GENEVE_LOCAL6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), > }; Please update rt_link.yaml as well. See for example: https://lore.kernel.org/all/20250226182030.89440-2-daniel@iogearbox.net/
Hi Richard, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Richard-Gobert/net-udp-add-freebind-option-to-udp_sock_create/20250717-200233 base: net-next/main patch link: https://lore.kernel.org/r/20250717115412.11424-5-richardbgobert%40gmail.com patch subject: [PATCH net-next v4 4/4] net: geneve: enable binding geneve sockets to local addresses config: arm-randconfig-001-20250718 (https://download.01.org/0day-ci/archive/20250718/202507181610.vgBNLLYf-lkp@intel.com/config) compiler: arm-linux-gnueabi-gcc (GCC) 8.5.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250718/202507181610.vgBNLLYf-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507181610.vgBNLLYf-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from include/net/ipv6_stubs.h:11, from drivers/net/geneve.c:15: drivers/net/geneve.c: In function 'geneve_find_sock': >> include/net/sock.h:385:37: error: 'struct sock_common' has no member named 'skc_v6_rcv_saddr'; did you mean 'skc_rcv_saddr'? #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr ^~~~~~~~~~~~~~~~ drivers/net/geneve.c:685:31: note: in expansion of macro 'sk_v6_rcv_saddr' else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, ^~~~~~~~~~~~~~~ -- In file included from include/net/ipv6_stubs.h:11, from geneve.c:15: geneve.c: In function 'geneve_find_sock': >> include/net/sock.h:385:37: error: 'struct sock_common' has no member named 'skc_v6_rcv_saddr'; did you mean 'skc_rcv_saddr'? 
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr ^~~~~~~~~~~~~~~~ geneve.c:685:31: note: in expansion of macro 'sk_v6_rcv_saddr' else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, ^~~~~~~~~~~~~~~ vim +385 include/net/sock.h 4dc6dc7162c08b Eric Dumazet 2009-07-15 364 68835aba4d9b74 Eric Dumazet 2010-11-30 365 #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin 68835aba4d9b74 Eric Dumazet 2010-11-30 366 #define sk_dontcopy_end __sk_common.skc_dontcopy_end 4dc6dc7162c08b Eric Dumazet 2009-07-15 367 #define sk_hash __sk_common.skc_hash 5080546682bae3 Eric Dumazet 2013-10-02 368 #define sk_portpair __sk_common.skc_portpair 05dbc7b59481ca Eric Dumazet 2013-10-03 369 #define sk_num __sk_common.skc_num 05dbc7b59481ca Eric Dumazet 2013-10-03 370 #define sk_dport __sk_common.skc_dport 5080546682bae3 Eric Dumazet 2013-10-02 371 #define sk_addrpair __sk_common.skc_addrpair 5080546682bae3 Eric Dumazet 2013-10-02 372 #define sk_daddr __sk_common.skc_daddr 5080546682bae3 Eric Dumazet 2013-10-02 373 #define sk_rcv_saddr __sk_common.skc_rcv_saddr ^1da177e4c3f41 Linus Torvalds 2005-04-16 374 #define sk_family __sk_common.skc_family ^1da177e4c3f41 Linus Torvalds 2005-04-16 375 #define sk_state __sk_common.skc_state ^1da177e4c3f41 Linus Torvalds 2005-04-16 376 #define sk_reuse __sk_common.skc_reuse 055dc21a1d1d21 Tom Herbert 2013-01-22 377 #define sk_reuseport __sk_common.skc_reuseport 9fe516ba3fb29b Eric Dumazet 2014-06-27 378 #define sk_ipv6only __sk_common.skc_ipv6only 26abe14379f8e2 Eric W. Biederman 2015-05-08 379 #define sk_net_refcnt __sk_common.skc_net_refcnt ^1da177e4c3f41 Linus Torvalds 2005-04-16 380 #define sk_bound_dev_if __sk_common.skc_bound_dev_if ^1da177e4c3f41 Linus Torvalds 2005-04-16 381 #define sk_bind_node __sk_common.skc_bind_node 8feaf0c0a5488b Arnaldo Carvalho de Melo 2005-08-09 382 #define sk_prot __sk_common.skc_prot 07feaebfcc10cd Eric W. 
Biederman 2007-09-12 383 #define sk_net __sk_common.skc_net efe4208f47f907 Eric Dumazet 2013-10-03 384 #define sk_v6_daddr __sk_common.skc_v6_daddr efe4208f47f907 Eric Dumazet 2013-10-03 @385 #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr 33cf7c90fe2f97 Eric Dumazet 2015-03-11 386 #define sk_cookie __sk_common.skc_cookie 70da268b569d32 Eric Dumazet 2015-10-08 387 #define sk_incoming_cpu __sk_common.skc_incoming_cpu 8e5eb54d303b7c Eric Dumazet 2015-10-08 388 #define sk_flags __sk_common.skc_flags ed53d0ab761f5c Eric Dumazet 2015-10-08 389 #define sk_rxhash __sk_common.skc_rxhash efe4208f47f907 Eric Dumazet 2013-10-03 390 5d4cc87414c5d1 Eric Dumazet 2024-02-16 391 __cacheline_group_begin(sock_write_rx); 43f51df4172955 Eric Dumazet 2021-11-15 392 9115e8cd2a0c6e Eric Dumazet 2016-12-03 393 atomic_t sk_drops; 5d4cc87414c5d1 Eric Dumazet 2024-02-16 394 __s32 sk_peek_off; 9115e8cd2a0c6e Eric Dumazet 2016-12-03 395 struct sk_buff_head sk_error_queue; b178bb3dfc30d9 Eric Dumazet 2010-11-16 396 struct sk_buff_head sk_receive_queue; fa438ccfdfd3f6 Eric Dumazet 2007-03-04 397 /* fa438ccfdfd3f6 Eric Dumazet 2007-03-04 398 * The backlog queue is special, it is always used with fa438ccfdfd3f6 Eric Dumazet 2007-03-04 399 * the per-socket spinlock held and requires low latency fa438ccfdfd3f6 Eric Dumazet 2007-03-04 400 * access. Therefore we special case it's implementation. b178bb3dfc30d9 Eric Dumazet 2010-11-16 401 * Note : rmem_alloc is in this structure to fill a hole b178bb3dfc30d9 Eric Dumazet 2010-11-16 402 * on 64bit arches, not because its logically part of b178bb3dfc30d9 Eric Dumazet 2010-11-16 403 * backlog. 
fa438ccfdfd3f6 Eric Dumazet 2007-03-04 404 */ fa438ccfdfd3f6 Eric Dumazet 2007-03-04 405 struct { b178bb3dfc30d9 Eric Dumazet 2010-11-16 406 atomic_t rmem_alloc; b178bb3dfc30d9 Eric Dumazet 2010-11-16 407 int len; fa438ccfdfd3f6 Eric Dumazet 2007-03-04 408 struct sk_buff *head; fa438ccfdfd3f6 Eric Dumazet 2007-03-04 409 struct sk_buff *tail; fa438ccfdfd3f6 Eric Dumazet 2007-03-04 410 } sk_backlog; b178bb3dfc30d9 Eric Dumazet 2010-11-16 411 #define sk_rmem_alloc sk_backlog.rmem_alloc 2c8c56e15df3d4 Eric Dumazet 2014-11-11 412 5d4cc87414c5d1 Eric Dumazet 2024-02-16 413 __cacheline_group_end(sock_write_rx); 5d4cc87414c5d1 Eric Dumazet 2024-02-16 414 5d4cc87414c5d1 Eric Dumazet 2024-02-16 415 __cacheline_group_begin(sock_read_rx); 5d4cc87414c5d1 Eric Dumazet 2024-02-16 416 /* early demux fields */ 5d4cc87414c5d1 Eric Dumazet 2024-02-16 417 struct dst_entry __rcu *sk_rx_dst; 5d4cc87414c5d1 Eric Dumazet 2024-02-16 418 int sk_rx_dst_ifindex; 5d4cc87414c5d1 Eric Dumazet 2024-02-16 419 u32 sk_rx_dst_cookie; 5d4cc87414c5d1 Eric Dumazet 2024-02-16 420 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Thu, Jul 17, 2025 at 01:54:12PM +0200, Richard Gobert wrote: ... > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c ... > static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, > sa_family_t family, > - __be16 dst_port) > + __be16 dst_port, > + union geneve_addr *saddr) > { > struct geneve_sock *gs; > > list_for_each_entry(gs, &gn->sock_list, list) { > - if (inet_sk(gs->sock->sk)->inet_sport == dst_port && > + struct sock *sk = gs->sock->sk; > + struct inet_sock *inet = inet_sk(sk); > + > + if (family == AF_INET && > + inet->inet_rcv_saddr != saddr->sin.sin_addr.s_addr) > + continue; > + > + else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, > + &saddr->sin6.sin6_addr) != 0) > + continue; Hi Richard, Unfortunately this fails to build when CONFIG_IPV6 is not set. .../geneve.c:685:31: error: no member named 'skc_v6_rcv_saddr' in 'struct sock_common'; did you mean 'skc_rcv_saddr'? 685 | else if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, | ^ ./include/net/sock.h:385:37: note: expanded from macro 'sk_v6_rcv_saddr' 385 | #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr | ^ ./include/net/sock.h:155:11: note: 'skc_rcv_saddr' declared here 155 | __be32 skc_rcv_saddr; | ^ > + > + if (inet->inet_sport == dst_port && > geneve_get_sk_family(gs) == family) { > return gs; > } ... -- pw-bot: changes-requested
On Fri, Jul 18, 2025 at 08:31:41AM +0100, Simon Horman wrote: > On Thu, Jul 17, 2025 at 01:54:12PM +0200, Richard Gobert wrote: > > ... > > > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c > > ... > > > static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, > > sa_family_t family, > > - __be16 dst_port) > > + __be16 dst_port, > > + union geneve_addr *saddr) Sorry, one more minor thing: the indentation on the line above looks off. ...
© 2016 - 2025 Red Hat, Inc.