[v5] Convert CONFIG_IPV6 to built-in and remove stubs

[PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by Fernando Fernandez Mancera 1 week, 1 day ago

As IPv6 is built-in only, the ipv6_bpf_stub can be removed completely.

Convert all ipv6_bpf_stub usage to direct function calls instead. The
fallback functions introduced previously will prevent linkage errors
when CONFIG_IPV6 is disabled.

Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Tested-by: Ricardo B. Marlière <rbm@suse.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/ipv6.h       |  2 ++
 include/net/ipv6_stubs.h | 21 ---------------
 net/core/filter.c        | 58 +++++++++++++++++-----------------------
 net/core/lwt_bpf.c       | 10 ++++---
 net/ipv6/af_inet6.c      | 13 ++-------
 5 files changed, 35 insertions(+), 69 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f99f273341f0..d042afe7a245 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1149,6 +1149,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
 void inet6_cleanup_sock(struct sock *sk);
 void inet6_sock_destruct(struct sock *sk);
 int inet6_release(struct socket *sock);
+int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
+		 u32 flags);
 int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len);
 int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 907681cecde8..dc708d9eca7a 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -77,25 +77,4 @@ struct ipv6_stub {
 			__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
-
-/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
-struct ipv6_bpf_stub {
-	int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
-			  u32 flags);
-	struct sock *(*udp6_lib_lookup)(const struct net *net,
-					const struct in6_addr *saddr, __be16 sport,
-					const struct in6_addr *daddr, __be16 dport,
-					int dif, int sdif, struct sk_buff *skb);
-	int (*ipv6_setsockopt)(struct sock *sk, int level, int optname,
-			       sockptr_t optval, unsigned int optlen);
-	int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
-			       sockptr_t optval, sockptr_t optlen);
-	int (*ipv6_dev_get_saddr)(struct net *net,
-				  const struct net_device *dst_dev,
-				  const struct in6_addr *daddr,
-				  unsigned int prefs,
-				  struct in6_addr *saddr);
-};
-extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
-
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index c56821afaa0f..d55525cc5540 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -73,7 +73,6 @@
 #include <net/seg6.h>
 #include <net/seg6_local.h>
 #include <net/lwtunnel.h>
-#include <net/ipv6_stubs.h>
 #include <net/bpf_sk_storage.h>
 #include <net/transp_v6.h>
 #include <linux/btf_ids.h>
@@ -2279,7 +2278,7 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
 			.saddr	      = ip6h->saddr,
 		};
 
-		dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+		dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
 		if (IS_ERR(dst))
 			goto out_drop;
 
@@ -5577,12 +5576,12 @@ static int sol_ipv6_sockopt(struct sock *sk, int optname,
 	}
 
 	if (getopt)
-		return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname,
-						      KERNEL_SOCKPTR(optval),
-						      KERNEL_SOCKPTR(optlen));
+		return do_ipv6_getsockopt(sk, SOL_IPV6, optname,
+					  KERNEL_SOCKPTR(optval),
+					  KERNEL_SOCKPTR(optlen));
 
-	return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname,
-					      KERNEL_SOCKPTR(optval), *optlen);
+	return do_ipv6_setsockopt(sk, SOL_IPV6, optname,
+				  KERNEL_SOCKPTR(optval), *optlen);
 }
 
 static int __bpf_setsockopt(struct sock *sk, int level, int optname,
@@ -5981,9 +5980,6 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
-EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
-
 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 	   int, addr_len)
 {
@@ -6007,11 +6003,9 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 			return err;
 		if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
 			flags |= BIND_FORCE_ADDRESS_NO_PORT;
-		/* ipv6_bpf_stub cannot be NULL, since it's called from
-		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
-		 */
-		return ipv6_bpf_stub->inet6_bind(sk, (struct sockaddr_unsized *)addr,
-						 addr_len, flags);
+
+		return __inet6_bind(sk, (struct sockaddr_unsized *)addr,
+				    addr_len, flags);
 #endif /* CONFIG_IPV6 */
 	}
 #endif /* CONFIG_INET */
@@ -6099,9 +6093,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 			       u32 flags, bool check_mtu)
 {
+	struct neighbour *neigh = NULL;
 	struct fib_nh_common *nhc;
 	struct in_device *in_dev;
-	struct neighbour *neigh;
 	struct net_device *dev;
 	struct fib_result res;
 	struct flowi4 fl4;
@@ -6221,8 +6215,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (likely(nhc->nhc_gw_family != AF_INET6))
 		neigh = __ipv4_neigh_lookup_noref(dev,
 						  (__force u32)params->ipv4_dst);
-	else
-		neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
+	else if (IS_ENABLED(CONFIG_IPV6))
+		neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst);
 
 	if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
@@ -6290,12 +6284,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 			params->tbid = 0;
 		}
 
-		tb = ipv6_stub->fib6_get_table(net, tbid);
+		tb = fib6_get_table(net, tbid);
 		if (unlikely(!tb))
 			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
-						   strict);
+		err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict);
 	} else {
 		if (flags & BPF_FIB_LOOKUP_MARK)
 			fl6.flowi6_mark = params->mark;
@@ -6305,7 +6298,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		fl6.flowi6_tun_key.tun_id = 0;
 		fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-		err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
+		err = fib6_lookup(net, oif, &fl6, &res, strict);
 	}
 
 	if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
@@ -6326,11 +6319,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		return BPF_FIB_LKUP_RET_NOT_FWDED;
 	}
 
-	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
-				    fl6.flowi6_oif != 0, NULL, strict);
+	fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
+			 fl6.flowi6_oif != 0, NULL, strict);
 
 	if (check_mtu) {
-		mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
+		mtu = ip6_mtu_from_fib6(&res, dst, src);
 		if (params->tot_len > mtu) {
 			params->mtu_result = mtu; /* union with tot_len */
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
@@ -6351,9 +6344,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		if (res.f6i->fib6_prefsrc.plen) {
 			*src = res.f6i->fib6_prefsrc.addr;
 		} else {
-			err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
-								&fl6.daddr, 0,
-								src);
+			err = ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src);
 			if (err)
 				return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
 		}
@@ -6365,7 +6356,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
 	 * not needed here.
 	 */
-	neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+	neigh = __ipv6_neigh_lookup_noref(dev, dst);
 	if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
 		return BPF_FIB_LKUP_RET_NO_NEIGH;
 	memcpy(params->dmac, neigh->ha, ETH_ALEN);
@@ -6900,11 +6891,10 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 					    src6, tuple->ipv6.sport,
 					    dst6, ntohs(tuple->ipv6.dport),
 					    dif, sdif, &refcounted);
-		else if (likely(ipv6_bpf_stub))
-			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
-							    src6, tuple->ipv6.sport,
-							    dst6, tuple->ipv6.dport,
-							    dif, sdif, NULL);
+		else if (likely(ipv6_mod_enabled()))
+			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
+					       dst6, tuple->ipv6.dport,
+					       dif, sdif, NULL);
 #endif
 	}
 
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 9f40be0c3e71..f71ef82a5f3d 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -13,7 +13,6 @@
 #include <net/gre.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
-#include <net/ipv6_stubs.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -103,7 +102,12 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
 		dev_put(dev);
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 		skb_dst_drop(skb);
-		err = ipv6_stub->ipv6_route_input(skb);
+		if (IS_ENABLED(CONFIG_IPV6)) {
+			ip6_route_input(skb);
+			err = skb_dst(skb)->error;
+		} else {
+			err = -EAFNOSUPPORT;
+		}
 	} else {
 		err = -EAFNOSUPPORT;
 	}
@@ -233,7 +237,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 		fl6.daddr = iph6->daddr;
 		fl6.saddr = iph6->saddr;
 
-		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
+		dst = ip6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
 		if (IS_ERR(dst)) {
 			err = PTR_ERR(dst);
 			goto err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bb29b29fdcfb..07ae6ea7743a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -264,8 +264,8 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	goto out;
 }
 
-static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
-			u32 flags)
+int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
+		 u32 flags)
 {
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -1032,14 +1032,6 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.ip6_xmit = ip6_xmit,
 };
 
-static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
-	.inet6_bind = __inet6_bind,
-	.udp6_lib_lookup = __udp6_lib_lookup,
-	.ipv6_setsockopt = do_ipv6_setsockopt,
-	.ipv6_getsockopt = do_ipv6_getsockopt,
-	.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
-};
-
 static int __init inet6_init(void)
 {
 	struct list_head *r;
@@ -1199,7 +1191,6 @@ static int __init inet6_init(void)
 	/* ensure that ipv6 stubs are visible only after ipv6 is ready */
 	wmb();
 	ipv6_stub = &ipv6_stub_impl;
-	ipv6_bpf_stub = &ipv6_bpf_stub_impl;
 out:
 	return err;
 
-- 
2.53.0

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by Martin KaFai Lau 1 week, 1 day ago

On 3/25/26 5:08 AM, Fernando Fernandez Mancera wrote:
> As IPv6 is built-in only, the ipv6_bpf_stub can be removed completely.
> 
> Convert all ipv6_bpf_stub usage to direct function calls instead. The
> fallback functions introduced previously will prevent linkage errors
> when CONFIG_IPV6 is disabled.

Thanks for working on this.

> @@ -6221,8 +6215,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   	if (likely(nhc->nhc_gw_family != AF_INET6))
>   		neigh = __ipv4_neigh_lookup_noref(dev,
>   						  (__force u32)params->ipv4_dst);
> -	else
> -		neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
> +	else if (IS_ENABLED(CONFIG_IPV6))
> +		neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst);

Should it be ipv6_mod_enabled() instead of IS_ENABLED(CONFIG_IPV6)?
Is nd_tbl always initialized?

>   
>   	if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
>   		return BPF_FIB_LKUP_RET_NO_NEIGH;
> @@ -6290,12 +6284,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   			params->tbid = 0;
>   		}
>   
> -		tb = ipv6_stub->fib6_get_table(net, tbid);
> +		tb = fib6_get_table(net, tbid);
>   		if (unlikely(!tb))
>   			return BPF_FIB_LKUP_RET_NOT_FWDED;
>   
> -		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
> -						   strict);
> +		err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict);

A similar question here and other changes in the patch.

I think bpf_ipv6_fib_lookup() is fine because the earlier
"!idev" check should fail when ipv6 is disabled at boot time?

>   	} else {
>   		if (flags & BPF_FIB_LOOKUP_MARK)
>   			fl6.flowi6_mark = params->mark;
> @@ -6305,7 +6298,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   		fl6.flowi6_tun_key.tun_id = 0;
>   		fl6.flowi6_uid = sock_net_uid(net, NULL);
>   
> -		err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
> +		err = fib6_lookup(net, oif, &fl6, &res, strict);
>   	}
>   
>   	if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
> @@ -6326,11 +6319,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   		return BPF_FIB_LKUP_RET_NOT_FWDED;
>   	}
>   
> -	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
> -				    fl6.flowi6_oif != 0, NULL, strict);
> +	fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
> +			 fl6.flowi6_oif != 0, NULL, strict);
>   
>   	if (check_mtu) {
> -		mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
> +		mtu = ip6_mtu_from_fib6(&res, dst, src);
>   		if (params->tot_len > mtu) {
>   			params->mtu_result = mtu; /* union with tot_len */
>   			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
> @@ -6351,9 +6344,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   		if (res.f6i->fib6_prefsrc.plen) {
>   			*src = res.f6i->fib6_prefsrc.addr;
>   		} else {
> -			err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
> -								&fl6.daddr, 0,
> -								src);
> +			err = ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src);
>   			if (err)
>   				return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
>   		}
> @@ -6365,7 +6356,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>   	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
>   	 * not needed here.
>   	 */
> -	neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
> +	neigh = __ipv6_neigh_lookup_noref(dev, dst);
>   	if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
>   		return BPF_FIB_LKUP_RET_NO_NEIGH;
>   	memcpy(params->dmac, neigh->ha, ETH_ALEN);
> @@ -6900,11 +6891,10 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
>   					    src6, tuple->ipv6.sport,
>   					    dst6, ntohs(tuple->ipv6.dport),
>   					    dif, sdif, &refcounted);
> -		else if (likely(ipv6_bpf_stub))
> -			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
> -							    src6, tuple->ipv6.sport,
> -							    dst6, tuple->ipv6.dport,
> -							    dif, sdif, NULL);
> +		else if (likely(ipv6_mod_enabled()))
> +			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
> +					       dst6, tuple->ipv6.dport,
> +					       dif, sdif, NULL);
>   #endif
>   	}
>   
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index 9f40be0c3e71..f71ef82a5f3d 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -13,7 +13,6 @@
>   #include <net/gre.h>
>   #include <net/ip.h>
>   #include <net/ip6_route.h>
> -#include <net/ipv6_stubs.h>
>   
>   struct bpf_lwt_prog {
>   	struct bpf_prog *prog;
> @@ -103,7 +102,12 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
>   		dev_put(dev);
>   	} else if (skb->protocol == htons(ETH_P_IPV6)) {
>   		skb_dst_drop(skb);
> -		err = ipv6_stub->ipv6_route_input(skb);
> +		if (IS_ENABLED(CONFIG_IPV6)) {
> +			ip6_route_input(skb);
> +			err = skb_dst(skb)->error;
> +		} else {
> +			err = -EAFNOSUPPORT;
> +		}
>   	} else {
>   		err = -EAFNOSUPPORT;
>   	}
> @@ -233,7 +237,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>   		fl6.daddr = iph6->daddr;
>   		fl6.saddr = iph6->saddr;
>   
> -		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
> +		dst = ip6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
>   		if (IS_ERR(dst)) {
>   			err = PTR_ERR(dst);
>   			goto err;
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index bb29b29fdcfb..07ae6ea7743a 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -264,8 +264,8 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
>   	goto out;
>   }
>   
> -static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
> -			u32 flags)
> +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
> +		 u32 flags)
>   {
>   	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
>   	struct inet_sock *inet = inet_sk(sk);
> @@ -1032,14 +1032,6 @@ static const struct ipv6_stub ipv6_stub_impl = {
>   	.ip6_xmit = ip6_xmit,
>   };
>   
> -static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
> -	.inet6_bind = __inet6_bind,
> -	.udp6_lib_lookup = __udp6_lib_lookup,
> -	.ipv6_setsockopt = do_ipv6_setsockopt,
> -	.ipv6_getsockopt = do_ipv6_getsockopt,
> -	.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
> -};
> -
>   static int __init inet6_init(void)
>   {
>   	struct list_head *r;
> @@ -1199,7 +1191,6 @@ static int __init inet6_init(void)
>   	/* ensure that ipv6 stubs are visible only after ipv6 is ready */
>   	wmb();
>   	ipv6_stub = &ipv6_stub_impl;
> -	ipv6_bpf_stub = &ipv6_bpf_stub_impl;
>   out:
>   	return err;
>

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by Fernando Fernandez Mancera 1 week, 1 day ago

On 3/25/26 8:11 PM, Martin KaFai Lau wrote:
> On 3/25/26 5:08 AM, Fernando Fernandez Mancera wrote:
>> As IPv6 is built-in only, the ipv6_bpf_stub can be removed completely.
>>
>> Convert all ipv6_bpf_stub usage to direct function calls instead. The
>> fallback functions introduced previously will prevent linkage errors
>> when CONFIG_IPV6 is disabled.
> 
> Thanks for working on this.
> 
>> @@ -6221,8 +6215,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, 
>> struct bpf_fib_lookup *params,
>>       if (likely(nhc->nhc_gw_family != AF_INET6))
>>           neigh = __ipv4_neigh_lookup_noref(dev,
>>                             (__force u32)params->ipv4_dst);
>> -    else
>> -        neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
>> +    else if (IS_ENABLED(CONFIG_IPV6))
>> +        neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst);
> 
> Should it be ipv6_mod_enabled() instead of IS_ENABLED(CONFIG_IPV6)?
> Is nd_tbl always initialized?
> 

Hi Martin,

I don't think so. The IS_ENABLED(CONFIG_IPV6) check here is just to
prevent an undefined reference when compiling with CONFIG_IPV6=n. Note
that this code isn't reachable when ipv6.disable=1 is set at boot;
otherwise it would have crashed even before this change, because
ipv6_stub->nd_tbl is NULL when IPv6 has been disabled since boot.

We addressed the vulnerable paths already during this series:

https://lore.kernel.org/netdev/20260307-net-nd_tbl_fixes-v4-0-e2677e85628c@suse.com/#

>>       if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
>>           return BPF_FIB_LKUP_RET_NO_NEIGH;
>> @@ -6290,12 +6284,11 @@ static int bpf_ipv6_fib_lookup(struct net 
>> *net, struct bpf_fib_lookup *params,
>>               params->tbid = 0;
>>           }
>> -        tb = ipv6_stub->fib6_get_table(net, tbid);
>> +        tb = fib6_get_table(net, tbid);
>>           if (unlikely(!tb))
>>               return BPF_FIB_LKUP_RET_NOT_FWDED;
>> -        err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
>> -                           strict);
>> +        err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict);
> 
> A similar question here and other changes in the patch.
> 
> I think bpf_ipv6_fib_lookup() is fine because the earlier
> "!idev" check should fail when ipv6 is disabled at boot time?

Yes, the !idev check prevents us from reaching this path, so it is safe
to call fib6_table_lookup().

While working on the series I have been trying to exploit these paths
(this one and other changes in the series); so far I haven't found
anything.

Thanks for the review!
Fernando.

> 
>>       } else {
>>           if (flags & BPF_FIB_LOOKUP_MARK)
>>               fl6.flowi6_mark = params->mark;
>> @@ -6305,7 +6298,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, 
>> struct bpf_fib_lookup *params,
>>           fl6.flowi6_tun_key.tun_id = 0;
>>           fl6.flowi6_uid = sock_net_uid(net, NULL);
>> -        err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
>> +        err = fib6_lookup(net, oif, &fl6, &res, strict);
>>       }
>>       if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
>> @@ -6326,11 +6319,11 @@ static int bpf_ipv6_fib_lookup(struct net 
>> *net, struct bpf_fib_lookup *params,
>>           return BPF_FIB_LKUP_RET_NOT_FWDED;
>>       }
>> -    ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
>> -                    fl6.flowi6_oif != 0, NULL, strict);
>> +    fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
>> +             fl6.flowi6_oif != 0, NULL, strict);
>>       if (check_mtu) {
>> -        mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
>> +        mtu = ip6_mtu_from_fib6(&res, dst, src);
>>           if (params->tot_len > mtu) {
>>               params->mtu_result = mtu; /* union with tot_len */
>>               return BPF_FIB_LKUP_RET_FRAG_NEEDED;
>> @@ -6351,9 +6344,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, 
>> struct bpf_fib_lookup *params,
>>           if (res.f6i->fib6_prefsrc.plen) {
>>               *src = res.f6i->fib6_prefsrc.addr;
>>           } else {
>> -            err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
>> -                                &fl6.daddr, 0,
>> -                                src);
>> +            err = ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src);
>>               if (err)
>>                   return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
>>           }
>> @@ -6365,7 +6356,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, 
>> struct bpf_fib_lookup *params,
>>       /* xdp and cls_bpf programs are run in RCU-bh so 
>> rcu_read_lock_bh is
>>        * not needed here.
>>        */
>> -    neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
>> +    neigh = __ipv6_neigh_lookup_noref(dev, dst);
>>       if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
>>           return BPF_FIB_LKUP_RET_NO_NEIGH;
>>       memcpy(params->dmac, neigh->ha, ETH_ALEN);
>> @@ -6900,11 +6891,10 @@ static struct sock *sk_lookup(struct net *net, 
>> struct bpf_sock_tuple *tuple,
>>                           src6, tuple->ipv6.sport,
>>                           dst6, ntohs(tuple->ipv6.dport),
>>                           dif, sdif, &refcounted);
>> -        else if (likely(ipv6_bpf_stub))
>> -            sk = ipv6_bpf_stub->udp6_lib_lookup(net,
>> -                                src6, tuple->ipv6.sport,
>> -                                dst6, tuple->ipv6.dport,
>> -                                dif, sdif, NULL);
>> +        else if (likely(ipv6_mod_enabled()))
>> +            sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
>> +                           dst6, tuple->ipv6.dport,
>> +                           dif, sdif, NULL);
>>   #endif
>>       }
>> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
>> index 9f40be0c3e71..f71ef82a5f3d 100644
>> --- a/net/core/lwt_bpf.c
>> +++ b/net/core/lwt_bpf.c
>> @@ -13,7 +13,6 @@
>>   #include <net/gre.h>
>>   #include <net/ip.h>
>>   #include <net/ip6_route.h>
>> -#include <net/ipv6_stubs.h>
>>   struct bpf_lwt_prog {
>>       struct bpf_prog *prog;
>> @@ -103,7 +102,12 @@ static int bpf_lwt_input_reroute(struct sk_buff 
>> *skb)
>>           dev_put(dev);
>>       } else if (skb->protocol == htons(ETH_P_IPV6)) {
>>           skb_dst_drop(skb);
>> -        err = ipv6_stub->ipv6_route_input(skb);
>> +        if (IS_ENABLED(CONFIG_IPV6)) {
>> +            ip6_route_input(skb);
>> +            err = skb_dst(skb)->error;
>> +        } else {
>> +            err = -EAFNOSUPPORT;
>> +        }
>>       } else {
>>           err = -EAFNOSUPPORT;
>>       }
>> @@ -233,7 +237,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>>           fl6.daddr = iph6->daddr;
>>           fl6.saddr = iph6->saddr;
>> -        dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
>> +        dst = ip6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
>>           if (IS_ERR(dst)) {
>>               err = PTR_ERR(dst);
>>               goto err;
>> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
>> index bb29b29fdcfb..07ae6ea7743a 100644
>> --- a/net/ipv6/af_inet6.c
>> +++ b/net/ipv6/af_inet6.c
>> @@ -264,8 +264,8 @@ static int inet6_create(struct net *net, struct 
>> socket *sock, int protocol,
>>       goto out;
>>   }
>> -static int __inet6_bind(struct sock *sk, struct sockaddr_unsized 
>> *uaddr, int addr_len,
>> -            u32 flags)
>> +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int 
>> addr_len,
>> +         u32 flags)
>>   {
>>       struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
>>       struct inet_sock *inet = inet_sk(sk);
>> @@ -1032,14 +1032,6 @@ static const struct ipv6_stub ipv6_stub_impl = {
>>       .ip6_xmit = ip6_xmit,
>>   };
>> -static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
>> -    .inet6_bind = __inet6_bind,
>> -    .udp6_lib_lookup = __udp6_lib_lookup,
>> -    .ipv6_setsockopt = do_ipv6_setsockopt,
>> -    .ipv6_getsockopt = do_ipv6_getsockopt,
>> -    .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
>> -};
>> -
>>   static int __init inet6_init(void)
>>   {
>>       struct list_head *r;
>> @@ -1199,7 +1191,6 @@ static int __init inet6_init(void)
>>       /* ensure that ipv6 stubs are visible only after ipv6 is ready */
>>       wmb();
>>       ipv6_stub = &ipv6_stub_impl;
>> -    ipv6_bpf_stub = &ipv6_bpf_stub_impl;
>>   out:
>>       return err;
>

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by David Ahern 1 week, 1 day ago

On 3/25/26 2:29 PM, Fernando Fernandez Mancera wrote:
> Hi Martin,
> 
> I don't think so. The IS_ENABLED(CONFIG_IPV6) check here is just to
> prevent an undefined reference when compiling with CONFIG_IPV6=n. Note
> that this code isn't reachable when ipv6.disable=1 is set during
> booting, as it would have crashed even before this change because
> ipv6_stub->nd_tbl is NULL if the IPV6 is disabled since booting.
> 
> We addressed the vulnerable paths already during this series:
> 
> https://lore.kernel.org/netdev/20260307-net-nd_tbl_fixes-v4-0-e2677e85628c@suse.com/#

What about the use case of IPv4 routes with IPv6 nexthop address? Has
that been tested with a bpf forwarding program?

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by Fernando Fernandez Mancera 1 week, 1 day ago

On 3/25/26 11:40 PM, David Ahern wrote:
> On 3/25/26 2:29 PM, Fernando Fernandez Mancera wrote:
>> Hi Martin,
>>
>> I don't think so. The IS_ENABLED(CONFIG_IPV6) check here is just to
>> prevent an undefined reference when compiling with CONFIG_IPV6=n. Note
>> that this code isn't reachable when ipv6.disable=1 is set during
>> booting, as it would have crashed even before this change because
>> ipv6_stub->nd_tbl is NULL if the IPV6 is disabled since booting.
>>
>> We addressed the vulnerable paths already during this series:
>>
>> https://lore.kernel.org/netdev/20260307-net-nd_tbl_fixes-v4-0-e2677e85628c@suse.com/#
> 
> What about the use case of IPv4 routes with IPv6 nexthop address? Has
> that been tested with a bpf forwarding program?
> 

Hi David, I did the following testing:

1. ipv6.disable=1 set at boot - the IPv6 nexthop cannot be added to
the IPv4 route at all.

2. IPv6 disabled after booting but before configuring the IPv4 route -
same result as above.

3. IPv6 disabled after booting and after the IPv4 route is configured -
the neighbor lookup is fine as the nd_tbl is initialized. It didn't crash.

I did tracing to make sure that code path was hit. I tested the
forwarding with 3 namespaces (1<-->2<-->3), loading the BPF program in
the second one, which calls the bpf_fib_lookup() helper.

I could do more testing regarding this if needed, let me know.

Thanks,
Fernando.

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by David Ahern 1 week ago

On 3/25/26 5:41 PM, Fernando Fernandez Mancera wrote:
> Hi David, I did the following testing:
> 
> 1. ipv6.disabled=1 since booting - the IPv6 nexthop cannot be added to
> the IPv4 route at all.
> 
> 2. ipv6.disabled=1 after booting but before configuring the IPv4 route -
> same result as above.
> 
> 3. ipv6.disabled=1 after booting and after the IPv4 is configured - the
> neighbor lookup is fine as the nd_tbl is initialized. It didn't crash.
> 
> I did tracing to make sure that code path was hit. I tested the
> forwarding with 3 namespaces 1<-->2<-->3 loading the BPF program on the
> second one performing the bpf_fib_lookup() instruction.
> 
> I could do more testing regarding this if needed, let me know.
> 

that looks good. Thank you

Re: [PATCH 09/11 net-next v5] bpf: remove ipv6_bpf_stub completely and use direct function calls

Posted by Martin KaFai Lau 1 week, 1 day ago

On 3/25/26 1:29 PM, Fernando Fernandez Mancera wrote:
> On 3/25/26 8:11 PM, Martin KaFai Lau wrote:
>> On 3/25/26 5:08 AM, Fernando Fernandez Mancera wrote:
>>> As IPv6 is built-in only, the ipv6_bpf_stub can be removed completely.
>>>
>>> Convert all ipv6_bpf_stub usage to direct function calls instead. The
>>> fallback functions introduced previously will prevent linkage errors
>>> when CONFIG_IPV6 is disabled.
>>
>> Thanks for working on this.
>>
>>> @@ -6221,8 +6215,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, 
>>> struct bpf_fib_lookup *params,
>>>       if (likely(nhc->nhc_gw_family != AF_INET6))
>>>           neigh = __ipv4_neigh_lookup_noref(dev,
>>>                             (__force u32)params->ipv4_dst);
>>> -    else
>>> -        neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
>>> +    else if (IS_ENABLED(CONFIG_IPV6))
>>> +        neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst);
>>
>> Should it be ipv6_mod_enabled() instead of IS_ENABLED(CONFIG_IPV6)?
>> Is nd_tbl always initialized?
>>
> 
> Hi Martin,
> 
> I don't think so. The IS_ENABLED(CONFIG_IPV6) check here is just to 
> prevent an undefined reference when compiling with CONFIG_IPV6=n. Note 
> that this code isn't reachable when ipv6.disable=1 is set during 
> booting, as it would have crashed even before this change because 
> ipv6_stub->nd_tbl is NULL if the IPV6 is disabled since booting.
> 
> We addressed the vulnerable paths already during this series:
> 
> https://lore.kernel.org/netdev/20260307-net-nd_tbl_fixes-v4-0- 
> e2677e85628c@suse.com/#
> 
>>>       if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
>>>           return BPF_FIB_LKUP_RET_NO_NEIGH;
>>> @@ -6290,12 +6284,11 @@ static int bpf_ipv6_fib_lookup(struct net 
>>> *net, struct bpf_fib_lookup *params,
>>>               params->tbid = 0;
>>>           }
>>> -        tb = ipv6_stub->fib6_get_table(net, tbid);
>>> +        tb = fib6_get_table(net, tbid);
>>>           if (unlikely(!tb))
>>>               return BPF_FIB_LKUP_RET_NOT_FWDED;
>>> -        err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
>>> -                           strict);
>>> +        err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict);
>>
>> A similar question here and other changes in the patch.
>>
>> I think bpf_ipv6_fib_lookup() is fine because the earlier
>> "!idev" check should fail when ipv6 is disabled at boot time?
> 
> Yes, the !idev check prevents us to reach this path so it is safe to 
> call fib6_table_lookup().
> 
> While working on the series I have been trying to exploit these paths 
> (this one and other changes in the series), so far I didn't find anything.

Thanks for the explanation.

Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>