[PATCH 3/6] net: Guard Legacy IP entry points with CONFIG_LEGACY_IP

David Woodhouse posted 6 patches 14 hours ago
[PATCH 3/6] net: Guard Legacy IP entry points with CONFIG_LEGACY_IP
Posted by David Woodhouse 14 hours ago
From: David Woodhouse <dwmw@amazon.co.uk>

Wrap the IPv4-specific registrations in inet_init() with
CONFIG_LEGACY_IP guards. When LEGACY_IP is disabled, the kernel
will not:
 - Register the AF_INET socket family
 - Register the ETH_P_IP packet handler (ip_rcv)
 - Initialize ARP, ICMP, IGMP, or IPv4 routing
 - Register IPv4 protocol handlers (TCP/UDP/ICMP over IPv4)
 - Initialize IPv4 multicast routing, proc entries, or fragmentation

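The shared INET infrastructure (tcp_prot, udp_prot, tcp_init, etc.)
remains initialized for use by IPv6. To keep it outside the guarded
region, the inetsw list-head initialization is moved ahead of the
CONFIG_LEGACY_IP block, and xfrm_init() is moved from ip_rt_init()
into inet_init().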

Also update INDIRECT_CALL_INET to not use ip_rcv/ip_list_rcv as
direct call targets when LEGACY_IP is disabled, avoiding a link-time
reference to functions that will eventually be compiled out.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 include/linux/indirect_call_wrapper.h |  4 +++-
 net/ipv4/af_inet.c                    | 20 +++++++++++++-----
 net/ipv4/devinet.c                    |  2 ++
 net/ipv4/route.c                      |  1 -
 net/ipv4/tcp_ipv4.c                   | 30 ++++++++++++++-------------
 5 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index dc272b514a01..25a3873da462 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -57,9 +57,11 @@
  * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6
  * alternatives
  */
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_BUILTIN(CONFIG_IPV6) && IS_ENABLED(CONFIG_LEGACY_IP)
 #define INDIRECT_CALL_INET(f, f2, f1, ...) \
 	INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#elif IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
 #elif IS_ENABLED(CONFIG_INET)
 #define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
 #else
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c7731e300a44..dc358faa1647 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1922,7 +1922,15 @@ static int __init inet_init(void)
 	/*
 	 *	Tell SOCKET that we are alive...
 	 */
+	/* Initialize the socket-side protocol switch tables. */
+	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+#ifdef CONFIG_XFRM
+	xfrm_init();
+#endif
 
+#ifdef CONFIG_LEGACY_IP
 	(void)sock_register(&inet_family_ops);
 
 #ifdef CONFIG_SYSCTL
@@ -1957,10 +1965,6 @@ static int __init inet_init(void)
 		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
 #endif
 
-	/* Register the socket-side information for inet_create. */
-	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
-		INIT_LIST_HEAD(r);
-
 	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
 		inet_register_protosw(q);
 
@@ -1975,6 +1979,7 @@ static int __init inet_init(void)
 	 */
 
 	ip_init();
+#endif /* CONFIG_LEGACY_IP */
 
 	/* Initialise per-cpu ipv4 mibs */
 	if (init_ipv4_mibs())
@@ -1987,7 +1992,8 @@ static int __init inet_init(void)
 	udp_init();
 
 	/* Add UDP-Lite (RFC 3828) */
-	udplite4_register();
+	if (IS_ENABLED(CONFIG_LEGACY_IP))
+		udplite4_register();
 
 	raw_init();
 
@@ -1997,6 +2003,7 @@ static int __init inet_init(void)
 	 *	Set the ICMP layer up
 	 */
 
+#ifdef CONFIG_LEGACY_IP
 	if (icmp_init() < 0)
 		panic("Failed to create the ICMP control socket.\n");
 
@@ -2007,10 +2014,12 @@ static int __init inet_init(void)
 	if (ip_mr_init())
 		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
 #endif
+#endif /* CONFIG_LEGACY_IP */
 
 	if (init_inet_pernet_ops())
 		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
 
+#ifdef CONFIG_LEGACY_IP
 	ipv4_proc_init();
 
 	ipfrag_init();
@@ -2018,6 +2027,7 @@ static int __init inet_init(void)
 	dev_add_pack(&ip_packet_type);
 
 	ip_tunnel_core_init();
+#endif /* CONFIG_LEGACY_IP */
 
 	rc = 0;
 out:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 537bb6c315d2..9b9db10e5db2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -348,7 +348,9 @@ static int __init inet_blackhole_dev_init(void)
 
 	return PTR_ERR_OR_ZERO(in_dev);
 }
+#ifdef CONFIG_LEGACY_IP
 late_initcall(inet_blackhole_dev_init);
+#endif
 
 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 463236e0dc2d..125614f552c7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3773,7 +3773,6 @@ int __init ip_rt_init(void)
 	if (ip_rt_proc_init())
 		pr_err("Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
-	xfrm_init();
 	xfrm4_init();
 #endif
 	rtnl_register_many(ip_rt_rtnl_msg_handlers);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c7b2463c2e25..7660bd45aac7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3717,25 +3717,27 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
-	int cpu, res;
+	if (IS_ENABLED(CONFIG_LEGACY_IP)) {
+		int cpu, res;
 
-	for_each_possible_cpu(cpu) {
-		struct sock *sk;
+		for_each_possible_cpu(cpu) {
+			struct sock *sk;
 
-		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-					   IPPROTO_TCP, &init_net);
-		if (res)
-			panic("Failed to create the TCP control socket.\n");
-		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+			res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+						   IPPROTO_TCP, &init_net);
+			if (res)
+				panic("Failed to create the TCP control socket.\n");
+			sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
-		/* Please enforce IP_DF and IPID==0 for RST and
-		 * ACK sent in SYN-RECV and TIME-WAIT state.
-		 */
-		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+			/* Please enforce IP_DF and IPID==0 for RST and
+			 * ACK sent in SYN-RECV and TIME-WAIT state.
+			 */
+			inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
 
-		sk->sk_clockid = CLOCK_MONOTONIC;
+			sk->sk_clockid = CLOCK_MONOTONIC;
 
-		per_cpu(ipv4_tcp_sk.sock, cpu) = sk;
+			per_cpu(ipv4_tcp_sk.sock, cpu) = sk;
+		}
 	}
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
-- 
2.51.0
Re: [PATCH 3/6] net: Guard Legacy IP entry points with CONFIG_LEGACY_IP
Posted by bot+bpf-ci@kernel.org 13 hours ago
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1922,7 +1922,15 @@ static int __init inet_init(void)
>  	/*
>  	 *	Tell SOCKET that we are alive...
>  	 */
> +	/* Initialize the socket-side protocol switch tables. */
> +	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
> +		INIT_LIST_HEAD(r);
> +
> +#ifdef CONFIG_XFRM
> +	xfrm_init();
> +#endif
>
> +#ifdef CONFIG_LEGACY_IP
>  	(void)sock_register(&inet_family_ops);

Moving xfrm_init() here places it before sock_register(), but
xfrm_init() calls xfrm_nat_keepalive_init(AF_INET) which creates
per-cpu PF_INET sockets via this chain:

    xfrm_init()
      xfrm_nat_keepalive_init(AF_INET)
        nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET)
          inet_ctl_sock_create(&sk, PF_INET, ...)
            sock_create_kern()
              __sock_create()
                pf = rcu_dereference(net_families[PF_INET]);
                if (!pf) -> returns -EAFNOSUPPORT

Since sock_register(&inet_family_ops) has not been called yet,
net_families[PF_INET] is still NULL at this point. This causes
xfrm_nat_keepalive_init(AF_INET) to silently fail (the return
value is not checked by xfrm_init()), leaving the IPv4 XFRM NAT
keepalive sockets uninitialized.

This affects the default CONFIG_LEGACY_IP=y configuration as
well, not just IPv6-only builds. Would it be better to leave
xfrm_init() after sock_register(), or move the
xfrm_nat_keepalive_init(AF_INET) call into xfrm4_init() where
it would run after IPv4 socket registration is complete?

Previously, xfrm_init() was called from ip_rt_init() which ran
well after sock_register():

    inet_init()
      sock_register(&inet_family_ops)   <-- AF_INET available
      ...
      ip_init()
        ip_rt_init()
          xfrm_init()                   <-- worked fine


---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23838411859
Re: [PATCH 3/6] net: Guard Legacy IP entry points with CONFIG_LEGACY_IP
Posted by Eric Dumazet 13 hours ago
On Wed, Apr 1, 2026 at 12:45 AM David Woodhouse <dwmw2@infradead.org> wrote:
>
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Wrap the IPv4-specific registrations in inet_init() with
> CONFIG_LEGACY_IP guards. When LEGACY_IP is disabled, the kernel
> will not:
>  - Register the AF_INET socket family
>  - Register the ETH_P_IP packet handler (ip_rcv)
>  - Initialize ARP, ICMP, IGMP, or IPv4 routing
>  - Register IPv4 protocol handlers (TCP/UDP/ICMP over IPv4)
>  - Initialize IPv4 multicast routing, proc entries, or fragmentation
>
> The shared INET infrastructure (tcp_prot, udp_prot, tcp_init, etc.)
> remains initialized for use by IPv6.
>

...

>
>         /* Add UDP-Lite (RFC 3828) */
> -       udplite4_register();
> +       if (IS_ENABLED(CONFIG_LEGACY_IP))
> +               udplite4_register();

udplite has been removed in net-next.

I would think your patch series is net-next material ?
Re: [PATCH 3/6] net: Guard Legacy IP entry points with CONFIG_LEGACY_IP
Posted by David Woodhouse 13 hours ago
On Wed, 2026-04-01 at 02:14 -0700, Eric Dumazet wrote:
> 
> > 
> >          /* Add UDP-Lite (RFC 3828) */
> > -       udplite4_register();
> > +       if (IS_ENABLED(CONFIG_LEGACY_IP))
> > +               udplite4_register();
> 
> udplite has been removed in net-next.
> 
> I would think your patch series is net-next material ?

A more conservative variant of the patch series on another day of the
year, sure. It also probably wants to land after 
https://lore.kernel.org/lkml/20260310153506.5181-1-fmancera@suse.de/
turns CONFIG_IPV6 into a boolean.

I'll need to take a closer look at CONFIG_INET too; it ends up being
possible to configure with INET && !LEGACY_IP && !IPV6 which isn't a
combination that makes sense (and I obviously didn't test). 

As discussed, some of this series *is* realistic for another day, and
I'll happily work on whatever direction we think makes sense.