From: David Woodhouse <dwmw@amazon.co.uk>
Wrap the IPv4-specific registrations in inet_init() with
CONFIG_LEGACY_IP guards. When LEGACY_IP is disabled, the kernel
will not:
- Register the AF_INET socket family
- Register the ETH_P_IP packet handler (ip_rcv)
- Initialize ARP, ICMP, IGMP, or IPv4 routing
- Register IPv4 protocol handlers (TCP/UDP/ICMP over IPv4)
- Initialize IPv4 multicast routing, proc entries, or fragmentation
The shared INET infrastructure (tcp_prot, udp_prot, tcp_init, etc.)
remains initialized for use by IPv6.
Also update INDIRECT_CALL_INET to not use ip_rcv/ip_list_rcv as
direct call targets when LEGACY_IP is disabled, avoiding a link-time
reference to functions that will eventually be compiled out.
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
include/linux/indirect_call_wrapper.h | 4 +++-
net/ipv4/af_inet.c | 20 +++++++++++++-----
net/ipv4/devinet.c | 2 ++
net/ipv4/route.c | 1 -
net/ipv4/tcp_ipv4.c | 30 ++++++++++++++-------------
5 files changed, 36 insertions(+), 21 deletions(-)
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index dc272b514a01..25a3873da462 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -57,9 +57,11 @@
* builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6
* alternatives
*/
-#if IS_BUILTIN(CONFIG_IPV6)
+#if IS_BUILTIN(CONFIG_IPV6) && IS_ENABLED(CONFIG_LEGACY_IP)
#define INDIRECT_CALL_INET(f, f2, f1, ...) \
INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#elif IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
#elif IS_ENABLED(CONFIG_INET)
#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
#else
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c7731e300a44..dc358faa1647 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1922,7 +1922,15 @@ static int __init inet_init(void)
/*
* Tell SOCKET that we are alive...
*/
+ /* Initialize the socket-side protocol switch tables. */
+ for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
+ INIT_LIST_HEAD(r);
+
+#ifdef CONFIG_XFRM
+ xfrm_init();
+#endif
+#ifdef CONFIG_LEGACY_IP
(void)sock_register(&inet_family_ops);
#ifdef CONFIG_SYSCTL
@@ -1957,10 +1965,6 @@ static int __init inet_init(void)
pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif
- /* Register the socket-side information for inet_create. */
- for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
- INIT_LIST_HEAD(r);
-
for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
inet_register_protosw(q);
@@ -1975,6 +1979,7 @@ static int __init inet_init(void)
*/
ip_init();
+#endif /* CONFIG_LEGACY_IP */
/* Initialise per-cpu ipv4 mibs */
if (init_ipv4_mibs())
@@ -1987,7 +1992,8 @@ static int __init inet_init(void)
udp_init();
/* Add UDP-Lite (RFC 3828) */
- udplite4_register();
+ if (IS_ENABLED(CONFIG_LEGACY_IP))
+ udplite4_register();
raw_init();
@@ -1997,6 +2003,7 @@ static int __init inet_init(void)
* Set the ICMP layer up
*/
+#ifdef CONFIG_LEGACY_IP
if (icmp_init() < 0)
panic("Failed to create the ICMP control socket.\n");
@@ -2007,10 +2014,12 @@ static int __init inet_init(void)
if (ip_mr_init())
pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif
+#endif /* CONFIG_LEGACY_IP */
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
+#ifdef CONFIG_LEGACY_IP
ipv4_proc_init();
ipfrag_init();
@@ -2018,6 +2027,7 @@ static int __init inet_init(void)
dev_add_pack(&ip_packet_type);
ip_tunnel_core_init();
+#endif /* CONFIG_LEGACY_IP */
rc = 0;
out:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 537bb6c315d2..9b9db10e5db2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -348,7 +348,9 @@ static int __init inet_blackhole_dev_init(void)
return PTR_ERR_OR_ZERO(in_dev);
}
+#ifdef CONFIG_LEGACY_IP
late_initcall(inet_blackhole_dev_init);
+#endif
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 463236e0dc2d..125614f552c7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3773,7 +3773,6 @@ int __init ip_rt_init(void)
if (ip_rt_proc_init())
pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
- xfrm_init();
xfrm4_init();
#endif
rtnl_register_many(ip_rt_rtnl_msg_handlers);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c7b2463c2e25..7660bd45aac7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3717,25 +3717,27 @@ static void __init bpf_iter_register(void)
void __init tcp_v4_init(void)
{
- int cpu, res;
+ if (IS_ENABLED(CONFIG_LEGACY_IP)) {
+ int cpu, res;
- for_each_possible_cpu(cpu) {
- struct sock *sk;
+ for_each_possible_cpu(cpu) {
+ struct sock *sk;
- res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
- IPPROTO_TCP, &init_net);
- if (res)
- panic("Failed to create the TCP control socket.\n");
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+ IPPROTO_TCP, &init_net);
+ if (res)
+ panic("Failed to create the TCP control socket.\n");
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
- /* Please enforce IP_DF and IPID==0 for RST and
- * ACK sent in SYN-RECV and TIME-WAIT state.
- */
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+ /* Please enforce IP_DF and IPID==0 for RST and
+ * ACK sent in SYN-RECV and TIME-WAIT state.
+ */
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
- sk->sk_clockid = CLOCK_MONOTONIC;
+ sk->sk_clockid = CLOCK_MONOTONIC;
- per_cpu(ipv4_tcp_sk.sock, cpu) = sk;
+ per_cpu(ipv4_tcp_sk.sock, cpu) = sk;
+ }
}
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
--
2.51.0
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1922,7 +1922,15 @@ static int __init inet_init(void)
> /*
> * Tell SOCKET that we are alive...
> */
> + /* Initialize the socket-side protocol switch tables. */
> + for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
> + INIT_LIST_HEAD(r);
> +
> +#ifdef CONFIG_XFRM
> + xfrm_init();
> +#endif
>
> +#ifdef CONFIG_LEGACY_IP
> (void)sock_register(&inet_family_ops);
Moving xfrm_init() here places it before sock_register(), but
xfrm_init() calls xfrm_nat_keepalive_init(AF_INET) which creates
per-cpu PF_INET sockets via this chain:
xfrm_init()
xfrm_nat_keepalive_init(AF_INET)
nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET)
inet_ctl_sock_create(&sk, PF_INET, ...)
sock_create_kern()
__sock_create()
pf = rcu_dereference(net_families[PF_INET]);
if (!pf) -> returns -EAFNOSUPPORT
Since sock_register(&inet_family_ops) has not been called yet,
net_families[PF_INET] is still NULL at this point. This causes
xfrm_nat_keepalive_init(AF_INET) to silently fail (the return
value is not checked by xfrm_init()), leaving the IPv4 XFRM NAT
keepalive sockets uninitialized.
This affects the default CONFIG_LEGACY_IP=y configuration as
well, not just IPv6-only builds. Would it be better to leave
xfrm_init() after sock_register(), or move the
xfrm_nat_keepalive_init(AF_INET) call into xfrm4_init() where
it would run after IPv4 socket registration is complete?
Previously, xfrm_init() was called from ip_rt_init() which ran
well after sock_register():
inet_init()
sock_register(&inet_family_ops) <-- AF_INET available
...
ip_init()
ip_rt_init()
xfrm_init() <-- worked fine
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23838411859
On Wed, Apr 1, 2026 at 12:45 AM David Woodhouse <dwmw2@infradead.org> wrote:
>
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Wrap the IPv4-specific registrations in inet_init() with
> CONFIG_LEGACY_IP guards. When LEGACY_IP is disabled, the kernel
> will not:
> - Register the AF_INET socket family
> - Register the ETH_P_IP packet handler (ip_rcv)
> - Initialize ARP, ICMP, IGMP, or IPv4 routing
> - Register IPv4 protocol handlers (TCP/UDP/ICMP over IPv4)
> - Initialize IPv4 multicast routing, proc entries, or fragmentation
>
> The shared INET infrastructure (tcp_prot, udp_prot, tcp_init, etc.)
> remains initialized for use by IPv6.
> ...
>
> /* Add UDP-Lite (RFC 3828) */
> - udplite4_register();
> + if (IS_ENABLED(CONFIG_LEGACY_IP))
> + udplite4_register();

udplite has been removed in net-next.

I would think your patch series is net-next material ?
On Wed, 2026-04-01 at 02:14 -0700, Eric Dumazet wrote:
>
> >
> > /* Add UDP-Lite (RFC 3828) */
> > - udplite4_register();
> > + if (IS_ENABLED(CONFIG_LEGACY_IP))
> > + udplite4_register();
>
> udplite has been removed in net-next.
>
> I would think your patch series is net-next material ?

A more conservative variant of the patch series on another day of the
year, sure.

It also probably wants to land after
https://lore.kernel.org/lkml/20260310153506.5181-1-fmancera@suse.de/
turns CONFIG_IPV6 into a boolean.

I'll need to take a closer look at CONFIG_INET too; it ends up being
possible to configure with INET && !LEGACY_IP && !IPV6 which isn't a
combination that makes sense (and I obviously didn't test).

As discussed, some of this series *is* realistic for another day, and
I'll happily work on whatever direction we think makes sense.
© 2016 - 2026 Red Hat, Inc.