liveupdate: suppress TCP RST during post-kexec restore window

[PATCH v1 3/3] liveupdate: suppress TCP RST during post-kexec restore window

Posted by Li Chen 1 week, 2 days ago

During a kexec-based live update, userspace may restore established TCP
connections after the new kernel has booted (e.g. via CRIU). Any packet
arriving for a not-yet-restored socket will hit the no-socket path and
trigger a TCP RST, causing the peer to immediately drop the connection.
Add an optional cmdline knob, liveupdate_tcp_rst_suppress=, to drop such
packets while liveupdate_restore_in_progress() is true. Only segments
with ACK set and SYN clear are dropped, and the default behavior remains
unchanged.
Document the liveupdate_tcp_rst_suppress cmdline parameter.

Signed-off-by: Li Chen <me@linux.beauty>
---
 Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++
 include/linux/liveupdate.h                      | 11 +++++++++++
 kernel/liveupdate/luo_core.c                    | 14 ++++++++++++++
 kernel/liveupdate/luo_session.c                 |  1 +
 net/ipv4/tcp_ipv4.c                             |  5 +++++
 net/ipv6/tcp_ipv6.c                             |  5 +++++
 6 files changed, 46 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3097e4266d76..b73347a0aefd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3442,6 +3442,16 @@ Kernel parameters
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
+	liveupdate_tcp_rst_suppress=	[KNL,EARLY]
+			Format: <bool>
+			When enabled, drop TCP segments with ACK set and SYN clear
+			that match no socket and would otherwise trigger a RST
+			while the LUO post-kexec restore window is open.
+			This is useful when userspace restores sockets after
+			kexec (e.g. via CRIU).
+			Requires liveupdate=on.
+			Default: off.
+
 	lockd.nlm_grace_period=P  [NFS] Assign grace period.
 			Format: <integer>
 
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index 301d3e94516e..6ca740ec19d4 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -227,6 +227,12 @@ bool liveupdate_enabled(void);
  */
 bool liveupdate_restore_in_progress(void);
 
+/*
+ * Return true when TCP RST suppression is enabled for the post-kexec restore
+ * window.
+ */
+bool liveupdate_tcp_rst_suppress_enabled(void);
+
 /* Called during kexec to tell LUO that entered into reboot */
 int liveupdate_reboot(void);
 
@@ -253,6 +259,11 @@ static inline bool liveupdate_restore_in_progress(void)
 	return false;
 }
 
+static inline bool liveupdate_tcp_rst_suppress_enabled(void)
+{
+	return false;
+}
+
 static inline int liveupdate_reboot(void)
 {
 	return 0;
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index fb6a73c08979..0ed5c9ce1421 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -64,6 +64,7 @@
 
 static struct {
 	bool enabled;
+	bool tcp_rst_suppress;
 	void *fdt_out;
 	void *fdt_in;
 	u64 liveupdate_num;
@@ -75,6 +76,13 @@ static int __init early_liveupdate_param(char *buf)
 }
 early_param("liveupdate", early_liveupdate_param);
 
+static int __init early_liveupdate_tcp_rst_suppress_param(char *buf)
+{
+	return kstrtobool(buf, &luo_global.tcp_rst_suppress);
+}
+early_param("liveupdate_tcp_rst_suppress",
+	    early_liveupdate_tcp_rst_suppress_param);
+
 static int __init luo_early_startup(void)
 {
 	phys_addr_t fdt_phys;
@@ -259,6 +267,12 @@ bool liveupdate_enabled(void)
 	return luo_global.enabled;
 }
 
+bool liveupdate_tcp_rst_suppress_enabled(void)
+{
+	return liveupdate_enabled() && luo_global.tcp_rst_suppress;
+}
+EXPORT_SYMBOL_GPL(liveupdate_tcp_rst_suppress_enabled);
+
 /**
  * DOC: LUO ioctl Interface
  *
diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c
index 2c7dd3b12303..427ae74061ba 100644
--- a/kernel/liveupdate/luo_session.c
+++ b/kernel/liveupdate/luo_session.c
@@ -146,6 +146,7 @@ bool liveupdate_restore_in_progress(void)
 {
 	return atomic_long_read(&liveupdate_incoming_sessions_left) > 0;
 }
+EXPORT_SYMBOL_GPL(liveupdate_restore_in_progress);
 
 void __init luo_session_restore_window_init(void)
 {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8a9596e8f4d..9a95f3dbf39a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -56,6 +56,7 @@
 #include <linux/fips.h>
 #include <linux/jhash.h>
 #include <linux/init.h>
+#include <linux/liveupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
@@ -2349,6 +2350,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 bad_packet:
 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
 	} else {
+		if (liveupdate_tcp_rst_suppress_enabled() &&
+		    liveupdate_restore_in_progress() &&
+		    th->ack && !th->syn)
+			goto discard_it;
 		tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 280fe5978559..c2e680eba041 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -40,6 +40,7 @@
 #include <linux/icmpv6.h>
 #include <linux/random.h>
 #include <linux/indirect_call_wrapper.h>
+#include <linux/liveupdate.h>
 
 #include <net/aligned_data.h>
 #include <net/tcp.h>
@@ -1900,6 +1901,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 bad_packet:
 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
 	} else {
+		if (liveupdate_tcp_rst_suppress_enabled() &&
+		    liveupdate_restore_in_progress() &&
+		    th->ack && !th->syn)
+			goto discard_it;
 		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
 	}
 
-- 
2.52.0

Re: [PATCH v1 3/3] liveupdate: suppress TCP RST during post-kexec restore window

Posted by Jakub Kicinski 1 week, 2 days ago

On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
> During a kexec-based live update, userspace may restore established TCP
> connections after the new kernel has booted (e.g. via CRIU). Any packet
> arriving for a not-yet-restored socket will hit the no-socket path and
> trigger a TCP RST, causing the peer to immediately drop the connection.

Can you not add a filter to simply drop those packets until workload is
running again? It'd actually be less racy than this hac^w patch ...

Re: [PATCH v1 3/3] liveupdate: suppress TCP RST during post-kexec restore window

Posted by Li Chen 1 week, 1 day ago

Hi Jakub,

 > On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
 > > During a kexec-based live update, userspace may restore established TCP
 > > connections after the new kernel has booted (e.g. via CRIU). Any packet
 > > arriving for a not-yet-restored socket will hit the no-socket path and
 > > trigger a TCP RST, causing the peer to immediately drop the connection.
 > 
 > Can you not add a filter to simply drop those packets until workload is
 > running again? It'd actually be less racy than this hac^w patch ...
 > 

Thanks for the suggestion.

When you say "add a filter", do you mean installing a temporary drop rule
(nftables/iptables/tc) in the network domain which does not get rebooted by
kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
until the workload is restored and ready?

If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
the critical window: kexec resets the ruleset, so we'd have to install the
drop rule extremely early (initramfs) before any packets hit the no-socket
path, which still seems inherently racy.

If the expectation is to drain/blackhole traffic externally and re-enable it
once the workload is running again, I can rework the series to keep only the
restore-window tracking plus a clear "restore done" control plane, and rely
on the external filter for the data plane.

Regards
Li

Re: [PATCH v1 3/3] liveupdate: suppress TCP RST during post-kexec restore window

Posted by Jakub Kicinski 6 days, 4 hours ago

On Sun, 01 Feb 2026 09:44:27 +0800 Li Chen wrote:
>  > On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:  
>  > > During a kexec-based live update, userspace may restore established TCP
>  > > connections after the new kernel has booted (e.g. via CRIU). Any packet
>  > > arriving for a not-yet-restored socket will hit the no-socket path and
>  > > trigger a TCP RST, causing the peer to immediately drop the connection.  
>  > 
>  > Can you not add a filter to simply drop those packets until workload is
>  > running again? It'd actually be less racy than this hac^w patch ...
>  >   
> 
> Thanks for the suggestion.
> 
> When you say "add a filter", do you mean installing a temporary drop rule
> (nftables/iptables/tc) in the network domain which does not get rebooted by
> kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
> until the workload is restored and ready?
> 
> If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
> the critical window: kexec resets the ruleset, so we'd have to install the
> drop rule extremely early (initramfs) before any packets hit the no-socket
> path, which still seems inherently racy.

I'm not sure what your flow is exactly, but I assume you drive 
the workload restore from user space already?

Re: [PATCH v1 3/3] liveupdate: suppress TCP RST during post-kexec restore window

Posted by Li Chen 6 days, 2 hours ago

Hi Jakub,

 ---- On Tue, 03 Feb 2026 08:53:20 +0800  Jakub Kicinski <kuba@kernel.org> wrote --- 
 > On Sun, 01 Feb 2026 09:44:27 +0800 Li Chen wrote:
 > >  > On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:  
 > >  > > During a kexec-based live update, userspace may restore established TCP
 > >  > > connections after the new kernel has booted (e.g. via CRIU). Any packet
 > >  > > arriving for a not-yet-restored socket will hit the no-socket path and
 > >  > > trigger a TCP RST, causing the peer to immediately drop the connection.  
 > >  > 
 > >  > Can you not add a filter to simply drop those packets until workload is
 > >  > running again? It'd actually be less racy than this hac^w patch ...
 > >  >   
 > > 
 > > Thanks for the suggestion.
 > > 
 > > When you say "add a filter", do you mean installing a temporary drop rule
 > > (nftables/iptables/tc) in the network domain which does not get rebooted by
 > > kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
 > > until the workload is restored and ready?
 > > 
 > > If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
 > > the critical window: kexec resets the ruleset, so we'd have to install the
 > > drop rule extremely early (initramfs) before any packets hit the no-socket
 > > path, which still seems inherently racy.
 > 
 > I'm not sure what your flow is exactly, but I assume you drive 
 > the workload restore from user space already?
 > 

Yes, in our PoC setup the post-kexec restore flow is driven from initramfs / early userspace.

We pass an initramfs via kexec --initrd and install a temporary iptables INPUT DROP rule from a dracut pre-mount hook (keyed by a cmdline parameter such as luo_tcp_drop_port=...).
In our external-peer test this avoids the early TCP RST window; the peer just retransmits/times out until CRIU restore recreates the socket.

The downside is that it makes the initramfs heavier (iptables userspace plus the required xtables extensions, and it relies on legacy iptables filter support being available early).
I'm not sure this is a great general solution, but it can work when the initramfs is under our control.

Regards,
Li