During a kexec-based live update, userspace may restore established TCP
connections after the new kernel has booted (e.g. via CRIU). Any packet
arriving for a not-yet-restored socket will hit the no-socket path and
trigger a TCP RST, causing the peer to immediately drop the connection.

Add an optional cmdline knob, liveupdate_tcp_rst_suppress=, to drop such
packets while liveupdate_restore_in_progress() is true. Only segments
with ACK set and SYN clear are dropped, and the default behavior remains
unchanged.

Document the liveupdate_tcp_rst_suppress cmdline parameter.

Signed-off-by: Li Chen <me@linux.beauty>
---
Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++++
include/linux/liveupdate.h | 11 +++++++++++
kernel/liveupdate/luo_core.c | 14 ++++++++++++++
kernel/liveupdate/luo_session.c | 1 +
net/ipv4/tcp_ipv4.c | 5 +++++
net/ipv6/tcp_ipv6.c | 5 +++++
6 files changed, 46 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3097e4266d76..b73347a0aefd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3442,6 +3442,16 @@ Kernel parameters
If there are multiple matching configurations changing
the same attribute, the last one is used.
+ liveupdate_tcp_rst_suppress= [KNL,EARLY]
+ Format: <bool>
+ When enabled, drop packets for established connections
+ (ACK set, SYN clear) that would otherwise trigger a RST
+ in the LUO post-kexec restore window.
+ This is useful when userspace restores sockets after
+ kexec (e.g. via CRIU).
+ Requires liveupdate=on.
+ Default: off.
+
lockd.nlm_grace_period=P [NFS] Assign grace period.
Format: <integer>
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h
index 301d3e94516e..6ca740ec19d4 100644
--- a/include/linux/liveupdate.h
+++ b/include/linux/liveupdate.h
@@ -227,6 +227,12 @@ bool liveupdate_enabled(void);
*/
bool liveupdate_restore_in_progress(void);
+/*
+ * Return true when TCP RST suppression is enabled for the post-kexec restore
+ * window.
+ */
+bool liveupdate_tcp_rst_suppress_enabled(void);
+
/* Called during kexec to tell LUO that entered into reboot */
int liveupdate_reboot(void);
@@ -253,6 +259,11 @@ static inline bool liveupdate_restore_in_progress(void)
return false;
}
+static inline bool liveupdate_tcp_rst_suppress_enabled(void)
+{
+ return false;
+}
+
static inline int liveupdate_reboot(void)
{
return 0;
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index fb6a73c08979..0ed5c9ce1421 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -64,6 +64,7 @@
static struct {
bool enabled;
+ bool tcp_rst_suppress;
void *fdt_out;
void *fdt_in;
u64 liveupdate_num;
@@ -75,6 +76,13 @@ static int __init early_liveupdate_param(char *buf)
}
early_param("liveupdate", early_liveupdate_param);
+static int __init early_liveupdate_tcp_rst_suppress_param(char *buf)
+{
+ return kstrtobool(buf, &luo_global.tcp_rst_suppress);
+}
+early_param("liveupdate_tcp_rst_suppress",
+ early_liveupdate_tcp_rst_suppress_param);
+
static int __init luo_early_startup(void)
{
phys_addr_t fdt_phys;
@@ -259,6 +267,12 @@ bool liveupdate_enabled(void)
return luo_global.enabled;
}
+bool liveupdate_tcp_rst_suppress_enabled(void)
+{
+ return liveupdate_enabled() && luo_global.tcp_rst_suppress;
+}
+EXPORT_SYMBOL_GPL(liveupdate_tcp_rst_suppress_enabled);
+
/**
* DOC: LUO ioctl Interface
*
diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c
index 2c7dd3b12303..427ae74061ba 100644
--- a/kernel/liveupdate/luo_session.c
+++ b/kernel/liveupdate/luo_session.c
@@ -146,6 +146,7 @@ bool liveupdate_restore_in_progress(void)
{
return atomic_long_read(&liveupdate_incoming_sessions_left) > 0;
}
+EXPORT_SYMBOL_GPL(liveupdate_restore_in_progress);
void __init luo_session_restore_window_init(void)
{
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8a9596e8f4d..9a95f3dbf39a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -56,6 +56,7 @@
#include <linux/fips.h>
#include <linux/jhash.h>
#include <linux/init.h>
+#include <linux/liveupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/sched.h>
@@ -2349,6 +2350,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
+ if (liveupdate_tcp_rst_suppress_enabled() &&
+ liveupdate_restore_in_progress() &&
+ th->ack && !th->syn)
+ goto discard_it;
tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 280fe5978559..c2e680eba041 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -40,6 +40,7 @@
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/liveupdate.h>
#include <net/aligned_data.h>
#include <net/tcp.h>
@@ -1900,6 +1901,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
+ if (liveupdate_tcp_rst_suppress_enabled() &&
+ liveupdate_restore_in_progress() &&
+ th->ack && !th->syn)
+ goto discard_it;
tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
--
2.52.0
On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
> During a kexec-based live update, userspace may restore established TCP
> connections after the new kernel has booted (e.g. via CRIU). Any packet
> arriving for a not-yet-restored socket will hit the no-socket path and
> trigger a TCP RST, causing the peer to immediately drop the connection.

Can you not add a filter to simply drop those packets until workload is
running again? It'd actually be less racy than this hac^w patch ...
Hi Jakub,

> On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
> > During a kexec-based live update, userspace may restore established TCP
> > connections after the new kernel has booted (e.g. via CRIU). Any packet
> > arriving for a not-yet-restored socket will hit the no-socket path and
> > trigger a TCP RST, causing the peer to immediately drop the connection.
>
> Can you not add a filter to simply drop those packets until workload is
> running again? It'd actually be less racy than this hac^w patch ...
>

Thanks for the suggestion.

When you say "add a filter", do you mean installing a temporary drop rule
(nftables/iptables/tc) in the network domain which does not get rebooted by
kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
until the workload is restored and ready?

If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
the critical window: kexec resets the ruleset, so we'd have to install the
drop rule extremely early (initramfs) before any packets hit the no-socket
path, which still seems inherently racy.

If the expectation is to drain/blackhole traffic externally and re-enable it
once the workload is running again, I can rework the series to keep only the
restore-window tracking plus a clear "restore done" control plane, and rely
on the external filter for the data plane.

Regards
Li
On Sun, 01 Feb 2026 09:44:27 +0800 Li Chen wrote:
> > On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
> > > During a kexec-based live update, userspace may restore established TCP
> > > connections after the new kernel has booted (e.g. via CRIU). Any packet
> > > arriving for a not-yet-restored socket will hit the no-socket path and
> > > trigger a TCP RST, causing the peer to immediately drop the connection.
> >
> > Can you not add a filter to simply drop those packets until workload is
> > running again? It'd actually be less racy than this hac^w patch ...
> >
>
> Thanks for the suggestion.
>
> When you say "add a filter", do you mean installing a temporary drop rule
> (nftables/iptables/tc) in the network domain which does not get rebooted by
> kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
> until the workload is restored and ready?
>
> If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
> the critical window: kexec resets the ruleset, so we'd have to install the
> drop rule extremely early (initramfs) before any packets hit the no-socket
> path, which still seems inherently racy.

I'm not sure what your flow is exactly, but I assume you drive
the workload restore from user space already?
Hi Jakub,

---- On Tue, 03 Feb 2026 08:53:20 +0800 Jakub Kicinski <kuba@kernel.org> wrote ---
> On Sun, 01 Feb 2026 09:44:27 +0800 Li Chen wrote:
> > > On Fri, 30 Jan 2026 22:51:19 +0800 Li Chen wrote:
> > > > During a kexec-based live update, userspace may restore established TCP
> > > > connections after the new kernel has booted (e.g. via CRIU). Any packet
> > > > arriving for a not-yet-restored socket will hit the no-socket path and
> > > > trigger a TCP RST, causing the peer to immediately drop the connection.
> > >
> > > Can you not add a filter to simply drop those packets until workload is
> > > running again? It'd actually be less racy than this hac^w patch ...
> > >
> >
> > Thanks for the suggestion.
> >
> > When you say "add a filter", do you mean installing a temporary drop rule
> > (nftables/iptables/tc) in the network domain which does not get rebooted by
> > kexec (e.g. LB/ToR/host firewall), so packets never reach the new kernel
> > until the workload is restored and ready?
> >
> > If you meant a filter inside the kexec'ed kernel, I'm worried it won't cover
> > the critical window: kexec resets the ruleset, so we'd have to install the
> > drop rule extremely early (initramfs) before any packets hit the no-socket
> > path, which still seems inherently racy.
>
> I'm not sure what your flow is exactly, but I assume you drive
> the workload restore from user space already?
>

Yes, in our PoC setup the post-kexec restore flow is driven from initramfs /
early userspace.

We pass an initramfs via kexec --initrd and install a temporary iptables
INPUT DROP rule from a dracut pre-mount hook (keyed by a cmdline like
luo_tcp_drop_port=...). In our external-peer test this avoids the early TCP
RST window; the peer just retransmits/times out until CRIU restore recreates
the socket.

The downside is that it makes initramfs heavier (iptables userspace +
required xtables extensions, and it relies on legacy iptables filter support
being available early).

Not sure this is a great general solution, but it can work when initramfs is
under our control.

Regards,
Li
© 2016 - 2026 Red Hat, Inc.