From: Wesley Atwell <atwellwea@gmail.com>
Extend TCP_REPAIR_WINDOW so checkpoint and restore can round-trip both the
live rwnd snapshot and the remembered maximum sender-visible window.
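Keep the ABI append-only by accepting the legacy and v1 prefix lengths on
both get and set, rebuilding any missing max-window state from the live
window when older userspace restores a socket. A full-length restore whose
max-window snapshot no longer covers the restored live window falls back to
the same rebuild.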
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
---
include/net/tcp.h | 13 +++----
include/uapi/linux/tcp.h | 8 +++++
net/ipv4/tcp.c | 73 ++++++++++++++++++++++++++++++++++++----
3 files changed, 81 insertions(+), 13 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b479ad44f89..12e62fea2aaf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1766,13 +1766,14 @@ static inline bool tcp_space_from_wnd_snapshot(u8 scaling_ratio, int win,
}
/* Rebuild hard receive-memory units for data already covered by tp->rcv_wnd if
- * the advertise-time basis is known.
+ * the advertise-time basis is known. Legacy TCP_REPAIR restores can only
+ * recover tp->rcv_wnd itself; callers must fall back when the snapshot is
+ * unknown.
*/
static inline bool tcp_space_from_rcv_wnd(const struct tcp_sock *tp, int win,
int *space)
{
- return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win,
- space);
+ return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win, space);
}
/* Same as tcp_space_from_rcv_wnd(), but for the remembered maximum
@@ -1800,9 +1801,9 @@ static inline void tcp_scaling_ratio_init(struct sock *sk)
}
/* tp->rcv_wnd is paired with the scaling_ratio that was in force when that
- * window was last advertised. Callers can leave a zero snapshot when the
- * advertise-time basis is unknown and refresh the pair on the next local
- * window update.
+ * window was last advertised. Legacy TCP_REPAIR restores can only recover the
+ * window value itself and use a zero snapshot until a fresh local window
+ * advertisement refreshes the pair.
*/
static inline void tcp_set_rcv_wnd_snapshot(struct tcp_sock *tp, u32 win,
u8 scaling_ratio)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 03772dd4d399..564a77f69130 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -152,6 +152,11 @@ struct tcp_repair_opt {
__u32 opt_val;
};
+/* Append-only repair ABI.
+ * Older userspace may pass a struct that ends right after rcv_wup (legacy)
+ * or right after rcv_wnd_scaling_ratio (v1). The kernel accepts those prefix
+ * lengths and rebuilds any missing receive-window snapshot state on restore.
+ */
struct tcp_repair_window {
__u32 snd_wl1;
__u32 snd_wnd;
@@ -159,6 +164,9 @@ struct tcp_repair_window {
__u32 rcv_wnd;
__u32 rcv_wup;
+ __u32 rcv_wnd_scaling_ratio; /* 0 means live-window basis unknown */
+ __u32 rcv_mwnd_seq;
+ __u32 rcv_mwnd_scaling_ratio; /* 0 means max-window basis unknown */
};
enum {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 66706dbb90f5..39a1265876ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3533,17 +3533,31 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
(sk->sk_state != TCP_LISTEN);
}
+/* Keep accepting the pre-extension TCP_REPAIR_WINDOW layout so legacy
+ * userspace can restore sockets without fabricating a snapshot basis.
+ */
+static inline int tcp_repair_window_legacy_size(void)
+{
+ return offsetof(struct tcp_repair_window, rcv_wnd_scaling_ratio);
+}
+
+static inline int tcp_repair_window_v1_size(void)
+{
+ return offsetof(struct tcp_repair_window, rcv_mwnd_seq);
+}
+
static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
{
- struct tcp_repair_window opt;
+ struct tcp_repair_window opt = {};
if (!tp->repair)
return -EPERM;
- if (len != sizeof(opt))
+ if (len != tcp_repair_window_legacy_size() &&
+ len != tcp_repair_window_v1_size() && len != sizeof(opt))
return -EINVAL;
- if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
+ if (copy_from_sockptr(&opt, optbuf, len))
return -EFAULT;
if (opt.max_window < opt.snd_wnd)
@@ -3559,9 +3573,47 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
tp->snd_wnd = opt.snd_wnd;
tp->max_window = opt.max_window;
- tp->rcv_wnd = opt.rcv_wnd;
+ if (len == tcp_repair_window_legacy_size()) {
+ /* Legacy repair UAPI has no advertise-time basis for tp->rcv_wnd.
+ * Mark the snapshot unknown until a fresh local advertisement
+ * re-establishes the pair.
+ */
+ tcp_set_rcv_wnd_unknown(tp, opt.rcv_wnd);
+ tp->rcv_wup = opt.rcv_wup;
+ tcp_init_max_rcv_wnd_seq(tp);
+ return 0;
+ }
+
+ if (opt.rcv_wnd_scaling_ratio > U8_MAX)
+ return -EINVAL;
+
+ tcp_set_rcv_wnd_snapshot(tp, opt.rcv_wnd, opt.rcv_wnd_scaling_ratio);
tp->rcv_wup = opt.rcv_wup;
- tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd;
+
+ if (len == tcp_repair_window_v1_size()) {
+ /* v1 repair can restore the live-window snapshot, but not a
+ * retracted max-window snapshot. Rebuild it from the live pair
+ * until a fresh local advertisement updates it again.
+ */
+ tcp_init_max_rcv_wnd_seq(tp);
+ return 0;
+ }
+
+ if (opt.rcv_mwnd_scaling_ratio > U8_MAX)
+ return -EINVAL;
+
+ /* Userspace may repair sequence-space values after checkpoint without
+ * also rebasing the remembered max advertised right edge. If the exact
+ * snapshot no longer covers the restored live window, treat it like
+ * v1 and rebuild the max-window side from the live pair.
+ */
+ if (after(opt.rcv_wup + opt.rcv_wnd, opt.rcv_mwnd_seq)) {
+ tcp_init_max_rcv_wnd_seq(tp);
+ return 0;
+ }
+
+ tp->rcv_mwnd_seq = opt.rcv_mwnd_seq;
+ tp->rcv_mwnd_scaling_ratio = opt.rcv_mwnd_scaling_ratio;
return 0;
}
@@ -4650,12 +4702,16 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_REPAIR_WINDOW: {
- struct tcp_repair_window opt;
+ struct tcp_repair_window opt = {};
if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
- if (len != sizeof(opt))
+ /* Mirror the accepted set-side prefix lengths so checkpoint
+ * tools can round-trip exactly the layout version they know.
+ */
+ if (len != tcp_repair_window_legacy_size() &&
+ len != tcp_repair_window_v1_size() && len != sizeof(opt))
return -EINVAL;
if (!tp->repair)
@@ -4666,6 +4722,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
opt.max_window = tp->max_window;
opt.rcv_wnd = tp->rcv_wnd;
opt.rcv_wup = tp->rcv_wup;
+ opt.rcv_wnd_scaling_ratio = tp->rcv_wnd_scaling_ratio;
+ opt.rcv_mwnd_seq = tp->rcv_mwnd_seq;
+ opt.rcv_mwnd_scaling_ratio = tp->rcv_mwnd_scaling_ratio;
if (copy_to_sockptr(optval, &opt, len))
return -EFAULT;
--
2.43.0