From: Wesley Atwell <atwellwea@gmail.com>
Add a test-only TUN ioctl that inflates RX skb->truesize, plus the
packetdrill-side helper needed to drive that ioctl through packetdrill's
own TUN queue file descriptor.
Use that plumbing to cover the receive-window regressions where
scaling_ratio drifts after advertisement, alongside the baseline too-big
packetdrill cases that exercise the same sender-visible rwnd accounting
from the non-injected path.
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
---
drivers/net/tun.c | 65 ++++++++
include/uapi/linux/if_tun.h | 4 +
.../tcp_rcv_neg_window_truesize.pkt | 143 ++++++++++++++++++
.../net/packetdrill/tcp_rcv_toobig.pkt | 35 +++++
.../packetdrill/tcp_rcv_toobig_default.pkt | 97 ++++++++++++
.../tcp_rcv_toobig_default_truesize.pkt | 118 +++++++++++++++
.../tcp_rcv_wnd_shrink_allowed_truesize.pkt | 49 ++++++
tools/testing/selftests/net/tun.c | 140 ++++++++++++++++-
8 files changed, 650 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window_truesize.pkt
create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default.pkt
create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default_truesize.pkt
create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed_truesize.pkt
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c492fda6fc15..2cef62cebe88 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -53,6 +53,7 @@
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/if_vlan.h>
+#include <linux/overflow.h>
#include <linux/crc32.h>
#include <linux/math.h>
#include <linux/nsproxy.h>
@@ -85,8 +86,13 @@
#include "tun_vnet.h"
+struct tun_file;
+
+#define TUNSETTRUESIZE_OLD _IOW('T', 228, unsigned int)
+
static void tun_default_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd);
+static void tun_rx_update_truesize(struct tun_file *tfile, struct sk_buff *skb);
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
@@ -138,6 +144,7 @@ struct tun_file {
u16 queue_index;
unsigned int ifindex;
};
+ u32 rx_extra_truesize;
struct napi_struct napi;
bool napi_enabled;
bool napi_frags_enabled;
@@ -1817,6 +1824,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
goto free_skb;
}
+ tun_rx_update_truesize(tfile, skb);
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
if (tun->flags & IFF_NO_PI) {
@@ -2373,6 +2381,25 @@ static void tun_put_page(struct tun_page *tpage)
__page_frag_cache_drain(tpage->page, tpage->count);
}
+/* Tests can inflate skb->truesize on ingress to exercise receive-memory
+ * accounting against a scaling_ratio that drifts after a window was
+ * advertised. The knob is per queue file, defaults to zero, and only changes
+ * behavior when explicitly enabled through the TUN fd.
+ *
+ * @tfile: queue file whose rx_extra_truesize is added
+ * @skb: RX skb that is about to be passed up the stack
+ *
+ * The new truesize saturates at UINT_MAX instead of wrapping around.
+ */
+static void tun_rx_update_truesize(struct tun_file *tfile, struct sk_buff *skb)
+{
+	u32 extra = READ_ONCE(tfile->rx_extra_truesize);
+	unsigned int truesize;
+
+	if (!extra)
+		return;
+
+	/* truesize must not change while the skb is charged to a socket: the
+	 * destructor uncharges the current skb->truesize, which would no
+	 * longer match the amount that was charged. Drop the owner first.
+	 * NOTE(review): skbs coming from tun_alloc_skb() are presumably
+	 * charged to the TUN socket by sock_alloc_send_pskb() - confirm.
+	 */
+	skb_orphan(skb);
+
+	if (check_add_overflow(skb->truesize, extra, &truesize))
+		truesize = UINT_MAX;
+
+	skb->truesize = truesize;
+}
+
static int tun_xdp_one(struct tun_struct *tun,
struct tun_file *tfile,
struct xdp_buff *xdp, int *flush,
@@ -2459,6 +2486,7 @@ static int tun_xdp_one(struct tun_struct *tun,
goto out;
}
+ tun_rx_update_truesize(tfile, skb);
skb->protocol = eth_type_trans(skb, tun->dev);
skb_reset_network_header(skb);
skb_probe_transport_header(skb);
@@ -3045,6 +3073,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
unsigned int carrier;
+ unsigned int extra_truesize;
struct ifreq ifr;
kuid_t owner;
kgid_t group;
@@ -3309,6 +3338,40 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = tun_net_change_carrier(tun->dev, (bool)carrier);
break;
+ /* Support both the legacy pointer-payload form and the scalar form
+ * used by the selftest helper when injecting truesize from
+ * packetdrill shell commands.
+ */
+ case TUNSETTRUESIZE:
+ case TUNSETTRUESIZE_OLD:
+ ret = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto unlock;
+
+ if (cmd == TUNSETTRUESIZE_OLD) {
+ ret = -EFAULT;
+ if (copy_from_user(&extra_truesize, argp,
+ sizeof(extra_truesize))) {
+ ret = -EINVAL;
+ if (arg > U32_MAX)
+ goto unlock;
+
+ extra_truesize = arg;
+ }
+ } else {
+ ret = -EINVAL;
+ if (arg > U32_MAX)
+ goto unlock;
+
+ extra_truesize = arg;
+ }
+
+ WRITE_ONCE(tfile->rx_extra_truesize, extra_truesize);
+ netif_info(tun, drv, tun->dev,
+ "rx extra truesize set to %u\n", extra_truesize);
+ ret = 0;
+ break;
+
case TUNGETDEVNETNS:
ret = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -3348,6 +3411,7 @@ static long tun_chr_compat_ioctl(struct file *file,
case TUNGETSNDBUF:
case TUNSETSNDBUF:
case SIOCGIFHWADDR:
+ case TUNSETTRUESIZE_OLD:
case SIOCSIFHWADDR:
arg = (unsigned long)compat_ptr(arg);
break;
@@ -3408,6 +3472,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
RCU_INIT_POINTER(tfile->tun, NULL);
tfile->flags = 0;
tfile->ifindex = 0;
+ tfile->rx_extra_truesize = 0;
init_waitqueue_head(&tfile->socket.wq.wait);
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 79d53c7a1ebd..4be63efe6540 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -61,6 +61,10 @@
#define TUNSETFILTEREBPF _IOR('T', 225, int)
#define TUNSETCARRIER _IOW('T', 226, int)
#define TUNGETDEVNETNS _IO('T', 227)
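+/* Test-only: add a fixed number of bytes to skb->truesize on RX, right after
+ * TUN has allocated the skb. The byte count is the ioctl argument itself
+ * (passed by value, not through a pointer); 0 turns the inflation off.
+ */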
+#define TUNSETTRUESIZE _IO('T', 228)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window_truesize.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window_truesize.pkt
new file mode 100644
index 000000000000..1c5550fff509
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window_truesize.pkt
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+// Run the negative-window / max-advertised-window regression with inflated
+// TUN skb->truesize so scaling_ratio drifts throughout the flow. The sequence
+// checks and drop counters should remain identical to the uninflated case.
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1000000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 4>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Put 1040000 bytes into the receive buffer.
+ +0 < P. 1:65001(65000) ack 1 win 257
+ * > . 1:1(0) ack 65001
+ +0 < P. 65001:130001(65000) ack 1 win 257
+ * > . 1:1(0) ack 130001
+ +0 < P. 130001:195001(65000) ack 1 win 257
+ * > . 1:1(0) ack 195001
+ +0 < P. 195001:260001(65000) ack 1 win 257
+ * > . 1:1(0) ack 260001
+ +0 < P. 260001:325001(65000) ack 1 win 257
+ * > . 1:1(0) ack 325001
+ +0 < P. 325001:390001(65000) ack 1 win 257
+ * > . 1:1(0) ack 390001
+ +0 < P. 390001:455001(65000) ack 1 win 257
+ * > . 1:1(0) ack 455001
+ +0 < P. 455001:520001(65000) ack 1 win 257
+ * > . 1:1(0) ack 520001
+ +0 < P. 520001:585001(65000) ack 1 win 257
+ * > . 1:1(0) ack 585001
+ +0 < P. 585001:650001(65000) ack 1 win 257
+ * > . 1:1(0) ack 650001
+ +0 < P. 650001:715001(65000) ack 1 win 257
+ * > . 1:1(0) ack 715001
+ +0 < P. 715001:780001(65000) ack 1 win 257
+ * > . 1:1(0) ack 780001
+ +0 < P. 780001:845001(65000) ack 1 win 257
+ * > . 1:1(0) ack 845001
+ +0 < P. 845001:910001(65000) ack 1 win 257
+ * > . 1:1(0) ack 910001
+ +0 < P. 910001:975001(65000) ack 1 win 257
+ * > . 1:1(0) ack 975001
+ +0 < P. 975001:1040001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1040001
+
+// Start inflating future TUN skbs only after the baseline sender-visible
+// window has been established, so the negative-window checks below exercise
+// ratio drift without changing the initial max advertised window.
+ +0 `../tun --set-rx-truesize tun0 65536`
+
+// Trigger an extreme memory squeeze by shrinking SO_RCVBUF.
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [16000], 4) = 0
+
+ +0 < P. 1040001:1105001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1040001 win 0
+// Check LINUX_MIB_TCPRCVQDROP has been incremented.
+ +0 `nstat -s | grep TcpExtTCPRcvQDrop | grep -q " 1 "`
+
+// RWIN == 0: rcv_wup = 1040001, rcv_wnd = 0, rcv_mwnd_seq > 1105001.
+
+// Accept pure ack with seq in max adv. window.
+ +0 write(4, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 1040001 win 0
+ +0 < . 1105001:1105001(0) ack 1001 win 257
+
+// In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW).
+ +0 < P. 1040001:1041001(1000) ack 1001 win 257
+ +0 > . 1001:1001(0) ack 1040001 win 0
+// Ooo partial segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW).
+ +0 < P. 1039001:1041001(2000) ack 1001 win 257
+ +0 > . 1001:1001(0) ack 1040001 win 0 <nop,nop,sack 1039001:1040001>
+// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented twice.
+ +0 `nstat -s | grep TcpExtTCPZeroWindowDrop | grep -q " 2 "`
+
+// Ooo segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW).
+ +0 < P. 1105001:1106001(1000) ack 1001 win 257
+ +0 > . 1001:1001(0) ack 1040001 win 0
+// Ooo segment, beyond max adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE).
+ +0 < P. 2000001:2001001(1000) ack 1001 win 257
+ +0 > . 1001:1001(0) ack 1040001 win 0
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice.
+ +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 2 "`
+
+// Read all data.
+ +0 read(4, ..., 2000000) = 1040000
+ * > . 1001:1001(0) ack 1040001
+
+// RWIN > 0: rcv_wup = 1040001, 0 < rcv_wnd < 32000, rcv_mwnd_seq > 1105001.
+
+// Accept pure ack with seq in max adv. window, beyond adv. window.
+ +0 write(4, ..., 1000) = 1000
+ +0 > P. 1001:2001(1000) ack 1040001
+ +0 < . 1105001:1105001(0) ack 2001 win 257
+
+// In order segment, in max adv. window, in adv. window -> accept.
+ +0 < P. 1040001:1041001(1000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1041001
+
+// Ooo partial segment, in adv. window -> accept.
+ +0 < P. 1040001:1042001(2000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1042001 <nop,nop,sack 1040001:1041001>
+
+// Ooo segment, in max adv. window, beyond adv. window -> drop.
+ +0 < P. 1105001:1106001(1000) ack 2001 win 257
+ +0 > . 2001:2001(0) ack 1042001
+// Ooo segment, beyond max adv. window, beyond adv. window -> drop.
+ +0 < P. 2000001:2001001(1000) ack 2001 win 257
+ +0 > . 2001:2001(0) ack 1042001
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice more.
+ +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 4 "`
+
+// We are allowed to go beyond the window and buffer with one packet.
+ +0 < P. 1042001:1062001(20000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1062001
+ +0 < P. 1062001:1082001(20000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1082001 win 0
+
+// But not more: in-order segment, in max adv. window -> drop.
+ +0 < P. 1082001:1083001(1000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1082001
+// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented again.
+ +0 `nstat -s | grep TcpExtTCPZeroWindowDrop | grep -q " 3 "`
+
+// Another ratio drop must not change the final zero-window decision.
+ +0 `../tun --set-rx-truesize tun0 131072`
+
+ +0 < P. 1082001:1083001(1000) ack 2001 win 257
+ * > . 2001:2001(0) ack 1082001
+// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented once more.
+ +0 `nstat -s | grep TcpExtTCPZeroWindowDrop | grep -q " 4 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..837ba3633752
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+
+// A too big packet is accepted if the receive queue is empty, but the
+// stronger admission path must not zero the receive buffer while doing so.
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ * > . 1:1(0) ack 80001 win 0
+ +0 %{ assert SK_MEMINFO_RCVBUF > 0, SK_MEMINFO_RCVBUF }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default.pkt
new file mode 100644
index 000000000000..b2e4950e0b83
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default.pkt
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_moderate_rcvbuf=0`
+
+// Establish a connection on the default receive buffer. Leave a large skb in
+// the queue, then deliver another one which still fits the remaining rwnd.
+// We should grow sk_rcvbuf to honor the already-advertised window instead of
+// dropping the packet.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <...>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Exchange enough data to get past the completely fresh-socket case while
+// still keeping the receive buffer at its 128kB default.
+ +0 < P. 1:65001(65000) ack 1 win 257
+ * > . 1:1(0) ack 65001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 65001:130001(65000) ack 1 win 257
+ * > . 1:1(0) ack 130001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 130001:195001(65000) ack 1 win 257
+ * > . 1:1(0) ack 195001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 195001:260001(65000) ack 1 win 257
+ * > . 1:1(0) ack 260001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 260001:325001(65000) ack 1 win 257
+ * > . 1:1(0) ack 325001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 325001:390001(65000) ack 1 win 257
+ * > . 1:1(0) ack 390001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 390001:455001(65000) ack 1 win 257
+ * > . 1:1(0) ack 455001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 455001:520001(65000) ack 1 win 257
+ * > . 1:1(0) ack 520001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 520001:585001(65000) ack 1 win 257
+ * > . 1:1(0) ack 585001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 585001:650001(65000) ack 1 win 257
+ * > . 1:1(0) ack 650001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 650001:715001(65000) ack 1 win 257
+ * > . 1:1(0) ack 715001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 715001:780001(65000) ack 1 win 257
+ * > . 1:1(0) ack 780001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 780001:845001(65000) ack 1 win 257
+ * > . 1:1(0) ack 845001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 845001:910001(65000) ack 1 win 257
+ * > . 1:1(0) ack 910001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 910001:975001(65000) ack 1 win 257
+ * > . 1:1(0) ack 975001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 975001:1040001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1040001
+ +0 read(4, ..., 65000) = 65000
+
+// Leave about 60kB queued, then accept another large skb which still fits
+// the rwnd we already exposed to the peer. The regression is the drop; the
+// exact sk_rcvbuf growth path is an implementation detail.
+ +0 < P. 1040001:1102001(62000) ack 1 win 257
+ * > . 1:1(0) ack 1102001
+
+ +0 < P. 1102001:1167001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1167001
+ +0 read(4, ..., 127000) = 127000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default_truesize.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default_truesize.pkt
new file mode 100644
index 000000000000..c2ebe11d75f7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig_default_truesize.pkt
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_moderate_rcvbuf=0`
+
+// Establish a connection on the default receive buffer. The warmup traffic
+// keeps the socket in the normal data path without changing its default
+// sk_rcvbuf. Then inflate skb->truesize on future TUN RX packets so the live
+// scaling_ratio drops after we already exposed a larger rwnd to the peer.
+// The follow-up packet should still be admitted, and tcp_clamp_window() should
+// grow sk_rcvbuf to honor the sender-visible window instead of dropping data.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <...>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Exchange enough data to get past the completely fresh-socket case while
+// still keeping the receive buffer at its initial default.
+ +0 < P. 1:65001(65000) ack 1 win 257
+ * > . 1:1(0) ack 65001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 65001:130001(65000) ack 1 win 257
+ * > . 1:1(0) ack 130001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 130001:195001(65000) ack 1 win 257
+ * > . 1:1(0) ack 195001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 195001:260001(65000) ack 1 win 257
+ * > . 1:1(0) ack 260001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 260001:325001(65000) ack 1 win 257
+ * > . 1:1(0) ack 325001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 325001:390001(65000) ack 1 win 257
+ * > . 1:1(0) ack 390001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 390001:455001(65000) ack 1 win 257
+ * > . 1:1(0) ack 455001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 455001:520001(65000) ack 1 win 257
+ * > . 1:1(0) ack 520001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 520001:585001(65000) ack 1 win 257
+ * > . 1:1(0) ack 585001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 585001:650001(65000) ack 1 win 257
+ * > . 1:1(0) ack 650001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 650001:715001(65000) ack 1 win 257
+ * > . 1:1(0) ack 715001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 715001:780001(65000) ack 1 win 257
+ * > . 1:1(0) ack 780001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 780001:845001(65000) ack 1 win 257
+ * > . 1:1(0) ack 845001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 845001:910001(65000) ack 1 win 257
+ * > . 1:1(0) ack 910001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 910001:975001(65000) ack 1 win 257
+ * > . 1:1(0) ack 975001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 < P. 975001:1040001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1040001
+ +0 read(4, ..., 65000) = 65000
+
+ +0 %{ base_rcvbuf = SK_MEMINFO_RCVBUF }%
+
+// Leave about 60kB queued, then make future TUN skbs look more expensive in
+// two steps. Both inflated skbs still fit the already-advertised window and
+// must be admitted, and sk_rcvbuf should keep growing as the live
+// scaling_ratio drops further.
+ +0 < P. 1040001:1102001(62000) ack 1 win 257
+ * > . 1:1(0) ack 1102001
+
+ +0 `../tun --set-rx-truesize tun0 4096`
+
+ +0 < P. 1102001:1167001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1167001
+ +0 %{ assert SK_MEMINFO_RCVBUF > base_rcvbuf, (base_rcvbuf, SK_MEMINFO_RCVBUF) }%
+ +0 %{ small_rcvbuf = SK_MEMINFO_RCVBUF }%
+
+ +0 < P. 1167001:1229001(62000) ack 1 win 257
+ * > . 1:1(0) ack 1229001
+
+ +0 `../tun --set-rx-truesize tun0 65536`
+
+ +0 < P. 1229001:1294001(65000) ack 1 win 257
+ * > . 1:1(0) ack 1294001
+ +0 %{ assert SK_MEMINFO_RCVBUF > small_rcvbuf, (base_rcvbuf, small_rcvbuf, SK_MEMINFO_RCVBUF) }%
+
+ +0 < P. 1294001:1356001(62000) ack 1 win 257
+ * > . 1:1(0) ack 1356001
+ +0 read(4, ..., 254000) = 254000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed_truesize.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed_truesize.pkt
new file mode 100644
index 000000000000..08da5fddaa12
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed_truesize.pkt
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_shrink_window=1
+sysctl -q net.ipv4.tcp_rmem="4096 32768 $((32*1024*1024))"`
+
+ 0 `nstat -n`
+
+// Establish a connection. After the first payload we know the peer has seen a
+// scaled receive window reaching sequence 25361. Inflate later TUN skbs in two
+// steps so the live scaling_ratio drops more than once, then verify that:
+// 1) a segment one byte beyond the max advertised window is still dropped,
+// 2) a segment exactly using the previously advertised max window is still
+// accepted even though the current live ratio no longer matches that
+// original advertisement basis.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 10>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:10001(10000) ack 1 win 257
+ * > . 1:1(0) ack 10001 win 15
+
+// Max window seq advertised here is 10001 + 15*1024 = 25361.
+ +0 `../tun --set-rx-truesize tun0 4096`
+
+ +0 < P. 10001:11024(1023) ack 1 win 257
+ * > . 1:1(0) ack 11024
+
+ +0 `../tun --set-rx-truesize tun0 65536`
+
+// Segment beyond the max window stays invalid even after ratio drift.
+ +0 < P. 11024:25362(14338) ack 1 win 257
+ * > . 1:1(0) ack 11024
+
+// Segment exactly using the max window must still be accepted.
+ +0 < P. 11024:25361(14337) ack 1 win 257
+ * > . 1:1(0) ack 25361
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented once.
+ +0 `nstat | grep TcpExtBeyondWindow | grep -q " 1 "`
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index cf106a49b55e..473992b3784d 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -2,14 +2,17 @@
#define _GNU_SOURCE
+#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/if_tun.h>
#include <sys/ioctl.h>
+#include <sys/syscall.h>
#include <sys/socket.h>
#include "kselftest_harness.h"
@@ -174,6 +177,135 @@ static int tun_delete(char *dev)
return ip_link_del(dev);
}
+/* Return true when @name is a non-empty string made only of the ASCII
+ * digits '0'..'9', i.e. it can be the name of an entry in /proc/<pid>/fd.
+ * An empty name carries no descriptor number and is rejected.
+ */
+static bool is_numeric_name(const char *name)
+{
+	if (!*name)
+		return false;
+
+	for (; *name; name++) {
+		if (*name < '0' || *name > '9')
+			return false;
+	}
+
+	return true;
+}
+
+/* Duplicate descriptor @fd_name (a decimal string) of the process referred
+ * to by @pidfd into the calling process with the pidfd_getfd() syscall.
+ *
+ * Returns the new local descriptor, or -1 with errno set: EINVAL when
+ * @fd_name is not a decimal number that fits in an int, otherwise the error
+ * reported by pidfd_getfd().
+ */
+static int packetdrill_dup_fd(int pidfd, const char *fd_name)
+{
+	unsigned long tmp;
+	char *end;
+
+	errno = 0;
+	tmp = strtoul(fd_name, &end, 10);
+	/* end == fd_name means nothing was converted (e.g. an empty string);
+	 * strtoul() then returns 0, which must not be taken for fd 0.
+	 */
+	if (errno || end == fd_name || *end || tmp > INT_MAX) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return syscall(SYS_pidfd_getfd, pidfd, (int)tmp, 0);
+}
+
+/* Find the TUN queue fd that process @pid holds for interface @ifname and
+ * duplicate it into the calling process.
+ *
+ * Every numeric entry of /proc/@pid/fd is duplicated with pidfd_getfd() and
+ * probed with TUNGETIFF; the first duplicate whose interface name equals
+ * @ifname is returned, all other duplicates are closed again.
+ *
+ * Returns the duplicated fd (owned by the caller), or -1 with errno set.
+ * errno is ENOENT when no call failed and nothing matched, otherwise the
+ * error of the last pidfd_getfd()/TUNGETIFF call that actually failed.
+ */
+static int open_packetdrill_tunfd(pid_t pid, const char *ifname)
+{
+	char fd_dir[PATH_MAX];
+	struct dirent *dent;
+	struct ifreq ifr;
+	int saved_errno = ENOENT;
+	int found = -1;
+	int pidfd;
+	DIR *dir;
+
+	snprintf(fd_dir, sizeof(fd_dir), "/proc/%ld/fd", (long)pid);
+
+	pidfd = syscall(SYS_pidfd_open, pid, 0);
+	if (pidfd < 0)
+		return -1;
+
+	dir = opendir(fd_dir);
+	if (!dir) {
+		/* Keep opendir()'s errno for the caller across close(). */
+		saved_errno = errno;
+		close(pidfd);
+		errno = saved_errno;
+		return -1;
+	}
+
+	while (found < 0 && (dent = readdir(dir))) {
+		int fd;
+
+		if (!is_numeric_name(dent->d_name))
+			continue;
+
+		/* Duplicate via pidfd_getfd() so we get packetdrill's attached
+		 * queue file, instead of opening a fresh /dev/net/tun instance.
+		 */
+		fd = packetdrill_dup_fd(pidfd, dent->d_name);
+		if (fd < 0) {
+			saved_errno = errno;
+			continue;
+		}
+
+		memset(&ifr, 0, sizeof(ifr));
+		if (ioctl(fd, TUNGETIFF, &ifr)) {
+			/* Only a failed call leaves a meaningful errno; a
+			 * successful ioctl() with a different name does not
+			 * reset it, so it is not consulted in that case.
+			 */
+			saved_errno = errno;
+			close(fd);
+		} else if (strncmp(ifr.ifr_name, ifname, IFNAMSIZ)) {
+			close(fd);
+		} else {
+			found = fd;
+		}
+	}
+
+	close(pidfd);
+	closedir(dir);
+	if (found < 0)
+		errno = saved_errno;
+	return found;
+}
+
+/* Packetdrill owns the TUN queue fd, so drive the test ioctl through a
+ * duplicate of that exact queue file, taken from the process named by the
+ * PACKETDRILL_PID environment variable.
+ *
+ * @ifname: TUN interface name to look for among packetdrill's descriptors
+ * @value: extra truesize in bytes, in any base strtoull() accepts for base 0
+ *
+ * Returns 0 on success and 1 after printing a diagnostic on any failure, so
+ * the result can be used directly as the process exit status.
+ */
+static int packetdrill_set_rx_truesize(const char *ifname, const char *value)
+{
+	char *packetdrill_pid, *end;
+	unsigned long long tmp;
+	unsigned int extra;
+	pid_t pid;
+	int fd;
+
+	packetdrill_pid = getenv("PACKETDRILL_PID");
+	if (!packetdrill_pid || !*packetdrill_pid) {
+		fprintf(stderr, "PACKETDRILL_PID is not set\n");
+		return 1;
+	}
+
+	/* strtoull() skips leading blanks, accepts a sign (silently negating
+	 * the result) and turns an empty string into 0, so require a leading
+	 * digit before trusting the conversion.
+	 */
+	errno = 0;
+	tmp = strtoull(packetdrill_pid, &end, 10);
+	if (errno || packetdrill_pid[0] < '0' || packetdrill_pid[0] > '9' ||
+	    *end || !tmp || tmp > INT_MAX) {
+		fprintf(stderr, "invalid PACKETDRILL_PID: %s\n", packetdrill_pid);
+		return 1;
+	}
+	pid = (pid_t)tmp;
+
+	errno = 0;
+	tmp = strtoull(value, &end, 0);
+	if (errno || value[0] < '0' || value[0] > '9' ||
+	    *end || tmp > UINT_MAX) {
+		fprintf(stderr, "invalid truesize value: %s\n", value);
+		return 1;
+	}
+	extra = (unsigned int)tmp;
+
+	fd = open_packetdrill_tunfd(pid, ifname);
+	if (fd < 0) {
+		perror("open_packetdrill_tunfd");
+		return 1;
+	}
+
+	/* The scalar form of the ioctl takes the byte count by value. */
+	if (ioctl(fd, TUNSETTRUESIZE, (unsigned long)extra)) {
+		perror("ioctl(TUNSETTRUESIZE)");
+		close(fd);
+		return 1;
+	}
+
+	close(fd);
+	return 0;
+}
+
static int tun_open(char *dev, const int flags, const int hdrlen,
const int features, const unsigned char *mac_addr)
{
@@ -985,4 +1117,10 @@ XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);
-TEST_HARNESS_MAIN
+/* Entry point: "--set-rx-truesize <ifname> <bytes>" runs the packetdrill
+ * helper; every other command line is handed to the kselftest harness.
+ */
+int main(int argc, char **argv)
+{
+	bool helper_mode = argc == 4 && !strcmp(argv[1], "--set-rx-truesize");
+
+	if (!helper_mode)
+		return test_harness_run(argc, argv);
+
+	return packetdrill_set_rx_truesize(argv[2], argv[3]);
+}
--
2.43.0
On Sat, 14 Mar 2026 14:13:46 -0600 atwellwea@gmail.com wrote:
> Add a test-only TUN ioctl that inflates RX skb->truesize, plus the
> packetdrill-side helper needed to drive that ioctl through packetdrill's
> own TUN queue file descriptor.
>
> Use that plumbing to cover the receive-window regressions where
> scaling_ratio drifts after advertisement, alongside the baseline too-big
> packetdrill cases that exercise the same sender-visible rwnd accounting
> from the non-injected path.
You missed adding the tun program to the Makefile.
tcp-rcv-neg-window-truesize-pkt
tcp-rcv-toobig-default-truesize-pkt
tcp-rcv-wnd-shrink-allowed-truesize-pkt
Fail with:
sh: line 1: ../tun: No such file or directory
tcp_rcv_*_truesize.pkt:*: error executing `../tun --set-rx-truesize tun0 65536` command: non-zero status 127
https://netdev.bots.linux.dev/contest.html?pw-n=0&branch=net-next-2026-03-15--00-00&pw-n=0&pass=0
© 2016 - 2026 Red Hat, Inc.