net/xfrm/xfrm_output.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+)
The IPv4 output path validates ihl before handing packets to xfrm,
and raw_send_hdrinc() now rejects IP_HDRINCL packets with ihl < 5.
xfrm can still see a malformed IPv4 skb after that point, for example
if a netfilter rule rewrites the packet between the normal IPv4 checks
and the xfrm output transform.
Do not let xfrm output consumers be the first code to discover that
malformed header. xfrm4_transport_output() consumes iph->ihl before
AH gets control, and the BEET/tunnel path records IPv4 option length
from iph->ihl before constructing the outer header.
Validate IPv4 skbs before xfrm output handles offload/GSO and again
before each software outer-mode transform. Warn once for ihl < 5,
since that means a malformed IPv4 packet was reinjected after the
normal IP stack checks, and reject the packet before transform code can
consume the bogus header length.
A QEMU regression test with an nft payload rule on the IPv4 output
hook rewriting byte 0 to 0x40 now reaches the WARN_ON_ONCE, drops the
packet before AH, leaves xfrm packet counters at zero, and exits
without a panic.
A valid AH transport regression test with normal ihl=5 UDP still
succeeds: five sends complete and the xfrm state accounts 75 bytes and
five packets.
Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
---
Posting this as a follow-up to Herbert's requests in the existing
thread. Patch 1/2 from the original series landed as 915fab69823a1;
the AH-only hardening did not. I read the patch-2 comment as asking
for the defensive guard to live in common xfrm output code rather than
per-consumer, and this patch is my attempt at that first block. Happy
to revise if you'd prefer the validator placed differently (only at
xfrm_outer_mode_output, only at xfrm_output, gated under a debug
option, or moved further out to __ip_local_out / xfrm4_extract_output)
or the WARN_ON_ONCE swapped for a silent counter.
net/xfrm/xfrm_output.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc35c2fcbbe09..02f38eaa68ff6 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -27,6 +27,31 @@
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
+static int xfrm_output_validate_iphdr(struct sk_buff *skb)
+{ /* Returns 0 for non-IPv4 skbs and for IPv4 skbs that pass every check below; -EINVAL otherwise. */
+ struct iphdr *iph;
+ unsigned int ihl;
+
+ if (skb->protocol != htons(ETH_P_IP)) /* only IPv4 skbs are inspected */
+ return 0;
+
+ if (unlikely(!pskb_network_may_pull(skb, sizeof(struct iphdr)))) /* fixed-size header must be pullable */
+ return -EINVAL;
+
+ iph = ip_hdr(skb); /* read the header only after the pull check succeeded */
+ if (unlikely(iph->version != 4))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(iph->ihl < 5)) /* the only failure that also warns (once) */
+ return -EINVAL;
+
+ ihl = ip_hdrlen(skb); /* full header length as reported by ip_hdrlen() */
+ if (unlikely(!pskb_network_may_pull(skb, ihl))) /* the complete header must be pullable too */
+ return -EINVAL;
+
+ return 0;
+}
+
static int xfrm_skb_check_space(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -459,6 +484,12 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
+ int err;
+
+ err = xfrm_output_validate_iphdr(skb);
+ if (err)
+ return err;
+
switch (x->props.mode) {
case XFRM_MODE_BEET:
case XFRM_MODE_TUNNEL:
@@ -769,6 +800,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
break;
}
+ err = xfrm_output_validate_iphdr(skb);
+ if (err) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ return err;
+ }
+
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
if (!xfrm_dev_offload_ok(skb, x)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
base-commit: aaec7096f9961eb223b5b149abe9495525c205d9
--
2.53.0
© 2016 - 2026 Red Hat, Inc.