net/ipv6/sit.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
When creating an ipip6 tunnel, if tunnel->parms.link refers to a previously
created tunnel device, dev->needed_headroom is computed on top of the
needed_headroom of that previous device, so it grows with each stacked tunnel.
If enough tunnel devices are stacked this way, needed_headroom (an unsigned
16-bit field) can overflow. The overflow happens like this:
ipip6_newlink
ipip6_tunnel_create
register_netdevice
ipip6_tunnel_init
ipip6_tunnel_bind_dev
t_hlen = tunnel->hlen + sizeof(struct iphdr); // 40
hlen = tdev->hard_header_len + tdev->needed_headroom; // 65496
dev->needed_headroom = t_hlen + hlen; // 65536 -> 0
After the wrap-around, the value of LL_RESERVED_SPACE(rt->dst.dev) may be as
small as HH_DATA_MOD, which leads to an undersized skb being allocated in
__ip_append_data(), which in turn triggers a skb_under_panic:
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:209!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
CPU: 0 UID: 0 PID: 24133 Comm: test Tainted: G W 6.14.0-rc7-00067-g76b6905c11fd-dirty #1
Tainted: [W]=WARN
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
RIP: 0010:skb_panic+0x156/0x1d0
Call Trace:
<TASK>
skb_push+0xc8/0xe0
fou_build_udp+0x31/0x3a0
gue_build_header+0xf7/0x150
ip_tunnel_xmit+0x684/0x3660
sit_tunnel_xmit__.isra.0+0xeb/0x150
sit_tunnel_xmit+0x2e3/0x2930
dev_hard_start_xmit+0x1a6/0x7b0
__dev_queue_xmit+0x2fa9/0x4120
neigh_connected_output+0x39e/0x590
ip_finish_output2+0x7bb/0x1f00
__ip_finish_output+0x442/0x940
ip_finish_output+0x31/0x380
ip_mc_output+0x1c4/0x6a0
ip_send_skb+0x339/0x570
udp_send_skb+0x905/0x1540
udp_sendmsg+0x17c8/0x28f0
udpv6_sendmsg+0x17f1/0x2c30
inet6_sendmsg+0x105/0x140
____sys_sendmsg+0x801/0xc70
___sys_sendmsg+0x110/0x1b0
__sys_sendmmsg+0x1f2/0x410
__x64_sys_sendmmsg+0x99/0x100
do_syscall_64+0x6e/0x1c0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
---[ end trace 0000000000000000 ]---
Fix this by adding a check for needed_headroom in ipip6_tunnel_bind_dev().
Reported-by: syzbot+4c63f36709a642f801c5@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4c63f36709a642f801c5
Fixes: c88f8d5cd95f ("sit: update dev->needed_headroom in ipip6_tunnel_bind_dev()")
Signed-off-by: Wang Liang <wangliang74@huawei.com>
---
net/ipv6/sit.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 39bd8951bfca..1662b735c5e3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1095,7 +1095,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
}
-static void ipip6_tunnel_bind_dev(struct net_device *dev)
+static int ipip6_tunnel_bind_dev(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
@@ -1134,7 +1134,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
WRITE_ONCE(dev->mtu, mtu);
hlen = tdev->hard_header_len + tdev->needed_headroom;
}
+
+ if (t_hlen + hlen > U16_MAX)
+ return -EOVERFLOW;
+
dev->needed_headroom = t_hlen + hlen;
+ return 0;
}
static void ipip6_tunnel_update(struct ip_tunnel *t,
@@ -1452,7 +1457,9 @@ static int ipip6_tunnel_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- ipip6_tunnel_bind_dev(dev);
+ err = ipip6_tunnel_bind_dev(dev);
+ if (err)
+ return err;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (err)
--
2.34.1
On Thu, Mar 27, 2025 at 2:48 AM Wang Liang <wangliang74@huawei.com> wrote:
>
> When create ipip6 tunnel, if tunnel->parms.link is assigned to the previous
> created tunnel device, the dev->needed_headroom will increase based on the
> previous one.
>
> If the number of tunnel device is sufficient, the needed_headroom can be
> overflowed. The overflow happens like this:
How many stacked devices would be needed to reach this point ?
I thought we had a limit, to make sure we do not overflow the kernel stack ?
>
> ipip6_newlink
> ipip6_tunnel_create
> register_netdevice
> ipip6_tunnel_init
> ipip6_tunnel_bind_dev
> t_hlen = tunnel->hlen + sizeof(struct iphdr); // 40
> hlen = tdev->hard_header_len + tdev->needed_headroom; // 65496
> dev->needed_headroom = t_hlen + hlen; // 65536 -> 0
>
> The value of LL_RESERVED_SPACE(rt->dst.dev) may be HH_DATA_MOD, that leads
> to a small skb allocated in __ip_append_data(), which triggers a
> skb_under_panic:
>
> ------------[ cut here ]------------
> kernel BUG at net/core/skbuff.c:209!
> Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> CPU: 0 UID: 0 PID: 24133 Comm: test Tainted: G W 6.14.0-rc7-00067-g76b6905c11fd-dirty #1
> Tainted: [W]=WARN
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
> RIP: 0010:skb_panic+0x156/0x1d0
> Call Trace:
> <TASK>
> skb_push+0xc8/0xe0
> fou_build_udp+0x31/0x3a0
> gue_build_header+0xf7/0x150
> ip_tunnel_xmit+0x684/0x3660
> sit_tunnel_xmit__.isra.0+0xeb/0x150
> sit_tunnel_xmit+0x2e3/0x2930
> dev_hard_start_xmit+0x1a6/0x7b0
> __dev_queue_xmit+0x2fa9/0x4120
> neigh_connected_output+0x39e/0x590
> ip_finish_output2+0x7bb/0x1f00
> __ip_finish_output+0x442/0x940
> ip_finish_output+0x31/0x380
> ip_mc_output+0x1c4/0x6a0
> ip_send_skb+0x339/0x570
> udp_send_skb+0x905/0x1540
> udp_sendmsg+0x17c8/0x28f0
> udpv6_sendmsg+0x17f1/0x2c30
> inet6_sendmsg+0x105/0x140
> ____sys_sendmsg+0x801/0xc70
> ___sys_sendmsg+0x110/0x1b0
> __sys_sendmmsg+0x1f2/0x410
> __x64_sys_sendmmsg+0x99/0x100
> do_syscall_64+0x6e/0x1c0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> ---[ end trace 0000000000000000 ]---
Can you provide symbols ?
scripts/decode_stacktrace.sh is your friend.
>
> Fix this by add check for needed_headroom in ipip6_tunnel_bind_dev().
>
> Reported-by: syzbot+4c63f36709a642f801c5@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=4c63f36709a642f801c5
> Fixes: c88f8d5cd95f ("sit: update dev->needed_headroom in ipip6_tunnel_bind_dev()")
> Signed-off-by: Wang Liang <wangliang74@huawei.com>
> ---
> net/ipv6/sit.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index 39bd8951bfca..1662b735c5e3 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -1095,7 +1095,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
>
> }
>
> -static void ipip6_tunnel_bind_dev(struct net_device *dev)
> +static int ipip6_tunnel_bind_dev(struct net_device *dev)
> {
> struct ip_tunnel *tunnel = netdev_priv(dev);
> int t_hlen = tunnel->hlen + sizeof(struct iphdr);
> @@ -1134,7 +1134,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
> WRITE_ONCE(dev->mtu, mtu);
> hlen = tdev->hard_header_len + tdev->needed_headroom;
> }
> +
> + if (t_hlen + hlen > U16_MAX)
> + return -EOVERFLOW;
> +
> dev->needed_headroom = t_hlen + hlen;
> + return 0;
> }
>
> static void ipip6_tunnel_update(struct ip_tunnel *t,
> @@ -1452,7 +1457,9 @@ static int ipip6_tunnel_init(struct net_device *dev)
> tunnel->net = dev_net(dev);
> strcpy(tunnel->parms.name, dev->name);
>
> - ipip6_tunnel_bind_dev(dev);
> + err = ipip6_tunnel_bind_dev(dev);
> + if (err)
> + return err;
>
> err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
> if (err)
> --
> 2.34.1
>
在 2025/3/27 12:32, Eric Dumazet 写道:
> On Thu, Mar 27, 2025 at 2:48 AM Wang Liang <wangliang74@huawei.com> wrote:
>> When create ipip6 tunnel, if tunnel->parms.link is assigned to the previous
>> created tunnel device, the dev->needed_headroom will increase based on the
>> previous one.
>>
>> If the number of tunnel device is sufficient, the needed_headroom can be
>> overflowed. The overflow happens like this:
> How many stacked devices would be needed to reach this point ?
In the ideal case, about 3277 (65536 / sizeof(struct iphdr)) stacked sit
devices are enough.
This issue can be easily reproduced by the C repro from
https://syzkaller.appspot.com/text?tag=ReproC&x=14fc39a4880000
It is the 2022/10/11 23:38 crash issue in
https://syzkaller.appspot.com/bug?extid=4c63f36709a642f801c5
>
> I thought we had a limit, to make sure we do not overflow the kernel stack ?
Commit 5ae1e9922bbd ("net: ip_tunnel: prevent perpetual headroom growth")
added a needed_headroom limit in ip_tunnel_adj_headroom(), which is applied
before the skb is sent. It does not help with this issue, because
needed_headroom has already overflowed when the device is created, so the skb
allocated in __ip_append_data() is too small.
>> ipip6_newlink
>> ipip6_tunnel_create
>> register_netdevice
>> ipip6_tunnel_init
>> ipip6_tunnel_bind_dev
>> t_hlen = tunnel->hlen + sizeof(struct iphdr); // 40
>> hlen = tdev->hard_header_len + tdev->needed_headroom; // 65496
>> dev->needed_headroom = t_hlen + hlen; // 65536 -> 0
>>
>> The value of LL_RESERVED_SPACE(rt->dst.dev) may be HH_DATA_MOD, that leads
>> to a small skb allocated in __ip_append_data(), which triggers a
>> skb_under_panic:
>>
>> ------------[ cut here ]------------
>> kernel BUG at net/core/skbuff.c:209!
>> Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
>> CPU: 0 UID: 0 PID: 24133 Comm: test Tainted: G W 6.14.0-rc7-00067-g76b6905c11fd-dirty #1
>> Tainted: [W]=WARN
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
>> RIP: 0010:skb_panic+0x156/0x1d0
>> Call Trace:
>> <TASK>
>> skb_push+0xc8/0xe0
>> fou_build_udp+0x31/0x3a0
>> gue_build_header+0xf7/0x150
>> ip_tunnel_xmit+0x684/0x3660
>> sit_tunnel_xmit__.isra.0+0xeb/0x150
>> sit_tunnel_xmit+0x2e3/0x2930
>> dev_hard_start_xmit+0x1a6/0x7b0
>> __dev_queue_xmit+0x2fa9/0x4120
>> neigh_connected_output+0x39e/0x590
>> ip_finish_output2+0x7bb/0x1f00
>> __ip_finish_output+0x442/0x940
>> ip_finish_output+0x31/0x380
>> ip_mc_output+0x1c4/0x6a0
>> ip_send_skb+0x339/0x570
>> udp_send_skb+0x905/0x1540
>> udp_sendmsg+0x17c8/0x28f0
>> udpv6_sendmsg+0x17f1/0x2c30
>> inet6_sendmsg+0x105/0x140
>> ____sys_sendmsg+0x801/0xc70
>> ___sys_sendmsg+0x110/0x1b0
>> __sys_sendmmsg+0x1f2/0x410
>> __x64_sys_sendmmsg+0x99/0x100
>> do_syscall_64+0x6e/0x1c0
>> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> ---[ end trace 0000000000000000 ]---
> Can you provide symbols ?
>
> scripts/decode_stacktrace.sh is your friend.
You can get the report in
https://syzkaller.appspot.com/text?tag=CrashReport&x=106b6b34880000
>> Fix this by add check for needed_headroom in ipip6_tunnel_bind_dev().
>>
>> Reported-by: syzbot+4c63f36709a642f801c5@syzkaller.appspotmail.com
>> Closes: https://syzkaller.appspot.com/bug?extid=4c63f36709a642f801c5
>> Fixes: c88f8d5cd95f ("sit: update dev->needed_headroom in ipip6_tunnel_bind_dev()")
>> Signed-off-by: Wang Liang <wangliang74@huawei.com>
>> ---
>> net/ipv6/sit.c | 11 +++++++++--
>> 1 file changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
>> index 39bd8951bfca..1662b735c5e3 100644
>> --- a/net/ipv6/sit.c
>> +++ b/net/ipv6/sit.c
>> @@ -1095,7 +1095,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
>>
>> }
>>
>> -static void ipip6_tunnel_bind_dev(struct net_device *dev)
>> +static int ipip6_tunnel_bind_dev(struct net_device *dev)
>> {
>> struct ip_tunnel *tunnel = netdev_priv(dev);
>> int t_hlen = tunnel->hlen + sizeof(struct iphdr);
>> @@ -1134,7 +1134,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
>> WRITE_ONCE(dev->mtu, mtu);
>> hlen = tdev->hard_header_len + tdev->needed_headroom;
>> }
>> +
>> + if (t_hlen + hlen > U16_MAX)
>> + return -EOVERFLOW;
>> +
>> dev->needed_headroom = t_hlen + hlen;
>> + return 0;
>> }
>>
>> static void ipip6_tunnel_update(struct ip_tunnel *t,
>> @@ -1452,7 +1457,9 @@ static int ipip6_tunnel_init(struct net_device *dev)
>> tunnel->net = dev_net(dev);
>> strcpy(tunnel->parms.name, dev->name);
>>
>> - ipip6_tunnel_bind_dev(dev);
>> + err = ipip6_tunnel_bind_dev(dev);
>> + if (err)
>> + return err;
>>
>> err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
>> if (err)
>> --
>> 2.34.1
>>
On Thu, Mar 27, 2025 at 7:33 AM Wang Liang <wangliang74@huawei.com> wrote: > > > > You can get the report in > https://syzkaller.appspot.com/text?tag=CrashReport&x=106b6b34880000 Well, please provide the most accurate stack trace with symbols in your patch then ? If you spent time reproducing the issue and providing your stack trace, please add the symbols.
在 2025/3/27 14:39, Eric Dumazet 写道: > On Thu, Mar 27, 2025 at 7:33 AM Wang Liang <wangliang74@huawei.com> wrote: >> >> >> You can get the report in >> https://syzkaller.appspot.com/text?tag=CrashReport&x=106b6b34880000 > Well, please provide the most accurate stack trace with symbols in > your patch then ? > > If you spent time reproducing the issue and providing your stack > trace, please add the symbols. Thank you for the reminder of decode_stacktrace.sh. I just reproduce the issue, and first use decode_stacktrace.sh to get the stack trace below[1], please check it. I will update the stack trace in my patch later. Thanks. [1] [ 895.885034][T23587] ------------[ cut here ]------------ [ 895.885951][T23587] kernel BUG at net/core/skbuff.c:209! [ 895.886889][T23587] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 895.888037][T23587] CPU: 0 UID: 0 PID: 23587 Comm: test Tainted: G W 6.14.0-00624-g2f2d52945852-dirty #15 [ 895.889837][T23587] Tainted: [W]=WARN [ 895.890469][T23587] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [895.891962][T23587] RIP: 0010:skb_panic (net/core/skbuff.c:209 (discriminator 4)) [ 895.892786][T23587] Code: 0f b6 04 01 84 c0 74 04 3c 03 7e 20 8b 4b 70 41 56 45 89 e8 48 c7 c7 c0 0c 7e 8b 41 57 56 48 89 ee 52 4c 89 e2 e8 6a 40 6e f9 <0f> 0b 4c 89 4c 24 10 48 89 54 24 08 48 89 34 24 e8 b5 68 ec f9 4c [ 895.895918][T23587] RSP: 0018:ffffc900000e6a18 EFLAGS: 00010282 [ 895.897396][T23587] RAX: 0000000000000088 RBX: ffff88809a0cd000 RCX: ffffffff819352e9 [ 895.898695][T23587] RDX: 0000000000000000 RSI: ffffffff8193bd1d RDI: 0000000000000005 [ 895.899992][T23587] RBP: ffffffff8b7e2020 R08: 0000000000000000 R09: fffffbfff1989a84 [ 895.901274][T23587] R10: 0000000000000200 R11: 000000000023df70 R12: ffffffff88d9b291 [ 895.902561][T23587] R13: 0000000000000008 R14: ffff88805013e120 R15: 0000000000000180 [ 895.903863][T23587] FS: 00000000162863c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 [ 
895.905307][T23587] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 895.906378][T23587] CR2: ffffffffff600400 CR3: 0000000094fcc000 CR4: 00000000000006f0 [ 895.907669][T23587] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 895.908960][T23587] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 895.910252][T23587] Call Trace: [ 895.910798][T23587] <TASK> [895.923567][T23587] skb_push (net/core/skbuff.c:2544) [895.924232][T23587] fou_build_udp (./include/linux/skbuff.h:3026 net/ipv4/fou_core.c:1041) [895.925001][T23587] gue_build_header (net/ipv4/fou_core.c:1085) [895.927586][T23587] ip_tunnel_xmit (./include/net/ip_tunnels.h:541 ./include/net/ip_tunnels.h:525 net/ipv4/ip_tunnel.c:780) [895.931769][T23587] sit_tunnel_xmit__.isra.0 (net/ipv6/sit.c:1065) [895.932682][T23587] sit_tunnel_xmit (net/ipv6/sit.c:1076) [895.937147][T23587] dev_hard_start_xmit (./include/linux/netdevice.h:5161 net/core/dev.c:3800 net/core/dev.c:3816) [895.937996][T23587] __dev_queue_xmit (net/core/dev.h:320 net/core/dev.c:4653) [895.945680][T23587] neigh_connected_output (./include/linux/netdevice.h:3313 net/core/neighbour.c:1543) [895.946570][T23587] ip_finish_output2 (./include/net/neighbour.h:539 net/ipv4/ip_output.c:236) [895.948304][T23587] __ip_finish_output (net/ipv4/ip_output.c:314 net/ipv4/ip_output.c:296) [895.949152][T23587] ip_finish_output (net/ipv4/ip_output.c:324) [895.949945][T23587] ip_mc_output (./include/linux/netfilter.h:303 net/ipv4/ip_output.c:421) [895.951538][T23587] ip_send_skb (./include/net/dst.h:459 ./include/net/dst.h:457 net/ipv4/ip_output.c:130 net/ipv4/ip_output.c:1502) [895.952279][T23587] udp_send_skb (net/ipv4/udp.c:1197) [895.953048][T23587] udp_sendmsg (net/ipv4/udp.c:1484) [895.962452][T23587] udpv6_sendmsg (net/ipv6/udp.c:1545 (discriminator 1)) [895.976909][T23587] inet6_sendmsg (net/ipv6/af_inet6.c:659 (discriminator 4)) [895.978530][T23587] ____sys_sendmsg (net/socket.c:718 net/socket.c:733 net/socket.c:2573) 
[895.982832][T23587] ___sys_sendmsg (net/socket.c:2629) [895.988814][T23587] __sys_sendmmsg (net/socket.c:2719) [895.994530][T23587] __x64_sys_sendmmsg (net/socket.c:2740) [895.996217][T23587] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) [895.996965][T23587] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 895.997937][T23587] RIP: 0033:0x44a19d [ 895.998581][T23587] Code: c3 e8 37 1f 00 00 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 [ 896.001683][T23587] RSP: 002b:00007fffc1b01a88 EFLAGS: 00000216 ORIG_RAX: 0000000000000133 [ 896.003032][T23587] RAX: ffffffffffffffda RBX: 0000000020000014 RCX: 000000000044a19d [ 896.004311][T23587] RDX: 0000000000000001 RSI: 00000000200017c0 RDI: 0000000000000003 [ 896.005595][T23587] RBP: 00007fffc1b01ab0 R08: 0000000000000000 R09: 0000000000000000 [ 896.006891][T23587] R10: 0000000000000000 R11: 0000000000000216 R12: 0000000000000001 [ 896.008164][T23587] R13: 00007fffc1b01cf8 R14: 00000000004c4710 R15: 0000000000000001 [ 896.009454][T23587] </TASK> [ 896.009969][T23587] Modules linked in: [ 896.010664][T23587] ---[ end trace 0000000000000000 ]---
© 2016 - 2025 Red Hat, Inc.