[PATCH net] net: vlan: set header_ops to match hard_header_len when hw offload is toggled

Chen Zhen posted 1 patch 1 month, 1 week ago
net/8021q/vlan.c     |  5 +----
net/8021q/vlan.h     |  3 +++
net/8021q/vlan_dev.c | 22 ++++++++++++++--------
3 files changed, 18 insertions(+), 12 deletions(-)
[PATCH net] net: vlan: set header_ops to match hard_header_len when hw offload is toggled
Posted by Chen Zhen 1 month, 1 week ago
We found a skb_panic in vlan_dev_hard_header():

skbuff: skb_under_panic: text:ffffffff95b33e66 len:90 put:14 head:ffff915ac1967440 data:ffff915ac196743e tail:0x58 end:0x180 dev:br0.10
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:197!
Call Trace:
 <TASK>
 skb_push+0x39/0x40
 eth_header+0x26/0xb0
 vlan_dev_hard_header+0x58/0x130 [8021q]
 neigh_connected_output+0xae/0x100
 ip6_finish_output2+0x2cc/0x650
 ? nf_hook_slow+0x41/0xc0
 ip6_finish_output+0x27/0xd0
 ndisc_send_skb+0x1d0/0x370
 ? __pfx_dst_output+0x10/0x10
 ndisc_send_ns+0x5a/0xb0
 addrconf_dad_work+0x2b5/0x380
 process_one_work+0x17f/0x320
 worker_thread+0x26d/0x2f0
 ? __pfx_worker_thread+0x10/0x10
 kthread+0xcc/0x100
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x30/0x50
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1b/0x30
 </TASK>

This bug can be easily reproduced by these steps:

 ip link add veth0 type veth peer name veth1
 ip link set veth0 up
 ip link set veth1 up
 ethtool -K veth0 tx-vlan-hw-insert off
 # vlandev.header_ops = vlan_header_ops, vlandev.hard_header_len = 18 (veth0 hard_header_len + VLAN_HLEN)
 ip link add link veth0 name veth0.10 type vlan id 10 reorder_hdr off
 ip addr add 192.168.10.1/24 dev veth0.10
 ip link set veth0.10 up
 # vlandev.hard_header_len = 14 (veth0 hard_header_len), header_ops is still vlan_header_ops
 ethtool -K veth0 tx-vlan-hw-insert on
 # Panic!

The reason is that when NETIF_F_HW_VLAN_CTAG_TX is turned on,
vlandev.hard_header_len is set to dev->hard_header_len since commit
029f5fc31cdb ("8021q: set hard_header_len when VLAN offload features
are toggled"), but header_ops remains unchanged, i.e. it still points
to vlan_header_ops. neigh_connected_output() then calls
vlan_dev_hard_header(), which panics in skb_push() because
reorder_hdr is off.

Fix this by also setting header_ops of vlan dev when offload feature is toggled.

Fixes: 029f5fc31cdb ("8021q: set hard_header_len when VLAN offload features are toggled")
Signed-off-by: Chen Zhen <chenzhen126@huawei.com>
---
 net/8021q/vlan.c     |  5 +----
 net/8021q/vlan.h     |  3 +++
 net/8021q/vlan_dev.c | 22 ++++++++++++++--------
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2b74ed56eb16..84b3a3f67996 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -323,10 +323,7 @@ static void vlan_transfer_features(struct net_device *dev,
 
 	netif_inherit_tso_max(vlandev, dev);
 
-	if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
-		vlandev->hard_header_len = dev->hard_header_len;
-	else
-		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
+	vlan_dev_set_header_attributes(dev, vlandev, vlan->vlan_proto);
 
 #if IS_ENABLED(CONFIG_FCOE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index c7ffe591d593..1d837814e061 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -143,6 +143,9 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 bool vlan_dev_inherit_address(struct net_device *dev,
 			      struct net_device *real_dev);
+void vlan_dev_set_header_attributes(struct net_device *dev,
+				    struct net_device *vlan_dev,
+				    __be16 proto);
 
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 					    u16 vlan_tci)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fbf296137b09..1fe171748711 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -519,6 +519,19 @@ static const struct device_type vlan_type = {
 
 static const struct net_device_ops vlan_netdev_ops;
 
+void vlan_dev_set_header_attributes(struct net_device *dev,
+				    struct net_device *vlan_dev,
+				    __be16 proto)
+{
+	if (vlan_hw_offload_capable(dev->features, proto)) {
+		vlan_dev->header_ops      = &vlan_passthru_header_ops;
+		vlan_dev->hard_header_len = dev->hard_header_len;
+	} else {
+		vlan_dev->header_ops      = &vlan_header_ops;
+		vlan_dev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
+	}
+}
+
 static int vlan_dev_init(struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -572,14 +585,7 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (vlan_hw_offload_capable(real_dev->features, vlan->vlan_proto)) {
-		dev->header_ops      = &vlan_passthru_header_ops;
-		dev->hard_header_len = real_dev->hard_header_len;
-	} else {
-		dev->header_ops      = &vlan_header_ops;
-		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
-	}
-
+	vlan_dev_set_header_attributes(real_dev, dev, vlan->vlan_proto);
 	dev->netdev_ops = &vlan_netdev_ops;
 
 	SET_NETDEV_DEVTYPE(dev, &vlan_type);
-- 
2.33.0
Re: [PATCH net] net: vlan: set header_ops to match hard_header_len when hw offload is toggled
Posted by Jakub Kicinski 1 month ago
On Wed, 31 Dec 2025 11:54:19 +0800 Chen Zhen wrote:
> skbuff: skb_under_panic: text:ffffffff95b33e66 len:90 put:14 head:ffff915ac1967440 data:ffff915ac196743e tail:0x58 end:0x180 dev:br0.10
> ------------[ cut here ]------------
> kernel BUG at net/core/skbuff.c:197!
> Call Trace:
>  <TASK>
>  skb_push+0x39/0x40
>  eth_header+0x26/0xb0
>  vlan_dev_hard_header+0x58/0x130 [8021q]
>  neigh_connected_output+0xae/0x100
>  ip6_finish_output2+0x2cc/0x650
>  ? nf_hook_slow+0x41/0xc0
>  ip6_finish_output+0x27/0xd0
>  ndisc_send_skb+0x1d0/0x370
>  ? __pfx_dst_output+0x10/0x10
>  ndisc_send_ns+0x5a/0xb0
>  addrconf_dad_work+0x2b5/0x380
>  process_one_work+0x17f/0x320

Please run this stack trace through scripts/decode_stacktrace.sh.
You can also cut it off here; there is no need to include the
functions below process_one_work, as they are irrelevant.

>  worker_thread+0x26d/0x2f0
>  ? __pfx_worker_thread+0x10/0x10
>  kthread+0xcc/0x100
>  ? __pfx_kthread+0x10/0x10
>  ret_from_fork+0x30/0x50
>  ? __pfx_kthread+0x10/0x10
>  ret_from_fork_asm+0x1b/0x30
>  </TASK>
> 
> This bug can be easily reproduced by these steps:
> 
>  ip link add veth0 type veth peer name veth1
>  ip link set veth0 up
>  ip link set veth1 up
>  ethtool -K veth0 tx-vlan-hw-insert off
>  # vlandev.header_ops = vlan_header_ops, hard_header_len = 18(hard_header_len + VLAN_HLEN)
>  ip link add link veth0 name veth0.10 type vlan id 10 reorder_hdr off
>  ip addr add 192.168.10.1/24 dev veth0.10
>  ip link set veth0.10 up
>  # vlandev.hard_header_len = 14(hard_header_len)
>  ethtool -K veth0 tx-vlan-hw-insert on
>  # Panic!

Instead of putting this in the commit message please add a selftest
which will automatically catch re-occurrence of the issue.

> The reason is that when NETIF_F_HW_VLAN_CTAG_TX is off, vlandev.hard_header_len will be set to
> dev->hard_header_len since commit 029f5fc31cdb ("8021q: set hard_header_len when VLAN offload features
> are toggled"), but the header_ops remains unchanged. Then neigh_connected_output() will call
> vlan_dev_hard_header() and panic in skb_push() because reorder_hdr is off.

Please wrap commit messages at 70 columns.
-- 
pw-bot: cr
Re: [PATCH net] net: vlan: set header_ops to match hard_header_len when hw offload is toggled
Posted by Chen Zhen 1 month ago
On 26/1/6 8:22, Jakub Kicinski wrote:
> On Wed, 31 Dec 2025 11:54:19 +0800 Chen Zhen wrote:
>> skbuff: skb_under_panic: text:ffffffff95b33e66 len:90 put:14 head:ffff915ac1967440 data:ffff915ac196743e tail:0x58 end:0x180 dev:br0.10
>> ------------[ cut here ]------------
>> kernel BUG at net/core/skbuff.c:197!
>> Call Trace:
>>  <TASK>
>>  skb_push+0x39/0x40
>>  eth_header+0x26/0xb0
>>  vlan_dev_hard_header+0x58/0x130 [8021q]
>>  neigh_connected_output+0xae/0x100
>>  ip6_finish_output2+0x2cc/0x650
>>  ? nf_hook_slow+0x41/0xc0
>>  ip6_finish_output+0x27/0xd0
>>  ndisc_send_skb+0x1d0/0x370
>>  ? __pfx_dst_output+0x10/0x10
>>  ndisc_send_ns+0x5a/0xb0
>>  addrconf_dad_work+0x2b5/0x380
>>  process_one_work+0x17f/0x320
> 
> Please run this stack trace thru script/decode_stacktrace
> and you can cut off here, no need to include functions
> below process_one_work, they are irrelevant.
> 
>>  worker_thread+0x26d/0x2f0
>>  ? __pfx_worker_thread+0x10/0x10
>>  kthread+0xcc/0x100
>>  ? __pfx_kthread+0x10/0x10
>>  ret_from_fork+0x30/0x50
>>  ? __pfx_kthread+0x10/0x10
>>  ret_from_fork_asm+0x1b/0x30
>>  </TASK>
>>
>> This bug can be easily reproduced by these steps:
>>
>>  ip link add veth0 type veth peer name veth1
>>  ip link set veth0 up
>>  ip link set veth1 up
>>  ethtool -K veth0 tx-vlan-hw-insert off
>>  # vlandev.header_ops = vlan_header_ops, hard_header_len = 18(hard_header_len + VLAN_HLEN)
>>  ip link add link veth0 name veth0.10 type vlan id 10 reorder_hdr off
>>  ip addr add 192.168.10.1/24 dev veth0.10
>>  ip link set veth0.10 up
>>  # vlandev.hard_header_len = 14(hard_header_len)
>>  ethtool -K veth0 tx-vlan-hw-insert on
>>  # Panic!
> 
> Instead of putting this in the commit message please add a selftest
> which will automatically catch re-occurrence of the issue.
> 
>> The reason is that when NETIF_F_HW_VLAN_CTAG_TX is off, vlandev.hard_header_len will be set to
>> dev->hard_header_len since commit 029f5fc31cdb ("8021q: set hard_header_len when VLAN offload features
>> are toggled"), but the header_ops remains unchanged. Then neigh_connected_output() will call
>> vlan_dev_hard_header() and panic in skb_push() because reorder_hdr is off.
> 
> Please wrap commit messages at 70 columns.

Thanks for your patient review. I will send v2 of the patch soon.

Best regards,
Chen Zhen