net/core/skbuff.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-)
Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
once per nskb") added the call to zero copy functions in skb_segment().
The change introduced a bug in skb_segment() because skb_orphan_frags()
may possibly change the number of fragments or allocate new fragments
altogether leaving nrfrags and frag to point to the old values. This can
cause a panic with stacktrace like the one below.
[ 193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
[ 193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G O 5.15.123+ #26
[ 193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
[ 194.021892] Call Trace:
[ 194.027422] <TASK>
[ 194.072861] tcp_gso_segment+0x107/0x540
[ 194.082031] inet_gso_segment+0x15c/0x3d0
[ 194.090783] skb_mac_gso_segment+0x9f/0x110
[ 194.095016] __skb_gso_segment+0xc1/0x190
[ 194.103131] netem_enqueue+0x290/0xb10 [sch_netem]
[ 194.107071] dev_qdisc_enqueue+0x16/0x70
[ 194.110884] __dev_queue_xmit+0x63b/0xb30
[ 194.121670] bond_start_xmit+0x159/0x380 [bonding]
[ 194.128506] dev_hard_start_xmit+0xc3/0x1e0
[ 194.131787] __dev_queue_xmit+0x8a0/0xb30
[ 194.138225] macvlan_start_xmit+0x4f/0x100 [macvlan]
[ 194.141477] dev_hard_start_xmit+0xc3/0x1e0
[ 194.144622] sch_direct_xmit+0xe3/0x280
[ 194.147748] __dev_queue_xmit+0x54a/0xb30
[ 194.154131] tap_get_user+0x2a8/0x9c0 [tap]
[ 194.157358] tap_sendmsg+0x52/0x8e0 [tap]
[ 194.167049] handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
[ 194.173631] handle_tx+0xcd/0xe0 [vhost_net]
[ 194.176959] vhost_worker+0x76/0xb0 [vhost]
[ 194.183667] kthread+0x118/0x140
[ 194.190358] ret_from_fork+0x1f/0x30
[ 194.193670] </TASK>
In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
local variable in skb_segment() stale. This resulted in the code hitting
i >= nrfrags prematurely and trying to move to next frag_skb using
list_skb pointer, which was NULL, and caused kernel panic. Move the call
to zero copy functions before using frags and nr_frags.
Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
Reported-by: Amit Goyal <agoyal@purestorage.com>
Cc: stable@vger.kernel.org
---
net/core/skbuff.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..18a33dc2d6af 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
@@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
@@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
SKBFL_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
--
2.17.1
On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
<mkhalfella@purestorage.com> wrote:
>
> Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
> once per nskb") added the call to zero copy functions in skb_segment().
> The change introduced a bug in skb_segment() because skb_orphan_frags()
> may possibly change the number of fragments or allocate new fragments
> altogether leaving nrfrags and frag to point to the old values. This can
> cause a panic with stacktrace like the one below.
>
> [ 193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
> [ 193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G O 5.15.123+ #26
> [ 193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
> [ 194.021892] Call Trace:
> [ 194.027422] <TASK>
> [ 194.072861] tcp_gso_segment+0x107/0x540
> [ 194.082031] inet_gso_segment+0x15c/0x3d0
> [ 194.090783] skb_mac_gso_segment+0x9f/0x110
> [ 194.095016] __skb_gso_segment+0xc1/0x190
> [ 194.103131] netem_enqueue+0x290/0xb10 [sch_netem]
> [ 194.107071] dev_qdisc_enqueue+0x16/0x70
> [ 194.110884] __dev_queue_xmit+0x63b/0xb30
> [ 194.121670] bond_start_xmit+0x159/0x380 [bonding]
> [ 194.128506] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.131787] __dev_queue_xmit+0x8a0/0xb30
> [ 194.138225] macvlan_start_xmit+0x4f/0x100 [macvlan]
> [ 194.141477] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.144622] sch_direct_xmit+0xe3/0x280
> [ 194.147748] __dev_queue_xmit+0x54a/0xb30
> [ 194.154131] tap_get_user+0x2a8/0x9c0 [tap]
> [ 194.157358] tap_sendmsg+0x52/0x8e0 [tap]
> [ 194.167049] handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
> [ 194.173631] handle_tx+0xcd/0xe0 [vhost_net]
> [ 194.176959] vhost_worker+0x76/0xb0 [vhost]
> [ 194.183667] kthread+0x118/0x140
> [ 194.190358] ret_from_fork+0x1f/0x30
> [ 194.193670] </TASK>
>
> In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
> local variable in skb_segment() stale. This resulted in the code hitting
> i >= nrfrags prematurely and trying to move to next frag_skb using
> list_skb pointer, which was NULL, and caused kernel panic. Move the call
> to zero copy functions before using frags and nr_frags.
>
> Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> Reported-by: Amit Goyal <agoyal@purestorage.com>
> Cc: stable@vger.kernel.org
> ---
> net/core/skbuff.c | 34 ++++++++++++++++++++--------------
> 1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index a298992060e6..18a33dc2d6af 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> struct sk_buff *segs = NULL;
> struct sk_buff *tail = NULL;
> struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
> - skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> unsigned int mss = skb_shinfo(head_skb)->gso_size;
> unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> - struct sk_buff *frag_skb = head_skb;
> unsigned int offset = doffset;
> unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> unsigned int partial_segs = 0;
> unsigned int headroom;
> unsigned int len = head_skb->len;
> + struct sk_buff *frag_skb;
> + skb_frag_t *frag;
> __be16 proto;
> bool csum, sg;
> - int nfrags = skb_shinfo(head_skb)->nr_frags;
> int err = -ENOMEM;
> int i = 0;
> - int pos;
> + int nfrags, pos;
>
> if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
> mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
> @@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> headroom = skb_headroom(head_skb);
> pos = skb_headlen(head_skb);
>
> + if (skb_orphan_frags(head_skb, GFP_ATOMIC))
> + return ERR_PTR(-ENOMEM);
> +
> + nfrags = skb_shinfo(head_skb)->nr_frags;
> + frag = skb_shinfo(head_skb)->frags;
> + frag_skb = head_skb;
> +
> do {
> struct sk_buff *nskb;
> skb_frag_t *nskb_frag;
> @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> (skb_headlen(list_skb) == len || sg)) {
> BUG_ON(skb_headlen(list_skb) > len);
>
> + nskb = skb_clone(list_skb, GFP_ATOMIC);
> + if (unlikely(!nskb))
> + goto err;
> +
This patch is quite complex to review, so I am asking if this part was
really needed ?
<1> : You moved here <2> and <3>
If this is not strictly needed, please keep the code as is to ease
code review...
> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> frag++;
> }
>
> - nskb = skb_clone(list_skb, GFP_ATOMIC);
<2>
> list_skb = list_skb->next;
>
> - if (unlikely(!nskb))
> - goto err;
> -
<3>
> if (unlikely(pskb_trim(nskb, len))) {
> kfree_skb(nskb);
> goto err;
> @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> SKBFL_SHARED_FRAG;
>
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> + if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
Why using list_skb here instead of frag_skb ?
Again, I have to look at the whole thing to understand why you did this.
> goto err;
>
> while (pos < offset + len) {
> if (i >= nfrags) {
> + if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
> + skb_zerocopy_clone(nskb, list_skb,
> + GFP_ATOMIC))
> + goto err;
> +
This part is fine.
> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> i--;
> frag--;
> }
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb,
> - GFP_ATOMIC))
> - goto err;
>
> list_skb = list_skb->next;
> }
> --
> 2.17.1
>
Thanks.
On 2023-08-31 08:58:51 +0200, Eric Dumazet wrote:
> On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
> <mkhalfella@purestorage.com> wrote:
> > do {
> > struct sk_buff *nskb;
> > skb_frag_t *nskb_frag;
> > @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > (skb_headlen(list_skb) == len || sg)) {
> > BUG_ON(skb_headlen(list_skb) > len);
> >
> > + nskb = skb_clone(list_skb, GFP_ATOMIC);
> > + if (unlikely(!nskb))
> > + goto err;
> > +
>
> This patch is quite complex to review, so I am asking if this part was
> really needed ?
Unfortunately the patch is complex because I try to avoid calling
skb_orphan_frags() in the middle of processing these frags. Otherwise
it would be much harder to implement because as reallocated frags do not
map 1:1 with existing frags as Willem mentioned.
> <1> : You moved here <2> and <3>
<2> was moved here because skb_clone() calls skb_orphan_frags(). By
moving this up we do not need to call skb_orphan_frags() for list_skb
and we can start to use nr_frags and frags without worrying their value
is going to change.
<3> was moved here because <2> was moved here. Fail fast if we can not
clone list_skb.
>
> If this is not strictly needed, please keep the code as is to ease
> code review...
>
> > i = 0;
> > nfrags = skb_shinfo(list_skb)->nr_frags;
> > frag = skb_shinfo(list_skb)->frags;
> > @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > frag++;
> > }
> >
> > - nskb = skb_clone(list_skb, GFP_ATOMIC);
>
> <2>
>
> > list_skb = list_skb->next;
> >
> > - if (unlikely(!nskb))
> > - goto err;
> > -
>
> <3>
>
> > if (unlikely(pskb_trim(nskb, len))) {
> > kfree_skb(nskb);
> > goto err;
> > @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> > SKBFL_SHARED_FRAG;
> >
> > - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> > - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> > + if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
>
> Why using list_skb here instead of frag_skb ?
> Again, I have to look at the whole thing to understand why you did this.
Oops, this is a mistake. It should be frag_skb. Will fix it run the test
one more time and post v3.
On Thu, Aug 31, 2023 at 9:30 AM Mohamed Khalfella
<mkhalfella@purestorage.com> wrote:
>
> On 2023-08-31 08:58:51 +0200, Eric Dumazet wrote:
> > On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
> > <mkhalfella@purestorage.com> wrote:
> > > do {
> > > struct sk_buff *nskb;
> > > skb_frag_t *nskb_frag;
> > > @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > > (skb_headlen(list_skb) == len || sg)) {
> > > BUG_ON(skb_headlen(list_skb) > len);
> > >
> > > + nskb = skb_clone(list_skb, GFP_ATOMIC);
> > > + if (unlikely(!nskb))
> > > + goto err;
> > > +
> >
> > This patch is quite complex to review, so I am asking if this part was
> > really needed ?
>
> Unfortunately the patch is complex because I try to avoid calling
> skb_orphan_frags() in the middle of processing these frags. Otherwise
> it would be much harder to implement because as reallocated frags do not
> map 1:1 with existing frags as Willem mentioned.
>
> > <1> : You moved here <2> and <3>
>
> <2> was moved here because skb_clone() calls skb_orphan_frags(). By
Oh right, I think we should amend skb_clone() documentation, it is
slightly wrong.
( I will take care of this change)
> moving this up we do not need to call skb_orphan_frags() for list_skb
> and we can start to use nr_frags and frags without worrying their value
> is going to change.
>
> <3> was moved here because <2> was moved here. Fail fast if we can not
> clone list_skb.
>
> >
> > If this is not strictly needed, please keep the code as is to ease
> > code review...
> >
> > > i = 0;
> > > nfrags = skb_shinfo(list_skb)->nr_frags;
> > > frag = skb_shinfo(list_skb)->frags;
> > > @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > > frag++;
> > > }
> > >
> > > - nskb = skb_clone(list_skb, GFP_ATOMIC);
> >
> > <2>
> >
> > > list_skb = list_skb->next;
> > >
> > > - if (unlikely(!nskb))
> > > - goto err;
> > > -
> >
> > <3>
> >
> > > if (unlikely(pskb_trim(nskb, len))) {
> > > kfree_skb(nskb);
> > > goto err;
> > > @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > > skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> > > SKBFL_SHARED_FRAG;
> > >
> > > - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> > > - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> > > + if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
> >
> > Why using list_skb here instead of frag_skb ?
> > Again, I have to look at the whole thing to understand why you did this.
>
> Oops, this is a mistake. It should be frag_skb. Will fix it run the test
> one more time and post v3.
© 2016 - 2026 Red Hat, Inc.