[PATCH net-next 1/3] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue

Justin Iurman posted 3 patches 1 month ago
There is a newer version of this series
[PATCH net-next 1/3] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue
Posted by Justin Iurman 1 month ago
This patch mitigates the two-reallocations issue with ioam6_iptunnel by
providing the dst_entry (in the cache) to the first call to
skb_cow_head(). As a result, the very first iteration would still
trigger two reallocations (i.e., empty cache), while next iterations
would only trigger a single reallocation.

Performance tests before/after applying this patch, which clearly shows
the improvement:
- inline mode:
  - before: https://ibb.co/LhQ8V63
  - after: https://ibb.co/x5YT2bS
- encap mode:
  - before: https://ibb.co/3Cjm5m0
  - after: https://ibb.co/TwpsxTC
- encap mode with tunsrc:
  - before: https://ibb.co/Gpy9QPg
  - after: https://ibb.co/PW1bZFT

This patch also fixes an incorrect behavior: after the insertion, the
second call to skb_cow_head() makes sure that the dev has enough
headroom in the skb for layer 2 and stuff. In that case, the "old"
dst_entry was used, which is now fixed. After discussing with Paolo, it
appears that both patches can be merged into a single one -this one-
(for the sake of readability) and target net-next.

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
---
 net/ipv6/ioam6_iptunnel.c | 84 ++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 41 deletions(-)

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index beb6b4cfc551..da79055fc02a 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -255,14 +255,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
 }
 
 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
-			   struct ioam6_lwt_encap *tuninfo)
+			   struct ioam6_lwt_encap *tuninfo,
+			   struct dst_entry *dst)
 {
 	struct ipv6hdr *oldhdr, *hdr;
 	int hdrlen, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 
-	err = skb_cow_head(skb, hdrlen + skb->mac_len);
+	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
+					       : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -293,16 +295,17 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 			  struct ioam6_lwt_encap *tuninfo,
 			  bool has_tunsrc,
 			  struct in6_addr *tunsrc,
-			  struct in6_addr *tundst)
+			  struct in6_addr *tundst,
+			  struct dst_entry *dst)
 {
-	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr, *inner_hdr;
 	int hdrlen, len, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 	len = sizeof(*hdr) + hdrlen;
 
-	err = skb_cow_head(skb, len + skb->mac_len);
+	err = skb_cow_head(skb, len + (!dst ? skb->mac_len
+					    : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -326,7 +329,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 	if (has_tunsrc)
 		memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
 	else
-		ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+		ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
 				   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
 
 	skb_postpush_rcsum(skb, hdr, len);
@@ -336,7 +339,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 
 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb_dst(skb);
+	struct dst_entry *dst, *orig_dst = skb_dst(skb);
 	struct in6_addr orig_daddr;
 	struct ioam6_lwt *ilwt;
 	int err = -EINVAL;
@@ -345,7 +348,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto drop;
 
-	ilwt = ioam6_lwt_state(dst->lwtstate);
+	ilwt = ioam6_lwt_state(orig_dst->lwtstate);
 
 	/* Check for insertion frequency (i.e., "k over n" insertions) */
 	pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
@@ -354,6 +357,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	orig_daddr = ipv6_hdr(skb)->daddr;
 
+	local_bh_disable();
+	dst = dst_cache_get(&ilwt->cache);
+	local_bh_enable();
+
 	switch (ilwt->mode) {
 	case IOAM6_IPTUNNEL_MODE_INLINE:
 do_inline:
@@ -361,7 +368,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
 			goto out;
 
-		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -371,7 +378,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		/* Encapsulation (ip6ip6) */
 		err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
 				     ilwt->has_tunsrc, &ilwt->tunsrc,
-				     &ilwt->tundst);
+				     &ilwt->tundst, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -389,45 +396,40 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
-	if (unlikely(err))
-		goto drop;
+	if (unlikely(!dst)) {
+		struct ipv6hdr *hdr = ipv6_hdr(skb);
+		struct flowi6 fl6;
+
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.daddr = hdr->daddr;
+		fl6.saddr = hdr->saddr;
+		fl6.flowlabel = ip6_flowinfo(hdr);
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_proto = hdr->nexthdr;
+
+		dst = ip6_route_output(net, NULL, &fl6);
+		if (dst->error) {
+			err = dst->error;
+			dst_release(dst);
+			goto drop;
+		}
 
-	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
 		local_bh_disable();
-		dst = dst_cache_get(&ilwt->cache);
+		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
 		local_bh_enable();
 
-		if (unlikely(!dst)) {
-			struct ipv6hdr *hdr = ipv6_hdr(skb);
-			struct flowi6 fl6;
-
-			memset(&fl6, 0, sizeof(fl6));
-			fl6.daddr = hdr->daddr;
-			fl6.saddr = hdr->saddr;
-			fl6.flowlabel = ip6_flowinfo(hdr);
-			fl6.flowi6_mark = skb->mark;
-			fl6.flowi6_proto = hdr->nexthdr;
-
-			dst = ip6_route_output(net, NULL, &fl6);
-			if (dst->error) {
-				err = dst->error;
-				dst_release(dst);
-				goto drop;
-			}
-
-			local_bh_disable();
-			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
-			local_bh_enable();
-		}
+		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+		if (unlikely(err))
+			goto drop;
+	}
 
-		skb_dst_drop(skb);
-		skb_dst_set(skb, dst);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
 
+	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
 		return dst_output(net, sk, skb);
-	}
 out:
-	return dst->lwtstate->orig_output(net, sk, skb);
+	return orig_dst->lwtstate->orig_output(net, sk, skb);
 drop:
 	kfree_skb(skb);
 	return err;
-- 
2.34.1
Re: [PATCH net-next 1/3] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue
Posted by Alexander Lobakin 1 month ago
From: Justin Iurman <justin.iurman@uliege.be>
Date: Fri, 25 Oct 2024 15:37:25 +0200

> This patch mitigates the two-reallocations issue with ioam6_iptunnel by
> providing the dst_entry (in the cache) to the first call to
> skb_cow_head(). As a result, the very first iteration would still
> trigger two reallocations (i.e., empty cache), while next iterations
> would only trigger a single reallocation.

[...]

>  static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
> -			   struct ioam6_lwt_encap *tuninfo)
> +			   struct ioam6_lwt_encap *tuninfo,
> +			   struct dst_entry *dst)
>  {
>  	struct ipv6hdr *oldhdr, *hdr;
>  	int hdrlen, err;
>  
>  	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>  
> -	err = skb_cow_head(skb, hdrlen + skb->mac_len);
> +	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
> +					       : LL_RESERVED_SPACE(dst->dev)));

You use this pattern a lot throughout the series. I believe you should
make a static inline or a macro from it.

static inline u32 some_name(const *dst, const *skb)
{
	return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len;
}

BTW why do you check for `!dst`, not `dst`? Does changing this affects
performance?


>  	if (unlikely(err))
>  		return err;

Thanks,
Olek
Re: [PATCH net-next 1/3] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue
Posted by Justin Iurman 1 month ago
On 10/25/24 17:12, Alexander Lobakin wrote:
> From: Justin Iurman <justin.iurman@uliege.be>
> Date: Fri, 25 Oct 2024 15:37:25 +0200
> 
>> This patch mitigates the two-reallocations issue with ioam6_iptunnel by
>> providing the dst_entry (in the cache) to the first call to
>> skb_cow_head(). As a result, the very first iteration would still
>> trigger two reallocations (i.e., empty cache), while next iterations
>> would only trigger a single reallocation.
> 
> [...]
> 
>>   static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
>> -			   struct ioam6_lwt_encap *tuninfo)
>> +			   struct ioam6_lwt_encap *tuninfo,
>> +			   struct dst_entry *dst)
>>   {
>>   	struct ipv6hdr *oldhdr, *hdr;
>>   	int hdrlen, err;
>>   
>>   	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>>   
>> -	err = skb_cow_head(skb, hdrlen + skb->mac_len);
>> +	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
>> +					       : LL_RESERVED_SPACE(dst->dev)));
> 
> You use this pattern a lot throughout the series. I believe you should
> make a static inline or a macro from it.
> 
> static inline u32 some_name(const *dst, const *skb)
> {
> 	return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len;
> }
> 
> BTW why do you check for `!dst`, not `dst`? Does changing this affects
> performance?

Not at all, you're right... even the opposite actually. Regarding the 
static inline suggestion, it could be a good idea and may even look like 
this as an optimization:

static inline u32 dev_overhead(struct dst_entry *dst, struct sk_buff *skb)
{
	if (likely(dst))
		return LL_RESERVED_SPACE(dst->dev);

	return skb->mac_len;
}

The question is... where should it go then? A static inline function per 
file (i.e., ioam6_iptunnel.c, seg6_iptunnel.c, and rpl_iptunnel.c)? In 
that case, it would still be repeated 3 times. Or in a header file 
somewhere, to have it defined only once? If so, what location do you 
think would be best?
Re: [PATCH net-next 1/3] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue
Posted by Alexander Lobakin 3 weeks, 6 days ago
From: Justin Iurman <justin.iurman@uliege.be>
Date: Fri, 25 Oct 2024 23:06:36 +0200

> On 10/25/24 17:12, Alexander Lobakin wrote:
>> From: Justin Iurman <justin.iurman@uliege.be>
>> Date: Fri, 25 Oct 2024 15:37:25 +0200
>>
>>> This patch mitigates the two-reallocations issue with ioam6_iptunnel by
>>> providing the dst_entry (in the cache) to the first call to
>>> skb_cow_head(). As a result, the very first iteration would still
>>> trigger two reallocations (i.e., empty cache), while next iterations
>>> would only trigger a single reallocation.
>>
>> [...]
>>
>>>   static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
>>> -               struct ioam6_lwt_encap *tuninfo)
>>> +               struct ioam6_lwt_encap *tuninfo,
>>> +               struct dst_entry *dst)
>>>   {
>>>       struct ipv6hdr *oldhdr, *hdr;
>>>       int hdrlen, err;
>>>         hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>>>   -    err = skb_cow_head(skb, hdrlen + skb->mac_len);
>>> +    err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
>>> +                           : LL_RESERVED_SPACE(dst->dev)));
>>
>> You use this pattern a lot throughout the series. I believe you should
>> make a static inline or a macro from it.
>>
>> static inline u32 some_name(const *dst, const *skb)
>> {
>>     return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len;
>> }
>>
>> BTW why do you check for `!dst`, not `dst`? Does changing this affects
>> performance?
> 
> Not at all, you're right... even the opposite actually. Regarding the
> static inline suggestion, it could be a good idea and may even look like
> this as an optimization:
> 
> static inline u32 dev_overhead(struct dst_entry *dst, struct sk_buff *skb)
> {
>     if (likely(dst))
>         return LL_RESERVED_SPACE(dst->dev);
> 
>     return skb->mac_len;
> }

Oh, nice!

> 
> The question is... where should it go then? A static inline function per
> file (i.e., ioam6_iptunnel.c, seg6_iptunnel.c, and rpl_iptunnel.c)? In
> that case, it would still be repeated 3 times. Or in a header file
> somewhere, to have it defined only once? If so, what location do you
> think would be best?

100% should be in a header file. Can't suggest any since I don't usually
work with tunnels and ain't deep into its header structure.

Thanks,
Olek