[PATCH net 2/2] net: ioam6: mitigate the two reallocations problem

Justin Iurman posted 2 patches 1 year, 5 months ago
[PATCH net 2/2] net: ioam6: mitigate the two reallocations problem
Posted by Justin Iurman 1 year, 5 months ago
Get the cache _before_ adding bytes. This way, we provide the dst entry
to skb_cow_head(), so that we call LL_RESERVED_SPACE() on it and avoid
two reallocations in some specific cases. We cannot do much when the dst
entry is empty (cache is empty, this is the first time): in that case,
we use skb->mac_len by default and two reallocations will happen in
those specific cases. However, it will only happen once, not every
single time.

Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
---
 net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index b08c13550144..e5a7e7472b71 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -220,14 +220,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
 }
 
 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
-			   struct ioam6_lwt_encap *tuninfo)
+			   struct ioam6_lwt_encap *tuninfo,
+			   struct dst_entry *dst)
 {
 	struct ipv6hdr *oldhdr, *hdr;
 	int hdrlen, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 
-	err = skb_cow_head(skb, hdrlen + skb->mac_len);
+	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
+					       : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -256,16 +258,17 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
 
 static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 			  struct ioam6_lwt_encap *tuninfo,
-			  struct in6_addr *tundst)
+			  struct in6_addr *tundst,
+			  struct dst_entry *dst)
 {
-	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr, *inner_hdr;
 	int hdrlen, len, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 	len = sizeof(*hdr) + hdrlen;
 
-	err = skb_cow_head(skb, len + skb->mac_len);
+	err = skb_cow_head(skb, len + (!dst ? skb->mac_len
+					    : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -285,7 +288,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 	hdr->nexthdr = NEXTHDR_HOP;
 	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
 	hdr->daddr = *tundst;
-	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+	ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
 			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
 
 	skb_postpush_rcsum(skb, hdr, len);
@@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	orig_daddr = ipv6_hdr(skb)->daddr;
 
+	local_bh_disable();
+	dst = dst_cache_get(&ilwt->cache);
+	local_bh_enable();
+
 	switch (ilwt->mode) {
 	case IOAM6_IPTUNNEL_MODE_INLINE:
 do_inline:
@@ -320,7 +327,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
 			goto out;
 
-		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -328,7 +335,8 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	case IOAM6_IPTUNNEL_MODE_ENCAP:
 do_encap:
 		/* Encapsulation (ip6ip6) */
-		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+		err = ioam6_do_encap(net, skb,
+				     &ilwt->tuninfo, &ilwt->tundst, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -346,10 +354,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	local_bh_disable();
-	dst = dst_cache_get(&ilwt->cache);
-	local_bh_enable();
-
 	if (unlikely(!dst)) {
 		struct ipv6hdr *hdr = ipv6_hdr(skb);
 		struct flowi6 fl6;
@@ -371,15 +375,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		local_bh_disable();
 		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
 		local_bh_enable();
+
+		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+		if (unlikely(err))
+			goto drop;
 	}
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 
-	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
-	if (unlikely(err))
-		goto drop;
-
 	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
 		return dst_output(net, sk, skb);
 out:
-- 
2.34.1
Re: [PATCH net 2/2] net: ioam6: mitigate the two reallocations problem
Posted by Paolo Abeni 1 year, 5 months ago
On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote:
> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>  
>  	orig_daddr = ipv6_hdr(skb)->daddr;
>  
> +	local_bh_disable();
> +	dst = dst_cache_get(&ilwt->cache);
> +	local_bh_enable();
> +
>  	switch (ilwt->mode) {
>  	case IOAM6_IPTUNNEL_MODE_INLINE:

I now see that the way you coded patch 1/2 makes this one easier.

Still I think it's quite doubtful to make the dst cache access
unconditional.

Given the above I suggest to replace the 2 patches with a single one
moving the whole dst_cache logic before the switch statement.

Also this does not address a functional issue, IMHO it's more a
performance improvement, could as well target net-next with no fixes
tag.

WRT seg6 and rpl tunnels, before any patch, I think we first need
confirmation the problem is present there, too.

Thanks,

Paolo
Re: [PATCH net 2/2] net: ioam6: mitigate the two reallocations problem
Posted by Justin Iurman 1 year, 5 months ago
On 7/4/24 11:23, Paolo Abeni wrote:
> On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote:
>> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>>   
>>   	orig_daddr = ipv6_hdr(skb)->daddr;
>>   
>> +	local_bh_disable();
>> +	dst = dst_cache_get(&ilwt->cache);
>> +	local_bh_enable();
>> +
>>   	switch (ilwt->mode) {
>>   	case IOAM6_IPTUNNEL_MODE_INLINE:
> 
> I now see that the way you coded patch 1/2 makes this one easier.

Hi Paolo,

Indeed. I originally had it as a single two-in-one patch, then I thought 
it would be clearer to split it up (looks like I was wrong, sorry).

> Still I think it's quite doubtful to make the dst cache access
> unconditional.

By unconditional, you mean getting the cache _before_ the switch, right?
If so, that's indeed the only way to provide it to the encap/inline
functions for the mitigation. However, I don't see it as a problem.
Instead of (a) calling the encap/inline function, then (b) getting the
cache, you'd (a) get the cache, then (b) call the encap/inline function.
IMHO, it's the same. I'll re-run our measurements and compare them to our
previous results in order to confirm that getting the cache early does
not impact performance. The only exception would be when skb_cow_head()
returns an error in the encap/inline functions: in that case, getting the
cache early would be a waste of time, but this situation suggests there
is already a problem, so it's probably fine.

> Given the above I suggest to replace the 2 patches with a single one
> moving the whole dst_cache logic before the switch statement.

Will do!

> Also this does not address a functional issue, IMHO it's more a
> performance improvement, could as well target net-next with no fixes
> tag.

Hmmm, it's indeed OK to target net-next for patch #2, since it could be
considered an improvement (not really a functional issue per se).
However, I'm not sure about patch #1. Wouldn't the kernel crash if not
enough headroom was allocated (assuming the driver does no check before
writing)?

> WRT seg6 and rpl tunnels, before any patch, I think we first need
> confirmation the problem is present there, too.

Ack. I'll try to run some tests to check that.

Thanks,
Justin

> Thanks,
> 
> Paolo
>
Re: [PATCH net 2/2] net: ioam6: mitigate the two reallocations problem
Posted by Justin Iurman 1 year, 5 months ago
On 7/2/24 19:44, Justin Iurman wrote:
> Get the cache _before_ adding bytes. This way, we provide the dst entry
> to skb_cow_head(), so that we call LL_RESERVED_SPACE() on it and avoid
> two reallocations in some specific cases. We cannot do much when the dst
> entry is empty (cache is empty, this is the first time): in that case,
> we use skb->mac_len by default and two reallocations will happen in
> those specific cases. However, it will only happen once, not every
> single time.

This fix could also be applied to seg6 and rpl. I'm not sure the problem
would show up there (I did some quick computations and it seems unlikely),
but it would probably still be worth having the fix there too, just in
case. Any opinions?

> Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
> Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
> ---
>   net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++----------------
>   1 file changed, 20 insertions(+), 16 deletions(-)
> 
> diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
> index b08c13550144..e5a7e7472b71 100644
> --- a/net/ipv6/ioam6_iptunnel.c
> +++ b/net/ipv6/ioam6_iptunnel.c
> @@ -220,14 +220,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
>   }
>   
>   static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
> -			   struct ioam6_lwt_encap *tuninfo)
> +			   struct ioam6_lwt_encap *tuninfo,
> +			   struct dst_entry *dst)
>   {
>   	struct ipv6hdr *oldhdr, *hdr;
>   	int hdrlen, err;
>   
>   	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>   
> -	err = skb_cow_head(skb, hdrlen + skb->mac_len);
> +	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
> +					       : LL_RESERVED_SPACE(dst->dev)));
>   	if (unlikely(err))
>   		return err;
>   
> @@ -256,16 +258,17 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
>   
>   static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
>   			  struct ioam6_lwt_encap *tuninfo,
> -			  struct in6_addr *tundst)
> +			  struct in6_addr *tundst,
> +			  struct dst_entry *dst)
>   {
> -	struct dst_entry *dst = skb_dst(skb);
>   	struct ipv6hdr *hdr, *inner_hdr;
>   	int hdrlen, len, err;
>   
>   	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>   	len = sizeof(*hdr) + hdrlen;
>   
> -	err = skb_cow_head(skb, len + skb->mac_len);
> +	err = skb_cow_head(skb, len + (!dst ? skb->mac_len
> +					    : LL_RESERVED_SPACE(dst->dev)));
>   	if (unlikely(err))
>   		return err;
>   
> @@ -285,7 +288,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
>   	hdr->nexthdr = NEXTHDR_HOP;
>   	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
>   	hdr->daddr = *tundst;
> -	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
> +	ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
>   			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
>   
>   	skb_postpush_rcsum(skb, hdr, len);
> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   
>   	orig_daddr = ipv6_hdr(skb)->daddr;
>   
> +	local_bh_disable();
> +	dst = dst_cache_get(&ilwt->cache);
> +	local_bh_enable();
> +
>   	switch (ilwt->mode) {
>   	case IOAM6_IPTUNNEL_MODE_INLINE:
>   do_inline:
> @@ -320,7 +327,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
>   			goto out;
>   
> -		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
> +		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
>   		if (unlikely(err))
>   			goto drop;
>   
> @@ -328,7 +335,8 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   	case IOAM6_IPTUNNEL_MODE_ENCAP:
>   do_encap:
>   		/* Encapsulation (ip6ip6) */
> -		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
> +		err = ioam6_do_encap(net, skb,
> +				     &ilwt->tuninfo, &ilwt->tundst, dst);
>   		if (unlikely(err))
>   			goto drop;
>   
> @@ -346,10 +354,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		goto drop;
>   	}
>   
> -	local_bh_disable();
> -	dst = dst_cache_get(&ilwt->cache);
> -	local_bh_enable();
> -
>   	if (unlikely(!dst)) {
>   		struct ipv6hdr *hdr = ipv6_hdr(skb);
>   		struct flowi6 fl6;
> @@ -371,15 +375,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		local_bh_disable();
>   		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
>   		local_bh_enable();
> +
> +		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> +		if (unlikely(err))
> +			goto drop;
>   	}
>   
>   	skb_dst_drop(skb);
>   	skb_dst_set(skb, dst);
>   
> -	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> -	if (unlikely(err))
> -		goto drop;
> -
>   	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
>   		return dst_output(net, sk, skb);
>   out: