This patch mitigates the two-reallocations issue with ioam6_iptunnel by
providing the dst_entry (in the cache) to the first call to
skb_cow_head(). As a result, the very first iteration would still
trigger two reallocations (i.e., empty cache), while next iterations
would only trigger a single reallocation.
Performance tests before/after applying this patch, which clearly shows
the improvement:
- inline mode:
- before: https://ibb.co/LhQ8V63
- after: https://ibb.co/x5YT2bS
- encap mode:
- before: https://ibb.co/3Cjm5m0
- after: https://ibb.co/TwpsxTC
- encap mode with tunsrc:
- before: https://ibb.co/Gpy9QPg
- after: https://ibb.co/PW1bZFT
This patch also fixes an incorrect behavior: after the insertion, the
second call to skb_cow_head() makes sure that the dev has enough
headroom in the skb for layer 2 and stuff. In that case, the "old"
dst_entry was used, which is now fixed. After discussing with Paolo, it
appears that both patches can be merged into a single one -this one-
(for the sake of readability) and target net-next.
Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
---
net/ipv6/ioam6_iptunnel.c | 84 ++++++++++++++++++++-------------------
1 file changed, 43 insertions(+), 41 deletions(-)
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index beb6b4cfc551..da79055fc02a 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -255,14 +255,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
+ : LL_RESERVED_SPACE(dst->dev)));
if (unlikely(err))
return err;
@@ -293,16 +295,17 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
bool has_tunsrc,
struct in6_addr *tunsrc,
- struct in6_addr *tundst)
+ struct in6_addr *tundst,
+ struct dst_entry *dst)
{
- struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
int hdrlen, len, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + (!dst ? skb->mac_len
+ : LL_RESERVED_SPACE(dst->dev)));
if (unlikely(err))
return err;
@@ -326,7 +329,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
if (has_tunsrc)
memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
else
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -336,7 +339,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
+ struct dst_entry *dst, *orig_dst = skb_dst(skb);
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
@@ -345,7 +348,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ilwt = ioam6_lwt_state(dst->lwtstate);
+ ilwt = ioam6_lwt_state(orig_dst->lwtstate);
/* Check for insertion frequency (i.e., "k over n" insertions) */
pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
@@ -354,6 +357,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
+
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
@@ -361,7 +368,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
if (unlikely(err))
goto drop;
@@ -371,7 +378,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst);
+ &ilwt->tundst, dst);
if (unlikely(err))
goto drop;
@@ -389,45 +396,40 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = dst->error;
+ dst_release(dst);
+ goto drop;
+ }
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
local_bh_disable();
- dst = dst_cache_get(&ilwt->cache);
+ dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
local_bh_enable();
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
-
- local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- local_bh_enable();
- }
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+ if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
return dst_output(net, sk, skb);
- }
out:
- return dst->lwtstate->orig_output(net, sk, skb);
+ return orig_dst->lwtstate->orig_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
--
2.34.1
From: Justin Iurman <justin.iurman@uliege.be> Date: Fri, 25 Oct 2024 15:37:25 +0200 > This patch mitigates the two-reallocations issue with ioam6_iptunnel by > providing the dst_entry (in the cache) to the first call to > skb_cow_head(). As a result, the very first iteration would still > trigger two reallocations (i.e., empty cache), while next iterations > would only trigger a single reallocation. [...] > static int ioam6_do_inline(struct net *net, struct sk_buff *skb, > - struct ioam6_lwt_encap *tuninfo) > + struct ioam6_lwt_encap *tuninfo, > + struct dst_entry *dst) > { > struct ipv6hdr *oldhdr, *hdr; > int hdrlen, err; > > hdrlen = (tuninfo->eh.hdrlen + 1) << 3; > > - err = skb_cow_head(skb, hdrlen + skb->mac_len); > + err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len > + : LL_RESERVED_SPACE(dst->dev))); You use this pattern a lot throughout the series. I believe you should make a static inline or a macro from it. static inline u32 some_name(const *dst, const *skb) { return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len; } BTW why do you check for `!dst`, not `dst`? Does changing this affects performance? > if (unlikely(err)) > return err; Thanks, Olek
On 10/25/24 17:12, Alexander Lobakin wrote: > From: Justin Iurman <justin.iurman@uliege.be> > Date: Fri, 25 Oct 2024 15:37:25 +0200 > >> This patch mitigates the two-reallocations issue with ioam6_iptunnel by >> providing the dst_entry (in the cache) to the first call to >> skb_cow_head(). As a result, the very first iteration would still >> trigger two reallocations (i.e., empty cache), while next iterations >> would only trigger a single reallocation. > > [...] > >> static int ioam6_do_inline(struct net *net, struct sk_buff *skb, >> - struct ioam6_lwt_encap *tuninfo) >> + struct ioam6_lwt_encap *tuninfo, >> + struct dst_entry *dst) >> { >> struct ipv6hdr *oldhdr, *hdr; >> int hdrlen, err; >> >> hdrlen = (tuninfo->eh.hdrlen + 1) << 3; >> >> - err = skb_cow_head(skb, hdrlen + skb->mac_len); >> + err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len >> + : LL_RESERVED_SPACE(dst->dev))); > > You use this pattern a lot throughout the series. I believe you should > make a static inline or a macro from it. > > static inline u32 some_name(const *dst, const *skb) > { > return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len; > } > > BTW why do you check for `!dst`, not `dst`? Does changing this affects > performance? Not at all, you're right... even the opposite actually. Regarding the static inline suggestion, it could be a good idea and may even look like this as an optimization: static inline u32 dev_overhead(struct dst_entry *dst, struct sk_buff *skb) { if (likely(dst)) return LL_RESERVED_SPACE(dst->dev); return skb->mac_len; } The question is... where should it go then? A static inline function per file (i.e., ioam6_iptunnel.c, seg6_iptunnel.c, and rpl_iptunnel.c)? In that case, it would still be repeated 3 times. Or in a header file somewhere, to have it defined only once? If so, what location do you think would be best?
From: Justin Iurman <justin.iurman@uliege.be> Date: Fri, 25 Oct 2024 23:06:36 +0200 > On 10/25/24 17:12, Alexander Lobakin wrote: >> From: Justin Iurman <justin.iurman@uliege.be> >> Date: Fri, 25 Oct 2024 15:37:25 +0200 >> >>> This patch mitigates the two-reallocations issue with ioam6_iptunnel by >>> providing the dst_entry (in the cache) to the first call to >>> skb_cow_head(). As a result, the very first iteration would still >>> trigger two reallocations (i.e., empty cache), while next iterations >>> would only trigger a single reallocation. >> >> [...] >> >>> static int ioam6_do_inline(struct net *net, struct sk_buff *skb, >>> - struct ioam6_lwt_encap *tuninfo) >>> + struct ioam6_lwt_encap *tuninfo, >>> + struct dst_entry *dst) >>> { >>> struct ipv6hdr *oldhdr, *hdr; >>> int hdrlen, err; >>> hdrlen = (tuninfo->eh.hdrlen + 1) << 3; >>> - err = skb_cow_head(skb, hdrlen + skb->mac_len); >>> + err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len >>> + : LL_RESERVED_SPACE(dst->dev))); >> >> You use this pattern a lot throughout the series. I believe you should >> make a static inline or a macro from it. >> >> static inline u32 some_name(const *dst, const *skb) >> { >> return dst ? LL_RESERVED_SPACE(dst->dev) : skb->mac_len; >> } >> >> BTW why do you check for `!dst`, not `dst`? Does changing this affects >> performance? > > Not at all, you're right... even the opposite actually. Regarding the > static inline suggestion, it could be a good idea and may even look like > this as an optimization: > > static inline u32 dev_overhead(struct dst_entry *dst, struct sk_buff *skb) > { > if (likely(dst)) > return LL_RESERVED_SPACE(dst->dev); > > return skb->mac_len; > } Oh, nice! > > The question is... where should it go then? A static inline function per > file (i.e., ioam6_iptunnel.c, seg6_iptunnel.c, and rpl_iptunnel.c)? In > that case, it would still be repeated 3 times. Or in a header file > somewhere, to have it defined only once? If so, what location do you > think would be best? 100% should be in a header file. Can't suggest any since I don't usually work with tunnels and ain't deep into its header structure. Thanks, Olek
© 2016 - 2024 Red Hat, Inc.