Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can
be mangled. Outer IDs can always be mangled.
Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing
both inner and outer IDs to be mangled. In the future, we could add
NETIF_F_TSO_MANGLEID_INNER to provide more granular control to
drivers.
This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly.
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 ++++++--
drivers/net/ethernet/sfc/ef100_tx.c | 12 +++++++-----
include/linux/netdevice.h | 9 +++++++--
include/linux/skbuff.h | 6 +++++-
net/core/dev.c | 7 +++----
net/ipv4/af_inet.c | 13 ++++++-------
net/ipv4/tcp_offload.c | 5 +----
7 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b8c609d91d11..505c4ce7cef8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1289,8 +1289,12 @@ static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *
tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
ipv4->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
- if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+ if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) {
+ bool encap = rq->hw_gro_data->fk.control.flags & FLOW_DIS_ENCAPSULATION;
+
+ skb_shinfo(skb)->gso_type |= encap ?
+ SKB_GSO_TCP_FIXEDID_INNER : SKB_GSO_TCP_FIXEDID;
+ }
skb->csum_start = (unsigned char *)tcp - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
index e6b6be549581..4efd22b44986 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.c
+++ b/drivers/net/ethernet/sfc/ef100_tx.c
@@ -189,7 +189,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
{
bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
unsigned int len, ip_offset, tcp_offset, payload_segs;
- u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
+ u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
+ u32 mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
unsigned int outer_ip_offset, outer_l4_offset;
u16 vlan_tci = skb_vlan_tag_get(skb);
u32 mss = skb_shinfo(skb)->gso_size;
@@ -201,7 +202,9 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
u32 paylen;
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID)
- mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP;
+ mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER)
+ mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_NO_OP;
if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX)
vlan_enable = skb_vlan_tag_present(skb);
@@ -239,14 +242,13 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
ESF_GZ_TX_TSO_CSO_INNER_L4, 1,
ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1,
ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1,
- ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid,
+ ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid_inner,
ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1,
ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1,
ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1,
ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial,
ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial,
- ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid :
- ESE_GZ_TX_DESC_IP4_ID_NO_OP,
+ ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer,
ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable,
ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci
);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5e5de4b0a433..f47d4d67bce9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5287,13 +5287,18 @@ void skb_warn_bad_offload(const struct sk_buff *skb);
static inline bool net_gso_ok(netdev_features_t features, int gso_type)
{
- netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT;
+ netdev_features_t feature;
+
+ if (gso_type & (SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_FIXEDID_INNER))
+ gso_type |= __SKB_GSO_TCP_FIXEDID;
+
+ feature = ((netdev_features_t)gso_type << NETIF_F_GSO_SHIFT) & NETIF_F_GSO_MASK;
/* check flags correspondence */
BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
- BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(__SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 14b923ddb6df..9e4540d379ba 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -674,7 +674,7 @@ enum {
/* This indicates the tcp segment has CWR set. */
SKB_GSO_TCP_ECN = 1 << 2,
- SKB_GSO_TCP_FIXEDID = 1 << 3,
+ __SKB_GSO_TCP_FIXEDID = 1 << 3,
SKB_GSO_TCPV6 = 1 << 4,
@@ -707,6 +707,10 @@ enum {
SKB_GSO_FRAGLIST = 1 << 18,
SKB_GSO_TCP_ACCECN = 1 << 19,
+
+ /* These don't correspond with netdev features. */
+ SKB_GSO_TCP_FIXEDID = 1 << 30,
+ SKB_GSO_TCP_FIXEDID_INNER = 1 << 31,
};
#if BITS_PER_LONG > 32
diff --git a/net/core/dev.c b/net/core/dev.c
index 68dc47d7e700..9941c39b5970 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
* IPv4 header has the potential to be fragmented.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
- struct iphdr *iph = skb->encapsulation ?
- inner_ip_hdr(skb) : ip_hdr(skb);
-
- if (!(iph->frag_off & htons(IP_DF)))
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) ||
+ (skb->encapsulation &&
+ !(inner_ip_hdr(skb)->frag_off & htons(IP_DF))))
features &= ~NETIF_F_TSO_MANGLEID;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76e38092cd8a..fc7a6955fa0a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1393,14 +1393,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
- if (!skb->encapsulation || encap) {
- udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
- fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
+ /* fixed ID is invalid if DF bit is not set */
+ fixedid = !!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCP_FIXEDID << encap));
+ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
+ goto out;
- /* fixed ID is invalid if DF bit is not set */
- if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
- goto out;
- }
+ if (!skb->encapsulation || encap)
+ udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 56817ef12ad2..be5c2294610e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -472,7 +472,6 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
- bool is_fixedid;
if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
@@ -486,10 +485,8 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- is_fixedid = (NAPI_GRO_CB(skb)->ip_fixedid >> skb->encapsulation) & 1;
-
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
- (is_fixedid * SKB_GSO_TCP_FIXEDID);
+ (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID);
tcp_gro_complete(skb);
return 0;
--
2.36.1
On 8/21/25 9:30 AM, Richard Gobert wrote: > diff --git a/net/core/dev.c b/net/core/dev.c > index 68dc47d7e700..9941c39b5970 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, > * IPv4 header has the potential to be fragmented. > */ > if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { > - struct iphdr *iph = skb->encapsulation ? > - inner_ip_hdr(skb) : ip_hdr(skb); > - > - if (!(iph->frag_off & htons(IP_DF))) > + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || > + (skb->encapsulation && > + !(inner_ip_hdr(skb)->frag_off & htons(IP_DF)))) > features &= ~NETIF_F_TSO_MANGLEID; FWIW, I think the above is the problematic part causing GSO PARTIAL issues. By default UDP tunnels do not set the DF bit, and most/all devices implementing GSO_PARTIAL clear TSO for encapsulated packet when MANGLEID is not available. I think the following should workaround the problem (assuming my email client did not corrupt the diff), but I also fear this change will cause very visible regressions in existing setups. Note that the current status is incorrect - GSO partial devices are mangling the outer IP ID for encapsulated packets even when the outer header IP DF is not set. /P --- diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3370827409aa..b0c71a0d8028 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -214,8 +214,8 @@ def main() -> None: # name, v4/v6 ethtool_feature tun:(type, partial, args) ("", "4", "tx-tcp-segmentation", None), ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), + ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum df set")), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum df set")), ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), )
Paolo Abeni wrote: > On 8/21/25 9:30 AM, Richard Gobert wrote: >> diff --git a/net/core/dev.c b/net/core/dev.c >> index 68dc47d7e700..9941c39b5970 100644 >> --- a/net/core/dev.c >> +++ b/net/core/dev.c >> @@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, >> * IPv4 header has the potential to be fragmented. >> */ >> if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { >> - struct iphdr *iph = skb->encapsulation ? >> - inner_ip_hdr(skb) : ip_hdr(skb); >> - >> - if (!(iph->frag_off & htons(IP_DF))) >> + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || >> + (skb->encapsulation && >> + !(inner_ip_hdr(skb)->frag_off & htons(IP_DF)))) >> features &= ~NETIF_F_TSO_MANGLEID; > > FWIW, I think the above is the problematic part causing GSO PARTIAL issues. > > By default UDP tunnels do not set the DF bit, and most/all devices > implementing GSO_PARTIAL clear TSO for encapsulated packet when MANGLEID > is not available. > > I think the following should workaround the problem (assuming my email > client did not corrupt the diff), but I also fear this change will cause > very visible regressions in existing setups. > Thanks for the thorough review! To solve this issue, we can decide that MANGLEID cannot cause incrementing IDs to become fixed for outer headers of encapsulated packets (which is the current behavior), then just revert this diff. I'll update the documentation in segmentation-offloads.rst to reflect this. Do you think that would be a good solution? > Note that the current status is incorrect - GSO partial devices are > mangling the outer IP ID for encapsulated packets even when the outer > header IP DF is not set. > > /P WDYM? Currently, when the DF-bit isn't set, it means that the IDs must be incrementing. Otherwise, the packets wouldn't have been merged by GRO. GSO partial (and also regular GSO/TSO) generate incrementing IDs, so the IDs cannot be mangled. With my patch, if the IDs were originally fixed, regardless of the DF-bit, TSO/GSO partial will not occur unless MANGLEID is enabled. > --- > diff --git a/tools/testing/selftests/drivers/net/hw/tso.py > b/tools/testing/selftests/drivers/net/hw/tso.py > index 3370827409aa..b0c71a0d8028 100755 > --- a/tools/testing/selftests/drivers/net/hw/tso.py > +++ b/tools/testing/selftests/drivers/net/hw/tso.py > @@ -214,8 +214,8 @@ def main() -> None: > # name, v4/v6 ethtool_feature > tun:(type, partial, args) > ("", "4", "tx-tcp-segmentation", None), > ("", "6", "tx-tcp6-segmentation", None), > - ("vxlan", "", "tx-udp_tnl-segmentation", > ("vxlan", True, "id 100 dstport 4789 noudpcsum")), > - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", > ("vxlan", False, "id 100 dstport 4789 udpcsum")), > + ("vxlan", "", "tx-udp_tnl-segmentation", > ("vxlan", True, "id 100 dstport 4789 noudpcsum df set")), > + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", > ("vxlan", False, "id 100 dstport 4789 udpcsum df set")), > ("gre", "4", "tx-gre-segmentation", > ("gre", False, "")), > ("gre", "6", "tx-gre-segmentation", > ("ip6gre", False, "")), > ) >
On 8/25/25 3:31 PM, Richard Gobert wrote: > Paolo Abeni wrote: >> On 8/21/25 9:30 AM, Richard Gobert wrote: >>> diff --git a/net/core/dev.c b/net/core/dev.c >>> index 68dc47d7e700..9941c39b5970 100644 >>> --- a/net/core/dev.c >>> +++ b/net/core/dev.c >>> @@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, >>> * IPv4 header has the potential to be fragmented. >>> */ >>> if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { >>> - struct iphdr *iph = skb->encapsulation ? >>> - inner_ip_hdr(skb) : ip_hdr(skb); >>> - >>> - if (!(iph->frag_off & htons(IP_DF))) >>> + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || >>> + (skb->encapsulation && >>> + !(inner_ip_hdr(skb)->frag_off & htons(IP_DF)))) >>> features &= ~NETIF_F_TSO_MANGLEID; >> >> FWIW, I think the above is the problematic part causing GSO PARTIAL issues. >> >> By default UDP tunnels do not set the DF bit, and most/all devices >> implementing GSO_PARTIAL clear TSO for encapsulated packet when MANGLEID >> is not available. >> >> I think the following should workaround the problem (assuming my email >> client did not corrupt the diff), but I also fear this change will cause >> very visible regressions in existing setups. >> > > Thanks for the thorough review! > > To solve this issue, we can decide that MANGLEID cannot cause > incrementing IDs to become fixed for outer headers of encapsulated > packets (which is the current behavior), then just revert this diff. > I'll update the documentation in segmentation-offloads.rst to reflect this. > Do you think that would be a good solution? I'm not sure I read correctly the above, let me rephrase. You are suggesting that devices can set MANGLEID but they must ensure, for encapsulated packets, to keep incrementing IDs for outer IP headers even when MANGLEID is set. It that what you mean? Note that most device exposing GSO_PARTIAL can't perform the above. >> Note that the current status is incorrect - GSO partial devices are >> mangling the outer IP ID for encapsulated packets even when the outer >> header IP DF is not set. >> >> /P > > WDYM? In the following setup: TCP socket -> UDP encap device (without 'df set') -> H/W NIC exposing GSO partial for encap traffic with the current kernel, if the TCP socket creates a GSO packet with size MSS multiple, the wire packets will have the outer IP header with the DF bit NOT set and will have ID fixed - for all the wire packets corresponding to a given GSO one. /P > Currently, when the DF-bit isn't set, it means that the IDs must > be incrementing. Otherwise, the packets wouldn't have been merged by GRO. Note that GSO packets can be locally generated. > GSO partial (and also regular GSO/TSO) generate incrementing IDs, so the > IDs cannot be mangled. AFAIK, most device exposing GSO partial don't increment the outer IP ID for encap packet (the silicon is not able to do that). > With my patch, if the IDs were originally fixed, > regardless of the DF-bit, TSO/GSO partial will not occur unless MANGLEID > is enabled. I think the above statement is a lit confusing, S/W segmentation can happen even if MANGLEID is enabled on the egress device: for FIXEDID pkts, with DF bit not set, both the current kernel code and your patch will clear it. /P
Paolo Abeni wrote: > On 8/25/25 3:31 PM, Richard Gobert wrote: >> Paolo Abeni wrote: >>> On 8/21/25 9:30 AM, Richard Gobert wrote: >>>> diff --git a/net/core/dev.c b/net/core/dev.c >>>> index 68dc47d7e700..9941c39b5970 100644 >>>> --- a/net/core/dev.c >>>> +++ b/net/core/dev.c >>>> @@ -3772,10 +3772,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, >>>> * IPv4 header has the potential to be fragmented. >>>> */ >>>> if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { >>>> - struct iphdr *iph = skb->encapsulation ? >>>> - inner_ip_hdr(skb) : ip_hdr(skb); >>>> - >>>> - if (!(iph->frag_off & htons(IP_DF))) >>>> + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || >>>> + (skb->encapsulation && >>>> + !(inner_ip_hdr(skb)->frag_off & htons(IP_DF)))) >>>> features &= ~NETIF_F_TSO_MANGLEID; >>> >>> FWIW, I think the above is the problematic part causing GSO PARTIAL issues. >>> >>> By default UDP tunnels do not set the DF bit, and most/all devices >>> implementing GSO_PARTIAL clear TSO for encapsulated packet when MANGLEID >>> is not available. >>> >>> I think the following should workaround the problem (assuming my email >>> client did not corrupt the diff), but I also fear this change will cause >>> very visible regressions in existing setups. >>> >> >> Thanks for the thorough review! >> >> To solve this issue, we can decide that MANGLEID cannot cause >> incrementing IDs to become fixed for outer headers of encapsulated >> packets (which is the current behavior), then just revert this diff. >> I'll update the documentation in segmentation-offloads.rst to reflect this. >> Do you think that would be a good solution? > > I'm not sure I read correctly the above, let me rephrase. You are > suggesting that devices can set MANGLEID but they must ensure, for > encapsulated packets, to keep incrementing IDs for outer IP headers even > when MANGLEID is set. It that what you mean? > > Note that most device exposing GSO_PARTIAL can't perform the above. I think there are two misunderstandings: 1. When I'm talking about mangled ids, I'm mostly talking about forwarded packets whose IDs are mangled after being forwarded. You also consider locally generated packets to have mangled IDs if the IDs are modified during segmentation, which is fair. 2. You say that GSO partial keeps the outer IDs fixed, but AFAICT this isn't the case. (See below) I think you understood me correctly, but I'll explain in more detail. My understanding is that TSO generates packets with incrementing IDs. Since TSO can't promise to keep fixed IDs, if a packet has SKB_GSO_TCP_FIXEDID, TSO cannot be used and software GSO must be used instead (this is checked by net_gso_ok). If you don't care about mangled IDs, MANGLEID can be set on the device so that TSO can still be used. If MANGLEID is set on the device, then TSO is allowed to generate either incrementing or fixed IDs, depending on the device's preference. However, mangling incrementing IDs into fixed IDs is a problem when the DF-bit is not set, as the packets might then be fragmented and fragments of different packets will share the same ID. To prevent this, the check discussed above removes MANGLEID if the DF-bit is not set. Currently, MANGLEID is only relevant for the inner-most header. With my patch, MANGLEID explicitly allows the mangling of outer IDs as well, so the check must be modified to check both the inner and the outer headers. I suggest that we revert the diff so that only the inner-most header is checked, allowing TSO even when the DF bit is not set on the outer header. For this to be correct, we must explicitly define MANGLEID on the outer header to mean that TSO is not allowed to turn incrementing IDs into fixed IDs when the DF bit is not set. No code change is required since devices already generate incrementing IDs for the outer headers. > >>> Note that the current status is incorrect - GSO partial devices are >>> mangling the outer IP ID for encapsulated packets even when the outer >>> header IP DF is not set. >>> >>> /P >> >> WDYM? > > In the following setup: > > TCP socket -> UDP encap device (without 'df set') -> H/W NIC exposing > GSO partial for encap traffic > > with the current kernel, if the TCP socket creates a GSO packet with > size MSS multiple, the wire packets will have the outer IP header with > the DF bit NOT set and will have ID fixed - for all the wire packets > corresponding to a given GSO one. Are you sure? The documentation clearly states that for GSO partial, the device drivers must increment outer IDs when the DF-bit is not set. For example, AFAICT, this is what ixgbe and i40e do, but I don't have the hardware to verify. I would think the behavior in the example you provided is undesirable, since the packets could potentially be fragmented later. > > /P > >> Currently, when the DF-bit isn't set, it means that the IDs must >> be incrementing. Otherwise, the packets wouldn't have been merged by GRO. > > Note that GSO packets can be locally generated. Of course. I was just referring to forwarded packets. > >> GSO partial (and also regular GSO/TSO) generate incrementing IDs, so the >> IDs cannot be mangled. > > AFAIK, most device exposing GSO partial don't increment the outer IP ID > for encap packet (the silicon is not able to do that). > >> With my patch, if the IDs were originally fixed, >> regardless of the DF-bit, TSO/GSO partial will not occur unless MANGLEID >> is enabled. > > I think the above statement is a lit confusing, S/W segmentation can > happen even if MANGLEID is enabled on the egress device: for FIXEDID > pkts, with DF bit not set, both the current kernel code and your patch > will clear it. Sorry, I phrased that awkwardly. > > /P >
On 21/08/2025 08:30, Richard Gobert wrote: > Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can > be mangled. Outer IDs can always be mangled. > > Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing > both inner and outer IDs to be mangled. In the future, we could add > NETIF_F_TSO_MANGLEID_INNER to provide more granular control to > drivers. > > This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. > > Signed-off-by: Richard Gobert <richardbgobert@gmail.com> ... > diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c > index e6b6be549581..4efd22b44986 100644 > --- a/drivers/net/ethernet/sfc/ef100_tx.c > +++ b/drivers/net/ethernet/sfc/ef100_tx.c > @@ -189,7 +189,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx, > { > bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; > unsigned int len, ip_offset, tcp_offset, payload_segs; > - u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; > + u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; > + u32 mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; > unsigned int outer_ip_offset, outer_l4_offset; > u16 vlan_tci = skb_vlan_tag_get(skb); > u32 mss = skb_shinfo(skb)->gso_size; > @@ -201,7 +202,9 @@ static void ef100_make_tso_desc(struct efx_nic *efx, > u32 paylen; > > if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) > - mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; > + mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; > + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER) > + mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_NO_OP; > if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) > vlan_enable = skb_vlan_tag_present(skb); > > @@ -239,14 +242,13 @@ static void ef100_make_tso_desc(struct efx_nic *efx, > ESF_GZ_TX_TSO_CSO_INNER_L4, 1, > ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, > ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, > - ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, > + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid_inner, > ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, > ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, > ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, > ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, > ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, > - ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : > - ESE_GZ_TX_DESC_IP4_ID_NO_OP, > + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer, > ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, > ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci > ); AFAICT this will now, in the case when FIXEDID isn't set, set ESF_GZ_TX_TSO_ED_OUTER_IP4_ID on non-encapsulated frames, for which ESF_GZ_TX_TSO_OUTER_L3_OFF_W has been set to 0. I'm not 100% sure, but I think that will cause the NIC to do an INC_MOD16 on octets 4 and 5 of the packet, corrupting the Ethernet header. Please retain the existing logic whereby ED_OUTER_IP4_ID is set to NO_OP in the !encap case. Note that the EF100 host interface's semantics take the view that an unencapsulated packet has an INNER and no OUTER header, which AIUI is the opposite to how your new gso_type flags are defined, so I think for !encap you also need to set mangleid_inner based on SKB_GSO_TCP_FIXEDID, rather than SKB_GSO_TCP_FIXEDID_INNER. My apologies for not spotting this in earlier versions.
Edward Cree wrote: > On 21/08/2025 08:30, Richard Gobert wrote: >> Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can >> be mangled. Outer IDs can always be mangled. >> >> Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing >> both inner and outer IDs to be mangled. In the future, we could add >> NETIF_F_TSO_MANGLEID_INNER to provide more granular control to >> drivers. >> >> This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. >> >> Signed-off-by: Richard Gobert <richardbgobert@gmail.com> > ... >> diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c >> index e6b6be549581..4efd22b44986 100644 >> --- a/drivers/net/ethernet/sfc/ef100_tx.c >> +++ b/drivers/net/ethernet/sfc/ef100_tx.c >> @@ -189,7 +189,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx, >> { >> bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; >> unsigned int len, ip_offset, tcp_offset, payload_segs; >> - u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; >> + u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; >> + u32 mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; >> unsigned int outer_ip_offset, outer_l4_offset; >> u16 vlan_tci = skb_vlan_tag_get(skb); >> u32 mss = skb_shinfo(skb)->gso_size; >> @@ -201,7 +202,9 @@ static void ef100_make_tso_desc(struct efx_nic *efx, >> u32 paylen; >> >> if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) >> - mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; >> + mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; >> + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER) >> + mangleid_inner = ESE_GZ_TX_DESC_IP4_ID_NO_OP; >> if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) >> vlan_enable = skb_vlan_tag_present(skb); >> >> @@ -239,14 +242,13 @@ static void ef100_make_tso_desc(struct efx_nic *efx, >> ESF_GZ_TX_TSO_CSO_INNER_L4, 1, >> ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, >> ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, >> - ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, >> + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid_inner, >> ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, >> ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, >> ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, >> ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, >> ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, >> - ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : >> - ESE_GZ_TX_DESC_IP4_ID_NO_OP, >> + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer, >> ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, >> ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci >> ); > > AFAICT this will now, in the case when FIXEDID isn't set, set > ESF_GZ_TX_TSO_ED_OUTER_IP4_ID on non-encapsulated frames, for which > ESF_GZ_TX_TSO_OUTER_L3_OFF_W has been set to 0. I'm not 100% sure, > but I think that will cause the NIC to do an INC_MOD16 on octets 4 > and 5 of the packet, corrupting the Ethernet header. > Please retain the existing logic whereby ED_OUTER_IP4_ID is set to > NO_OP in the !encap case. > Note that the EF100 host interface's semantics take the view that an > unencapsulated packet has an INNER and no OUTER header, which AIUI > is the opposite to how your new gso_type flags are defined, so I > think for !encap you also need to set mangleid_inner based on > SKB_GSO_TCP_FIXEDID, rather than SKB_GSO_TCP_FIXEDID_INNER. > > My apologies for not spotting this in earlier versions. Will fix this in v4. Thanks!
© 2016 - 2025 Red Hat, Inc.