Only handles packets where the pppoe header length field matches the exact
packet length. Significantly improves rx throughput.
When running NAT traffic through a MediaTek MT7621 device from a host
behind PPPoE to a host directly connected via ethernet, the TCP throughput
that the device is able to handle improves from ~130 Mbit/s to ~630 Mbit/s,
using fraglist GRO.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
v2: fix compile error
v3:
- increase priority value
- implement GSO support
- use INDIRECT_CALL_INET
- update pppoe length field
- remove unnecessary network_offsets update
drivers/net/ppp/pppoe.c | 160 +++++++++++++++++++++++++++++++++++++++-
net/ipv4/af_inet.c | 2 +
net/ipv6/ip6_offload.c | 2 +
3 files changed, 163 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 410effa42ade..a8d8eb870bce 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -77,6 +77,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
+#include <net/gro.h>
#include <linux/uaccess.h>
@@ -435,7 +436,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->len < len)
goto drop;
- if (pskb_trim_rcsum(skb, len))
+ if (!skb_is_gso(skb) && pskb_trim_rcsum(skb, len))
goto drop;
ph = pppoe_hdr(skb);
@@ -1173,6 +1174,161 @@ static struct pernet_operations pppoe_net_ops = {
.size = sizeof(struct pppoe_net),
};
+static u16
+compare_pppoe_header(struct pppoe_hdr *phdr, struct pppoe_hdr *phdr2)
+{
+ return (__force __u16)((phdr->sid ^ phdr2->sid) |
+ (phdr->tag[0].tag_type ^ phdr2->tag[0].tag_type));
+}
+
+static __be16 pppoe_hdr_proto(struct pppoe_hdr *phdr)
+{
+ switch (phdr->tag[0].tag_type) {
+ case cpu_to_be16(PPP_IP):
+ return cpu_to_be16(ETH_P_IP);
+ case cpu_to_be16(PPP_IPV6):
+ return cpu_to_be16(ETH_P_IPV6);
+ default:
+ return 0;
+ }
+
+}
+
+static struct sk_buff *pppoe_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
+{
+ const struct packet_offload *ptype;
+ unsigned int hlen, off_pppoe;
+ struct sk_buff *pp = NULL;
+ struct pppoe_hdr *phdr;
+ struct sk_buff *p;
+ int flush = 1;
+ __be16 type;
+
+ off_pppoe = skb_gro_offset(skb);
+ hlen = off_pppoe + sizeof(*phdr);
+ phdr = skb_gro_header(skb, hlen + 2, off_pppoe);
+ if (unlikely(!phdr))
+ goto out;
+
+ /* ignore packets with padding or invalid length */
+ if (skb_gro_len(skb) != be16_to_cpu(phdr->length) + hlen)
+ goto out;
+
+ type = pppoe_hdr_proto(phdr);
+ if (!type)
+ goto out;
+
+ ptype = gro_find_receive_by_type(type);
+ if (!ptype)
+ goto out;
+
+ flush = 0;
+
+ list_for_each_entry(p, head, list) {
+ struct pppoe_hdr *phdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ phdr2 = (struct pppoe_hdr *)(p->data + off_pppoe);
+ if (compare_pppoe_header(phdr, phdr2))
+ NAPI_GRO_CB(p)->same_flow = 0;
+ }
+
+ skb_gro_pull(skb, sizeof(*phdr) + 2);
+ skb_gro_postpull_rcsum(skb, phdr, sizeof(*phdr) + 2);
+
+ pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ head, skb);
+
+out:
+ skb_gro_flush_final(skb, pp, flush);
+
+ return pp;
+}
+
+static int pppoe_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct pppoe_hdr *phdr = (struct pppoe_hdr *)(skb->data + nhoff);
+ __be16 type = pppoe_hdr_proto(phdr);
+ struct packet_offload *ptype;
+ int len, err;
+
+ ptype = gro_find_complete_by_type(type);
+ if (!ptype)
+ return -ENOENT;
+
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, nhoff + sizeof(*phdr) + 2);
+ if (err)
+ return err;
+
+ len = skb->len - (nhoff + sizeof(*phdr));
+ phdr->length = cpu_to_be16(len);
+
+ return 0;
+}
+
+static struct sk_buff *pppoe_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ unsigned int pppoe_hlen = sizeof(struct pppoe_hdr) + 2;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ u16 mac_offset = skb->mac_header;
+ struct packet_offload *ptype;
+ u16 mac_len = skb->mac_len;
+ struct pppoe_hdr *phdr;
+ __be16 orig_type, type;
+ int len, nhoff;
+
+ skb_reset_network_header(skb);
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+
+ if (unlikely(!pskb_may_pull(skb, pppoe_hlen)))
+ goto out;
+
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
+ type = pppoe_hdr_proto(phdr);
+ ptype = gro_find_complete_by_type(type);
+ if (!ptype)
+ goto out;
+
+ orig_type = skb->protocol;
+ __skb_pull(skb, pppoe_hlen);
+ segs = ptype->callbacks.gso_segment(skb, features);
+ if (IS_ERR_OR_NULL(segs)) {
+ skb_gso_error_unwind(skb, orig_type, pppoe_hlen, mac_offset,
+ mac_len);
+ goto out;
+ }
+
+ skb = segs;
+ do {
+ phdr = (struct pppoe_hdr *)(skb_mac_header(skb) + nhoff);
+ len = skb->len - (nhoff + sizeof(*phdr));
+ phdr->length = cpu_to_be16(len);
+ skb->network_header = (u8 *)phdr - skb->head;
+ skb->protocol = orig_type;
+ skb_reset_mac_len(skb);
+ } while ((skb = skb->next));
+
+out:
+ return segs;
+}
+
+static struct packet_offload pppoe_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_PPP_SES),
+ .priority = 20,
+ .callbacks = {
+ .gro_receive = pppoe_gro_receive,
+ .gro_complete = pppoe_gro_complete,
+ .gso_segment = pppoe_gso_segment,
+ },
+};
+
static int __init pppoe_init(void)
{
int err;
@@ -1189,6 +1345,7 @@ static int __init pppoe_init(void)
if (err)
goto out_unregister_pppoe_proto;
+ dev_add_offload(&pppoe_packet_offload);
dev_add_pack(&pppoes_ptype);
dev_add_pack(&pppoed_ptype);
register_netdevice_notifier(&pppoe_notifier);
@@ -1208,6 +1365,7 @@ static void __exit pppoe_exit(void)
unregister_netdevice_notifier(&pppoe_notifier);
dev_remove_pack(&pppoed_ptype);
dev_remove_pack(&pppoes_ptype);
+ dev_remove_offload(&pppoe_packet_offload);
unregister_pppox_proto(PX_PROTO_OE);
proto_unregister(&pppoe_sk_proto);
unregister_pernet_device(&pppoe_net_ops);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76e38092cd8a..0480a6d4f203 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1533,6 +1533,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
return pp;
}
+EXPORT_INDIRECT_CALLABLE(inet_gro_receive);
static struct sk_buff *ipip_gro_receive(struct list_head *head,
struct sk_buff *skb)
@@ -1618,6 +1619,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
out:
return err;
}
+EXPORT_INDIRECT_CALLABLE(inet_gro_complete);
static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
{
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index fce91183797a..9e3640b018a4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -306,6 +306,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
return pp;
}
+EXPORT_INDIRECT_CALLABLE(ipv6_gro_receive);
static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
struct sk_buff *skb)
@@ -388,6 +389,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
out:
return err;
}
+EXPORT_INDIRECT_CALLABLE(ipv6_gro_complete);
static int sit_gro_complete(struct sk_buff *skb, int nhoff)
{
--
2.50.1
Felix Fietkau wrote:
> Only handles packets where the pppoe header length field matches the exact
> packet length. Significantly improves rx throughput.
[...]
> +static struct sk_buff *pppoe_gso_segment(struct sk_buff *skb,
> +					 netdev_features_t features)
> +{

I don't think this will be called for PPPoE over GRE packets,
since gre_gso_segment skips everything up to the network header.
On 14.08.25 16:30, Richard Gobert wrote:
> Felix Fietkau wrote:
>> +static struct sk_buff *pppoe_gso_segment(struct sk_buff *skb,
>> +					 netdev_features_t features)
>> +{
>
> I don't think this will be called for PPPoE over GRE packets,
> since gre_gso_segment skips everything up to the network header.

What's a good solution to this issue? Use the outer network header
instead of the inner one when the protocol is PPPoE?

- Felix
Felix Fietkau wrote:
> On 14.08.25 16:30, Richard Gobert wrote:
>> I don't think this will be called for PPPoE over GRE packets,
>> since gre_gso_segment skips everything up to the network header.
>
> What's a good solution to this issue? Use the outer network header
> instead of the inner one when the protocol is PPPoE?
>
> - Felix

I don't really have a good solution for this. You could explicitly check
if the protocol is PPPoE in gre_gso_segment, but that wouldn't be very
elegant or future-proof.

I think setting skb->inner_network_header in pppoe_gro_complete
(while not resetting it in inet_gro_complete) wouldn't work since other
functions assume that skb->inner_network_header is an IP header.
On 8/18/25 1:22 PM, Richard Gobert wrote:
> Felix Fietkau wrote:
>> On 14.08.25 16:30, Richard Gobert wrote:
>>> I don't think this will be called for PPPoE over GRE packets,
>>> since gre_gso_segment skips everything up to the network header.
>>
>> What's a good solution to this issue? Use the outer network header
>> instead of the inner one when the protocol is PPPoE?
>>
>> - Felix
>
> I don't really have a good solution for this. You could explicitly check
> if the protocol is PPPoE in gre_gso_segment, but that wouldn't be very
> elegant or future-proof.
>
> I think setting skb->inner_network_header in pppoe_gro_complete
> (while not resetting it in inet_gro_complete) wouldn't work since other
> functions assume that skb->inner_network_header is an IP header.

Is PPPoE over GRE really a thing? IOW do we need to care at the GRO
level?

FTR, my biased answers are "no" and "no" ;)

/P
On 8/11/25 11:57 AM, Felix Fietkau wrote:
> @@ -1173,6 +1174,161 @@ static struct pernet_operations pppoe_net_ops = {
>  	.size = sizeof(struct pppoe_net),
>  };
>
> +static u16
> +compare_pppoe_header(struct pppoe_hdr *phdr, struct pppoe_hdr *phdr2)
> +{
> +	return (__force __u16)((phdr->sid ^ phdr2->sid) |
> +			       (phdr->tag[0].tag_type ^ phdr2->tag[0].tag_type));

I'm sorry for the late feedback. I see that the pppoe rcv() code ignores
the type and ver fields, but I guess it should be better to match them
here, to ensure that the segmented packet sequence matches the
pre-aggregation one.

You could cast the phdr* to u32* and compare such integer.

> +}
> +
> +static __be16 pppoe_hdr_proto(struct pppoe_hdr *phdr)
> +{
> +	switch (phdr->tag[0].tag_type) {
> +	case cpu_to_be16(PPP_IP):
> +		return cpu_to_be16(ETH_P_IP);
> +	case cpu_to_be16(PPP_IPV6):
> +		return cpu_to_be16(ETH_P_IPV6);
> +	default:
> +		return 0;
> +	}
> +

Minor nit: unneeded empty line above

> +}
> +
> +static struct sk_buff *pppoe_gro_receive(struct list_head *head,
> +					 struct sk_buff *skb)
> +{
[...]
> +	/* ignore packets with padding or invalid length */
> +	if (skb_gro_len(skb) != be16_to_cpu(phdr->length) + hlen)
> +		goto out;

What about filtering for phdr->code == 0 (session data) to avoid
useless late processing?

Thanks,

Paolo
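For illustration only (not part of the submitted patch or the thread), a
rough sketch of the comparison Paolo suggests, assuming struct pppoe_hdr
keeps ver, type, code and sid in its first 32 bits as defined in
include/uapi/linux/if_pppox.h:

/*
 * Hypothetical variant of compare_pppoe_header(): XOR the first 32 bits of
 * the PPPoE header (ver, type, code, sid) in one go, plus the PPP protocol
 * carried in tag[0].tag_type, so ver/type/code mismatches also break flow
 * matching.
 */
static u32 compare_pppoe_header(struct pppoe_hdr *phdr, struct pppoe_hdr *phdr2)
{
	/* The header follows the ethernet header, so it may not be 4-byte
	 * aligned; use the unaligned accessor to stay portable.
	 */
	return (get_unaligned((u32 *)phdr) ^ get_unaligned((u32 *)phdr2)) |
	       (__force u32)(phdr->tag[0].tag_type ^ phdr2->tag[0].tag_type);
}

The caller in pppoe_gro_receive() only tests the return value for non-zero,
so widening the return type from u16 to u32 would not change its behaviour.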