[v5] net: pppoe: implement GRO/GSO support

[PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 1 month ago

From: Felix Fietkau <nbd@nbd.name>

Only handles packets where the pppoe header length field matches the exact
packet length. Significantly improves rx throughput.

When running NAT traffic through a MediaTek MT7621 devices from a host
behind PPPoE to a host directly connected via ethernet, the TCP throughput
that the device is able to handle improves from ~130 Mbit/s to ~630 Mbit/s,
using fraglist GRO.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Qingfang Deng <dqfext@gmail.com>
---
v5: 
 - remove skb_is_gso() check from pppoe_rcv(), as the pppoe length
   field is updated in pppoe_gro_complete() to indicate the gso size.
 - fix length check in pppoe_gro_receive().
 - remove unnecessary check after pppoe_hdr_proto().
 - remove some unnecessary const qualifiers.
 https://lore.kernel.org/all/20260228032317.146855-1-dqfext@gmail.com/

 drivers/net/ppp/pppoe.c | 163 ++++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c      |   2 +
 net/ipv6/ip6_offload.c  |   2 +
 3 files changed, 167 insertions(+)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 1ac61c273b28..7908262ff2cd 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -77,6 +77,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
+#include <net/gro.h>
 
 #include <linux/uaccess.h>
 
@@ -1097,6 +1098,166 @@ static struct pernet_operations pppoe_net_ops = {
 	.size = sizeof(struct pppoe_net),
 };
 
+static u16
+compare_pppoe_header(const struct pppoe_hdr *phdr,
+		     const struct pppoe_hdr *phdr2)
+{
+	__be16 proto = *(const __be16 *)(phdr + 1);
+	__be16 proto2 = *(const __be16 *)(phdr2 + 1);
+
+	return (__force u16)((phdr->sid ^ phdr2->sid) | (proto ^ proto2));
+}
+
+static __be16 pppoe_hdr_proto(const struct pppoe_hdr *phdr)
+{
+	__be16 proto = *(const __be16 *)(phdr + 1);
+
+	switch (proto) {
+	case cpu_to_be16(PPP_IP):
+		return cpu_to_be16(ETH_P_IP);
+	case cpu_to_be16(PPP_IPV6):
+		return cpu_to_be16(ETH_P_IPV6);
+	default:
+		return 0;
+	}
+}
+
+static struct sk_buff *pppoe_gro_receive(struct list_head *head,
+					 struct sk_buff *skb)
+{
+	const struct packet_offload *ptype;
+	unsigned int hlen, off_pppoe;
+	const struct pppoe_hdr *phdr;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
+	int flush = 1;
+	__be16 type;
+
+	off_pppoe = skb_gro_offset(skb);
+	hlen = off_pppoe + sizeof(*phdr);
+	phdr = skb_gro_header(skb, hlen + 2, off_pppoe);
+	if (unlikely(!phdr))
+		goto out;
+
+	/* filter for session packets (type:1, ver:1, code:0) */
+	if (*(const __be16 *)phdr != cpu_to_be16(0x1100))
+		goto out;
+
+	/* ignore packets with padding or invalid length */
+	if (skb_gro_len(skb) != be16_to_cpu(phdr->length) + sizeof(*phdr))
+		goto out;
+
+	type = pppoe_hdr_proto(phdr);
+	ptype = gro_find_receive_by_type(type);
+	if (!ptype)
+		goto out;
+
+	flush = 0;
+
+	list_for_each_entry(p, head, list) {
+		const struct pppoe_hdr *phdr2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		phdr2 = (const struct pppoe_hdr *)(p->data + off_pppoe);
+		if (compare_pppoe_header(phdr, phdr2))
+			NAPI_GRO_CB(p)->same_flow = 0;
+	}
+
+	skb_gro_pull(skb, sizeof(*phdr) + 2);
+	skb_gro_postpull_rcsum(skb, phdr, sizeof(*phdr) + 2);
+
+	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+					    ipv6_gro_receive, inet_gro_receive,
+					    head, skb);
+
+out:
+	skb_gro_flush_final(skb, pp, flush);
+
+	return pp;
+}
+
+static int pppoe_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct pppoe_hdr *phdr = (struct pppoe_hdr *)(skb->data + nhoff);
+	__be16 type = pppoe_hdr_proto(phdr);
+	struct packet_offload *ptype;
+	int len, err;
+
+	ptype = gro_find_complete_by_type(type);
+	if (!ptype)
+		return -ENOENT;
+
+	err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+				 ipv6_gro_complete, inet_gro_complete,
+				 skb, nhoff + sizeof(*phdr) + 2);
+	if (err)
+		return err;
+
+	len = skb->len - (nhoff + sizeof(*phdr));
+	phdr->length = cpu_to_be16(len);
+
+	return 0;
+}
+
+static struct sk_buff *pppoe_gso_segment(struct sk_buff *skb,
+					 netdev_features_t features)
+{
+	unsigned int pppoe_hlen = sizeof(struct pppoe_hdr) + 2;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	u16 mac_offset = skb->mac_header;
+	struct packet_offload *ptype;
+	u16 mac_len = skb->mac_len;
+	struct pppoe_hdr *phdr;
+	__be16 orig_type, type;
+	int len, nhoff;
+
+	skb_reset_network_header(skb);
+	nhoff = skb_network_header(skb) - skb_mac_header(skb);
+
+	if (unlikely(!pskb_may_pull(skb, pppoe_hlen)))
+		goto out;
+
+	phdr = (struct pppoe_hdr *)skb_network_header(skb);
+	type = pppoe_hdr_proto(phdr);
+	ptype = gro_find_complete_by_type(type);
+	if (!ptype)
+		goto out;
+
+	orig_type = skb->protocol;
+	__skb_pull(skb, pppoe_hlen);
+	segs = ptype->callbacks.gso_segment(skb, features);
+	if (IS_ERR_OR_NULL(segs)) {
+		skb_gso_error_unwind(skb, orig_type, pppoe_hlen, mac_offset,
+				     mac_len);
+		goto out;
+	}
+
+	skb = segs;
+	do {
+		phdr = (struct pppoe_hdr *)(skb_mac_header(skb) + nhoff);
+		len = skb->len - (nhoff + sizeof(*phdr));
+		phdr->length = cpu_to_be16(len);
+		skb->network_header = (u8 *)phdr - skb->head;
+		skb->protocol = orig_type;
+		skb_reset_mac_len(skb);
+	} while ((skb = skb->next));
+
+out:
+	return segs;
+}
+
+static struct packet_offload pppoe_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_PPP_SES),
+	.priority = 20,
+	.callbacks = {
+		.gro_receive = pppoe_gro_receive,
+		.gro_complete = pppoe_gro_complete,
+		.gso_segment = pppoe_gso_segment,
+	},
+};
+
 static int __init pppoe_init(void)
 {
 	int err;
@@ -1113,6 +1274,7 @@ static int __init pppoe_init(void)
 	if (err)
 		goto out_unregister_pppoe_proto;
 
+	dev_add_offload(&pppoe_packet_offload);
 	dev_add_pack(&pppoes_ptype);
 	dev_add_pack(&pppoed_ptype);
 	register_netdevice_notifier(&pppoe_notifier);
@@ -1132,6 +1294,7 @@ static void __exit pppoe_exit(void)
 	unregister_netdevice_notifier(&pppoe_notifier);
 	dev_remove_pack(&pppoed_ptype);
 	dev_remove_pack(&pppoes_ptype);
+	dev_remove_offload(&pppoe_packet_offload);
 	unregister_pppox_proto(PX_PROTO_OE);
 	proto_unregister(&pppoe_sk_proto);
 	unregister_pernet_device(&pppoe_net_ops);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index babcd75a08e2..aa3324d819b3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1532,6 +1532,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	return pp;
 }
+EXPORT_INDIRECT_CALLABLE(inet_gro_receive);
 
 static struct sk_buff *ipip_gro_receive(struct list_head *head,
 					struct sk_buff *skb)
@@ -1617,6 +1618,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 out:
 	return err;
 }
+EXPORT_INDIRECT_CALLABLE(inet_gro_complete);
 
 static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
 {
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index bd7f780e37a5..32ba4739cef9 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -297,6 +297,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 
 	return pp;
 }
+EXPORT_INDIRECT_CALLABLE(ipv6_gro_receive);
 
 static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
 					      struct sk_buff *skb)
@@ -359,6 +360,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 out:
 	return err;
 }
+EXPORT_INDIRECT_CALLABLE(ipv6_gro_complete);
 
 static int sit_gro_complete(struct sk_buff *skb, int nhoff)
 {
-- 
2.43.0

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 1 week, 6 days ago

Hi all,

In some cases (such as BIG TCP) a GRO skb length can overflow a u16,
then what should I do with the u16 length field here?

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Xin Long 1 week, 5 days ago

On Wed, Mar 25, 2026 at 10:43 PM Qingfang Deng <dqfext@gmail.com> wrote:
>
> Hi all,
>
> In some cases (such as BIG TCP) a GRO skb length can overflow a u16,
> then what should I do with the u16 length field here?

I think you can either add a check in pppoe_gro_complete() to ensure
the aggregated size does not exceed U16_MAX, avoiding BIG TCP
behavior; Or, set phdr->length to 0 and rely on skb->len to determine
the actual length, as done in BIG TCP itself.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Xin Long 1 week, 5 days ago

On Thu, Mar 26, 2026 at 10:43 AM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Wed, Mar 25, 2026 at 10:43 PM Qingfang Deng <dqfext@gmail.com> wrote:
> >
> > Hi all,
> >
> > In some cases (such as BIG TCP) a GRO skb length can overflow a u16,
> > then what should I do with the u16 length field here?
>
> I think you can either add a check in pppoe_gro_complete() to ensure
sorry, I meant add a check in pppoe_gro_receive().
> the aggregated size does not exceed U16_MAX, avoiding BIG TCP
> behavior; Or, set phdr->length to 0 and rely on skb->len to determine
> the actual length, as done in BIG TCP itself.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 1 week, 5 days ago

Hi,

On Thu, Mar 26, 2026 at 10:44 PM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Thu, Mar 26, 2026 at 10:43 AM Xin Long <lucien.xin@gmail.com> wrote:
> >
> > On Wed, Mar 25, 2026 at 10:43 PM Qingfang Deng <dqfext@gmail.com> wrote:
> > >
> > > Hi all,
> > >
> > > In some cases (such as BIG TCP) a GRO skb length can overflow a u16,
> > > then what should I do with the u16 length field here?
> >
> > I think you can either add a check in pppoe_gro_complete() to ensure
> sorry, I meant add a check in pppoe_gro_receive().
> > the aggregated size does not exceed U16_MAX, avoiding BIG TCP
> > behavior; Or, set phdr->length to 0 and rely on skb->len to determine
> > the actual length, as done in BIG TCP itself.

In my v6 patch, I set phdr->length to 0xFFFF. Is that okay?

https://lore.kernel.org/netdev/20260326081127.61229-1-dqfext@gmail.com/

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Xin Long 1 week, 4 days ago

On Thu, Mar 26, 2026 at 9:49 PM Qingfang Deng <dqfext@gmail.com> wrote:
>
> Hi,
>
> On Thu, Mar 26, 2026 at 10:44 PM Xin Long <lucien.xin@gmail.com> wrote:
> >
> > On Thu, Mar 26, 2026 at 10:43 AM Xin Long <lucien.xin@gmail.com> wrote:
> > >
> > > On Wed, Mar 25, 2026 at 10:43 PM Qingfang Deng <dqfext@gmail.com> wrote:
> > > >
> > > > Hi all,
> > > >
> > > > In some cases (such as BIG TCP) a GRO skb length can overflow a u16,
> > > > then what should I do with the u16 length field here?
> > >
> > > I think you can either add a check in pppoe_gro_complete() to ensure
> > sorry, I meant add a check in pppoe_gro_receive().
> > > the aggregated size does not exceed U16_MAX, avoiding BIG TCP
> > > behavior; Or, set phdr->length to 0 and rely on skb->len to determine
> > > the actual length, as done in BIG TCP itself.
>
> In my v6 patch, I set phdr->length to 0xFFFF. Is that okay?
>
> https://lore.kernel.org/netdev/20260326081127.61229-1-dqfext@gmail.com/

I think you need to check how the PPPoE stack handles this value. If
it recalculates the actual length using skb->len when phdr->length ==
0xFFFF, then this approach might be fine.

Also, how does the PPPoE stack distinguish between 0xFFFF being the
actual payload length versus it being used as a marker to indicate a
jumbo packet (similar to BIG TCP)?

Thanks.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 1 week, 4 days ago

Hi, Xin Long,

On Fri, Mar 27, 2026 at 9:23 PM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Thu, Mar 26, 2026 at 9:49 PM Qingfang Deng <dqfext@gmail.com> wrote:
> >
> > In my v6 patch, I set phdr->length to 0xFFFF. Is that okay?
> >
> > https://lore.kernel.org/netdev/20260326081127.61229-1-dqfext@gmail.com/
>
> I think you need to check how the PPPoE stack handles this value. If
> it recalculates the actual length using skb->len when phdr->length ==
> 0xFFFF, then this approach might be fine.
>
> Also, how does the PPPoE stack distinguish between 0xFFFF being the
> actual payload length versus it being used as a marker to indicate a
> jumbo packet (similar to BIG TCP)?

In the receive function 'pppoe_rcv()': if a skb's actual payload
length is less than phdr->length, it will be dropped; if greater than
phdr->length, it will be trimmed to match phdr->length. After that,
the phdr is popped and the skb is passed to the generic PPP layer.
There is also a fastpath in Netfilter flowtable for IPv4/IPv6 that
bypasses pppoe_rcv(): it does not check phdr->length at all, and only
relies on the length field in the network header.

In the v2 version of this patch, phdr->length is not updated in
gro_complete(), so an additional check 'skb_is_gso()' is added to the
function to avoid trimming a GRO skb. Then Richard Gobert suggested
that the length field needs to be updated, and if the updated field
matches the actual length, the 'skb_is_gso()' is not necessary, but
only if the length fits in the u16 field.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Xin Long 1 week, 4 days ago

On Fri, Mar 27, 2026 at 10:43 AM Qingfang Deng <dqfext@gmail.com> wrote:
>
> Hi, Xin Long,
>
> On Fri, Mar 27, 2026 at 9:23 PM Xin Long <lucien.xin@gmail.com> wrote:
> >
> > On Thu, Mar 26, 2026 at 9:49 PM Qingfang Deng <dqfext@gmail.com> wrote:
> > >
> > > In my v6 patch, I set phdr->length to 0xFFFF. Is that okay?
> > >
> > > https://lore.kernel.org/netdev/20260326081127.61229-1-dqfext@gmail.com/
> >
> > I think you need to check how the PPPoE stack handles this value. If
> > it recalculates the actual length using skb->len when phdr->length ==
> > 0xFFFF, then this approach might be fine.
> >
> > Also, how does the PPPoE stack distinguish between 0xFFFF being the
> > actual payload length versus it being used as a marker to indicate a
> > jumbo packet (similar to BIG TCP)?
>
> In the receive function 'pppoe_rcv()': if a skb's actual payload
> length is less than phdr->length, it will be dropped; if greater than
> phdr->length, it will be trimmed to match phdr->length. After that,
> the phdr is popped and the skb is passed to the generic PPP layer.
> There is also a fastpath in Netfilter flowtable for IPv4/IPv6 that
> bypasses pppoe_rcv(): it does not check phdr->length at all, and only
> relies on the length field in the network header.
>
> In the v2 version of this patch, phdr->length is not updated in
> gro_complete(), so an additional check 'skb_is_gso()' is added to the
> function to avoid trimming a GRO skb. Then Richard Gobert suggested
> that the length field needs to be updated, and if the updated field
> matches the actual length, the 'skb_is_gso()' is not necessary, but
> only if the length fits in the u16 field.
I guess you have to look back to pppoe_rcv() for this, and double check
with Richard Gobert.

The phdr->length trick for jumbo packets might also affect how tcpdump
or tshark parses PPPoE packets.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Jakub Kicinski 1 month ago

On Thu,  5 Mar 2026 09:38:52 +0800 Qingfang Deng wrote:
> From: Felix Fietkau <nbd@nbd.name>
> 
> Only handles packets where the pppoe header length field matches the exact
> packet length. Significantly improves rx throughput.
> 
> When running NAT traffic through a MediaTek MT7621 devices from a host
> behind PPPoE to a host directly connected via ethernet, the TCP throughput
> that the device is able to handle improves from ~130 Mbit/s to ~630 Mbit/s,
> using fraglist GRO.
> 
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> Signed-off-by: Qingfang Deng <dqfext@gmail.com>

Please add some selftests.
-- 
pw-bot: cr

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 4 weeks, 1 day ago

Hi Jakub,

On Sat, Mar 7, 2026 at 9:04 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Please add some selftests.

I haven't written a kselftest before. What tests should I include?

I think I can add a local ppp server and client over veth, and test
the connection with ping and iperf3.

> --
> pw-bot: cr

Regards,
- Qingfang

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Jakub Kicinski 4 weeks, 1 day ago

On Mon, 9 Mar 2026 18:46:59 +0800 Qingfang Deng wrote:
> > Please add some selftests.  
> 
> I haven't written a kselftest before. What tests should I include?
> 
> I think I can add a local ppp server and client over veth, and test
> the connection with ping and iperf3.

We have tools/testing/selftests/drivers/net/gro.c and associated Python
test. (note I have ambiguous feelings about adding ppp cases to this
file vs creating a new test, adding Willem to CC maybe he has some
guidance)

The test itself should send and receive raw packets using packet
sockets. That way you can easily inject arbitrary fragmented frames 
on one end and check if they are coalesced correctly on the other.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Qingfang Deng 1 week, 6 days ago

On Tue, Mar 10, 2026 at 5:01 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Mon, 9 Mar 2026 18:46:59 +0800 Qingfang Deng wrote:
> > > Please add some selftests.
> >
> > I haven't written a kselftest before. What tests should I include?
> >
> > I think I can add a local ppp server and client over veth, and test
> > the connection with ping and iperf3.
>
> We have tools/testing/selftests/drivers/net/gro.c and associated Python
> test. (note I have ambiguous feelings about adding ppp cases to this
> file vs creating a new test, adding Willem to CC maybe he has some
> guidance)

He hasn't replied. What do you think?

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Jakub Kicinski 1 week, 6 days ago

On Wed, 25 Mar 2026 17:23:39 +0800 Qingfang Deng wrote:
> > > I haven't written a kselftest before. What tests should I include?
> > >
> > > I think I can add a local ppp server and client over veth, and test
> > > the connection with ping and iperf3.  
> >
> > We have tools/testing/selftests/drivers/net/gro.c and associated Python
> > test. (note I have ambiguous feelings about adding ppp cases to this
> > file vs creating a new test, adding Willem to CC maybe he has some
> > guidance)  
> 
> He hasn't replied. What do you think?

He probably missed the mid-thread email. Proceed with whatever you feel
makes sense.

Re: [PATCH net-next v5] net: pppoe: implement GRO/GSO support

Posted by Willem de Bruijn 1 week, 6 days ago

Jakub Kicinski wrote:
> On Wed, 25 Mar 2026 17:23:39 +0800 Qingfang Deng wrote:
> > > > I haven't written a kselftest before. What tests should I include?
> > > >
> > > > I think I can add a local ppp server and client over veth, and test
> > > > the connection with ping and iperf3.  
> > >
> > > We have tools/testing/selftests/drivers/net/gro.c and associated Python
> > > test. (note I have ambiguous feelings about adding ppp cases to this
> > > file vs creating a new test, adding Willem to CC maybe he has some
> > > guidance)  
> > 
> > He hasn't replied. What do you think?
> 
> He probably missed the mid-thread email. Proceed with whatever you feel
> makes sense.

Sorry I missed this.

It's hard to say without seeing the code. Perhaps code it up as an
extension to gro.c. If the code changes are manageable, send that. Else,
consider moving it into a stand-alone test.