[PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit

Aditya Garg posted 1 patch 2 months, 2 weeks ago
drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
include/net/mana/gdma.h                       |  8 +++++-
include/net/mana/mana.h                       |  1 +
3 files changed, 29 insertions(+), 6 deletions(-)
[PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 2 months, 2 weeks ago
The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
limit, the driver drops the skb. Add a check in mana_start_xmit() to
detect such cases and linearize the SKB before transmission.

Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request();
send other errors to free_sgl_ptr to free resources and record the tx
drop.

Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
 include/net/mana/gdma.h                       |  8 +++++-
 include/net/mana/mana.h                       |  1 +
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index f4fc86f20213..22605753ca84 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -20,6 +20,7 @@
 
 #include <net/mana/mana.h>
 #include <net/mana/mana_auxiliary.h>
+#include <linux/skbuff.h>
 
 static DEFINE_IDA(mana_adev_ida);
 
@@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cq = &apc->tx_qp[txq_idx].tx_cq;
 	tx_stats = &txq->stats;
 
+	BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
+	#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+		if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
+			netdev_info_once(ndev,
+					 "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
+					 skb_shinfo(skb)->nr_frags);
+			if (skb_linearize(skb)) {
+				netdev_warn_once(ndev, "Failed to linearize skb\n");
+				goto tx_drop_count;
+			}
+		}
+	#endif
+
 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -402,8 +416,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		}
 	}
 
-	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
 		pkg.wqe_req.sgl = pkg.sgl_array;
 	} else {
@@ -438,9 +450,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	if (err) {
 		(void)skb_dequeue_tail(&txq->pending_skbs);
+		mana_unmap_skb(skb, apc);
 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-		err = NETDEV_TX_BUSY;
-		goto tx_busy;
+		if (err == -ENOSPC) {
+			err = NETDEV_TX_BUSY;
+			goto tx_busy;
+		}
+		goto free_sgl_ptr;
 	}
 
 	err = NETDEV_TX_OK;
@@ -1606,7 +1622,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
 	return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..67fab1a5f382 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -489,6 +489,8 @@ struct gdma_wqe {
 #define MAX_TX_WQE_SIZE 512
 #define MAX_RX_WQE_SIZE 256
 
+#define MANA_MAX_TX_WQE_SGL_ENTRIES 30
+
 #define MAX_TX_WQE_SGL_ENTRIES	((GDMA_MAX_SQE_SIZE -			   \
 			sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \
 			sizeof(struct gdma_sge))
@@ -591,6 +593,9 @@ enum {
 /* Driver can self reset on FPGA Reconfig EQE notification */
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +604,8 @@ enum {
 	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
 	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
-	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+	 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;
-- 
2.34.1
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Simon Horman 2 months, 2 weeks ago
On Fri, Oct 03, 2025 at 08:47:24AM -0700, Aditya Garg wrote:
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> detect such cases and linearize the SKB before transmission.
> 
> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> send other errors to free_sgl_ptr to free resources and record the tx
> drop.
> 
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>  include/net/mana/gdma.h                       |  8 +++++-
>  include/net/mana/mana.h                       |  1 +
>  3 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index f4fc86f20213..22605753ca84 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -20,6 +20,7 @@
>  
>  #include <net/mana/mana.h>
>  #include <net/mana/mana_auxiliary.h>
> +#include <linux/skbuff.h>
>  
>  static DEFINE_IDA(mana_adev_ida);
>  
> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  	cq = &apc->tx_qp[txq_idx].tx_cq;
>  	tx_stats = &txq->stats;
>  
> +	BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> +	#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)

Hi Aditya,

I see that Eric has made a more substantial review of this patch,
so please follow his advice.

But I wanted to add something to keep in mind for the future: if the #if
/ #else used here can be replaced by a simple if() statement, then that
would be preferable.  The advantage is that it improves compile
coverage.  And, as these are all constants, I would expect the compiler to
optimise away any unused code.

N.B: I did not check, so please consider this more of a general statement

> +		if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> +			netdev_info_once(ndev,
> +					 "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> +					 skb_shinfo(skb)->nr_frags);
> +			if (skb_linearize(skb)) {
> +				netdev_warn_once(ndev, "Failed to linearize skb\n");
> +				goto tx_drop_count;
> +			}
> +		}
> +	#endif
> +
>  	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
>  	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>  

...
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 2 months, 1 week ago
On 04-10-2025 15:08, Simon Horman wrote:
> On Fri, Oct 03, 2025 at 08:47:24AM -0700, Aditya Garg wrote:
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>> detect such cases and linearize the SKB before transmission.
>>
>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>> send other errors to free_sgl_ptr to free resources and record the tx
>> drop.
>>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> ---
>>   drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>>   include/net/mana/gdma.h                       |  8 +++++-
>>   include/net/mana/mana.h                       |  1 +
>>   3 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index f4fc86f20213..22605753ca84 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -20,6 +20,7 @@
>>   
>>   #include <net/mana/mana.h>
>>   #include <net/mana/mana_auxiliary.h>
>> +#include <linux/skbuff.h>
>>   
>>   static DEFINE_IDA(mana_adev_ida);
>>   
>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>   	cq = &apc->tx_qp[txq_idx].tx_cq;
>>   	tx_stats = &txq->stats;
>>   
>> +	BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>> +	#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> 
> Hi Aditya,
> 
> I see that Eric has made a more substantial review of this patch,
> so please follow his advice.
> 
> But I wanted to add something to keep in mind for the future: I if the #if
> / #else used here can be replaced by a simple if() statement, then that
> would be preferable.  The advantage being that it improves compile
> coverage.  And, as these are all constants, I would expect the compiler to
> optimise away any unused code.

Hi Simon,
I will take care of your and Eric's comments in v2 of this patch.
Regards,
Aditya
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Eric Dumazet 2 months, 2 weeks ago
On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> detect such cases and linearize the SKB before transmission.
>
> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> send other errors to free_sgl_ptr to free resources and record the tx
> drop.
>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>  include/net/mana/gdma.h                       |  8 +++++-
>  include/net/mana/mana.h                       |  1 +
>  3 files changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index f4fc86f20213..22605753ca84 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -20,6 +20,7 @@
>
>  #include <net/mana/mana.h>
>  #include <net/mana/mana_auxiliary.h>
> +#include <linux/skbuff.h>
>
>  static DEFINE_IDA(mana_adev_ida);
>
> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>         cq = &apc->tx_qp[txq_idx].tx_cq;
>         tx_stats = &txq->stats;
>
> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> +               if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> +                       netdev_info_once(ndev,
> +                                        "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> +                                        skb_shinfo(skb)->nr_frags);
> +                       if (skb_linearize(skb)) {

This will fail in many cases.

This sort of check is better done in ndo_features_check()

Most probably this would occur for GSO packets, so we can ask for software
segmentation to avoid this big and risky kmalloc() by all means.

Look at idpf_features_check()  which has something similar.
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 2 months, 1 week ago
On 03-10-2025 21:45, Eric Dumazet wrote:
> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>> detect such cases and linearize the SKB before transmission.
>>
>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>> send other errors to free_sgl_ptr to free resources and record the tx
>> drop.
>>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> ---
>>   drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>>   include/net/mana/gdma.h                       |  8 +++++-
>>   include/net/mana/mana.h                       |  1 +
>>   3 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index f4fc86f20213..22605753ca84 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -20,6 +20,7 @@
>>
>>   #include <net/mana/mana.h>
>>   #include <net/mana/mana_auxiliary.h>
>> +#include <linux/skbuff.h>
>>
>>   static DEFINE_IDA(mana_adev_ida);
>>
>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>          cq = &apc->tx_qp[txq_idx].tx_cq;
>>          tx_stats = &txq->stats;
>>
>> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>> +               if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
>> +                       netdev_info_once(ndev,
>> +                                        "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
>> +                                        skb_shinfo(skb)->nr_frags);
>> +                       if (skb_linearize(skb)) {
> 
> This will fail in many cases.
> 
> This sort of check is better done in ndo_features_check()
> 
> Most probably this would occur for GSO packets, so can ask a software
> segmentation
> to avoid this big and risky kmalloc() by all means.
> 
> Look at idpf_features_check()  which has something similar.

Hi Eric,
Thank you for your review. I understand your concerns regarding the use 
of skb_linearize() in the xmit path, as it can fail under memory 
pressure and introduces additional overhead in the transmit path. Based 
on your input, I will work on a v2 that moves the SGE limit check to
the ndo_features_check() path and, for GSO skbs exceeding the hw limit,
disables NETIF_F_GSO_MASK to enforce software segmentation in the
kernel before the call to xmit.
Also, for non-GSO skbs exceeding the SGE hw limit, should we use
skb_linearize() only in that case, or would you suggest some other
approach here?

Regards,
Aditya
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Eric Dumazet 2 months, 1 week ago
On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> On 03-10-2025 21:45, Eric Dumazet wrote:
> > On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> > <gargaditya@linux.microsoft.com> wrote:
> >>
> >> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> >> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> >> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> >> detect such cases and linearize the SKB before transmission.
> >>
> >> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> >> send other errors to free_sgl_ptr to free resources and record the tx
> >> drop.
> >>
> >> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> >> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> >> ---
> >>   drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
> >>   include/net/mana/gdma.h                       |  8 +++++-
> >>   include/net/mana/mana.h                       |  1 +
> >>   3 files changed, 29 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> index f4fc86f20213..22605753ca84 100644
> >> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> @@ -20,6 +20,7 @@
> >>
> >>   #include <net/mana/mana.h>
> >>   #include <net/mana/mana_auxiliary.h>
> >> +#include <linux/skbuff.h>
> >>
> >>   static DEFINE_IDA(mana_adev_ida);
> >>
> >> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> >>          cq = &apc->tx_qp[txq_idx].tx_cq;
> >>          tx_stats = &txq->stats;
> >>
> >> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> >> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> >> +               if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> >> +                       netdev_info_once(ndev,
> >> +                                        "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> >> +                                        skb_shinfo(skb)->nr_frags);
> >> +                       if (skb_linearize(skb)) {
> >
> > This will fail in many cases.
> >
> > This sort of check is better done in ndo_features_check()
> >
> > Most probably this would occur for GSO packets, so can ask a software
> > segmentation
> > to avoid this big and risky kmalloc() by all means.
> >
> > Look at idpf_features_check()  which has something similar.
>
> Hi Eric,
> Thank you for your review. I understand your concerns regarding the use
> of skb_linearize() in the xmit path, as it can fail under memory
> pressure and introduces additional overhead in the transmit path. Based
> on your input, I will work on a v2 that will move the SGE limit check to
> the ndo_features_check() path and for GSO skbs exceding the hw limit
> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
> kernel before the call to xmit.
> Also for non GSO skb exceeding the SGE hw limit should we go for using
> skb_linearize only then or would you suggest some other approach here?

I think that for non GSO, the linearization attempt is fine.

Note that this is extremely unlikely for non malicious users,
and MTU being usually small (9K or less),
the allocation will be much smaller than a GSO packet.
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 2 months, 1 week ago
On 08-10-2025 20:51, Eric Dumazet wrote:
> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>> <gargaditya@linux.microsoft.com> wrote:
>>>>
>>>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>> detect such cases and linearize the SKB before transmission.
>>>>
>>>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>> drop.
>>>>
>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>> ---
>>>>    drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>>>>    include/net/mana/gdma.h                       |  8 +++++-
>>>>    include/net/mana/mana.h                       |  1 +
>>>>    3 files changed, 29 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> index f4fc86f20213..22605753ca84 100644
>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> @@ -20,6 +20,7 @@
>>>>
>>>>    #include <net/mana/mana.h>
>>>>    #include <net/mana/mana_auxiliary.h>
>>>> +#include <linux/skbuff.h>
>>>>
>>>>    static DEFINE_IDA(mana_adev_ida);
>>>>
>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>>>           cq = &apc->tx_qp[txq_idx].tx_cq;
>>>>           tx_stats = &txq->stats;
>>>>
>>>> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>> +               if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>> +                       netdev_info_once(ndev,
>>>> +                                        "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
>>>> +                                        skb_shinfo(skb)->nr_frags);
>>>> +                       if (skb_linearize(skb)) {
>>>
>>> This will fail in many cases.
>>>
>>> This sort of check is better done in ndo_features_check()
>>>
>>> Most probably this would occur for GSO packets, so can ask a software
>>> segmentation
>>> to avoid this big and risky kmalloc() by all means.
>>>
>>> Look at idpf_features_check()  which has something similar.
>>
>> Hi Eric,
>> Thank you for your review. I understand your concerns regarding the use
>> of skb_linearize() in the xmit path, as it can fail under memory
>> pressure and introduces additional overhead in the transmit path. Based
>> on your input, I will work on a v2 that will move the SGE limit check to
>> the ndo_features_check() path and for GSO skbs exceding the hw limit
>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
>> kernel before the call to xmit.
>> Also for non GSO skb exceeding the SGE hw limit should we go for using
>> skb_linearize only then or would you suggest some other approach here?
> 
> I think that for non GSO, the linearization attempt is fine.
> 
> Note that this is extremely unlikely for non malicious users,
> and MTU being usually small (9K or less),
> the allocation will be much smaller than a GSO packet.

Okay. Will send a v2
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 2 months ago
On 08-10-2025 20:58, Aditya Garg wrote:
> On 08-10-2025 20:51, Eric Dumazet wrote:
>> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
>> <gargaditya@linux.microsoft.com> wrote:
>>>
>>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>
>>>>> The MANA hardware supports a maximum of 30 scatter-gather entries 
>>>>> (SGEs)
>>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds 
>>>>> this
>>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>>> detect such cases and linearize the SKB before transmission.
>>>>>
>>>>> Return NETDEV_TX_BUSY only for -ENOSPC from 
>>>>> mana_gd_post_work_request(),
>>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>>> drop.
>>>>>
>>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>>> ---
>>>>>    drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++ 
>>>>> ++----
>>>>>    include/net/mana/gdma.h                       |  8 +++++-
>>>>>    include/net/mana/mana.h                       |  1 +
>>>>>    3 files changed, 29 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/ 
>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> index f4fc86f20213..22605753ca84 100644
>>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> @@ -20,6 +20,7 @@
>>>>>
>>>>>    #include <net/mana/mana.h>
>>>>>    #include <net/mana/mana_auxiliary.h>
>>>>> +#include <linux/skbuff.h>
>>>>>
>>>>>    static DEFINE_IDA(mana_adev_ida);
>>>>>
>>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff 
>>>>> *skb, struct net_device *ndev)
>>>>>           cq = &apc->tx_qp[txq_idx].tx_cq;
>>>>>           tx_stats = &txq->stats;
>>>>>
>>>>> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != 
>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>>> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>>> +               if (skb_shinfo(skb)->nr_frags + 2 > 
>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>>> +                       netdev_info_once(ndev,
>>>>> +                                        "nr_frags %d exceeds max 
>>>>> supported sge limit. Attempting skb_linearize\n",
>>>>> +                                        skb_shinfo(skb)->nr_frags);
>>>>> +                       if (skb_linearize(skb)) {
>>>>
>>>> This will fail in many cases.
>>>>
>>>> This sort of check is better done in ndo_features_check()
>>>>
>>>> Most probably this would occur for GSO packets, so can ask a software
>>>> segmentation
>>>> to avoid this big and risky kmalloc() by all means.
>>>>
>>>> Look at idpf_features_check()  which has something similar.
>>>
>>> Hi Eric,
>>> Thank you for your review. I understand your concerns regarding the use
>>> of skb_linearize() in the xmit path, as it can fail under memory
>>> pressure and introduces additional overhead in the transmit path. Based
>>> on your input, I will work on a v2 that will move the SGE limit check to
>>> the ndo_features_check() path and for GSO skbs exceding the hw limit
>>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
>>> kernel before the call to xmit.
>>> Also for non GSO skb exceeding the SGE hw limit should we go for using
>>> skb_linearize only then or would you suggest some other approach here?
>>
>> I think that for non GSO, the linearization attempt is fine.
>>
>> Note that this is extremely unlikely for non malicious users,
>> and MTU being usually small (9K or less),
>> the allocation will be much smaller than a GSO packet.
> 
> Okay. Will send a v2
Hi Eric,
I tested the code by disabling GSO in ndo_features_check when the number 
of SGEs exceeds the hardware limit, using iperf for a single TCP 
connection with zerocopy enabled. I noticed a significant difference in 
throughput compared to when we linearize the skbs.
For reference, the throughput is 35.6 Gbits/sec when using 
skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.

Hence, we propose linearizing skbs until the first failure occurs.
After that, we switch to a fail-safe mode by disabling GSO for skbs with
sge > hw limit using the ndo_features_check implementation, while
continuing to apply skb_linearize() for non-GSO packets that exceed the
hardware limit. This ensures we remain on the optimal performance path
initially, and only transition to the fail-safe path after encountering
a failure.
Regards,
Aditya
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Eric Dumazet 2 months ago
On Fri, Oct 17, 2025 at 10:41 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> On 08-10-2025 20:58, Aditya Garg wrote:
> > On 08-10-2025 20:51, Eric Dumazet wrote:
> >> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
> >> <gargaditya@linux.microsoft.com> wrote:
> >>>
> >>> On 03-10-2025 21:45, Eric Dumazet wrote:
> >>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> >>>> <gargaditya@linux.microsoft.com> wrote:
> >>>>>
> >>>>> The MANA hardware supports a maximum of 30 scatter-gather entries
> >>>>> (SGEs)
> >>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds
> >>>>> this
> >>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> >>>>> detect such cases and linearize the SKB before transmission.
> >>>>>
> >>>>> Return NETDEV_TX_BUSY only for -ENOSPC from
> >>>>> mana_gd_post_work_request(),
> >>>>> send other errors to free_sgl_ptr to free resources and record the tx
> >>>>> drop.
> >>>>>
> >>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> >>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> >>>>> ---
> >>>>>    drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++
> >>>>> ++----
> >>>>>    include/net/mana/gdma.h                       |  8 +++++-
> >>>>>    include/net/mana/mana.h                       |  1 +
> >>>>>    3 files changed, 29 insertions(+), 6 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/
> >>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> index f4fc86f20213..22605753ca84 100644
> >>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> @@ -20,6 +20,7 @@
> >>>>>
> >>>>>    #include <net/mana/mana.h>
> >>>>>    #include <net/mana/mana_auxiliary.h>
> >>>>> +#include <linux/skbuff.h>
> >>>>>
> >>>>>    static DEFINE_IDA(mana_adev_ida);
> >>>>>
> >>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff
> >>>>> *skb, struct net_device *ndev)
> >>>>>           cq = &apc->tx_qp[txq_idx].tx_cq;
> >>>>>           tx_stats = &txq->stats;
> >>>>>
> >>>>> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES !=
> >>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
> >>>>> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> >>>>> +               if (skb_shinfo(skb)->nr_frags + 2 >
> >>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
> >>>>> +                       netdev_info_once(ndev,
> >>>>> +                                        "nr_frags %d exceeds max
> >>>>> supported sge limit. Attempting skb_linearize\n",
> >>>>> +                                        skb_shinfo(skb)->nr_frags);
> >>>>> +                       if (skb_linearize(skb)) {
> >>>>
> >>>> This will fail in many cases.
> >>>>
> >>>> This sort of check is better done in ndo_features_check()
> >>>>
> >>>> Most probably this would occur for GSO packets, so can ask a software
> >>>> segmentation
> >>>> to avoid this big and risky kmalloc() by all means.
> >>>>
> >>>> Look at idpf_features_check()  which has something similar.
> >>>
> >>> Hi Eric,
> >>> Thank you for your review. I understand your concerns regarding the use
> >>> of skb_linearize() in the xmit path, as it can fail under memory
> >>> pressure and introduces additional overhead in the transmit path. Based
> >>> on your input, I will work on a v2 that will move the SGE limit check to
> >>> the ndo_features_check() path and for GSO skbs exceding the hw limit
> >>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
> >>> kernel before the call to xmit.
> >>> Also for non GSO skb exceeding the SGE hw limit should we go for using
> >>> skb_linearize only then or would you suggest some other approach here?
> >>
> >> I think that for non GSO, the linearization attempt is fine.
> >>
> >> Note that this is extremely unlikely for non malicious users,
> >> and MTU being usually small (9K or less),
> >> the allocation will be much smaller than a GSO packet.
> >
> > Okay. Will send a v2
> Hi Eric,
> I tested the code by disabling GSO in ndo_features_check when the number
> of SGEs exceeds the hardware limit, using iperf for a single TCP
> connection with zerocopy enabled. I noticed a significant difference in
> throughput compared to when we linearize the skbs.
> For reference, the throughput is 35.6 Gbits/sec when using
> skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.

You must be doing something very wrong.

Difference between TSO and non TSO should not be that high.

ethtool -K eth0 tso on
netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

540000 262144 262144    10.00    92766.69


ethtool -K eth0 tso off
netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

540000 262144 262144    10.00    52218.97

Now if I force linearization, you can definitely see the very high
cost of the copies !

ethtool -K eth1 sg off
tjbp26:/home/edumazet# ./netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

540000 262144 262144    10.00    16951.32

>
> Hence, we propose linearizing skbs until the first failure occurs.

Hmm... basically hiding a bug then ?

> After that, we switch to a fail-safe mode by disabling GSO for SKBs with
> sge > hw limit using the ndo_features_check() implementation, while
> continuing to apply skb_linearize() for non-GSO packets that exceed the
> hardware limit. This ensures we remain on the optimal performance path
> initially, and only transition to the fail-safe path after encountering
> a failure.

Please post your patch (adding the check in ndo_features_check()),
perhaps one of us is able to help.
Re: [PATCH net-next] net: mana: Linearize SKB if TX SGEs exceeds hardware limit
Posted by Aditya Garg 1 month, 3 weeks ago
On 17-10-2025 23:36, Eric Dumazet wrote:
> On Fri, Oct 17, 2025 at 10:41 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> On 08-10-2025 20:58, Aditya Garg wrote:
>>> On 08-10-2025 20:51, Eric Dumazet wrote:
>>>> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>
>>>>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>>>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>>>
>>>>>>> The MANA hardware supports a maximum of 30 scatter-gather entries
>>>>>>> (SGEs)
>>>>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds
>>>>>>> this
>>>>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>>>>> detect such cases and linearize the SKB before transmission.
>>>>>>>
>>>>>>> Return NETDEV_TX_BUSY only for -ENOSPC from
>>>>>>> mana_gd_post_work_request(),
>>>>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>>>>> drop.
>>>>>>>
>>>>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>>>>> ---
>>>>>>>     drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++
>>>>>>> ++----
>>>>>>>     include/net/mana/gdma.h                       |  8 +++++-
>>>>>>>     include/net/mana/mana.h                       |  1 +
>>>>>>>     3 files changed, 29 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/
>>>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> index f4fc86f20213..22605753ca84 100644
>>>>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> @@ -20,6 +20,7 @@
>>>>>>>
>>>>>>>     #include <net/mana/mana.h>
>>>>>>>     #include <net/mana/mana_auxiliary.h>
>>>>>>> +#include <linux/skbuff.h>
>>>>>>>
>>>>>>>     static DEFINE_IDA(mana_adev_ida);
>>>>>>>
>>>>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff
>>>>>>> *skb, struct net_device *ndev)
>>>>>>>            cq = &apc->tx_qp[txq_idx].tx_cq;
>>>>>>>            tx_stats = &txq->stats;
>>>>>>>
>>>>>>> +       BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES !=
>>>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>>>>> +       #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>>>>> +               if (skb_shinfo(skb)->nr_frags + 2 >
>>>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>>>>> +                       netdev_info_once(ndev,
>>>>>>> +                                        "nr_frags %d exceeds max
>>>>>>> supported sge limit. Attempting skb_linearize\n",
>>>>>>> +                                        skb_shinfo(skb)->nr_frags);
>>>>>>> +                       if (skb_linearize(skb)) {
>>>>>>
>>>>>> This will fail in many cases.
>>>>>>
>>>>>> This sort of check is better done in ndo_features_check()
>>>>>>
>>>>>> Most probably this would occur for GSO packets, so can ask a software
>>>>>> segmentation
>>>>>> to avoid this big and risky kmalloc() by all means.
>>>>>>
>>>>>> Look at idpf_features_check()  which has something similar.
>>>>>
>>>>> Hi Eric,
>>>>> Thank you for your review. I understand your concerns regarding the use
>>>>> of skb_linearize() in the xmit path, as it can fail under memory
>>>>> pressure and introduces additional overhead in the transmit path. Based
>>>>> on your input, I will work on a v2 that will move the SGE limit check to
>>>>> the ndo_features_check() path and, for GSO skbs exceeding the hw limit,
>>>>> will disable NETIF_F_GSO_MASK to enforce software segmentation in the
>>>>> kernel before the call to xmit.
>>>>> Also, for non-GSO skbs exceeding the SGE hw limit, should we use
>>>>> skb_linearize() only then, or would you suggest some other approach here?
>>>>
>>>> I think that for non GSO, the linearization attempt is fine.
>>>>
>>>> Note that this is extremely unlikely for non malicious users,
>>>> and MTU being usually small (9K or less),
>>>> the allocation will be much smaller than a GSO packet.
>>>
>>> Okay. Will send a v2
>> Hi Eric,
>> I tested the code by disabling GSO in ndo_features_check when the number
>> of SGEs exceeds the hardware limit, using iperf for a single TCP
>> connection with zerocopy enabled. I noticed a significant difference in
>> throughput compared to when we linearize the skbs.
>> For reference, the throughput is 35.6 Gbits/sec when using
>> skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.
> 
> You must be doing something very wrong.
> 
> Difference between TSO and non TSO should not be that high.
> 
> ethtool -K eth0 tso on
> netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
> 540000 262144 262144    10.00    92766.69
> 
> 
> ethtool -K eth0 tso off
> netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
> 540000 262144 262144    10.00    52218.97
> 
> Now if I force linearization, you can definitely see the very high
> cost of the copies !
> 
> ethtool -K eth1 sg off
> tjbp26:/home/edumazet# ./netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
> 540000 262144 262144    10.00    16951.32
> 
>>
>> Hence, we propose linearizing skbs until the first failure occurs.
> 
> Hmm... basically hiding a bug then ?
> 
>> After that, we switch to a fail-safe mode by disabling GSO for SKBs with
>> sge > hw limit using the ndo_features_check() implementation, while
>> continuing to apply skb_linearize() for non-GSO packets that exceed the
>> hardware limit. This ensures we remain on the optimal performance path
>> initially, and only transition to the fail-safe path after encountering
>> a failure.
> 
> Please post your patch (adding the check in ndo_features_check()),
> perhaps one of us is able to help.

Okay Eric, I'll post a v2 as an RFC. Please let me know.

Regards,
Aditya