The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
limit, the driver drops the skb. Add a check in mana_start_xmit() to
detect such cases and linearize the SKB before transmission.
Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
send other errors to free_sgl_ptr to free resources and record the tx
drop.
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
include/net/mana/gdma.h | 8 +++++-
include/net/mana/mana.h | 1 +
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index f4fc86f20213..22605753ca84 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -20,6 +20,7 @@
#include <net/mana/mana.h>
#include <net/mana/mana_auxiliary.h>
+#include <linux/skbuff.h>
static DEFINE_IDA(mana_adev_ida);
@@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cq = &apc->tx_qp[txq_idx].tx_cq;
tx_stats = &txq->stats;
+ BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
+ #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+ if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
+ netdev_info_once(ndev,
+ "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
+ skb_shinfo(skb)->nr_frags);
+ if (skb_linearize(skb)) {
+ netdev_warn_once(ndev, "Failed to linearize skb\n");
+ goto tx_drop_count;
+ }
+ }
+ #endif
+
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -402,8 +416,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
@@ -438,9 +450,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
if (err) {
(void)skb_dequeue_tail(&txq->pending_skbs);
+ mana_unmap_skb(skb, apc);
netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
- err = NETDEV_TX_BUSY;
- goto tx_busy;
+ if (err == -ENOSPC) {
+ err = NETDEV_TX_BUSY;
+ goto tx_busy;
+ }
+ goto free_sgl_ptr;
}
err = NETDEV_TX_OK;
@@ -1606,7 +1622,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
return 0;
}
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..67fab1a5f382 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -489,6 +489,8 @@ struct gdma_wqe {
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256
+#define MANA_MAX_TX_WQE_SGL_ENTRIES 30
+
#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \
sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \
sizeof(struct gdma_sge))
@@ -591,6 +593,9 @@ enum {
/* Driver can self reset on FPGA Reconfig EQE notification */
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +604,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
- GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
void mana_query_phy_stats(struct mana_port_context *apc);
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
extern struct dentry *mana_debugfs_root;
--
2.34.1
On Fri, Oct 03, 2025 at 08:47:24AM -0700, Aditya Garg wrote:
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> detect such cases and linearize the SKB before transmission.
>
> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> send other errors to free_sgl_ptr to free resources and record the tx
> drop.
>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> ---
> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
> include/net/mana/gdma.h | 8 +++++-
> include/net/mana/mana.h | 1 +
> 3 files changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index f4fc86f20213..22605753ca84 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -20,6 +20,7 @@
>
> #include <net/mana/mana.h>
> #include <net/mana/mana_auxiliary.h>
> +#include <linux/skbuff.h>
>
> static DEFINE_IDA(mana_adev_ida);
>
> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> cq = &apc->tx_qp[txq_idx].tx_cq;
> tx_stats = &txq->stats;
>
> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
Hi Aditya,
I see that Eric has made a more substantial review of this patch,
so please follow his advice.
But I wanted to add something to keep in mind for the future: if the #if
/ #else used here can be replaced by a simple if() statement, then that
would be preferable. The advantage being that it improves compile
coverage. And, as these are all constants, I would expect the compiler to
optimise away any unused code.
N.B: I did not check, so please consider this more of a general statement
> + if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> + netdev_info_once(ndev,
> + "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> + skb_shinfo(skb)->nr_frags);
> + if (skb_linearize(skb)) {
> + netdev_warn_once(ndev, "Failed to linearize skb\n");
> + goto tx_drop_count;
> + }
> + }
> + #endif
> +
> pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
> pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
>
...
On 04-10-2025 15:08, Simon Horman wrote:
> On Fri, Oct 03, 2025 at 08:47:24AM -0700, Aditya Garg wrote:
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>> detect such cases and linearize the SKB before transmission.
>>
>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>> send other errors to free_sgl_ptr to free resources and record the tx
>> drop.
>>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> ---
>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>> include/net/mana/gdma.h | 8 +++++-
>> include/net/mana/mana.h | 1 +
>> 3 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index f4fc86f20213..22605753ca84 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -20,6 +20,7 @@
>>
>> #include <net/mana/mana.h>
>> #include <net/mana/mana_auxiliary.h>
>> +#include <linux/skbuff.h>
>>
>> static DEFINE_IDA(mana_adev_ida);
>>
>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> cq = &apc->tx_qp[txq_idx].tx_cq;
>> tx_stats = &txq->stats;
>>
>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>
> Hi Aditya,
>
> I see that Eric has made a more substantial review of this patch,
> so please follow his advice.
>
> But I wanted to add something to keep in mind for the future: if the #if
> / #else used here can be replaced by a simple if() statement, then that
> would be preferable. The advantage being that it improves compile
> coverage. And, as these are all constants, I would expect the compiler to
> optimise away any unused code.

Hi Simon,
I will take care of yours and Eric's comment in v2 of this patch.

Regards,
Aditya
On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> detect such cases and linearize the SKB before transmission.
>
> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> send other errors to free_sgl_ptr to free resources and record the tx
> drop.
>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> ---
> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
> include/net/mana/gdma.h | 8 +++++-
> include/net/mana/mana.h | 1 +
> 3 files changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index f4fc86f20213..22605753ca84 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -20,6 +20,7 @@
>
> #include <net/mana/mana.h>
> #include <net/mana/mana_auxiliary.h>
> +#include <linux/skbuff.h>
>
> static DEFINE_IDA(mana_adev_ida);
>
> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> cq = &apc->tx_qp[txq_idx].tx_cq;
> tx_stats = &txq->stats;
>
> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> + if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> + netdev_info_once(ndev,
> + "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> + skb_shinfo(skb)->nr_frags);
> + if (skb_linearize(skb)) {
This will fail in many cases.
This sort of check is better done in ndo_features_check()
Most probably this would occur for GSO packets, so we can ask for software
segmentation to avoid this big and risky kmalloc() by all means.
Look at idpf_features_check() which has something similar.
On 03-10-2025 21:45, Eric Dumazet wrote:
> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>> detect such cases and linearize the SKB before transmission.
>>
>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>> send other errors to free_sgl_ptr to free resources and record the tx
>> drop.
>>
>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>> ---
>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>> include/net/mana/gdma.h | 8 +++++-
>> include/net/mana/mana.h | 1 +
>> 3 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> index f4fc86f20213..22605753ca84 100644
>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>> @@ -20,6 +20,7 @@
>>
>> #include <net/mana/mana.h>
>> #include <net/mana/mana_auxiliary.h>
>> +#include <linux/skbuff.h>
>>
>> static DEFINE_IDA(mana_adev_ida);
>>
>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> cq = &apc->tx_qp[txq_idx].tx_cq;
>> tx_stats = &txq->stats;
>>
>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>> + if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
>> + netdev_info_once(ndev,
>> + "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
>> + skb_shinfo(skb)->nr_frags);
>> + if (skb_linearize(skb)) {
>
> This will fail in many cases.
>
> This sort of check is better done in ndo_features_check()
>
> Most probably this would occur for GSO packets, so can ask a software
> segmentation
> to avoid this big and risky kmalloc() by all means.
>
> Look at idpf_features_check() which has something similar.
Hi Eric,
Thank you for your review. I understand your concerns regarding the use
of skb_linearize() in the xmit path, as it can fail under memory
pressure and introduces additional overhead in the transmit path. Based
on your input, I will work on a v2 that will move the SGE limit check to
the ndo_features_check() path and for GSO skbs exceeding the hw limit
will disable the NETIF_F_GSO_MASK to enforce software segmentation in
kernel before the call to xmit.
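For concreteness, here is a rough sketch of the direction I have in mind
for the GSO case (the function name, its hookup and the exact condition
are tentative on my side, not final code):

static netdev_features_t mana_features_check(struct sk_buff *skb,
					      struct net_device *ndev,
					      netdev_features_t features)
{
	/* The driver needs up to two SGEs for the headers plus one per page
	 * fragment; force software GSO when a GSO skb would need more SGEs
	 * than the hardware supports, so the stack segments it before
	 * ndo_start_xmit() is called.
	 */
	if (skb_is_gso(skb) &&
	    skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
		return features & ~NETIF_F_GSO_MASK;

	return features;
}

with .ndo_features_check = mana_features_check wired into mana_devops.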
Also, for non-GSO skbs exceeding the SGE hw limit, should we go with
skb_linearize() only in that case, or would you suggest some other approach here?
Regards,
Aditya
On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> On 03-10-2025 21:45, Eric Dumazet wrote:
> > On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> > <gargaditya@linux.microsoft.com> wrote:
> >>
> >> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> >> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
> >> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> >> detect such cases and linearize the SKB before transmission.
> >>
> >> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
> >> send other errors to free_sgl_ptr to free resources and record the tx
> >> drop.
> >>
> >> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> >> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> >> ---
> >> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
> >> include/net/mana/gdma.h | 8 +++++-
> >> include/net/mana/mana.h | 1 +
> >> 3 files changed, 29 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> index f4fc86f20213..22605753ca84 100644
> >> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >> @@ -20,6 +20,7 @@
> >>
> >> #include <net/mana/mana.h>
> >> #include <net/mana/mana_auxiliary.h>
> >> +#include <linux/skbuff.h>
> >>
> >> static DEFINE_IDA(mana_adev_ida);
> >>
> >> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> >> cq = &apc->tx_qp[txq_idx].tx_cq;
> >> tx_stats = &txq->stats;
> >>
> >> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> >> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> >> + if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
> >> + netdev_info_once(ndev,
> >> + "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
> >> + skb_shinfo(skb)->nr_frags);
> >> + if (skb_linearize(skb)) {
> >
> > This will fail in many cases.
> >
> > This sort of check is better done in ndo_features_check()
> >
> > Most probably this would occur for GSO packets, so can ask a software
> > segmentation
> > to avoid this big and risky kmalloc() by all means.
> >
> > Look at idpf_features_check() which has something similar.
>
> Hi Eric,
> Thank you for your review. I understand your concerns regarding the use
> of skb_linearize() in the xmit path, as it can fail under memory
> pressure and introduces additional overhead in the transmit path. Based
> on your input, I will work on a v2 that will move the SGE limit check to
> the ndo_features_check() path and for GSO skbs exceding the hw limit
> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
> kernel before the call to xmit.
> Also for non GSO skb exceeding the SGE hw limit should we go for using
> skb_linearize only then or would you suggest some other approach here?
I think that for non GSO, the linearization attempt is fine.
Note that this is extremely unlikely for non malicious users,
and MTU being usually small (9K or less),
the allocation will be much smaller than a GSO packet.
On 08-10-2025 20:51, Eric Dumazet wrote:
> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>> <gargaditya@linux.microsoft.com> wrote:
>>>>
>>>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds this
>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>> detect such cases and linearize the SKB before transmission.
>>>>
>>>> Return NETDEV_TX_BUSY only for -ENOSPC from mana_gd_post_work_request(),
>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>> drop.
>>>>
>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>> ---
>>>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++++----
>>>> include/net/mana/gdma.h | 8 +++++-
>>>> include/net/mana/mana.h | 1 +
>>>> 3 files changed, 29 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> index f4fc86f20213..22605753ca84 100644
>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>> @@ -20,6 +20,7 @@
>>>>
>>>> #include <net/mana/mana.h>
>>>> #include <net/mana/mana_auxiliary.h>
>>>> +#include <linux/skbuff.h>
>>>>
>>>> static DEFINE_IDA(mana_adev_ida);
>>>>
>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>>> cq = &apc->tx_qp[txq_idx].tx_cq;
>>>> tx_stats = &txq->stats;
>>>>
>>>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>> + if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>> + netdev_info_once(ndev,
>>>> + "nr_frags %d exceeds max supported sge limit. Attempting skb_linearize\n",
>>>> + skb_shinfo(skb)->nr_frags);
>>>> + if (skb_linearize(skb)) {
>>>
>>> This will fail in many cases.
>>>
>>> This sort of check is better done in ndo_features_check()
>>>
>>> Most probably this would occur for GSO packets, so can ask a software
>>> segmentation
>>> to avoid this big and risky kmalloc() by all means.
>>>
>>> Look at idpf_features_check() which has something similar.
>>
>> Hi Eric,
>> Thank you for your review. I understand your concerns regarding the use
>> of skb_linearize() in the xmit path, as it can fail under memory
>> pressure and introduces additional overhead in the transmit path. Based
>> on your input, I will work on a v2 that will move the SGE limit check to
>> the ndo_features_check() path and for GSO skbs exceding the hw limit
>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
>> kernel before the call to xmit.
>> Also for non GSO skb exceeding the SGE hw limit should we go for using
>> skb_linearize only then or would you suggest some other approach here?
>
> I think that for non GSO, the linearization attempt is fine.
>
> Note that this is extremely unlikely for non malicious users,
> and MTU being usually small (9K or less),
> the allocation will be much smaller than a GSO packet.
Okay. Will send a v2
On 08-10-2025 20:58, Aditya Garg wrote:
> On 08-10-2025 20:51, Eric Dumazet wrote:
>> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
>> <gargaditya@linux.microsoft.com> wrote:
>>>
>>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>
>>>>> The MANA hardware supports a maximum of 30 scatter-gather entries
>>>>> (SGEs)
>>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds
>>>>> this
>>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>>> detect such cases and linearize the SKB before transmission.
>>>>>
>>>>> Return NETDEV_TX_BUSY only for -ENOSPC from
>>>>> mana_gd_post_work_request(),
>>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>>> drop.
>>>>>
>>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>>> ---
>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++
>>>>> ++----
>>>>> include/net/mana/gdma.h | 8 +++++-
>>>>> include/net/mana/mana.h | 1 +
>>>>> 3 files changed, 29 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/
>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> index f4fc86f20213..22605753ca84 100644
>>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>> @@ -20,6 +20,7 @@
>>>>>
>>>>> #include <net/mana/mana.h>
>>>>> #include <net/mana/mana_auxiliary.h>
>>>>> +#include <linux/skbuff.h>
>>>>>
>>>>> static DEFINE_IDA(mana_adev_ida);
>>>>>
>>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff
>>>>> *skb, struct net_device *ndev)
>>>>> cq = &apc->tx_qp[txq_idx].tx_cq;
>>>>> tx_stats = &txq->stats;
>>>>>
>>>>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES !=
>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>>> + if (skb_shinfo(skb)->nr_frags + 2 >
>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>>> + netdev_info_once(ndev,
>>>>> + "nr_frags %d exceeds max
>>>>> supported sge limit. Attempting skb_linearize\n",
>>>>> + skb_shinfo(skb)->nr_frags);
>>>>> + if (skb_linearize(skb)) {
>>>>
>>>> This will fail in many cases.
>>>>
>>>> This sort of check is better done in ndo_features_check()
>>>>
>>>> Most probably this would occur for GSO packets, so can ask a software
>>>> segmentation
>>>> to avoid this big and risky kmalloc() by all means.
>>>>
>>>> Look at idpf_features_check() which has something similar.
>>>
>>> Hi Eric,
>>> Thank you for your review. I understand your concerns regarding the use
>>> of skb_linearize() in the xmit path, as it can fail under memory
>>> pressure and introduces additional overhead in the transmit path. Based
>>> on your input, I will work on a v2 that will move the SGE limit check to
>>> the ndo_features_check() path and for GSO skbs exceding the hw limit
>>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
>>> kernel before the call to xmit.
>>> Also for non GSO skb exceeding the SGE hw limit should we go for using
>>> skb_linearize only then or would you suggest some other approach here?
>>
>> I think that for non GSO, the linearization attempt is fine.
>>
>> Note that this is extremely unlikely for non malicious users,
>> and MTU being usually small (9K or less),
>> the allocation will be much smaller than a GSO packet.
>
> Okay. Will send a v2
Hi Eric,
I tested the code by disabling GSO in ndo_features_check when the number
of SGEs exceeds the hardware limit, using iperf for a single TCP
connection with zerocopy enabled. I noticed a significant difference in
throughput compared to when we linearize the skbs.
For reference, the throughput is 35.6 Gbits/sec when using
skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.
Hence, we propose linearizing skbs until the first failure occurs.
After that, we switch to a fail-safe mode by disabling GSO for skbs with
SGE count > hw limit using the ndo_features_check() implementation, while
continuing to apply skb_linearize() for non-GSO packets that exceed the
hardware limit. This ensures we remain on the optimal performance path
initially, and only transition to the fail-safe path after encountering
a failure.
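Roughly, the fail-safe switch could look like the sketch below; the
linearize_failed flag would be a new field in struct mana_port_context and
is only illustrative at this point, as is the exact placement:

static netdev_features_t mana_features_check(struct sk_buff *skb,
					      struct net_device *ndev,
					      netdev_features_t features)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	/* Only force software GSO once a linearization attempt has already
	 * failed on this port; until then we stay on the faster TSO path.
	 */
	if (skb_is_gso(skb) && READ_ONCE(apc->linearize_failed) &&
	    skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
		return features & ~NETIF_F_GSO_MASK;

	return features;
}

and in mana_start_xmit(), for any skb that still exceeds the SGE limit:

	if (skb_shinfo(skb)->nr_frags + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) {
		if (skb_linearize(skb)) {
			/* Flip this port into fail-safe mode so future GSO
			 * skbs get segmented in software instead.
			 */
			WRITE_ONCE(apc->linearize_failed, true);
			goto tx_drop_count;
		}
	}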
Regards,
Aditya
On Fri, Oct 17, 2025 at 10:41 AM Aditya Garg
<gargaditya@linux.microsoft.com> wrote:
>
> On 08-10-2025 20:58, Aditya Garg wrote:
> > On 08-10-2025 20:51, Eric Dumazet wrote:
> >> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
> >> <gargaditya@linux.microsoft.com> wrote:
> >>>
> >>> On 03-10-2025 21:45, Eric Dumazet wrote:
> >>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
> >>>> <gargaditya@linux.microsoft.com> wrote:
> >>>>>
> >>>>> The MANA hardware supports a maximum of 30 scatter-gather entries
> >>>>> (SGEs)
> >>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds
> >>>>> this
> >>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
> >>>>> detect such cases and linearize the SKB before transmission.
> >>>>>
> >>>>> Return NETDEV_TX_BUSY only for -ENOSPC from
> >>>>> mana_gd_post_work_request(),
> >>>>> send other errors to free_sgl_ptr to free resources and record the tx
> >>>>> drop.
> >>>>>
> >>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
> >>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> >>>>> ---
> >>>>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++
> >>>>> ++----
> >>>>> include/net/mana/gdma.h | 8 +++++-
> >>>>> include/net/mana/mana.h | 1 +
> >>>>> 3 files changed, 29 insertions(+), 6 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/
> >>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> index f4fc86f20213..22605753ca84 100644
> >>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> >>>>> @@ -20,6 +20,7 @@
> >>>>>
> >>>>> #include <net/mana/mana.h>
> >>>>> #include <net/mana/mana_auxiliary.h>
> >>>>> +#include <linux/skbuff.h>
> >>>>>
> >>>>> static DEFINE_IDA(mana_adev_ida);
> >>>>>
> >>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff
> >>>>> *skb, struct net_device *ndev)
> >>>>> cq = &apc->tx_qp[txq_idx].tx_cq;
> >>>>> tx_stats = &txq->stats;
> >>>>>
> >>>>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES !=
> >>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
> >>>>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> >>>>> + if (skb_shinfo(skb)->nr_frags + 2 >
> >>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
> >>>>> + netdev_info_once(ndev,
> >>>>> + "nr_frags %d exceeds max
> >>>>> supported sge limit. Attempting skb_linearize\n",
> >>>>> + skb_shinfo(skb)->nr_frags);
> >>>>> + if (skb_linearize(skb)) {
> >>>>
> >>>> This will fail in many cases.
> >>>>
> >>>> This sort of check is better done in ndo_features_check()
> >>>>
> >>>> Most probably this would occur for GSO packets, so can ask a software
> >>>> segmentation
> >>>> to avoid this big and risky kmalloc() by all means.
> >>>>
> >>>> Look at idpf_features_check() which has something similar.
> >>>
> >>> Hi Eric,
> >>> Thank you for your review. I understand your concerns regarding the use
> >>> of skb_linearize() in the xmit path, as it can fail under memory
> >>> pressure and introduces additional overhead in the transmit path. Based
> >>> on your input, I will work on a v2 that will move the SGE limit check to
> >>> the ndo_features_check() path and for GSO skbs exceding the hw limit
> >>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
> >>> kernel before the call to xmit.
> >>> Also for non GSO skb exceeding the SGE hw limit should we go for using
> >>> skb_linearize only then or would you suggest some other approach here?
> >>
> >> I think that for non GSO, the linearization attempt is fine.
> >>
> >> Note that this is extremely unlikely for non malicious users,
> >> and MTU being usually small (9K or less),
> >> the allocation will be much smaller than a GSO packet.
> >
> > Okay. Will send a v2
> Hi Eric,
> I tested the code by disabling GSO in ndo_features_check when the number
> of SGEs exceeds the hardware limit, using iperf for a single TCP
> connection with zerocopy enabled. I noticed a significant difference in
> throughput compared to when we linearize the skbs.
> For reference, the throughput is 35.6 Gbits/sec when using
> skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.
You must be doing something very wrong.
Difference between TSO and non TSO should not be that high.
ethtool -K eth0 tso on
netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
540000 262144 262144 10.00 92766.69
ethtool -K eth0 tso off
netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
540000 262144 262144 10.00 52218.97
Now if I force linearization, you can definitely see the very high
cost of the copies !
ethtool -K eth1 sg off
tjbp26:/home/edumazet# ./netperf -H tjbp27
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
tjbp27.prod.google.com () port 0 AF_INET6
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
540000 262144 262144 10.00 16951.32
>
> Hence, We propose to linearizing skbs until the first failure occurs.
Hmm... basically hiding a bug then ?
> After that, we switch to a fail-safe mode by disabling GSO for SKBs with
> sge > hw limit using the ndo_feature_check implementation, while
> continuing to apply skb_linearize() for non-GSO packets that exceed the
> hardware limit. This ensures we remain on the optimal performance path
> initially, and only transition to the fail-safe path after encountering
> a failure.
Please post your patch (adding the check in ndo_features_check()),
perhaps one of us is able to help.
On 17-10-2025 23:36, Eric Dumazet wrote:
> On Fri, Oct 17, 2025 at 10:41 AM Aditya Garg
> <gargaditya@linux.microsoft.com> wrote:
>>
>> On 08-10-2025 20:58, Aditya Garg wrote:
>>> On 08-10-2025 20:51, Eric Dumazet wrote:
>>>> On Wed, Oct 8, 2025 at 8:16 AM Aditya Garg
>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>
>>>>> On 03-10-2025 21:45, Eric Dumazet wrote:
>>>>>> On Fri, Oct 3, 2025 at 8:47 AM Aditya Garg
>>>>>> <gargaditya@linux.microsoft.com> wrote:
>>>>>>>
>>>>>>> The MANA hardware supports a maximum of 30 scatter-gather entries
>>>>>>> (SGEs)
>>>>>>> per TX WQE. In rare configurations where MAX_SKB_FRAGS + 2 exceeds
>>>>>>> this
>>>>>>> limit, the driver drops the skb. Add a check in mana_start_xmit() to
>>>>>>> detect such cases and linearize the SKB before transmission.
>>>>>>>
>>>>>>> Return NETDEV_TX_BUSY only for -ENOSPC from
>>>>>>> mana_gd_post_work_request(),
>>>>>>> send other errors to free_sgl_ptr to free resources and record the tx
>>>>>>> drop.
>>>>>>>
>>>>>>> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
>>>>>>> Reviewed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
>>>>>>> ---
>>>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c | 26 +++++++++++++
>>>>>>> ++----
>>>>>>> include/net/mana/gdma.h | 8 +++++-
>>>>>>> include/net/mana/mana.h | 1 +
>>>>>>> 3 files changed, 29 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/
>>>>>>> drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> index f4fc86f20213..22605753ca84 100644
>>>>>>> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
>>>>>>> @@ -20,6 +20,7 @@
>>>>>>>
>>>>>>> #include <net/mana/mana.h>
>>>>>>> #include <net/mana/mana_auxiliary.h>
>>>>>>> +#include <linux/skbuff.h>
>>>>>>>
>>>>>>> static DEFINE_IDA(mana_adev_ida);
>>>>>>>
>>>>>>> @@ -289,6 +290,19 @@ netdev_tx_t mana_start_xmit(struct sk_buff
>>>>>>> *skb, struct net_device *ndev)
>>>>>>> cq = &apc->tx_qp[txq_idx].tx_cq;
>>>>>>> tx_stats = &txq->stats;
>>>>>>>
>>>>>>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES !=
>>>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES);
>>>>>>> + #if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>>>>>>> + if (skb_shinfo(skb)->nr_frags + 2 >
>>>>>>> MANA_MAX_TX_WQE_SGL_ENTRIES) {
>>>>>>> + netdev_info_once(ndev,
>>>>>>> + "nr_frags %d exceeds max
>>>>>>> supported sge limit. Attempting skb_linearize\n",
>>>>>>> + skb_shinfo(skb)->nr_frags);
>>>>>>> + if (skb_linearize(skb)) {
>>>>>>
>>>>>> This will fail in many cases.
>>>>>>
>>>>>> This sort of check is better done in ndo_features_check()
>>>>>>
>>>>>> Most probably this would occur for GSO packets, so can ask a software
>>>>>> segmentation
>>>>>> to avoid this big and risky kmalloc() by all means.
>>>>>>
>>>>>> Look at idpf_features_check() which has something similar.
>>>>>
>>>>> Hi Eric,
>>>>> Thank you for your review. I understand your concerns regarding the use
>>>>> of skb_linearize() in the xmit path, as it can fail under memory
>>>>> pressure and introduces additional overhead in the transmit path. Based
>>>>> on your input, I will work on a v2 that will move the SGE limit check to
>>>>> the ndo_features_check() path and for GSO skbs exceding the hw limit
>>>>> will disable the NETIF_F_GSO_MASK to enforce software segmentation in
>>>>> kernel before the call to xmit.
>>>>> Also for non GSO skb exceeding the SGE hw limit should we go for using
>>>>> skb_linearize only then or would you suggest some other approach here?
>>>>
>>>> I think that for non GSO, the linearization attempt is fine.
>>>>
>>>> Note that this is extremely unlikely for non malicious users,
>>>> and MTU being usually small (9K or less),
>>>> the allocation will be much smaller than a GSO packet.
>>>
>>> Okay. Will send a v2
>> Hi Eric,
>> I tested the code by disabling GSO in ndo_features_check when the number
>> of SGEs exceeds the hardware limit, using iperf for a single TCP
>> connection with zerocopy enabled. I noticed a significant difference in
>> throughput compared to when we linearize the skbs.
>> For reference, the throughput is 35.6 Gbits/sec when using
>> skb_linearize, but drops to 6.75 Gbits/sec when disabling GSO per skb.
>
> You must be doing something very wrong.
>
> Difference between TSO and non TSO should not be that high.
>
> ethtool -K eth0 tso on
> netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv Send Send
> Socket Socket Message Elapsed
> Size Size Size Time Throughput
> bytes bytes bytes secs. 10^6bits/sec
>
> 540000 262144 262144 10.00 92766.69
>
>
> ethtool -K eth0 tso off
> netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv Send Send
> Socket Socket Message Elapsed
> Size Size Size Time Throughput
> bytes bytes bytes secs. 10^6bits/sec
>
> 540000 262144 262144 10.00 52218.97
>
> Now if I force linearization, you can definitely see the very high
> cost of the copies !
>
> ethtool -K eth1 sg off
> tjbp26:/home/edumazet# ./netperf -H tjbp27
> MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to
> tjbp27.prod.google.com () port 0 AF_INET6
> Recv Send Send
> Socket Socket Message Elapsed
> Size Size Size Time Throughput
> bytes bytes bytes secs. 10^6bits/sec
>
> 540000 262144 262144 10.00 16951.32
>
>>
>> Hence, We propose to linearizing skbs until the first failure occurs.
>
> Hmm... basically hiding a bug then ?
>
>> After that, we switch to a fail-safe mode by disabling GSO for SKBs with
>> sge > hw limit using the ndo_feature_check implementation, while
>> continuing to apply skb_linearize() for non-GSO packets that exceed the
>> hardware limit. This ensures we remain on the optimal performance path
>> initially, and only transition to the fail-safe path after encountering
>> a failure.
>
> Please post your patch (adding the check in ndo_features_check()),
> perhaps one of us is able to help.
Okay Eric, I'll post a v2 as an RFC. Please let me know.
Regards,
Aditya