The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add ndo_features_check() callback to validate SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.
Also, add an ethtool counter for linearized SKBs.
Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>
---
Changes in v5:
* Drop skb_is_gso() check for disabling GSO in mana_features_check().
* Register .ndo_features_check conditionally to avoid unnecessary call.
Changes in v4:
* No change.
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 41 ++++++++++++++++++-
.../ethernet/microsoft/mana/mana_ethtool.c | 2 +
include/net/mana/gdma.h | 8 +++-
include/net/mana/mana.h | 1 +
4 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index cccd5b63cee6..d92069954fd9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -329,6 +330,22 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cq = &apc->tx_qp[txq_idx].tx_cq;
tx_stats = &txq->stats;
+ BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
+#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+ if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* GSO skb with Hardware SGE limit exceeded is not expected here
+ * as they are handled in mana_features_check() callback
+ */
+ if (skb_linearize(skb)) {
+ netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+ skb_shinfo(skb)->nr_frags,
+ skb_is_gso(skb));
+ goto tx_drop_count;
+ }
+ apc->eth_stats.linear_pkt_tx_cnt++;
+ }
+#endif
+
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -442,8 +459,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
@@ -518,6 +533,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+ struct net_device *ndev,
+ netdev_features_t features)
+{
+ if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+ /* Exceeds HW SGE limit.
+ * GSO case:
+ * Disable GSO so the stack will software-segment the skb
+ * into smaller skbs that fit the SGE budget.
+ * Non-GSO case:
+ * The xmit path will attempt skb_linearize() as a fallback.
+ */
+ features &= ~NETIF_F_GSO_MASK;
+ }
+ return features;
+}
+#endif
+
static void mana_get_stats64(struct net_device *ndev,
struct rtnl_link_stats64 *st)
{
@@ -878,6 +912,9 @@ static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
.ndo_select_queue = mana_select_queue,
+#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
+ .ndo_features_check = mana_features_check,
+#endif
.ndo_start_xmit = mana_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a1afa75a9463..fa5e1a2f06a9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -71,6 +71,8 @@ static const struct mana_stats_desc mana_eth_stats[] = {
{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
tx_cqe_unknown_type)},
+ {"linear_pkt_tx_cnt", offsetof(struct mana_ethtool_stats,
+ linear_pkt_tx_cnt)},
{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 637f42485dba..6dae78dc468f 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -489,6 +489,8 @@ struct gdma_wqe {
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256
+#define MANA_MAX_TX_WQE_SGL_ENTRIES 30
+
#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \
sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \
sizeof(struct gdma_sge))
@@ -592,6 +594,9 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -601,7 +606,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
- GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
+ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 8906901535f5..50a532fb30d6 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -404,6 +404,7 @@ struct mana_ethtool_stats {
u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
+ u64 linear_pkt_tx_cnt;
u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
--
2.43.0
On Fri, 14 Nov 2025 13:16:42 -0800 Aditya Garg wrote:
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. Exceeding this limit can cause TX failures.
> Add ndo_features_check() callback to validate SKB layout before
> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> exceed the SGE limit.
> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
> +#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
> + if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
nit: please try to avoid the use of ifdef if you can. This helps to
avoid build breakage sneaking in as this code will be compiled out
on default config on all platforms.
Instead you should be able to simply add the static condition to the
if statement:
if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES &&
skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
and let the compiler (rather than preprocessor) eliminate this if ()
block.
> + /* GSO skb with Hardware SGE limit exceeded is not expected here
> + * as they are handled in mana_features_check() callback
> + */
> + if (skb_linearize(skb)) {
> + netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
> + skb_shinfo(skb)->nr_frags,
> + skb_is_gso(skb));
> + goto tx_drop_count;
> + }
> + apc->eth_stats.linear_pkt_tx_cnt++;
> + }
> +#endif
--
pw-bot: cr
On 18-11-2025 09:16, Jakub Kicinski wrote:
> On Fri, 14 Nov 2025 13:16:42 -0800 Aditya Garg wrote:
>> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
>> per TX WQE. Exceeding this limit can cause TX failures.
>> Add ndo_features_check() callback to validate SKB layout before
>> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
>> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
>> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
>> exceed the SGE limit.
>
>> + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES);
>> +#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES)
>> + if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>
> nit: please try to avoid the use of ifdef if you can. This helps to
> avoid build breakage sneaking in as this code will be compiled out
> on default config on all platforms.
>
> Instead you should be able to simply add the static condition to the
> if statement:
>
> if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES &&
> skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
>
> and let the compiler (rather than preprocessor) eliminate this if ()
> block.
>
Thanks for the review and explanation, Jakub. I will incorporate this change
in the next revision.
Regards,
Aditya
On Fri, Nov 14, 2025 at 1:19 PM Aditya Garg <gargaditya@linux.microsoft.com> wrote:
>
> The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
> per TX WQE. Exceeding this limit can cause TX failures.
> Add ndo_features_check() callback to validate SKB layout before
> transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
> NETIF_F_GSO_MASK to enforce software segmentation in the stack.
> Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
> exceed the SGE limit.
>
> Also, Add ethtool counter for SKBs linearized
>
> Co-developed-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
> Signed-off-by: Aditya Garg <gargaditya@linux.microsoft.com>

Reviewed-by: Eric Dumazet <edumazet@google.com>
© 2016 - 2026 Red Hat, Inc.