From nobody Sat Apr 4 03:20:29 2026 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 3ED303A7F72; Fri, 20 Mar 2026 23:54:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=13.77.154.182 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774050876; cv=none; b=sRSs4xiApVtm+510viZ0S68GDwvMED+FTvE8paCxWi/nu0ZNPiDwg7lPJmaLc0wiq/HwFigbcTrxr36OGDKzKKXApXaWUzjlbkT9UKH5XwsQ8/STl/dEeIdo60Ro5RNddxKV516hsd+j8VflIRpinyCG0XrkWPGDV8KWIbvJhmQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774050876; c=relaxed/simple; bh=lmfHT09RF3SpS5yieKdXKXAbO1QBMotlrB/8Dx0epFo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=rw+7MwHXIOfvgNUmrh5bTVN7oXeXEqOHwpADxYSu6TfrnEdseIGWboc9ZnHybjSQZ5Qxc4YjnhanzglsHcYNigZxLtGJJgYXv5GN3cRMJm0Ai5Vi837X+BLvUmQiQKRbw/3bvVpUPErp6D4r+bGJDJ2vIWmKT5TuqUMdg/AFQX0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=microsoft.com; spf=pass smtp.mailfrom=linux.microsoft.com; arc=none smtp.client-ip=13.77.154.182 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=microsoft.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.microsoft.com Received: by linux.microsoft.com (Postfix, from userid 1202) id 33B0120B6F1F; Fri, 20 Mar 2026 16:54:35 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 33B0120B6F1F From: Long Li To: Long Li , Konstantin Taranov , Jakub Kicinski , "David S . Miller" , Paolo Abeni , Eric Dumazet , Andrew Lunn , Jason Gunthorpe , Leon Romanovsky , Haiyang Zhang , "K . Y . 
Srinivasan" , Wei Liu , Dexuan Cui Cc: Simon Horman , netdev@vger.kernel.org, linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH net-next v4 2/6] net: mana: Query device capabilities and configure MSI-X sharing for EQs Date: Fri, 20 Mar 2026 16:54:15 -0700 Message-ID: X-Mailer: git-send-email 2.43.7 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When querying the device, adjust the max number of queues to allow dedicated MSI-X vectors for each vPort. The number of queues per vPort is clamped to no less than MANA_DEF_NUM_QUEUES, and is then capped at the hardware maximum number of queues reported by the device. MSI-X sharing among vPorts is disabled by default and is only enabled when there are not enough MSI-X vectors for dedicated allocation. Rename mana_query_device_cfg() to mana_gd_query_device_cfg() because it is now also called at GDMA device probe time to query device capabilities. 
Signed-off-by: Long Li --- Changes in v4: - Use MANA_DEF_NUM_QUEUES instead of hardcoded 16 for max_num_queues clamping Changes in v2: - Fixed misleading comment for max_num_queues vs max_num_queues_vport in gdma.h --- .../net/ethernet/microsoft/mana/gdma_main.c | 66 ++++++++++++++++--- drivers/net/ethernet/microsoft/mana/mana_en.c | 36 +++++----- include/net/mana/gdma.h | 13 +++- 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/= ethernet/microsoft/mana/gdma_main.c index 2ba1fa3336f9..ae18b4054a02 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -124,6 +124,9 @@ static int mana_gd_query_max_resources(struct pci_dev *= pdev) struct gdma_context *gc =3D pci_get_drvdata(pdev); struct gdma_query_max_resources_resp resp =3D {}; struct gdma_general_req req =3D {}; + unsigned int max_num_queues; + u8 bm_hostmode; + u16 num_ports; int err; =20 mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, @@ -169,6 +172,40 @@ static int mana_gd_query_max_resources(struct pci_dev = *pdev) if (gc->max_num_queues > gc->num_msix_usable - 1) gc->max_num_queues =3D gc->num_msix_usable - 1; =20 + err =3D mana_gd_query_device_cfg(gc, MANA_MAJOR_VERSION, MANA_MINOR_VERSI= ON, + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); + if (err) + return err; + + if (!num_ports) + return -EINVAL; + + /* + * Adjust gc->max_num_queues returned from the SOC to allow dedicated + * MSIx for each vPort. Clamp to no less than MANA_DEF_NUM_QUEUES. 
+ */ + max_num_queues =3D (gc->num_msix_usable - 1) / num_ports; + max_num_queues =3D roundup_pow_of_two(max(max_num_queues, 1U)); + if (max_num_queues < MANA_DEF_NUM_QUEUES) + max_num_queues =3D MANA_DEF_NUM_QUEUES; + + /* + * Use dedicated MSIx for EQs whenever possible, use MSIx sharing for + * Ethernet EQs when (max_num_queues * num_ports > num_msix_usable - 1) + */ + max_num_queues =3D min(gc->max_num_queues, max_num_queues); + if (max_num_queues * num_ports > gc->num_msix_usable - 1) + gc->msi_sharing =3D true; + + /* If MSI is shared, use max allowed value */ + if (gc->msi_sharing) + gc->max_num_queues_vport =3D min(gc->num_msix_usable - 1, gc->max_num_qu= eues); + else + gc->max_num_queues_vport =3D max_num_queues; + + dev_info(gc->dev, "MSI sharing mode %d max queues %d\n", + gc->msi_sharing, gc->max_num_queues); + return 0; } =20 @@ -1831,6 +1868,7 @@ static int mana_gd_setup_hwc_irqs(struct pci_dev *pde= v) /* Need 1 interrupt for HWC */ max_irqs =3D min(num_online_cpus(), MANA_MAX_NUM_QUEUES) + 1; min_irqs =3D 2; + gc->msi_sharing =3D true; } =20 nvec =3D pci_alloc_irq_vectors(pdev, min_irqs, max_irqs, PCI_IRQ_MSIX); @@ -1909,6 +1947,8 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev) =20 pci_free_irq_vectors(pdev); =20 + bitmap_free(gc->msi_bitmap); + gc->msi_bitmap =3D NULL; gc->max_num_msix =3D 0; gc->num_msix_usable =3D 0; } @@ -1943,20 +1983,30 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; =20 - err =3D mana_gd_query_max_resources(pdev); + err =3D mana_gd_detect_devices(pdev); if (err) goto destroy_hwc; =20 - err =3D mana_gd_setup_remaining_irqs(pdev); - if (err) { - dev_err(gc->dev, "Failed to setup remaining IRQs: %d", err); - goto destroy_hwc; - } - - err =3D mana_gd_detect_devices(pdev); + err =3D mana_gd_query_max_resources(pdev); if (err) goto destroy_hwc; =20 + if (!gc->msi_sharing) { + gc->msi_bitmap =3D bitmap_zalloc(gc->num_msix_usable, GFP_KERNEL); + if (!gc->msi_bitmap) { + err =3D -ENOMEM; + goto 
destroy_hwc; + } + /* Set bit for HWC */ + set_bit(0, gc->msi_bitmap); + } else { + err =3D mana_gd_setup_remaining_irqs(pdev); + if (err) { + dev_err(gc->dev, "Failed to setup remaining IRQs: %d", err); + goto destroy_hwc; + } + } + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; =20 diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/et= hernet/microsoft/mana/mana_en.c index 32f924d2a99b..87a444a6c297 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1000,10 +1000,9 @@ static int mana_init_port_context(struct mana_port_c= ontext *apc) return !apc->rxqs ? -ENOMEM : 0; } =20 -static int mana_send_request(struct mana_context *ac, void *in_buf, - u32 in_len, void *out_buf, u32 out_len) +static int gdma_mana_send_request(struct gdma_context *gc, void *in_buf, + u32 in_len, void *out_buf, u32 out_len) { - struct gdma_context *gc =3D ac->gdma_dev->gdma_context; struct gdma_resp_hdr *resp =3D out_buf; struct gdma_req_hdr *req =3D in_buf; struct device *dev =3D gc->dev; @@ -1037,6 +1036,14 @@ static int mana_send_request(struct mana_context *ac= , void *in_buf, return 0; } =20 +static int mana_send_request(struct mana_context *ac, void *in_buf, + u32 in_len, void *out_buf, u32 out_len) +{ + struct gdma_context *gc =3D ac->gdma_dev->gdma_context; + + return gdma_mana_send_request(gc, in_buf, in_len, out_buf, out_len); +} + static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, const enum mana_command_code expected_code, const u32 min_size) @@ -1170,11 +1177,10 @@ static void mana_pf_deregister_filter(struct mana_p= ort_context *apc) err, resp.hdr.status); } =20 -static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_= ver, - u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports, u8 *bm_hostmode) +int mana_gd_query_device_cfg(struct gdma_context *gc, u32 proto_major_ver, + u32 proto_minor_ver, u32 proto_micro_ver, + u16 
*max_num_vports, u8 *bm_hostmode) { - struct gdma_context *gc =3D ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp =3D {}; struct mana_query_device_cfg_req req =3D {}; struct device *dev =3D gc->dev; @@ -1189,7 +1195,7 @@ static int mana_query_device_cfg(struct mana_context = *ac, u32 proto_major_ver, req.proto_minor_ver =3D proto_minor_ver; req.proto_micro_ver =3D proto_micro_ver; =20 - err =3D mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); + err =3D gdma_mana_send_request(gc, &req, sizeof(req), &resp, sizeof(resp)= ); if (err) { dev_err(dev, "Failed to query config: %d", err); return err; @@ -1217,8 +1223,6 @@ static int mana_query_device_cfg(struct mana_context = *ac, u32 proto_major_ver, else *bm_hostmode =3D 0; =20 - debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapte= r_mtu); - return 0; } =20 @@ -3373,7 +3377,7 @@ static int mana_probe_port(struct mana_context *ac, i= nt port_idx, int err; =20 ndev =3D alloc_etherdev_mq(sizeof(struct mana_port_context), - gc->max_num_queues); + gc->max_num_queues_vport); if (!ndev) return -ENOMEM; =20 @@ -3382,8 +3386,8 @@ static int mana_probe_port(struct mana_context *ac, i= nt port_idx, apc =3D netdev_priv(ndev); apc->ac =3D ac; apc->ndev =3D ndev; - apc->max_queues =3D gc->max_num_queues; - apc->num_queues =3D min(gc->max_num_queues, MANA_DEF_NUM_QUEUES); + apc->max_queues =3D gc->max_num_queues_vport; + apc->num_queues =3D min(gc->max_num_queues_vport, MANA_DEF_NUM_QUEUES); apc->tx_queue_size =3D DEF_TX_BUFFERS_PER_QUEUE; apc->rx_queue_size =3D DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle =3D INVALID_MANA_HANDLE; @@ -3643,13 +3647,15 @@ int mana_probe(struct gdma_dev *gd, bool resuming) gd->driver_data =3D ac; } =20 - err =3D mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports, &bm_hostmode); + err =3D mana_gd_query_device_cfg(gc, MANA_MAJOR_VERSION, MANA_MINOR_VERSI= ON, + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); 
if (err) goto out; =20 ac->bm_hostmode =3D bm_hostmode; =20 + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapte= r_mtu); + if (!resuming) { ac->num_ports =3D num_ports; =20 diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 7fe3a1b61b2d..ecd9949df213 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -399,8 +399,10 @@ struct gdma_context { struct device *dev; struct dentry *mana_pci_debugfs; =20 - /* Per-vPort max number of queues */ + /* Hardware max number of queues */ unsigned int max_num_queues; + /* Per-vPort max number of queues */ + unsigned int max_num_queues_vport; unsigned int max_num_msix; unsigned int num_msix_usable; struct xarray irq_contexts; @@ -446,6 +448,12 @@ struct gdma_context { struct workqueue_struct *service_wq; =20 unsigned long flags; + + /* Indicate if this device is sharing MSI for EQs on MANA */ + bool msi_sharing; + + /* Bitmap tracks where MSI is allocated when it is not shared for EQs */ + unsigned long *msi_bitmap; }; =20 static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -1013,4 +1021,7 @@ int mana_gd_resume(struct pci_dev *pdev); =20 bool mana_need_log(struct gdma_context *gc, int err); =20 +int mana_gd_query_device_cfg(struct gdma_context *gc, u32 proto_major_ver, + u32 proto_minor_ver, u32 proto_micro_ver, + u16 *max_num_vports, u8 *bm_hostmode); #endif /* _GDMA_H */ --=20 2.43.0