[PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability

Erni Sri Satya Vennela posted 1 patch 2 weeks, 3 days ago
.../net/ethernet/microsoft/mana/gdma_main.c   |  6 +++++
.../net/ethernet/microsoft/mana/hw_channel.c  | 12 ++++++----
drivers/net/ethernet/microsoft/mana/mana_en.c | 23 ++++++++++++++-----
3 files changed, 30 insertions(+), 11 deletions(-)
[PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Erni Sri Satya Vennela 2 weeks, 3 days ago
Enhance MANA driver logging to provide better visibility into
hardware configuration and error states during driver initialization
and runtime operations.

Signed-off-by: Erni Sri Satya Vennela <ernis@linux.microsoft.com>
---
Changes in v2:
* Update commit message.
* Use "Enabled vPort ..." instead of "Configured vPort" in
  mana_cfg_vport.
* Add info log in mana_uncfg_vport, mana_gd_verify_vf_version,
  mana_gd_query_max_resources, mana_query_device_cfg and
  mana_query_vport_cfg.
---
 .../net/ethernet/microsoft/mana/gdma_main.c   |  6 +++++
 .../net/ethernet/microsoft/mana/hw_channel.c  | 12 ++++++----
 drivers/net/ethernet/microsoft/mana/mana_en.c | 23 ++++++++++++++-----
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 0055c231acf6..c7b65ddea651 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -152,6 +152,9 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev)
 	if (gc->max_num_queues > gc->num_msix_usable - 1)
 		gc->max_num_queues = gc->num_msix_usable - 1;
 
+	dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
+		 gc->num_msix_usable, gc->max_num_queues);
+
 	return 0;
 }
 
@@ -1229,6 +1232,9 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev)
 		}
 		dev_dbg(gc->dev, "set the hwc timeout to %u\n", hwc->hwc_timeout);
 	}
+
+	dev_info(gc->dev, "VF Version: protocol=0x%llx pf_caps=[0x%llx]\n",
+		 resp.gdma_protocol_ver, gc->pf_cap_flags1);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index aa4e2731e2ba..71a18c70ecaf 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -853,6 +853,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 	struct hwc_caller_ctx *ctx;
 	u32 dest_vrcq = 0;
 	u32 dest_vrq = 0;
+	u32 command;
 	u16 msg_id;
 	int err;
 
@@ -877,6 +878,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 
 	req_msg->req.hwc_msg_id = msg_id;
 
+	command = req_msg->req.msg_type;
 	tx_wr->msg_size = req_len;
 
 	if (gc->is_pf) {
@@ -893,8 +895,8 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 	if (!wait_for_completion_timeout(&ctx->comp_event,
 					 (msecs_to_jiffies(hwc->hwc_timeout)))) {
 		if (hwc->hwc_timeout != 0)
-			dev_err(hwc->dev, "HWC: Request timed out: %u ms\n",
-				hwc->hwc_timeout);
+			dev_err(hwc->dev, "HWC: Request timed out: %u ms for command 0x%x\n",
+				hwc->hwc_timeout, command);
 
 		/* Reduce further waiting if HWC no response */
 		if (hwc->hwc_timeout > 1)
@@ -914,9 +916,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 			err = -EOPNOTSUPP;
 			goto out;
 		}
-		if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT)
-			dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
-				ctx->status_code);
+		if (command != MANA_QUERY_PHY_STAT)
+			dev_err(hwc->dev, "hw_channel command 0x%x failed with status: 0x%x\n",
+				command, ctx->status_code);
 		err = -EPROTO;
 		goto out;
 	}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 91c418097284..09064f9706b8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1026,8 +1026,8 @@ static int mana_send_request(struct mana_context *ac, void *in_buf,
 
 		if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
 		    mana_need_log(gc, err))
-			dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
-				err, resp->status);
+			dev_err(dev, "Command 0x%x failed with status: 0x%x, err: %d\n",
+				req->req.msg_type, resp->status, err);
 		return err ? err : -EPROTO;
 	}
 
@@ -1222,6 +1222,9 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
 	else
 		*bm_hostmode = 0;
 
+	dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
+		 *max_num_vports, gc->adapter_mtu, *bm_hostmode);
+
 	debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu);
 
 	return 0;
@@ -1268,6 +1271,9 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
 	apc->port_handle = resp.vport;
 	ether_addr_copy(apc->mac_addr, resp.mac_addr);
 
+	netdev_info(apc->ndev, "VPort Config: vport=0x%llx max_sq=%u max_rq=%u indir_ent=%u MAC=%pM",
+		    apc->port_handle, *max_sq, *max_rq, *num_indir_entry, apc->mac_addr);
+
 	return 0;
 }
 
@@ -1277,6 +1283,9 @@ void mana_uncfg_vport(struct mana_port_context *apc)
 	apc->vport_use_count--;
 	WARN_ON(apc->vport_use_count < 0);
 	mutex_unlock(&apc->vport_mutex);
+
+	netdev_info(apc->ndev, "Disabled vPort %llu MAC %pM\n",
+		    apc->port_handle, apc->mac_addr);
 }
 EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA");
 
@@ -1340,8 +1349,8 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
 	apc->tx_shortform_allowed = resp.short_form_allowed;
 	apc->tx_vp_offset = resp.tx_vport_offset;
 
-	netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n",
-		    apc->port_handle, protection_dom_id, doorbell_pg_id);
+	netdev_info(apc->ndev, "Enabled vPort %llu PD %u DB %u MAC %pM\n",
+		    apc->port_handle, protection_dom_id, doorbell_pg_id, apc->mac_addr);
 out:
 	if (err)
 		mana_uncfg_vport(apc);
@@ -1412,8 +1421,10 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
 		err = -EPROTO;
 	}
 
-	netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
-		    apc->port_handle, apc->indir_table_sz);
+	netdev_info(ndev,
+		    "Configured steering vPort %llu entries %u MAC %pM [rx:%u rss:%u update_indirection_table:%u cqe_coalescing:%u]\n",
+		    apc->port_handle, apc->indir_table_sz, apc->mac_addr,
+		    rx, apc->rss_state, update_tab, req->cqe_coalescing_enable);
 out:
 	kfree(req);
 	return err;
-- 
2.34.1
Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Jakub Kicinski 2 weeks, 2 days ago
On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:
> Enhance MANA driver logging to provide better visibility into
> hardware configuration and error states during driver initialization
> and runtime operations.

> +	dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
> +		 gc->num_msix_usable, gc->max_num_queues);

> +	dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
> +		 *max_num_vports, gc->adapter_mtu, *bm_hostmode);

IIUC in networking we try to follow the mantra that if the system is
functioning correctly there should be no logs. You can expose the debug
info via ethtool, devlink, debugfs etc. Take your pick.
-- 
pw-bot: cr
Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Erni Sri Satya Vennela 2 weeks, 1 day ago
On Wed, Jan 21, 2026 at 08:14:12PM -0800, Jakub Kicinski wrote:
> On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:
> > Enhance MANA driver logging to provide better visibility into
> > hardware configuration and error states during driver initialization
> > and runtime operations.
> 
> > +	dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
> > +		 gc->num_msix_usable, gc->max_num_queues);
> 
> > +	dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
> > +		 *max_num_vports, gc->adapter_mtu, *bm_hostmode);
> 
> IIUC in networking we try to follow the mantra that if the system is
> functioning correctly there should be no logs. You can expose the debug
> info via ethtool, devlink, debugfs etc. Take your pick.

We discussed this internally and noted that customers often cannot
reliably reproduce the VM issue. In such cases, the only evidence
available is the dmesg logs captured during the incident. Asking them to
re-enable debug options later is not practical, since the problem may
not occur again. Similarly, exposing the information via ethtool,
devlink, or debugfs is less effective because the data is transient and
lost after a reboot. As these messages are printed only once during
initialization, and not repeated during runtime or driver load/unload,
we decided to keep them at info level to aid troubleshooting without
adding noise.

- Vennela
Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Jakub Kicinski 2 weeks, 1 day ago
On Thu, 22 Jan 2026 09:43:42 -0800 Erni Sri Satya Vennela wrote:
> On Wed, Jan 21, 2026 at 08:14:12PM -0800, Jakub Kicinski wrote:
> > On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:  
> > > Enhance MANA driver logging to provide better visibility into
> > > hardware configuration and error states during driver initialization
> > > and runtime operations.  
> >   
> > > +	dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
> > > +		 gc->num_msix_usable, gc->max_num_queues);  
> >   
> > > +	dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
> > > +		 *max_num_vports, gc->adapter_mtu, *bm_hostmode);  
> > 
> > IIUC in networking we try to follow the mantra that if the system is
> > functioning correctly there should be no logs. You can expose the debug
> > info via ethtool, devlink, debugfs etc. Take your pick.  
> 
> We discussed this internally and noted that customers often cannot
> reliably reproduce the VM issue. In such cases, the only evidence
> available is the dmesg logs captured during the incident. Asking them to
> re-enable debug options later is not practical, since the problem may
> not occur again. Similarly, exposing the information via ethtool,
> devlink, or debugfs is less effective because the data is transient and
> lost after a reboot. As these messages are printed only once during
> initialization, and not repeated during runtime or driver load/unload,
> we decided to keep them at info level to aid troubleshooting without
> adding noise.

You will have to build proper support tooling like every single vendor
before you. Presumably you can also log from the hypervisor side which
makes your life so much easier than supporting real HW. Yet, real
NICs don't spew random trash to the logs all the time. SMH. Respectfully,
next time y'all "discuss things internally" start with the question of
what makes your case special :|
Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Leon Romanovsky 1 week, 4 days ago
On Thu, Jan 22, 2026 at 06:07:45PM -0800, Jakub Kicinski wrote:
> On Thu, 22 Jan 2026 09:43:42 -0800 Erni Sri Satya Vennela wrote:
> > On Wed, Jan 21, 2026 at 08:14:12PM -0800, Jakub Kicinski wrote:
> > > On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:  
> > > > Enhance MANA driver logging to provide better visibility into
> > > > hardware configuration and error states during driver initialization
> > > > and runtime operations.  
> > >   
> > > > +	dev_info(gc->dev, "Max Resources: msix_usable=%u max_queues=%u\n",
> > > > +		 gc->num_msix_usable, gc->max_num_queues);  
> > >   
> > > > +	dev_info(dev, "Device Config: max_vports=%u adapter_mtu=%u bm_hostmode=%u\n",
> > > > +		 *max_num_vports, gc->adapter_mtu, *bm_hostmode);  
> > > 
> > > IIUC in networking we try to follow the mantra that if the system is
> > > functioning correctly there should be no logs. You can expose the debug
> > > info via ethtool, devlink, debugfs etc. Take your pick.  
> > 
> > We discussed this internally and noted that customers often cannot
> > reliably reproduce the VM issue. In such cases, the only evidence
> > available is the dmesg logs captured during the incident. Asking them to
> > re-enable debug options later is not practical, since the problem may
> > not occur again. Similarly, exposing the information via ethtool,
> > devlink, or debugfs is less effective because the data is transient and
> > lost after a reboot. As these messages are printed only once during
> > initialization, and not repeated during runtime or driver load/unload,
> > we decided to keep them at info level to aid troubleshooting without
> > adding noise.
> 
> You will have to build proper support tooling like every single vendor
> before you. Presumably you can also log from the hypervisor side which
> makes your life so much easier than supporting real HW. Yet, real
> NIC don't spew random trash to the logs all the time. SMH. Respectfully,
> next time y'all "discuss things internally" start with the question of
> what makes your case special :|

+100

Interesting. Completely independent of your comment, I provided the same
feedback on their mana_ib driver. They added debug logs to nearly every
command, even though those commands already had existing debug logging.

https://lore.kernel.org/linux-rdma/20260122131442.GL13201@unreal/T/#m51e8a12f4bca4a6c1377c5531c8a6d94a43af1e5

"In order to simplify things for you: unless you can clearly justify why this
print is required and why you cannot proceed without it, I must ask you to stop
adding any new debug or error messages to the mana_ib driver. There is a wide
range of existing tools and well‑established practices for debugging the kernel,
and none of them require spamming dmesg."

Thanks
Re: [PATCH net-next v2] net: mana: Improve diagnostic logging for better debuggability
Posted by Erni Sri Satya Vennela 1 week ago
On Mon, Jan 26, 2026 at 09:58:50PM +0200, Leon Romanovsky wrote:
> On Thu, Jan 22, 2026 at 06:07:45PM -0800, Jakub Kicinski wrote:
> > On Thu, 22 Jan 2026 09:43:42 -0800 Erni Sri Satya Vennela wrote:
> > > On Wed, Jan 21, 2026 at 08:14:12PM -0800, Jakub Kicinski wrote:
> > > > On Tue, 20 Jan 2026 22:56:55 -0800 Erni Sri Satya Vennela wrote:  
> > 
> > You will have to build proper support tooling like every single vendor
> > before you. Presumably you can also log from the hypervisor side which
> > makes your life so much easier than supporting real HW. Yet, real
> > NIC don't spew random trash to the logs all the time. SMH. Respectfully,
> > next time y'all "discuss things internally" start with the question of
> > what makes your case special :|
> 
> +100
> 
> Interesting. Completely independent of your comment, I provided the same
> feedback on their mana_ib driver. They added debug logs to nearly every
> command, even though those commands already had existing debug logging.
> 
> https://lore.kernel.org/linux-rdma/20260122131442.GL13201@unreal/T/#m51e8a12f4bca4a6c1377c5531c8a6d94a43af1e5
> 
> "In order to simplify things for you: unless you can clearly justify why this
> print is required and why you cannot proceed without it, I must ask you to stop
> adding any new debug or error messages to the mana_ib driver. There is a wide
> range of existing tools and well‑established practices for debugging the kernel,
> and none of them require spamming dmesg."
> 
> Thanks

Hi Jakub, Leon,

We agree with the concerns raised about adding new lines of logging.
Hence, we are planning to get the soc logs required for debugging issues
from customers by modifying the existing log lines themselves, and we
would not be adding any new lines.

Old Logs:

mana 7870:00:00.0: Microsoft Azure Network Adapter protocol version:
0.1.1
mana 7870:00:00.0 enP30832s1: Configured vPort 0 PD 18 DB 16
mana 7870:00:00.0 enP30832s1: Configured steering vPort 0 entries 64

Modified logs:

Initialization:
mana 7870:00:00.0: Microsoft Azure Network Adapter protocol version:
0.1.1 Max Resources: msix_usable=33 max_queues=32 VF version:
protocol=0x0 pf_caps=[0x1d]

Module load:
mana 7870:00:00.0 enP30832s1: Enabled vPort 0 PD 18 DB 16 MAC
60:45:bd:7b:76:30 Vport Config: max_txq=32 max_rxq=32 indir_ent=64
Device Config: max_vports=1 adapter_mtu=9216 bm_hostmode=0
mana 7870:00:00.0 enP30832s1: Configured steering vPort 0 entries 64 MAC
60:45:bd:7b:76:30 [rx:1 rss:1 update_indirection_table:1
cqe_coalescing:0]

Module unload:
mana 7870:00:00.0 enP30832s1: Configured steering vPort 0 entries 64 MAC
60:45:bd:7b:76:30 [rx:1 rss:1 update_indirection_table:1
cqe_coalescing:0]
mana 7870:00:00.0 enP30832s1: Disabled vPort 0 MAC 60:45:bd:7b:76:30

We considered this approach because we wanted to support the older
kernels that customers are using, and because it makes these changes
easier to backport. Is this approach acceptable?