[v7] Add driver for 1Gbe network chips from MUCSE

[PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Dong Yibo 1 month, 1 week ago

Initialize basic mbx_fw ops, such as get_capability, reset phy
and so on.

Signed-off-by: Dong Yibo <dong100@mucse.com>
---
 drivers/net/ethernet/mucse/rnpgbe/Makefile    |   3 +-
 drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h    |   1 +
 .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 333 ++++++++++++++++++
 .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 152 ++++++++
 4 files changed, 488 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
 create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h

diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile
index 5fc878ada4b1..de8bcb7772ab 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/Makefile
+++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile
@@ -7,4 +7,5 @@
 obj-$(CONFIG_MGBE) += rnpgbe.o
 rnpgbe-objs := rnpgbe_main.o\
 	       rnpgbe_chip.o\
-	       rnpgbe_mbx.o
+	       rnpgbe_mbx.o\
+	       rnpgbe_mbx_fw.o
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
index 67e28a4667e7..a32419a34d75 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
@@ -52,6 +52,7 @@ struct mucse_hw {
 	void __iomem *hw_addr;
 	struct pci_dev *pdev;
 	enum rnpgbe_hw_type hw_type;
+	u8 pfvfnum;
 	struct mucse_mbx_info mbx;
 };
 
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
new file mode 100644
index 000000000000..84570763cf79
--- /dev/null
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 - 2025 Mucse Corporation. */
+
+#include <linux/pci.h>
+#include <linux/if_ether.h>
+
+#include "rnpgbe.h"
+#include "rnpgbe_hw.h"
+#include "rnpgbe_mbx.h"
+#include "rnpgbe_mbx_fw.h"
+
+/**
+ * mucse_fw_send_cmd_wait - Send cmd req and wait for response
+ * @hw: pointer to the HW structure
+ * @req: pointer to the cmd req structure
+ * @reply: pointer to the fw reply structure
+ *
+ * mucse_fw_send_cmd_wait sends req to the pf-fw mailbox and waits for
+ * the fw reply carrying the same opcode as the request.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_fw_send_cmd_wait(struct mucse_hw *hw,
+				  struct mbx_fw_cmd_req *req,
+				  struct mbx_fw_cmd_reply *reply)
+{
+	int len = le16_to_cpu(req->datalen);
+	int retry_cnt = 3;
+	int err;
+
+	err = mutex_lock_interruptible(&hw->mbx.lock);
+	if (err)
+		return err;
+	err = mucse_write_posted_mbx(hw, (u32 *)req, len);
+	if (err)
+		goto out;
+	/* write returned 0, so fw has the request: read up to
+	 * 1 + 'retry_cnt' replies and check the opcode of each one
+	 */
+	do {
+		err = mucse_read_posted_mbx(hw, (u32 *)reply, sizeof(*reply));
+		if (err)
+			goto out;
+		if (reply->opcode == req->opcode)
+			break;
+	} while (retry_cnt-- > 0);
+	if (reply->opcode != req->opcode)
+		err = -ETIMEDOUT;
+	else if (reply->error_code)
+		err = -EIO;
+out:
+	mutex_unlock(&hw->mbx.lock);
+	return err;
+}
+
+/**
+ * build_phy_abilities_req - fill in a GET_PHY_ABILITY request
+ * @req: pointer to the cmd req structure
+ **/
+static void build_phy_abilities_req(struct mbx_fw_cmd_req *req)
+{
+	req->opcode = cpu_to_le16(GET_PHY_ABILITY);
+	req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN);
+	req->flags = 0;
+	req->reply_hi = 0;
+	req->reply_lo = 0;
+}
+
+/**
+ * mucse_fw_get_capability - Get hw abilities from fw
+ * @hw: pointer to the HW structure
+ * @abil: pointer to the hw_abilities structure
+ *
+ * Sends a GET_PHY_ABILITY request and copies the reply payload to @abil.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_fw_get_capability(struct mucse_hw *hw,
+				   struct hw_abilities *abil)
+{
+	struct mbx_fw_cmd_reply reply = {};
+	struct mbx_fw_cmd_req req = {};
+	int err;
+
+	build_phy_abilities_req(&req);
+	err = mucse_fw_send_cmd_wait(hw, &req, &reply);
+	if (err)
+		return err;
+	memcpy(abil, &reply.hw_abilities, sizeof(*abil));
+	return 0;
+}
+
+/**
+ * mucse_mbx_get_capability - Get hw abilities from fw
+ * @hw: pointer to the HW structure
+ *
+ * mucse_mbx_get_capability queries the capabilities from fw, trying up
+ * to three times, and stores the low byte of pfnum in hw->pfvfnum.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_get_capability(struct mucse_hw *hw)
+{
+	struct hw_abilities ability = {};
+	int err = -EIO;
+	int attempt;
+
+	for (attempt = 0; attempt < 3; attempt++) {
+		err = mucse_fw_get_capability(hw, &ability);
+		if (err)
+			continue;
+		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
+		return 0;
+	}
+	return err;
+}
+
+/**
+ * mbx_cookie_zalloc - Alloc a cookie structure
+ * @priv_len: private length for this cookie
+ *
+ * @return: cookie structure on success, NULL if the allocation fails
+ **/
+static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len)
+{
+	struct mbx_req_cookie *cookie;
+
+	cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL);
+	if (!cookie)
+		return NULL;
+	cookie->priv_len = priv_len;
+	cookie->magic = COOKIE_MAGIC;
+	cookie->timeout_jiffies = 30 * HZ;
+	return cookie;
+}
+
+/**
+ * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
+ * @hw: pointer to the HW structure
+ * @req: pointer to the cmd req structure
+ * @cookie: pointer to the req cookie
+ *
+ * mucse_mbx_fw_post_req writes req to the mailbox and then sleeps on
+ * cookie->wait until cookie->done is set to 1 or the timeout expires.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
+				 struct mbx_fw_cmd_req *req,
+				 struct mbx_req_cookie *cookie)
+{
+	int len = le16_to_cpu(req->datalen);
+	long left;
+	int err;
+
+	cookie->done = 0;
+	cookie->errcode = 0;
+	init_waitqueue_head(&cookie->wait);
+	err = mutex_lock_interruptible(&hw->mbx.lock);
+	if (err)
+		return err;
+	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
+	if (err)
+		goto out;
+	/* if write succeeds, we must wait for the firmware response or
+	 * the timeout before returning, so that the caller cannot free
+	 * the cookie while cookie->wait may still be used
+	 */
+	left = wait_event_timeout(cookie->wait,
+				  cookie->done == 1,
+				  cookie->timeout_jiffies);
+
+	if (!left)
+		err = -ETIMEDOUT;
+	else
+		err = cookie->errcode;
+out:
+	mutex_unlock(&hw->mbx.lock);
+	return err;
+}
+
+/**
+ * build_ifinsmod - fill in a DRIVER_INSMOD request
+ * @req: pointer to the cmd req structure
+ * @status: true for insmod, false for rmmod
+ **/
+static void build_ifinsmod(struct mbx_fw_cmd_req *req,
+			   int status)
+{
+#define FIXED_VERSION 0xFFFFFFFF
+	req->opcode = cpu_to_le16(DRIVER_INSMOD);
+	req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN +
+				   sizeof(req->ifinsmod));
+	req->flags = 0;
+	req->cookie = NULL;
+	req->reply_hi = 0;
+	req->reply_lo = 0;
+	req->ifinsmod.status = cpu_to_le32(status);
+	req->ifinsmod.version = cpu_to_le32(FIXED_VERSION);
+}
+
+/**
+ * mucse_mbx_ifinsmod - Report driver insmod/rmmod status to fw
+ * @hw: pointer to the HW structure
+ * @status: true for insmod, false for rmmod
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status)
+{
+	struct mbx_fw_cmd_req req = {};
+	int len;
+	int err;
+
+	build_ifinsmod(&req, status);
+	len = le16_to_cpu(req.datalen);
+	err = mutex_lock_interruptible(&hw->mbx.lock);
+	if (err)
+		return err;
+
+	/* insmod (status != 0) is sent with mucse_write_posted_mbx(),
+	 * rmmod (status == 0) with mucse_write_mbx_pf()
+	 */
+	if (!status)
+		err = mucse_write_mbx_pf(hw, (u32 *)&req, len);
+	else
+		err = mucse_write_posted_mbx(hw, (u32 *)&req, len);
+
+	mutex_unlock(&hw->mbx.lock);
+	return err;
+}
+
+/**
+ * build_reset_phy_req - fill in a RESET_PHY request
+ * @req: pointer to the cmd req structure
+ * @cookie: pointer of cookie for this cmd
+ **/
+static void build_reset_phy_req(struct mbx_fw_cmd_req *req,
+				void *cookie)
+{
+	req->opcode = cpu_to_le16(RESET_PHY);
+	req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN);
+	req->cookie = cookie;
+	req->flags = 0;
+	req->reply_hi = 0;
+	req->reply_lo = 0;
+}
+
+/**
+ * mucse_mbx_fw_reset_phy - Posts a mbx req to reset hw
+ * @hw: pointer to the HW structure
+ *
+ * mucse_mbx_fw_reset_phy asks firmware to reset hw. With
+ * hw->mbx.irq_enabled clear it polls via mucse_fw_send_cmd_wait;
+ * otherwise it allocates a cookie and uses mucse_mbx_fw_post_req.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_fw_reset_phy(struct mucse_hw *hw)
+{
+	struct mbx_fw_cmd_reply reply = {};
+	struct mbx_fw_cmd_req req = {};
+	struct mbx_req_cookie *cookie;
+	int ret;
+
+	if (!hw->mbx.irq_enabled) {
+		build_reset_phy_req(&req, &req);
+		return mucse_fw_send_cmd_wait(hw, &req, &reply);
+	}
+
+	cookie = mbx_cookie_zalloc(0);
+	if (!cookie)
+		return -ENOMEM;
+
+	build_reset_phy_req(&req, cookie);
+	ret = mucse_mbx_fw_post_req(hw, &req, cookie);
+	kfree(cookie);
+	return ret;
+}
+
+/**
+ * build_get_macaddress_req - fill in a GET_MAC_ADDRES request
+ * @req: pointer to the cmd req structure
+ * @port_mask: port valid for this cmd
+ * @pfvfnum: pfvfnum for this cmd
+ * @cookie: pointer of cookie for this cmd
+ **/
+static void build_get_macaddress_req(struct mbx_fw_cmd_req *req,
+				     int port_mask, int pfvfnum,
+				     void *cookie)
+{
+	req->opcode = cpu_to_le16(GET_MAC_ADDRES);
+	req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN +
+				   sizeof(req->get_mac_addr));
+	req->flags = 0;
+	req->cookie = cookie;
+	req->reply_hi = 0;
+	req->reply_lo = 0;
+	req->get_mac_addr.pfvf_num = cpu_to_le32(pfvfnum);
+	req->get_mac_addr.port_mask = cpu_to_le32(port_mask);
+}
+
+/**
+ * mucse_fw_get_macaddr - Posts a mbx req to request macaddr
+ * @hw: pointer to the HW structure
+ * @pfvfnum: index of pf/vf num
+ * @mac_addr: pointer to store mac_addr
+ * @port: port index, must be a valid index into the reply's addrs[]
+ *
+ * Requests the mac_addr of @port from fw via mucse_fw_send_cmd_wait.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
+			 u8 *mac_addr,
+			 int port)
+{
+	struct mbx_fw_cmd_reply reply = {};
+	struct mbx_fw_cmd_req req = {};
+	int err;
+
+	/* port indexes reply.mac_addr.addrs[] and is used as a bit number */
+	if (port < 0 || port >= ARRAY_SIZE(reply.mac_addr.addrs))
+		return -EINVAL;
+	build_get_macaddress_req(&req, BIT(port), pfvfnum, &req);
+	err = mucse_fw_send_cmd_wait(hw, &req, &reply);
+	if (err)
+		return err;
+	if (!(le32_to_cpu(reply.mac_addr.ports) & BIT(port)))
+		return -ENODATA;
+	memcpy(mac_addr, reply.mac_addr.addrs[port].mac, ETH_ALEN);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
new file mode 100644
index 000000000000..b73238d0e848
--- /dev/null
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 - 2025 Mucse Corporation. */
+
+#ifndef _RNPGBE_MBX_FW_H
+#define _RNPGBE_MBX_FW_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+
+#include "rnpgbe.h"
+
+#define MBX_REQ_HDR_LEN 24
+
+struct mbx_fw_cmd_reply;
+typedef void (*cookie_cb)(struct mbx_fw_cmd_reply *reply, void *priv);
+
+struct mbx_req_cookie {
+	int magic; /* COOKIE_MAGIC, set by mbx_cookie_zalloc() */
+#define COOKIE_MAGIC 0xCE
+	cookie_cb cb; /* NOTE(review): not referenced in this patch */
+	int timeout_jiffies; /* wait limit in mucse_mbx_fw_post_req() */
+	int errcode; /* returned by mucse_mbx_fw_post_req() if non-zero */
+	wait_queue_head_t wait; /* mucse_mbx_fw_post_req() sleeps here */
+	int done; /* wait condition: the wait ends once this is 1 */
+	int priv_len; /* element count of priv[] */
+	char priv[] __counted_by(priv_len);
+};
+
+enum MUCSE_FW_CMD {
+	GET_PHY_ABILITY = 0x0601, /* used by build_phy_abilities_req() */
+	GET_MAC_ADDRES = 0x0602, /* used by build_get_macaddress_req() */
+	RESET_PHY = 0x0603, /* used by build_reset_phy_req() */
+	DRIVER_INSMOD = 0x0803, /* used by build_ifinsmod() */
+};
+
+struct hw_abilities {
+	u8 link_stat;
+	u8 port_mask;
+	__le32 speed;
+	__le16 phy_type;
+	__le16 nic_mode;
+	__le16 pfnum; /* low 8 bits are stored in hw->pfvfnum */
+	__le32 fw_version;
+	__le32 axi_mhz;
+	union {
+		u8 port_id[4];
+		__le32 port_ids;
+	};
+	__le32 bd_uid;
+	__le32 phy_id;
+	__le32 wol_status;
+	union {
+		__le32 ext_ability; /* raw little-endian flags from fw */
+		struct {
+			u32 valid : 1;
+			u32 wol_en : 1;
+			u32 pci_preset_runtime_en : 1;
+			u32 smbus_en : 1;
+			u32 ncsi_en : 1;
+			u32 rpu_en : 1;
+			u32 v2 : 1;
+			u32 pxe_en : 1;
+			u32 mctp_en : 1;
+			u32 yt8614 : 1;
+			u32 pci_ext_reset : 1;
+			u32 rpu_availble : 1;
+			u32 fw_lldp_ability : 1;
+			u32 lldp_enabled : 1;
+			u32 only_1g : 1;
+			u32 force_down_en: 1;
+		} e_host; /* filled by ability_update_host_endian() */
+	};
+} __packed;
+
+/* FW stores extended ability information in 'ext_ability' as a 32-bit
+ * little-endian value. Convert it to host byte order and write it back
+ * through the union's 'e_host' struct so the flags can be read as named
+ * bits (e.g. 'wol_en', 'smbus_en'). NOTE(review): bit-field layout is
+ * ABI dependent - confirm 'valid' maps to bit 0 on big-endian hosts.
+ */
+static inline void ability_update_host_endian(struct hw_abilities *abi)
+{
+	u32 host_val = le32_to_cpu(abi->ext_ability);
+
+	abi->e_host = *(typeof(abi->e_host) *)&host_val;
+}
+
+#define FLAGS_DD BIT(0)
+#define FLAGS_ERR BIT(2)
+
+struct mbx_fw_cmd_req {
+	__le16 flags;
+	__le16 opcode; /* one of enum MUCSE_FW_CMD */
+	__le16 datalen; /* MBX_REQ_HDR_LEN plus the payload size */
+	__le16 ret_value;
+	union {
+		struct {
+			__le32 cookie_lo;
+			__le32 cookie_hi;
+		};
+
+		void *cookie; /* set by the build_*() helpers */
+	};
+	__le32 reply_lo; /* always written as 0 by this file */
+	__le32 reply_hi; /* always written as 0 by this file */
+	union {
+		u8 data[32];
+		struct {
+			__le32 version;
+			__le32 status;
+		} ifinsmod; /* payload for DRIVER_INSMOD */
+		struct {
+			__le32 port_mask;
+			__le32 pfvf_num;
+		} get_mac_addr; /* payload for GET_MAC_ADDRES */
+	};
+} __packed;
+
+struct mbx_fw_cmd_reply {
+	__le16 flags;
+	__le16 opcode; /* matched against the request opcode */
+	__le16 error_code; /* non-zero is reported as -EIO */
+	__le16 datalen;
+	union {
+		struct {
+			__le32 cookie_lo;
+			__le32 cookie_hi;
+		};
+		void *cookie;
+	};
+	union {
+		u8 data[40];
+		struct mac_addr {
+			__le32 ports; /* bit N set: addrs[N] is valid */
+			struct _addr {
+				/* for macaddr:01:02:03:04:05:06
+				 * mac-hi=0x01020304 mac-lo=0x05060000
+				 */
+				u8 mac[8];
+			} addrs[4];
+		} mac_addr; /* reply payload for GET_MAC_ADDRES */
+		struct hw_abilities hw_abilities; /* for GET_PHY_ABILITY */
+	};
+} __packed;
+
+int mucse_mbx_get_capability(struct mucse_hw *hw);
+int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status);
+int mucse_mbx_fw_reset_phy(struct mucse_hw *hw);
+int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
+			 u8 *mac_addr, int port);
+#endif /* _RNPGBE_MBX_FW_H */
-- 
2.25.1

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Vadim Fedorenko 1 month, 1 week ago

On 22/08/2025 03:34, Dong Yibo wrote:

[...]
> +/**
> + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> + * @hw: pointer to the HW structure
> + * @req: pointer to the cmd req structure
> + * @cookie: pointer to the req cookie
> + *
> + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> + * reply. cookie->wait will be set in irq handler.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> +				 struct mbx_fw_cmd_req *req,
> +				 struct mbx_req_cookie *cookie)
> +{
> +	int len = le16_to_cpu(req->datalen);
> +	int err;
> +
> +	cookie->errcode = 0;
> +	cookie->done = 0;
> +	init_waitqueue_head(&cookie->wait);
> +	err = mutex_lock_interruptible(&hw->mbx.lock);
> +	if (err)
> +		return err;
> +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> +	if (err)
> +		goto out;
> +	/* if write succeeds, we must wait for firmware response or
> +	 * timeout to avoid using the already freed cookie->wait
> +	 */
> +	err = wait_event_timeout(cookie->wait,
> +				 cookie->done == 1,
> +				 cookie->timeout_jiffies);

it's unclear to me, what part of the code is managing values of cookie
structure? I didn't get the reason why are you putting the address of
cookie structure into request which is then directly passed to the FW.
Is the FW supposed to change values in cookie?

> +
> +	if (!err)
> +		err = -ETIMEDOUT;
> +	else
> +		err = 0;
> +	if (!err && cookie->errcode)
> +		err = cookie->errcode;
> +out:
> +	mutex_unlock(&hw->mbx.lock);
> +	return err;
> +}

[...]

> +struct mbx_fw_cmd_req {
> +	__le16 flags;
> +	__le16 opcode;
> +	__le16 datalen;
> +	__le16 ret_value;
> +	union {
> +		struct {
> +			__le32 cookie_lo;
> +			__le32 cookie_hi;
> +		};
> +
> +		void *cookie;
> +	};
> +	__le32 reply_lo;
> +	__le32 reply_hi;

what do these 2 fields mean? are you going to provide reply's buffer
address directly to FW?

> +	union {
> +		u8 data[32];
> +		struct {
> +			__le32 version;
> +			__le32 status;
> +		} ifinsmod;
> +		struct {
> +			__le32 port_mask;
> +			__le32 pfvf_num;
> +		} get_mac_addr;
> +	};
> +} __packed;
> +
> +struct mbx_fw_cmd_reply {
> +	__le16 flags;
> +	__le16 opcode;
> +	__le16 error_code;
> +	__le16 datalen;
> +	union {
> +		struct {
> +			__le32 cookie_lo;
> +			__le32 cookie_hi;
> +		};
> +		void *cookie;
> +	};

This part looks like the request, apart from datalen and error_code are
swapped in the header. And it actually means that the FW will put back
the address of provided cookie into reply, right? If yes, then it
doesn't look correct at all...

> +	union {
> +		u8 data[40];
> +		struct mac_addr {
> +			__le32 ports;
> +			struct _addr {
> +				/* for macaddr:01:02:03:04:05:06
> +				 * mac-hi=0x01020304 mac-lo=0x05060000
> +				 */
> +				u8 mac[8];
> +			} addrs[4];
> +		} mac_addr;
> +		struct hw_abilities hw_abilities;
> +	};
> +} __packed;

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Mon, Aug 25, 2025 at 05:37:27PM +0100, Vadim Fedorenko wrote:
> On 22/08/2025 03:34, Dong Yibo wrote:
> 
> [...]
> > +/**
> > + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> > + * @hw: pointer to the HW structure
> > + * @req: pointer to the cmd req structure
> > + * @cookie: pointer to the req cookie
> > + *
> > + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> > + * reply. cookie->wait will be set in irq handler.
> > + *
> > + * @return: 0 on success, negative on failure
> > + **/
> > +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> > +				 struct mbx_fw_cmd_req *req,
> > +				 struct mbx_req_cookie *cookie)
> > +{
> > +	int len = le16_to_cpu(req->datalen);
> > +	int err;
> > +
> > +	cookie->errcode = 0;
> > +	cookie->done = 0;
> > +	init_waitqueue_head(&cookie->wait);
> > +	err = mutex_lock_interruptible(&hw->mbx.lock);
> > +	if (err)
> > +		return err;
> > +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> > +	if (err)
> > +		goto out;
> > +	/* if write succeeds, we must wait for firmware response or
> > +	 * timeout to avoid using the already freed cookie->wait
> > +	 */
> > +	err = wait_event_timeout(cookie->wait,
> > +				 cookie->done == 1,
> > +				 cookie->timeout_jiffies);
> 
> it's unclear to me, what part of the code is managing values of cookie
> structure? I didn't get the reason why are you putting the address of
> cookie structure into request which is then directly passed to the FW.
> Is the FW supposed to change values in cookie?
> 

cookie will be used in an irq-handler. like this:
static int rnpgbe_mbx_fw_reply_handler(struct mucse *mucse,
                                       struct mbx_fw_cmd_reply *reply)
{
        struct mbx_req_cookie *cookie;

        cookie = reply->cookie;

        if (cookie->priv_len > 0)
                memcpy(cookie->priv, reply->data, cookie->priv_len);
        cookie->done = 1;
        if (le16_to_cpu(reply->flags) & FLAGS_ERR)
                cookie->errcode = -EIO;
        else
                cookie->errcode = 0;
        wake_up(&cookie->wait);
        return 0;
}
That is why we must wait for firmware response.
But irq is not added in this patch series. Maybe I should move all
cookie relative codes to the patch will add irq?

> > +
> > +	if (!err)
> > +		err = -ETIMEDOUT;
> > +	else
> > +		err = 0;
> > +	if (!err && cookie->errcode)
> > +		err = cookie->errcode;
> > +out:
> > +	mutex_unlock(&hw->mbx.lock);
> > +	return err;
> > +}
> 
> [...]
> 
> > +struct mbx_fw_cmd_req {
> > +	__le16 flags;
> > +	__le16 opcode;
> > +	__le16 datalen;
> > +	__le16 ret_value;
> > +	union {
> > +		struct {
> > +			__le32 cookie_lo;
> > +			__le32 cookie_hi;
> > +		};
> > +
> > +		void *cookie;
> > +	};
> > +	__le32 reply_lo;
> > +	__le32 reply_hi;
> 
> what do these 2 fields mean? are you going to provide reply's buffer
> address directly to FW?
> 

No, this is defined by fw. Some fw can access physical address.
But I don't use it in this driver.

> > +	union {
> > +		u8 data[32];
> > +		struct {
> > +			__le32 version;
> > +			__le32 status;
> > +		} ifinsmod;
> > +		struct {
> > +			__le32 port_mask;
> > +			__le32 pfvf_num;
> > +		} get_mac_addr;
> > +	};
> > +} __packed;
> > +
> > +struct mbx_fw_cmd_reply {
> > +	__le16 flags;
> > +	__le16 opcode;
> > +	__le16 error_code;
> > +	__le16 datalen;
> > +	union {
> > +		struct {
> > +			__le32 cookie_lo;
> > +			__le32 cookie_hi;
> > +		};
> > +		void *cookie;
> > +	};
> 
> This part looks like the request, apart from datalen and error_code are
> swapped in the header. And it actually means that the FW will put back
> the address of provided cookie into reply, right? If yes, then it
> doesn't look correct at all...
> 

It is yes. cookie is used in irq handler as show above.
Sorry, I didn't understand 'the not correct' point?

> > +	union {
> > +		u8 data[40];
> > +		struct mac_addr {
> > +			__le32 ports;
> > +			struct _addr {
> > +				/* for macaddr:01:02:03:04:05:06
> > +				 * mac-hi=0x01020304 mac-lo=0x05060000
> > +				 */
> > +				u8 mac[8];
> > +			} addrs[4];
> > +		} mac_addr;
> > +		struct hw_abilities hw_abilities;
> > +	};
> > +} __packed;
>

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Vadim Fedorenko 1 month, 1 week ago

On 26/08/2025 02:31, Yibo Dong wrote:
> On Mon, Aug 25, 2025 at 05:37:27PM +0100, Vadim Fedorenko wrote:
>> On 22/08/2025 03:34, Dong Yibo wrote:
>>
>> [...]
>>> +/**
>>> + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
>>> + * @hw: pointer to the HW structure
>>> + * @req: pointer to the cmd req structure
>>> + * @cookie: pointer to the req cookie
>>> + *
>>> + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
>>> + * reply. cookie->wait will be set in irq handler.
>>> + *
>>> + * @return: 0 on success, negative on failure
>>> + **/
>>> +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
>>> +				 struct mbx_fw_cmd_req *req,
>>> +				 struct mbx_req_cookie *cookie)
>>> +{
>>> +	int len = le16_to_cpu(req->datalen);
>>> +	int err;
>>> +
>>> +	cookie->errcode = 0;
>>> +	cookie->done = 0;
>>> +	init_waitqueue_head(&cookie->wait);
>>> +	err = mutex_lock_interruptible(&hw->mbx.lock);
>>> +	if (err)
>>> +		return err;
>>> +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
>>> +	if (err)
>>> +		goto out;
>>> +	/* if write succeeds, we must wait for firmware response or
>>> +	 * timeout to avoid using the already freed cookie->wait
>>> +	 */
>>> +	err = wait_event_timeout(cookie->wait,
>>> +				 cookie->done == 1,
>>> +				 cookie->timeout_jiffies);
>>
>> it's unclear to me, what part of the code is managing values of cookie
>> structure? I didn't get the reason why are you putting the address of
>> cookie structure into request which is then directly passed to the FW.
>> Is the FW supposed to change values in cookie?
>>
> 
> cookie will be used in an irq-handler. like this:
> static int rnpgbe_mbx_fw_reply_handler(struct mucse *mucse,
>                                         struct mbx_fw_cmd_reply *reply)
> {
>          struct mbx_req_cookie *cookie;
> 
>          cookie = reply->cookie;
> 
>          if (cookie->priv_len > 0)
>                  memcpy(cookie->priv, reply->data, cookie->priv_len);
>          cookie->done = 1;
>          if (le16_to_cpu(reply->flags) & FLAGS_ERR)
>                  cookie->errcode = -EIO;
>          else
>                  cookie->errcode = 0;
>          wake_up(&cookie->wait);
>          return 0;
> }
> That is why we must wait for firmware response.
> But irq is not added in this patch series. Maybe I should move all
> cookie relative codes to the patch will add irq?

well, yes, in general it's better to introduce the code as a solid
solution. this way it's much easier to review

> 
>>> +
>>> +	if (!err)
>>> +		err = -ETIMEDOUT;
>>> +	else
>>> +		err = 0;
>>> +	if (!err && cookie->errcode)
>>> +		err = cookie->errcode;
>>> +out:
>>> +	mutex_unlock(&hw->mbx.lock);
>>> +	return err;
>>> +}
>>
>> [...]
>>
>>> +struct mbx_fw_cmd_req {
>>> +	__le16 flags;
>>> +	__le16 opcode;
>>> +	__le16 datalen;
>>> +	__le16 ret_value;
>>> +	union {
>>> +		struct {
>>> +			__le32 cookie_lo;
>>> +			__le32 cookie_hi;
>>> +		};
>>> +
>>> +		void *cookie;
>>> +	};
>>> +	__le32 reply_lo;
>>> +	__le32 reply_hi;
>>
>> what do these 2 fields mean? are you going to provide reply's buffer
>> address directly to FW?
>>
> 
> No, this is defined by fw. Some fw can access physical address.
> But I don't use it in this driver.

FW can access physical address without previously configuring IOMMU?
How can that be?

> 
>>> +	union {
>>> +		u8 data[32];
>>> +		struct {
>>> +			__le32 version;
>>> +			__le32 status;
>>> +		} ifinsmod;
>>> +		struct {
>>> +			__le32 port_mask;
>>> +			__le32 pfvf_num;
>>> +		} get_mac_addr;
>>> +	};
>>> +} __packed;
>>> +
>>> +struct mbx_fw_cmd_reply {
>>> +	__le16 flags;
>>> +	__le16 opcode;
>>> +	__le16 error_code;
>>> +	__le16 datalen;
>>> +	union {
>>> +		struct {
>>> +			__le32 cookie_lo;
>>> +			__le32 cookie_hi;
>>> +		};
>>> +		void *cookie;
>>> +	};
>>
>> This part looks like the request, apart from datalen and error_code are
>> swapped in the header. And it actually means that the FW will put back
>> the address of provided cookie into reply, right? If yes, then it
>> doesn't look correct at all...
>>
> 
> It is yes. cookie is used in irq handler as show above.
> Sorry, I didn't understand 'the not correct' point?

The example above showed that the irq handler uses some value received
from the device as a pointer to kernel memory. That's not safe, you
cannot be sure that provided value is valid pointer, and that it points
to previously allocated cookie structure. It is a clear way to corrupt
memory.

> 
>>> +	union {
>>> +		u8 data[40];
>>> +		struct mac_addr {
>>> +			__le32 ports;
>>> +			struct _addr {
>>> +				/* for macaddr:01:02:03:04:05:06
>>> +				 * mac-hi=0x01020304 mac-lo=0x05060000
>>> +				 */
>>> +				u8 mac[8];
>>> +			} addrs[4];
>>> +		} mac_addr;
>>> +		struct hw_abilities hw_abilities;
>>> +	};
>>> +} __packed;
>>

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Tue, Aug 26, 2025 at 11:14:19AM +0100, Vadim Fedorenko wrote:
> On 26/08/2025 02:31, Yibo Dong wrote:
> > On Mon, Aug 25, 2025 at 05:37:27PM +0100, Vadim Fedorenko wrote:
> > > On 22/08/2025 03:34, Dong Yibo wrote:
> > > 
> > > [...]
> > > > +/**
> > > > + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> > > > + * @hw: pointer to the HW structure
> > > > + * @req: pointer to the cmd req structure
> > > > + * @cookie: pointer to the req cookie
> > > > + *
> > > > + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> > > > + * reply. cookie->wait will be set in irq handler.
> > > > + *
> > > > + * @return: 0 on success, negative on failure
> > > > + **/
> > > > +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> > > > +				 struct mbx_fw_cmd_req *req,
> > > > +				 struct mbx_req_cookie *cookie)
> > > > +{
> > > > +	int len = le16_to_cpu(req->datalen);
> > > > +	int err;
> > > > +
> > > > +	cookie->errcode = 0;
> > > > +	cookie->done = 0;
> > > > +	init_waitqueue_head(&cookie->wait);
> > > > +	err = mutex_lock_interruptible(&hw->mbx.lock);
> > > > +	if (err)
> > > > +		return err;
> > > > +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> > > > +	if (err)
> > > > +		goto out;
> > > > +	/* if write succeeds, we must wait for firmware response or
> > > > +	 * timeout to avoid using the already freed cookie->wait
> > > > +	 */
> > > > +	err = wait_event_timeout(cookie->wait,
> > > > +				 cookie->done == 1,
> > > > +				 cookie->timeout_jiffies);
> > > 
> > > it's unclear to me, what part of the code is managing values of cookie
> > > structure? I didn't get the reason why are you putting the address of
> > > cookie structure into request which is then directly passed to the FW.
> > > Is the FW supposed to change values in cookie?
> > > 
> > 
> > cookie will be used in an irq-handler. like this:
> > static int rnpgbe_mbx_fw_reply_handler(struct mucse *mucse,
> >                                         struct mbx_fw_cmd_reply *reply)
> > {
> >          struct mbx_req_cookie *cookie;
> > 
> >          cookie = reply->cookie;
> > 
> >          if (cookie->priv_len > 0)
> >                  memcpy(cookie->priv, reply->data, cookie->priv_len);
> >          cookie->done = 1;
> >          if (le16_to_cpu(reply->flags) & FLAGS_ERR)
> >                  cookie->errcode = -EIO;
> >          else
> >                  cookie->errcode = 0;
> >          wake_up(&cookie->wait);
> >          return 0;
> > }
> > That is why we must wait for firmware response.
> > But irq is not added in this patch series. Maybe I should move all
> > cookie relative codes to the patch will add irq?
> 
> well, yes, in general it's better to introduce the code as a solid
> solution. this way it's much easier to review
> 

Ok, I will remove it in this series and add later.

> > 
> > > > +
> > > > +	if (!err)
> > > > +		err = -ETIMEDOUT;
> > > > +	else
> > > > +		err = 0;
> > > > +	if (!err && cookie->errcode)
> > > > +		err = cookie->errcode;
> > > > +out:
> > > > +	mutex_unlock(&hw->mbx.lock);
> > > > +	return err;
> > > > +}
> > > 
> > > [...]
> > > 
> > > > +struct mbx_fw_cmd_req {
> > > > +	__le16 flags;
> > > > +	__le16 opcode;
> > > > +	__le16 datalen;
> > > > +	__le16 ret_value;
> > > > +	union {
> > > > +		struct {
> > > > +			__le32 cookie_lo;
> > > > +			__le32 cookie_hi;
> > > > +		};
> > > > +
> > > > +		void *cookie;
> > > > +	};
> > > > +	__le32 reply_lo;
> > > > +	__le32 reply_hi;
> > > 
> > > what do these 2 fields mean? are you going to provide reply's buffer
> > > address directly to FW?
> > > 
> > 
> > No, this is defined by fw. Some fw can access physical address.
> > But I don't use it in this driver.
> 
> FW can access physical address without previously configuring IOMMU?
> How can that be?
> 

memory is allocated by dma_alloc_coherent, and get physical address.
Then fw use it.

> > 
> > > > +	union {
> > > > +		u8 data[32];
> > > > +		struct {
> > > > +			__le32 version;
> > > > +			__le32 status;
> > > > +		} ifinsmod;
> > > > +		struct {
> > > > +			__le32 port_mask;
> > > > +			__le32 pfvf_num;
> > > > +		} get_mac_addr;
> > > > +	};
> > > > +} __packed;
> > > > +
> > > > +struct mbx_fw_cmd_reply {
> > > > +	__le16 flags;
> > > > +	__le16 opcode;
> > > > +	__le16 error_code;
> > > > +	__le16 datalen;
> > > > +	union {
> > > > +		struct {
> > > > +			__le32 cookie_lo;
> > > > +			__le32 cookie_hi;
> > > > +		};
> > > > +		void *cookie;
> > > > +	};
> > > 
> > > This part looks like the request, apart from datalen and error_code are
> > > swapped in the header. And it actually means that the FW will put back
> > > the address of provided cookie into reply, right? If yes, then it
> > > doesn't look correct at all...
> > > 
> > 
> > It is yes. cookie is used in irq handler as show above.
> > Sorry, I didn't understand 'the not correct' point?
> 
> The example above showed that the irq handler uses some value received
> from the device as a pointer to kernel memory. That's not safe, you
> cannot be sure that provided value is valid pointer, and that it points
> to previously allocated cookie structure. It is a clear way to corrupt
> memory.
> 

Yes. It is not safe, so I 'must wait_event_timeout before free cookie'....
But is there a safe way to do it?
Maybe:
->allocate cookie
  -> map it to an unique id
    ->set the id to req->cookie
      ->receive response and check id valid? Then access cookie?
Please give me some advice... 

> > 
> > > > +	union {
> > > > +		u8 data[40];
> > > > +		struct mac_addr {
> > > > +			__le32 ports;
> > > > +			struct _addr {
> > > > +				/* for macaddr:01:02:03:04:05:06
> > > > +				 * mac-hi=0x01020304 mac-lo=0x05060000
> > > > +				 */
> > > > +				u8 mac[8];
> > > > +			} addrs[4];
> > > > +		} mac_addr;
> > > > +		struct hw_abilities hw_abilities;
> > > > +	};
> > > > +} __packed;
> > > 
> 
>

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Andrew Lunn 1 month, 1 week ago

> Yes. It is not safe, so I 'must wait_event_timeout before free cookie'....
> But is there a safe way to do it?
> Maybe:
> ->allocate cookie
>   -> map it to an unique id
>     ->set the id to req->cookie
>       ->receive response and check id valid? Then access cookie?

This is part of why adding cookies in a separate patch with a good
commit message is important.

Please take a step back. What is the big picture? Why do you need a
cookie? What is it used for? If you describe what your requirements
are, we might be able to suggest a better solution, or point you at a
driver you can copy code from.

	Andrew

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Tue, Aug 26, 2025 at 02:39:07PM +0200, Andrew Lunn wrote:
> > Yes. It is not safe, so I 'must wait_event_timeout before free cookie'....
> > But is there a safe way to do it?
> > Maybe:
> > ->allocate cookie
> >   -> map it to an unique id
> >     ->set the id to req->cookie
> >       ->receive response and check id valid? Then access cookie?
> 
> This is part of why adding cookies in a separate patch with a good
> commit message is important.
> 
> Please take a step back. What is the big picture? Why do you need a
> cookie? What is it used for? If you describe what your requirements
> are, we might be able to suggest a better solution, or point you at a
> driver you can copy code from.
> 
> 	Andrew
> 

I will try to explain it:

driver --> fw, we have two types of request:
1. without response, such as mucse_mbx_ifinsmod
2. with response, such as mucse_fw_get_macaddr

fw --> driver, we have one type of request:
1. link status (link speed, duplex, pause status...)

fw triggers an irq when it sends a response or a request.
In order to handle link status in a timely manner, we use an irq handler like this:

/**
 * rnpgbe_rcv_msg_from_fw - Read one mailbox message and dispatch it
 * @mucse: pointer to the struct mucse whose hw mailbox is read
 *
 * Reads MUCSE_FW_MAILBOX_WORDS words out of the mailbox, then hands the
 * buffer to the reply handler when FLAGS_DD is set in the flags field,
 * or to the request handler otherwise.
 *
 * @return: the error from mucse_read_mbx if the read fails, otherwise
 * whatever the selected handler returns
 **/
static int rnpgbe_rcv_msg_from_fw(struct mucse *mucse)
{
	u32 words[MUCSE_FW_MAILBOX_WORDS];
	struct mbx_fw_cmd_reply *rsp = (struct mbx_fw_cmd_reply *)words;
	int err;

	/* Pull the mailbox contents out before looking at them */
	err = mucse_read_mbx(&mucse->hw, words, MUCSE_FW_MAILBOX_WORDS);
	if (err)
		return err;

	/* FLAGS_DD clear: the message is a request from fw (link status) */
	if (!(le16_to_cpu(rsp->flags) & FLAGS_DD))
		return rnpgbe_mbx_fw_req_handler(mucse,
				(struct mbx_fw_cmd_req *)words);

	/* FLAGS_DD set: the message is a response to a driver request */
	return rnpgbe_mbx_fw_reply_handler(mucse, rsp);
}

And a driver request with a response, 'without' the irq handler, looks like this:

static int mucse_fw_send_cmd_wait(struct mucse_hw *hw,
				  struct mbx_fw_cmd_req *req,
				  struct mbx_fw_cmd_reply *reply)
{
...
	mucse_write_posted_mbx(hw, (u32 *)req, len);

	...
	/* but as irqhandle be added, mbx data is read out in the
	 * handler, mucse_read_posted_mbx cannot read anything */
	mucse_read_posted_mbx(hw, (u32 *)reply, sizeof(*reply));

}

To solve the problem that mucse_read_posted_mbx cannot read data once the
irq handler is in place, we added the 'cookie'. After mucse_write_posted_mbx,
the sender calls wait_event_timeout; wake_up is called in the irq handler.

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Andrew Lunn 1 month, 1 week ago

> I try to explain the it:
> 
> driver-->fw, we has two types request:
> 1. without response, such as mucse_mbx_ifinsmod
> 2. with response, such as mucse_fw_get_macaddr
> 
> fw --> driver, we has one types request:
> 1. link status (link speed, duplex, pause status...)

Is the firmware multi threaded? By that, i mean can there be two
request/responses going on at once?

I'm assuming not.

So there appears to be four use cases:

1) Fire and forget, request without response.
2) Request with a response
3) Link state change from the firmware
4) Race condition: Request/response and link state change at the same time.

Again, assuming the firmware is single threaded, there must be a big
mutex around the message box so there can only be one thread doing any
sort of interaction with the firmware.

Since there can only be one thread waiting for the response, the
struct completion can be a member of the message box. The thread
waiting for a response uses wait_for_completion(mbx->completion).

The interrupt handler can look at the type of message it got from the
firmware. If it is a link state, process it, and exit. If it is
anything else, complete(mbx->completion) and exit.

I don't see the need for any sort of cookie.

  Andrew

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month ago

On Wed, Aug 27, 2025 at 09:54:58PM +0200, Andrew Lunn wrote:
> > I try to explain the it:
> > 
> > driver-->fw, we has two types request:
> > 1. without response, such as mucse_mbx_ifinsmod
> > 2. with response, such as mucse_fw_get_macaddr
> > 
> > fw --> driver, we has one types request:
> > 1. link status (link speed, duplex, pause status...)
> 
> Is the firmware multi threaded? By that, i mean can there be two
> request/responses going on at once?
> 
> I'm assuming not.

No, fw is single threaded.

> 
> So there appears to be four use cases:
> 
> 1) Fire and forget, request without response.
> 2) Request with a response
> 3) Link state change from the firmware
> 4) Race condition: Request/response and link state change at the same time.
> 
> Again, assuming the firmware is single threaded, there must be a big
> mutex around the message box so there can only be one thread doing any
> sort of interaction with the firmware.
> 
> Since there can only be one thread waiting for the response, the
> struct completion can be a member of the message box. The thread
> waiting for a response uses wait_for_completion(mbx->completion).
> 
> The interrupt handler can look at the type of message it got from the
> firmware. If it is a link state, process it, and exit. If it is
> anything else, complete(mbx->completion) and exit.
> 
> I don't see the need for any sort of cookie.

Got it, I will try this in the future patch that adds the irq handler.

> 
>   Andrew
> 

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Vadim Fedorenko 1 month, 1 week ago

On 22/08/2025 03:34, Dong Yibo wrote:
> Initialize basic mbx_fw ops, such as get_capability, reset phy
> and so on.
> 
> Signed-off-by: Dong Yibo <dong100@mucse.com>

[...]

> +/**
> + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> + * @hw: pointer to the HW structure
> + * @req: pointer to the cmd req structure
> + * @cookie: pointer to the req cookie
> + *
> + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> + * reply. cookie->wait will be set in irq handler.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> +				 struct mbx_fw_cmd_req *req,
> +				 struct mbx_req_cookie *cookie)
> +{
> +	int len = le16_to_cpu(req->datalen);
> +	int err;
> +
> +	cookie->errcode = 0;
> +	cookie->done = 0;
> +	init_waitqueue_head(&cookie->wait);
> +	err = mutex_lock_interruptible(&hw->mbx.lock);
> +	if (err)
> +		return err;
> +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> +	if (err)
> +		goto out;
> +	/* if write succeeds, we must wait for firmware response or
> +	 * timeout to avoid using the already freed cookie->wait
> +	 */
> +	err = wait_event_timeout(cookie->wait,
> +				 cookie->done == 1,
> +				 cookie->timeout_jiffies);
> +
> +	if (!err)
> +		err = -ETIMEDOUT;
> +	else
> +		err = 0;
> +	if (!err && cookie->errcode)
> +		err = cookie->errcode;

can cookie->errcode be non 0 if FW times out?


looks like this can be simplified to

if(!wait_event_timeout())
   err = -ETIMEDOUT
else
   err = cookie->errcode

> +out:
> +	mutex_unlock(&hw->mbx.lock);
> +	return err;
> +}
> +
> +/**
> + * build_ifinsmod - build req with insmod opcode
> + * @req: pointer to the cmd req structure
> + * @status: true for insmod, false for rmmod

naming is misleading here, I believe.. no strong feeling, but
is_insmod might be better

> + **/
> +static void build_ifinsmod(struct mbx_fw_cmd_req *req,
> +			   int status)
> +{
> +	req->flags = 0;
> +	req->opcode = cpu_to_le16(DRIVER_INSMOD);
> +	req->datalen = cpu_to_le16(sizeof(req->ifinsmod) +
> +				   MBX_REQ_HDR_LEN);
> +	req->cookie = NULL;
> +	req->reply_lo = 0;
> +	req->reply_hi = 0;
> +#define FIXED_VERSION 0xFFFFFFFF
> +	req->ifinsmod.version = cpu_to_le32(FIXED_VERSION);
> +	req->ifinsmod.status = cpu_to_le32(status);
> +}
> +
> +/**
> + * mucse_mbx_ifinsmod - Echo driver insmod status to hw
> + * @hw: pointer to the HW structure
> + * @status: true for insmod, false for rmmod

here as well

> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status)
> +{
> +	struct mbx_fw_cmd_req req = {};
> +	int len;
> +	int err;
> +
> +	build_ifinsmod(&req, status);
> +	len = le16_to_cpu(req.datalen);
> +	err = mutex_lock_interruptible(&hw->mbx.lock);
> +	if (err)
> +		return err;
> +
> +	if (status) {
> +		err = mucse_write_posted_mbx(hw, (u32 *)&req,
> +					     len);
> +	} else {
> +		err = mucse_write_mbx_pf(hw, (u32 *)&req,
> +					 len);
> +	}
> +
> +	mutex_unlock(&hw->mbx.lock);
> +	return err;
> +}

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Sat, Aug 23, 2025 at 04:02:29PM +0100, Vadim Fedorenko wrote:
> On 22/08/2025 03:34, Dong Yibo wrote:
> > Initialize basic mbx_fw ops, such as get_capability, reset phy
> > and so on.
> > 
> > Signed-off-by: Dong Yibo <dong100@mucse.com>
> 
> [...]
> 
> > +/**
> > + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> > + * @hw: pointer to the HW structure
> > + * @req: pointer to the cmd req structure
> > + * @cookie: pointer to the req cookie
> > + *
> > + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> > + * reply. cookie->wait will be set in irq handler.
> > + *
> > + * @return: 0 on success, negative on failure
> > + **/
> > +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> > +				 struct mbx_fw_cmd_req *req,
> > +				 struct mbx_req_cookie *cookie)
> > +{
> > +	int len = le16_to_cpu(req->datalen);
> > +	int err;
> > +
> > +	cookie->errcode = 0;
> > +	cookie->done = 0;
> > +	init_waitqueue_head(&cookie->wait);
> > +	err = mutex_lock_interruptible(&hw->mbx.lock);
> > +	if (err)
> > +		return err;
> > +	err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> > +	if (err)
> > +		goto out;
> > +	/* if write succeeds, we must wait for firmware response or
> > +	 * timeout to avoid using the already freed cookie->wait
> > +	 */
> > +	err = wait_event_timeout(cookie->wait,
> > +				 cookie->done == 1,
> > +				 cookie->timeout_jiffies);
> > +
> > +	if (!err)
> > +		err = -ETIMEDOUT;
> > +	else
> > +		err = 0;
> > +	if (!err && cookie->errcode)
> > +		err = cookie->errcode;
> 
> can cookie->errcode be non 0 if FW times out?
> 

The cookie is allocated by kzalloc; if fw times out, nothing changes it.
So cookie->errcode is 0 if FW times out.

> 
> looks like this can be simplified to
> 
> if(!wait_event_timeout())
>   err = -ETIMEDOUT
> else
>   err = cookie->errcode
> 

Got it, I will update it.

> > +out:
> > +	mutex_unlock(&hw->mbx.lock);
> > +	return err;
> > +}
> > +
> > +/**
> > + * build_ifinsmod - build req with insmod opcode
> > + * @req: pointer to the cmd req structure
> > + * @status: true for insmod, false for rmmod
> 
> naming is misleading here, I believe.. no strong feeling, but
> is_insmod might be better
> 

I see, I will fix it.

> > + **/
> > +static void build_ifinsmod(struct mbx_fw_cmd_req *req,
> > +			   int status)
> > +{
> > +	req->flags = 0;
> > +	req->opcode = cpu_to_le16(DRIVER_INSMOD);
> > +	req->datalen = cpu_to_le16(sizeof(req->ifinsmod) +
> > +				   MBX_REQ_HDR_LEN);
> > +	req->cookie = NULL;
> > +	req->reply_lo = 0;
> > +	req->reply_hi = 0;
> > +#define FIXED_VERSION 0xFFFFFFFF
> > +	req->ifinsmod.version = cpu_to_le32(FIXED_VERSION);
> > +	req->ifinsmod.status = cpu_to_le32(status);
> > +}
> > +
> > +/**
> > + * mucse_mbx_ifinsmod - Echo driver insmod status to hw
> > + * @hw: pointer to the HW structure
> > + * @status: true for insmod, false for rmmod
> 
> here as well
> 

Got it.

> > + *
> > + * @return: 0 on success, negative on failure
> > + **/
> > +int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status)
> > +{
> > +	struct mbx_fw_cmd_req req = {};
> > +	int len;
> > +	int err;
> > +
> > +	build_ifinsmod(&req, status);
> > +	len = le16_to_cpu(req.datalen);
> > +	err = mutex_lock_interruptible(&hw->mbx.lock);
> > +	if (err)
> > +		return err;
> > +
> > +	if (status) {
> > +		err = mucse_write_posted_mbx(hw, (u32 *)&req,
> > +					     len);
> > +	} else {
> > +		err = mucse_write_mbx_pf(hw, (u32 *)&req,
> > +					 len);
> > +	}
> > +
> > +	mutex_unlock(&hw->mbx.lock);
> > +	return err;
> > +}
> 

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Andrew Lunn 1 month, 1 week ago

> +/**
> + * mucse_mbx_get_capability - Get hw abilities from fw
> + * @hw: pointer to the HW structure
> + *
> + * mucse_mbx_get_capability tries to get capabities from
> + * hw. Many retrys will do if it is failed.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_mbx_get_capability(struct mucse_hw *hw)
> +{
> +	struct hw_abilities ability = {};
> +	int try_cnt = 3;
> +	int err = -EIO;
> +
> +	while (try_cnt--) {
> +		err = mucse_fw_get_capability(hw, &ability);
> +		if (err)
> +			continue;
> +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> +		return 0;
> +	}
> +	return err;
> +}

Please could you add an explanation why it would fail? Is this to do
with getting the driver and firmware in sync? Maybe you should make
this explicit, add a function mucse_mbx_sync() with a comment that
this is used once during probe to synchronise communication with the
firmware. You can then remove this loop here.

I would also differentiate between different error codes. It is
pointless to try again with ENOMEM, EINVAL, etc. These are real errors
which should be reported. However TIMEDOUT might make sense to
retry.

	Andrew

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Fri, Aug 22, 2025 at 04:43:16PM +0200, Andrew Lunn wrote:
> > +/**
> > + * mucse_mbx_get_capability - Get hw abilities from fw
> > + * @hw: pointer to the HW structure
> > + *
> > + * mucse_mbx_get_capability tries to get capabities from
> > + * hw. Many retrys will do if it is failed.
> > + *
> > + * @return: 0 on success, negative on failure
> > + **/
> > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > +{
> > +	struct hw_abilities ability = {};
> > +	int try_cnt = 3;
> > +	int err = -EIO;
> > +
> > +	while (try_cnt--) {
> > +		err = mucse_fw_get_capability(hw, &ability);
> > +		if (err)
> > +			continue;
> > +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > +		return 0;
> > +	}
> > +	return err;
> > +}
> 
> Please could you add an explanation why it would fail? Is this to do
> with getting the driver and firmware in sync? Maybe you should make
> this explicit, add a function mucse_mbx_sync() with a comment that
> this is used once during probe to synchronise communication with the
> firmware. You can then remove this loop here.

This just gets some fw capabilities (or info such as the fw version).
It may fail for the following reasons:
1. -EIO: returned by mucse_obtain_mbx_lock_pf. The function tries to get the
pf-fw lock (in a chip register, not in the driver) and fails when fw holds the lock.
2. -ETIMEDOUT: returned by mucse_poll_for_xx. Fails on timeout.
3. -ETIMEDOUT: returned by mucse_fw_send_cmd_wait. Fails when waiting for the
response times out.
4. -EIO: returned by mucse_fw_send_cmd_wait. Fails when the response carries
an error_code.
5. The error returned by mutex_lock_interruptible.

> 
> I would also differentiate between different error codes. It is
> pointless to try again with ENOMEM, EINVAL, etc. These are real errors
> which should be reported. However TIMEDOUT might makes sense to
> retry.
> 
> 	Andrew
> 

Yes, I didn't differentiate between the different error codes. But it costs
~0 to ask the firmware again, and the error is still reported to the caller
after 'try_cnt' retries.
Maybe the error codes can simply be handled like this?

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Andrew Lunn 1 month, 1 week ago

On Sat, Aug 23, 2025 at 09:58:24AM +0800, Yibo Dong wrote:
> On Fri, Aug 22, 2025 at 04:43:16PM +0200, Andrew Lunn wrote:
> > > +/**
> > > + * mucse_mbx_get_capability - Get hw abilities from fw
> > > + * @hw: pointer to the HW structure
> > > + *
> > > + * mucse_mbx_get_capability tries to get capabities from
> > > + * hw. Many retrys will do if it is failed.
> > > + *
> > > + * @return: 0 on success, negative on failure
> > > + **/
> > > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > > +{
> > > +	struct hw_abilities ability = {};
> > > +	int try_cnt = 3;
> > > +	int err = -EIO;
> > > +
> > > +	while (try_cnt--) {
> > > +		err = mucse_fw_get_capability(hw, &ability);
> > > +		if (err)
> > > +			continue;
> > > +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > > +		return 0;
> > > +	}
> > > +	return err;
> > > +}
> > 
> > Please could you add an explanation why it would fail? Is this to do
> > with getting the driver and firmware in sync? Maybe you should make
> > this explicit, add a function mucse_mbx_sync() with a comment that
> > this is used once during probe to synchronise communication with the
> > firmware. You can then remove this loop here.
> 
> It is just get some fw capability(or info such as fw version).
> It is failed maybe:
> 1. -EIO: return by mucse_obtain_mbx_lock_pf. The function tries to get
> pf-fw lock(in chip register, not driver), failed when fw hold the lock.

If it cannot get the lock, isn't that fatal? You cannot do anything
without the lock.

> 2. -ETIMEDOUT: return by mucse_poll_for_xx. Failed when timeout.
> 3. -ETIMEDOUT: return by mucse_fw_send_cmd_wait. Failed when wait
> response timeout.

If it's dead, it's dead. Why would it suddenly start responding?

> 4. -EIO: return by mucse_fw_send_cmd_wait. Failed when error_code in
> response.

Which should be fatal. No retries necessary.

> 5. err return by mutex_lock_interruptible.

So you want the user to have to ^C three times?

And is mucse_mbx_get_capability() special, or will all interactions
with the firmware have three retries?

	Andrew

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Sat, Aug 23, 2025 at 05:17:45PM +0200, Andrew Lunn wrote:
> On Sat, Aug 23, 2025 at 09:58:24AM +0800, Yibo Dong wrote:
> > On Fri, Aug 22, 2025 at 04:43:16PM +0200, Andrew Lunn wrote:
> > > > +/**
> > > > + * mucse_mbx_get_capability - Get hw abilities from fw
> > > > + * @hw: pointer to the HW structure
> > > > + *
> > > > + * mucse_mbx_get_capability tries to get capabities from
> > > > + * hw. Many retrys will do if it is failed.
> > > > + *
> > > > + * @return: 0 on success, negative on failure
> > > > + **/
> > > > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > > > +{
> > > > +	struct hw_abilities ability = {};
> > > > +	int try_cnt = 3;
> > > > +	int err = -EIO;
> > > > +
> > > > +	while (try_cnt--) {
> > > > +		err = mucse_fw_get_capability(hw, &ability);
> > > > +		if (err)
> > > > +			continue;
> > > > +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > > > +		return 0;
> > > > +	}
> > > > +	return err;
> > > > +}
> > > 
> > > Please could you add an explanation why it would fail? Is this to do
> > > with getting the driver and firmware in sync? Maybe you should make
> > > this explicit, add a function mucse_mbx_sync() with a comment that
> > > this is used once during probe to synchronise communication with the
> > > firmware. You can then remove this loop here.
> > 
> > It is just get some fw capability(or info such as fw version).
> > It is failed maybe:
> > 1. -EIO: return by mucse_obtain_mbx_lock_pf. The function tries to get
> > pf-fw lock(in chip register, not driver), failed when fw hold the lock.
> 
> If it cannot get the lock, isn't that fatal? You cannot do anything
> without the lock.
> 
> > 2. -ETIMEDOUT: return by mucse_poll_for_xx. Failed when timeout.
> > 3. -ETIMEDOUT: return by mucse_fw_send_cmd_wait. Failed when wait
> > response timeout.
> 
> If its dead, its dead. Why would it suddenly start responding?
> 
> > 4. -EIO: return by mucse_fw_send_cmd_wait. Failed when error_code in
> > response.
> 
> Which should be fatal. No retries necessary.
> 
> > 5. err return by mutex_lock_interruptible.
> 
> So you want the user to have to ^C three times?
> 
> And is mucse_mbx_get_capability() special, or will all interactions
> with the firmware have three retries?

It is the first 'cmd with response' sent to fw during probe. If it fails,
probe returns the error and nothing else is done (no netdev is registered ...).
So we designed it to retry.
Treating fatal errors as fatal, with no retry, maybe like this?

/**
 * mucse_mbx_get_capability - Get hw abilities from fw
 * @hw: pointer to the HW structure
 *
 * Asks fw for its abilities and, on success, stores the low 8 bits of
 * the reported pfnum in hw->pfvfnum. Only -ETIMEDOUT is retried; any
 * other error is returned immediately. At most 'try_cnt' (3) attempts
 * are made in total, matching the attempt count of the original
 * while (try_cnt--) loop.
 *
 * @return: 0 on success, negative on failure
 **/
int mucse_mbx_get_capability(struct mucse_hw *hw)
{
	struct hw_abilities ability = {};
	int try_cnt = 3;
	int err;

	/* Pre-decrement so try_cnt counts total attempts, not extra retries */
	do {
		err = mucse_fw_get_capability(hw, &ability);
	} while (err == -ETIMEDOUT && --try_cnt > 0);

	if (!err)
		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
	return err;
}

> 
> 	Andrew
> 

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Andrew Lunn 1 month, 1 week ago

On Sun, Aug 24, 2025 at 12:10:52PM +0800, Yibo Dong wrote:
> On Sat, Aug 23, 2025 at 05:17:45PM +0200, Andrew Lunn wrote:
> > On Sat, Aug 23, 2025 at 09:58:24AM +0800, Yibo Dong wrote:
> > > On Fri, Aug 22, 2025 at 04:43:16PM +0200, Andrew Lunn wrote:
> > > > > +/**
> > > > > + * mucse_mbx_get_capability - Get hw abilities from fw
> > > > > + * @hw: pointer to the HW structure
> > > > > + *
> > > > > + * mucse_mbx_get_capability tries to get capabities from
> > > > > + * hw. Many retrys will do if it is failed.
> > > > > + *
> > > > > + * @return: 0 on success, negative on failure
> > > > > + **/
> > > > > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > > > > +{
> > > > > +	struct hw_abilities ability = {};
> > > > > +	int try_cnt = 3;
> > > > > +	int err = -EIO;
> > > > > +
> > > > > +	while (try_cnt--) {
> > > > > +		err = mucse_fw_get_capability(hw, &ability);
> > > > > +		if (err)
> > > > > +			continue;
> > > > > +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > > > > +		return 0;
> > > > > +	}
> > > > > +	return err;
> > > > > +}
> > > > 
> > > > Please could you add an explanation why it would fail? Is this to do
> > > > with getting the driver and firmware in sync? Maybe you should make
> > > > this explicit, add a function mucse_mbx_sync() with a comment that
> > > > this is used once during probe to synchronise communication with the
> > > > firmware. You can then remove this loop here.
> > > 
> > > It is just get some fw capability(or info such as fw version).
> > > It is failed maybe:
> > > 1. -EIO: return by mucse_obtain_mbx_lock_pf. The function tries to get
> > > pf-fw lock(in chip register, not driver), failed when fw hold the lock.
> > 
> > If it cannot get the lock, isn't that fatal? You cannot do anything
> > without the lock.
> > 
> > > 2. -ETIMEDOUT: return by mucse_poll_for_xx. Failed when timeout.
> > > 3. -ETIMEDOUT: return by mucse_fw_send_cmd_wait. Failed when wait
> > > response timeout.
> > 
> > If its dead, its dead. Why would it suddenly start responding?
> > 
> > > 4. -EIO: return by mucse_fw_send_cmd_wait. Failed when error_code in
> > > response.
> > 
> > Which should be fatal. No retries necessary.
> > 
> > > 5. err return by mutex_lock_interruptible.
> > 
> > So you want the user to have to ^C three times?
> > 
> > And is mucse_mbx_get_capability() special, or will all interactions
> > with the firmware have three retries?
> 

> It is the first 'cmd with response' from fw when probe. If it failed,
> return err and nothing else todo (no registe netdev ...). So, we design
> to give retry for it.
> fatal with no retry, maybe like this? 
 
Quoting myself:

> > > > Is this to do
> > > > with getting the driver and firmware in sync? Maybe you should make
> > > > this explicit, add a function mucse_mbx_sync() with a comment that
> > > > this is used once during probe to synchronise communication with the
> > > > firmware. You can then remove this loop here.

Does the firmware offer a NOP command? Or one to get the firmware
version?  If you are trying to get the driver and firmware in sync, it
makes sense to use an operation which is low value and won't be used
anywhere else.

	Andrew

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Sun, Aug 24, 2025 at 05:15:25PM +0200, Andrew Lunn wrote:
> On Sun, Aug 24, 2025 at 12:10:52PM +0800, Yibo Dong wrote:
> > On Sat, Aug 23, 2025 at 05:17:45PM +0200, Andrew Lunn wrote:
> > > On Sat, Aug 23, 2025 at 09:58:24AM +0800, Yibo Dong wrote:
> > > > On Fri, Aug 22, 2025 at 04:43:16PM +0200, Andrew Lunn wrote:
> > > > > > +/**
> > > > > > + * mucse_mbx_get_capability - Get hw abilities from fw
> > > > > > + * @hw: pointer to the HW structure
> > > > > > + *
> > > > > > + * mucse_mbx_get_capability tries to get capabities from
> > > > > > + * hw. Many retrys will do if it is failed.
> > > > > > + *
> > > > > > + * @return: 0 on success, negative on failure
> > > > > > + **/
> > > > > > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > > > > > +{
> > > > > > +	struct hw_abilities ability = {};
> > > > > > +	int try_cnt = 3;
> > > > > > +	int err = -EIO;
> > > > > > +
> > > > > > +	while (try_cnt--) {
> > > > > > +		err = mucse_fw_get_capability(hw, &ability);
> > > > > > +		if (err)
> > > > > > +			continue;
> > > > > > +		hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > > > > > +		return 0;
> > > > > > +	}
> > > > > > +	return err;
> > > > > > +}
> > > > > 
> > > > > Please could you add an explanation why it would fail? Is this to do
> > > > > with getting the driver and firmware in sync? Maybe you should make
> > > > > this explicit, add a function mucse_mbx_sync() with a comment that
> > > > > this is used once during probe to synchronise communication with the
> > > > > firmware. You can then remove this loop here.
> > > > 
> > > > It is just get some fw capability(or info such as fw version).
> > > > It is failed maybe:
> > > > 1. -EIO: return by mucse_obtain_mbx_lock_pf. The function tries to get
> > > > pf-fw lock(in chip register, not driver), failed when fw hold the lock.
> > > 
> > > If it cannot get the lock, isn't that fatal? You cannot do anything
> > > without the lock.
> > > 
> > > > 2. -ETIMEDOUT: return by mucse_poll_for_xx. Failed when timeout.
> > > > 3. -ETIMEDOUT: return by mucse_fw_send_cmd_wait. Failed when wait
> > > > response timeout.
> > > 
> > > If its dead, its dead. Why would it suddenly start responding?
> > > 
> > > > 4. -EIO: return by mucse_fw_send_cmd_wait. Failed when error_code in
> > > > response.
> > > 
> > > Which should be fatal. No retries necessary.
> > > 
> > > > 5. err return by mutex_lock_interruptible.
> > > 
> > > So you want the user to have to ^C three times?
> > > 
> > > And is mucse_mbx_get_capability() special, or will all interactions
> > > with the firmware have three retries?
> > 
> 
> > It is the first 'cmd with response' from fw when probe. If it failed,
> > return err and nothing else todo (no registe netdev ...). So, we design
> > to give retry for it.
> > fatal with no retry, maybe like this? 
>  
> Quoting myself:
> 
> > > > > Is this to do
> > > > > with getting the driver and firmware in sync? Maybe you should make
> > > > > this explicit, add a function mucse_mbx_sync() with a comment that
> > > > > this is used once during probe to synchronise communication with the
> > > > > firmware. You can then remove this loop here.

'mucse_mbx_get_capability' is in fact used once during probe, and won't be
used anywhere else.

> 
> Does the firmware offer a NOP command? Or one to get the firmware
> version?  If you are trying to get the driver and firmware in sync, it
> make sense to use an operation which is low value and won't be used
> anywhere else.
> 
> 	Andrew
> 

There is no NOP command. 'mucse_mbx_get_capability' can get the firmware version
and is in fact only used in probe; maybe I should rename it to 'mucse_mbx_sync'
and add a comment 'only used once during probe'?
Or keep the name and just add that comment?

Thanks for your feedback.

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Parthiban.Veerasooran@microchip.com 1 month, 1 week ago

On 22/08/25 8:04 am, Dong Yibo wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> Initialize basic mbx_fw ops, such as get_capability, reset phy
> and so on.
> 
> Signed-off-by: Dong Yibo <dong100@mucse.com>
> ---
>   drivers/net/ethernet/mucse/rnpgbe/Makefile    |   3 +-
>   drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h    |   1 +
>   .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 333 ++++++++++++++++++
>   .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 152 ++++++++
>   4 files changed, 488 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
>   create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
> 
> diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile
> index 5fc878ada4b1..de8bcb7772ab 100644
> --- a/drivers/net/ethernet/mucse/rnpgbe/Makefile
> +++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile
> @@ -7,4 +7,5 @@
>   obj-$(CONFIG_MGBE) += rnpgbe.o
>   rnpgbe-objs := rnpgbe_main.o\
>                 rnpgbe_chip.o\
> -              rnpgbe_mbx.o
> +              rnpgbe_mbx.o\
> +              rnpgbe_mbx_fw.o
> diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
> index 67e28a4667e7..a32419a34d75 100644
> --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
> +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
> @@ -52,6 +52,7 @@ struct mucse_hw {
>          void __iomem *hw_addr;
>          struct pci_dev *pdev;
>          enum rnpgbe_hw_type hw_type;
> +       u8 pfvfnum;
>          struct mucse_mbx_info mbx;
>   };
> 
> diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
> new file mode 100644
> index 000000000000..84570763cf79
> --- /dev/null
> +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
> @@ -0,0 +1,333 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2020 - 2025 Mucse Corporation. */
> +
> +#include <linux/pci.h>
> +#include <linux/if_ether.h>
> +
> +#include "rnpgbe.h"
> +#include "rnpgbe_hw.h"
> +#include "rnpgbe_mbx.h"
> +#include "rnpgbe_mbx_fw.h"
> +
> +/**
> + * mucse_fw_send_cmd_wait - Send cmd req and wait for response
> + * @hw: pointer to the HW structure
> + * @req: pointer to the cmd req structure
> + * @reply: pointer to the fw reply structure
> + *
> + * mucse_fw_send_cmd_wait sends req to pf-fw mailbox and wait
> + * reply from fw.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +static int mucse_fw_send_cmd_wait(struct mucse_hw *hw,
> +                                 struct mbx_fw_cmd_req *req,
> +                                 struct mbx_fw_cmd_reply *reply)
> +{
> +       int len = le16_to_cpu(req->datalen);
> +       int retry_cnt = 3;
> +       int err;
> +
> +       err = mutex_lock_interruptible(&hw->mbx.lock);
> +       if (err)
> +               return err;
> +       err = mucse_write_posted_mbx(hw, (u32 *)req, len);
> +       if (err)
> +               goto out;
> +       do {
> +               err = mucse_read_posted_mbx(hw, (u32 *)reply,
> +                                           sizeof(*reply));
> +               if (err)
> +                       goto out;
> +               /* mucse_write_posted_mbx return 0 means fw has
> +                * received request, wait for the expect opcode
> +                * reply with 'retry_cnt' times.
> +                */
> +       } while (--retry_cnt >= 0 && reply->opcode != req->opcode);
> +out:
> +       mutex_unlock(&hw->mbx.lock);
> +       if (!err && retry_cnt < 0)
> +               return -ETIMEDOUT;
> +       if (!err && reply->error_code)
> +               return -EIO;
> +       return err;
> +}
> +
> +/**
> + * build_phy_abilities_req - build req with get_phy_ability opcode
> + * @req: pointer to the cmd req structure
> + **/
> +static void build_phy_abilities_req(struct mbx_fw_cmd_req *req)
> +{
> +       req->flags = 0;
> +       req->opcode = cpu_to_le16(GET_PHY_ABILITY);
> +       req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN);
> +       req->reply_lo = 0;
> +       req->reply_hi = 0;
> +}
> +
> +/**
> + * mucse_fw_get_capability - Get hw abilities from fw
> + * @hw: pointer to the HW structure
> + * @abil: pointer to the hw_abilities structure
> + *
> + * mucse_fw_get_capability tries to get hw abilities from
> + * hw.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +static int mucse_fw_get_capability(struct mucse_hw *hw,
> +                                  struct hw_abilities *abil)
> +{
> +       struct mbx_fw_cmd_reply reply = {};
> +       struct mbx_fw_cmd_req req = {};
> +       int err;
> +
> +       build_phy_abilities_req(&req);
> +       err = mucse_fw_send_cmd_wait(hw, &req, &reply);
> +       if (!err)
> +               memcpy(abil, &reply.hw_abilities, sizeof(*abil));
> +       return err;
> +}
> +
> +/**
> + * mucse_mbx_get_capability - Get hw abilities from fw
> + * @hw: pointer to the HW structure
> + *
> + * mucse_mbx_get_capability tries to get capabities from
> + * hw. Many retrys will do if it is failed.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_mbx_get_capability(struct mucse_hw *hw)
> +{
> +       struct hw_abilities ability = {};
> +       int try_cnt = 3;
> +       int err = -EIO;
Here too, you don't need to assign -EIO, as err is updated in the while loop.

Best regards,
Parthiban V
> +
> +       while (try_cnt--) {
> +               err = mucse_fw_get_capability(hw, &ability);
> +               if (err)
> +                       continue;
> +               hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> +               return 0;
> +       }
> +       return err;
> +}
> +
> +/**
> + * mbx_cookie_zalloc - Alloc a cookie structure
> + * @priv_len: private length for this cookie
> + *
> + * @return: cookie structure on success
> + **/
> +static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len)
> +{
> +       struct mbx_req_cookie *cookie;
> +
> +       cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL);
> +       if (cookie) {
> +               cookie->timeout_jiffies = 30 * HZ;
> +               cookie->magic = COOKIE_MAGIC;
> +               cookie->priv_len = priv_len;
> +       }
> +       return cookie;
> +}
> +
> +/**
> + * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
> + * @hw: pointer to the HW structure
> + * @req: pointer to the cmd req structure
> + * @cookie: pointer to the req cookie
> + *
> + * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
> + * reply. cookie->wait will be set in irq handler.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
> +                                struct mbx_fw_cmd_req *req,
> +                                struct mbx_req_cookie *cookie)
> +{
> +       int len = le16_to_cpu(req->datalen);
> +       int err;
> +
> +       cookie->errcode = 0;
> +       cookie->done = 0;
> +       init_waitqueue_head(&cookie->wait);
> +       err = mutex_lock_interruptible(&hw->mbx.lock);
> +       if (err)
> +               return err;
> +       err = mucse_write_mbx_pf(hw, (u32 *)req, len);
> +       if (err)
> +               goto out;
> +       /* if write succeeds, we must wait for firmware response or
> +        * timeout to avoid using the already freed cookie->wait
> +        */
> +       err = wait_event_timeout(cookie->wait,
> +                                cookie->done == 1,
> +                                cookie->timeout_jiffies);
> +
> +       if (!err)
> +               err = -ETIMEDOUT;
> +       else
> +               err = 0;
> +       if (!err && cookie->errcode)
> +               err = cookie->errcode;
> +out:
> +       mutex_unlock(&hw->mbx.lock);
> +       return err;
> +}
> +
> +/**
> + * build_ifinsmod - build req with insmod opcode
> + * @req: pointer to the cmd req structure
> + * @status: true for insmod, false for rmmod
> + **/
> +static void build_ifinsmod(struct mbx_fw_cmd_req *req,
> +                          int status)
> +{
> +       req->flags = 0;
> +       req->opcode = cpu_to_le16(DRIVER_INSMOD);
> +       req->datalen = cpu_to_le16(sizeof(req->ifinsmod) +
> +                                  MBX_REQ_HDR_LEN);
> +       req->cookie = NULL;
> +       req->reply_lo = 0;
> +       req->reply_hi = 0;
> +#define FIXED_VERSION 0xFFFFFFFF
> +       req->ifinsmod.version = cpu_to_le32(FIXED_VERSION);
> +       req->ifinsmod.status = cpu_to_le32(status);
> +}
> +
> +/**
> + * mucse_mbx_ifinsmod - Echo driver insmod status to hw
> + * @hw: pointer to the HW structure
> + * @status: true for insmod, false for rmmod
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status)
> +{
> +       struct mbx_fw_cmd_req req = {};
> +       int len;
> +       int err;
> +
> +       build_ifinsmod(&req, status);
> +       len = le16_to_cpu(req.datalen);
> +       err = mutex_lock_interruptible(&hw->mbx.lock);
> +       if (err)
> +               return err;
> +
> +       if (status) {
> +               err = mucse_write_posted_mbx(hw, (u32 *)&req,
> +                                            len);
> +       } else {
> +               err = mucse_write_mbx_pf(hw, (u32 *)&req,
> +                                        len);
> +       }
> +
> +       mutex_unlock(&hw->mbx.lock);
> +       return err;
> +}
> +
> +/**
> + * build_reset_phy_req - build req with reset_phy opcode
> + * @req: pointer to the cmd req structure
> + * @cookie: pointer of cookie for this cmd
> + **/
> +static void build_reset_phy_req(struct mbx_fw_cmd_req *req,
> +                               void *cookie)
> +{
> +       req->flags = 0;
> +       req->opcode = cpu_to_le16(RESET_PHY);
> +       req->datalen = cpu_to_le16(MBX_REQ_HDR_LEN);
> +       req->reply_lo = 0;
> +       req->reply_hi = 0;
> +       req->cookie = cookie;
> +}
> +
> +/**
> + * mucse_mbx_fw_reset_phy - Posts a mbx req to reset hw
> + * @hw: pointer to the HW structure
> + *
> + * mucse_mbx_fw_reset_phy posts a mbx req to firmware to reset hw.
> + * It uses mucse_fw_send_cmd_wait if no irq, and mucse_mbx_fw_post_req
> + * if other irq is registered.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_mbx_fw_reset_phy(struct mucse_hw *hw)
> +{
> +       struct mbx_fw_cmd_reply reply = {};
> +       struct mbx_fw_cmd_req req = {};
> +       int ret;
> +
> +       if (hw->mbx.irq_enabled) {
> +               struct mbx_req_cookie *cookie = mbx_cookie_zalloc(0);
> +
> +               if (!cookie)
> +                       return -ENOMEM;
> +
> +               build_reset_phy_req(&req, cookie);
> +               ret = mucse_mbx_fw_post_req(hw, &req, cookie);
> +               kfree(cookie);
> +               return ret;
> +       }
> +
> +       build_reset_phy_req(&req, &req);
> +       return mucse_fw_send_cmd_wait(hw, &req, &reply);
> +}
> +
> +/**
> + * build_get_macaddress_req - build req with get_mac opcode
> + * @req: pointer to the cmd req structure
> + * @port_mask: port valid for this cmd
> + * @pfvfnum: pfvfnum for this cmd
> + * @cookie: pointer of cookie for this cmd
> + **/
> +static void build_get_macaddress_req(struct mbx_fw_cmd_req *req,
> +                                    int port_mask, int pfvfnum,
> +                                    void *cookie)
> +{
> +       req->flags = 0;
> +       req->opcode = cpu_to_le16(GET_MAC_ADDRES);
> +       req->datalen = cpu_to_le16(sizeof(req->get_mac_addr) +
> +                                  MBX_REQ_HDR_LEN);
> +       req->cookie = cookie;
> +       req->reply_lo = 0;
> +       req->reply_hi = 0;
> +       req->get_mac_addr.port_mask = cpu_to_le32(port_mask);
> +       req->get_mac_addr.pfvf_num = cpu_to_le32(pfvfnum);
> +}
> +
> +/**
> + * mucse_fw_get_macaddr - Posts a mbx req to request macaddr
> + * @hw: pointer to the HW structure
> + * @pfvfnum: index of pf/vf num
> + * @mac_addr: pointer to store mac_addr
> + * @port: port index
> + *
> + * mucse_fw_get_macaddr posts a mbx req to firmware to get mac_addr.
> + * It uses mucse_fw_send_cmd_wait if no irq, and mucse_mbx_fw_post_req
> + * if other irq is registered.
> + *
> + * @return: 0 on success, negative on failure
> + **/
> +int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
> +                        u8 *mac_addr,
> +                        int port)
> +{
> +       struct mbx_fw_cmd_reply reply = {};
> +       struct mbx_fw_cmd_req req = {};
> +       int err;
> +
> +       build_get_macaddress_req(&req, BIT(port), pfvfnum, &req);
> +       err = mucse_fw_send_cmd_wait(hw, &req, &reply);
> +       if (err)
> +               return err;
> +       if (le32_to_cpu(reply.mac_addr.ports) & BIT(port))
> +               memcpy(mac_addr, reply.mac_addr.addrs[port].mac, ETH_ALEN);
> +       else
> +               return -ENODATA;
> +       return 0;
> +}
> diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
> new file mode 100644
> index 000000000000..b73238d0e848
> --- /dev/null
> +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
> @@ -0,0 +1,152 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright(c) 2020 - 2025 Mucse Corporation. */
> +
> +#ifndef _RNPGBE_MBX_FW_H
> +#define _RNPGBE_MBX_FW_H
> +
> +#include <linux/types.h>
> +#include <linux/errno.h>
> +#include <linux/wait.h>
> +
> +#include "rnpgbe.h"
> +
> +#define MBX_REQ_HDR_LEN 24
> +
> +struct mbx_fw_cmd_reply;
> +typedef void (*cookie_cb)(struct mbx_fw_cmd_reply *reply, void *priv);
> +
> +struct mbx_req_cookie {
> +       int magic;
> +#define COOKIE_MAGIC 0xCE
> +       cookie_cb cb;
> +       int timeout_jiffies;
> +       int errcode;
> +       wait_queue_head_t wait;
> +       int done;
> +       int priv_len;
> +       char priv[] __counted_by(priv_len);
> +};
> +
> +enum MUCSE_FW_CMD {
> +       GET_PHY_ABILITY = 0x0601,
> +       GET_MAC_ADDRES = 0x0602,
> +       RESET_PHY = 0x0603,
> +       DRIVER_INSMOD = 0x0803,
> +};
> +
> +struct hw_abilities {
> +       u8 link_stat;
> +       u8 port_mask;
> +       __le32 speed;
> +       __le16 phy_type;
> +       __le16 nic_mode;
> +       __le16 pfnum;
> +       __le32 fw_version;
> +       __le32 axi_mhz;
> +       union {
> +               u8 port_id[4];
> +               __le32 port_ids;
> +       };
> +       __le32 bd_uid;
> +       __le32 phy_id;
> +       __le32 wol_status;
> +       union {
> +               __le32 ext_ability;
> +               struct {
> +                       u32 valid : 1;
> +                       u32 wol_en : 1;
> +                       u32 pci_preset_runtime_en : 1;
> +                       u32 smbus_en : 1;
> +                       u32 ncsi_en : 1;
> +                       u32 rpu_en : 1;
> +                       u32 v2 : 1;
> +                       u32 pxe_en : 1;
> +                       u32 mctp_en : 1;
> +                       u32 yt8614 : 1;
> +                       u32 pci_ext_reset : 1;
> +                       u32 rpu_availble : 1;
> +                       u32 fw_lldp_ability : 1;
> +                       u32 lldp_enabled : 1;
> +                       u32 only_1g : 1;
> +                       u32 force_down_en: 1;
> +               } e_host;
> +       };
> +} __packed;
> +
> +/* FW stores extended ability information in 'ext_ability' as a 32-bit
> + * little-endian value. To make these flags easily accessible in the
> + * kernel (via named 'bitfields' instead of raw bitmask operations),
> + * we use the union's 'e_host' struct, which provides named bits
> + * (e.g., 'wol_en', 'smbus_en')
> + */
> +static inline void ability_update_host_endian(struct hw_abilities *abi)
> +{
> +       u32 host_val = le32_to_cpu(abi->ext_ability);
> +
> +       abi->e_host = *(typeof(abi->e_host) *)&host_val;
> +}
> +
> +#define FLAGS_DD BIT(0)
> +#define FLAGS_ERR BIT(2)
> +
> +struct mbx_fw_cmd_req {
> +       __le16 flags;
> +       __le16 opcode;
> +       __le16 datalen;
> +       __le16 ret_value;
> +       union {
> +               struct {
> +                       __le32 cookie_lo;
> +                       __le32 cookie_hi;
> +               };
> +
> +               void *cookie;
> +       };
> +       __le32 reply_lo;
> +       __le32 reply_hi;
> +       union {
> +               u8 data[32];
> +               struct {
> +                       __le32 version;
> +                       __le32 status;
> +               } ifinsmod;
> +               struct {
> +                       __le32 port_mask;
> +                       __le32 pfvf_num;
> +               } get_mac_addr;
> +       };
> +} __packed;
> +
> +struct mbx_fw_cmd_reply {
> +       __le16 flags;
> +       __le16 opcode;
> +       __le16 error_code;
> +       __le16 datalen;
> +       union {
> +               struct {
> +                       __le32 cookie_lo;
> +                       __le32 cookie_hi;
> +               };
> +               void *cookie;
> +       };
> +       union {
> +               u8 data[40];
> +               struct mac_addr {
> +                       __le32 ports;
> +                       struct _addr {
> +                               /* for macaddr:01:02:03:04:05:06
> +                                * mac-hi=0x01020304 mac-lo=0x05060000
> +                                */
> +                               u8 mac[8];
> +                       } addrs[4];
> +               } mac_addr;
> +               struct hw_abilities hw_abilities;
> +       };
> +} __packed;
> +
> +int mucse_mbx_get_capability(struct mucse_hw *hw);
> +int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status);
> +int mucse_mbx_fw_reset_phy(struct mucse_hw *hw);
> +int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
> +                        u8 *mac_addr, int port);
> +#endif /* _RNPGBE_MBX_FW_H */
> --
> 2.25.1
> 
>

Re: [PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support

Posted by Yibo Dong 1 month, 1 week ago

On Fri, Aug 22, 2025 at 04:49:44AM +0000, Parthiban.Veerasooran@microchip.com wrote:
> On 22/08/25 8:04 am, Dong Yibo wrote:
> > +/**
> > + * mucse_mbx_get_capability - Get hw abilities from fw
> > + * @hw: pointer to the HW structure
> > + *
> > + * mucse_mbx_get_capability tries to get capabities from
> > + * hw. Many retrys will do if it is failed.
> > + *
> > + * @return: 0 on success, negative on failure
> > + **/
> > +int mucse_mbx_get_capability(struct mucse_hw *hw)
> > +{
> > +       struct hw_abilities ability = {};
> > +       int try_cnt = 3;
> > +       int err = -EIO;
> > Here too, you don't need to assign -EIO, as err is updated in the while loop.
> 
> Best regards,
> Parthiban V
> > +
> > +       while (try_cnt--) {
> > +               err = mucse_fw_get_capability(hw, &ability);
> > +               if (err)
> > +                       continue;
> > +               hw->pfvfnum = le16_to_cpu(ability.pfnum) & GENMASK_U16(7, 0);
> > +               return 0;
> > +       }
> > +       return err;
> > +}
> > +

err is always updated here because 'try_cnt = 3'. But as far as the code logic
itself is concerned, err should not be left uninitialized, since there is no
guarantee that the code 'within the while' runs at least once. Right?

Thanks for your feedback.

[PATCH net-next v7 1/5] net: rnpgbe: Add build support for rnpgbe
[PATCH net-next v7 2/5] net: rnpgbe: Add n500/n210 chip support
[PATCH net-next v7 3/5] net: rnpgbe: Add basic mbx ops support
[PATCH net-next v7 4/5] net: rnpgbe: Add basic mbx_fw support
[PATCH net-next v7 5/5] net: rnpgbe: Add register_netdev