Initialize basic mbx_fw ops, such as get_capability, reset phy
and so on.
Signed-off-by: Dong Yibo <dong100@mucse.com>
---
drivers/net/ethernet/mucse/rnpgbe/Makefile | 3 +-
drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 4 +
.../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 264 ++++++++++++++++++
.../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 201 +++++++++++++
4 files changed, 471 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile
index 5fc878ada4b1..de8bcb7772ab 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/Makefile
+++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile
@@ -7,4 +7,5 @@
obj-$(CONFIG_MGBE) += rnpgbe.o
rnpgbe-objs := rnpgbe_main.o\
rnpgbe_chip.o\
- rnpgbe_mbx.o
+ rnpgbe_mbx.o\
+ rnpgbe_mbx_fw.o
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
index c3f2a86979d7..7ab1cbb432f6 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h
@@ -83,9 +83,13 @@ struct mucse_mbx_info {
};
struct mucse_hw {
+ u8 pfvfnum;
void __iomem *hw_addr;
void __iomem *ring_msix_base;
struct pci_dev *pdev;
+ u32 fw_version;
+ u32 axi_mhz;
+ u32 bd_uid;
enum rnpgbe_hw_type hw_type;
struct mucse_dma_info dma;
struct mucse_eth_info eth;
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
new file mode 100644
index 000000000000..6a10ffeb74da
--- /dev/null
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 - 2025 Mucse Corporation. */
+
+#include <linux/pci.h>
+
+#include "rnpgbe.h"
+#include "rnpgbe_hw.h"
+#include "rnpgbe_mbx.h"
+#include "rnpgbe_mbx_fw.h"
+
+/**
+ * mucse_fw_send_cmd_wait - Send cmd req and wait for response
+ * @hw: pointer to the HW structure
+ * @req: pointer to the cmd req structure
+ * @reply: pointer to the fw reply structure
+ *
+ * mucse_fw_send_cmd_wait sends req to pf-fw mailbox and wait
+ * reply from fw.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_fw_send_cmd_wait(struct mucse_hw *hw,
+ struct mbx_fw_cmd_req *req,
+ struct mbx_fw_cmd_reply *reply)
+{
+ int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN;
+ int retry_cnt = 3;
+ int err;
+
+ err = mutex_lock_interruptible(&hw->mbx.lock);
+ if (err)
+ return err;
+ err = hw->mbx.ops->write_posted(hw, (u32 *)req,
+ L_WD(len));
+ if (err)
+ goto quit;
+ do {
+ err = hw->mbx.ops->read_posted(hw, (u32 *)reply,
+ L_WD(sizeof(*reply)));
+ if (err)
+ goto quit;
+ } while (--retry_cnt >= 0 && reply->opcode != req->opcode);
+quit:
+ mutex_unlock(&hw->mbx.lock);
+ if (!err && retry_cnt < 0)
+ return -ETIMEDOUT;
+ if (!err && reply->error_code)
+ return -EIO;
+ return err;
+}
+
+/**
+ * mucse_fw_get_capability - Get hw abilities from fw
+ * @hw: pointer to the HW structure
+ * @abil: pointer to the hw_abilities structure
+ *
+ * mucse_fw_get_capability tries to get hw abilities from
+ * hw.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_fw_get_capability(struct mucse_hw *hw,
+ struct hw_abilities *abil)
+{
+ struct mbx_fw_cmd_reply reply = {};
+ struct mbx_fw_cmd_req req = {};
+ int err;
+
+ build_phy_abilities_req(&req, &req);
+ err = mucse_fw_send_cmd_wait(hw, &req, &reply);
+ if (!err)
+ memcpy(abil, &reply.hw_abilities, sizeof(*abil));
+ return err;
+}
+
+/**
+ * mucse_mbx_get_capability - Get hw abilities from fw
+ * @hw: pointer to the HW structure
+ *
+ * mucse_mbx_get_capability tries to get capabities from
+ * hw. Many retrys will do if it is failed.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_get_capability(struct mucse_hw *hw)
+{
+ struct hw_abilities ability = {};
+ int try_cnt = 3;
+ int err = -EIO;
+
+ while (try_cnt--) {
+ err = mucse_fw_get_capability(hw, &ability);
+ if (err)
+ continue;
+ hw->pfvfnum = le16_to_cpu(ability.pfnum);
+ hw->fw_version = le32_to_cpu(ability.fw_version);
+ hw->axi_mhz = le32_to_cpu(ability.axi_mhz);
+ hw->bd_uid = le32_to_cpu(ability.bd_uid);
+ return 0;
+ }
+ return err;
+}
+
+/**
+ * mbx_cookie_zalloc - Alloc a cookie structure
+ * @priv_len: private length for this cookie
+ *
+ * @return: cookie structure on success
+ **/
+static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len)
+{
+ struct mbx_req_cookie *cookie;
+
+ cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL);
+ if (cookie) {
+ cookie->timeout_jiffes = 30 * HZ;
+ cookie->magic = COOKIE_MAGIC;
+ cookie->priv_len = priv_len;
+ }
+ return cookie;
+}
+
+/**
+ * mucse_mbx_fw_post_req - Posts a mbx req to firmware and wait reply
+ * @hw: pointer to the HW structure
+ * @req: pointer to the cmd req structure
+ * @cookie: pointer to the req cookie
+ *
+ * mucse_mbx_fw_post_req posts a mbx req to firmware and wait for the
+ * reply. cookie->wait will be set in irq handler.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+static int mucse_mbx_fw_post_req(struct mucse_hw *hw,
+ struct mbx_fw_cmd_req *req,
+ struct mbx_req_cookie *cookie)
+{
+ int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN;
+ int err;
+
+ cookie->errcode = 0;
+ cookie->done = 0;
+ init_waitqueue_head(&cookie->wait);
+ err = mutex_lock_interruptible(&hw->mbx.lock);
+ if (err)
+ return err;
+ err = mucse_write_mbx(hw, (u32 *)req,
+ L_WD(len));
+ if (err) {
+ mutex_unlock(&hw->mbx.lock);
+ return err;
+ }
+ do {
+ err = wait_event_interruptible_timeout(cookie->wait,
+ cookie->done == 1,
+ cookie->timeout_jiffes);
+ } while (err == -ERESTARTSYS);
+
+ mutex_unlock(&hw->mbx.lock);
+ if (!err)
+ err = -ETIME;
+ else
+ err = 0;
+ if (!err && cookie->errcode)
+ err = cookie->errcode;
+
+ return err;
+}
+
+/**
+ * mucse_mbx_ifinsmod - Echo driver insmod status to hw
+ * @hw: pointer to the HW structure
+ * @status: true for insmod, false for rmmod
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status)
+{
+ struct mbx_fw_cmd_req req = {};
+ int len;
+ int err;
+
+ build_ifinsmod(&req, hw->driver_version, status);
+ len = le16_to_cpu(req.datalen) + MBX_REQ_HDR_LEN;
+ err = mutex_lock_interruptible(&hw->mbx.lock);
+ if (err)
+ return err;
+
+ if (status) {
+ err = hw->mbx.ops->write_posted(hw, (u32 *)&req,
+ L_WD(len));
+ } else {
+ err = hw->mbx.ops->write(hw, (u32 *)&req,
+ L_WD(len));
+ }
+
+ mutex_unlock(&hw->mbx.lock);
+ return err;
+}
+
+/**
+ * mucse_mbx_fw_reset_phy - Posts a mbx req to reset hw
+ * @hw: pointer to the HW structure
+ *
+ * mucse_mbx_fw_reset_phy posts a mbx req to firmware to reset hw.
+ * It uses mucse_fw_send_cmd_wait if no irq, and mucse_mbx_fw_post_req
+ * if other irq is registered.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_mbx_fw_reset_phy(struct mucse_hw *hw)
+{
+ struct mbx_fw_cmd_reply reply = {};
+ struct mbx_fw_cmd_req req = {};
+ int ret;
+
+ if (hw->mbx.irq_enabled) {
+ struct mbx_req_cookie *cookie = mbx_cookie_zalloc(0);
+
+ if (!cookie)
+ return -ENOMEM;
+
+ build_reset_phy_req(&req, cookie);
+ ret = mucse_mbx_fw_post_req(hw, &req, cookie);
+ kfree(cookie);
+ return ret;
+ }
+
+ build_reset_phy_req(&req, &req);
+ return mucse_fw_send_cmd_wait(hw, &req, &reply);
+}
+
+/**
+ * mucse_fw_get_macaddr - Posts a mbx req to request macaddr
+ * @hw: pointer to the HW structure
+ * @pfvfnum: index of pf/vf num
+ * @mac_addr: pointer to store mac_addr
+ * @lane: lane index
+ *
+ * mucse_fw_get_macaddr posts a mbx req to firmware to get mac_addr.
+ * It uses mucse_fw_send_cmd_wait if no irq, and mucse_mbx_fw_post_req
+ * if other irq is registered.
+ *
+ * @return: 0 on success, negative on failure
+ **/
+int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
+ u8 *mac_addr,
+ int lane)
+{
+ struct mbx_fw_cmd_reply reply = {};
+ struct mbx_fw_cmd_req req = {};
+ int err;
+
+ build_get_macaddress_req(&req, 1 << lane, pfvfnum, &req);
+ err = mucse_fw_send_cmd_wait(hw, &req, &reply);
+ if (err)
+ return err;
+
+ if ((1 << lane) & le32_to_cpu(reply.mac_addr.lanes))
+ memcpy(mac_addr, reply.mac_addr.addrs[lane].mac, 6);
+ else
+ return -ENODATA;
+ return 0;
+}
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
new file mode 100644
index 000000000000..1d434536c616
--- /dev/null
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 - 2025 Mucse Corporation. */
+
+#ifndef _RNPGBE_MBX_FW_H
+#define _RNPGBE_MBX_FW_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+
+#include "rnpgbe.h"
+
+#define MBX_REQ_HDR_LEN 24
+#define L_WD(x) ((x) / 4)
+#define M_DEFAULT_DUMY 0xa0000000
+#define M_DUMY_MASK 0x0f000f11
+#define EVT_LINK_UP 1
+
+struct mbx_fw_cmd_reply;
+typedef void (*cookie_cb)(struct mbx_fw_cmd_reply *reply, void *priv);
+
+struct mbx_req_cookie {
+ int magic;
+#define COOKIE_MAGIC 0xCE
+ cookie_cb cb;
+ int timeout_jiffes;
+ int errcode;
+ wait_queue_head_t wait;
+ int done;
+ int priv_len;
+ char priv[];
+};
+
+enum MUCSE_FW_CMD {
+ GET_PHY_ABALITY = 0x0601,
+ GET_MAC_ADDRES = 0x0602,
+ RESET_PHY = 0x0603,
+ DRIVER_INSMOD = 0x0803,
+};
+
+struct hw_abilities {
+ u8 link_stat;
+ u8 lane_mask;
+ __le32 speed;
+ __le16 phy_type;
+ __le16 nic_mode;
+ __le16 pfnum;
+ __le32 fw_version;
+ __le32 axi_mhz;
+ union {
+ u8 port_id[4];
+ __le32 port_ids;
+ };
+ __le32 bd_uid;
+ __le32 phy_id;
+ __le32 wol_status;
+ union {
+ __le32 ext_ability;
+ struct {
+ u32 valid : 1;
+ u32 wol_en : 1;
+ u32 pci_preset_runtime_en : 1;
+ u32 smbus_en : 1;
+ u32 ncsi_en : 1;
+ u32 rpu_en : 1;
+ u32 v2 : 1;
+ u32 pxe_en : 1;
+ u32 mctp_en : 1;
+ u32 yt8614 : 1;
+ u32 pci_ext_reset : 1;
+ u32 rpu_availble : 1;
+ u32 fw_lldp_ability : 1;
+ u32 lldp_enabled : 1;
+ u32 only_1g : 1;
+ u32 force_down_en: 1;
+ } e_host;
+ };
+} __packed;
+
+static inline void ability_update_host_endian(struct hw_abilities *abi)
+{
+ u32 host_val = le32_to_cpu(abi->ext_ability);
+
+ abi->e_host = *(typeof(abi->e_host) *)&host_val;
+}
+
+#define FLAGS_DD BIT(0)
+#define FLAGS_ERR BIT(2)
+
+/* Request is in little-endian format. Big-endian systems should be considered */
+struct mbx_fw_cmd_req {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 ret_value;
+ union {
+ struct {
+ __le32 cookie_lo;
+ __le32 cookie_hi;
+ };
+
+ void *cookie;
+ };
+ __le32 reply_lo;
+ __le32 reply_hi;
+ union {
+ u8 data[32];
+ struct {
+ __le32 lane;
+ __le32 status;
+ } ifinsmod;
+ struct {
+ __le32 lane_mask;
+ __le32 pfvf_num;
+ } get_mac_addr;
+ };
+} __packed;
+
+struct mbx_fw_cmd_reply {
+ __le16 flags;
+ __le16 opcode;
+ __le16 error_code;
+ __le16 datalen;
+ union {
+ struct {
+ __le32 cookie_lo;
+ __le32 cookie_hi;
+ };
+ void *cookie;
+ };
+ union {
+ u8 data[40];
+ struct mac_addr {
+ __le32 lanes;
+ struct _addr {
+ /* for macaddr:01:02:03:04:05:06
+ * mac-hi=0x01020304 mac-lo=0x05060000
+ */
+ u8 mac[8];
+ } addrs[4];
+ } mac_addr;
+ struct hw_abilities hw_abilities;
+ };
+} __packed;
+
+static inline void build_phy_abilities_req(struct mbx_fw_cmd_req *req,
+ void *cookie)
+{
+ req->flags = 0;
+ req->opcode = cpu_to_le16(GET_PHY_ABALITY);
+ req->datalen = 0;
+ req->reply_lo = 0;
+ req->reply_hi = 0;
+ req->cookie = cookie;
+}
+
+static inline void build_ifinsmod(struct mbx_fw_cmd_req *req,
+ unsigned int lane,
+ int status)
+{
+ req->flags = 0;
+ req->opcode = cpu_to_le16(DRIVER_INSMOD);
+ req->datalen = cpu_to_le16(sizeof(req->ifinsmod));
+ req->cookie = NULL;
+ req->reply_lo = 0;
+ req->reply_hi = 0;
+ req->ifinsmod.lane = cpu_to_le32(lane);
+ req->ifinsmod.status = cpu_to_le32(status);
+}
+
+static inline void build_reset_phy_req(struct mbx_fw_cmd_req *req,
+ void *cookie)
+{
+ req->flags = 0;
+ req->opcode = cpu_to_le16(RESET_PHY);
+ req->datalen = 0;
+ req->reply_lo = 0;
+ req->reply_hi = 0;
+ req->cookie = cookie;
+}
+
+static inline void build_get_macaddress_req(struct mbx_fw_cmd_req *req,
+ int lane_mask, int pfvfnum,
+ void *cookie)
+{
+ req->flags = 0;
+ req->opcode = cpu_to_le16(GET_MAC_ADDRES);
+ req->datalen = cpu_to_le16(sizeof(req->get_mac_addr));
+ req->cookie = cookie;
+ req->reply_lo = 0;
+ req->reply_hi = 0;
+ req->get_mac_addr.lane_mask = cpu_to_le32(lane_mask);
+ req->get_mac_addr.pfvf_num = cpu_to_le32(pfvfnum);
+}
+
+int mucse_mbx_get_capability(struct mucse_hw *hw);
+int mucse_mbx_ifinsmod(struct mucse_hw *hw, int status);
+int mucse_mbx_fw_reset_phy(struct mucse_hw *hw);
+int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum,
+ u8 *mac_addr, int lane);
+#endif /* _RNPGBE_MBX_FW_H */
--
2.25.1
> struct mucse_hw { > + u8 pfvfnum; > void __iomem *hw_addr; > void __iomem *ring_msix_base; > struct pci_dev *pdev; > + u32 fw_version; > + u32 axi_mhz; > + u32 bd_uid; > enum rnpgbe_hw_type hw_type; > struct mucse_dma_info dma; > struct mucse_eth_info eth; Think about alignment of these structures. The compiler is going to put in padding after the u8 phvfnum. The 3 pointers are all the same size, no padding. The u32 probably go straight after the pointers. The enum it might represent as a single byte, so there is will be padding before dma. So consider moving the u8 next to the enum. pahole(1) will tell you what the compiler really did, but you will find more experienced engineers try to minimise padding, or deliberately group hot items on a cache line, and document that. > +static int mucse_fw_send_cmd_wait(struct mucse_hw *hw, > + struct mbx_fw_cmd_req *req, > + struct mbx_fw_cmd_reply *reply) > +{ > + int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN; > + int retry_cnt = 3; > + int err; > + > + err = mutex_lock_interruptible(&hw->mbx.lock); > + if (err) > + return err; > + err = hw->mbx.ops->write_posted(hw, (u32 *)req, > + L_WD(len)); This L_WD macro is not nice. It seems like a place bugs will be introduced, forgetting to call it here. Why not have write_posted() take bytes, and have the lowest layer convert to 32 bit words. It also seems odd you are adding MBX_REQ_HDR_LEN here but not that actual header. Why not increase the length at the point the header is actually added? Keep stuff logically together. > + if (err) > + goto quit; > + do { > + err = hw->mbx.ops->read_posted(hw, (u32 *)reply, > + L_WD(sizeof(*reply))); > + if (err) > + goto quit; > + } while (--retry_cnt >= 0 && reply->opcode != req->opcode); > +quit: Maybe add some documentation about what is actually going on here. I assume you are trying to get the driver and firmware into sync after one or other has crashed, burned, and rebooted. You need to flush out old replies. You allow up to three old replies to be in the queue, and then give up. Since you don't retry the write, you don't expect writes to be lost? > + mutex_unlock(&hw->mbx.lock); > + if (!err && retry_cnt < 0) > + return -ETIMEDOUT; > + if (!err && reply->error_code) > + return -EIO; > + return err; > +} > + > +/** > + * mucse_fw_get_capability - Get hw abilities from fw > + * @hw: pointer to the HW structure > + * @abil: pointer to the hw_abilities structure > + * > + * mucse_fw_get_capability tries to get hw abilities from > + * hw. > + * > + * @return: 0 on success, negative on failure > + **/ > +static int mucse_fw_get_capability(struct mucse_hw *hw, > + struct hw_abilities *abil) > +{ > + struct mbx_fw_cmd_reply reply = {}; > + struct mbx_fw_cmd_req req = {}; > + int err; > + > + build_phy_abilities_req(&req, &req); Passing the same parameter twice? Is that correct? It looks very odd. > +/** > + * mbx_cookie_zalloc - Alloc a cookie structure > + * @priv_len: private length for this cookie > + * > + * @return: cookie structure on success > + **/ > +static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len) > +{ > + struct mbx_req_cookie *cookie; > + > + cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL); > + if (cookie) { > + cookie->timeout_jiffes = 30 * HZ; > + cookie->magic = COOKIE_MAGIC; > + cookie->priv_len = priv_len; > + } > + return cookie; > +struct mbx_req_cookie { > + int magic; > +#define COOKIE_MAGIC 0xCE > + cookie_cb cb; > + int timeout_jiffes; > + int errcode; > + wait_queue_head_t wait; > + int done; > + int priv_len; > + char priv[]; > +}; Using struct_size() makes me think this is supposed to be a flexible array? I've never used them myself, but shouldn't be some markup so the compiler knows priv_len is the len of priv? > +static int mucse_mbx_fw_post_req(struct mucse_hw *hw, > + struct mbx_fw_cmd_req *req, > + struct mbx_req_cookie *cookie) > +{ > + int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN; > + int err; > + > + cookie->errcode = 0; > + cookie->done = 0; > + init_waitqueue_head(&cookie->wait); > + err = mutex_lock_interruptible(&hw->mbx.lock); > + if (err) > + return err; > + err = mucse_write_mbx(hw, (u32 *)req, > + L_WD(len)); > + if (err) { > + mutex_unlock(&hw->mbx.lock); Please try to put the unlock at the end of the function, with a goto on error. > + return err; > + } > + do { > + err = wait_event_interruptible_timeout(cookie->wait, > + cookie->done == 1, > + cookie->timeout_jiffes); > + } while (err == -ERESTARTSYS); This needs a comment, because i don't understand it. > + mutex_unlock(&hw->mbx.lock); > + if (!err) > + err = -ETIME; I _think_ ETIMEDOUT would be more normal. > + else > + err = 0; > + if (!err && cookie->errcode) > + err = cookie->errcode; > + > + return err; > +} > +int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum, > + u8 *mac_addr, > + int lane) > +{ > + struct mbx_fw_cmd_reply reply = {}; > + struct mbx_fw_cmd_req req = {}; > + int err; > + > + build_get_macaddress_req(&req, 1 << lane, pfvfnum, &req); > + err = mucse_fw_send_cmd_wait(hw, &req, &reply); > + if (err) > + return err; > + > + if ((1 << lane) & le32_to_cpu(reply.mac_addr.lanes)) BIT(). And normally the & would be the other way around. What exactly is a lane here? Normally we would think of a lane is -KR4, 4 SERDES lanes making one port. But the MAC address is a property of the port, not the lane within a port. > + memcpy(mac_addr, reply.mac_addr.addrs[lane].mac, 6); There is a macro for 6, please use it. > +struct hw_abilities { > + u8 link_stat; > + u8 lane_mask; > + __le32 speed; > + __le16 phy_type; > + __le16 nic_mode; > + __le16 pfnum; Another example of a bad structure layout. It would of been much better to put the two u8 after speed. > +} __packed; And because this is packed, and badly aligned, you are forcing the compiler to do a lot more work accessing these members. > + > +static inline void ability_update_host_endian(struct hw_abilities *abi) > +{ > + u32 host_val = le32_to_cpu(abi->ext_ability); > + > + abi->e_host = *(typeof(abi->e_host) *)&host_val; > +} Please add a comment what this is doing, it is not obvious. > + > +#define FLAGS_DD BIT(0) > +#define FLAGS_ERR BIT(2) > + > +/* Request is in little-endian format. Big-endian systems should be considered */ So the code now sparse clean? If it is, you can probably remove this comment. > +static inline void build_phy_abilities_req(struct mbx_fw_cmd_req *req, > + void *cookie) > +{ > + req->flags = 0; > + req->opcode = cpu_to_le16(GET_PHY_ABALITY); > + req->datalen = 0; > + req->reply_lo = 0; > + req->reply_hi = 0; > + req->cookie = cookie; > +} > + > +static inline void build_ifinsmod(struct mbx_fw_cmd_req *req, > + unsigned int lane, > + int status) > +{ > + req->flags = 0; > + req->opcode = cpu_to_le16(DRIVER_INSMOD); > + req->datalen = cpu_to_le16(sizeof(req->ifinsmod)); > + req->cookie = NULL; > + req->reply_lo = 0; > + req->reply_hi = 0; > + req->ifinsmod.lane = cpu_to_le32(lane); > + req->ifinsmod.status = cpu_to_le32(status); > +} > + > +static inline void build_reset_phy_req(struct mbx_fw_cmd_req *req, > + void *cookie) > +{ > + req->flags = 0; > + req->opcode = cpu_to_le16(RESET_PHY); > + req->datalen = 0; > + req->reply_lo = 0; > + req->reply_hi = 0; > + req->cookie = cookie; > +} > + > +static inline void build_get_macaddress_req(struct mbx_fw_cmd_req *req, > + int lane_mask, int pfvfnum, > + void *cookie) > +{ > + req->flags = 0; > + req->opcode = cpu_to_le16(GET_MAC_ADDRES); > + req->datalen = cpu_to_le16(sizeof(req->get_mac_addr)); > + req->cookie = cookie; > + req->reply_lo = 0; > + req->reply_hi = 0; > + req->get_mac_addr.lane_mask = cpu_to_le32(lane_mask); > + req->get_mac_addr.pfvf_num = cpu_to_le32(pfvfnum); > +} These are rather large for inline functions in a header. Please move them into a .c file. Andrew
On Fri, Aug 15, 2025 at 05:27:51AM +0200, Andrew Lunn wrote: > > struct mucse_hw { > > + u8 pfvfnum; > > void __iomem *hw_addr; > > void __iomem *ring_msix_base; > > struct pci_dev *pdev; > > + u32 fw_version; > > + u32 axi_mhz; > > + u32 bd_uid; > > enum rnpgbe_hw_type hw_type; > > struct mucse_dma_info dma; > > struct mucse_eth_info eth; > > Think about alignment of these structures. The compiler is going to > put in padding after the u8 phvfnum. The 3 pointers are all the same > size, no padding. The u32 probably go straight after the pointers. The > enum it might represent as a single byte, so there is will be padding > before dma. So consider moving the u8 next to the enum. > > pahole(1) will tell you what the compiler really did, but you will > find more experienced engineers try to minimise padding, or > deliberately group hot items on a cache line, and document that. > Got it, I will update this. > > +static int mucse_fw_send_cmd_wait(struct mucse_hw *hw, > > + struct mbx_fw_cmd_req *req, > > + struct mbx_fw_cmd_reply *reply) > > +{ > > + int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN; > > + int retry_cnt = 3; > > + int err; > > + > > + err = mutex_lock_interruptible(&hw->mbx.lock); > > + if (err) > > + return err; > > + err = hw->mbx.ops->write_posted(hw, (u32 *)req, > > + L_WD(len)); > > This L_WD macro is not nice. It seems like a place bugs will be > introduced, forgetting to call it here. Why not have write_posted() > take bytes, and have the lowest layer convert to 32 bit words. > > It also seems odd you are adding MBX_REQ_HDR_LEN here but not that > actual header. Why not increase the length at the point the header is > actually added? Keep stuff logically together. > Ok, I will have write_posted() take bytes like you suggestion, and try to improve len here. > > + if (err) > > + goto quit; > > + do { > > + err = hw->mbx.ops->read_posted(hw, (u32 *)reply, > > + L_WD(sizeof(*reply))); > > + if (err) > > + goto quit; > > + } while (--retry_cnt >= 0 && reply->opcode != req->opcode); > > +quit: > > Maybe add some documentation about what is actually going on here. I > assume you are trying to get the driver and firmware into sync after > one or other has crashed, burned, and rebooted. You need to flush out > old replies. You allow up to three old replies to be in the queue, and > then give up. Since you don't retry the write, you don't expect writes > to be lost? > > write_posted return 0 only after fw acked it, so no need to write. It is a sync mechanism, tries to get the correct response opcode. Maybe comment link this? /* write_posted return 0 means fw has received request, wait for * the expect opcode reply with 'retry_cnt' times. */ > > + mutex_unlock(&hw->mbx.lock); > > + if (!err && retry_cnt < 0) > > + return -ETIMEDOUT; > > + if (!err && reply->error_code) > > + return -EIO; > > + return err; > > +} > > + > > +/** > > + * mucse_fw_get_capability - Get hw abilities from fw > > + * @hw: pointer to the HW structure > > + * @abil: pointer to the hw_abilities structure > > + * > > + * mucse_fw_get_capability tries to get hw abilities from > > + * hw. > > + * > > + * @return: 0 on success, negative on failure > > + **/ > > +static int mucse_fw_get_capability(struct mucse_hw *hw, > > + struct hw_abilities *abil) > > +{ > > + struct mbx_fw_cmd_reply reply = {}; > > + struct mbx_fw_cmd_req req = {}; > > + int err; > > + > > + build_phy_abilities_req(&req, &req); > > Passing the same parameter twice? Is that correct? It looks very odd. > Got it, I will fix it. > > +/** > > + * mbx_cookie_zalloc - Alloc a cookie structure > > + * @priv_len: private length for this cookie > > + * > > + * @return: cookie structure on success > > + **/ > > +static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len) > > +{ > > + struct mbx_req_cookie *cookie; > > + > > + cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL); > > + if (cookie) { > > + cookie->timeout_jiffes = 30 * HZ; > > + cookie->magic = COOKIE_MAGIC; > > + cookie->priv_len = priv_len; > > + } > > + return cookie; > > > +struct mbx_req_cookie { > > + int magic; > > +#define COOKIE_MAGIC 0xCE > > + cookie_cb cb; > > + int timeout_jiffes; > > + int errcode; > > + wait_queue_head_t wait; > > + int done; > > + int priv_len; > > + char priv[]; > > +}; > > > Using struct_size() makes me think this is supposed to be a flexible > array? I've never used them myself, but shouldn't be some markup so > the compiler knows priv_len is the len of priv? > Maybe link this? struct mbx_req_cookie { .... int priv_len; char priv[] __counted_by(priv_len); } > > +static int mucse_mbx_fw_post_req(struct mucse_hw *hw, > > + struct mbx_fw_cmd_req *req, > > + struct mbx_req_cookie *cookie) > > +{ > > + int len = le16_to_cpu(req->datalen) + MBX_REQ_HDR_LEN; > > + int err; > > + > > + cookie->errcode = 0; > > + cookie->done = 0; > > + init_waitqueue_head(&cookie->wait); > > + err = mutex_lock_interruptible(&hw->mbx.lock); > > + if (err) > > + return err; > > + err = mucse_write_mbx(hw, (u32 *)req, > > + L_WD(len)); > > + if (err) { > > + mutex_unlock(&hw->mbx.lock); > > Please try to put the unlock at the end of the function, with a goto > on error. > Got it, I will fix it. > > + return err; > > + } > > + do { > > + err = wait_event_interruptible_timeout(cookie->wait, > > + cookie->done == 1, > > + cookie->timeout_jiffes); > > + } while (err == -ERESTARTSYS); > > This needs a comment, because i don't understand it. > > wait_event_interruptible_timeout return -ERESTARTSYS if it was interrupted by a signal, which will cause misjudgement about cookie->done is timeout. In this case, just wait for timeout. Maybe comment link this? /* If it was interrupted by a signal (-ERESTARTSYS), it is not true timeout, * just wait again. */ > > + mutex_unlock(&hw->mbx.lock); > > + if (!err) > > + err = -ETIME; > > I _think_ ETIMEDOUT would be more normal. > Got it, I will update it. > > + else > > + err = 0; > > + if (!err && cookie->errcode) > > + err = cookie->errcode; > > + > > + return err; > > +} > > +int mucse_fw_get_macaddr(struct mucse_hw *hw, int pfvfnum, > > + u8 *mac_addr, > > + int lane) > > +{ > > + struct mbx_fw_cmd_reply reply = {}; > > + struct mbx_fw_cmd_req req = {}; > > + int err; > > + > > + build_get_macaddress_req(&req, 1 << lane, pfvfnum, &req); > > + err = mucse_fw_send_cmd_wait(hw, &req, &reply); > > + if (err) > > + return err; > > + > > + if ((1 << lane) & le32_to_cpu(reply.mac_addr.lanes)) > > BIT(). And normally the & would be the other way around. > Maybe changed link this? ... if (le32_to_cpu(reply.mac_addr.ports) & BIT(lane)) ... > What exactly is a lane here? Normally we would think of a lane is > -KR4, 4 SERDES lanes making one port. But the MAC address is a > property of the port, not the lane within a port. > lane is the valid bit in 'reply.mac_addr.ports'. Maybe change it to 'port', that is more appropriate. > > + memcpy(mac_addr, reply.mac_addr.addrs[lane].mac, 6); > > There is a macro for 6, please use it. > Got it, I will use ETH_ALEN. > > +struct hw_abilities { > > + u8 link_stat; > > + u8 lane_mask; > > + __le32 speed; > > + __le16 phy_type; > > + __le16 nic_mode; > > + __le16 pfnum; > > Another example of a bad structure layout. It would of been much > better to put the two u8 after speed. > > > +} __packed; > > And because this is packed, and badly aligned, you are forcing the > compiler to do a lot more work accessing these members. > Yes, It is bad. But FW use this define, I can only follow the define... Maybe I can add comment here? /* Must follow FW define here */ > > + > > +static inline void ability_update_host_endian(struct hw_abilities *abi) > > +{ > > + u32 host_val = le32_to_cpu(abi->ext_ability); > > + > > + abi->e_host = *(typeof(abi->e_host) *)&host_val; > > +} > > Please add a comment what this is doing, it is not obvious. > > Maybe link this? /* Converts the little-endian ext_ability field to host byte order, * then copies the value into the e_host field by reinterpreting the * memory as the type of e_host (likely a bitfield or structure that * represents the extended abilities in a host-friendly format). */ > > + > > +#define FLAGS_DD BIT(0) > > +#define FLAGS_ERR BIT(2) > > + > > +/* Request is in little-endian format. Big-endian systems should be considered */ > > So the code now sparse clean? If it is, you can probably remove this > comment. > Yes, sparse clean. I will remove this. > > +static inline void build_phy_abilities_req(struct mbx_fw_cmd_req *req, > > + void *cookie) > > +{ > > + req->flags = 0; > > + req->opcode = cpu_to_le16(GET_PHY_ABALITY); > > + req->datalen = 0; > > + req->reply_lo = 0; > > + req->reply_hi = 0; > > + req->cookie = cookie; > > +} > > + > > +static inline void build_ifinsmod(struct mbx_fw_cmd_req *req, > > + unsigned int lane, > > + int status) > > +{ > > + req->flags = 0; > > + req->opcode = cpu_to_le16(DRIVER_INSMOD); > > + req->datalen = cpu_to_le16(sizeof(req->ifinsmod)); > > + req->cookie = NULL; > > + req->reply_lo = 0; > > + req->reply_hi = 0; > > + req->ifinsmod.lane = cpu_to_le32(lane); > > + req->ifinsmod.status = cpu_to_le32(status); > > +} > > + > > +static inline void build_reset_phy_req(struct mbx_fw_cmd_req *req, > > + void *cookie) > > +{ > > + req->flags = 0; > > + req->opcode = cpu_to_le16(RESET_PHY); > > + req->datalen = 0; > > + req->reply_lo = 0; > > + req->reply_hi = 0; > > + req->cookie = cookie; > > +} > > + > > +static inline void build_get_macaddress_req(struct mbx_fw_cmd_req *req, > > + int lane_mask, int pfvfnum, > > + void *cookie) > > +{ > > + req->flags = 0; > > + req->opcode = cpu_to_le16(GET_MAC_ADDRES); > > + req->datalen = cpu_to_le16(sizeof(req->get_mac_addr)); > > + req->cookie = cookie; > > + req->reply_lo = 0; > > + req->reply_hi = 0; > > + req->get_mac_addr.lane_mask = cpu_to_le32(lane_mask); > > + req->get_mac_addr.pfvf_num = cpu_to_le32(pfvfnum); > > +} > > These are rather large for inline functions in a header. Please move > them into a .c file. > > Andrew > Got it. I will update it. Thanks for your feedback.
> > Using struct_size() makes me think this is supposed to be a flexible > > array? I've never used them myself, but shouldn't be some markup so > > the compiler knows priv_len is the len of priv? > > > > Maybe link this? > struct mbx_req_cookie { > .... > int priv_len; > char priv[] __counted_by(priv_len); > } Yes, that looks correct. > > > + return err; > > > + } > > > + do { > > > + err = wait_event_interruptible_timeout(cookie->wait, > > > + cookie->done == 1, > > > + cookie->timeout_jiffes); > > > + } while (err == -ERESTARTSYS); > > > > This needs a comment, because i don't understand it. > > > > > > wait_event_interruptible_timeout return -ERESTARTSYS if it was interrupted > by a signal, which will cause misjudgement about cookie->done is timeout. > In this case, just wait for timeout. > Maybe comment link this? > /* If it was interrupted by a signal (-ERESTARTSYS), it is not true timeout, > * just wait again. > */ But why use wait_event_interruptible_timout() if you are going to ignore all interrupts, a.k.a. signals? Why not use wait_event_timeout()? > > > + if ((1 << lane) & le32_to_cpu(reply.mac_addr.lanes)) > > > > BIT(). And normally the & would be the other way around. > > > > Maybe changed link this? > ... > if (le32_to_cpu(reply.mac_addr.ports) & BIT(lane)) Yes, that is better. > > What exactly is a lane here? Normally we would think of a lane is > > -KR4, 4 SERDES lanes making one port. But the MAC address is a > > property of the port, not the lane within a port. > > > > lane is the valid bit in 'reply.mac_addr.ports'. > Maybe change it to 'port', that is more appropriate. You need to be careful with your terms. I read in another patch, that there is a dual version and a quad version. I've not yet seen how you handle this, but i assume they are identical, and appear on the PCI bus X number of times, and this driver will probe X times, once per instance. We would normally refer to each instance as an interface. But this driver also mentions PF, so i assume you also have VFs? And if you have VF, i assume you have an embedded switch which each of the VFs are connected to. Each VF would normally be connected to a port of the switch. So even though you don't have VF support yet, you should be thinking forward. In the big picture architecture, what does this lane/port represent? What do other drivers call it? > > Another example of a bad structure layout. It would of been much > > better to put the two u8 after speed. > > > > > +} __packed; > > > > And because this is packed, and badly aligned, you are forcing the > > compiler to do a lot more work accessing these members. > > > > Yes, It is bad. But FW use this define, I can only follow the define... > Maybe I can add comment here? > /* Must follow FW define here */ No need. When somebody sees __packed, it becomes obvious this is ABI and cannot be changed. Just think about it for any future extensions to the firmware ABI. > > > > + > > > +static inline void ability_update_host_endian(struct hw_abilities *abi) > > > +{ > > > + u32 host_val = le32_to_cpu(abi->ext_ability); > > > + > > > + abi->e_host = *(typeof(abi->e_host) *)&host_val; > > > +} > > > > Please add a comment what this is doing, it is not obvious. > > > > > > Maybe link this? > /* Converts the little-endian ext_ability field to host byte order, > * then copies the value into the e_host field by reinterpreting the > * memory as the type of e_host (likely a bitfield or structure that > * represents the extended abilities in a host-friendly format). > */ This explains what you are doing, but why? Why do you do this only to this field? What about all the others? Andrew
On Fri, Aug 15, 2025 at 03:21:33PM +0200, Andrew Lunn wrote: > > > > + return err; > > > > + } > > > > + do { > > > > + err = wait_event_interruptible_timeout(cookie->wait, > > > > + cookie->done == 1, > > > > + cookie->timeout_jiffes); > > > > + } while (err == -ERESTARTSYS); > > > > > > This needs a comment, because i don't understand it. > > > > > > > > > > wait_event_interruptible_timeout return -ERESTARTSYS if it was interrupted > > by a signal, which will cause misjudgement about cookie->done is timeout. > > In this case, just wait for timeout. > > Maybe comment link this? > > /* If it was interrupted by a signal (-ERESTARTSYS), it is not true timeout, > > * just wait again. > > */ > > But why use wait_event_interruptible_timout() if you are going to > ignore all interrupts, a.k.a. signals? Why not use > wait_event_timeout()? > Yes, I should use wait_event_timeout, I will fix it. > > > What exactly is a lane here? Normally we would think of a lane is > > > -KR4, 4 SERDES lanes making one port. But the MAC address is a > > > property of the port, not the lane within a port. > > > > > > > lane is the valid bit in 'reply.mac_addr.ports'. > > Maybe change it to 'port', that is more appropriate. > > You need to be careful with your terms. I read in another patch, that > there is a dual version and a quad version. I've not yet seen how you > handle this, but i assume they are identical, and appear on the PCI > bus X number of times, and this driver will probe X times, once per > instance. We would normally refer to each instance as an > interface. But this driver also mentions PF, so i assume you also have > VFs? And if you have VF, i assume you have an embedded switch which > each of the VFs are connected to. Each VF would normally be connected > to a port of the switch. > > So even though you don't have VF support yet, you should be thinking > forward. In the big picture architecture, what does this lane/port > represent? What do other drivers call it? > "lane/port" in the code does not refer to SERDES physical lanes (like KR4’s 4 lanes per port). It is for physical network ports (or a PF). We use it as a valid bit since fw cmd support multiple physical network ports within a pci device (with one mbx handler). So, for each PCI bus X, 'port' is started from 0. PCI bus X -- port0 | -- port1 PCI bus Y -- port0 | -- port1 > > > Another example of a bad structure layout. It would of been much > > > better to put the two u8 after speed. > > > > > > > +} __packed; > > > > > > And because this is packed, and badly aligned, you are forcing the > > > compiler to do a lot more work accessing these members. > > > > > > > Yes, It is bad. But FW use this define, I can only follow the define... > > Maybe I can add comment here? > > /* Must follow FW define here */ > > No need. When somebody sees __packed, it becomes obvious this is ABI > and cannot be changed. Just think about it for any future extensions > to the firmware ABI. > > > > > > > + > > > > +static inline void ability_update_host_endian(struct hw_abilities *abi) > > > > +{ > > > > + u32 host_val = le32_to_cpu(abi->ext_ability); > > > > + > > > > + abi->e_host = *(typeof(abi->e_host) *)&host_val; > > > > +} > > > > > > Please add a comment what this is doing, it is not obvious. > > > > > > > > > > Maybe link this? > > /* Converts the little-endian ext_ability field to host byte order, > > * then copies the value into the e_host field by reinterpreting the > > * memory as the type of e_host (likely a bitfield or structure that > > * represents the extended abilities in a host-friendly format). > > */ > > This explains what you are doing, but why? Why do you do this only to > this field? What about all the others? > > Andrew > FW stores extended ability information in `ext_ability` as a 32-bit little-endian value. To make these flags easily accessible in the kernel (via named 'bitfields' instead of raw bitmask operations), we use the union's `e_host` struct, which provides named bits (e.g., `wol_en`, `smbus_en`). Others 'not bitfields' is just use 'lexx_to_cpu' when value is used. Thanks for your feedback.
On 14/08/25 1:08 pm, Dong Yibo wrote: > Initialize basic mbx_fw ops, such as get_capability, reset phy > and so on. > > Signed-off-by: Dong Yibo <dong100@mucse.com> > --- > drivers/net/ethernet/mucse/rnpgbe/Makefile | 3 +- > drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 4 + > .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 264 ++++++++++++++++++ > .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 201 +++++++++++++ > 4 files changed, 471 insertions(+), 1 deletion(-) > create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c > create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h > > + > +/** > + * mbx_cookie_zalloc - Alloc a cookie structure > + * @priv_len: private length for this cookie > + * > + * @return: cookie structure on success > + **/ > +static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len) > +{ > + struct mbx_req_cookie *cookie; > + > + cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL); > + if (cookie) { > + cookie->timeout_jiffes = 30 * HZ; Typo: should be "timeout_jiffies" instead of "timeout_jiffes" > + cookie->magic = COOKIE_MAGIC; > + cookie->priv_len = priv_len; > + } > + return cookie; > +} > + > +/** -- Thanks and Regards, Danish
On Thu, Aug 14, 2025 at 05:40:14PM +0530, MD Danish Anwar wrote: > On 14/08/25 1:08 pm, Dong Yibo wrote: > > Initialize basic mbx_fw ops, such as get_capability, reset phy > > and so on. > > > > Signed-off-by: Dong Yibo <dong100@mucse.com> > > --- > > drivers/net/ethernet/mucse/rnpgbe/Makefile | 3 +- > > drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 4 + > > .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 264 ++++++++++++++++++ > > .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 201 +++++++++++++ > > 4 files changed, 471 insertions(+), 1 deletion(-) > > create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c > > create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h > > > > > + > > +/** > > + * mbx_cookie_zalloc - Alloc a cookie structure > > + * @priv_len: private length for this cookie > > + * > > + * @return: cookie structure on success > > + **/ > > +static struct mbx_req_cookie *mbx_cookie_zalloc(int priv_len) > > +{ > > + struct mbx_req_cookie *cookie; > > + > > + cookie = kzalloc(struct_size(cookie, priv, priv_len), GFP_KERNEL); > > + if (cookie) { > > + cookie->timeout_jiffes = 30 * HZ; > > Typo: should be "timeout_jiffies" instead of "timeout_jiffes" > Got it, I will fix it. > > + cookie->magic = COOKIE_MAGIC; > > + cookie->priv_len = priv_len; > > + } > > + return cookie; > > +} > > + > > +/** > > > -- > Thanks and Regards, > Danish > > Thanks for you feedback.
© 2016 - 2025 Red Hat, Inc.