[RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware

Danielle Ratson posted 9 patches 1 year, 11 months ago
There is a newer version of this series
[RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware
Posted by Danielle Ratson 1 year, 11 months ago
Add the ability to flash the modules' firmware by implementing the
interface between the user space and the kernel.

Example from a succeeding implementation:

 # ethtool --flash-module-firmware swp40 file test.bin

 Transceiver module firmware flashing started for device eth0

 Transceiver module firmware flashing in progress for device eth0
 Status message: Downloading firmware image
 Progress: 0%

 [...]

 Transceiver module firmware flashing in progress for device eth0
 Status message: Downloading firmware image
 Progress: 50%

 [...]

 Transceiver module firmware flashing in progress for device eth0
 Status message: Downloading firmware image
 Progress: 100%

 Transceiver module firmware flashing completed for device eth0

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
---
 net/ethtool/module.c  | 179 ++++++++++++++++++++++++++++++++++++++++++
 net/ethtool/netlink.c |   7 ++
 net/ethtool/netlink.h |   2 +
 3 files changed, 188 insertions(+)

diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index 09cf11564840..69cedb3ede6d 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/ethtool.h>
+#include <linux/sfp.h>
+#include <linux/firmware.h>
 
 #include "netlink.h"
 #include "common.h"
@@ -160,6 +162,183 @@ const struct ethnl_request_ops ethnl_module_request_ops = {
 	.set_ntf_cmd		= ETHTOOL_MSG_MODULE_NTF,
 };
 
+/* MODULE_FW_FLASH_ACT */
+
+const struct nla_policy
+ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1] = {
+	[ETHTOOL_A_MODULE_FW_FLASH_HEADER] =
+		NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME] = { .type = NLA_NUL_STRING },
+	[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD] = { .type = NLA_U32 },
+};
+
+struct module_sff8024_id_rpl {
+	u8 id;
+};
+
+#define MODULE_EEPROM_PAGE	0
+#define MODULE_EEPROM_OFFSET	0
+#define MODULE_EEPROM_LENGTH	1
+#define MODULE_EEPROM_I2C_ADDR	0x50
+
+static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw,
+				     struct net_device *dev,
+				     struct netlink_ext_ack *extack)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_module_eeprom page_data = {};
+	struct module_sff8024_id_rpl *rpl;
+	int err;
+
+	/* Fetch the SFF-8024 Identifier Value. For all supported standards, it
+	 * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024,
+	 * revision 4.9.
+	 */
+	page_data.page = MODULE_EEPROM_PAGE;
+	page_data.offset = MODULE_EEPROM_OFFSET;
+	page_data.length = MODULE_EEPROM_LENGTH;
+	page_data.i2c_address = MODULE_EEPROM_I2C_ADDR;
+	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
+	if (!page_data.data)
+		return -ENOMEM;
+
+	err = ops->get_module_eeprom_by_page(dev, &page_data, extack);
+	if (err < 0)
+		goto out;
+
+	rpl = (struct module_sff8024_id_rpl *)page_data.data;
+	switch (rpl->id) {
+	case SFF8024_ID_QSFP_DD:
+	case SFF8024_ID_OSFP:
+	case SFF8024_ID_DSFP:
+	case SFF8024_ID_QSFP_PLUS_CMIS:
+	case SFF8024_ID_SFP_DD_CMIS:
+	case SFF8024_ID_SFP_PLUS_CMIS:
+		INIT_WORK(&module_fw->work, ethtool_cmis_fw_update);
+		goto out;
+	default:
+		NL_SET_ERR_MSG(extack,
+			       "Module type does not support firmware flashing");
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+out:
+	kfree(page_data.data);
+	return err;
+}
+
+static int
+module_flash_fw_schedule(struct net_device *dev,
+			 struct ethtool_module_fw_flash_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_module_fw_flash *module_fw;
+	int err;
+
+	if (!ops->set_module_eeprom_by_page ||
+	    !ops->get_module_eeprom_by_page) {
+		NL_SET_ERR_MSG(extack,
+			       "Flashing module firmware is not supported by this device");
+		return -EOPNOTSUPP;
+	}
+
+	if (dev->module_fw_flash_in_progress) {
+		NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress");
+		return -EBUSY;
+	}
+
+	module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL);
+	if (!module_fw)
+		return -ENOMEM;
+
+	module_fw->params = *params;
+	err = request_firmware(&module_fw->fw, module_fw->params.file_name,
+			       &dev->dev);
+	if (err) {
+		NL_SET_ERR_MSG(extack,
+			       "Failed to request module firmware image");
+		goto err_request_firmware;
+	}
+
+	err = module_flash_fw_work_init(module_fw, dev, extack);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Flashing module firmware is not supported by this device");
+		goto err_work_init;
+	}
+
+	dev->module_fw_flash_in_progress = true;
+	netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL);
+	module_fw->dev = dev;
+
+	schedule_work(&module_fw->work);
+
+	return 0;
+
+err_work_init:
+	release_firmware(module_fw->fw);
+err_request_firmware:
+	kfree(module_fw);
+	return err;
+}
+
+static int module_flash_fw(struct net_device *dev, struct nlattr **tb,
+			   struct netlink_ext_ack *extack)
+{
+	struct ethtool_module_fw_flash_params params = {};
+	struct nlattr *attr;
+
+	if (!tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]) {
+		NL_SET_ERR_MSG_ATTR(extack,
+				    tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME],
+				    "File name attribute is missing");
+		return -EINVAL;
+	}
+
+	params.file_name =
+		nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]);
+
+	attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD];
+	if (attr) {
+		params.password = cpu_to_be32(nla_get_u32(attr));
+		params.password_valid = true;
+	}
+
+	return module_flash_fw_schedule(dev, &params, extack);
+}
+
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ethnl_req_info req_info = {};
+	struct nlattr **tb = info->attrs;
+	struct net_device *dev;
+	int ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info,
+					 tb[ETHTOOL_A_MODULE_FW_FLASH_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = module_flash_fw(dev, tb, info->extack);
+
+	ethnl_ops_complete(dev);
+
+out_rtnl:
+	rtnl_unlock();
+	ethnl_parse_header_dev_put(&req_info);
+	return ret;
+}
+
 /* MODULE_FW_FLASH_NTF */
 
 static void
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index fe3553f60bf3..85e27bdb1f73 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -1129,6 +1129,13 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_mm_set_policy,
 		.maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_act_module_fw_flash,
+		.policy	= ethnl_module_fw_flash_act_policy,
+		.maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9a333a8d04c1..46712c9531ae 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -441,6 +441,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1]
 extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
 extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
 extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
+extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
 
 int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
 int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -448,6 +449,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_start(struct netlink_callback *cb);
 int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
 
 extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
-- 
2.40.1
Re: [RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware
Posted by Andrew Lunn 1 year, 11 months ago
> +static int
> +module_flash_fw_schedule(struct net_device *dev,
> +			 struct ethtool_module_fw_flash_params *params,
> +			 struct netlink_ext_ack *extack)
> +{
> +	const struct ethtool_ops *ops = dev->ethtool_ops;
> +	struct ethtool_module_fw_flash *module_fw;
> +	int err;
> +
> +	if (!ops->set_module_eeprom_by_page ||
> +	    !ops->get_module_eeprom_by_page) {
> +		NL_SET_ERR_MSG(extack,
> +			       "Flashing module firmware is not supported by this device");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (dev->module_fw_flash_in_progress) {
> +		NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress");
> +		return -EBUSY;
> +	}
> +
> +	module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL);
> +	if (!module_fw)
> +		return -ENOMEM;
> +
> +	module_fw->params = *params;
> +	err = request_firmware(&module_fw->fw, module_fw->params.file_name,
> +			       &dev->dev);

How big are these firmware blobs?

Ideally we want to be able to use the same API to upgrade things like
GPON modules, which often run an openwrt image, and they are plugged
into a cable modem which does not have too much RAM.

Given that the interface to the EEPROM is using 128 byte 1/2 pages,
would it be possible to use request_partial_firmware_into_buf() to
read it on demand, rather than all at once?

     Andrew
Re: [RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware
Posted by Russell King (Oracle) 1 year, 11 months ago
On Mon, Jan 22, 2024 at 10:45:30AM +0200, Danielle Ratson wrote:
> +#define MODULE_EEPROM_PAGE	0
> +#define MODULE_EEPROM_OFFSET	0
> +#define MODULE_EEPROM_LENGTH	1
> +#define MODULE_EEPROM_I2C_ADDR	0x50
> +
> +static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw,
> +				     struct net_device *dev,
> +				     struct netlink_ext_ack *extack)
> +{
> +	const struct ethtool_ops *ops = dev->ethtool_ops;
> +	struct ethtool_module_eeprom page_data = {};
> +	struct module_sff8024_id_rpl *rpl;
> +	int err;
> +
> +	/* Fetch the SFF-8024 Identifier Value. For all supported standards, it
> +	 * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024,
> +	 * revision 4.9.
> +	 */
> +	page_data.page = MODULE_EEPROM_PAGE;
> +	page_data.offset = MODULE_EEPROM_OFFSET;
> +	page_data.length = MODULE_EEPROM_LENGTH;
> +	page_data.i2c_address = MODULE_EEPROM_I2C_ADDR;

Please use better names - these aren't any better than using integers.

Maybe use SFP_PHYS_ID for the offset?

> +	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
> +	if (!page_data.data)
> +		return -ENOMEM;
> +
> +	err = ops->get_module_eeprom_by_page(dev, &page_data, extack);
> +	if (err < 0)
> +		goto out;
> +
> +	rpl = (struct module_sff8024_id_rpl *)page_data.data;

What purpose does this structure of a single byte serve? To me, it just
obfuscates the code.

	u8 phys_id;

	...
	page_data.offset = SFP_PHYS_ID;
	page_data.length = sizeof(phys_id);
	page_data.data = &phys_id;
	...
	switch (phys_id) {

will work just as well, and be more explicit about what's actually going
on here. It doesn't mean that I have to understand what this new
module_sff8024_id_rpl structure is. I can see that we're just getting
one byte which is the module physical ID.

You also then don't need to care about kfree()ing one byte of data
structure.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware
Posted by Jakub Kicinski 1 year, 11 months ago
On Mon, 22 Jan 2024 10:45:30 +0200 Danielle Ratson wrote:
>  #include <linux/ethtool.h>
> +#include <linux/sfp.h>
> +#include <linux/firmware.h>

alphabetical order, please

> +static int
> +module_flash_fw_schedule(struct net_device *dev,
> +			 struct ethtool_module_fw_flash_params *params,
> +			 struct netlink_ext_ack *extack)
> +{
> +	const struct ethtool_ops *ops = dev->ethtool_ops;
> +	struct ethtool_module_fw_flash *module_fw;
> +	int err;
> +
> +	if (!ops->set_module_eeprom_by_page ||
> +	    !ops->get_module_eeprom_by_page) {
> +		NL_SET_ERR_MSG(extack,
> +			       "Flashing module firmware is not supported by this device");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (dev->module_fw_flash_in_progress) {
> +		NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress");
> +		return -EBUSY;
> +	}
> +
> +	module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL);
> +	if (!module_fw)
> +		return -ENOMEM;
> +
> +	module_fw->params = *params;
> +	err = request_firmware(&module_fw->fw, module_fw->params.file_name,

request_firmware_direct() ? I think udev timeout is 30 sec and we're
holding rtnl_lock.. I don't remember why we didn't use that in devlink

> +			       &dev->dev);
> +	if (err) {
> +		NL_SET_ERR_MSG(extack,
> +			       "Failed to request module firmware image");
> +		goto err_request_firmware;
> +	}
> +
> +	err = module_flash_fw_work_init(module_fw, dev, extack);
> +	if (err < 0) {
> +		NL_SET_ERR_MSG(extack,
> +			       "Flashing module firmware is not supported by this device");
> +		goto err_work_init;
> +	}
> +
> +	dev->module_fw_flash_in_progress = true;

What does this protect us from? 

> +static int module_flash_fw(struct net_device *dev, struct nlattr **tb,
> +			   struct netlink_ext_ack *extack)
> +{
> +	struct ethtool_module_fw_flash_params params = {};
> +	struct nlattr *attr;
> +
> +	if (!tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]) {
> +		NL_SET_ERR_MSG_ATTR(extack,

GENL_REQ_ATTR_CHECK, and you can check it in the caller,
before taking rtnl_lock.

> +				    tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME],
> +				    "File name attribute is missing");
> +		return -EINVAL;
> +	}
> +
> +	params.file_name =
> +		nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]);

Hm. I think you copy the param struct by value to the work container.
nla_data() is in the skb which is going to get freed after _ACT returns.
So if anyone tries to access the name from the work it's going to UAF?

> +
> +	attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD];
> +	if (attr) {
> +		params.password = cpu_to_be32(nla_get_u32(attr));
> +		params.password_valid = true;
> +	}
> +
> +	return module_flash_fw_schedule(dev, &params, extack);
> +}
Re: [RFC PATCH net-next 9/9] ethtool: Add ability to flash transceiver modules' firmware
Posted by Simon Horman 1 year, 11 months ago
On Mon, Jan 22, 2024 at 10:45:30AM +0200, Danielle Ratson wrote:

...

> +static int module_flash_fw(struct net_device *dev, struct nlattr **tb,
> +			   struct netlink_ext_ack *extack)
> +{
> +	struct ethtool_module_fw_flash_params params = {};
> +	struct nlattr *attr;
> +
> +	if (!tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]) {
> +		NL_SET_ERR_MSG_ATTR(extack,
> +				    tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME],
> +				    "File name attribute is missing");
> +		return -EINVAL;
> +	}
> +
> +	params.file_name =
> +		nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]);
> +
> +	attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD];
> +	if (attr) {
> +		params.password = cpu_to_be32(nla_get_u32(attr));

Hi Danielle,

The type of password is u32, so perhaps cpu_to_be32() isn't needed here?

Flagged by Sparse.

> +		params.password_valid = true;
> +	}
> +
> +	return module_flash_fw_schedule(dev, &params, extack);
> +}

...