Create bnxt_fwctl device. This will bind to bnxt's aux device.
On the upper edge, it will register with the fwctl subsystem.
It will make use of bnxt's ULP functions to send FW commands.
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
---
MAINTAINERS | 6 +
drivers/fwctl/Kconfig | 11 +
drivers/fwctl/Makefile | 1 +
drivers/fwctl/bnxt/Makefile | 4 +
drivers/fwctl/bnxt/main.c | 416 ++++++++++++++++++++++++++++++++++++
include/uapi/fwctl/bnxt.h | 64 ++++++
include/uapi/fwctl/fwctl.h | 1 +
7 files changed, 503 insertions(+)
create mode 100644 drivers/fwctl/bnxt/Makefile
create mode 100644 drivers/fwctl/bnxt/main.c
create mode 100644 include/uapi/fwctl/bnxt.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 12f49de7fe03..38acd5d334b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10408,6 +10408,12 @@ L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/fwctl/pds/
+FWCTL BNXT DRIVER
+M: Pavan Chebbi <pavan.chebbi@broadcom.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/fwctl/bnxt/
+
GALAXYCORE GC0308 CAMERA SENSOR DRIVER
M: Sebastian Reichel <sre@kernel.org>
L: linux-media@vger.kernel.org
diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
index b5583b12a011..b3795a17f8f2 100644
--- a/drivers/fwctl/Kconfig
+++ b/drivers/fwctl/Kconfig
@@ -29,5 +29,16 @@ config FWCTL_PDS
to access the debug and configuration information of the AMD/Pensando
DSC hardware family.
+ If you don't know what to do here, say N.
+
+config FWCTL_BNXT
+ tristate "bnxt control fwctl driver"
+ depends on BNXT
+ help
+ BNXT provides interface for the user process to access the debug and
+ configuration registers of the Broadcom NIC hardware family.
+ This will allow configuration and debug tools to work out of the box on
+ mainstream kernel.
+
If you don't know what to do here, say N.
endif
diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
index c093b5f661d6..fdd46f3a0e4e 100644
--- a/drivers/fwctl/Makefile
+++ b/drivers/fwctl/Makefile
@@ -2,5 +2,6 @@
obj-$(CONFIG_FWCTL) += fwctl.o
obj-$(CONFIG_FWCTL_MLX5) += mlx5/
obj-$(CONFIG_FWCTL_PDS) += pds/
+obj-$(CONFIG_FWCTL_BNXT) += bnxt/
fwctl-y += main.o
diff --git a/drivers/fwctl/bnxt/Makefile b/drivers/fwctl/bnxt/Makefile
new file mode 100644
index 000000000000..b47172761f1e
--- /dev/null
+++ b/drivers/fwctl/bnxt/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_BNXT) += bnxt_fwctl.o
+
+bnxt_fwctl-y += main.o
diff --git a/drivers/fwctl/bnxt/main.c b/drivers/fwctl/bnxt/main.c
new file mode 100644
index 000000000000..67d65b36cb38
--- /dev/null
+++ b/drivers/fwctl/bnxt/main.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026, Broadcom Corporation
+ */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/fwctl.h>
+#include <linux/bnxt/hsi.h>
+#include <linux/bnxt/ulp.h>
+#include <uapi/fwctl/fwctl.h>
+#include <uapi/fwctl/bnxt.h>
+
+struct bnxtctl_uctx {
+ struct fwctl_uctx uctx;
+ u32 uctx_caps;
+};
+
+struct bnxtctl_dev {
+ struct fwctl_device fwctl;
+ struct bnxt_aux_priv *aux_priv;
+};
+
+DEFINE_FREE(bnxtctl, struct bnxtctl_dev *, if (_T) fwctl_put(&_T->fwctl))
+
+static int bnxtctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ struct bnxtctl_uctx *bnxtctl_uctx =
+ container_of(uctx, struct bnxtctl_uctx, uctx);
+
+ bnxtctl_uctx->uctx_caps = BIT(FWCTL_BNXT_QUERY_COMMANDS) |
+ BIT(FWCTL_BNXT_SEND_COMMAND);
+ return 0;
+}
+
+static void bnxtctl_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+static void *bnxtctl_info(struct fwctl_uctx *uctx, size_t *length)
+{
+ struct bnxtctl_uctx *bnxtctl_uctx =
+ container_of(uctx, struct bnxtctl_uctx, uctx);
+ struct fwctl_info_bnxt *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->uctx_caps = bnxtctl_uctx->uctx_caps;
+
+ *length = sizeof(*info);
+ return info;
+}
+
+static bool bnxtctl_validate_rpc(struct bnxt_en_dev *edev,
+ struct bnxt_fw_msg *hwrm_in,
+ enum fwctl_rpc_scope scope)
+{
+ struct input *req = (struct input *)hwrm_in->msg;
+
+ guard(mutex)(&edev->en_dev_lock);
+ if (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)
+ return false;
+
+ switch (le16_to_cpu(req->req_type)) {
+ case HWRM_FUNC_RESET:
+ case HWRM_PORT_CLR_STATS:
+ case HWRM_FW_SET_STRUCTURED_DATA:
+ case HWRM_PORT_PRBS_TEST:
+ case HWRM_FW_LIVEPATCH:
+ case HWRM_FW_RESET:
+ case HWRM_FW_SYNC:
+ case HWRM_FW_SET_TIME:
+ case HWRM_MFG_TESTS:
+ case HWRM_DBG_SERDES_TEST:
+ case HWRM_DBG_LOG_BUFFER_FLUSH:
+ case HWRM_DBG_DUMP:
+ case HWRM_DBG_ERASE_NVM:
+ case HWRM_DBG_CFG:
+ case HWRM_DBG_COREDUMP_LIST:
+ case HWRM_DBG_COREDUMP_INITIATE:
+ case HWRM_DBG_COREDUMP_RETRIEVE:
+ case HWRM_DBG_CRASHDUMP_HEADER:
+ case HWRM_DBG_CRASHDUMP_ERASE:
+ case HWRM_DBG_PTRACE:
+ case HWRM_DBG_TOKEN_CFG:
+ case HWRM_NVM_DEFRAG:
+ case HWRM_NVM_FACTORY_DEFAULTS:
+ case HWRM_NVM_FLUSH:
+ case HWRM_NVM_INSTALL_UPDATE:
+ case HWRM_NVM_MODIFY:
+ case HWRM_NVM_VERIFY_UPDATE:
+ case HWRM_NVM_ERASE_DIR_ENTRY:
+ case HWRM_NVM_MOD_DIR_ENTRY:
+ case HWRM_NVM_FIND_DIR_ENTRY:
+ case HWRM_NVM_RAW_DUMP:
+ return scope >= FWCTL_RPC_CONFIGURATION;
+
+ case HWRM_VER_GET:
+ case HWRM_FW_GET_STRUCTURED_DATA:
+ case HWRM_ERROR_RECOVERY_QCFG:
+ case HWRM_FUNC_QCAPS:
+ case HWRM_FUNC_QCFG:
+ case HWRM_FUNC_QSTATS:
+ case HWRM_PORT_QSTATS:
+ case HWRM_PORT_PHY_QCFG:
+ case HWRM_PORT_MAC_QCFG:
+ case HWRM_PORT_PHY_QCAPS:
+ case HWRM_PORT_PHY_I2C_READ:
+ case HWRM_PORT_PHY_MDIO_READ:
+ case HWRM_QUEUE_PRI2COS_QCFG:
+ case HWRM_QUEUE_COS2BW_QCFG:
+ case HWRM_QUEUE_DSCP2PRI_QCFG:
+ case HWRM_VNIC_RSS_QCFG:
+ case HWRM_QUEUE_GLOBAL_QCFG:
+ case HWRM_QUEUE_ADPTV_QOS_RX_FEATURE_QCFG:
+ case HWRM_QUEUE_ADPTV_QOS_TX_FEATURE_QCFG:
+ case HWRM_QUEUE_QCAPS:
+ case HWRM_QUEUE_ADPTV_QOS_RX_TUNING_QCFG:
+ case HWRM_QUEUE_ADPTV_QOS_TX_TUNING_QCFG:
+ case HWRM_TUNNEL_DST_PORT_QUERY:
+ case HWRM_PORT_QSTATS_EXT:
+ case HWRM_PORT_TX_FIR_QCFG:
+ case HWRM_FW_LIVEPATCH_QUERY:
+ case HWRM_FW_QSTATUS:
+ case HWRM_FW_HEALTH_CHECK:
+ case HWRM_FW_GET_TIME:
+ case HWRM_PORT_DSC_DUMP:
+ case HWRM_PORT_EP_TX_QCFG:
+ case HWRM_PORT_QCFG:
+ case HWRM_PORT_MAC_QCAPS:
+ case HWRM_TEMP_MONITOR_QUERY:
+ case HWRM_REG_POWER_QUERY:
+ case HWRM_CORE_FREQUENCY_QUERY:
+ case HWRM_STAT_QUERY_ROCE_STATS:
+ case HWRM_STAT_QUERY_ROCE_STATS_EXT:
+ case HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE:
+ case HWRM_CFA_FLOW_INFO:
+ case HWRM_CFA_ADV_FLOW_MGNT_QCAPS:
+ case HWRM_FUNC_RESOURCE_QCAPS:
+ case HWRM_FUNC_BACKING_STORE_QCAPS:
+ case HWRM_FUNC_BACKING_STORE_QCFG:
+ case HWRM_FUNC_QSTATS_EXT:
+ case HWRM_FUNC_PTP_PIN_QCFG:
+ case HWRM_FUNC_PTP_EXT_QCFG:
+ case HWRM_FUNC_BACKING_STORE_QCFG_V2:
+ case HWRM_FUNC_BACKING_STORE_QCAPS_V2:
+ case HWRM_FUNC_SYNCE_QCFG:
+ case HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY:
+ case HWRM_PCIE_QSTATS:
+ case HWRM_MFG_OTP_QCFG:
+ case HWRM_MFG_FRU_EEPROM_READ:
+ case HWRM_MFG_GET_NVM_MEASUREMENT:
+ case HWRM_STAT_GENERIC_QSTATS:
+ case HWRM_PORT_PHY_FDRSTAT:
+ case HWRM_QUEUE_ADPTV_QOS_RX_QCFG:
+ case HWRM_QUEUE_ADPTV_QOS_TX_QCFG:
+ case HWRM_DBG_READ_DIRECT:
+ case HWRM_DBG_READ_INDIRECT:
+ case HWRM_DBG_RING_INFO_GET:
+ case HWRM_DBG_QCAPS:
+ case HWRM_DBG_QCFG:
+ case HWRM_DBG_USEQ_FLUSH:
+ case HWRM_DBG_USEQ_QCAPS:
+ case HWRM_DBG_SIM_CABLE_STATE:
+ case HWRM_DBG_TOKEN_QUERY_AUTH_IDS:
+ case HWRM_NVM_GET_VARIABLE:
+ case HWRM_NVM_GET_DEV_INFO:
+ case HWRM_NVM_GET_DIR_ENTRIES:
+ case HWRM_NVM_GET_DIR_INFO:
+ case HWRM_NVM_READ:
+ case HWRM_SELFTEST_QLIST:
+ case HWRM_SELFTEST_RETRIEVE_SERDES_DATA:
+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
+
+ case HWRM_PORT_PHY_I2C_WRITE:
+ case HWRM_MFG_FRU_WRITE_CONTROL:
+ case HWRM_MFG_FRU_EEPROM_WRITE:
+ case HWRM_DBG_WRITE_DIRECT:
+ case HWRM_NVM_SET_VARIABLE:
+ case HWRM_NVM_WRITE:
+ case HWRM_NVM_RAW_WRITE_BLK:
+ case HWRM_PORT_PHY_MDIO_WRITE:
+ return scope >= FWCTL_RPC_DEBUG_WRITE;
+
+ default:
+ return false;
+ }
+}
+
+static int bnxt_fw_setup_input_dma(struct bnxtctl_dev *bnxt_dev,
+ struct device *dev,
+ struct fwctl_dma_info_bnxt *msg,
+ struct bnxt_fw_msg *fw_msg,
+ int num_dma,
+ void **dma_virt_addr,
+ dma_addr_t *dma_addr)
+{
+ struct fwctl_dma_info_bnxt *dma_buf = msg;
+ u8 i, num_allocated = 0;
+ void *dma_ptr;
+ int rc;
+
+ for (i = 0; i < num_dma; i++) {
+ if (msg->len == 0 || msg->len > MAX_DMA_MEM_SIZE) {
+ rc = -EINVAL;
+ goto err;
+ }
+ dma_virt_addr[i] = dma_alloc_coherent(dev->parent, msg->len,
+ &dma_addr[i], GFP_KERNEL);
+ if (!dma_virt_addr[i]) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ num_allocated++;
+ if (msg->dma_direction == DEVICE_WRITE) {
+ if (copy_from_user(dma_virt_addr[i],
+ u64_to_user_ptr(msg->data),
+ msg->len)) {
+ rc = -EFAULT;
+ goto err;
+ }
+ }
+ dma_ptr = fw_msg->msg + msg->offset;
+
+ if (!(PTR_ALIGN(dma_ptr, 8) == dma_ptr) ||
+ msg->offset >= fw_msg->msg_len) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]);
+ msg++;
+ }
+
+ return 0;
+err:
+ for (i = 0; i < num_allocated; i++)
+ dma_free_coherent(dev->parent, dma_buf[i].len,
+ dma_virt_addr[i], dma_addr[i]);
+
+ return rc;
+}
+
+static void *bnxtctl_fw_rpc(struct fwctl_uctx *uctx,
+ enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct bnxtctl_dev *bnxtctl =
+ container_of(uctx->fwctl, struct bnxtctl_dev, fwctl);
+ struct bnxt_aux_priv *bnxt_aux_priv = bnxtctl->aux_priv;
+ void *dma_virt_addr[MAX_NUM_DMA_INDICATIONS];
+ dma_addr_t dma_addr[MAX_NUM_DMA_INDICATIONS];
+ struct fwctl_dma_info_bnxt *dma_buf = NULL;
+ struct device *dev = &uctx->fwctl->dev;
+ struct fwctl_rpc_bnxt *msg = in;
+ struct bnxt_fw_msg rpc_in = {0};
+ int i, rc, err = 0;
+
+ rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
+ if (IS_ERR(rpc_in.msg))
+ return rpc_in.msg;
+
+ if (!bnxtctl_validate_rpc(bnxt_aux_priv->edev, &rpc_in, scope)) {
+ err = -EPERM;
+ goto free_msg_out;
+ }
+
+ rpc_in.msg_len = msg->req_len;
+ rpc_in.resp = kzalloc(*out_len, GFP_KERNEL);
+ if (!rpc_in.resp) {
+ err = -ENOMEM;
+ goto free_msg_out;
+ }
+
+ rpc_in.resp_max_len = *out_len;
+ if (!msg->timeout)
+ rpc_in.timeout = DFLT_HWRM_CMD_TIMEOUT;
+ else
+ rpc_in.timeout = msg->timeout;
+
+ if (msg->num_dma) {
+ if (msg->num_dma > MAX_NUM_DMA_INDICATIONS) {
+ dev_err(dev, "DMA buffers exceed the number supported\n");
+ err = -EINVAL;
+ goto free_msg_out;
+ }
+
+ dma_buf = kcalloc(msg->num_dma, sizeof(*dma_buf), GFP_KERNEL);
+ if (!dma_buf) {
+ err = -ENOMEM;
+ goto free_msg_out;
+ }
+
+ if (copy_from_user(dma_buf, u64_to_user_ptr(msg->payload),
+ msg->num_dma * sizeof(*dma_buf))) {
+ dev_dbg(dev, "Failed to copy payload from user\n");
+ err = -EFAULT;
+ goto free_dmabuf_out;
+ }
+
+ err = bnxt_fw_setup_input_dma(bnxtctl, dev, dma_buf, &rpc_in,
+ msg->num_dma, &dma_virt_addr[0],
+ &dma_addr[0]);
+ if (err)
+ goto free_dmabuf_out;
+ }
+
+ rc = bnxt_send_msg(bnxt_aux_priv->edev, &rpc_in);
+ if (rc) {
+ struct output *resp = rpc_in.resp;
+
+ /* Copy the response to user always, as it contains
+ * detailed status of the command failure
+ */
+ if (!resp->error_code)
+ /* bnxt_send_msg() returned much before FW
+ * received the command.
+ */
+ resp->error_code = rc;
+
+ goto free_dma_out;
+ }
+
+ for (i = 0; i < msg->num_dma; i++) {
+ if (dma_buf[i].dma_direction != DEVICE_READ)
+ continue;
+ if (copy_to_user(u64_to_user_ptr(dma_buf[i].data),
+ dma_virt_addr[i], dma_buf[i].len)) {
+ dev_dbg(dev, "Failed to copy resp to user\n");
+ err = -EFAULT;
+ break;
+ }
+ }
+free_dma_out:
+ /* Cleanup any dma memory that bnxt_fw_setup_input_dma() may have
+ * allocated
+ */
+ for (i = 0; i < msg->num_dma; i++)
+ dma_free_coherent(dev->parent, dma_buf[i].len, dma_virt_addr[i],
+ dma_addr[i]);
+free_dmabuf_out:
+ kfree(dma_buf);
+free_msg_out:
+ kfree(rpc_in.msg);
+
+ if (err) {
+ kfree(rpc_in.resp);
+ return ERR_PTR(err);
+ }
+
+ return rpc_in.resp;
+}
+
+static const struct fwctl_ops bnxtctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_BNXT,
+ .uctx_size = sizeof(struct bnxtctl_uctx),
+ .open_uctx = bnxtctl_open_uctx,
+ .close_uctx = bnxtctl_close_uctx,
+ .info = bnxtctl_info,
+ .fw_rpc = bnxtctl_fw_rpc,
+};
+
+static int bnxtctl_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxtctl_dev *bnxtctl __free(bnxtctl) =
+ fwctl_alloc_device(&aux_priv->edev->pdev->dev, &bnxtctl_ops,
+ struct bnxtctl_dev, fwctl);
+ int rc;
+
+ if (!bnxtctl)
+ return -ENOMEM;
+
+ bnxtctl->aux_priv = aux_priv;
+
+ rc = fwctl_register(&bnxtctl->fwctl);
+ if (rc)
+ return rc;
+
+ auxiliary_set_drvdata(adev, no_free_ptr(bnxtctl));
+ return 0;
+}
+
+static void bnxtctl_remove(struct auxiliary_device *adev)
+{
+ struct bnxtctl_dev *ctldev = auxiliary_get_drvdata(adev);
+
+ fwctl_unregister(&ctldev->fwctl);
+ fwctl_put(&ctldev->fwctl);
+}
+
+static const struct auxiliary_device_id bnxtctl_id_table[] = {
+ { .name = "bnxt_en.fwctl", },
+ {}
+};
+MODULE_DEVICE_TABLE(auxiliary, bnxtctl_id_table);
+
+static struct auxiliary_driver bnxtctl_driver = {
+ .name = "bnxt_fwctl",
+ .probe = bnxtctl_probe,
+ .remove = bnxtctl_remove,
+ .id_table = bnxtctl_id_table,
+};
+
+module_auxiliary_driver(bnxtctl_driver);
+
+MODULE_IMPORT_NS("FWCTL");
+MODULE_DESCRIPTION("BNXT fwctl driver");
+MODULE_AUTHOR("Pavan Chebbi <pavan.chebbi@broadcom.com>");
+MODULE_AUTHOR("Andy Gospodarek <gospo@broadcom.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/uapi/fwctl/bnxt.h b/include/uapi/fwctl/bnxt.h
new file mode 100644
index 000000000000..5620812d839c
--- /dev/null
+++ b/include/uapi/fwctl/bnxt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2026, Broadcom Inc
+ */
+
+#ifndef _UAPI_FWCTL_BNXT_H_
+#define _UAPI_FWCTL_BNXT_H_
+
+#include <linux/types.h>
+
+#define MAX_DMA_MEM_SIZE 0x10000 /*64K*/
+#define DFLT_HWRM_CMD_TIMEOUT 500
+#define DEVICE_WRITE 0
+#define DEVICE_READ 1
+
+enum fwctl_bnxt_commands {
+ FWCTL_BNXT_QUERY_COMMANDS = 0,
+ FWCTL_BNXT_SEND_COMMAND,
+};
+
+/**
+ * struct fwctl_info_bnxt - ioctl(FWCTL_INFO) out_device_data
+ * @uctx_caps: The command capabilities driver accepts.
+ *
+ * Return basic information about the FW interface available.
+ */
+struct fwctl_info_bnxt {
+ __u32 uctx_caps;
+};
+
+#define MAX_NUM_DMA_INDICATIONS 10
+
+/**
+ * struct fwctl_dma_info_bnxt - describe the buffer that should be DMAed
+ * @data: DMA-intended buffer
+ * @len: length of the @data
+ * @offset: offset at which FW (HWRM) input structure needs DMA address
+ * @dma_direction: DMA direction, DEVICE_READ or DEVICE_WRITE
+ * @unused: pad
+ */
+struct fwctl_dma_info_bnxt {
+ __aligned_u64 data;
+ __u32 len;
+ __u16 offset;
+ __u8 dma_direction;
+ __u8 unused;
+};
+
+/**
+ * struct fwctl_rpc_bnxt - describe the fwctl message for bnxt
+ * @req: FW (HWRM) command input structure
+ * @req_len: length of @req
+ * @timeout: if the user wants to override the driver's default, 0 otherwise
+ * @num_dma: number of DMA buffers to be added to @req
+ * @payload: DMA buffer details in struct fwctl_dma_info_bnxt format
+ */
+struct fwctl_rpc_bnxt {
+ __aligned_u64 req;
+ __u32 req_len;
+ __u32 timeout;
+ __u32 num_dma;
+ __aligned_u64 payload;
+};
+#endif
diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
index 716ac0eee42d..2d6d4049c205 100644
--- a/include/uapi/fwctl/fwctl.h
+++ b/include/uapi/fwctl/fwctl.h
@@ -44,6 +44,7 @@ enum fwctl_device_type {
FWCTL_DEVICE_TYPE_ERROR = 0,
FWCTL_DEVICE_TYPE_MLX5 = 1,
FWCTL_DEVICE_TYPE_CXL = 2,
+ FWCTL_DEVICE_TYPE_BNXT = 3,
FWCTL_DEVICE_TYPE_PDS = 4,
};
--
2.39.1
On 29 Jan 07:54, Pavan Chebbi wrote:
>Create bnxt_fwctl device. This will bind to bnxt's aux device.
>On the upper edge, it will register with the fwctl subsystem.
>It will make use of bnxt's ULP functions to send FW commands.
>
>Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
>Reviewed-by: Dave Jiang <dave.jiang@intel.com>
>Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
>---
> MAINTAINERS | 6 +
> drivers/fwctl/Kconfig | 11 +
> drivers/fwctl/Makefile | 1 +
> drivers/fwctl/bnxt/Makefile | 4 +
> drivers/fwctl/bnxt/main.c | 416 ++++++++++++++++++++++++++++++++++++
> include/uapi/fwctl/bnxt.h | 64 ++++++
> include/uapi/fwctl/fwctl.h | 1 +
> 7 files changed, 503 insertions(+)
> create mode 100644 drivers/fwctl/bnxt/Makefile
> create mode 100644 drivers/fwctl/bnxt/main.c
> create mode 100644 include/uapi/fwctl/bnxt.h
>
>diff --git a/MAINTAINERS b/MAINTAINERS
>index 12f49de7fe03..38acd5d334b2 100644
>--- a/MAINTAINERS
>+++ b/MAINTAINERS
>@@ -10408,6 +10408,12 @@ L: linux-kernel@vger.kernel.org
> S: Maintained
> F: drivers/fwctl/pds/
>
>+FWCTL BNXT DRIVER
>+M: Pavan Chebbi <pavan.chebbi@broadcom.com>
>+L: linux-kernel@vger.kernel.org
>+S: Maintained
>+F: drivers/fwctl/bnxt/
>+
> GALAXYCORE GC0308 CAMERA SENSOR DRIVER
> M: Sebastian Reichel <sre@kernel.org>
> L: linux-media@vger.kernel.org
>diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
>index b5583b12a011..b3795a17f8f2 100644
>--- a/drivers/fwctl/Kconfig
>+++ b/drivers/fwctl/Kconfig
>@@ -29,5 +29,16 @@ config FWCTL_PDS
> to access the debug and configuration information of the AMD/Pensando
> DSC hardware family.
>
>+ If you don't know what to do here, say N.
>+
>+config FWCTL_BNXT
>+ tristate "bnxt control fwctl driver"
>+ depends on BNXT
>+ help
>+ BNXT provides interface for the user process to access the debug and
>+ configuration registers of the Broadcom NIC hardware family.
>+ This will allow configuration and debug tools to work out of the box on
>+ mainstream kernel.
>+
> If you don't know what to do here, say N.
> endif
>diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
>index c093b5f661d6..fdd46f3a0e4e 100644
>--- a/drivers/fwctl/Makefile
>+++ b/drivers/fwctl/Makefile
>@@ -2,5 +2,6 @@
> obj-$(CONFIG_FWCTL) += fwctl.o
> obj-$(CONFIG_FWCTL_MLX5) += mlx5/
> obj-$(CONFIG_FWCTL_PDS) += pds/
>+obj-$(CONFIG_FWCTL_BNXT) += bnxt/
>
> fwctl-y += main.o
>diff --git a/drivers/fwctl/bnxt/Makefile b/drivers/fwctl/bnxt/Makefile
>new file mode 100644
>index 000000000000..b47172761f1e
>--- /dev/null
>+++ b/drivers/fwctl/bnxt/Makefile
>@@ -0,0 +1,4 @@
>+# SPDX-License-Identifier: GPL-2.0
>+obj-$(CONFIG_FWCTL_BNXT) += bnxt_fwctl.o
>+
>+bnxt_fwctl-y += main.o
>diff --git a/drivers/fwctl/bnxt/main.c b/drivers/fwctl/bnxt/main.c
>new file mode 100644
>index 000000000000..67d65b36cb38
>--- /dev/null
>+++ b/drivers/fwctl/bnxt/main.c
>@@ -0,0 +1,416 @@
>+// SPDX-License-Identifier: GPL-2.0
>+/*
>+ * Copyright (c) 2026, Broadcom Corporation
>+ */
>+
>+#include <linux/auxiliary_bus.h>
>+#include <linux/slab.h>
>+#include <linux/pci.h>
>+#include <linux/fwctl.h>
>+#include <linux/bnxt/hsi.h>
>+#include <linux/bnxt/ulp.h>
>+#include <uapi/fwctl/fwctl.h>
>+#include <uapi/fwctl/bnxt.h>
>+
>+struct bnxtctl_uctx {
>+ struct fwctl_uctx uctx;
>+ u32 uctx_caps;
>+};
>+
>+struct bnxtctl_dev {
>+ struct fwctl_device fwctl;
>+ struct bnxt_aux_priv *aux_priv;
>+};
>+
>+DEFINE_FREE(bnxtctl, struct bnxtctl_dev *, if (_T) fwctl_put(&_T->fwctl))
>+
>+static int bnxtctl_open_uctx(struct fwctl_uctx *uctx)
>+{
>+ struct bnxtctl_uctx *bnxtctl_uctx =
>+ container_of(uctx, struct bnxtctl_uctx, uctx);
>+
>+ bnxtctl_uctx->uctx_caps = BIT(FWCTL_BNXT_QUERY_COMMANDS) |
>+ BIT(FWCTL_BNXT_SEND_COMMAND);
>+ return 0;
>+}
>+
>+static void bnxtctl_close_uctx(struct fwctl_uctx *uctx)
>+{
>+}
>+
>+static void *bnxtctl_info(struct fwctl_uctx *uctx, size_t *length)
>+{
>+ struct bnxtctl_uctx *bnxtctl_uctx =
>+ container_of(uctx, struct bnxtctl_uctx, uctx);
>+ struct fwctl_info_bnxt *info;
>+
>+ info = kzalloc(sizeof(*info), GFP_KERNEL);
>+ if (!info)
>+ return ERR_PTR(-ENOMEM);
>+
>+ info->uctx_caps = bnxtctl_uctx->uctx_caps;
>+
>+ *length = sizeof(*info);
>+ return info;
>+}
>+
>+static bool bnxtctl_validate_rpc(struct bnxt_en_dev *edev,
>+ struct bnxt_fw_msg *hwrm_in,
>+ enum fwctl_rpc_scope scope)
>+{
>+ struct input *req = (struct input *)hwrm_in->msg;
>+
>+ guard(mutex)(&edev->en_dev_lock);
>+ if (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)
>+ return false;
>+
>+ switch (le16_to_cpu(req->req_type)) {
>+ case HWRM_FUNC_RESET:
>+ case HWRM_PORT_CLR_STATS:
>+ case HWRM_FW_SET_STRUCTURED_DATA:
>+ case HWRM_PORT_PRBS_TEST:
>+ case HWRM_FW_LIVEPATCH:
>+ case HWRM_FW_RESET:
>+ case HWRM_FW_SYNC:
>+ case HWRM_FW_SET_TIME:
>+ case HWRM_MFG_TESTS:
>+ case HWRM_DBG_SERDES_TEST:
>+ case HWRM_DBG_LOG_BUFFER_FLUSH:
>+ case HWRM_DBG_DUMP:
>+ case HWRM_DBG_ERASE_NVM:
>+ case HWRM_DBG_CFG:
>+ case HWRM_DBG_COREDUMP_LIST:
>+ case HWRM_DBG_COREDUMP_INITIATE:
>+ case HWRM_DBG_COREDUMP_RETRIEVE:
>+ case HWRM_DBG_CRASHDUMP_HEADER:
>+ case HWRM_DBG_CRASHDUMP_ERASE:
>+ case HWRM_DBG_PTRACE:
>+ case HWRM_DBG_TOKEN_CFG:
>+ case HWRM_NVM_DEFRAG:
>+ case HWRM_NVM_FACTORY_DEFAULTS:
>+ case HWRM_NVM_FLUSH:
>+ case HWRM_NVM_INSTALL_UPDATE:
>+ case HWRM_NVM_MODIFY:
>+ case HWRM_NVM_VERIFY_UPDATE:
>+ case HWRM_NVM_ERASE_DIR_ENTRY:
>+ case HWRM_NVM_MOD_DIR_ENTRY:
>+ case HWRM_NVM_FIND_DIR_ENTRY:
>+ case HWRM_NVM_RAW_DUMP:
>+ return scope >= FWCTL_RPC_CONFIGURATION;
>+
>+ case HWRM_VER_GET:
>+ case HWRM_FW_GET_STRUCTURED_DATA:
>+ case HWRM_ERROR_RECOVERY_QCFG:
>+ case HWRM_FUNC_QCAPS:
>+ case HWRM_FUNC_QCFG:
>+ case HWRM_FUNC_QSTATS:
>+ case HWRM_PORT_QSTATS:
>+ case HWRM_PORT_PHY_QCFG:
>+ case HWRM_PORT_MAC_QCFG:
>+ case HWRM_PORT_PHY_QCAPS:
>+ case HWRM_PORT_PHY_I2C_READ:
>+ case HWRM_PORT_PHY_MDIO_READ:
>+ case HWRM_QUEUE_PRI2COS_QCFG:
>+ case HWRM_QUEUE_COS2BW_QCFG:
>+ case HWRM_QUEUE_DSCP2PRI_QCFG:
>+ case HWRM_VNIC_RSS_QCFG:
>+ case HWRM_QUEUE_GLOBAL_QCFG:
>+ case HWRM_QUEUE_ADPTV_QOS_RX_FEATURE_QCFG:
>+ case HWRM_QUEUE_ADPTV_QOS_TX_FEATURE_QCFG:
>+ case HWRM_QUEUE_QCAPS:
>+ case HWRM_QUEUE_ADPTV_QOS_RX_TUNING_QCFG:
>+ case HWRM_QUEUE_ADPTV_QOS_TX_TUNING_QCFG:
>+ case HWRM_TUNNEL_DST_PORT_QUERY:
>+ case HWRM_PORT_QSTATS_EXT:
>+ case HWRM_PORT_TX_FIR_QCFG:
>+ case HWRM_FW_LIVEPATCH_QUERY:
>+ case HWRM_FW_QSTATUS:
>+ case HWRM_FW_HEALTH_CHECK:
>+ case HWRM_FW_GET_TIME:
>+ case HWRM_PORT_DSC_DUMP:
>+ case HWRM_PORT_EP_TX_QCFG:
>+ case HWRM_PORT_QCFG:
>+ case HWRM_PORT_MAC_QCAPS:
>+ case HWRM_TEMP_MONITOR_QUERY:
>+ case HWRM_REG_POWER_QUERY:
>+ case HWRM_CORE_FREQUENCY_QUERY:
>+ case HWRM_STAT_QUERY_ROCE_STATS:
>+ case HWRM_STAT_QUERY_ROCE_STATS_EXT:
>+ case HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE:
>+ case HWRM_CFA_FLOW_INFO:
>+ case HWRM_CFA_ADV_FLOW_MGNT_QCAPS:
>+ case HWRM_FUNC_RESOURCE_QCAPS:
>+ case HWRM_FUNC_BACKING_STORE_QCAPS:
>+ case HWRM_FUNC_BACKING_STORE_QCFG:
>+ case HWRM_FUNC_QSTATS_EXT:
>+ case HWRM_FUNC_PTP_PIN_QCFG:
>+ case HWRM_FUNC_PTP_EXT_QCFG:
>+ case HWRM_FUNC_BACKING_STORE_QCFG_V2:
>+ case HWRM_FUNC_BACKING_STORE_QCAPS_V2:
>+ case HWRM_FUNC_SYNCE_QCFG:
>+ case HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY:
>+ case HWRM_PCIE_QSTATS:
>+ case HWRM_MFG_OTP_QCFG:
>+ case HWRM_MFG_FRU_EEPROM_READ:
>+ case HWRM_MFG_GET_NVM_MEASUREMENT:
>+ case HWRM_STAT_GENERIC_QSTATS:
>+ case HWRM_PORT_PHY_FDRSTAT:
>+ case HWRM_QUEUE_ADPTV_QOS_RX_QCFG:
>+ case HWRM_QUEUE_ADPTV_QOS_TX_QCFG:
>+ case HWRM_DBG_READ_DIRECT:
>+ case HWRM_DBG_READ_INDIRECT:
>+ case HWRM_DBG_RING_INFO_GET:
>+ case HWRM_DBG_QCAPS:
>+ case HWRM_DBG_QCFG:
>+ case HWRM_DBG_USEQ_FLUSH:
>+ case HWRM_DBG_USEQ_QCAPS:
>+ case HWRM_DBG_SIM_CABLE_STATE:
>+ case HWRM_DBG_TOKEN_QUERY_AUTH_IDS:
>+ case HWRM_NVM_GET_VARIABLE:
>+ case HWRM_NVM_GET_DEV_INFO:
>+ case HWRM_NVM_GET_DIR_ENTRIES:
>+ case HWRM_NVM_GET_DIR_INFO:
>+ case HWRM_NVM_READ:
>+ case HWRM_SELFTEST_QLIST:
>+ case HWRM_SELFTEST_RETRIEVE_SERDES_DATA:
>+ return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
>+
>+ case HWRM_PORT_PHY_I2C_WRITE:
>+ case HWRM_MFG_FRU_WRITE_CONTROL:
>+ case HWRM_MFG_FRU_EEPROM_WRITE:
>+ case HWRM_DBG_WRITE_DIRECT:
>+ case HWRM_NVM_SET_VARIABLE:
>+ case HWRM_NVM_WRITE:
>+ case HWRM_NVM_RAW_WRITE_BLK:
>+ case HWRM_PORT_PHY_MDIO_WRITE:
>+ return scope >= FWCTL_RPC_DEBUG_WRITE;
>+
>+ default:
>+ return false;
>+ }
>+}
>+
Can the DMA direction be derived from the scope of the command ? maybe this
will simplify the command dma setup, see below comment.
>+static int bnxt_fw_setup_input_dma(struct bnxtctl_dev *bnxt_dev,
>+ struct device *dev,
>+ struct fwctl_dma_info_bnxt *msg,
>+ struct bnxt_fw_msg *fw_msg,
>+ int num_dma,
>+ void **dma_virt_addr,
>+ dma_addr_t *dma_addr)
>+{
>+ struct fwctl_dma_info_bnxt *dma_buf = msg;
>+ u8 i, num_allocated = 0;
>+ void *dma_ptr;
>+ int rc;
>+
>+ for (i = 0; i < num_dma; i++) {
>+ if (msg->len == 0 || msg->len > MAX_DMA_MEM_SIZE) {
>+ rc = -EINVAL;
>+ goto err;
>+ }
>+ dma_virt_addr[i] = dma_alloc_coherent(dev->parent, msg->len,
>+ &dma_addr[i], GFP_KERNEL);
>+ if (!dma_virt_addr[i]) {
>+ rc = -ENOMEM;
>+ goto err;
>+ }
>+ num_allocated++;
>+ if (msg->dma_direction == DEVICE_WRITE) {
>+ if (copy_from_user(dma_virt_addr[i],
>+ u64_to_user_ptr(msg->data),
>+ msg->len)) {
>+ rc = -EFAULT;
>+ goto err;
>+ }
>+ }
>+ dma_ptr = fw_msg->msg + msg->offset;
>+
>+ if (!(PTR_ALIGN(dma_ptr, 8) == dma_ptr) ||
>+ msg->offset >= fw_msg->msg_len) {
>+ rc = -EINVAL;
>+ goto err;
>+ }
>+
>+ *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]);
>+ msg++;
>+ }
>+
>+ return 0;
>+err:
>+ for (i = 0; i < num_allocated; i++)
>+ dma_free_coherent(dev->parent, dma_buf[i].len,
>+ dma_virt_addr[i], dma_addr[i]);
>+
>+ return rc;
>+}
>+
>+static void *bnxtctl_fw_rpc(struct fwctl_uctx *uctx,
>+ enum fwctl_rpc_scope scope,
>+ void *in, size_t in_len, size_t *out_len)
>+{
>+ struct bnxtctl_dev *bnxtctl =
>+ container_of(uctx->fwctl, struct bnxtctl_dev, fwctl);
>+ struct bnxt_aux_priv *bnxt_aux_priv = bnxtctl->aux_priv;
>+ void *dma_virt_addr[MAX_NUM_DMA_INDICATIONS];
>+ dma_addr_t dma_addr[MAX_NUM_DMA_INDICATIONS];
>+ struct fwctl_dma_info_bnxt *dma_buf = NULL;
>+ struct device *dev = &uctx->fwctl->dev;
>+ struct fwctl_rpc_bnxt *msg = in;
>+ struct bnxt_fw_msg rpc_in = {0};
>+ int i, rc, err = 0;
>+
>+ rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
>+ if (IS_ERR(rpc_in.msg))
>+ return rpc_in.msg;
>+
>+ if (!bnxtctl_validate_rpc(bnxt_aux_priv->edev, &rpc_in, scope)) {
>+ err = -EPERM;
>+ goto free_msg_out;
>+ }
>+
>+ rpc_in.msg_len = msg->req_len;
>+ rpc_in.resp = kzalloc(*out_len, GFP_KERNEL);
>+ if (!rpc_in.resp) {
>+ err = -ENOMEM;
>+ goto free_msg_out;
>+ }
>+
>+ rpc_in.resp_max_len = *out_len;
>+ if (!msg->timeout)
>+ rpc_in.timeout = DFLT_HWRM_CMD_TIMEOUT;
>+ else
>+ rpc_in.timeout = msg->timeout;
>+
>+ if (msg->num_dma) {
>+ if (msg->num_dma > MAX_NUM_DMA_INDICATIONS) {
>+ dev_err(dev, "DMA buffers exceed the number supported\n");
>+ err = -EINVAL;
>+ goto free_msg_out;
>+ }
>+
>+ dma_buf = kcalloc(msg->num_dma, sizeof(*dma_buf), GFP_KERNEL);
>+ if (!dma_buf) {
>+ err = -ENOMEM;
>+ goto free_msg_out;
>+ }
>+
>+ if (copy_from_user(dma_buf, u64_to_user_ptr(msg->payload),
>+ msg->num_dma * sizeof(*dma_buf))) {
>+ dev_dbg(dev, "Failed to copy payload from user\n");
>+ err = -EFAULT;
>+ goto free_dmabuf_out;
>+ }
>+
I find it a bit weird that the user needs to provide dma information and
structure ! this is supposed to be completely hidden by the driver to
simplify user space, the driver handles dma and access to HW, user space
just provides the commands and payloads and driver carries the input/output
for that user space.
Also setting up a large enough DMA buffer only initially on uctx open could
be beneficial in your case and align better with fwctl usecases.
>+ err = bnxt_fw_setup_input_dma(bnxtctl, dev, dma_buf, &rpc_in,
>+ msg->num_dma, &dma_virt_addr[0],
>+ &dma_addr[0]);
>+ if (err)
>+ goto free_dmabuf_out;
>+ }
>+
>+ rc = bnxt_send_msg(bnxt_aux_priv->edev, &rpc_in);
>+ if (rc) {
>+ struct output *resp = rpc_in.resp;
>+
>+ /* Copy the response to user always, as it contains
>+ * detailed status of the command failure
>+ */
>+ if (!resp->error_code)
>+ /* bnxt_send_msg() returned much before FW
>+ * received the command.
>+ */
>+ resp->error_code = rc;
>+
>+ goto free_dma_out;
>+ }
>+
>+ for (i = 0; i < msg->num_dma; i++) {
>+ if (dma_buf[i].dma_direction != DEVICE_READ)
>+ continue;
>+ if (copy_to_user(u64_to_user_ptr(dma_buf[i].data),
>+ dma_virt_addr[i], dma_buf[i].len)) {
>+ dev_dbg(dev, "Failed to copy resp to user\n");
>+ err = -EFAULT;
>+ break;
>+ }
>+ }
>+free_dma_out:
>+ /* Cleanup any dma memory that bnxt_fw_setup_input_dma() may have
>+ * allocated
>+ */
>+ for (i = 0; i < msg->num_dma; i++)
>+ dma_free_coherent(dev->parent, dma_buf[i].len, dma_virt_addr[i],
>+ dma_addr[i]);
>+free_dmabuf_out:
>+ kfree(dma_buf);
>+free_msg_out:
>+ kfree(rpc_in.msg);
>+
>+ if (err) {
>+ kfree(rpc_in.resp);
>+ return ERR_PTR(err);
>+ }
>+
>+ return rpc_in.resp;
>+}
>+
>+static const struct fwctl_ops bnxtctl_ops = {
>+ .device_type = FWCTL_DEVICE_TYPE_BNXT,
>+ .uctx_size = sizeof(struct bnxtctl_uctx),
>+ .open_uctx = bnxtctl_open_uctx,
>+ .close_uctx = bnxtctl_close_uctx,
>+ .info = bnxtctl_info,
>+ .fw_rpc = bnxtctl_fw_rpc,
>+};
>+
>+static int bnxtctl_probe(struct auxiliary_device *adev,
>+ const struct auxiliary_device_id *id)
>+{
>+ struct bnxt_aux_priv *aux_priv =
>+ container_of(adev, struct bnxt_aux_priv, aux_dev);
>+ struct bnxtctl_dev *bnxtctl __free(bnxtctl) =
>+ fwctl_alloc_device(&aux_priv->edev->pdev->dev, &bnxtctl_ops,
>+ struct bnxtctl_dev, fwctl);
>+ int rc;
>+
>+ if (!bnxtctl)
>+ return -ENOMEM;
>+
>+ bnxtctl->aux_priv = aux_priv;
>+
>+ rc = fwctl_register(&bnxtctl->fwctl);
>+ if (rc)
>+ return rc;
>+
>+ auxiliary_set_drvdata(adev, no_free_ptr(bnxtctl));
>+ return 0;
>+}
>+
>+static void bnxtctl_remove(struct auxiliary_device *adev)
>+{
>+ struct bnxtctl_dev *ctldev = auxiliary_get_drvdata(adev);
>+
>+ fwctl_unregister(&ctldev->fwctl);
>+ fwctl_put(&ctldev->fwctl);
>+}
>+
>+static const struct auxiliary_device_id bnxtctl_id_table[] = {
>+ { .name = "bnxt_en.fwctl", },
>+ {}
>+};
>+MODULE_DEVICE_TABLE(auxiliary, bnxtctl_id_table);
>+
>+static struct auxiliary_driver bnxtctl_driver = {
>+ .name = "bnxt_fwctl",
>+ .probe = bnxtctl_probe,
>+ .remove = bnxtctl_remove,
>+ .id_table = bnxtctl_id_table,
>+};
>+
>+module_auxiliary_driver(bnxtctl_driver);
>+
>+MODULE_IMPORT_NS("FWCTL");
>+MODULE_DESCRIPTION("BNXT fwctl driver");
>+MODULE_AUTHOR("Pavan Chebbi <pavan.chebbi@broadcom.com>");
>+MODULE_AUTHOR("Andy Gospodarek <gospo@broadcom.com>");
>+MODULE_LICENSE("GPL");
>diff --git a/include/uapi/fwctl/bnxt.h b/include/uapi/fwctl/bnxt.h
>new file mode 100644
>index 000000000000..5620812d839c
>--- /dev/null
>+++ b/include/uapi/fwctl/bnxt.h
>@@ -0,0 +1,64 @@
>+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>+/*
>+ * Copyright (c) 2026, Broadcom Inc
>+ */
>+
>+#ifndef _UAPI_FWCTL_BNXT_H_
>+#define _UAPI_FWCTL_BNXT_H_
>+
>+#include <linux/types.h>
>+
>+#define MAX_DMA_MEM_SIZE 0x10000 /*64K*/
>+#define DFLT_HWRM_CMD_TIMEOUT 500
unit?
>+#define DEVICE_WRITE 0
>+#define DEVICE_READ 1
>+
>+enum fwctl_bnxt_commands {
>+ FWCTL_BNXT_QUERY_COMMANDS = 0,
>+ FWCTL_BNXT_SEND_COMMAND,
>+};
>+
>+/**
>+ * struct fwctl_info_bnxt - ioctl(FWCTL_INFO) out_device_data
>+ * @uctx_caps: The command capabilities driver accepts.
>+ *
>+ * Return basic information about the FW interface available.
>+ */
>+struct fwctl_info_bnxt {
>+ __u32 uctx_caps;
>+};
>+
>+#define MAX_NUM_DMA_INDICATIONS 10
>+
>+/**
>+ * struct fwctl_dma_info_bnxt - describe the buffer that should be DMAed
>+ * @data: DMA-intended buffer
>+ * @len: length of the @data
>+ * @offset: offset at which FW (HWRM) input structure needs DMA address
>+ * @dma_direction: DMA direction, DEVICE_READ or DEVICE_WRITE
>+ * @unused: pad
>+ */
>+struct fwctl_dma_info_bnxt {
>+ __aligned_u64 data;
>+ __u32 len;
>+ __u16 offset;
>+ __u8 dma_direction;
>+ __u8 unused;
>+};
>+
>+/**
>+ * struct fwctl_rpc_bnxt - describe the fwctl message for bnxt
>+ * @req: FW (HWRM) command input structure
>+ * @req_len: length of @req
>+ * @timeout: if the user wants to override the driver's default, 0 otherwise
What unit ? Also there is a define in the driver (TIMOUT=500); not clear
what unit.
>+ * @num_dma: number of DMA buffers to be added to @req
>+ * @payload: DMA buffer details in struct fwctl_dma_info_bnxt format
>+ */
if payload is a well known format, why not inline ? or why not combine req
and payload into one buffer and let the kernel handle the rest. Just not
sure how you HW interface works, so maybe once you explain the previous
comment this would make some sense ?
>+struct fwctl_rpc_bnxt {
>+ __aligned_u64 req;
>+ __u32 req_len;
>+ __u32 timeout;
>+ __u32 num_dma;
>+ __aligned_u64 payload;
>+};
>+#endif
>diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
>index 716ac0eee42d..2d6d4049c205 100644
>--- a/include/uapi/fwctl/fwctl.h
>+++ b/include/uapi/fwctl/fwctl.h
>@@ -44,6 +44,7 @@ enum fwctl_device_type {
> FWCTL_DEVICE_TYPE_ERROR = 0,
> FWCTL_DEVICE_TYPE_MLX5 = 1,
> FWCTL_DEVICE_TYPE_CXL = 2,
>+ FWCTL_DEVICE_TYPE_BNXT = 3,
> FWCTL_DEVICE_TYPE_PDS = 4,
> };
>
>--
>2.39.1
>
> > I find it a bit weird that the user needs to provide dma information and > structure ! this is supposed to be completely hidden by the driver to > simplify user space, the driver handles dma and access to HW, user space > just provides the commands and payloads and driver carries the input/output > for that user space. This is so because our FW commands require optional DMA-able buffers. The application is only giving us the information that the driver should encapsulate in additional DMA-able buffers. There is a defined format for exchange of this information, because every command has a different number of additional buffers. Also the majority of the commands don't need any additional buffers. I hope this answers your next comments also. > > Also setting up a large enough DMA buffer only initially on uctx open could > be beneficial in your case and align better with fwctl usecases. > <--> > >+/** > >+ * struct fwctl_rpc_bnxt - describe the fwctl message for bnxt > >+ * @req: FW (HWRM) command input structure > >+ * @req_len: length of @req > >+ * @timeout: if the user wants to override the driver's default, 0 otherwise > > What unit ? Also there is a define in the driver (TIMOUT=500); not clear > what unit. Its milliseconds. I can update. > > >+ * @num_dma: number of DMA buffers to be added to @req > >+ * @payload: DMA buffer details in struct fwctl_dma_info_bnxt format > >+ */ > > if payload is a well known format, why not inline ? or why not combine req > and payload into one buffer and let the kernel handle the rest. Just not > sure how you HW interface works, so maybe once you explain the previous > comment this would make some sense ? >
On Thu, Feb 05, 2026 at 09:55:08AM +0530, Pavan Chebbi wrote: > > > > I find it a bit weird that the user needs to provide dma information and > > structure ! this is supposed to be completely hidden by the driver to > > simplify user space, the driver handles dma and access to HW, user space > > just provides the commands and payloads and driver carries the input/output > > for that user space. > > This is so because our FW commands require optional DMA-able buffers. > The application is only giving us the information that the driver > should encapsulate in additional DMA-able buffers. > There is a defined format for exchange of this information, because > every command has a different number of additional buffers. Also the > majority of the commands don't need any additional buffers. > I hope this answers your next comments also. Now I have questions, who allocates these buffers, how do they get DMA mapped, who does all the copying?? Jason
On Thu, Feb 5, 2026 at 12:33 PM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Thu, Feb 05, 2026 at 09:55:08AM +0530, Pavan Chebbi wrote: > > > > > > I find it a bit weird that the user needs to provide dma information and > > > structure ! this is supposed to be completely hidden by the driver to > > > simplify user space, the driver handles dma and access to HW, user space > > > just provides the commands and payloads and driver carries the input/output > > > for that user space. > > > > This is so because our FW commands require optional DMA-able buffers. > > The application is only giving us the information that the driver > > should encapsulate in additional DMA-able buffers. > > There is a defined format for exchange of this information, because > > every command has a different number of additional buffers. Also the > > majority of the commands don't need any additional buffers. > > I hope this answers your next comments also. > > Now I have questions, who allocates these buffers, how do they get DMA > mapped, who does all the copying?? > Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls in this (or the v4 version) of this patch.
On Thu, Feb 05, 2026 at 12:47:53PM -0500, Andy Gospodarek wrote:
> Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls
> in this (or the v4 version) of this patch.
Oh.. No.. You can't do this:
+ rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
// ^^ eventually becomes fw_msg
+ dma_ptr = fw_msg->msg + msg->offset;
+ *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]);
There is nothing in here which ensures that every DMA physical address
in the mailbox given from userspace is *actually forced to a value by
the kernel*
The kernel only overwrites values from the user controlled struct
fwctl_dma_info_bnxt array.
Meaning userspace can just specify any physical address in the
message, not include any fwctl_dma_info_bnxt list and it will happily
send the user controlled physical address to the FW. Thus userspace
can DMA to whatever memory it likes.
IOW this is a *HUGE* security error!
You are going to struggle very much supporting this kind of ill suited
FW API through this interface, I think you need to rework the FW
protocol...
Or add some very complex kernel validation and do something about
forward compatibility...
Or only support commands the FW promises will never have a DMA address
in them.
Shouldn't you guys have caught this during your security review?
"don't DMA to random memory" is a very clear fundamental thing in the
fwctl rules!
Jason
On Fri, Feb 6, 2026 at 12:12 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Thu, Feb 05, 2026 at 12:47:53PM -0500, Andy Gospodarek wrote: > > > Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls > > in this (or the v4 version) of this patch. > > Oh.. No.. You can't do this: > > + rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len); > // ^^ eventually becomes fw_msg > + dma_ptr = fw_msg->msg + msg->offset; > + *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]); > > There is nothing in here which ensures that every DMA physical address > in the mailbox given from userspace is *actually forced to a value by > the kernel* > > The kernel only overwrites values from the user controlled struct > fwctl_dma_info_bnxt array. > > Meaning userspace can just specify any physical address in the > message, not include any fwctl_dma_info_bnxt list and it will happily > send the user controlled physical address to the FW. Thus userspace > can DMA to whatever memory it likes. > Reading this while I am fully awake helps me grapple with the point you are making. Yes, this hole does exist. Thanks for bringing this out. > IOW this is a *HUGE* security error! > > You are going to struggle very much supporting this kind of ill suited > FW API through this interface, I think you need to rework the FW > protocol... > > Or add some very complex kernel validation and do something about > forward compatibility... > > Or only support commands the FW promises will never have a DMA address > in them. > Or have the driver maintain some sort of look up table where it validates num_dmas and their offsets for each command it supports, and reject user commands which don't map... > Shouldn't you guys have caught this during your security review? > "don't DMA to random memory" is a very clear fundamental thing in the > fwctl rules! > > Jason
On Thu, Feb 5, 2026 at 1:42 PM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Thu, Feb 05, 2026 at 12:47:53PM -0500, Andy Gospodarek wrote: > > > Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls > > in this (or the v4 version) of this patch. > > Oh.. No.. You can't do this: > > + rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len); > // ^^ eventually becomes fw_msg > + dma_ptr = fw_msg->msg + msg->offset; > + *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]); > > There is nothing in here which ensures that every DMA physical address > in the mailbox given from userspace is *actually forced to a value by > the kernel* > > The kernel only overwrites values from the user controlled struct > fwctl_dma_info_bnxt array. > > Meaning userspace can just specify any physical address in the > message, not include any fwctl_dma_info_bnxt list and it will happily > send the user controlled physical address to the FW. Thus userspace > can DMA to whatever memory it likes. > > IOW this is a *HUGE* security error! > I'll let Pavan chime in when he sees this, but I'm not sure that's correct. You might be right that we have a problem that is as bad as you describe, but it's also possible we could do a better job explaining how these DMA operations work and why they are needed. Not every FW RPC will need to have chunks of data that is copied to or from userspace to FW. In the cases where data is copied, there is not a direct DMA from userspace to FW or vice-versa. All DMA operations should be done on buffers only allocated and mapped with dma_alloc_coherent -- never buffers or addresses that are straight from userspace. As I read through these again, I realize that what might have made these easier to read would be simply changing the names of a few of these structures when they are really just buffers being copied between user and kernel space -- not buffers that are actually being DMAed from hardware to userspace.
On Thu, Feb 05, 2026 at 07:19:43PM -0500, Andy Gospodarek wrote:
> On Thu, Feb 5, 2026 at 1:42 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >
> > On Thu, Feb 05, 2026 at 12:47:53PM -0500, Andy Gospodarek wrote:
> >
> > > Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls
> > > in this (or the v4 version) of this patch.
> >
> > Oh.. No.. You can't do this:
> >
> > + rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
> > // ^^ eventually becomes fw_msg
> > + dma_ptr = fw_msg->msg + msg->offset;
> > + *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]);
> >
> > There is nothing in here which ensures that every DMA physical address
> > in the mailbox given from userspace is *actually forced to a value by
> > the kernel*
> >
> > The kernel only overwrites values from the user controlled struct
> > fwctl_dma_info_bnxt array.
> >
> > Meaning userspace can just specify any physical address in the
> > message, not include any fwctl_dma_info_bnxt list and it will happily
> > send the user controlled physical address to the FW. Thus userspace
> > can DMA to whatever memory it likes.
> >
> > IOW this is a *HUGE* security error!
> >
>
> I'll let Pavan chime in when he sees this, but I'm not sure that's
> correct. You might be right that we have a problem that is as bad as
> you describe, but it's also possible we could do a better job
> explaining how these DMA operations work and why they are needed.
I think it is pretty clear, the DMA addreses are being read by FW from
the same memory buffer that the userspace fully controls.
Look at the code, the design takes an offset list from userspace and
patches in DMA addresses from the kernel into the same buffer that
the userspace data was copied from.
NOTHING is checking that the offset list is security acceptable for
the command!!
> As I read through these again, I realize that what might have made
> these easier to read would be simply changing the names of a few of
> these structures when they are really just buffers being copied
> between user and kernel space -- not buffers that are actually being
> DMAed from hardware to userspace.
I understand how the buffer copying works on the happy path, I am
pointing out that if the user submits a command that has
+struct fwctl_rpc_bnxt {
+ __aligned_u64 req;
+ __u32 req_len;
+ __u32 timeout;
+ __u32 num_dma;
^^^^^^^^^^^
=== 0
then the kernel doesn't even allocate any of these DMA buffers, it
doesn't change the command buffer and it sends raw userspace data
directly to FW.
Since that very same command buffer obviously contains the DMA
addresses to use there is nothing blocking userspace from doing this.
Stated another way, the kernel must NOT take in
+struct fwctl_dma_info_bnxt {
+ __aligned_u64 data;
+ __u32 len;
+ __u16 offset;
^^^^^^^
This parameter from userspace! The kernel MUST know exactly where all
DMA addresses lie in the message now and into the future to guarentee
that it and only it writes a DMA address of the kernel controlled
DMA bounce buffer.
Allowing a userspace controlled DMA address to pass into the device
directly from userspace is a complete security fail.
Jason
On Fri, Feb 6, 2026 at 6:09 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Thu, Feb 05, 2026 at 07:19:43PM -0500, Andy Gospodarek wrote:
> > On Thu, Feb 5, 2026 at 1:42 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > >
> > > On Thu, Feb 05, 2026 at 12:47:53PM -0500, Andy Gospodarek wrote:
> > >
> > > > Jason, this is all done in bnxtctl_fw_rpc() and the functions it calls
> > > > in this (or the v4 version) of this patch.
> > >
> > > Oh.. No.. You can't do this:
> > >
> > > + rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
> > > // ^^ eventually becomes fw_msg
> > > + dma_ptr = fw_msg->msg + msg->offset;
> > > + *(__le64 *)(dma_ptr) = cpu_to_le64(dma_addr[i]);
> > >
> > > There is nothing in here which ensures that every DMA physical address
> > > in the mailbox given from userspace is *actually forced to a value by
> > > the kernel*
> > >
> > > The kernel only overwrites values from the user controlled struct
> > > fwctl_dma_info_bnxt array.
> > >
> > > Meaning userspace can just specify any physical address in the
> > > message, not include any fwctl_dma_info_bnxt list and it will happily
> > > send the user controlled physical address to the FW. Thus userspace
> > > can DMA to whatever memory it likes.
> > >
> > > IOW this is a *HUGE* security error!
> > >
> >
> > I'll let Pavan chime in when he sees this, but I'm not sure that's
> > correct. You might be right that we have a problem that is as bad as
> > you describe, but it's also possible we could do a better job
> > explaining how these DMA operations work and why they are needed.
>
> I think it is pretty clear, the DMA addreses are being read by FW from
> the same memory buffer that the userspace fully controls.
>
> Look at the code, the design takes an offset list from userspace and
> patches in DMA addresses from the kernel into the same buffer that
> the userspace data was copied from.
>
> NOTHING is checking that the offset list is security acceptable for
> the command!!
>
> > As I read through these again, I realize that what might have made
> > these easier to read would be simply changing the names of a few of
> > these structures when they are really just buffers being copied
> > between user and kernel space -- not buffers that are actually being
> > DMAed from hardware to userspace.
>
> I understand how the buffer copying works on the happy path, I am
> pointing out that if the user submits a command that has
>
> +struct fwctl_rpc_bnxt {
> + __aligned_u64 req;
> + __u32 req_len;
> + __u32 timeout;
> + __u32 num_dma;
> ^^^^^^^^^^^
>
> === 0
>
> then the kernel doesn't even allocate any of these DMA buffers, it
> doesn't change the command buffer and it sends raw userspace data
> directly to FW.
>
> Since that very same command buffer obviously contains the DMA
> addresses to use there is nothing blocking userspace from doing this.
>
> Stated another way, the kernel must NOT take in
>
> +struct fwctl_dma_info_bnxt {
> + __aligned_u64 data;
> + __u32 len;
> + __u16 offset;
> ^^^^^^^
>
> This parameter from userspace! The kernel MUST know exactly where all
> DMA addresses lie in the message now and into the future to guarentee
> that it and only it writes a DMA address of the kernel controlled
> DMA bounce buffer.
>
> Allowing a userspace controlled DMA address to pass into the device
> directly from userspace is a complete security fail.
I think its important to quickly clarify the mechanism lest the whole
thing adds more questions.
Let me explain with an example as to how its happening. I am sure its
the case of naming the struct as *dma* info that is freaking everyone
out.
When a user has no additional host buffers to add to a FW command, it
just sets the num_dma = 0. At that time, req member of struct
fwctl_rpc_bnxt has the entire user buffer to carry FW command i/p and
o/p.
Sometimes a FW command needs a separate additional host buffers beyond
the input message. The input message will carry the address at which
additional host buffers are found.
Example is a command like below where FW will return additional data
in a separate tx_stat_host_addr and rx_stat_host_addr
struct hwrm_port_qstats_input {
__le16 req_type;
__le16 cmpl_ring;
__le16 seq_id;
__le16 target_id;
__le64 resp_addr;
__le16 port_id;
u8 flags;
u8 unused_0[5];
__le64 tx_stat_host_addr;
__le64 rx_stat_host_addr;
};
Now the user will tell driver that this FW command has two additional
host buffers that driver needs to supply to FW. So it will say num_dma
= 2 in the rpc message. And describe each additional buffer so that
driver can allocate dma_alloc_coherent memory for it.
FW command expects the DMA-able buffers' address be present at
'tx_stat_host_addr' and 'rx_stat_host_addr' fields of
hwrm_port_qstats_input. Here is where 'offset' comes into the picture.
In the above example, user is only telling the driver, offset of
'__le64 tx_stat_host_addr' in the input structure, using the 'offset'
field of the additional buffer description.
Does that make the mechanism clear...
>
> Jason
On Fri, Feb 06, 2026 at 06:54:37AM +0530, Pavan Chebbi wrote:
> Does that make the mechanism clear...
Nothing you have explained is different than how I understand it.
You clearly haven't understood what I'm explaining, please read both
my messages over several times until you get it.
I will help you one last time.
User uses this command:
struct hwrm_port_qstats_input {
__le16 req_type;
__le16 cmpl_ring;
__le16 seq_id;
__le16 target_id;
__le64 resp_addr;
__le16 port_id;
u8 flags;
u8 unused_0[5];
__le64 tx_stat_host_addr;
__le64 rx_stat_host_addr;
};
It sends the command buffer with tx_stat_host_addr set to 0xBAD
It sets num_dma = 0.
Explain what happens.
Jason
On Thu, Feb 5, 2026 at 9:55 AM Pavan Chebbi <pavan.chebbi@broadcom.com> wrote: > > > > > I find it a bit weird that the user needs to provide dma information and > > structure ! this is supposed to be completely hidden by the driver to > > simplify user space, the driver handles dma and access to HW, user space > > just provides the commands and payloads and driver carries the input/output > > for that user space. > > This is so because our FW commands require optional DMA-able buffers. > The application is only giving us the information that the driver > should encapsulate in additional DMA-able buffers. > There is a defined format for exchange of this information, because > every command has a different number of additional buffers. Also the > majority of the commands don't need any additional buffers. > I hope this answers your next comments also. > > > > > Also setting up a large enough DMA buffer only initially on uctx open could > > be beneficial in your case and align better with fwctl usecases. > > > > <--> > > > >+/** > > >+ * struct fwctl_rpc_bnxt - describe the fwctl message for bnxt > > >+ * @req: FW (HWRM) command input structure > > >+ * @req_len: length of @req > > >+ * @timeout: if the user wants to override the driver's default, 0 otherwise > > > > What unit ? Also there is a define in the driver (TIMOUT=500); not clear > > what unit. > > Its milliseconds. I can update. Saeed, if you are satisfied with my replies, can I make this comment update as a fix later? If that is fine, I wish that this series be pulled in. (this email is on v3. There is v4) The complete ulp callbacks cleanup, which are more roce driver centric, will be posted as a separate series as I explained in the other email. > > > > > >+ * @num_dma: number of DMA buffers to be added to @req > > >+ * @payload: DMA buffer details in struct fwctl_dma_info_bnxt format > > >+ */ > > > > if payload is a well known format, why not inline ? or why not combine req > > and payload into one buffer and let the kernel handle the rest. Just not > > sure how you HW interface works, so maybe once you explain the previous > > comment this would make some sense ? > >
On Thu, 29 Jan 2026 07:54:52 -0800
Pavan Chebbi <pavan.chebbi@broadcom.com> wrote:
> Create bnxt_fwctl device. This will bind to bnxt's aux device.
> On the upper edge, it will register with the fwctl subsystem.
> It will make use of bnxt's ULP functions to send FW commands.
>
> Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Hi
Various comments inline,
Thanks,
Jonathan
> diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
> index b5583b12a011..b3795a17f8f2 100644
> --- a/drivers/fwctl/Kconfig
> +++ b/drivers/fwctl/Kconfig
> @@ -29,5 +29,16 @@ config FWCTL_PDS
> to access the debug and configuration information of the AMD/Pensando
> DSC hardware family.
>
> + If you don't know what to do here, say N.
> +
> +config FWCTL_BNXT
> + tristate "bnxt control fwctl driver"
> + depends on BNXT
> + help
> + BNXT provides interface for the user process to access the debug and
> + configuration registers of the Broadcom NIC hardware family.
> + This will allow configuration and debug tools to work out of the box on
> + mainstream kernel.
> +
> If you don't know what to do here, say N.
Same thing on ordering as for the make file. Just adding at the end doesn't
make it easy to find entries once we have lots of them!
> endif
> diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
> index c093b5f661d6..fdd46f3a0e4e 100644
> --- a/drivers/fwctl/Makefile
> +++ b/drivers/fwctl/Makefile
> @@ -2,5 +2,6 @@
> obj-$(CONFIG_FWCTL) += fwctl.o
> obj-$(CONFIG_FWCTL_MLX5) += mlx5/
> obj-$(CONFIG_FWCTL_PDS) += pds/
> +obj-$(CONFIG_FWCTL_BNXT) += bnxt/
How are we ordering these? Alphabetical maybe?
>
> diff --git a/drivers/fwctl/bnxt/main.c b/drivers/fwctl/bnxt/main.c
> new file mode 100644
> index 000000000000..67d65b36cb38
> --- /dev/null
> +++ b/drivers/fwctl/bnxt/main.c
> +static void *bnxtctl_fw_rpc(struct fwctl_uctx *uctx,
> + enum fwctl_rpc_scope scope,
> + void *in, size_t in_len, size_t *out_len)
> +{
> + struct bnxtctl_dev *bnxtctl =
> + container_of(uctx->fwctl, struct bnxtctl_dev, fwctl);
> + struct bnxt_aux_priv *bnxt_aux_priv = bnxtctl->aux_priv;
> + void *dma_virt_addr[MAX_NUM_DMA_INDICATIONS];
> + dma_addr_t dma_addr[MAX_NUM_DMA_INDICATIONS];
> + struct fwctl_dma_info_bnxt *dma_buf = NULL;
> + struct device *dev = &uctx->fwctl->dev;
> + struct fwctl_rpc_bnxt *msg = in;
> + struct bnxt_fw_msg rpc_in = {0};
> + int i, rc, err = 0;
> +
> + rpc_in.msg = memdup_user(u64_to_user_ptr(msg->req), msg->req_len);
> + if (IS_ERR(rpc_in.msg))
> + return rpc_in.msg;
> +
> + if (!bnxtctl_validate_rpc(bnxt_aux_priv->edev, &rpc_in, scope)) {
> + err = -EPERM;
> + goto free_msg_out;
> + }
> +
> + rpc_in.msg_len = msg->req_len;
> + rpc_in.resp = kzalloc(*out_len, GFP_KERNEL);
> + if (!rpc_in.resp) {
> + err = -ENOMEM;
> + goto free_msg_out;
> + }
> +
> + rpc_in.resp_max_len = *out_len;
> + if (!msg->timeout)
> + rpc_in.timeout = DFLT_HWRM_CMD_TIMEOUT;
> + else
> + rpc_in.timeout = msg->timeout;
> +
> + if (msg->num_dma) {
> + if (msg->num_dma > MAX_NUM_DMA_INDICATIONS) {
> + dev_err(dev, "DMA buffers exceed the number supported\n");
> + err = -EINVAL;
> + goto free_msg_out;
> + }
> +
> + dma_buf = kcalloc(msg->num_dma, sizeof(*dma_buf), GFP_KERNEL);
> + if (!dma_buf) {
> + err = -ENOMEM;
> + goto free_msg_out;
> + }
> +
> + if (copy_from_user(dma_buf, u64_to_user_ptr(msg->payload),
> + msg->num_dma * sizeof(*dma_buf))) {
Why not memdup_user() for this one?
> + dev_dbg(dev, "Failed to copy payload from user\n");
> + err = -EFAULT;
> + goto free_dmabuf_out;
> + }
> +
> + err = bnxt_fw_setup_input_dma(bnxtctl, dev, dma_buf, &rpc_in,
> + msg->num_dma, &dma_virt_addr[0],
> + &dma_addr[0]);
> + if (err)
> + goto free_dmabuf_out;
> + }
> +
> + rc = bnxt_send_msg(bnxt_aux_priv->edev, &rpc_in);
> + if (rc) {
> + struct output *resp = rpc_in.resp;
> +
> + /* Copy the response to user always, as it contains
> + * detailed status of the command failure
> + */
> + if (!resp->error_code)
> + /* bnxt_send_msg() returned much before FW
> + * received the command.
> + */
> + resp->error_code = rc;
> +
> + goto free_dma_out;
> + }
> +
> + for (i = 0; i < msg->num_dma; i++) {
> + if (dma_buf[i].dma_direction != DEVICE_READ)
> + continue;
> + if (copy_to_user(u64_to_user_ptr(dma_buf[i].data),
> + dma_virt_addr[i], dma_buf[i].len)) {
> + dev_dbg(dev, "Failed to copy resp to user\n");
> + err = -EFAULT;
> + break;
> + }
> + }
> +free_dma_out:
> + /* Cleanup any dma memory that bnxt_fw_setup_input_dma() may have
> + * allocated
> + */
> + for (i = 0; i < msg->num_dma; i++)
> + dma_free_coherent(dev->parent, dma_buf[i].len, dma_virt_addr[i],
> + dma_addr[i]);
I'd have a tear down function to undo what bnxt_fw_setup_input_dma()
does so that the two functions can be easily matched against each other.
> +free_dmabuf_out:
> + kfree(dma_buf);
> +free_msg_out:
> + kfree(rpc_in.msg);
Free in reverse order of allocation. Even if that means
a dance with multiple if (err).
Personally I'd probably split the good and bad paths so we can have a nice
clean normal path and avoid if (err) dance when we know it is always set.
The extra clarity would be worth duplicating a few lines.
Would also allow extra labels to avoid kfree() of things that haven't
been allocated (obviously that's safe but still not nice)
I think you could reorder the allocations to do rpc_in.resp first
in which case this style would work better.
> +
> + if (err) {
> + kfree(rpc_in.resp);
> + return ERR_PTR(err);
> + }
> +
> + return rpc_in.resp;
> +}
> +}
> diff --git a/include/uapi/fwctl/bnxt.h b/include/uapi/fwctl/bnxt.h
> new file mode 100644
> index 000000000000..5620812d839c
> --- /dev/null
> +++ b/include/uapi/fwctl/bnxt.h
> @@ -0,0 +1,64 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * Copyright (c) 2026, Broadcom Inc
> + */
> +
> +#ifndef _UAPI_FWCTL_BNXT_H_
> +#define _UAPI_FWCTL_BNXT_H_
> +
> +#include <linux/types.h>
> +
> +#define MAX_DMA_MEM_SIZE 0x10000 /*64K*/
> +#define DFLT_HWRM_CMD_TIMEOUT 500
> +#define DEVICE_WRITE 0
> +#define DEVICE_READ 1
Those are very generic names to stick in a uapi header. Prefix
them probably or maybe rename the structure field to something like
dma_to_device.
Also, write and read seem a bit oddly aligned with the use in a direction
field. Maybe FWCTL_BNXT_DIR_TO_DEVICE and DIR_FROM_DEVICE.
> +
> +enum fwctl_bnxt_commands {
> + FWCTL_BNXT_QUERY_COMMANDS = 0,
> + FWCTL_BNXT_SEND_COMMAND,
Seems like these need to have hardware /firmware matching numbers?
If so I'd make the explicit by assigning them all. Just assigning
0 kind of gives the wrong impression.
> +};
> +/**
> + * struct fwctl_dma_info_bnxt - describe the buffer that should be DMAed
> + * @data: DMA-intended buffer
As below, I'd call out that's an address. The type obscures that.
> + * @len: length of the @data
> + * @offset: offset at which FW (HWRM) input structure needs DMA address
> + * @dma_direction: DMA direction, DEVICE_READ or DEVICE_WRITE
> + * @unused: pad
> + */
> +struct fwctl_dma_info_bnxt {
> + __aligned_u64 data;
> + __u32 len;
> + __u16 offset;
> + __u8 dma_direction;
> + __u8 unused;
> +};
> +
> +/**
> + * struct fwctl_rpc_bnxt - describe the fwctl message for bnxt
> + * @req: FW (HWRM) command input structure
> + * @req_len: length of @req
> + * @timeout: if the user wants to override the driver's default, 0 otherwise
> + * @num_dma: number of DMA buffers to be added to @req
> + * @payload: DMA buffer details in struct fwctl_dma_info_bnxt format
That comment has me confused. If it's in that format, why not use that structure?
Also it doesn't fit which confused me even more!
Ah. I checked the usage. It's a pointer. Say that in the comment.
It's a bit of a pitty can't type it such that the pointer version of __counted_by
will work on payload. Never mind...
> + */
> +struct fwctl_rpc_bnxt {
> + __aligned_u64 req;
> + __u32 req_len;
> + __u32 timeout;
> + __u32 num_dma;
> + __aligned_u64 payload;
> +};
> +#endif
On Fri, Jan 30, 2026 at 12:03:57PM +0000, Jonathan Cameron wrote: > > --- a/drivers/fwctl/Makefile > > +++ b/drivers/fwctl/Makefile > > @@ -2,5 +2,6 @@ > > obj-$(CONFIG_FWCTL) += fwctl.o > > obj-$(CONFIG_FWCTL_MLX5) += mlx5/ > > obj-$(CONFIG_FWCTL_PDS) += pds/ > > +obj-$(CONFIG_FWCTL_BNXT) += bnxt/ > How are we ordering these? Alphabetical maybe? Yes, please sort all the things, including the kconfig Jason
© 2016 - 2026 Red Hat, Inc.