Add async event queue interfaces initialization.
It allows driver to handle async events reported by HW.
Co-developed-by: Xin Guo <guoxin09@huawei.com>
Signed-off-by: Xin Guo <guoxin09@huawei.com>
Co-developed-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Zhu Yikai <zhuyikai1@h-partners.com>
Signed-off-by: Fan Gong <gongfan1@huawei.com>
---
drivers/net/ethernet/huawei/hinic3/Makefile | 1 +
.../ethernet/huawei/hinic3/hinic3_common.c | 13 +
.../ethernet/huawei/hinic3/hinic3_common.h | 2 +
.../net/ethernet/huawei/hinic3/hinic3_csr.h | 65 +++
.../net/ethernet/huawei/hinic3/hinic3_eqs.c | 503 ++++++++++++++++++
.../net/ethernet/huawei/hinic3/hinic3_eqs.h | 91 ++++
.../ethernet/huawei/hinic3/hinic3_hw_cfg.c | 43 ++
.../net/ethernet/huawei/hinic3/hinic3_hwif.c | 28 +
.../net/ethernet/huawei/hinic3/hinic3_hwif.h | 5 +
9 files changed, 751 insertions(+)
diff --git a/drivers/net/ethernet/huawei/hinic3/Makefile b/drivers/net/ethernet/huawei/hinic3/Makefile
index 509dfbfb0e96..5fb4d1370049 100644
--- a/drivers/net/ethernet/huawei/hinic3/Makefile
+++ b/drivers/net/ethernet/huawei/hinic3/Makefile
@@ -4,6 +4,7 @@
obj-$(CONFIG_HINIC3) += hinic3.o
hinic3-objs := hinic3_common.o \
+ hinic3_eqs.o \
hinic3_hw_cfg.o \
hinic3_hw_comm.o \
hinic3_hwdev.o \
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
index 0aa42068728c..a5aaf6febba9 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
@@ -51,3 +51,16 @@ void hinic3_dma_free_coherent_align(struct device *dev,
dma_free_coherent(dev, mem_align->real_size,
mem_align->ori_vaddr, mem_align->ori_paddr);
}
+
+/* Data provided to/by cmdq is arranged in structs with little endian fields but
+ * every dword (32bits) should be swapped since HW swaps it again when it
+ * copies it from/to host memory.
+ */
+void hinic3_cmdq_buf_swab32(void *data, int len)
+{
+ u32 *mem = data;
+ u32 i;
+
+ for (i = 0; i < len / sizeof(u32); i++)
+ mem[i] = swab32(mem[i]);
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
index bb795dace04c..c8e8a491adbf 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h
@@ -24,4 +24,6 @@ int hinic3_dma_zalloc_coherent_align(struct device *dev, u32 size, u32 align,
void hinic3_dma_free_coherent_align(struct device *dev,
struct hinic3_dma_addr_align *mem_align);
+void hinic3_cmdq_buf_swab32(void *data, int len);
+
#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h
new file mode 100644
index 000000000000..39e15fbf0ed7
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */
+
+#ifndef _HINIC3_CSR_H_
+#define _HINIC3_CSR_H_
+
+#define HINIC3_CFG_REGS_FLAG 0x40000000
+#define HINIC3_REGS_FLAG_MASK 0x3FFFFFFF
+
+#define HINIC3_VF_CFG_REG_OFFSET 0x2000
+
+/* HW interface registers */
+#define HINIC3_CSR_FUNC_ATTR0_ADDR (HINIC3_CFG_REGS_FLAG + 0x0)
+#define HINIC3_CSR_FUNC_ATTR1_ADDR (HINIC3_CFG_REGS_FLAG + 0x4)
+#define HINIC3_CSR_FUNC_ATTR2_ADDR (HINIC3_CFG_REGS_FLAG + 0x8)
+#define HINIC3_CSR_FUNC_ATTR3_ADDR (HINIC3_CFG_REGS_FLAG + 0xC)
+#define HINIC3_CSR_FUNC_ATTR4_ADDR (HINIC3_CFG_REGS_FLAG + 0x10)
+#define HINIC3_CSR_FUNC_ATTR5_ADDR (HINIC3_CFG_REGS_FLAG + 0x14)
+#define HINIC3_CSR_FUNC_ATTR6_ADDR (HINIC3_CFG_REGS_FLAG + 0x18)
+
+#define HINIC3_FUNC_CSR_MAILBOX_DATA_OFF 0x80
+#define HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF (HINIC3_CFG_REGS_FLAG + 0x0100)
+#define HINIC3_FUNC_CSR_MAILBOX_INT_OFF (HINIC3_CFG_REGS_FLAG + 0x0104)
+#define HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF (HINIC3_CFG_REGS_FLAG + 0x0108)
+#define HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF (HINIC3_CFG_REGS_FLAG + 0x010C)
+
+#define HINIC3_CSR_DMA_ATTR_TBL_ADDR (HINIC3_CFG_REGS_FLAG + 0x380)
+#define HINIC3_CSR_DMA_ATTR_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x390)
+
+/* MSI-X registers */
+#define HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR (HINIC3_CFG_REGS_FLAG + 0x58)
+
+#define HINIC3_MSI_CLR_INDIR_RESEND_TIMER_CLR_MASK BIT(0)
+#define HINIC3_MSI_CLR_INDIR_INT_MSK_SET_MASK BIT(1)
+#define HINIC3_MSI_CLR_INDIR_INT_MSK_CLR_MASK BIT(2)
+#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_SET_MASK BIT(3)
+#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_CLR_MASK BIT(4)
+#define HINIC3_MSI_CLR_INDIR_SIMPLE_INDIR_IDX_MASK GENMASK(31, 22)
+#define HINIC3_MSI_CLR_INDIR_SET(val, member) \
+ FIELD_PREP(HINIC3_MSI_CLR_INDIR_##member##_MASK, val)
+
+/* EQ registers */
+#define HINIC3_AEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x210)
+
+#define HINIC3_EQ_INDIR_IDX_ADDR(type) \
+ HINIC3_AEQ_INDIR_IDX_ADDR
+
+#define HINIC3_AEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x240)
+
+#define HINIC3_CSR_EQ_PAGE_OFF_STRIDE 8
+
+#define HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) \
+ (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \
+ HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC3_CSR_AEQ_CTRL_0_ADDR (HINIC3_CFG_REGS_FLAG + 0x200)
+#define HINIC3_CSR_AEQ_CTRL_1_ADDR (HINIC3_CFG_REGS_FLAG + 0x204)
+#define HINIC3_CSR_AEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x20C)
+#define HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x50)
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c
new file mode 100644
index 000000000000..5c1cb895b455
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+
+#include <linux/delay.h>
+
+#include "hinic3_csr.h"
+#include "hinic3_eqs.h"
+#include "hinic3_hwdev.h"
+#include "hinic3_hwif.h"
+#include "hinic3_mbox.h"
+
+#define AEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0)
+#define AEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12)
+#define AEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(22, 20)
+#define AEQ_CTRL_0_INTR_MODE_MASK BIT(31)
+#define AEQ_CTRL_0_SET(val, member) \
+ FIELD_PREP(AEQ_CTRL_0_##member##_MASK, val)
+
+#define AEQ_CTRL_1_LEN_MASK GENMASK(20, 0)
+#define AEQ_CTRL_1_ELEM_SIZE_MASK GENMASK(25, 24)
+#define AEQ_CTRL_1_PAGE_SIZE_MASK GENMASK(31, 28)
+#define AEQ_CTRL_1_SET(val, member) \
+ FIELD_PREP(AEQ_CTRL_1_##member##_MASK, val)
+
+#define EQ_ELEM_DESC_TYPE_MASK GENMASK(6, 0)
+#define EQ_ELEM_DESC_SRC_MASK BIT(7)
+#define EQ_ELEM_DESC_SIZE_MASK GENMASK(15, 8)
+#define EQ_ELEM_DESC_WRAPPED_MASK BIT(31)
+#define EQ_ELEM_DESC_GET(val, member) \
+ FIELD_GET(EQ_ELEM_DESC_##member##_MASK, val)
+
+#define EQ_CI_SIMPLE_INDIR_CI_MASK GENMASK(20, 0)
+#define EQ_CI_SIMPLE_INDIR_ARMED_MASK BIT(21)
+#define EQ_CI_SIMPLE_INDIR_AEQ_IDX_MASK GENMASK(31, 30)
+#define EQ_CI_SIMPLE_INDIR_SET(val, member) \
+ FIELD_PREP(EQ_CI_SIMPLE_INDIR_##member##_MASK, val)
+
+#define EQ_CI_SIMPLE_INDIR_REG_ADDR \
+ HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR
+
+#define EQ_PROD_IDX_REG_ADDR \
+ HINIC3_CSR_AEQ_PROD_IDX_ADDR
+
+#define EQ_HI_PHYS_ADDR_REG(type, pg_num) \
+ HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num)
+
+#define EQ_LO_PHYS_ADDR_REG(type, pg_num) \
+ HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num)
+
+#define EQ_MSIX_RESEND_TIMER_CLEAR 1
+
+#define HINIC3_EQ_MAX_PAGES \
+ HINIC3_AEQ_MAX_PAGES
+
+#define HINIC3_TASK_PROCESS_EQE_LIMIT 1024
+#define HINIC3_EQ_UPDATE_CI_STEP 64
+#define HINIC3_EQS_WQ_NAME "hinic3_eqs"
+
+#define HINIC3_EQ_VALID_SHIFT 31
+#define HINIC3_EQ_WRAPPED(eq) \
+ ((eq)->wrapped << HINIC3_EQ_VALID_SHIFT)
+
+#define HINIC3_EQ_WRAPPED_SHIFT 20
+#define HINIC3_EQ_CONS_IDX(eq) \
+ ((eq)->cons_idx | ((eq)->wrapped << HINIC3_EQ_WRAPPED_SHIFT))
+
+static const struct hinic3_aeq_elem *get_curr_aeq_elem(const struct hinic3_eq *eq)
+{
+ return get_q_element(&eq->qpages, eq->cons_idx, NULL);
+}
+
+int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event,
+ hinic3_aeq_event_cb hwe_cb)
+{
+ struct hinic3_aeqs *aeqs;
+ unsigned long *cb_state;
+
+ aeqs = hwdev->aeqs;
+ cb_state = &aeqs->aeq_cb_state[event];
+ aeqs->aeq_cb[event] = hwe_cb;
+ spin_lock_init(&aeqs->aeq_lock);
+
+ return 0;
+}
+
+void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event)
+{
+ struct hinic3_aeqs *aeqs;
+ unsigned long *cb_state;
+
+ aeqs = hwdev->aeqs;
+ cb_state = &aeqs->aeq_cb_state[event];
+
+ spin_lock_bh(&aeqs->aeq_lock);
+ aeqs->aeq_cb[event] = NULL;
+ spin_unlock_bh(&aeqs->aeq_lock);
+}
+
+/* Set consumer index in the hw. */
+static void set_eq_cons_idx(struct hinic3_eq *eq, u32 arm_state)
+{
+ u32 addr = EQ_CI_SIMPLE_INDIR_REG_ADDR;
+ u32 eq_wrap_ci, val;
+
+ eq_wrap_ci = HINIC3_EQ_CONS_IDX(eq);
+ val = EQ_CI_SIMPLE_INDIR_SET(arm_state, ARMED) |
+ EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) |
+ EQ_CI_SIMPLE_INDIR_SET(eq->q_id, AEQ_IDX);
+
+ hinic3_hwif_write_reg(eq->hwdev->hwif, addr, val);
+}
+
+static struct hinic3_aeqs *aeq_to_aeqs(const struct hinic3_eq *eq)
+{
+ return container_of(eq, struct hinic3_aeqs, aeq[eq->q_id]);
+}
+
+static void aeq_event_handler(struct hinic3_aeqs *aeqs, u32 aeqe,
+ const struct hinic3_aeq_elem *aeqe_pos)
+{
+ struct hinic3_hwdev *hwdev = aeqs->hwdev;
+ u8 data[HINIC3_AEQE_DATA_SIZE], size;
+ enum hinic3_aeq_type event;
+ hinic3_aeq_event_cb hwe_cb;
+ unsigned long *cb_state;
+
+ if (EQ_ELEM_DESC_GET(aeqe, SRC))
+ return;
+
+ event = EQ_ELEM_DESC_GET(aeqe, TYPE);
+ if (event >= HINIC3_MAX_AEQ_EVENTS) {
+ dev_warn(hwdev->dev, "Aeq unknown event:%d\n", event);
+ return;
+ }
+
+ memcpy(data, aeqe_pos->aeqe_data, HINIC3_AEQE_DATA_SIZE);
+ hinic3_cmdq_buf_swab32(data, HINIC3_AEQE_DATA_SIZE);
+ size = EQ_ELEM_DESC_GET(aeqe, SIZE);
+ cb_state = &aeqs->aeq_cb_state[event];
+
+ spin_lock_bh(&aeqs->aeq_lock);
+ hwe_cb = aeqs->aeq_cb[event];
+ if (hwe_cb)
+ hwe_cb(aeqs->hwdev, data, size);
+ spin_unlock_bh(&aeqs->aeq_lock);
+}
+
+static int aeq_irq_handler(struct hinic3_eq *eq)
+{
+ const struct hinic3_aeq_elem *aeqe_pos;
+ struct hinic3_aeqs *aeqs;
+ u32 i, eqe_cnt = 0;
+ u32 aeqe;
+
+ aeqs = aeq_to_aeqs(eq);
+ for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) {
+ aeqe_pos = get_curr_aeq_elem(eq);
+ aeqe = be32_to_cpu(aeqe_pos->desc);
+ /* HW updates wrapped bit, when it adds eq element event */
+ if (EQ_ELEM_DESC_GET(aeqe, WRAPPED) == eq->wrapped)
+ return 0;
+
+ /* Prevent speculative reads from element */
+ dma_rmb();
+ aeq_event_handler(aeqs, aeqe, aeqe_pos);
+ eq->cons_idx++;
+ if (eq->cons_idx == eq->eq_len) {
+ eq->cons_idx = 0;
+ eq->wrapped = !eq->wrapped;
+ }
+
+ if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) {
+ eqe_cnt = 0;
+ set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED);
+ }
+ }
+
+ return -EAGAIN;
+}
+
+static void reschedule_eq_handler(struct hinic3_eq *eq)
+{
+ struct hinic3_aeqs *aeqs = aeq_to_aeqs(eq);
+
+ queue_work(aeqs->workq, &eq->aeq_work);
+}
+
+static int eq_irq_handler(struct hinic3_eq *eq)
+{
+ int err;
+
+ err = aeq_irq_handler(eq);
+
+ set_eq_cons_idx(eq, err ? HINIC3_EQ_NOT_ARMED :
+ HINIC3_EQ_ARMED);
+
+ return err;
+}
+
+static void eq_irq_work(struct work_struct *work)
+{
+ struct hinic3_eq *eq = container_of(work, struct hinic3_eq, aeq_work);
+ int err;
+
+ err = eq_irq_handler(eq);
+ if (err)
+ reschedule_eq_handler(eq);
+}
+
+static irqreturn_t aeq_interrupt(int irq, void *data)
+{
+ struct workqueue_struct *workq;
+ struct hinic3_eq *aeq = data;
+ struct hinic3_hwdev *hwdev;
+ struct hinic3_aeqs *aeqs;
+
+ aeqs = aeq_to_aeqs(aeq);
+ hwdev = aeq->hwdev;
+
+ /* clear resend timer cnt register */
+ workq = aeqs->workq;
+ hinic3_msix_intr_clear_resend_bit(hwdev, aeq->msix_entry_idx,
+ EQ_MSIX_RESEND_TIMER_CLEAR);
+ queue_work(workq, &aeq->aeq_work);
+
+ return IRQ_HANDLED;
+}
+
+static int set_eq_ctrls(struct hinic3_eq *eq)
+{
+ struct hinic3_hwif *hwif = eq->hwdev->hwif;
+ struct hinic3_queue_pages *qpages;
+ u8 pci_intf_idx, elem_size;
+ u32 mask, ctrl0, ctrl1;
+ u32 page_size_val;
+
+ qpages = &eq->qpages;
+ page_size_val = ilog2(qpages->page_size / HINIC3_MIN_PAGE_SIZE);
+ pci_intf_idx = hwif->attr.pci_intf_idx;
+
+ /* set ctrl0 using read-modify-write */
+ mask = AEQ_CTRL_0_INTR_IDX_MASK |
+ AEQ_CTRL_0_DMA_ATTR_MASK |
+ AEQ_CTRL_0_PCI_INTF_IDX_MASK |
+ AEQ_CTRL_0_INTR_MODE_MASK;
+ ctrl0 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR);
+ ctrl0 = (ctrl0 & ~mask) |
+ AEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) |
+ AEQ_CTRL_0_SET(0, DMA_ATTR) |
+ AEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) |
+ AEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE);
+ hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR, ctrl0);
+
+ /* HW expects log2(number of 32 byte units). */
+ elem_size = qpages->elem_size_shift - 5;
+ ctrl1 = AEQ_CTRL_1_SET(eq->eq_len, LEN) |
+ AEQ_CTRL_1_SET(elem_size, ELEM_SIZE) |
+ AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
+ hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_1_ADDR, ctrl1);
+
+ return 0;
+}
+
+static void aeq_elements_init(struct hinic3_eq *eq, u32 init_val)
+{
+ struct hinic3_aeq_elem *aeqe;
+ u32 i;
+
+ for (i = 0; i < eq->eq_len; i++) {
+ aeqe = get_q_element(&eq->qpages, i, NULL);
+ aeqe->desc = cpu_to_be32(init_val);
+ }
+
+ wmb(); /* Clear aeq elements bit */
+}
+
+static void eq_elements_init(struct hinic3_eq *eq, u32 init_val)
+{
+ aeq_elements_init(eq, init_val);
+}
+
+static int alloc_eq_pages(struct hinic3_eq *eq)
+{
+ struct hinic3_hwif *hwif = eq->hwdev->hwif;
+ struct hinic3_queue_pages *qpages;
+ dma_addr_t page_paddr;
+ u32 reg, init_val;
+ u16 pg_idx;
+ int err;
+
+ qpages = &eq->qpages;
+ err = hinic3_queue_pages_alloc(eq->hwdev, qpages, HINIC3_MIN_PAGE_SIZE);
+ if (err)
+ return err;
+
+ for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) {
+ page_paddr = qpages->pages[pg_idx].align_paddr;
+ reg = EQ_HI_PHYS_ADDR_REG(eq->type, pg_idx);
+ hinic3_hwif_write_reg(hwif, reg, upper_32_bits(page_paddr));
+ reg = EQ_LO_PHYS_ADDR_REG(eq->type, pg_idx);
+ hinic3_hwif_write_reg(hwif, reg, lower_32_bits(page_paddr));
+ }
+
+ init_val = HINIC3_EQ_WRAPPED(eq);
+ eq_elements_init(eq, init_val);
+
+ return 0;
+}
+
+static void eq_calc_page_size_and_num(struct hinic3_eq *eq, u32 elem_size)
+{
+ u32 max_pages, min_page_size, page_size, total_size;
+
+ /* No need for complicated arithmetics. All values must be power of 2.
+ * Multiplications give power of 2 and divisions give power of 2 without
+ * remainder.
+ */
+ max_pages = HINIC3_EQ_MAX_PAGES;
+ min_page_size = HINIC3_MIN_PAGE_SIZE;
+ total_size = eq->eq_len * elem_size;
+
+ if (total_size <= max_pages * min_page_size)
+ page_size = min_page_size;
+ else
+ page_size = total_size / max_pages;
+
+ hinic3_queue_pages_init(&eq->qpages, eq->eq_len, page_size, elem_size);
+}
+
+static int request_eq_irq(struct hinic3_eq *eq)
+{
+ INIT_WORK(&eq->aeq_work, eq_irq_work);
+ snprintf(eq->irq_name, sizeof(eq->irq_name),
+ "hinic3_aeq%u@pci:%s", eq->q_id,
+ pci_name(eq->hwdev->pdev));
+
+ return request_irq(eq->irq_id, aeq_interrupt, 0,
+ eq->irq_name, eq);
+}
+
+static void reset_eq(struct hinic3_eq *eq)
+{
+ /* clear eq_len to force eqe drop in hardware */
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_CSR_AEQ_CTRL_1_ADDR, 0);
+
+ hinic3_hwif_write_reg(eq->hwdev->hwif, EQ_PROD_IDX_REG_ADDR, 0);
+}
+
+static int init_eq(struct hinic3_eq *eq, struct hinic3_hwdev *hwdev, u16 q_id,
+ u32 q_len, enum hinic3_eq_type type,
+ struct msix_entry *msix_entry)
+{
+ u32 elem_size;
+ int err;
+
+ eq->hwdev = hwdev;
+ eq->q_id = q_id;
+ eq->type = type;
+ eq->eq_len = q_len;
+
+ /* Indirect access should set q_id first */
+ hinic3_hwif_write_reg(hwdev->hwif, HINIC3_EQ_INDIR_IDX_ADDR(eq->type),
+ eq->q_id);
+
+ reset_eq(eq);
+
+ eq->cons_idx = 0;
+ eq->wrapped = 0;
+
+ elem_size = HINIC3_AEQE_SIZE;
+ eq_calc_page_size_and_num(eq, elem_size);
+
+ err = alloc_eq_pages(eq);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to allocate pages for eq\n");
+ return err;
+ }
+
+ eq->msix_entry_idx = msix_entry->entry;
+ eq->irq_id = msix_entry->vector;
+
+ err = set_eq_ctrls(eq);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to set ctrls for eq\n");
+ goto err_free_queue_pages;
+ }
+
+ set_eq_cons_idx(eq, HINIC3_EQ_ARMED);
+
+ err = request_eq_irq(eq);
+ if (err) {
+ dev_err(hwdev->dev,
+ "Failed to request irq for the eq, err: %d\n", err);
+ goto err_free_queue_pages;
+ }
+
+ hinic3_set_msix_state(hwdev, eq->msix_entry_idx, HINIC3_MSIX_DISABLE);
+
+ return 0;
+
+err_free_queue_pages:
+ hinic3_queue_pages_free(hwdev, &eq->qpages);
+
+ return err;
+}
+
+static void remove_eq(struct hinic3_eq *eq)
+{
+ hinic3_set_msix_state(eq->hwdev, eq->msix_entry_idx,
+ HINIC3_MSIX_DISABLE);
+ free_irq(eq->irq_id, eq);
+ /* Indirect access should set q_id first */
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_EQ_INDIR_IDX_ADDR(eq->type),
+ eq->q_id);
+
+ cancel_work_sync(&eq->aeq_work);
+ /* clear eq_len to avoid hw access host memory */
+ hinic3_hwif_write_reg(eq->hwdev->hwif,
+ HINIC3_CSR_AEQ_CTRL_1_ADDR, 0);
+
+ /* update consumer index to avoid invalid interrupt */
+ eq->cons_idx = hinic3_hwif_read_reg(eq->hwdev->hwif,
+ EQ_PROD_IDX_REG_ADDR);
+ set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED);
+ hinic3_queue_pages_free(eq->hwdev, &eq->qpages);
+}
+
+int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs,
+ struct msix_entry *msix_entries)
+{
+ struct hinic3_aeqs *aeqs;
+ u16 q_id;
+ int err;
+
+ aeqs = kzalloc(sizeof(*aeqs), GFP_KERNEL);
+ if (!aeqs)
+ return -ENOMEM;
+
+ hwdev->aeqs = aeqs;
+ aeqs->hwdev = hwdev;
+ aeqs->num_aeqs = num_aeqs;
+ aeqs->workq = alloc_workqueue(HINIC3_EQS_WQ_NAME, WQ_MEM_RECLAIM,
+ HINIC3_MAX_AEQS);
+ if (!aeqs->workq) {
+ dev_err(hwdev->dev, "Failed to initialize aeq workqueue\n");
+ err = -ENOMEM;
+ goto err_free_aeqs;
+ }
+
+ for (q_id = 0; q_id < num_aeqs; q_id++) {
+ err = init_eq(&aeqs->aeq[q_id], hwdev, q_id,
+ HINIC3_DEFAULT_AEQ_LEN, HINIC3_AEQ,
+ &msix_entries[q_id]);
+ if (err) {
+ dev_err(hwdev->dev, "Failed to init aeq %u\n",
+ q_id);
+ goto err_remove_eqs;
+ }
+ }
+ for (q_id = 0; q_id < num_aeqs; q_id++)
+ hinic3_set_msix_state(hwdev, aeqs->aeq[q_id].msix_entry_idx,
+ HINIC3_MSIX_ENABLE);
+
+ return 0;
+
+err_remove_eqs:
+ while (q_id > 0) {
+ q_id--;
+ remove_eq(&aeqs->aeq[q_id]);
+ }
+
+ destroy_workqueue(aeqs->workq);
+
+err_free_aeqs:
+ kfree(aeqs);
+
+ return err;
+}
+
+void hinic3_aeqs_free(struct hinic3_hwdev *hwdev)
+{
+ struct hinic3_aeqs *aeqs = hwdev->aeqs;
+ enum hinic3_aeq_type aeq_event;
+ struct hinic3_eq *eq;
+ u16 q_id;
+
+ for (q_id = 0; q_id < aeqs->num_aeqs; q_id++) {
+ eq = aeqs->aeq + q_id;
+ remove_eq(eq);
+ hinic3_free_irq(hwdev, eq->irq_id);
+ }
+
+ for (aeq_event = 0; aeq_event < HINIC3_MAX_AEQ_EVENTS; aeq_event++)
+ hinic3_aeq_unregister_cb(hwdev, aeq_event);
+
+ destroy_workqueue(aeqs->workq);
+
+ kfree(aeqs);
+}
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h
new file mode 100644
index 000000000000..a41b5cc9fed2
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */
+
+#ifndef _HINIC3_EQS_H_
+#define _HINIC3_EQS_H_
+
+#include <linux/interrupt.h>
+
+#include "hinic3_hw_cfg.h"
+#include "hinic3_queue_common.h"
+
+#define HINIC3_MAX_AEQS 4
+
+#define HINIC3_AEQ_MAX_PAGES 4
+
+#define HINIC3_AEQE_SIZE 64
+
+#define HINIC3_AEQE_DESC_SIZE 4
+#define HINIC3_AEQE_DATA_SIZE (HINIC3_AEQE_SIZE - HINIC3_AEQE_DESC_SIZE)
+
+#define HINIC3_DEFAULT_AEQ_LEN 0x10000
+
+#define HINIC3_EQ_IRQ_NAME_LEN 64
+
+#define HINIC3_EQ_USLEEP_LOW_BOUND 900
+#define HINIC3_EQ_USLEEP_HIGH_BOUND 1000
+
+enum hinic3_eq_type {
+ HINIC3_AEQ = 0,
+};
+
+enum hinic3_eq_intr_mode {
+ HINIC3_INTR_MODE_ARMED = 0,
+ HINIC3_INTR_MODE_ALWAYS = 1,
+};
+
+enum hinic3_eq_ci_arm_state {
+ HINIC3_EQ_NOT_ARMED = 0,
+ HINIC3_EQ_ARMED = 1,
+};
+
+struct hinic3_eq {
+ struct hinic3_hwdev *hwdev;
+ struct hinic3_queue_pages qpages;
+ u16 q_id;
+ enum hinic3_eq_type type;
+ u32 eq_len;
+ u32 cons_idx;
+ u8 wrapped;
+ u32 irq_id;
+ u16 msix_entry_idx;
+ char irq_name[HINIC3_EQ_IRQ_NAME_LEN];
+ struct work_struct aeq_work;
+};
+
+struct hinic3_aeq_elem {
+ u8 aeqe_data[HINIC3_AEQE_DATA_SIZE];
+ __be32 desc;
+};
+
+enum hinic3_aeq_type {
+ HINIC3_HW_INTER_INT = 0,
+ HINIC3_MBX_FROM_FUNC = 1,
+ HINIC3_MSG_FROM_FW = 2,
+ HINIC3_MAX_AEQ_EVENTS = 6,
+};
+
+typedef void (*hinic3_aeq_event_cb)(struct hinic3_hwdev *hwdev, u8 *data,
+ u8 size);
+
+struct hinic3_aeqs {
+ struct hinic3_hwdev *hwdev;
+ hinic3_aeq_event_cb aeq_cb[HINIC3_MAX_AEQ_EVENTS];
+ unsigned long aeq_cb_state[HINIC3_MAX_AEQ_EVENTS];
+ struct hinic3_eq aeq[HINIC3_MAX_AEQS];
+ u16 num_aeqs;
+ struct workqueue_struct *workq;
+ /* lock for aeq event flag */
+ spinlock_t aeq_lock;
+};
+
+int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs,
+ struct msix_entry *msix_entries);
+void hinic3_aeqs_free(struct hinic3_hwdev *hwdev);
+int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event,
+ hinic3_aeq_event_cb hwe_cb);
+void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev,
+ enum hinic3_aeq_type event);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
index 87d9450c30ca..0599fc4f3fb0 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c
@@ -8,6 +8,49 @@
#include "hinic3_hwif.h"
#include "hinic3_mbox.h"
+int hinic3_alloc_irqs(struct hinic3_hwdev *hwdev, u16 num,
+ struct msix_entry *alloc_arr, u16 *act_num)
+{
+ struct hinic3_irq_info *irq_info;
+ struct hinic3_irq *curr;
+ u16 i, found = 0;
+
+ irq_info = &hwdev->cfg_mgmt->irq_info;
+ mutex_lock(&irq_info->irq_mutex);
+ for (i = 0; i < irq_info->num_irq && found < num; i++) {
+ curr = irq_info->irq + i;
+ if (curr->allocated)
+ continue;
+ curr->allocated = true;
+ alloc_arr[found].vector = curr->irq_id;
+ alloc_arr[found].entry = curr->msix_entry_idx;
+ found++;
+ }
+ mutex_unlock(&irq_info->irq_mutex);
+
+ *act_num = found;
+
+ return found == 0 ? -ENOMEM : 0;
+}
+
+void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id)
+{
+ struct hinic3_irq_info *irq_info;
+ struct hinic3_irq *curr;
+ u16 i;
+
+ irq_info = &hwdev->cfg_mgmt->irq_info;
+ mutex_lock(&irq_info->irq_mutex);
+ for (i = 0; i < irq_info->num_irq; i++) {
+ curr = irq_info->irq + i;
+ if (curr->irq_id == irq_id) {
+ curr->allocated = false;
+ break;
+ }
+ }
+ mutex_unlock(&irq_info->irq_mutex);
+}
+
bool hinic3_support_nic(struct hinic3_hwdev *hwdev)
{
return hwdev->cfg_mgmt->cap.supp_svcs_bitmap &
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
index 0865453bf0e7..27baa9693d20 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c
@@ -6,15 +6,43 @@
#include <linux/io.h>
#include "hinic3_common.h"
+#include "hinic3_csr.h"
#include "hinic3_hwdev.h"
#include "hinic3_hwif.h"
+#define HINIC3_GET_REG_ADDR(reg) ((reg) & (HINIC3_REGS_FLAG_MASK))
+
+static void __iomem *hinic3_reg_addr(struct hinic3_hwif *hwif, u32 reg)
+{
+ return hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg);
+}
+
+u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg)
+{
+ void __iomem *addr = hinic3_reg_addr(hwif, reg);
+
+ return ioread32be(addr);
+}
+
+void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val)
+{
+ void __iomem *addr = hinic3_reg_addr(hwif, reg);
+
+ iowrite32be(val, addr);
+}
+
void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
enum hinic3_msix_state flag)
{
/* Completed by later submission due to LoC limit. */
}
+void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ u8 clear_resend_en)
+{
+ /* Completed by later submission due to LoC limit. */
+}
+
u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev)
{
return hwdev->hwif->attr.func_global_idx;
diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
index 513c9680e6b6..2e300fb0ba25 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h
@@ -50,8 +50,13 @@ enum hinic3_msix_state {
HINIC3_MSIX_DISABLE,
};
+u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg);
+void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val);
+
void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx,
enum hinic3_msix_state flag);
+void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx,
+ u8 clear_resend_en);
u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev);
--
2.43.0
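To see how the interfaces added by this patch are meant to be consumed, here
is a minimal sketch of a caller registering an AEQ event handler. Only
hinic3_aeq_register_cb(), hinic3_aeq_unregister_cb() and the
HINIC3_MBX_FROM_FUNC event type come from the patch above; the handler name
and its empty body are illustrative assumptions (the real consumers arrive
later in the series).

#include "hinic3_eqs.h"
#include "hinic3_hwdev.h"

/* Hypothetical consumer of the AEQ API. The hinic3_aeq_event_cb
 * signature is (hwdev, data, size); data points at the 60-byte AEQE
 * payload, which aeq_event_handler() has already copied out and
 * dword-swapped back to its original layout before the callback runs.
 */
static void example_mbox_aeq_handler(struct hinic3_hwdev *hwdev, u8 *data,
				     u8 size)
{
	/* parse the event payload here */
}

static int example_register(struct hinic3_hwdev *hwdev)
{
	return hinic3_aeq_register_cb(hwdev, HINIC3_MBX_FROM_FUNC,
				      example_mbox_aeq_handler);
}

static void example_unregister(struct hinic3_hwdev *hwdev)
{
	hinic3_aeq_unregister_cb(hwdev, HINIC3_MBX_FROM_FUNC);
}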
Hi Fan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 5e95c0a3a55aea490420bd6994805edb050cc86b]

url:    https://github.com/intel-lab-lkp/linux/commits/Fan-Gong/hinic3-Async-Event-Queue-interfaces/20250722-152434
base:   5e95c0a3a55aea490420bd6994805edb050cc86b
patch link:    https://lore.kernel.org/r/bea50c6c329c5ffb77cfe059e07eeed187619346.1753152592.git.zhuyikai1%40h-partners.com
patch subject: [PATCH net-next v10 1/8] hinic3: Async Event Queue interfaces
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250723/202507231813.7ti3bdJj-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250723/202507231813.7ti3bdJj-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507231813.7ti3bdJj-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c:77:17: warning: variable 'cb_state' set but not used [-Wunused-but-set-variable]
      77 |         unsigned long *cb_state;
         |                        ^
   drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c:91:17: warning: variable 'cb_state' set but not used [-Wunused-but-set-variable]
      91 |         unsigned long *cb_state;
         |                        ^
   drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c:127:17: warning: variable 'cb_state' set but not used [-Wunused-but-set-variable]
     127 |         unsigned long *cb_state;
         |                        ^
   3 warnings generated.

vim +/cb_state +77 drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c

    71
    72	int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
    73				   enum hinic3_aeq_type event,
    74				   hinic3_aeq_event_cb hwe_cb)
    75	{
    76		struct hinic3_aeqs *aeqs;
  > 77		unsigned long *cb_state;
    78
    79		aeqs = hwdev->aeqs;
    80		cb_state = &aeqs->aeq_cb_state[event];
    81		aeqs->aeq_cb[event] = hwe_cb;
    82		spin_lock_init(&aeqs->aeq_lock);
    83
    84		return 0;
    85	}
    86

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Tue, Jul 22, 2025 at 03:18:40PM +0800, Fan Gong wrote:
> Add async event queue interfaces initialization.
> It allows driver to handle async events reported by HW.
>
> Co-developed-by: Xin Guo <guoxin09@huawei.com>
> Signed-off-by: Xin Guo <guoxin09@huawei.com>
> Co-developed-by: Zhu Yikai <zhuyikai1@h-partners.com>
> Signed-off-by: Zhu Yikai <zhuyikai1@h-partners.com>
> Signed-off-by: Fan Gong <gongfan1@huawei.com>

...

> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
> index 0aa42068728c..a5aaf6febba9 100644
> --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
> +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c
> @@ -51,3 +51,16 @@ void hinic3_dma_free_coherent_align(struct device *dev,
> 	dma_free_coherent(dev, mem_align->real_size,
> 			  mem_align->ori_vaddr, mem_align->ori_paddr);
> }
> +
> +/* Data provided to/by cmdq is arranged in structs with little endian fields but
> + * every dword (32bits) should be swapped since HW swaps it again when it
> + * copies it from/to host memory.
> + */

This scheme may work on little endian hosts.
But if so it seems unlikely to work on big endian hosts.

I expect you want be32_to_cpu_array() for data coming from hw,
with a source buffer as an array of __be32 while
the destination buffer is an array of u32.

And cpu_to_be32_array() for data going to the hw,
with the types of the source and destination buffers reversed.

If those types don't match your data, then we have
a framework to have that discussion.

That said, it is more usual for drivers to keep structures in the byte
order they are received. Stored in structures with members with types, in
this case it seems that would be __be32, and accessed using a combination
of BIT/GENMASK, FIELD_PREP/FIELD_GET, and cpu_to_be*/be*_to_cpu (in this
case cpu_to_be32/be32_to_cpu).

An advantage of this approach is that the byte order of
data is only changed when needed. Another is that it is clear
what the byte order of data is.

> +void hinic3_cmdq_buf_swab32(void *data, int len)
> +{
> +	u32 *mem = data;
> +	u32 i;
> +
> +	for (i = 0; i < len / sizeof(u32); i++)
> +		mem[i] = swab32(mem[i]);
> +}

This seems to open code swab32_array().

...
> > +
> > +/* Data provided to/by cmdq is arranged in structs with little endian fields but
> > + * every dword (32bits) should be swapped since HW swaps it again when it
> > + * copies it from/to host memory.
> > + */
>
> This scheme may work on little endian hosts.
> But if so it seems unlikely to work on big endian hosts.
>
> I expect you want be32_to_cpu_array() for data coming from hw,
> with a source buffer as an array of __be32 while
> the destination buffer is an array of u32.
>
> And cpu_to_be32_array() for data going to the hw,
> with the types of the source and destination buffers reversed.
>
> If those types don't match your data, then we have
> a framework to have that discussion.
>
> That said, it is more usual for drivers to keep structures in the byte
> order they are received. Stored in structures with members with types, in
> this case it seems that would be __be32, and accessed using a combination
> of BIT/GENMASK, FIELD_PREP/FIELD_GET, and cpu_to_be*/be*_to_cpu (in this
> case cpu_to_be32/be32_to_cpu).
>
> An advantage of this approach is that the byte order of
> data is only changed when needed. Another is that it is clear
> what the byte order of data is.

There is a simplified example:

Here is a 64 bit little endian that may appear in cmdq:
__le64 x
After the swap it will become:
__be32 x_lo
__be32 x_hi
This is NOT __be64.
__be64 looks like this:
__be32 x_hi
__be32 x_lo

So the swapped data by HW is neither BE nor LE. In this case, we should use
swab32 to obtain the correct LE data because our driver currently supports LE.
This is for compensating for bad HW decisions.

> > +void hinic3_cmdq_buf_swab32(void *data, int len)
> > +{
> > +	u32 *mem = data;
> > +	u32 i;
> > +
> > +	for (i = 0; i < len / sizeof(u32); i++)
> > +		mem[i] = swab32(mem[i]);
> > +}
>
> This seems to open code swab32_array().

We will use swab32_array in next patch.
Besides, we will use LE for cmdq structs to avoid confusion and enhance
readability.
On Thu, Jul 24, 2025 at 09:45:51PM +0800, Fan Gong wrote:
> > > +
> > > +/* Data provided to/by cmdq is arranged in structs with little endian fields but
> > > + * every dword (32bits) should be swapped since HW swaps it again when it
> > > + * copies it from/to host memory.
> > > + */
> >
> > This scheme may work on little endian hosts.
> > But if so it seems unlikely to work on big endian hosts.
> >
> > I expect you want be32_to_cpu_array() for data coming from hw,
> > with a source buffer as an array of __be32 while
> > the destination buffer is an array of u32.
> >
> > And cpu_to_be32_array() for data going to the hw,
> > with the types of the source and destination buffers reversed.
> >
> > If those types don't match your data, then we have
> > a framework to have that discussion.
> >
> > That said, it is more usual for drivers to keep structures in the byte
> > order they are received. Stored in structures with members with types, in
> > this case it seems that would be __be32, and accessed using a combination
> > of BIT/GENMASK, FIELD_PREP/FIELD_GET, and cpu_to_be*/be*_to_cpu (in this
> > case cpu_to_be32/be32_to_cpu).
> >
> > An advantage of this approach is that the byte order of
> > data is only changed when needed. Another is that it is clear
> > what the byte order of data is.
>
> There is a simplified example:
>
> Here is a 64 bit little endian that may appear in cmdq:
> __le64 x
> After the swap it will become:
> __be32 x_lo
> __be32 x_hi
> This is NOT __be64.
> __be64 looks like this:
> __be32 x_hi
> __be32 x_lo

Sure, byte swapping 64 bit entities is different to byte swapping two
consecutive 32 bit entities. I completely agree.

>
> So the swapped data by HW is neither BE nor LE. In this case, we should use
> swab32 to obtain the correct LE data because our driver currently supports LE.
> This is for compensating for bad HW decisions.

Let us assume that the host is reading data provided by HW.

If the swab32 approach works on a little endian host
to allow the host to access 32-bit values in host byte order,
then this is because it outputs 32-bit little endian values.

But, given the same input, it will not work on a big endian host.
This is because the same little endian output will be produced,
while the host byte order is big endian.

I think you need something based on be32_to_cpu()/cpu_to_be32().
This will effectively be swab32 on little endian hosts (no change!).
And a no-op on big endian hosts (addressing my point above).

More specifically, I think you should use be32_to_cpu_array() and
cpu_to_be32_array() instead of swab32_array().

> > > +void hinic3_cmdq_buf_swab32(void *data, int len)
> > > +{
> > > +	u32 *mem = data;
> > > +	u32 i;
> > > +
> > > +	for (i = 0; i < len / sizeof(u32); i++)
> > > +		mem[i] = swab32(mem[i]);
> > > +}
> >
> > This seems to open code swab32_array().
>
> We will use swab32_array in next patch.
> Besides, we will use LE for cmdq structs to avoid confusion and enhance
> readability.

Thanks.
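For concreteness, this is what the suggested array helpers look like in use.
The wrapper names and buffers are illustrative, and the replies below explain
why the hinic3 buffer turns out not to be an array of big-endian words after
all:

#include <asm/byteorder.h>
#include <linux/types.h>

/* Sketch of the suggestion above: treat the HW buffer as __be32 words.
 * On a little endian host be32_to_cpu_array() reduces to a per-dword
 * byte swap; on a big endian host it becomes a no-op.
 */
static void example_buf_from_hw(u32 *dst, const __be32 *src, size_t words)
{
	be32_to_cpu_array(dst, src, words);
}

static void example_buf_to_hw(__be32 *dst, const u32 *src, size_t words)
{
	cpu_to_be32_array(dst, src, words);
}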
> On Thu, Jul 24, 2025 at 09:45:51PM +0800, Fan Gong wrote:
> > > > +
> > > > +/* Data provided to/by cmdq is arranged in structs with little endian fields but
> > > > + * every dword (32bits) should be swapped since HW swaps it again when it
> > > > + * copies it from/to host memory.
> > > > + */
> > >
> > > This scheme may work on little endian hosts.
> > > But if so it seems unlikely to work on big endian hosts.
> > >
> > > I expect you want be32_to_cpu_array() for data coming from hw,
> > > with a source buffer as an array of __be32 while
> > > the destination buffer is an array of u32.
> > >
> > > And cpu_to_be32_array() for data going to the hw,
> > > with the types of the source and destination buffers reversed.
> > >
> > > If those types don't match your data, then we have
> > > a framework to have that discussion.
> > >
> > > That said, it is more usual for drivers to keep structures in the byte
> > > order they are received. Stored in structures with members with types, in
> > > this case it seems that would be __be32, and accessed using a combination
> > > of BIT/GENMASK, FIELD_PREP/FIELD_GET, and cpu_to_be*/be*_to_cpu (in this
> > > case cpu_to_be32/be32_to_cpu).
> > >
> > > An advantage of this approach is that the byte order of
> > > data is only changed when needed. Another is that it is clear
> > > what the byte order of data is.
> >
> > There is a simplified example:
> >
> > Here is a 64 bit little endian that may appear in cmdq:
> > __le64 x
> > After the swap it will become:
> > __be32 x_lo
> > __be32 x_hi
> > This is NOT __be64.
> > __be64 looks like this:
> > __be32 x_hi
> > __be32 x_lo
>
> Sure, byte swapping 64 bit entities is different to byte swapping two
> consecutive 32 bit entities. I completely agree.
>
> >
> > So the swapped data by HW is neither BE nor LE. In this case, we should use
> > swab32 to obtain the correct LE data because our driver currently supports LE.
> > This is for compensating for bad HW decisions.
>
> Let us assume that the host is reading data provided by HW.
>
> If the swab32 approach works on a little endian host
> to allow the host to access 32-bit values in host byte order,
> then this is because it outputs 32-bit little endian values.

Values can be any size. 32 bit is arbitrary.

> But, given the same input, it will not work on a big endian host.
> This is because the same little endian output will be produced,
> while the host byte order is big endian.
>
> I think you need something based on be32_to_cpu()/cpu_to_be32().
> This will effectively be swab32 on little endian hosts (no change!).
> And a no-op on big endian hosts (addressing my point above).
>
> More specifically, I think you should use be32_to_cpu_array() and
> cpu_to_be32_array() instead of swab32_array().
>

Let's define a "coherent struct" as a structure made of fields that make sense
to human beings. Every field's endianness is defined and fields are arranged in
an order that "makes sense". Fields can be of any integer size 8,16,32,64 and not
necessarily naturally aligned.

swab32_array transforms a coherent struct into "byte jumble". Small fields are
reordered and larger (misaligned) fields may be split into 2 (or even 3) parts.
swab32_array is reversible so a 2nd call with byte jumble as input will produce
the original coherent struct.

hinic3 dma has "swab32_array" built in.
On send-to-device it expects a byte jumble so the DMA engine will transform it
into a coherent struct.
On receive-from-device it provides a byte jumble so the driver needs
to call swab32_array to transform it into a coherent struct.

The hinic3_cmdq_buf_swab32 function will work correctly, producing byte jumble,
on little endian and big endian hosts.

The code that runs prior to hinic3_cmdq_buf_swab32 that initializes a coherent
struct is endianness sensitive. It needs to initialize fields based on their
coherent endianness with or without byte swap. Practically, use cpu_to_le or
cpu_to_be based on the coherent definition.

Specifically, cmdq "coherent structs" in hinic3 use little endian and since
Kconfig currently declares that big endian hosts are not supported then
coherent structs are initialized without explicit cpu_to_le macros.

And this is what the comment says:

/* Data provided to/by cmdq is arranged in structs with little endian fields but
 * every dword (32bits) should be swapped since HW swaps it again when it
 * copies it from/to host memory.
 */
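To make the "coherent struct" / "byte jumble" distinction concrete, here is a
sketch with an invented 8-byte struct, tracing its bytes through one swab32
pass per dword. The struct and the byte labels are illustrative, not taken
from the driver:

#include <linux/swab.h>
#include <linux/types.h>

struct example_coherent {	/* invented, little endian fields */
	u8     a;		/* offset 0 */
	u8     b;		/* offset 1 */
	__le16 c;		/* offsets 2-3: c_lo, c_hi */
	__le32 d;		/* offsets 4-7: d0..d3, least significant first */
};

/* Coherent bytes in memory:   a    b    c_lo c_hi | d0 d1 d2 d3
 * After swab32 on each dword: c_hi c_lo b    a    | d3 d2 d1 d0
 *
 * The second row is the "byte jumble": c is byte reversed, the u8 pair
 * moves to the far end of its dword, and d is reversed in place. It is
 * neither a BE nor an LE view of the struct. A second swab32 pass
 * restores the first row, which is why the driver's single swap on
 * receive exactly cancels the swap the HW applies during DMA.
 */
static void example_unjumble(u32 *buf, unsigned int dwords)
{
	swab32_array(buf, dwords);
}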
On Thu, Jul 31, 2025 at 03:58:39PM +0300, Gur Stavi wrote:
> > On Thu, Jul 24, 2025 at 09:45:51PM +0800, Fan Gong wrote:
> > > > > +
> > > > > +/* Data provided to/by cmdq is arranged in structs with little endian fields but
> > > > > + * every dword (32bits) should be swapped since HW swaps it again when it
> > > > > + * copies it from/to host memory.
> > > > > + */
> > > >
> > > > This scheme may work on little endian hosts.
> > > > But if so it seems unlikely to work on big endian hosts.
> > > >
> > > > I expect you want be32_to_cpu_array() for data coming from hw,
> > > > with a source buffer as an array of __be32 while
> > > > the destination buffer is an array of u32.
> > > >
> > > > And cpu_to_be32_array() for data going to the hw,
> > > > with the types of the source and destination buffers reversed.
> > > >
> > > > If those types don't match your data, then we have
> > > > a framework to have that discussion.
> > > >
> > > > That said, it is more usual for drivers to keep structures in the byte
> > > > order they are received. Stored in structures with members with types, in
> > > > this case it seems that would be __be32, and accessed using a combination
> > > > of BIT/GENMASK, FIELD_PREP/FIELD_GET, and cpu_to_be*/be*_to_cpu (in this
> > > > case cpu_to_be32/be32_to_cpu).
> > > >
> > > > An advantage of this approach is that the byte order of
> > > > data is only changed when needed. Another is that it is clear
> > > > what the byte order of data is.
> > >
> > > There is a simplified example:
> > >
> > > Here is a 64 bit little endian that may appear in cmdq:
> > > __le64 x
> > > After the swap it will become:
> > > __be32 x_lo
> > > __be32 x_hi
> > > This is NOT __be64.
> > > __be64 looks like this:
> > > __be32 x_hi
> > > __be32 x_lo
> >
> > Sure, byte swapping 64 bit entities is different to byte swapping two
> > consecutive 32 bit entities. I completely agree.
> >
> > >
> > > So the swapped data by HW is neither BE nor LE. In this case, we should use
> > > swab32 to obtain the correct LE data because our driver currently supports LE.
> > > This is for compensating for bad HW decisions.
> >
> > Let us assume that the host is reading data provided by HW.
> >
> > If the swab32 approach works on a little endian host
> > to allow the host to access 32-bit values in host byte order,
> > then this is because it outputs 32-bit little endian values.
>
> Values can be any size. 32 bit is arbitrary.
>
> > But, given the same input, it will not work on a big endian host.
> > This is because the same little endian output will be produced,
> > while the host byte order is big endian.
> >
> > I think you need something based on be32_to_cpu()/cpu_to_be32().
> > This will effectively be swab32 on little endian hosts (no change!).
> > And a no-op on big endian hosts (addressing my point above).
> >
> > More specifically, I think you should use be32_to_cpu_array() and
> > cpu_to_be32_array() instead of swab32_array().
> >
>
> Let's define a "coherent struct" as a structure made of fields that make sense
> to human beings. Every field's endianness is defined and fields are arranged in
> an order that "makes sense". Fields can be of any integer size 8,16,32,64 and not
> necessarily naturally aligned.
>
> swab32_array transforms a coherent struct into "byte jumble". Small fields are
> reordered and larger (misaligned) fields may be split into 2 (or even 3) parts.
> swab32_array is reversible so a 2nd call with byte jumble as input will produce
> the original coherent struct.
>
> hinic3 dma has "swab32_array" built in.
> On send-to-device it expects a byte jumble so the DMA engine will transform it
> into a coherent struct.
> On receive-from-device it provides a byte jumble so the driver needs
> to call swab32_array to transform it into a coherent struct.
>
> The hinic3_cmdq_buf_swab32 function will work correctly, producing byte jumble,
> on little endian and big endian hosts.
>
> The code that runs prior to hinic3_cmdq_buf_swab32 that initializes a coherent
> struct is endianness sensitive. It needs to initialize fields based on their
> coherent endianness with or without byte swap. Practically, use cpu_to_le or
> cpu_to_be based on the coherent definition.
>
> Specifically, cmdq "coherent structs" in hinic3 use little endian and since
> Kconfig currently declares that big endian hosts are not supported then
> coherent structs are initialized without explicit cpu_to_le macros.
>
> And this is what the comment says:
>
> /* Data provided to/by cmdq is arranged in structs with little endian fields but
>  * every dword (32bits) should be swapped since HW swaps it again when it
>  * copies it from/to host memory.
>  */

Thanks, I think I am closer to understanding things now.

Let me try and express things in my own words:

1. On the hardware side, things are stored in a way that may be represented
   as structures with little-endian values. The members of the structures may
   have different sizes: 8-bit, 16-bit, 32-bit, ...

2. The hardware runs the equivalent of swab32_array() over this data
   when writing it to (or reading it from) the host. So we get a
   "byte jumble".

3. In this patch, the hinic3_cmdq_buf_swab32 reverses this jumbling
   by running the equivalent of swab32_array() over this data again.

As 3 exactly reverses 2, what is left are structures exactly as in 1.

If so, I agree this makes sense and I am sorry for missing this before.

And if so, is the intention for the cmdq "coherent structs" in the driver
to look something like this.

struct {
	u8 a;
	u8 b;
	__le16 c;
	__le32 d;
};

If so, this seems sensible to me.

But I think it would be best to include some code in this patchset
that makes use of such structures - sorry if it is there, I couldn't find
it just now.

And, although there is no intention for the driver to run on big endian
systems, the __le* fields should be accessed using cpu_to_le*/le*_to_cpu
helpers.
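A sketch of the access pattern being asked for here, reusing the hypothetical
struct from the mail above; the field and function names are illustrative:

#include <asm/byteorder.h>
#include <linux/types.h>

struct example_cmd {
	u8     a;
	u8     b;
	__le16 c;
	__le32 d;
};

/* Every load and store of a __le field goes through a helper, so the
 * code would stay correct on a big endian host; on little endian
 * builds the conversions compile away entirely.
 */
static void example_cmd_fill(struct example_cmd *cmd, u16 c, u32 d)
{
	cmd->c = cpu_to_le16(c);
	cmd->d = cpu_to_le32(d);
}

static u32 example_cmd_read_d(const struct example_cmd *cmd)
{
	return le32_to_cpu(cmd->d);
}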
> On Thu, Jul 31, 2025 at 03:58:39PM +0300, Gur Stavi wrote:
> >
> > Let's define a "coherent struct" as a structure made of fields that make sense
> > to human beings. Every field's endianness is defined and fields are arranged in
> > an order that "makes sense". Fields can be of any integer size 8,16,32,64 and not
> > necessarily naturally aligned.
> >
> > swab32_array transforms a coherent struct into "byte jumble". Small fields are
> > reordered and larger (misaligned) fields may be split into 2 (or even 3) parts.
> > swab32_array is reversible so a 2nd call with byte jumble as input will produce
> > the original coherent struct.
> >
> > hinic3 dma has "swab32_array" built in.
> > On send-to-device it expects a byte jumble so the DMA engine will transform it
> > into a coherent struct.
> > On receive-from-device it provides a byte jumble so the driver needs
> > to call swab32_array to transform it into a coherent struct.
> >
> > The hinic3_cmdq_buf_swab32 function will work correctly, producing byte jumble,
> > on little endian and big endian hosts.
> >
> > The code that runs prior to hinic3_cmdq_buf_swab32 that initializes a coherent
> > struct is endianness sensitive. It needs to initialize fields based on their
> > coherent endianness with or without byte swap. Practically, use cpu_to_le or
> > cpu_to_be based on the coherent definition.
> >
> > Specifically, cmdq "coherent structs" in hinic3 use little endian and since
> > Kconfig currently declares that big endian hosts are not supported then
> > coherent structs are initialized without explicit cpu_to_le macros.
> >
> > And this is what the comment says:
> >
> > /* Data provided to/by cmdq is arranged in structs with little endian fields but
> >  * every dword (32bits) should be swapped since HW swaps it again when it
> >  * copies it from/to host memory.
> >  */
>
> Thanks, I think I am closer to understanding things now.
>
> Let me try and express things in my own words:
>
> 1. On the hardware side, things are stored in a way that may be represented
>    as structures with little-endian values. The members of the structures may
>    have different sizes: 8-bit, 16-bit, 32-bit, ...
>
> 2. The hardware runs the equivalent of swab32_array() over this data
>    when writing it to (or reading it from) the host. So we get a
>    "byte jumble".
>
> 3. In this patch, the hinic3_cmdq_buf_swab32 reverses this jumbling
>    by running the equivalent of swab32_array() over this data again.
>
> As 3 exactly reverses 2, what is left are structures exactly as in 1.
>

Yes. Your understanding matches mine.

> If so, I agree this makes sense and I am sorry for missing this before.
>
> And if so, is the intention for the cmdq "coherent structs" in the driver
> to look something like this.
>
> struct {
> 	u8 a;
> 	u8 b;
> 	__le16 c;
> 	__le32 d;
> };
>
> If so, this seems sensible to me.
>
> But I think it would be best to include some code in this patchset
> that makes use of such structures - sorry if it is there, I couldn't find
> it just now.
>
> And, although there is no intention for the driver to run on big endian
> systems, the __le* fields should be accessed using cpu_to_le*/le*_to_cpu
> helpers.

There was a long and somewhat heated debate about this issue.
https://lore.kernel.org/netdev/20241230192326.384fd21d@kernel.org/
I agree that having __le in the code is better coding practice.
But flooding the code with cpu_to_le and le_to_cpu does hurt readability.
And there are precedents of drivers that avoid it.

However, our dev team (I am mostly an advisor) decided to give it a try anyway.
I hope they manage to survive it.
On Thu, Jul 31, 2025 at 09:34:20PM +0300, Gur Stavi wrote:
> > On Thu, Jul 31, 2025 at 03:58:39PM +0300, Gur Stavi wrote:

...

> > Thanks, I think I am closer to understanding things now.
> >
> > Let me try and express things in my own words:
> >
> > 1. On the hardware side, things are stored in a way that may be represented
> >    as structures with little-endian values. The members of the structures may
> >    have different sizes: 8-bit, 16-bit, 32-bit, ...
> >
> > 2. The hardware runs the equivalent of swab32_array() over this data
> >    when writing it to (or reading it from) the host. So we get a
> >    "byte jumble".
> >
> > 3. In this patch, the hinic3_cmdq_buf_swab32 reverses this jumbling
> >    by running the equivalent of swab32_array() over this data again.
> >
> > As 3 exactly reverses 2, what is left are structures exactly as in 1.
> >
>
> Yes. Your understanding matches mine.

Great. Sorry for taking a while to get there.

> > If so, I agree this makes sense and I am sorry for missing this before.
> >
> > And if so, is the intention for the cmdq "coherent structs" in the driver
> > to look something like this.
> >
> > struct {
> > 	u8 a;
> > 	u8 b;
> > 	__le16 c;
> > 	__le32 d;
> > };
> >
> > If so, this seems sensible to me.
> >
> > But I think it would be best to include some code in this patchset
> > that makes use of such structures - sorry if it is there, I couldn't find
> > it just now.
> >
> > And, although there is no intention for the driver to run on big endian
> > systems, the __le* fields should be accessed using cpu_to_le*/le*_to_cpu
> > helpers.
>
> There was a long and somewhat heated debate about this issue.
> https://lore.kernel.org/netdev/20241230192326.384fd21d@kernel.org/
> I agree that having __le in the code is better coding practice.
> But flooding the code with cpu_to_le and le_to_cpu does hurt readability.
> And there are precedents of drivers that avoid it.
>
> However, our dev team (I am mostly an advisor) decided to give it a try anyway.
> I hope they manage to survive it.

Thanks, I appreciate it. I look forward to reviewing what they come up with.
> >
> > So the swapped data by HW is neither BE nor LE. In this case, we should use
> > swab32 to obtain the correct LE data because our driver currently supports LE.
> > This is for compensating for bad HW decisions.
>
> Let us assume that the host is reading data provided by HW.
>
> If the swab32 approach works on a little endian host
> to allow the host to access 32-bit values in host byte order,
> then this is because it outputs 32-bit little endian values.
>
> But, given the same input, it will not work on a big endian host.
> This is because the same little endian output will be produced,
> while the host byte order is big endian.
>
> I think you need something based on be32_to_cpu()/cpu_to_be32().
> This will effectively be swab32 on little endian hosts (no change!).
> And a no-op on big endian hosts (addressing my point above).
>
> More specifically, I think you should use be32_to_cpu_array() and
> cpu_to_be32_array() instead of swab32_array().

Thanks. We'll take your suggestion.
On Thu, Jul 31, 2025 at 06:49:34PM +0800, Fan Gong wrote:
> > >
> > > So the swapped data by HW is neither BE nor LE. In this case, we should use
> > > swab32 to obtain the correct LE data because our driver currently supports LE.
> > > This is for compensating for bad HW decisions.
> >
> > Let us assume that the host is reading data provided by HW.
> >
> > If the swab32 approach works on a little endian host
> > to allow the host to access 32-bit values in host byte order,
> > then this is because it outputs 32-bit little endian values.
> >
> > But, given the same input, it will not work on a big endian host.
> > This is because the same little endian output will be produced,
> > while the host byte order is big endian.
> >
> > I think you need something based on be32_to_cpu()/cpu_to_be32().
> > This will effectively be swab32 on little endian hosts (no change!).
> > And a no-op on big endian hosts (addressing my point above).
> >
> > More specifically, I think you should use be32_to_cpu_array() and
> > cpu_to_be32_array() instead of swab32_array().
>
> Thanks. We'll take your suggestion.

Thanks, I really appreciate that.
On Thu, Jul 31, 2025 at 02:39:25PM +0100, Simon Horman wrote:
> On Thu, Jul 31, 2025 at 06:49:34PM +0800, Fan Gong wrote:
> > > >
> > > > So the swapped data by HW is neither BE nor LE. In this case, we should use
> > > > swab32 to obtain the correct LE data because our driver currently supports LE.
> > > > This is for compensating for bad HW decisions.
> > >
> > > Let us assume that the host is reading data provided by HW.
> > >
> > > If the swab32 approach works on a little endian host
> > > to allow the host to access 32-bit values in host byte order,
> > > then this is because it outputs 32-bit little endian values.
> > >
> > > But, given the same input, it will not work on a big endian host.
> > > This is because the same little endian output will be produced,
> > > while the host byte order is big endian.
> > >
> > > I think you need something based on be32_to_cpu()/cpu_to_be32().
> > > This will effectively be swab32 on little endian hosts (no change!).
> > > And a no-op on big endian hosts (addressing my point above).
> > >
> > > More specifically, I think you should use be32_to_cpu_array() and
> > > cpu_to_be32_array() instead of swab32_array().
> >
> > Thanks. We'll take your suggestion.
>
> Thanks, I really appreciate that.

Sorry, I missed Gur's related email before responding.
It seems that conversation now supersedes this one. [1]

[1] https://lore.kernel.org/netdev/20250731125839.1137083-1-gur.stavi@huawei.com/
On Tue, Jul 22, 2025 at 03:18:40PM +0800, Fan Gong wrote:
> Add async event queue interfaces initialization.
> It allows driver to handle async events reported by HW.
>
> Co-developed-by: Xin Guo <guoxin09@huawei.com>
> Signed-off-by: Xin Guo <guoxin09@huawei.com>
> Co-developed-by: Zhu Yikai <zhuyikai1@h-partners.com>
> Signed-off-by: Zhu Yikai <zhuyikai1@h-partners.com>
> Signed-off-by: Fan Gong <gongfan1@huawei.com>

...

> diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c

...

> +int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
> +			   enum hinic3_aeq_type event,
> +			   hinic3_aeq_event_cb hwe_cb)
> +{
> +	struct hinic3_aeqs *aeqs;
> +	unsigned long *cb_state;

cb_state is set but otherwise unused in this function.
Probably it should be removed.

Flagged by GCC 15.1.0 and Clang 20.1.8 builds with
KCFLAGS=-Wunused-but-set-variable

> +
> +	aeqs = hwdev->aeqs;
> +	cb_state = &aeqs->aeq_cb_state[event];
> +	aeqs->aeq_cb[event] = hwe_cb;
> +	spin_lock_init(&aeqs->aeq_lock);
> +
> +	return 0;
> +}
> +
> +void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev,
> +			      enum hinic3_aeq_type event)
> +{
> +	struct hinic3_aeqs *aeqs;
> +	unsigned long *cb_state;

Ditto.

> +
> +	aeqs = hwdev->aeqs;
> +	cb_state = &aeqs->aeq_cb_state[event];
> +
> +	spin_lock_bh(&aeqs->aeq_lock);
> +	aeqs->aeq_cb[event] = NULL;
> +	spin_unlock_bh(&aeqs->aeq_lock);
> +}

...

> +static void aeq_event_handler(struct hinic3_aeqs *aeqs, u32 aeqe,
> +			      const struct hinic3_aeq_elem *aeqe_pos)
> +{
> +	struct hinic3_hwdev *hwdev = aeqs->hwdev;
> +	u8 data[HINIC3_AEQE_DATA_SIZE], size;
> +	enum hinic3_aeq_type event;
> +	hinic3_aeq_event_cb hwe_cb;
> +	unsigned long *cb_state;

Ditto.

> +
> +	if (EQ_ELEM_DESC_GET(aeqe, SRC))
> +		return;
> +
> +	event = EQ_ELEM_DESC_GET(aeqe, TYPE);
> +	if (event >= HINIC3_MAX_AEQ_EVENTS) {
> +		dev_warn(hwdev->dev, "Aeq unknown event:%d\n", event);
> +		return;
> +	}
> +
> +	memcpy(data, aeqe_pos->aeqe_data, HINIC3_AEQE_DATA_SIZE);
> +	hinic3_cmdq_buf_swab32(data, HINIC3_AEQE_DATA_SIZE);
> +	size = EQ_ELEM_DESC_GET(aeqe, SIZE);
> +	cb_state = &aeqs->aeq_cb_state[event];
> +
> +	spin_lock_bh(&aeqs->aeq_lock);
> +	hwe_cb = aeqs->aeq_cb[event];
> +	if (hwe_cb)
> +		hwe_cb(aeqs->hwdev, data, size);
> +	spin_unlock_bh(&aeqs->aeq_lock);
> +}

...

> +static void eq_calc_page_size_and_num(struct hinic3_eq *eq, u32 elem_size)
> +{
> +	u32 max_pages, min_page_size, page_size, total_size;
> +
> +	/* No need for complicated arithmetics. All values must be power of 2.

arithmetic

> +	 * Multiplications give power of 2 and divisions give power of 2 without
> +	 * remainder.
> +	 */
> +	max_pages = HINIC3_EQ_MAX_PAGES;
> +	min_page_size = HINIC3_MIN_PAGE_SIZE;
> +	total_size = eq->eq_len * elem_size;
> +
> +	if (total_size <= max_pages * min_page_size)
> +		page_size = min_page_size;
> +	else
> +		page_size = total_size / max_pages;
> +
> +	hinic3_queue_pages_init(&eq->qpages, eq->eq_len, page_size, elem_size);
> +}

...
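For illustration, the warning can be addressed by simply dropping the unused
local. The sketch below is one possible shape, not the authors' actual
follow-up; it also assumes the spin_lock_init() call moves to
hinic3_aeqs_init(), since re-initializing a lock on every registration would
be fragile if the lock were ever held concurrently:

int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev,
			   enum hinic3_aeq_type event,
			   hinic3_aeq_event_cb hwe_cb)
{
	struct hinic3_aeqs *aeqs = hwdev->aeqs;

	/* aeq_cb_state is not consulted yet, so no cb_state local is
	 * needed until the state bitmap is actually used.
	 */
	aeqs->aeq_cb[event] = hwe_cb;

	return 0;
}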