 .../net/ethernet/microsoft/mana/gdma_main.c | 64 +++++++++++++++++++
 include/net/mana/gdma.h                     | 11 +++-
 2 files changed, 73 insertions(+), 2 deletions(-)
To collaborate with hardware servicing events, upon receiving the special
EQE notification from the HW channel, remove the devices on this bus.
Then, after a waiting period based on the device specs, rescan the parent
bus to recover the devices.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
v3:
Updated for checkpatch warnings as suggested by Simon Horman.
v2:
Added dev_dbg for service type as suggested by Shradha Gupta.
Added driver cap bit.
---
.../net/ethernet/microsoft/mana/gdma_main.c | 64 +++++++++++++++++++
include/net/mana/gdma.h | 11 +++-
2 files changed, 73 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 4ffaf7588885..3102bd2b875b 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -352,11 +352,55 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
}
EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
+#define MANA_SERVICE_PERIOD 10
+
+struct mana_serv_work {
+ struct work_struct serv_work;
+ struct pci_dev *pdev;
+};
+
+static void mana_serv_func(struct work_struct *w)
+{
+ struct mana_serv_work *mns_wk;
+ struct pci_bus *bus, *parent;
+ struct pci_dev *pdev;
+
+ mns_wk = container_of(w, struct mana_serv_work, serv_work);
+ pdev = mns_wk->pdev;
+
+ if (!pdev)
+ goto out;
+
+ bus = pdev->bus;
+ if (!bus) {
+ dev_err(&pdev->dev, "MANA service: no bus\n");
+ goto out;
+ }
+
+ parent = bus->parent;
+ if (!parent) {
+ dev_err(&pdev->dev, "MANA service: no parent bus\n");
+ goto out;
+ }
+
+ pci_stop_and_remove_bus_device_locked(bus->self);
+
+ msleep(MANA_SERVICE_PERIOD * 1000);
+
+ pci_lock_rescan_remove();
+ pci_rescan_bus(parent);
+ pci_unlock_rescan_remove();
+
+out:
+ kfree(mns_wk);
+}
+
static void mana_gd_process_eqe(struct gdma_queue *eq)
{
u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
struct gdma_context *gc = eq->gdma_dev->gdma_context;
struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+ struct mana_serv_work *mns_wk;
union gdma_eqe_info eqe_info;
enum gdma_eqe_type type;
struct gdma_event event;
@@ -400,6 +444,26 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
eq->eq.callback(eq->eq.context, eq, &event);
break;
+ case GDMA_EQE_HWC_FPGA_RECONFIG:
+ case GDMA_EQE_HWC_SOCMANA_CRASH:
+ dev_dbg(gc->dev, "Recv MANA service type:%d\n", type);
+
+ if (gc->in_service) {
+ dev_info(gc->dev, "Already in service\n");
+ break;
+ }
+
+ mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+ if (!mns_wk)
+ break;
+
+ dev_info(gc->dev, "Start MANA service type:%d\n", type);
+ gc->in_service = true;
+ mns_wk->pdev = to_pci_dev(gc->dev);
+ INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+ schedule_work(&mns_wk->serv_work);
+ break;
+
default:
break;
}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 228603bf03f2..d0fbc9c64cc8 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -58,8 +58,9 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
- GDMA_EQE_HWC_SOC_RECONFIG = 132,
+ GDMA_EQE_HWC_FPGA_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_HWC_SOCMANA_CRASH = 135,
GDMA_EQE_RNIC_QP_FATAL = 176,
};
@@ -388,6 +389,8 @@ struct gdma_context {
u32 test_event_eq_id;
bool is_pf;
+ bool in_service;
+
phys_addr_t bar0_pa;
void __iomem *bar0_va;
void __iomem *shm_base;
@@ -558,12 +561,16 @@ enum {
/* Driver can handle holes (zeros) in the device list */
#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
+/* Driver can self reset on EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
- GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
+ GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE)
#define GDMA_DRV_CAP_FLAGS2 0
--
2.34.1
On Mon, May 12, 2025 at 12:57:54PM -0700, Haiyang Zhang wrote:
> To collaborate with hardware servicing events, upon receiving the special
> EQE notification from the HW channel, remove the devices on this bus.
> Then, after a waiting period based on the device specs, rescan the parent
> bus to recover the devices.
>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

Reviewed-by: Simon Horman <horms@kernel.org>
On Mon, May 12, 2025 at 12:57:54PM -0700, Haiyang Zhang wrote:
> To collaborate with hardware servicing events, upon receiving the special
> EQE notification from the HW channel, remove the devices on this bus.
> Then, after a waiting period based on the device specs, rescan the parent
> bus to recover the devices.
>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
> v3:
> Updated for checkpatch warnings as suggested by Simon Horman.
>
> v2:
> Added dev_dbg for service type as suggested by Shradha Gupta.
> Added driver cap bit.
>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 64 +++++++++++++++++++
> include/net/mana/gdma.h | 11 +++-
> 2 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 4ffaf7588885..3102bd2b875b 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -352,11 +352,55 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
> }
> EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
>
> +#define MANA_SERVICE_PERIOD 10
> +
> +struct mana_serv_work {
> + struct work_struct serv_work;
> + struct pci_dev *pdev;
> +};
> +
> +static void mana_serv_func(struct work_struct *w)
> +{
> + struct mana_serv_work *mns_wk;
> + struct pci_bus *bus, *parent;
> + struct pci_dev *pdev;
> +
> + mns_wk = container_of(w, struct mana_serv_work, serv_work);
> + pdev = mns_wk->pdev;
> +
> + if (!pdev)
> + goto out;
> +
> + bus = pdev->bus;
> + if (!bus) {
> + dev_err(&pdev->dev, "MANA service: no bus\n");
> + goto out;
> + }
> +
> + parent = bus->parent;
> + if (!parent) {
> + dev_err(&pdev->dev, "MANA service: no parent bus\n");
> + goto out;
> + }
> +
> + pci_stop_and_remove_bus_device_locked(bus->self);
> +
> + msleep(MANA_SERVICE_PERIOD * 1000);
> +
> + pci_lock_rescan_remove();
> + pci_rescan_bus(parent);
> + pci_unlock_rescan_remove();
> +
> +out:
> + kfree(mns_wk);
> +}
> +
> static void mana_gd_process_eqe(struct gdma_queue *eq)
> {
> u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
> struct gdma_context *gc = eq->gdma_dev->gdma_context;
> struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
> + struct mana_serv_work *mns_wk;
> union gdma_eqe_info eqe_info;
> enum gdma_eqe_type type;
> struct gdma_event event;
> @@ -400,6 +444,26 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
> eq->eq.callback(eq->eq.context, eq, &event);
> break;
>
> + case GDMA_EQE_HWC_FPGA_RECONFIG:
> + case GDMA_EQE_HWC_SOCMANA_CRASH:
> + dev_dbg(gc->dev, "Recv MANA service type:%d\n", type);
> +
> + if (gc->in_service) {
> + dev_info(gc->dev, "Already in service\n");
> + break;
> + }
> +
> + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
> + if (!mns_wk)
> + break;
> +
> + dev_info(gc->dev, "Start MANA service type:%d\n", type);
> + gc->in_service = true;
> + mns_wk->pdev = to_pci_dev(gc->dev);
> + INIT_WORK(&mns_wk->serv_work, mana_serv_func);
> + schedule_work(&mns_wk->serv_work);
> + break;
> +
> default:
> break;
> }
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 228603bf03f2..d0fbc9c64cc8 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -58,8 +58,9 @@ enum gdma_eqe_type {
> GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
> GDMA_EQE_HWC_INIT_DATA = 130,
> GDMA_EQE_HWC_INIT_DONE = 131,
> - GDMA_EQE_HWC_SOC_RECONFIG = 132,
> + GDMA_EQE_HWC_FPGA_RECONFIG = 132,
> GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
> + GDMA_EQE_HWC_SOCMANA_CRASH = 135,
> GDMA_EQE_RNIC_QP_FATAL = 176,
> };
>
> @@ -388,6 +389,8 @@ struct gdma_context {
> u32 test_event_eq_id;
>
> bool is_pf;
> + bool in_service;
> +
> phys_addr_t bar0_pa;
> void __iomem *bar0_va;
> void __iomem *shm_base;
> @@ -558,12 +561,16 @@ enum {
> /* Driver can handle holes (zeros) in the device list */
> #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
>
> +/* Driver can self reset on EQE notification */
> +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
> +
> #define GDMA_DRV_CAP_FLAGS1 \
> (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
> GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
> GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
> GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
> - GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
> + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
> + GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE)
>
> #define GDMA_DRV_CAP_FLAGS2 0
>
> --
> 2.34.1
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
© 2016 - 2026 Red Hat, Inc.