From: Ajay Sharma <sharmaajay@microsoft.com>
Send the QP fatal error event to only the
context that created the qp.
Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
---
drivers/infiniband/hw/mana/device.c | 4 ++++
drivers/infiniband/hw/mana/main.c | 11 ++++++++---
drivers/infiniband/hw/mana/mana_ib.h | 18 +++++++++---------
drivers/infiniband/hw/mana/qp.c | 2 ++
4 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index e15da43c73a0..fcc8083e2783 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -101,6 +101,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
if (ret)
ibdev_dbg(&mib_dev->ib_dev, "Failed to get caps, use defaults");
+ xa_init(&mib_dev->rq_to_qp_lookup_table);
+
ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
mdev->gdma_context->dev);
if (ret)
@@ -112,6 +114,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
destroy_adapter:
mana_ib_destroy_adapter(mib_dev);
+ xa_destroy(&mib_dev->rq_to_qp_lookup_table);
free_error_eq:
mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
deregister_device:
@@ -129,6 +132,7 @@ static void mana_ib_remove(struct auxiliary_device *adev)
mana_ib_destroy_adapter(mib_dev);
mana_gd_deregister_device(&mib_dev->gc->mana_ib);
ib_unregister_device(&mib_dev->ib_dev);
+ xa_destroy(&mib_dev->rq_to_qp_lookup_table);
ib_dealloc_device(&mib_dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 82923475267d..29be8fd1ec7f 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -556,13 +556,18 @@ static void mana_ib_critical_event_handler(void *ctx, struct gdma_queue *queue,
{
struct mana_ib_dev *mib_dev = (struct mana_ib_dev *)ctx;
struct ib_event mib_event;
+ struct mana_ib_qp *qp;
+ u64 rq_id;
switch (event->type) {
case GDMA_EQE_SOC_EVENT_NOTIFICATION:
+ rq_id = event->details[0] & 0xFFFFFF;
+ qp = xa_load(&mib_dev->rq_to_qp_lookup_table, rq_id);
mib_event.event = IB_EVENT_QP_FATAL;
mib_event.device = &mib_dev->ib_dev;
- mib_event.element.qp =
- (struct ib_qp*)(event->details[0] & 0xFFFFFF);
- ib_dispatch_event(&mib_event);
+ if (qp && qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&mib_event, qp->ibqp.qp_context);
+ else
+ ibdev_dbg(&mib_dev->ib_dev, "found no qp or event handler");
ibdev_dbg(&mib_dev->ib_dev, "Received critical notification");
break;
default:
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 6b9406738cb2..243572b52336 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -48,15 +48,6 @@ struct mana_ib_adapter_caps {
u32 max_inline_data_size;
};
-struct mana_ib_dev {
- struct ib_device ib_dev;
- struct gdma_dev *gdma_dev;
- struct gdma_context *gc;
- struct gdma_queue *fatal_err_eq;
- mana_handle_t adapter_handle;
- struct mana_ib_adapter_caps adapter_caps;
-};
-
struct mana_ib_wq {
struct ib_wq ibwq;
struct ib_umem *umem;
@@ -113,6 +104,15 @@ struct mana_ib_ucontext {
u32 doorbell;
};
+struct mana_ib_dev {
+ struct ib_device ib_dev;
+ struct gdma_dev *gdma_dev;
+ struct gdma_context *gc;
+ struct gdma_queue *fatal_err_eq;
+ mana_handle_t adapter_handle;
+ struct mana_ib_adapter_caps adapter_caps;
+ struct xarray rq_to_qp_lookup_table;
+};
struct mana_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_ind_table;
};
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index ef3275ac92a0..19fae28985c3 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -210,6 +210,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
wq->id = wq_spec.queue_index;
cq->id = cq_spec.queue_index;
+ xa_store(&mib_dev->rq_to_qp_lookup_table, wq->id, qp, GFP_KERNEL);
+
ibdev_dbg(&mib_dev->ib_dev,
"ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
ret, wq->rx_object, wq->id, cq->id);
--
2.25.1
On Mon, Oct 16, 2023 at 03:12:02PM -0700, sharmaajay@linuxonhyperv.com wrote: > diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c > index ef3275ac92a0..19fae28985c3 100644 > --- a/drivers/infiniband/hw/mana/qp.c > +++ b/drivers/infiniband/hw/mana/qp.c > @@ -210,6 +210,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, > wq->id = wq_spec.queue_index; > cq->id = cq_spec.queue_index; > > + xa_store(&mib_dev->rq_to_qp_lookup_table, wq->id, qp, GFP_KERNEL); > + A store with no erase? A load with no locking? This can't be right Jason
> -----Original Message----- > From: Jason Gunthorpe <jgg@ziepe.ca> > Sent: Monday, October 23, 2023 11:24 AM > To: sharmaajay@linuxonhyperv.com > Cc: Long Li <longli@microsoft.com>; Leon Romanovsky <leon@kernel.org>; > Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S. > Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; > Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; linux- > rdma@vger.kernel.org; linux-hyperv@vger.kernel.org; > netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma > <sharmaajay@microsoft.com> > Subject: [EXTERNAL] Re: [Patch v7 5/5] RDMA/mana_ib: Send event to qp > > On Mon, Oct 16, 2023 at 03:12:02PM -0700, > sharmaajay@linuxonhyperv.com wrote: > > > diff --git a/drivers/infiniband/hw/mana/qp.c > > b/drivers/infiniband/hw/mana/qp.c index ef3275ac92a0..19fae28985c3 > > 100644 > > --- a/drivers/infiniband/hw/mana/qp.c > > +++ b/drivers/infiniband/hw/mana/qp.c > > @@ -210,6 +210,8 @@ static int mana_ib_create_qp_rss(struct ib_qp > *ibqp, struct ib_pd *pd, > > wq->id = wq_spec.queue_index; > > cq->id = cq_spec.queue_index; > > > > + xa_store(&mib_dev->rq_to_qp_lookup_table, wq->id, qp, > GFP_KERNEL); > > + > > A store with no erase? > > A load with no locking? > > This can't be right > > Jason This wq->id is assigned from the HW and is guaranteed to be unique. May be I am not following why do we need a lock here. Can you please explain ? Ajay
> Subject: RE: [EXTERNAL] Re: [Patch v7 5/5] RDMA/mana_ib: Send event to qp > > > > -----Original Message----- > > From: Jason Gunthorpe <jgg@ziepe.ca> > > Sent: Monday, October 23, 2023 11:24 AM > > To: sharmaajay@linuxonhyperv.com > > Cc: Long Li <longli@microsoft.com>; Leon Romanovsky <leon@kernel.org>; > > Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S. > > Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; > > Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; > > linux- rdma@vger.kernel.org; linux-hyperv@vger.kernel.org; > > netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma > > <sharmaajay@microsoft.com> > > Subject: [EXTERNAL] Re: [Patch v7 5/5] RDMA/mana_ib: Send event to qp > > > > On Mon, Oct 16, 2023 at 03:12:02PM -0700, > sharmaajay@linuxonhyperv.com > > wrote: > > > > > diff --git a/drivers/infiniband/hw/mana/qp.c > > > b/drivers/infiniband/hw/mana/qp.c index ef3275ac92a0..19fae28985c3 > > > 100644 > > > --- a/drivers/infiniband/hw/mana/qp.c > > > +++ b/drivers/infiniband/hw/mana/qp.c > > > @@ -210,6 +210,8 @@ static int mana_ib_create_qp_rss(struct ib_qp > > *ibqp, struct ib_pd *pd, > > > wq->id = wq_spec.queue_index; > > > cq->id = cq_spec.queue_index; > > > > > > + xa_store(&mib_dev->rq_to_qp_lookup_table, wq->id, qp, > > GFP_KERNEL); > > > + > > > > A store with no erase? > > > > A load with no locking? > > > > This can't be right > > > > Jason > > This wq->id is assigned from the HW and is guaranteed to be unique. May be I > am not following why do we need a lock here. Can you please explain ? > Ajay I think we need to check the return value of xa_store(), and call xa_erase() in mana_ib_destroy_qp(). wq->id is generated by the hardware. If we believe in hardware always behaves in good manner, we don't need a lock. Thanks, Long
On Fri, Oct 27, 2023 at 09:35:05PM +0000, Long Li wrote: > > Subject: RE: [EXTERNAL] Re: [Patch v7 5/5] RDMA/mana_ib: Send event to qp > > > > > > > -----Original Message----- > > > From: Jason Gunthorpe <jgg@ziepe.ca> > > > Sent: Monday, October 23, 2023 11:24 AM > > > To: sharmaajay@linuxonhyperv.com > > > Cc: Long Li <longli@microsoft.com>; Leon Romanovsky <leon@kernel.org>; > > > Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S. > > > Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; > > > Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; > > > linux- rdma@vger.kernel.org; linux-hyperv@vger.kernel.org; > > > netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma > > > <sharmaajay@microsoft.com> > > > Subject: [EXTERNAL] Re: [Patch v7 5/5] RDMA/mana_ib: Send event to qp > > > > > > On Mon, Oct 16, 2023 at 03:12:02PM -0700, > > sharmaajay@linuxonhyperv.com > > > wrote: > > > > > > > diff --git a/drivers/infiniband/hw/mana/qp.c > > > > b/drivers/infiniband/hw/mana/qp.c index ef3275ac92a0..19fae28985c3 > > > > 100644 > > > > --- a/drivers/infiniband/hw/mana/qp.c > > > > +++ b/drivers/infiniband/hw/mana/qp.c > > > > @@ -210,6 +210,8 @@ static int mana_ib_create_qp_rss(struct ib_qp > > > *ibqp, struct ib_pd *pd, > > > > wq->id = wq_spec.queue_index; > > > > cq->id = cq_spec.queue_index; > > > > > > > > + xa_store(&mib_dev->rq_to_qp_lookup_table, wq->id, qp, > > > GFP_KERNEL); > > > > + > > > > > > A store with no erase? > > > > > > A load with no locking? > > > > > > This can't be right > > > > > > Jason > > > > This wq->id is assigned from the HW and is guaranteed to be unique. May be I > > am not following why do we need a lock here. Can you please explain ? > > Ajay > > I think we need to check the return value of xa_store(), and call xa_erase() in mana_ib_destroy_qp(). > > wq->id is generated by the hardware. If we believe in hardware > always behaves in good manner, we don't need a lock. It has nothing to do with how the ID is created, you need to explain how the missing erase can't race with any loads, in a comment above the unlocked xa_load. Jason
© 2016 - 2025 Red Hat, Inc.