[PATCH rdma-next 18/50] RDMA/erdma: Separate user and kernel CQ creation paths

Leon Romanovsky posted 50 patches 1 month, 2 weeks ago
[PATCH rdma-next 18/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 2 weeks ago
From: Leon Romanovsky <leonro@nvidia.com>

Split CQ creation into distinct kernel and user flows. The erdma driver
uses a problematic pattern, inherited from mlx4, that shares and caches
umem in erdma_map_user_dbrecords(). This design blocks the driver from
supporting generic umem sources (VMA, dmabuf, memfd, and others).

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma_main.c  |  1 +
 drivers/infiniband/hw/erdma/erdma_verbs.c | 97 ++++++++++++++++++++-----------
 drivers/infiniband/hw/erdma/erdma_verbs.h |  2 +
 3 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index f35b30235018..1b6426e89d80 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -505,6 +505,7 @@ static const struct ib_device_ops erdma_device_ops = {
 	.alloc_pd = erdma_alloc_pd,
 	.alloc_ucontext = erdma_alloc_ucontext,
 	.create_cq = erdma_create_cq,
+	.create_user_cq = erdma_create_user_cq,
 	.create_qp = erdma_create_qp,
 	.dealloc_pd = erdma_dealloc_pd,
 	.dealloc_ucontext = erdma_dealloc_ucontext,
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 058edc42de58..6f809907fec5 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1952,8 +1952,8 @@ static int erdma_init_kernel_cq(struct erdma_cq *cq)
 	return -ENOMEM;
 }
 
-int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
-		    struct uverbs_attr_bundle *attrs)
+int erdma_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct erdma_cq *cq = to_ecq(ibcq);
@@ -1962,6 +1962,11 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	int ret;
 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
 		udata, struct erdma_ucontext, ibucontext);
+	struct erdma_ureq_create_cq ureq;
+	struct erdma_uresp_create_cq uresp;
+
+	if (ibcq->umem)
+		return -EOPNOTSUPP;
 
 	if (depth > dev->attrs.max_cqe)
 		return -EINVAL;
@@ -1977,31 +1982,22 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	if (ret < 0)
 		return ret;
 
-	if (!rdma_is_kernel_res(&ibcq->res)) {
-		struct erdma_ureq_create_cq ureq;
-		struct erdma_uresp_create_cq uresp;
-
-		ret = ib_copy_from_udata(&ureq, udata,
-					 min(udata->inlen, sizeof(ureq)));
-		if (ret)
-			goto err_out_xa;
+	ret = ib_copy_from_udata(&ureq, udata,
+				 min(udata->inlen, sizeof(ureq)));
+	if (ret)
+		goto err_out_xa;
 
-		ret = erdma_init_user_cq(ctx, cq, &ureq);
-		if (ret)
-			goto err_out_xa;
+	ret = erdma_init_user_cq(ctx, cq, &ureq);
+	if (ret)
+		goto err_out_xa;
 
-		uresp.cq_id = cq->cqn;
-		uresp.num_cqe = depth;
+	uresp.cq_id = cq->cqn;
+	uresp.num_cqe = depth;
 
-		ret = ib_copy_to_udata(udata, &uresp,
-				       min(sizeof(uresp), udata->outlen));
-		if (ret)
-			goto err_free_res;
-	} else {
-		ret = erdma_init_kernel_cq(cq);
-		if (ret)
-			goto err_out_xa;
-	}
+	ret = ib_copy_to_udata(udata, &uresp,
+			       min(sizeof(uresp), udata->outlen));
+	if (ret)
+		goto err_free_res;
 
 	ret = create_cq_cmd(ctx, cq);
 	if (ret)
@@ -2010,19 +2006,54 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	return 0;
 
 err_free_res:
-	if (!rdma_is_kernel_res(&ibcq->res)) {
-		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
-		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
-	} else {
-		dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
-				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
-		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
-			      cq->kern_cq.dbrec_dma);
-	}
+	erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+	put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
 
 err_out_xa:
 	xa_erase(&dev->cq_xa, cq->cqn);
+	return ret;
+}
+
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		    struct uverbs_attr_bundle *attrs)
+{
+	struct erdma_cq *cq = to_ecq(ibcq);
+	struct erdma_dev *dev = to_edev(ibcq->device);
+	unsigned int depth = attr->cqe;
+	int ret;
+
+	if (depth > dev->attrs.max_cqe)
+		return -EINVAL;
 
+	depth = roundup_pow_of_two(depth);
+	cq->ibcq.cqe = depth;
+	cq->depth = depth;
+	cq->assoc_eqn = attr->comp_vector + 1;
+
+	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
+			      XA_LIMIT(1, dev->attrs.max_cq - 1),
+			      &dev->next_alloc_cqn, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	ret = erdma_init_kernel_cq(cq);
+	if (ret)
+		goto err_out_xa;
+
+	ret = create_cq_cmd(NULL, cq);
+	if (ret)
+		goto err_free_res;
+
+	return 0;
+
+err_free_res:
+	dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
+			  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+	dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+		      cq->kern_cq.dbrec_dma);
+
+err_out_xa:
+	xa_erase(&dev->cq_xa, cq->cqn);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index 7d8d3fe501d5..21a4fb404806 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -435,6 +435,8 @@ int erdma_get_port_immutable(struct ib_device *dev, u32 port,
 			     struct ib_port_immutable *ib_port_immutable);
 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		    struct uverbs_attr_bundle *attrs);
+int erdma_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+			 struct uverbs_attr_bundle *attrs);
 int erdma_query_port(struct ib_device *dev, u32 port,
 		     struct ib_port_attr *attr);
 int erdma_query_gid(struct ib_device *dev, u32 port, int idx,

-- 
2.52.0
Re: [PATCH rdma-next 18/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Cheng Xu 1 month, 1 week ago

On 2/13/26 6:57 PM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Split CQ creation into distinct kernel and user flows. The erdma driver,
> inherited from mlx4, uses a problematic pattern that shares and caches
> umem in erdma_map_user_dbrecords(). This design blocks the driver from
> supporting generic umem sources (VMA, dmabuf, memfd, and others).
> 
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/erdma/erdma_main.c  |  1 +
>  drivers/infiniband/hw/erdma/erdma_verbs.c | 97 ++++++++++++++++++++-----------
>  drivers/infiniband/hw/erdma/erdma_verbs.h |  2 +
>  3 files changed, 67 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
> index f35b30235018..1b6426e89d80 100644
> --- a/drivers/infiniband/hw/erdma/erdma_main.c
> +++ b/drivers/infiniband/hw/erdma/erdma_main.c
> @@ -505,6 +505,7 @@ static const struct ib_device_ops erdma_device_ops = {
>  	.alloc_pd = erdma_alloc_pd,
>  	.alloc_ucontext = erdma_alloc_ucontext,
>  	.create_cq = erdma_create_cq,
> +	.create_user_cq = erdma_create_user_cq,
>  	.create_qp = erdma_create_qp,
>  	.dealloc_pd = erdma_dealloc_pd,
>  	.dealloc_ucontext = erdma_dealloc_ucontext,

<...>

> +
> +int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> +		    struct uverbs_attr_bundle *attrs)

create_cq will be used only for kernel CQ creation, so its third
parameter 'struct uverbs_attr_bundle *attrs' will be unused — can it be
removed? The same applies to all drivers.


> +{

<...>

> +	ret = create_cq_cmd(NULL, cq);
> +	if (ret)
> +		goto err_free_res;


In create_cq_cmd, should add the following change:

diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 8c30df61ae3d..eca28524e04b 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -240,7 +240,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
                req.first_page_offset = mem->page_offset;
                req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
 
-               if (uctx->ext_db.enable) {
+               if (uctx && uctx->ext_db.enable) {
                        req.cfg1 |= FIELD_PREP(
                                ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
                        req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,


Thanks,
Cheng Xu

Re: [PATCH rdma-next 18/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 1 week ago
On Tue, Feb 24, 2026 at 01:51:41PM +0800, Cheng Xu wrote:
> 
> 
> On 2/13/26 6:57 PM, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > Split CQ creation into distinct kernel and user flows. The erdma driver,
> > inherited from mlx4, uses a problematic pattern that shares and caches
> > umem in erdma_map_user_dbrecords(). This design blocks the driver from
> > supporting generic umem sources (VMA, dmabuf, memfd, and others).
> > 
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> >  drivers/infiniband/hw/erdma/erdma_main.c  |  1 +
> >  drivers/infiniband/hw/erdma/erdma_verbs.c | 97 ++++++++++++++++++++-----------
> >  drivers/infiniband/hw/erdma/erdma_verbs.h |  2 +
> >  3 files changed, 67 insertions(+), 33 deletions(-)
> > 
> > diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
> > index f35b30235018..1b6426e89d80 100644
> > --- a/drivers/infiniband/hw/erdma/erdma_main.c
> > +++ b/drivers/infiniband/hw/erdma/erdma_main.c
> > @@ -505,6 +505,7 @@ static const struct ib_device_ops erdma_device_ops = {
> >  	.alloc_pd = erdma_alloc_pd,
> >  	.alloc_ucontext = erdma_alloc_ucontext,
> >  	.create_cq = erdma_create_cq,
> > +	.create_user_cq = erdma_create_user_cq,
> >  	.create_qp = erdma_create_qp,
> >  	.dealloc_pd = erdma_dealloc_pd,
> >  	.dealloc_ucontext = erdma_dealloc_ucontext,
> 
> <...>
> 
> > +
> > +int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> > +		    struct uverbs_attr_bundle *attrs)
> 
> create_cq will be used for kernel CQ creation, and the third input parameter
> 'struct uverbs_attr_bundle *attrs' will be useless, so it can be removed? Same to
> all drivers.

Yes, but only after conversion of all drivers. I have that removal patch
in my v2.

> 
> 
> > +{
> 
> <...>
> 
> > +	ret = create_cq_cmd(NULL, cq);
> > +	if (ret)
> > +		goto err_free_res;
> 
> 
> In create_cq_cmd, should add the following change:

I took slightly different approach and inlined create_cq_cmd() into erdma_create_*_cq().

Thanks

> 
> diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
> index 8c30df61ae3d..eca28524e04b 100644
> --- a/drivers/infiniband/hw/erdma/erdma_verbs.c
> +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
> @@ -240,7 +240,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
>                 req.first_page_offset = mem->page_offset;
>                 req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
>  
> -               if (uctx->ext_db.enable) {
> +               if (uctx && uctx->ext_db.enable) {
>                         req.cfg1 |= FIELD_PREP(
>                                 ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
>                         req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
> 
> 
> Thanks,
> Cheng Xu
>