[PATCH rdma-next 28/50] RDMA/siw: Split user and kernel CQ creation paths

Leon Romanovsky posted 50 patches 1 month, 2 weeks ago
[PATCH rdma-next 28/50] RDMA/siw: Split user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 2 weeks ago
From: Leon Romanovsky <leonro@nvidia.com>

Separate the CQ creation logic into distinct kernel and user flows.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_main.c  |   1 +
 drivers/infiniband/sw/siw/siw_verbs.c | 111 +++++++++++++++++++++++-----------
 drivers/infiniband/sw/siw/siw_verbs.h |   2 +
 3 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 5168307229a9..75dcf3578eac 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -232,6 +232,7 @@ static const struct ib_device_ops siw_device_ops = {
 	.alloc_pd = siw_alloc_pd,
 	.alloc_ucontext = siw_alloc_ucontext,
 	.create_cq = siw_create_cq,
+	.create_user_cq = siw_create_user_cq,
 	.create_qp = siw_create_qp,
 	.create_srq = siw_create_srq,
 	.dealloc_driver = siw_device_cleanup,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index efa2f097b582..92b25b389b69 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1139,15 +1139,15 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
  * @attrs: uverbs bundle
  */
 
-int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
-		  struct uverbs_attr_bundle *attrs)
+int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs)
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct siw_device *sdev = to_siw_dev(base_cq->device);
 	struct siw_cq *cq = to_siw_cq(base_cq);
 	int rv, size = attr->cqe;
 
-	if (attr->flags)
+	if (attr->flags || base_cq->umem)
 		return -EOPNOTSUPP;
 
 	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
@@ -1155,7 +1155,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 		rv = -ENOMEM;
 		goto err_out;
 	}
-	if (size < 1 || size > sdev->attrs.max_cqe) {
+	if (attr->cqe > sdev->attrs.max_cqe) {
 		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
 		rv = -EINVAL;
 		goto err_out;
@@ -1164,13 +1164,8 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	cq->base_cq.cqe = size;
 	cq->num_cqe = size;
 
-	if (udata)
-		cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
-					 sizeof(struct siw_cq_ctrl));
-	else
-		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
-				    sizeof(struct siw_cq_ctrl));
-
+	cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
+				 sizeof(struct siw_cq_ctrl));
 	if (cq->queue == NULL) {
 		rv = -ENOMEM;
 		goto err_out;
@@ -1182,33 +1177,32 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
-	if (udata) {
-		struct siw_uresp_create_cq uresp = {};
-		struct siw_ucontext *ctx =
-			rdma_udata_to_drv_context(udata, struct siw_ucontext,
-						  base_ucontext);
-		size_t length = size * sizeof(struct siw_cqe) +
-			sizeof(struct siw_cq_ctrl);
+	struct siw_uresp_create_cq uresp = {};
+	struct siw_ucontext *ctx =
+		rdma_udata_to_drv_context(udata, struct siw_ucontext,
+					  base_ucontext);
+	size_t length = size * sizeof(struct siw_cqe) +
+		sizeof(struct siw_cq_ctrl);
 
-		cq->cq_entry =
-			siw_mmap_entry_insert(ctx, cq->queue,
-					      length, &uresp.cq_key);
-		if (!cq->cq_entry) {
-			rv = -ENOMEM;
-			goto err_out;
-		}
+	cq->cq_entry =
+		siw_mmap_entry_insert(ctx, cq->queue,
+				      length, &uresp.cq_key);
+	if (!cq->cq_entry) {
+		rv = -ENOMEM;
+		goto err_out;
+	}
 
-		uresp.cq_id = cq->id;
-		uresp.num_cqe = size;
+	uresp.cq_id = cq->id;
+	uresp.num_cqe = size;
 
-		if (udata->outlen < sizeof(uresp)) {
-			rv = -EINVAL;
-			goto err_out;
-		}
-		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
-		if (rv)
-			goto err_out;
+	if (udata->outlen < sizeof(uresp)) {
+		rv = -EINVAL;
+		goto err_out;
 	}
+	rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rv)
+		goto err_out;
+
 	return 0;
 
 err_out:
@@ -1227,6 +1221,55 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	return rv;
 }
 
+int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		  struct uverbs_attr_bundle *attrs)
+{
+	struct siw_device *sdev = to_siw_dev(base_cq->device);
+	struct siw_cq *cq = to_siw_cq(base_cq);
+	int rv, size = attr->cqe;
+
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
+		siw_dbg(base_cq->device, "too many CQ's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (size < 1 || size > sdev->attrs.max_cqe) {
+		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
+		rv = -EINVAL;
+		goto err_out;
+	}
+	size = roundup_pow_of_two(size);
+	cq->base_cq.cqe = size;
+	cq->num_cqe = size;
+
+	cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
+			    sizeof(struct siw_cq_ctrl));
+	if (cq->queue == NULL) {
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	get_random_bytes(&cq->id, 4);
+	siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
+
+	spin_lock_init(&cq->lock);
+
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
+
+	return 0;
+
+err_out:
+	siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
+
+	if (cq->queue)
+		vfree(cq->queue);
+	atomic_dec(&sdev->num_cq);
+
+	return rv;
+}
+
 /*
  * siw_poll_cq()
  *
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index e9f4463aecdc..527c356b55af 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -44,6 +44,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
 		     struct ib_udata *udata);
 int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 		  struct uverbs_attr_bundle *attrs);
+int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs);
 int siw_query_port(struct ib_device *base_dev, u32 port,
 		   struct ib_port_attr *attr);
 int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,

-- 
2.52.0
Re: [PATCH rdma-next 28/50] RDMA/siw: Split user and kernel CQ creation paths
Posted by Bernard Metzler 1 month, 2 weeks ago
On 13.02.2026 11:58, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Separate the CQ creation logic into distinct kernel and user flows.
> 
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>   drivers/infiniband/sw/siw/siw_main.c  |   1 +
>   drivers/infiniband/sw/siw/siw_verbs.c | 111 +++++++++++++++++++++++-----------
>   drivers/infiniband/sw/siw/siw_verbs.h |   2 +
>   3 files changed, 80 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> index 5168307229a9..75dcf3578eac 100644
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -232,6 +232,7 @@ static const struct ib_device_ops siw_device_ops = {
>   	.alloc_pd = siw_alloc_pd,
>   	.alloc_ucontext = siw_alloc_ucontext,
>   	.create_cq = siw_create_cq,
> +	.create_user_cq = siw_create_user_cq,
>   	.create_qp = siw_create_qp,
>   	.create_srq = siw_create_srq,
>   	.dealloc_driver = siw_device_cleanup,
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index efa2f097b582..92b25b389b69 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -1139,15 +1139,15 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
>    * @attrs: uverbs bundle
>    */
>   
> -int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
> -		  struct uverbs_attr_bundle *attrs)
> +int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
> +		       struct uverbs_attr_bundle *attrs)
>   {
>   	struct ib_udata *udata = &attrs->driver_udata;
>   	struct siw_device *sdev = to_siw_dev(base_cq->device);
>   	struct siw_cq *cq = to_siw_cq(base_cq);
>   	int rv, size = attr->cqe;
>   
> -	if (attr->flags)
> +	if (attr->flags || base_cq->umem)
>   		return -EOPNOTSUPP;
>   
>   	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
> @@ -1155,7 +1155,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
>   		rv = -ENOMEM;
>   		goto err_out;
>   	}
> -	if (size < 1 || size > sdev->attrs.max_cqe) {
> +	if (attr->cqe > sdev->attrs.max_cqe) {
>   		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
>   		rv = -EINVAL;
>   		goto err_out;
> @@ -1164,13 +1164,8 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
>   	cq->base_cq.cqe = size;
>   	cq->num_cqe = size;
>   
> -	if (udata)
> -		cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
> -					 sizeof(struct siw_cq_ctrl));
> -	else
> -		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
> -				    sizeof(struct siw_cq_ctrl));
> -
> +	cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
> +				 sizeof(struct siw_cq_ctrl));
>   	if (cq->queue == NULL) {
>   		rv = -ENOMEM;
>   		goto err_out;
> @@ -1182,33 +1177,32 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
>   
>   	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
>   
> -	if (udata) {
> -		struct siw_uresp_create_cq uresp = {};
> -		struct siw_ucontext *ctx =
> -			rdma_udata_to_drv_context(udata, struct siw_ucontext,
> -						  base_ucontext);
> -		size_t length = size * sizeof(struct siw_cqe) +
> -			sizeof(struct siw_cq_ctrl);
> +	struct siw_uresp_create_cq uresp = {};
> +	struct siw_ucontext *ctx =
> +		rdma_udata_to_drv_context(udata, struct siw_ucontext,
> +					  base_ucontext);
> +	size_t length = size * sizeof(struct siw_cqe) +
> +		sizeof(struct siw_cq_ctrl);
>   
> -		cq->cq_entry =
> -			siw_mmap_entry_insert(ctx, cq->queue,
> -					      length, &uresp.cq_key);
> -		if (!cq->cq_entry) {
> -			rv = -ENOMEM;
> -			goto err_out;
> -		}
> +	cq->cq_entry =
> +		siw_mmap_entry_insert(ctx, cq->queue,
> +				      length, &uresp.cq_key);
> +	if (!cq->cq_entry) {
> +		rv = -ENOMEM;
> +		goto err_out;
> +	}
>   
> -		uresp.cq_id = cq->id;
> -		uresp.num_cqe = size;
> +	uresp.cq_id = cq->id;
> +	uresp.num_cqe = size;
>   
> -		if (udata->outlen < sizeof(uresp)) {
> -			rv = -EINVAL;
> -			goto err_out;
> -		}
> -		rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
> -		if (rv)
> -			goto err_out;
> +	if (udata->outlen < sizeof(uresp)) {
> +		rv = -EINVAL;
> +		goto err_out;
>   	}
> +	rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
> +	if (rv)
> +		goto err_out;
> +
>   	return 0;
>   
>   err_out:
> @@ -1227,6 +1221,55 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
>   	return rv;
>   }
>   
> +int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
> +		  struct uverbs_attr_bundle *attrs)
> +{
> +	struct siw_device *sdev = to_siw_dev(base_cq->device);
> +	struct siw_cq *cq = to_siw_cq(base_cq);
> +	int rv, size = attr->cqe;
> +
> +	if (attr->flags)
> +		return -EOPNOTSUPP;
> +
> +	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
> +		siw_dbg(base_cq->device, "too many CQ's\n");
> +		rv = -ENOMEM;
> +		goto err_out;
> +	}
> +	if (size < 1 || size > sdev->attrs.max_cqe) {

Isn't there now also a check for a zero-sized CQ in
__ib_alloc_cq(), which makes that "< 1" check obsolete?

Everything looks right otherwise.

Thanks,
Bernard.

> +		siw_dbg(base_cq->device, "CQ size error: %d\n", size);
> +		rv = -EINVAL;
> +		goto err_out;
> +	}
> +	size = roundup_pow_of_two(size);
> +	cq->base_cq.cqe = size;
> +	cq->num_cqe = size;
> +
> +	cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
> +			    sizeof(struct siw_cq_ctrl));
> +	if (cq->queue == NULL) {
> +		rv = -ENOMEM;
> +		goto err_out;
> +	}
> +	get_random_bytes(&cq->id, 4);
> +	siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
> +
> +	spin_lock_init(&cq->lock);
> +
> +	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
> +
> +	return 0;
> +
> +err_out:
> +	siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
> +
> +	if (cq->queue)
> +		vfree(cq->queue);
> +	atomic_dec(&sdev->num_cq);
> +
> +	return rv;
> +}
> +
>   /*
>    * siw_poll_cq()
>    *
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
> index e9f4463aecdc..527c356b55af 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.h
> +++ b/drivers/infiniband/sw/siw/siw_verbs.h
> @@ -44,6 +44,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
>   		     struct ib_udata *udata);
>   int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
>   		  struct uverbs_attr_bundle *attrs);
> +int siw_create_user_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
> +		       struct uverbs_attr_bundle *attrs);
>   int siw_query_port(struct ib_device *base_dev, u32 port,
>   		   struct ib_port_attr *attr);
>   int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,
>
Re: [PATCH rdma-next 28/50] RDMA/siw: Split user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 2 weeks ago
On Fri, Feb 13, 2026 at 05:56:32PM +0100, Bernard Metzler wrote:
> On 13.02.2026 11:58, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > Separate the CQ creation logic into distinct kernel and user flows.
> > 
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> >   drivers/infiniband/sw/siw/siw_main.c  |   1 +
> >   drivers/infiniband/sw/siw/siw_verbs.c | 111 +++++++++++++++++++++++-----------
> >   drivers/infiniband/sw/siw/siw_verbs.h |   2 +
> >   3 files changed, 80 insertions(+), 34 deletions(-)

<...>

> > +int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
> > +		  struct uverbs_attr_bundle *attrs)
> > +{
> > +	struct siw_device *sdev = to_siw_dev(base_cq->device);
> > +	struct siw_cq *cq = to_siw_cq(base_cq);
> > +	int rv, size = attr->cqe;
> > +
> > +	if (attr->flags)
> > +		return -EOPNOTSUPP;
> > +
> > +	if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
> > +		siw_dbg(base_cq->device, "too many CQ's\n");
> > +		rv = -ENOMEM;
> > +		goto err_out;
> > +	}
> > +	if (size < 1 || size > sdev->attrs.max_cqe) {
> 
> Isn't there now also a check for a zero-sized CQ in
> __ib_alloc_cq(), which makes that "< 1" check obsolete?

Thanks, this line needs to be changed to "if (attr->cqe > sdev->attrs.max_cqe)".

> 
> Everything looks right otherwise.
> 
> Thanks,
> Bernard.

Thanks