[PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths

Leon Romanovsky posted 50 patches 1 month, 2 weeks ago
[PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 2 weeks ago
From: Leon Romanovsky <leonro@nvidia.com>

Split CQ creation into distinct kernel and user flows. The hns driver,
inherited from mlx4, uses a problematic pattern that shares and caches
umem in hns_roce_db_map_user(). This design blocks the driver from
supporting generic umem sources (VMA, dmabuf, memfd, and others).

In addition, let's delete counter that counts CQ creation errors. There
are multiple ways to debug kernel in modern kernel without need to rely
on that debugfs counter.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
 drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
 drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
 4 files changed, 82 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 857a913326cd..0f24a916466b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -335,7 +335,10 @@ static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 
-	if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
+	if (attr->flags)
+		return -EOPNOTSUPP;
+
+	if (attr->cqe > hr_dev->caps.max_cqes) {
 		ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
 			  attr->cqe, hr_dev->caps.max_cqes);
 		return -EINVAL;
@@ -407,8 +410,8 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
 	return 0;
 }
 
-int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
-		       struct uverbs_attr_bundle *attrs)
+int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
 	struct ib_udata *udata = &attrs->driver_udata;
@@ -418,31 +421,27 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	struct hns_roce_ib_create_cq ucmd = {};
 	int ret;
 
-	if (attr->flags) {
-		ret = -EOPNOTSUPP;
-		goto err_out;
-	}
+	if (ib_cq->umem)
+		return -EOPNOTSUPP;
 
 	ret = verify_cq_create_attr(hr_dev, attr);
 	if (ret)
-		goto err_out;
+		return ret;
 
-	if (udata) {
-		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
-		if (ret)
-			goto err_out;
-	}
+	ret = get_cq_ucmd(hr_cq, udata, &ucmd);
+	if (ret)
+		return ret;
 
 	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
 
 	ret = set_cqe_size(hr_cq, udata, &ucmd);
 	if (ret)
-		goto err_out;
+		return ret;
 
 	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
-		goto err_out;
+		return ret;
 	}
 
 	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -464,13 +463,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		goto err_cqn;
 	}
 
-	if (udata) {
-		resp.cqn = hr_cq->cqn;
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret)
-			goto err_cqc;
-	}
+	resp.cqn = hr_cq->cqn;
+	ret = ib_copy_to_udata(udata, &resp,
+			       min(udata->outlen, sizeof(resp)));
+	if (ret)
+		goto err_cqc;
 
 	hr_cq->cons_index = 0;
 	hr_cq->arm_sn = 1;
@@ -487,9 +484,67 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	free_cq_db(hr_dev, hr_cq, udata);
 err_cq_buf:
 	free_cq_buf(hr_dev, hr_cq);
-err_out:
-	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+	return ret;
+}
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+		       struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+	struct hns_roce_ib_create_cq_resp resp = {};
+	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_ib_create_cq ucmd = {};
+	int ret;
+
+	ret = verify_cq_create_attr(hr_dev, attr);
+	if (ret)
+		return ret;
+
+	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
+
+	ret = set_cqe_size(hr_cq, NULL, &ucmd);
+	if (ret)
+		return ret;
 
+	ret = alloc_cq_buf(hr_dev, hr_cq, NULL, 0);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = alloc_cq_db(hr_dev, hr_cq, NULL, 0, &resp);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
+		goto err_cq_buf;
+	}
+
+	ret = alloc_cqn(hr_dev, hr_cq, NULL);
+	if (ret) {
+		ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+		goto err_cq_db;
+	}
+
+	ret = alloc_cqc(hr_dev, hr_cq);
+	if (ret) {
+		ibdev_err(ibdev,
+			  "failed to alloc CQ context, ret = %d.\n", ret);
+		goto err_cqn;
+	}
+
+	hr_cq->cons_index = 0;
+	hr_cq->arm_sn = 1;
+	refcount_set(&hr_cq->refcount, 1);
+	init_completion(&hr_cq->free);
+
+	return 0;
+
+err_cqn:
+	free_cqn(hr_dev, hr_cq->cqn);
+err_cq_db:
+	free_cq_db(hr_dev, hr_cq, NULL);
+err_cq_buf:
+	free_cq_buf(hr_dev, hr_cq);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
index b869cdc54118..481b30f2f5b5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -47,7 +47,6 @@ static const char * const sw_stat_info[] = {
 	[HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
 	[HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
 	[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
-	[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
 	[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
 	[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
 	[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3f032b8038af..fdc5f487d7a3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -902,7 +902,6 @@ enum hns_roce_sw_dfx_stat_index {
 	HNS_ROCE_DFX_MBX_EVENT_CNT,
 	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
-	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
 	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
 	HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
@@ -1295,6 +1294,8 @@ int to_hr_qp_type(int qp_type);
 
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		       struct uverbs_attr_bundle *attrs);
+int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+			    struct uverbs_attr_bundle *attrs);
 
 int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index a3490bab297a..64de49bf8df7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -727,6 +727,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.create_ah = hns_roce_create_ah,
 	.create_user_ah = hns_roce_create_ah,
 	.create_cq = hns_roce_create_cq,
+	.create_user_cq = hns_roce_create_user_cq,
 	.create_qp = hns_roce_create_qp,
 	.dealloc_pd = hns_roce_dealloc_pd,
 	.dealloc_ucontext = hns_roce_dealloc_ucontext,

-- 
2.52.0
Re: [PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Junxian Huang 1 month ago

On 2026/2/13 18:58, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Split CQ creation into distinct kernel and user flows. The hns driver,
> inherited from mlx4, uses a problematic pattern that shares and caches
> umem in hns_roce_db_map_user(). This design blocks the driver from
> supporting generic umem sources (VMA, dmabuf, memfd, and others).
> 
> In addition, let's delete counter that counts CQ creation errors. There
> are multiple ways to debug kernel in modern kernel without need to rely
> on that debugfs counter.
> 
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
>  drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
>  drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
>  drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
>  4 files changed, 82 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
> index 857a913326cd..0f24a916466b 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_cq.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
> @@ -335,7 +335,10 @@ static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
>  {
>  	struct ib_device *ibdev = &hr_dev->ib_dev;
>  
> -	if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
> +	if (attr->flags)
> +		return -EOPNOTSUPP;
> +
> +	if (attr->cqe > hr_dev->caps.max_cqes) {
>  		ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
>  			  attr->cqe, hr_dev->caps.max_cqes);
>  		return -EINVAL;
> @@ -407,8 +410,8 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
>  	return 0;
>  }
>  
> -int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> -		       struct uverbs_attr_bundle *attrs)
> +int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +			    struct uverbs_attr_bundle *attrs)
>  {
>  	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
>  	struct ib_udata *udata = &attrs->driver_udata;
> @@ -418,31 +421,27 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  	struct hns_roce_ib_create_cq ucmd = {};
>  	int ret;
>  
> -	if (attr->flags) {
> -		ret = -EOPNOTSUPP;
> -		goto err_out;
> -	}
> +	if (ib_cq->umem)
> +		return -EOPNOTSUPP;
>  
>  	ret = verify_cq_create_attr(hr_dev, attr);
>  	if (ret)
> -		goto err_out;
> +		return ret;
>  
> -	if (udata) {
> -		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
> -		if (ret)
> -			goto err_out;
> -	}
> +	ret = get_cq_ucmd(hr_cq, udata, &ucmd);
> +	if (ret)
> +		return ret;
>  
>  	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
>  
>  	ret = set_cqe_size(hr_cq, udata, &ucmd);
>  	if (ret)
> -		goto err_out;
> +		return ret;
>  
>  	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
>  	if (ret) {
>  		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
> -		goto err_out;
> +		return ret;
>  	}
>  
>  	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
> @@ -464,13 +463,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  		goto err_cqn;
>  	}
>  
> -	if (udata) {
> -		resp.cqn = hr_cq->cqn;
> -		ret = ib_copy_to_udata(udata, &resp,
> -				       min(udata->outlen, sizeof(resp)));
> -		if (ret)
> -			goto err_cqc;
> -	}
> +	resp.cqn = hr_cq->cqn;
> +	ret = ib_copy_to_udata(udata, &resp,
> +			       min(udata->outlen, sizeof(resp)));
> +	if (ret)
> +		goto err_cqc;
>  
>  	hr_cq->cons_index = 0;
>  	hr_cq->arm_sn = 1;
> @@ -487,9 +484,67 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  	free_cq_db(hr_dev, hr_cq, udata);
>  err_cq_buf:
>  	free_cq_buf(hr_dev, hr_cq);
> -err_out:
> -	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
> +	return ret;
> +}
> +
> +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +		       struct uverbs_attr_bundle *attrs)
> +{
> +	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
> +	struct hns_roce_ib_create_cq_resp resp = {};
> +	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct hns_roce_ib_create_cq ucmd = {};

ucmd and resp are not needed since we don't have udata here.

Junxian

> +	int ret;
> +
> +	ret = verify_cq_create_attr(hr_dev, attr);
> +	if (ret)
> +		return ret;
> +
> +	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
> +
> +	ret = set_cqe_size(hr_cq, NULL, &ucmd);
> +	if (ret)
> +		return ret;
>  
> +	ret = alloc_cq_buf(hr_dev, hr_cq, NULL, 0);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
> +		return ret;
> +	}
> +
> +	ret = alloc_cq_db(hr_dev, hr_cq, NULL, 0, &resp);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
> +		goto err_cq_buf;
> +	}
> +
> +	ret = alloc_cqn(hr_dev, hr_cq, NULL);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
> +		goto err_cq_db;
> +	}
> +
> +	ret = alloc_cqc(hr_dev, hr_cq);
> +	if (ret) {
> +		ibdev_err(ibdev,
> +			  "failed to alloc CQ context, ret = %d.\n", ret);
> +		goto err_cqn;
> +	}
> +
> +	hr_cq->cons_index = 0;
> +	hr_cq->arm_sn = 1;
> +	refcount_set(&hr_cq->refcount, 1);
> +	init_completion(&hr_cq->free);
> +
> +	return 0;
> +
> +err_cqn:
> +	free_cqn(hr_dev, hr_cq->cqn);
> +err_cq_db:
> +	free_cq_db(hr_dev, hr_cq, NULL);
> +err_cq_buf:
> +	free_cq_buf(hr_dev, hr_cq);
>  	return ret;
>  }
>  
> diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> index b869cdc54118..481b30f2f5b5 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> @@ -47,7 +47,6 @@ static const char * const sw_stat_info[] = {
>  	[HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
>  	[HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
>  	[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
> -	[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
>  	[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
>  	[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
>  	[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 3f032b8038af..fdc5f487d7a3 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -902,7 +902,6 @@ enum hns_roce_sw_dfx_stat_index {
>  	HNS_ROCE_DFX_MBX_EVENT_CNT,
>  	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
> -	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
>  	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
> @@ -1295,6 +1294,8 @@ int to_hr_qp_type(int qp_type);
>  
>  int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  		       struct uverbs_attr_bundle *attrs);
> +int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +			    struct uverbs_attr_bundle *attrs);
>  
>  int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
>  int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index a3490bab297a..64de49bf8df7 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -727,6 +727,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
>  	.create_ah = hns_roce_create_ah,
>  	.create_user_ah = hns_roce_create_ah,
>  	.create_cq = hns_roce_create_cq,
> +	.create_user_cq = hns_roce_create_user_cq,
>  	.create_qp = hns_roce_create_qp,
>  	.dealloc_pd = hns_roce_dealloc_pd,
>  	.dealloc_ucontext = hns_roce_dealloc_ucontext,
>
Re: [PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month ago
On Thu, Feb 26, 2026 at 02:17:38PM +0800, Junxian Huang wrote:
> 
> 
> On 2026/2/13 18:58, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > Split CQ creation into distinct kernel and user flows. The hns driver,
> > inherited from mlx4, uses a problematic pattern that shares and caches
> > umem in hns_roce_db_map_user(). This design blocks the driver from
> > supporting generic umem sources (VMA, dmabuf, memfd, and others).
> > 
> > In addition, let's delete counter that counts CQ creation errors. There
> > are multiple ways to debug kernel in modern kernel without need to rely
> > on that debugfs counter.
> > 
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> >  drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
> >  drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
> >  drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
> >  drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
> >  4 files changed, 82 insertions(+), 26 deletions(-)

<...>

> > +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> > +		       struct uverbs_attr_bundle *attrs)
> > +{
> > +	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
> > +	struct hns_roce_ib_create_cq_resp resp = {};
> > +	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
> > +	struct ib_device *ibdev = &hr_dev->ib_dev;
> > +	struct hns_roce_ib_create_cq ucmd = {};
> 
> ucmd and resp are not needed since we don't have udata here.

Thanks, will fix.

> 
> Junxian
Re: [PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Cheng Xu 1 month, 1 week ago

On 2/13/26 6:58 PM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Split CQ creation into distinct kernel and user flows. The hns driver,
> inherited from mlx4, uses a problematic pattern that shares and caches
> umem in hns_roce_db_map_user(). This design blocks the driver from
> supporting generic umem sources (VMA, dmabuf, memfd, and others).
> 
> In addition, let's delete counter that counts CQ creation errors. There
> are multiple ways to debug kernel in modern kernel without need to rely
> on that debugfs counter.
> 
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
>  drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
>  drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
>  drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
>  4 files changed, 82 insertions(+), 26 deletions(-)
> 

Hi Leon,

The driver name in this patch's title should be "RDMA/hns".

Thanks,
Cheng Xu

> diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
> index 857a913326cd..0f24a916466b 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_cq.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
> @@ -335,7 +335,10 @@ static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
>  {
>  	struct ib_device *ibdev = &hr_dev->ib_dev;
>  
> -	if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
> +	if (attr->flags)
> +		return -EOPNOTSUPP;
> +
> +	if (attr->cqe > hr_dev->caps.max_cqes) {
>  		ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
>  			  attr->cqe, hr_dev->caps.max_cqes);
>  		return -EINVAL;
> @@ -407,8 +410,8 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
>  	return 0;
>  }
>  
> -int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> -		       struct uverbs_attr_bundle *attrs)
> +int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +			    struct uverbs_attr_bundle *attrs)
>  {
>  	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
>  	struct ib_udata *udata = &attrs->driver_udata;
> @@ -418,31 +421,27 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  	struct hns_roce_ib_create_cq ucmd = {};
>  	int ret;
>  
> -	if (attr->flags) {
> -		ret = -EOPNOTSUPP;
> -		goto err_out;
> -	}
> +	if (ib_cq->umem)
> +		return -EOPNOTSUPP;
>  
>  	ret = verify_cq_create_attr(hr_dev, attr);
>  	if (ret)
> -		goto err_out;
> +		return ret;
>  
> -	if (udata) {
> -		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
> -		if (ret)
> -			goto err_out;
> -	}
> +	ret = get_cq_ucmd(hr_cq, udata, &ucmd);
> +	if (ret)
> +		return ret;
>  
>  	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
>  
>  	ret = set_cqe_size(hr_cq, udata, &ucmd);
>  	if (ret)
> -		goto err_out;
> +		return ret;
>  
>  	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
>  	if (ret) {
>  		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
> -		goto err_out;
> +		return ret;
>  	}
>  
>  	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
> @@ -464,13 +463,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  		goto err_cqn;
>  	}
>  
> -	if (udata) {
> -		resp.cqn = hr_cq->cqn;
> -		ret = ib_copy_to_udata(udata, &resp,
> -				       min(udata->outlen, sizeof(resp)));
> -		if (ret)
> -			goto err_cqc;
> -	}
> +	resp.cqn = hr_cq->cqn;
> +	ret = ib_copy_to_udata(udata, &resp,
> +			       min(udata->outlen, sizeof(resp)));
> +	if (ret)
> +		goto err_cqc;
>  
>  	hr_cq->cons_index = 0;
>  	hr_cq->arm_sn = 1;
> @@ -487,9 +484,67 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  	free_cq_db(hr_dev, hr_cq, udata);
>  err_cq_buf:
>  	free_cq_buf(hr_dev, hr_cq);
> -err_out:
> -	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
> +	return ret;
> +}
> +
> +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +		       struct uverbs_attr_bundle *attrs)
> +{
> +	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
> +	struct hns_roce_ib_create_cq_resp resp = {};
> +	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct hns_roce_ib_create_cq ucmd = {};
> +	int ret;
> +
> +	ret = verify_cq_create_attr(hr_dev, attr);
> +	if (ret)
> +		return ret;
> +
> +	set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
> +
> +	ret = set_cqe_size(hr_cq, NULL, &ucmd);
> +	if (ret)
> +		return ret;
>  
> +	ret = alloc_cq_buf(hr_dev, hr_cq, NULL, 0);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
> +		return ret;
> +	}
> +
> +	ret = alloc_cq_db(hr_dev, hr_cq, NULL, 0, &resp);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
> +		goto err_cq_buf;
> +	}
> +
> +	ret = alloc_cqn(hr_dev, hr_cq, NULL);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
> +		goto err_cq_db;
> +	}
> +
> +	ret = alloc_cqc(hr_dev, hr_cq);
> +	if (ret) {
> +		ibdev_err(ibdev,
> +			  "failed to alloc CQ context, ret = %d.\n", ret);
> +		goto err_cqn;
> +	}
> +
> +	hr_cq->cons_index = 0;
> +	hr_cq->arm_sn = 1;
> +	refcount_set(&hr_cq->refcount, 1);
> +	init_completion(&hr_cq->free);
> +
> +	return 0;
> +
> +err_cqn:
> +	free_cqn(hr_dev, hr_cq->cqn);
> +err_cq_db:
> +	free_cq_db(hr_dev, hr_cq, NULL);
> +err_cq_buf:
> +	free_cq_buf(hr_dev, hr_cq);
>  	return ret;
>  }
>  
> diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> index b869cdc54118..481b30f2f5b5 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
> @@ -47,7 +47,6 @@ static const char * const sw_stat_info[] = {
>  	[HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
>  	[HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
>  	[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
> -	[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
>  	[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
>  	[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
>  	[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 3f032b8038af..fdc5f487d7a3 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -902,7 +902,6 @@ enum hns_roce_sw_dfx_stat_index {
>  	HNS_ROCE_DFX_MBX_EVENT_CNT,
>  	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
> -	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
>  	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
>  	HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
> @@ -1295,6 +1294,8 @@ int to_hr_qp_type(int qp_type);
>  
>  int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
>  		       struct uverbs_attr_bundle *attrs);
> +int hns_roce_create_user_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
> +			    struct uverbs_attr_bundle *attrs);
>  
>  int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
>  int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index a3490bab297a..64de49bf8df7 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -727,6 +727,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
>  	.create_ah = hns_roce_create_ah,
>  	.create_user_ah = hns_roce_create_ah,
>  	.create_cq = hns_roce_create_cq,
> +	.create_user_cq = hns_roce_create_user_cq,
>  	.create_qp = hns_roce_create_qp,
>  	.dealloc_pd = hns_roce_dealloc_pd,
>  	.dealloc_ucontext = hns_roce_dealloc_ucontext,
> 
Re: [PATCH rdma-next 26/50] RDMA/erdma: Separate user and kernel CQ creation paths
Posted by Leon Romanovsky 1 month, 1 week ago
On Tue, Feb 24, 2026 at 10:20:39AM +0800, Cheng Xu wrote:
> 
> 
> On 2/13/26 6:58 PM, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > Split CQ creation into distinct kernel and user flows. The hns driver,
> > inherited from mlx4, uses a problematic pattern that shares and caches
> > umem in hns_roce_db_map_user(). This design blocks the driver from
> > supporting generic umem sources (VMA, dmabuf, memfd, and others).
> > 
> > In addition, let's delete counter that counts CQ creation errors. There
> > are multiple ways to debug kernel in modern kernel without need to rely
> > on that debugfs counter.
> > 
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> >  drivers/infiniband/hw/hns/hns_roce_cq.c      | 103 ++++++++++++++++++++-------
> >  drivers/infiniband/hw/hns/hns_roce_debugfs.c |   1 -
> >  drivers/infiniband/hw/hns/hns_roce_device.h  |   3 +-
> >  drivers/infiniband/hw/hns/hns_roce_main.c    |   1 +
> >  4 files changed, 82 insertions(+), 26 deletions(-)
> > 
> 
> Hi Leon,
> 
> The driver name in this patch's title should be "RDMA/hns".

Right, thanks