From nobody Thu Apr 2 22:08:38 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 362DA35A92E; Fri, 13 Feb 2026 11:00:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1770980419; cv=none; b=OjpBJ3oTVbGJd5Zg7XA+Qa7qzmeZVkXm+bPQBukhVsX3Cc2LmTcKsgYXJBePWGdz/hcwyUfMfUA2eEYKxHKOyHEg8vFqRtD3RN6kV/VROS81IjpU6yuCl7q07mH/WM2Q51JIgNve+TgV3gqlpXufTnS/7Tpk1ZMvA6XnY0ansjE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1770980419; c=relaxed/simple; bh=wEJW/ElIiQ34G6JWKBLls/fM/jLLfH5WfW9UCF7m7dI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=RfePdnDB5T2sIqf/Cv9aq1N3zfp93zyW1E5sJ4cUCG+Qu7l6d/QqT48iJ8daJA0wXWVwWOwvBl9E/uCHQ41g8TgqKXABdKHcaJ1q8GLbMOTMGyKYzdaObVQ26Bknp1fCkKgjx7T3Czi4ZvhLCecBnTFemE1xZbRPfqxMFnEi8Uw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=fMLKpf/7; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="fMLKpf/7" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 593EBC19423; Fri, 13 Feb 2026 11:00:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1770980419; bh=wEJW/ElIiQ34G6JWKBLls/fM/jLLfH5WfW9UCF7m7dI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=fMLKpf/7/DGGNTwjtT84VCiRUmzrundAgAKbLg3252wShyh+g9Z5RXK+TznVf0z8Q f8FpqPC7svm1cGb/YHie37HkLtQpO9V5n6IvXJQ4VI4aew7iju07gFxqSIOtn4/LYK /GSKzpKsTxNVoLIJr9gySv6MHZg5qAwLkYsDVhew98XJeNt7QQtxT3QGCXTKkhyq1f rwsaUS6fD+4Dgf9KFUYr4NqnXeZ0Xy9UWbMoEVvkDuTCUP6M3wIapfSR5y0pBLOtUX Jqb6Lmb6zE8ot6+treUWSO0PstkwWTG9ijxDDh3B+1GFrNu9ttt6KOwyIJ3tFdCkBD eJ8z2CR+HKXPA== From: Leon Romanovsky To: Jason Gunthorpe , Leon Romanovsky , Selvin Xavier , Kalesh AP , Potnuri Bharat Teja , Michael Margolin , Gal Pressman , Yossi Leybovich , Cheng Xu , Kai Shen , Chengchang Tang , Junxian Huang , Abhijit Gangurde , Allen Hubbe , Krzysztof Czurylo , Tatyana Nikolova , Long Li , Konstantin Taranov , Yishai Hadas , Michal Kalderon , Bryan Tan , Vishnu Dasa , Broadcom internal kernel review list , Christian Benvenuti , Nelson Escobar , Dennis Dalessandro , Bernard Metzler , Zhu Yanjun Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org Subject: [PATCH rdma-next 23/50] RDMA/irdma: Split user and kernel CQ creation paths Date: Fri, 13 Feb 2026 12:57:59 +0200 Message-ID: <20260213-refactor-umem-v1-23-f3be85847922@nvidia.com> X-Mailer: git-send-email 2.53.0 In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com> References: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" X-Mailer: b4 0.15-dev-47773 Content-Transfer-Encoding: quoted-printable From: Leon Romanovsky Separate the CQ creation logic into distinct kernel and user flows. Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 310 +++++++++++++++++++++++---------= ---- 1 file changed, 195 insertions(+), 115 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/ir= dma/verbs.c index cf8d19150574..f2b3cfe125af 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2461,15 +2461,9 @@ static inline int cq_validate_flags(u32 flags, u8 hw= _rev) return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0; } =20 -/** - * irdma_create_cq - create cq - * @ibcq: CQ allocated - * @attr: attributes for cq - * @attrs: uverbs attribute bundle - */ -static int irdma_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct uverbs_attr_bundle *attrs) +static int irdma_create_user_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs) { #define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req= , user_cq_buf) #define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_re= sp, cq_size) @@ -2489,14 +2483,22 @@ static int irdma_create_cq(struct ib_cq *ibcq, int err_code; int entries =3D attr->cqe; bool cqe_64byte_ena; - u8 cqe_size; + struct irdma_ucontext *ucontext; + struct irdma_create_cq_req req =3D {}; + struct irdma_cq_mr *cqmr; + struct irdma_pbl *iwpbl; + struct irdma_pbl *iwpbl_shadow; + struct irdma_cq_mr *cqmr_shadow; + + if (ibcq->umem) + return -EOPNOTSUPP; =20 err_code =3D cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev= ); if (err_code) return err_code; =20 - if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || - udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) + if (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || + udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN) return -EINVAL; =20 err_code =3D irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, @@ -2516,7 +2518,6 @@ static int irdma_create_cq(struct ib_cq *ibcq, ukinfo->cq_id =3D cq_num; cqe_64byte_ena =3D dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_6= 4_BYTE_CQE ? true : false; - cqe_size =3D cqe_64byte_ena ? 64 : 32; ukinfo->avoid_mem_cflct =3D cqe_64byte_ena; iwcq->ibcq.cqe =3D info.cq_uk_init_info.cq_size; if (attr->comp_vector < rf->ceqs_count) @@ -2526,110 +2527,203 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.type =3D IRDMA_CQ_TYPE_IWARP; info.vsi =3D &iwdev->vsi; =20 - if (udata) { - struct irdma_ucontext *ucontext; - struct irdma_create_cq_req req =3D {}; - struct irdma_cq_mr *cqmr; - struct irdma_pbl *iwpbl; - struct irdma_pbl *iwpbl_shadow; - struct irdma_cq_mr *cqmr_shadow; - - iwcq->user_mode =3D true; - ucontext =3D - rdma_udata_to_drv_context(udata, struct irdma_ucontext, - ibucontext); - if (ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen))) { - err_code =3D -EFAULT; - goto cq_free_rsrc; - } + iwcq->user_mode =3D true; + ucontext =3D + rdma_udata_to_drv_context(udata, struct irdma_ucontext, + ibucontext); + if (ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen))) { + err_code =3D -EFAULT; + goto cq_free_rsrc; + } =20 + spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); + iwpbl =3D irdma_get_pbl((unsigned long)req.user_cq_buf, + &ucontext->cq_reg_mem_list); + spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); + if (!iwpbl) { + err_code =3D -EPROTO; + goto cq_free_rsrc; + } + + cqmr =3D &iwpbl->cq_mr; + + if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & + IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) { spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - iwpbl =3D irdma_get_pbl((unsigned long)req.user_cq_buf, - &ucontext->cq_reg_mem_list); + iwpbl_shadow =3D irdma_get_pbl( + (unsigned long)req.user_shadow_area, + &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); - if (!iwpbl) { + + if (!iwpbl_shadow) { err_code =3D -EPROTO; goto cq_free_rsrc; } + cqmr_shadow =3D &iwpbl_shadow->cq_mr; + info.shadow_area_pa =3D cqmr_shadow->cq_pbl.addr; + cqmr->split =3D true; + } else { + info.shadow_area_pa =3D cqmr->shadow; + } + if (iwpbl->pbl_allocated) { + info.virtual_map =3D true; + info.pbl_chunk_size =3D 1; + info.first_pm_pbl_idx =3D cqmr->cq_pbl.idx; + } else { + info.cq_base_pa =3D cqmr->cq_pbl.addr; + } =20 - cqmr =3D &iwpbl->cq_mr; + info.shadow_read_threshold =3D min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); =20 - if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & - IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) { - spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - iwpbl_shadow =3D irdma_get_pbl( - (unsigned long)req.user_shadow_area, - &ucontext->cq_reg_mem_list); - spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); + if (irdma_sc_cq_init(cq, &info)) { + ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); + err_code =3D -EPROTO; + goto cq_free_rsrc; + } =20 - if (!iwpbl_shadow) { - err_code =3D -EPROTO; - goto cq_free_rsrc; - } - cqmr_shadow =3D &iwpbl_shadow->cq_mr; - info.shadow_area_pa =3D cqmr_shadow->cq_pbl.addr; - cqmr->split =3D true; - } else { - info.shadow_area_pa =3D cqmr->shadow; - } - if (iwpbl->pbl_allocated) { - info.virtual_map =3D true; - info.pbl_chunk_size =3D 1; - info.first_pm_pbl_idx =3D cqmr->cq_pbl.idx; - } else { - info.cq_base_pa =3D cqmr->cq_pbl.addr; - } - } else { - /* Kmode allocations */ - int rsize; + cqp_request =3D irdma_alloc_and_get_cqp_request(&rf->cqp, true); + if (!cqp_request) { + err_code =3D -ENOMEM; + goto cq_free_rsrc; + } =20 - if (entries < 1 || entries > rf->max_cqe) { - err_code =3D -EINVAL; - goto cq_free_rsrc; - } + cqp_info =3D &cqp_request->info; + cqp_info->cqp_cmd =3D IRDMA_OP_CQ_CREATE; + cqp_info->post_sq =3D 1; + cqp_info->in.u.cq_create.cq =3D cq; + cqp_info->in.u.cq_create.check_overflow =3D true; + cqp_info->in.u.cq_create.scratch =3D (uintptr_t)cqp_request; + err_code =3D irdma_handle_cqp_op(rf, cqp_request); + irdma_put_cqp_request(&rf->cqp, cqp_request); + if (err_code) + goto cq_free_rsrc; =20 - entries +=3D 2; - if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >=3D IRDMA_GEN_2) - entries *=3D 2; + struct irdma_create_cq_resp resp =3D {}; =20 - if (entries & 1) - entries +=3D 1; /* cq size must be an even number */ + resp.cq_id =3D info.cq_uk_init_info.cq_id; + resp.cq_size =3D info.cq_uk_init_info.cq_size; + if (ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen))) { + ibdev_dbg(&iwdev->ibdev, + "VERBS: copy to user data\n"); + err_code =3D -EPROTO; + goto cq_destroy; + } =20 - if (entries * cqe_size =3D=3D IRDMA_HW_PAGE_SIZE) - entries +=3D 2; + init_completion(&iwcq->free_cq); =20 - ukinfo->cq_size =3D entries; + /* Populate table entry after CQ is fully created. */ + smp_store_release(&rf->cq_table[cq_num], iwcq); =20 - if (cqe_64byte_ena) - rsize =3D info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_c= qe); - else - rsize =3D info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); - iwcq->kmem.size =3D ALIGN(round_up(rsize, 256), 256); - iwcq->kmem.va =3D dma_alloc_coherent(dev->hw->device, - iwcq->kmem.size, - &iwcq->kmem.pa, GFP_KERNEL); - if (!iwcq->kmem.va) { - err_code =3D -ENOMEM; - goto cq_free_rsrc; - } + return 0; +cq_destroy: + irdma_cq_wq_destroy(rf, cq); +cq_free_rsrc: + irdma_cq_free_rsrc(rf, iwcq); =20 - iwcq->kmem_shadow.size =3D ALIGN(IRDMA_SHADOW_AREA_SIZE << 3, - 64); - iwcq->kmem_shadow.va =3D dma_alloc_coherent(dev->hw->device, - iwcq->kmem_shadow.size, - &iwcq->kmem_shadow.pa, - GFP_KERNEL); - if (!iwcq->kmem_shadow.va) { - err_code =3D -ENOMEM; - goto cq_free_rsrc; - } - info.shadow_area_pa =3D iwcq->kmem_shadow.pa; - ukinfo->shadow_area =3D iwcq->kmem_shadow.va; - ukinfo->cq_base =3D iwcq->kmem.va; - info.cq_base_pa =3D iwcq->kmem.pa; + return err_code; +} + +static int irdma_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct ib_device *ibdev =3D ibcq->device; + struct irdma_device *iwdev =3D to_iwdev(ibdev); + struct irdma_pci_f *rf =3D iwdev->rf; + struct irdma_cq *iwcq =3D to_iwcq(ibcq); + u32 cq_num =3D 0; + struct irdma_sc_cq *cq; + struct irdma_sc_dev *dev =3D &rf->sc_dev; + struct irdma_cq_init_info info =3D {}; + struct irdma_cqp_request *cqp_request; + struct cqp_cmds_info *cqp_info; + struct irdma_cq_uk_init_info *ukinfo =3D &info.cq_uk_init_info; + int err_code; + int entries =3D attr->cqe; + bool cqe_64byte_ena; + u8 cqe_size; + int rsize; + + err_code =3D cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev= ); + if (err_code) + return err_code; + + err_code =3D irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, + &rf->next_cq); + if (err_code) + return err_code; + + cq =3D &iwcq->sc_cq; + cq->back_cq =3D iwcq; + refcount_set(&iwcq->refcnt, 1); + spin_lock_init(&iwcq->lock); + INIT_LIST_HEAD(&iwcq->resize_list); + INIT_LIST_HEAD(&iwcq->cmpl_generated); + iwcq->cq_num =3D cq_num; + info.dev =3D dev; + ukinfo->cq_size =3D max(entries, 4); + ukinfo->cq_id =3D cq_num; + cqe_64byte_ena =3D dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_6= 4_BYTE_CQE ? + true : false; + cqe_size =3D cqe_64byte_ena ? 64 : 32; + ukinfo->avoid_mem_cflct =3D cqe_64byte_ena; + iwcq->ibcq.cqe =3D info.cq_uk_init_info.cq_size; + if (attr->comp_vector < rf->ceqs_count) + info.ceq_id =3D attr->comp_vector; + info.ceq_id_valid =3D true; + info.ceqe_mask =3D 1; + info.type =3D IRDMA_CQ_TYPE_IWARP; + info.vsi =3D &iwdev->vsi; + + /* Kmode allocations */ + if (entries < 1 || entries > rf->max_cqe) { + err_code =3D -EINVAL; + goto cq_free_rsrc; } =20 + entries +=3D 2; + if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >=3D IRDMA_GEN_2) + entries *=3D 2; + + if (entries & 1) + entries +=3D 1; /* cq size must be an even number */ + + if (entries * cqe_size =3D=3D IRDMA_HW_PAGE_SIZE) + entries +=3D 2; + + ukinfo->cq_size =3D entries; + + if (cqe_64byte_ena) + rsize =3D info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cq= e); + else + rsize =3D info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); + iwcq->kmem.size =3D ALIGN(round_up(rsize, 256), 256); + iwcq->kmem.va =3D dma_alloc_coherent(dev->hw->device, + iwcq->kmem.size, + &iwcq->kmem.pa, GFP_KERNEL); + if (!iwcq->kmem.va) { + err_code =3D -ENOMEM; + goto cq_free_rsrc; + } + + iwcq->kmem_shadow.size =3D ALIGN(IRDMA_SHADOW_AREA_SIZE << 3, + 64); + iwcq->kmem_shadow.va =3D dma_alloc_coherent(dev->hw->device, + iwcq->kmem_shadow.size, + &iwcq->kmem_shadow.pa, + GFP_KERNEL); + if (!iwcq->kmem_shadow.va) { + err_code =3D -ENOMEM; + goto cq_free_rsrc; + } + info.shadow_area_pa =3D iwcq->kmem_shadow.pa; + ukinfo->shadow_area =3D iwcq->kmem_shadow.va; + ukinfo->cq_base =3D iwcq->kmem.va; + info.cq_base_pa =3D iwcq->kmem.pa; + info.shadow_read_threshold =3D min(info.cq_uk_init_info.cq_size / 2, (u32)IRDMA_MAX_CQ_READ_THRESH); =20 @@ -2656,28 +2750,13 @@ static int irdma_create_cq(struct ib_cq *ibcq, if (err_code) goto cq_free_rsrc; =20 - if (udata) { - struct irdma_create_cq_resp resp =3D {}; - - resp.cq_id =3D info.cq_uk_init_info.cq_id; - resp.cq_size =3D info.cq_uk_init_info.cq_size; - if (ib_copy_to_udata(udata, &resp, - min(sizeof(resp), udata->outlen))) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: copy to user data\n"); - err_code =3D -EPROTO; - goto cq_destroy; - } - } - init_completion(&iwcq->free_cq); =20 /* Populate table entry after CQ is fully created. */ smp_store_release(&rf->cq_table[cq_num], iwcq); =20 return 0; -cq_destroy: - irdma_cq_wq_destroy(rf, cq); + cq_free_rsrc: irdma_cq_free_rsrc(rf, iwcq); =20 @@ -5355,6 +5434,7 @@ static const struct ib_device_ops irdma_dev_ops =3D { .alloc_pd =3D irdma_alloc_pd, .alloc_ucontext =3D irdma_alloc_ucontext, .create_cq =3D irdma_create_cq, + .create_user_cq =3D irdma_create_user_cq, .create_qp =3D irdma_create_qp, .dealloc_driver =3D irdma_ib_dealloc_device, .dealloc_mw =3D irdma_dealloc_mw, --=20 2.52.0