From nobody Thu Apr 2 22:12:32 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7F0FA32C924; Fri, 13 Feb 2026 10:59:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1770980373; cv=none; b=CGeTkmcq5kJI0VG0wB2V9zI9Lf0UQEK8cbDRmrYlgIFbu1oZmf6W9KnZejcVfNhYhyb4V2v8y0zRFBpNDzTMweiZ4jzhVdlW4PdKBtTLHO9scxipAiLs0DZubH99E3abIDpV9fQ7G9MmAGaYCZqqTzZcK9K2wx9P7GkTuIh/8vE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1770980373; c=relaxed/simple; bh=60pBjLgyboHd3chw+dOmTg8slowfyyIq1cBDoIGl9YQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=Gh5tDKzcnBggCcAf3tA2DbVtyXKGrC4DQa1cxVlM6xL5Wh/SvMPtTp7h946vSNFWpLZsxHLiWVZ2XApsbc01Mgu3PwOqR6Uuv/X8Zy3Y25wvcUgkEmyRW0VDRnBCXLl6iDt5HyGlxRD1yuuGFhxmqMXBP6mEaE69Z39U2gIJHEQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Q72EEKBc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Q72EEKBc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 56216C116C6; Fri, 13 Feb 2026 10:59:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1770980373; bh=60pBjLgyboHd3chw+dOmTg8slowfyyIq1cBDoIGl9YQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Q72EEKBcADy6oOsvCnhYXIRmuV8O5u/uD5csUGS1ZSsCCecFJqx2ybun50/nzbDKw 0yB9TAjOVHrAnPh3SBcK+j1VB1ufYh32oHgVVI5oowKtwcidPP5KR15apM3NP0JAwh Tp5AoKo5OzX5ImYDMreAfmHSg/WbLUCeCa8NufXMvxlZRiGjUt5lVEnJCpm6y2Cw2W we/pcBDFUZxpAcilARq/rrPHF/lSpy7l0YX+XLlRxLHqZtzOnSOaPAxldReJrDEAfM w1aWGUGpnMWYIGcYl+bvvoI3XVH/hbaecyKSehc9uJ4CWohq4vr0M9xjamN86BpZg8 l5VQBJemJ9MPw== From: Leon Romanovsky To: Jason Gunthorpe , Leon Romanovsky , Selvin Xavier , Kalesh AP , Potnuri Bharat Teja , Michael Margolin , Gal Pressman , Yossi Leybovich , Cheng Xu , Kai Shen , Chengchang Tang , Junxian Huang , Abhijit Gangurde , Allen Hubbe , Krzysztof Czurylo , Tatyana Nikolova , Long Li , Konstantin Taranov , Yishai Hadas , Michal Kalderon , Bryan Tan , Vishnu Dasa , Broadcom internal kernel review list , Christian Benvenuti , Nelson Escobar , Dennis Dalessandro , Bernard Metzler , Zhu Yanjun Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org Subject: [PATCH rdma-next 11/50] RDMA/mlx5: Provide a modern CQ creation interface Date: Fri, 13 Feb 2026 12:57:47 +0200 Message-ID: <20260213-refactor-umem-v1-11-f3be85847922@nvidia.com> X-Mailer: git-send-email 2.53.0 In-Reply-To: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com> References: <20260213-refactor-umem-v1-0-f3be85847922@nvidia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" X-Mailer: b4 0.15-dev-47773 Content-Transfer-Encoding: quoted-printable From: Leon Romanovsky The uverbs CQ creation UAPI allows users to supply their own umem for a CQ. Update mlx5 to support this workflow while preserving support for creating umem through the legacy interface. Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 154 +++++++++++++++++++++++--------= ---- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 + 3 files changed, 107 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/c= q.c index 1b4290166e87..52a435efd0de 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -749,16 +749,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, st= ruct ib_udata *udata, =20 *cqe_size =3D ucmd.cqe_size; =20 - cq->buf.umem =3D - ib_umem_get(&dev->ib_dev, ucmd.buf_addr, - entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(cq->buf.umem)) { - err =3D PTR_ERR(cq->buf.umem); - return err; - } + if (!cq->ibcq.umem) + cq->ibcq.umem =3D ib_umem_get(&dev->ib_dev, ucmd.buf_addr, + entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(cq->ibcq.umem)) + return PTR_ERR(cq->ibcq.umem); =20 page_size =3D mlx5_umem_find_best_cq_quantized_pgoff( - cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + cq->ibcq.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, &page_offset_quantized); if (!page_size) { err =3D -EINVAL; @@ -769,12 +768,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, st= ruct ib_udata *udata, if (err) goto err_umem; =20 - ncont =3D ib_umem_num_dma_blocks(cq->buf.umem, page_size); + ncont =3D ib_umem_num_dma_blocks(cq->ibcq.umem, page_size); mlx5_ib_dbg( dev, "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, - ib_umem_num_pages(cq->buf.umem), page_size, ncont); + ib_umem_num_pages(cq->ibcq.umem), page_size, ncont); =20 *inlen =3D MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -785,7 +784,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, stru= ct ib_udata *udata, } =20 pas =3D (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); + mlx5_ib_populate_pas(cq->ibcq.umem, page_size, pas, 0); =20 cqc =3D MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, @@ -858,7 +857,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, stru= ct ib_udata *udata, mlx5_ib_db_unmap_user(context, &cq->db); =20 err_umem: - ib_umem_release(cq->buf.umem); + /* UMEM is released by ib_core */ return err; } =20 @@ -868,7 +867,6 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, stru= ct ib_udata *udata) udata, struct mlx5_ib_ucontext, ibucontext); =20 mlx5_ib_db_unmap_user(context, &cq->db); - ib_umem_release(cq->buf.umem); } =20 static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf) @@ -949,8 +947,9 @@ static void notify_soft_wc_handler(struct work_struct *= work) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } =20 -int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *at= tr, - struct uverbs_attr_bundle *attrs) +int mlx5_ib_create_user_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs) { struct ib_udata *udata =3D &attrs->driver_udata; struct ib_device *ibdev =3D ibcq->device; @@ -967,8 +966,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct = ib_cq_init_attr *attr, int eqn; int err; =20 - if (entries < 0 || - (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) + if (attr->cqe > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return -EINVAL; =20 if (check_cq_create_flags(attr->flags)) @@ -981,27 +979,15 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struc= t ib_cq_init_attr *attr, cq->ibcq.cqe =3D entries - 1; mutex_init(&cq->resize_mutex); spin_lock_init(&cq->lock); - cq->resize_buf =3D NULL; - cq->resize_umem =3D NULL; if (attr->flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) cq->private_flags |=3D MLX5_IB_CQ_PR_TIMESTAMP_COMPLETION; INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); =20 - if (udata) { - err =3D create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, - &index, &inlen, attrs); - if (err) - return err; - } else { - cqe_size =3D cache_line_size() =3D=3D 128 ? 128 : 64; - err =3D create_cq_kernel(dev, cq, entries, cqe_size, &cqb, - &index, &inlen); - if (err) - return err; - - INIT_WORK(&cq->notify_work, notify_soft_wc_handler); - } + err =3D create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, &index, + &inlen, attrs); + if (err) + return err; =20 err =3D mlx5_comp_eqn_get(dev->mdev, vector, &eqn); if (err) @@ -1021,12 +1007,8 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const stru= ct ib_cq_init_attr *attr, if (attr->flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); =20 - if (udata) { - cq->mcq.comp =3D mlx5_add_cq_to_tasklet; - cq->mcq.tasklet_ctx.comp =3D mlx5_ib_cq_comp; - } else { - cq->mcq.comp =3D mlx5_ib_cq_comp; - } + cq->mcq.comp =3D mlx5_add_cq_to_tasklet; + cq->mcq.tasklet_ctx.comp =3D mlx5_ib_cq_comp; =20 err =3D mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(= out)); if (err) @@ -1037,12 +1019,10 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const str= uct ib_cq_init_attr *attr, =20 INIT_LIST_HEAD(&cq->wc_list); =20 - if (udata) - if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { - err =3D -EFAULT; - goto err_cmd; - } - + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { + err =3D -EFAULT; + goto err_cmd; + } =20 kvfree(cqb); return 0; @@ -1052,10 +1032,82 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const str= uct ib_cq_init_attr *attr, =20 err_cqb: kvfree(cqb); - if (udata) - destroy_cq_user(cq, udata); - else - destroy_cq_kernel(dev, cq); + destroy_cq_user(cq, udata); + return err; +} + + +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *at= tr, + struct uverbs_attr_bundle *attrs) +{ + struct ib_device *ibdev =3D ibcq->device; + int entries =3D attr->cqe; + int vector =3D attr->comp_vector; + struct mlx5_ib_dev *dev =3D to_mdev(ibdev); + struct mlx5_ib_cq *cq =3D to_mcq(ibcq); + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + int index; + int inlen; + u32 *cqb =3D NULL; + void *cqc; + int cqe_size; + int eqn; + int err; + + if (attr->cqe > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) + return -EINVAL; + + entries =3D roundup_pow_of_two(entries + 1); + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) + return -EINVAL; + + cq->ibcq.cqe =3D entries - 1; + mutex_init(&cq->resize_mutex); + spin_lock_init(&cq->lock); + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); + + cqe_size =3D cache_line_size() =3D=3D 128 ? 128 : 64; + err =3D create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, + &inlen); + if (err) + return err; + + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); + + err =3D mlx5_comp_eqn_get(dev->mdev, vector, &eqn); + if (err) + goto err_cqb; + + cq->cqe_size =3D cqe_size; + + cqc =3D MLX5_ADDR_OF(create_cq_in, cqb, cq_context); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + MLX5_SET(cqc, cqc, uar_page, index); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); + + cq->mcq.comp =3D mlx5_ib_cq_comp; + + err =3D mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, + sizeof(out)); + if (err) + goto err_cqb; + + mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); + cq->mcq.event =3D mlx5_ib_cq_event; + + INIT_LIST_HEAD(&cq->wc_list); + kvfree(cqb); + return 0; + +err_cqb: + kvfree(cqb); + destroy_cq_kernel(dev, cq); return err; } =20 @@ -1390,8 +1442,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries= , struct ib_udata *udata) =20 if (udata) { cq->ibcq.cqe =3D entries - 1; - ib_umem_release(cq->buf.umem); - cq->buf.umem =3D cq->resize_umem; + ib_umem_release(cq->ibcq.umem); + cq->ibcq.umem =3D cq->resize_umem; cq->resize_umem =3D NULL; } else { struct mlx5_ib_cq_buf tbuf; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5= /main.c index eba023b7af0f..4f49f65e2c16 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4447,6 +4447,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops =3D= { .check_mr_status =3D mlx5_ib_check_mr_status, .create_ah =3D mlx5_ib_create_ah, .create_cq =3D mlx5_ib_create_cq, + .create_user_cq =3D mlx5_ib_create_user_cq, .create_qp =3D mlx5_ib_create_qp, .create_srq =3D mlx5_ib_create_srq, .create_user_ah =3D mlx5_ib_create_ah, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/m= lx5/mlx5_ib.h index ce3372aea48b..2556e326afde 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1371,6 +1371,9 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int= wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *at= tr, struct uverbs_attr_bundle *attrs); +int mlx5_ib_create_user_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs); int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_pre_destroy_cq(struct ib_cq *cq); --=20 2.52.0