From nobody Fri Dec 19 08:37:19 2025 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 9838E28B7E2; Wed, 7 May 2025 15:59:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=13.77.154.182 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746633548; cv=none; b=FKn3zZICn3UOn2MgSOv76NfTPSmkzWROoDuGl5udj0M2PQwtUtZYI6Ya4S+TO+vZw0Wyv8tu0MhGmH+DCU7BRmRPdLVvYAVRkTcrE2TZWqAW2fJGsfvE+p2D7Aji8yi+9cIphSBq+FK084sqL2t4b+ZCQdrW3OTDgRgUZSesxLU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746633548; c=relaxed/simple; bh=5Htt0XtrGB1HIsbJmKus2qqkNIm8S0mutRTig4SsQms=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References; b=Ylu1Ga0944FWl/ki15uLSxtv6VgDitPeNRnyO8unwRKoRn9pIaTaCcSk6ZjbznzPgDBJ89d4N6ZFjHj+L91ZT2F6EA3QRBVK6xnuE5mjyenjL1MQfJNdFEMv6rb4RzHh7TpOEkPmES1xdZST8AcCqo1IXxbnYAz3Ivb3EmRU7Js= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.microsoft.com; spf=pass smtp.mailfrom=linux.microsoft.com; dkim=pass (1024-bit key) header.d=linux.microsoft.com header.i=@linux.microsoft.com header.b=Wcy/Mvz5; arc=none smtp.client-ip=13.77.154.182 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.microsoft.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.microsoft.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.microsoft.com header.i=@linux.microsoft.com header.b="Wcy/Mvz5" Received: by linux.microsoft.com (Postfix, from userid 1186) id 485B021199CC; Wed, 7 May 2025 08:59:05 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 485B021199CC DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1746633545; bh=VGxGE0RP7EsaST1HSCQAWnMLR4KrGknrkI/BU2X2CRE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Wcy/Mvz5xQ1uG+ZaapHE7TzsOmtGRKYGj9IfbdoIfa5N46x/KE/rhg3ekZ2yAgNjq /sa1j2v3QZ0ABDCWdBj9S21CnQOUIIAo3+BQ3gtnleK9uYu7jxLf0aDTkm6MAjvJxy PhRcvIn2mNFnfCNcM8j3SzVbiMpisXCurXVxFi4k= From: Konstantin Taranov To: kotaranov@microsoft.com, pabeni@redhat.com, haiyangz@microsoft.com, kys@microsoft.com, edumazet@google.com, kuba@kernel.org, davem@davemloft.net, decui@microsoft.com, wei.liu@kernel.org, longli@microsoft.com, jgg@ziepe.ca, leon@kernel.org Cc: linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org Subject: [PATCH rdma-next v4 2/4] RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic Date: Wed, 7 May 2025 08:59:03 -0700 Message-Id: <1746633545-17653-3-git-send-email-kotaranov@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1746633545-17653-1-git-send-email-kotaranov@linux.microsoft.com> References: <1746633545-17653-1-git-send-email-kotaranov@linux.microsoft.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: Konstantin Taranov Allow mana_ib to be created over ethernet gdma device and over rnic gdma device. The HW has two devices with different capabilities and different use-cases. Initialize required resources depending on the used gdma device. Signed-off-by: Konstantin Taranov --- drivers/infiniband/hw/mana/device.c | 174 +++++++++++++-------------- drivers/infiniband/hw/mana/main.c | 55 ++++++++- drivers/infiniband/hw/mana/mana_ib.h | 6 + 3 files changed, 138 insertions(+), 97 deletions(-) diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/ma= na/device.c index b310893..165c0a1 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device *ad= ev, const struct auxiliary_device_id *id) { struct mana_adev *madev =3D container_of(adev, struct mana_adev, adev); + struct gdma_context *gc =3D madev->mdev->gdma_context; + struct mana_context *mc =3D gc->mana.driver_data; struct gdma_dev *mdev =3D madev->mdev; struct net_device *ndev; - struct mana_context *mc; struct mana_ib_dev *dev; u8 mac_addr[ETH_ALEN]; int ret; =20 - mc =3D mdev->driver_data; - dev =3D ib_alloc_device(mana_ib_dev, ib_dev); if (!dev) return -ENOMEM; =20 ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops); - - dev->ib_dev.phys_port_cnt =3D mc->num_ports; - - ibdev_dbg(&dev->ib_dev, "mdev=3D%p id=3D%d num_ports=3D%d\n", mdev, - mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); - dev->ib_dev.node_type =3D RDMA_NODE_IB_CA; - - /* - * num_comp_vectors needs to set to the max MSIX index - * when interrupts and event queues are implemented - */ - dev->ib_dev.num_comp_vectors =3D mdev->gdma_context->max_num_queues; - dev->ib_dev.dev.parent =3D mdev->gdma_context->dev; - - ndev =3D mana_get_primary_netdev(mc, 0, &dev->dev_tracker); - if (!ndev) { - ret =3D -ENODEV; - ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); - goto free_ib_device; - } - ether_addr_copy(mac_addr, ndev->dev_addr); - addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); - ret =3D ib_device_set_netdev(&dev->ib_dev, ndev, 1); - /* mana_get_primary_netdev() returns ndev with refcount held */ - netdev_put(ndev, &dev->dev_tracker); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); - goto free_ib_device; - } - - ret =3D mana_gd_register_device(&mdev->gdma_context->mana_ib); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to register device, ret %d", - ret); - goto free_ib_device; - } - dev->gdma_dev =3D &mdev->gdma_context->mana_ib; - - dev->nb.notifier_call =3D mana_ib_netdev_event; - ret =3D register_netdevice_notifier(&dev->nb); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", - ret); - goto deregister_device; - } - - ret =3D mana_ib_gd_query_adapter_caps(dev); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", - ret); - goto deregister_net_notifier; - } - - ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); - - ret =3D mana_ib_create_eqs(dev); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); - goto deregister_net_notifier; - } - - ret =3D mana_ib_gd_create_rnic_adapter(dev); - if (ret) - goto destroy_eqs; - + dev->ib_dev.num_comp_vectors =3D gc->max_num_queues; + dev->ib_dev.dev.parent =3D gc->dev; + dev->gdma_dev =3D mdev; xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ); - ret =3D mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr); - if (ret) { - ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", - ret); - goto destroy_rnic; + + if (mana_ib_is_rnic(dev)) { + dev->ib_dev.phys_port_cnt =3D 1; + ndev =3D mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + if (!ndev) { + ret =3D -ENODEV; + ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); + goto free_ib_device; + } + ether_addr_copy(mac_addr, ndev->dev_addr); + addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); + ret =3D ib_device_set_netdev(&dev->ib_dev, ndev, 1); + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); + goto free_ib_device; + } + + dev->nb.notifier_call =3D mana_ib_netdev_event; + ret =3D register_netdevice_notifier(&dev->nb); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", + ret); + goto free_ib_device; + } + + ret =3D mana_ib_gd_query_adapter_caps(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret); + goto deregister_net_notifier; + } + + ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); + + ret =3D mana_ib_create_eqs(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); + goto deregister_net_notifier; + } + + ret =3D mana_ib_gd_create_rnic_adapter(dev); + if (ret) + goto destroy_eqs; + + ret =3D mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret); + goto destroy_rnic; + } + } else { + dev->ib_dev.phys_port_cnt =3D mc->num_ports; + ret =3D mana_eth_query_adapter_caps(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret); + goto free_ib_device; + } } =20 - dev->av_pool =3D dma_pool_create("mana_ib_av", mdev->gdma_context->dev, - MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0); + dev->av_pool =3D dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SI= ZE, + MANA_AV_BUFFER_SIZE, 0); if (!dev->av_pool) { ret =3D -ENOMEM; goto destroy_rnic; } =20 - ret =3D ib_register_device(&dev->ib_dev, "mana_%d", - mdev->gdma_context->dev); + ibdev_dbg(&dev->ib_dev, "mdev=3D%p id=3D%d num_ports=3D%d\n", mdev, + mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); + + ret =3D ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d"= : "manae_%d", + gc->dev); if (ret) goto deallocate_pool; =20 @@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device *ade= v, deallocate_pool: dma_pool_destroy(dev->av_pool); destroy_rnic: - xa_destroy(&dev->qp_table_wq); - mana_ib_gd_destroy_rnic_adapter(dev); + if (mana_ib_is_rnic(dev)) + mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: - mana_ib_destroy_eqs(dev); + if (mana_ib_is_rnic(dev)) + mana_ib_destroy_eqs(dev); deregister_net_notifier: - unregister_netdevice_notifier(&dev->nb); -deregister_device: - mana_gd_deregister_device(dev->gdma_dev); + if (mana_ib_is_rnic(dev)) + unregister_netdevice_notifier(&dev->nb); free_ib_device: + xa_destroy(&dev->qp_table_wq); ib_dealloc_device(&dev->ib_dev); return ret; } @@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device *a= dev) =20 ib_unregister_device(&dev->ib_dev); dma_pool_destroy(dev->av_pool); + if (mana_ib_is_rnic(dev)) { + mana_ib_gd_destroy_rnic_adapter(dev); + mana_ib_destroy_eqs(dev); + unregister_netdevice_notifier(&dev->nb); + } xa_destroy(&dev->qp_table_wq); - mana_ib_gd_destroy_rnic_adapter(dev); - mana_ib_destroy_eqs(dev); - unregister_netdevice_notifier(&dev->nb); - mana_gd_deregister_device(dev->gdma_dev); ib_dealloc_device(&dev->ib_dev); } =20 static const struct auxiliary_device_id mana_id_table[] =3D { - { - .name =3D "mana.rdma", - }, + { .name =3D "mana.rdma", }, + { .name =3D "mana.eth", }, {}, }; =20 MODULE_DEVICE_TABLE(auxiliary, mana_id_table); =20 static struct auxiliary_driver mana_driver =3D { - .name =3D "rdma", .probe =3D mana_ib_probe, .remove =3D mana_ib_remove, .id_table =3D mana_id_table, diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana= /main.c index bb0f685..3837e30 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -4,6 +4,7 @@ */ =20 #include "mana_ib.h" +#include "linux/pci.h" =20 void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, u32 port) @@ -551,6 +552,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct = vm_area_struct *vma) int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { + struct mana_ib_dev *dev =3D container_of(ibdev, struct mana_ib_dev, ib_de= v); struct ib_port_attr attr; int err; =20 @@ -560,10 +562,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibde= v, u32 port_num, =20 immutable->pkey_tbl_len =3D attr.pkey_tbl_len; immutable->gid_tbl_len =3D attr.gid_tbl_len; - immutable->core_cap_flags =3D RDMA_CORE_PORT_RAW_PACKET; - if (port_num =3D=3D 1) { - immutable->core_cap_flags |=3D RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + if (mana_ib_is_rnic(dev)) { + immutable->core_cap_flags =3D RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; immutable->max_mad_size =3D IB_MGMT_MAD_SIZE; + } else { + immutable->core_cap_flags =3D RDMA_CORE_PORT_RAW_PACKET; } =20 return 0; @@ -572,10 +576,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibde= v, u32 port_num, int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *p= rops, struct ib_udata *uhw) { - struct mana_ib_dev *dev =3D container_of(ibdev, - struct mana_ib_dev, ib_dev); + struct mana_ib_dev *dev =3D container_of(ibdev, struct mana_ib_dev, ib_de= v); + struct pci_dev *pdev =3D to_pci_dev(mdev_to_gc(dev)->dev); =20 memset(props, 0, sizeof(*props)); + props->vendor_id =3D pdev->vendor; + props->vendor_part_id =3D dev->gdma_dev->dev_id.type; props->max_mr_size =3D MANA_IB_MAX_MR_SIZE; props->page_size_cap =3D dev->adapter_caps.page_size_cap; props->max_qp =3D dev->adapter_caps.max_qp_count; @@ -596,6 +602,8 @@ int mana_ib_query_device(struct ib_device *ibdev, struc= t ib_device_attr *props, props->max_ah =3D INT_MAX; props->max_pkeys =3D 1; props->local_ca_ack_delay =3D MANA_CA_ACK_DELAY; + if (!mana_ib_is_rnic(dev)) + props->raw_packet_caps =3D IB_RAW_PACKET_CAP_IP_CSUM; =20 return 0; } @@ -603,6 +611,7 @@ int mana_ib_query_device(struct ib_device *ibdev, struc= t ib_device_attr *props, int mana_ib_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { + struct mana_ib_dev *dev =3D container_of(ibdev, struct mana_ib_dev, ib_de= v); struct net_device *ndev =3D mana_ib_get_netdev(ibdev, port); =20 if (!ndev) @@ -623,7 +632,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 por= t, props->active_width =3D IB_WIDTH_4X; props->active_speed =3D IB_SPEED_EDR; props->pkey_tbl_len =3D 1; - if (port =3D=3D 1) { + if (mana_ib_is_rnic(dev)) { props->gid_tbl_len =3D 16; props->port_cap_flags =3D IB_PORT_CM_SUP; props->ip_gids =3D true; @@ -703,6 +712,37 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *= dev) return 0; } =20 +int mana_eth_query_adapter_caps(struct mana_ib_dev *dev) +{ + struct mana_ib_adapter_caps *caps =3D &dev->adapter_caps; + struct gdma_query_max_resources_resp resp =3D {}; + struct gdma_general_req req =3D {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, + sizeof(req), sizeof(resp)); + + err =3D mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(r= esp), &resp); + if (err) { + ibdev_err(&dev->ib_dev, + "Failed to query adapter caps err %d", err); + return err; + } + + caps->max_qp_count =3D min_t(u32, resp.max_sq, resp.max_rq); + caps->max_cq_count =3D resp.max_cq; + caps->max_mr_count =3D resp.max_mst; + caps->max_pd_count =3D 0x6000; + caps->max_qp_wr =3D min_t(u32, + 0x100000 / GDMA_MAX_SQE_SIZE, + 0x100000 / GDMA_MAX_RQE_SIZE); + caps->max_send_sge_count =3D 30; + caps->max_recv_sge_count =3D 15; + caps->page_size_cap =3D PAGE_SZ_BM; + + return 0; +} + static void mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *= event) { @@ -921,6 +961,9 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, stru= ct mana_ib_cq *cq, u32 do struct mana_rnic_create_cq_req req =3D {}; int err; =20 + if (!mdev->eqs) + return -EINVAL; + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(res= p)); req.hdr.dev_id =3D gc->mana_ib.dev_id; req.adapter =3D mdev->adapter_handle; diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/m= ana/mana_ib.h index f0dbd90..42bebd6 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -544,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *= qp) complete(&qp->free); } =20 +static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev) +{ + return mdev->gdma_dev->dev_id.type =3D=3D GDMA_DEVICE_MANA_IB; +} + static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibde= v, u32 port) { struct mana_ib_dev *mdev =3D container_of(ibdev, struct mana_ib_dev, ib_d= ev); @@ -643,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port= , int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); =20 int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev); +int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev); =20 int mana_ib_create_eqs(struct mana_ib_dev *mdev); =20 --=20 2.43.0