From nobody Thu Feb 12 03:18:24 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 29D32C77B61 for ; Fri, 28 Apr 2023 14:49:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1345902AbjD1OtN (ORCPT ); Fri, 28 Apr 2023 10:49:13 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46816 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229976AbjD1OtL (ORCPT ); Fri, 28 Apr 2023 10:49:11 -0400 Received: from out-45.mta0.migadu.com (out-45.mta0.migadu.com [IPv6:2001:41d0:1004:224b::2d]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 221F126A5 for ; Fri, 28 Apr 2023 07:49:09 -0700 (PDT) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1682693347; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding; bh=YNMbbKrD1uO8+HlRA4/dk4UblIu/ueU/BPvVytbOgl4=; b=taQ1d8UgMZ6btQ6BX0XzU6E8zej475DmJp7Pn8K+OpjF8bA1SSC+IkxARINZ2f1QKbNkgf 36AkLVAtlQNjw01BtNAnfBzvC8ZnXDiPadUM9gCK4VbYH+JEedG+vVsMXbmG+wgE885dzF v2A7c6qjLWt/lZ7A5iT5i7tf3id0iiA= From: Cai Huoqing To: cai.huoqing@linux.dev Cc: Oded Gabbay , Ohad Sharabi , Greg Kroah-Hartman , dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Subject: [PATCH v2] accel/habanalabs: Make use of rhashtable Date: Fri, 28 Apr 2023 22:48:54 +0800 Message-Id: <20230428144903.26048-1-cai.huoqing@linux.dev> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Using rhashtable to accelerate the 
search for userptr by address, instead of using a list. A hash table lookup is O(1) on average, whereas walking the list is O(n). This patch speeds up hl_userptr_is_pinned() by using rhashtable_lookup_fast(). Signed-off-by: Cai Huoqing --- v1->v2: Use rhashtable_free_and_destroy in hl_userptr_delete_list. .../habanalabs/common/command_submission.c | 16 ++++++-- drivers/accel/habanalabs/common/habanalabs.h | 19 +++++---- drivers/accel/habanalabs/common/memory.c | 39 +++++++++++-------- drivers/accel/habanalabs/gaudi/gaudi.c | 16 +++++--- drivers/accel/habanalabs/goya/goya.c | 14 ++++--- 5 files changed, 65 insertions(+), 39 deletions(-) diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers= /accel/habanalabs/common/command_submission.c index af9d2e22c6e7..35c2ab934396 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_= cs_job *job) parser.job_id =3D job->id; =20 parser.hw_queue_id =3D job->hw_queue_id; - parser.job_userptr_list =3D &job->userptr_list; + parser.job_userptr_ht =3D &job->userptr_ht; parser.patched_cb =3D NULL; parser.user_cb =3D job->user_cb; parser.user_cb_size =3D job->user_cb_size; @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, str= uct hl_cs_job *job) struct hl_cs *cs =3D job->cs; =20 if (is_cb_patched(hdev, job)) { - hl_userptr_delete_list(hdev, &job->userptr_list); + hl_userptr_delete_list(hdev, &job->userptr_ht); =20 /* * We might arrive here from rollback and patched CB wasn't @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device= *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb) { struct hl_cs_job *job; + int rc; =20 job =3D kzalloc(sizeof(*job), GFP_ATOMIC); if (!job) @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_devi= ce *hdev, job->queue_type =3D queue_type; job->is_kernel_allocated_cb =3D 
is_kernel_allocated_cb; =20 - if (is_cb_patched(hdev, job)) - INIT_LIST_HEAD(&job->userptr_list); + if (is_cb_patched(hdev, job)) { + rc =3D rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params); + if (rc) + goto free_job; + } =20 if (job->queue_type =3D=3D QUEUE_TYPE_EXT) INIT_WORK(&job->finish_work, job_wq_completion); =20 return job; + +free_job: + kfree(job); + return NULL; } =20 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/h= abanalabs/common/habanalabs.h index eaae69a9f817..9c876d1480d2 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -540,6 +541,8 @@ struct hl_hints_range { u64 end_addr; }; =20 +extern const struct rhashtable_params hl_userptr_rht_params; + /** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr { /** * struct hl_userptr - memory mapping chunk information * @vm_type: type of the VM. - * @job_node: linked-list node for hanging the object on the Job's list. + * @job_node: hashtable node for hanging the object on the Job's hashtable. * @pages: pointer to struct page array * @npages: size of @pages array * @sgt: pointer to the scatter-gather table that holds the pages. @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr { */ struct hl_userptr { enum vm_type vm_type; /* must be first */ - struct list_head job_node; + struct rhash_head job_node; struct page **pages; unsigned int npages; struct sg_table *sgt; @@ -2028,7 +2031,7 @@ struct hl_cs { * @patched_cb: in case of patching, this is internal CB which is submitte= d on * the queue instead of the CB we got from the IOCTL. * @finish_work: workqueue object to run when job is completed. 
- * @userptr_list: linked-list of userptr mappings that belong to this job = and + * @userptr_ht: hashtable of userptr mappings that belong to this job and * wait for completion. * @debugfs_list: node in debugfs list of command submission jobs. * @refcount: reference counter for usage of the CS job. @@ -2056,7 +2059,7 @@ struct hl_cs_job { struct hl_cb *user_cb; struct hl_cb *patched_cb; struct work_struct finish_work; - struct list_head userptr_list; + struct rhashtable userptr_ht; struct list_head debugfs_list; struct kref refcount; enum hl_queue_type queue_type; @@ -2075,7 +2078,7 @@ struct hl_cs_job { * @user_cb: the CB we got from the user. * @patched_cb: in case of patching, this is internal CB which is submitte= d on * the queue instead of the CB we got from the IOCTL. - * @job_userptr_list: linked-list of userptr mappings that belong to the r= elated + * @job_userptr_ht: hashtable of userptr mappings that belong to the relat= ed * job and wait for completion. * @cs_sequence: the sequence number of the related CS. * @queue_type: the type of the H/W queue this job is submitted to. 
@@ -2098,7 +2101,7 @@ struct hl_cs_job { struct hl_cs_parser { struct hl_cb *user_cb; struct hl_cb *patched_cb; - struct list_head *job_userptr_list; + struct rhashtable *job_userptr_ht; u64 cs_sequence; enum hl_queue_type queue_type; u32 ctx_id; @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 ad= dr, u64 size, struct hl_userptr *userptr); void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userp= tr); void hl_userptr_delete_list(struct hl_device *hdev, - struct list_head *userptr_list); + struct rhashtable *userptr_ht); bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, - struct list_head *userptr_list, + struct rhashtable *userptr_ht, struct hl_userptr **userptr); =20 int hl_mmu_init(struct hl_device *hdev); diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/haban= alabs/common/memory.c index a7b6a273ce21..fa2104e33639 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF); =20 #define MEM_HANDLE_INVALID ULONG_MAX =20 +const struct rhashtable_params hl_userptr_rht_params =3D { + .head_offset =3D offsetof(struct hl_userptr, job_node), + .key_offset =3D offsetof(struct hl_userptr, addr), + .key_len =3D sizeof(u64), + .automatic_shrinking =3D true, +}; + static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle); =20 @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 ad= dr, u64 size, userptr->size =3D size; userptr->addr =3D addr; userptr->dma_mapped =3D false; - INIT_LIST_HEAD(&userptr->job_node); =20 rc =3D get_user_memory(hdev, addr, size, npages, start, offset, userptr); @@ -2522,32 +2528,32 @@ void hl_unpin_host_memory(struct hl_device *hdev, s= truct hl_userptr *userptr) unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); kvfree(userptr->pages); =20 - list_del(&userptr->job_node); - sg_free_table(userptr->sgt); 
kfree(userptr->sgt); } =20 +static void hl_userptr_free_cb(void *ptr, void *arg) +{ + struct hl_userptr *userptr =3D ptr; + struct hl_device *hdev =3D (struct hl_device *)arg; + + hl_unpin_host_memory(hdev, userptr); + kfree(userptr); +} + /** * hl_userptr_delete_list() - clear userptr list. * @hdev: pointer to the habanalabs device structure. - * @userptr_list: pointer to the list to clear. + * @userptr_ht: pointer to the hashtable to clear. * * This function does the following: * - Iterates over the list and unpins the host memory and frees the userp= tr * structure. */ void hl_userptr_delete_list(struct hl_device *hdev, - struct list_head *userptr_list) + struct rhashtable *userptr_ht) { - struct hl_userptr *userptr, *tmp; - - list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { - hl_unpin_host_memory(hdev, userptr); - kfree(userptr); - } - - INIT_LIST_HEAD(userptr_list); + rhashtable_free_and_destroy(userptr_ht, hl_userptr_free_cb, hdev); } =20 /** @@ -2555,7 +2561,7 @@ void hl_userptr_delete_list(struct hl_device *hdev, * @hdev: pointer to the habanalabs device structure. * @addr: user address to check. * @size: user block size to check. - * @userptr_list: pointer to the list to clear. + * @userptr_ht: pointer to the hashtable to search. * @userptr: pointer to userptr to check. * * This function does the following: @@ -2563,10 +2569,11 @@ void hl_userptr_delete_list(struct hl_device *hdev, * pinned. If so, returns true, otherwise returns false. 
*/ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, - u32 size, struct list_head *userptr_list, + u32 size, struct rhashtable *userptr_ht, struct hl_userptr **userptr) { - list_for_each_entry((*userptr), userptr_list, job_node) { + (*userptr) =3D rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_p= arams); + if (*userptr) { if ((addr =3D=3D (*userptr)->addr) && (size =3D=3D (*userptr)->size)) return true; } diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanal= abs/gaudi/gaudi.c index a29aa8f7b6f3..1e1433042413 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev, } =20 free_job: - hl_userptr_delete_list(hdev, &job->userptr_list); + hl_userptr_delete_list(hdev, &job->userptr_ht); hl_debugfs_remove_job(hdev, job); kfree(job); atomic_dec(&cb->cs_cnt); @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_devic= e *hdev, int rc; =20 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), - parser->job_userptr_list, &userptr)) + parser->job_userptr_ht, &userptr)) goto already_pinned; =20 userptr =3D kzalloc(sizeof(*userptr), GFP_KERNEL); @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_devi= ce *hdev, if (rc) goto free_userptr; =20 - list_add_tail(&userptr->job_node, parser->job_userptr_list); + rc =3D rhashtable_insert_fast(parser->job_userptr_ht, + &userptr->job_node, hl_userptr_rht_params); + if (rc) + goto unpin_memory; =20 rc =3D hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_devic= e *hdev, return 0; =20 unpin_memory: - list_del(&userptr->job_node); + rhashtable_remove_fast(parser->job_userptr_ht, + &userptr->job_node, hl_userptr_rht_params); hl_unpin_host_memory(hdev, userptr); free_userptr: kfree(userptr); @@ -5175,7 +5179,7 @@ static int 
gaudi_patch_dma_packet(struct hl_device *h= dev, if ((!skip_host_mem_pin) && (!hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), - parser->job_userptr_list, &userptr))) { + parser->job_userptr_ht, &userptr))) { dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", addr, user_dma_pkt->tsize); return -EFAULT; @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hd= ev, =20 free_userptr: if (rc) - hl_userptr_delete_list(hdev, parser->job_userptr_list); + hl_userptr_delete_list(hdev, parser->job_userptr_ht); return rc; } =20 diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalab= s/goya/goya.c index fb0ac9df841a..bfcbb9e8b126 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device= *hdev, int rc; =20 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), - parser->job_userptr_list, &userptr)) + parser->job_userptr_ht, &userptr)) goto already_pinned; =20 userptr =3D kzalloc(sizeof(*userptr), GFP_KERNEL); @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_devic= e *hdev, if (rc) goto free_userptr; =20 - list_add_tail(&userptr->job_node, parser->job_userptr_list); + rc =3D rhashtable_insert_fast(parser->job_userptr_ht, + &userptr->job_node, hl_userptr_rht_params); + if (rc) + goto unpin_memory; =20 rc =3D hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device= *hdev, return 0; =20 unpin_memory: - list_del(&userptr->job_node); + rhashtable_remove_fast(parser->job_userptr_ht, + &userptr->job_node, hl_userptr_rht_params); hl_unpin_host_memory(hdev, userptr); free_userptr: kfree(userptr); @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hd= ev, if ((!skip_host_mem_pin) && (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), - 
parser->job_userptr_list, &userptr) =3D=3D false)) { + parser->job_userptr_ht, &userptr) =3D=3D false)) { dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", addr, user_dma_pkt->tsize); return -EFAULT; @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hde= v, =20 free_userptr: if (rc) - hl_userptr_delete_list(hdev, parser->job_userptr_list); + hl_userptr_delete_list(hdev, parser->job_userptr_ht); return rc; } =20 --=20 2.34.1