From nobody Sat Feb 7 17:55:01 2026
From: Leon Hwang
To: bpf@vger.kernel.org
Cc: Martin KaFai Lau, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
 Eduard Zingerman, Song Liu, Yonghong Song, John Fastabend, KP Singh,
 Stanislav Fomichev, Hao Luo, Jiri Olsa, Shuah Khan, Leon Hwang,
 Saket Kumar Bhaskar, "David S. Miller", linux-kernel@vger.kernel.org,
 linux-kselftest@vger.kernel.org, kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v2 1/5] bpf: lru: Tidy hash handling in LRU code
Date: Mon, 5 Jan 2026 23:18:09 +0800
Message-ID: <20260105151813.6968-2-leon.hwang@linux.dev>
In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev>
References: <20260105151813.6968-1-leon.hwang@linux.dev>

The hash field is not used by the LRU list itself. Setting hash while
manipulating the LRU list also obscures the intent of the code and
makes it harder to follow.

Tidy this up by moving the hash assignment to prealloc_lru_pop(), where
the element is prepared for insertion into the hash table.
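To illustrate the move (a before/after sketch distilled from the diff
below, not additional code): the hash used to be written through a byte
offset stored in struct bpf_lru, and is now a plain field store at the
point where the element is handed back to the hash table:

	/* Before: the LRU layer wrote through an opaque offset. */
	*(u32 *)((void *)node + lru->hash_offset) = hash;

	/* After: prealloc_lru_pop() stores the field directly. */
	l = container_of(node, struct htab_elem, lru_node);
	l->hash = hash;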
Signed-off-by: Leon Hwang
---
 kernel/bpf/bpf_lru_list.c | 24 +++++++++---------------
 kernel/bpf/bpf_lru_list.h |  5 ++---
 kernel/bpf/hashtab.c      |  5 ++---
 3 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index e7a2fc60523f..f4e183a9c28f 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -344,10 +344,8 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
 static void __local_list_add_pending(struct bpf_lru *lru,
 				     struct bpf_lru_locallist *loc_l,
 				     int cpu,
-				     struct bpf_lru_node *node,
-				     u32 hash)
+				     struct bpf_lru_node *node)
 {
-	*(u32 *)((void *)node + lru->hash_offset) = hash;
 	node->cpu = cpu;
 	node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
 	bpf_lru_node_clear_ref(node);
@@ -393,8 +391,7 @@ __local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l)
 	return NULL;
 }
 
-static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
-						    u32 hash)
+static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru)
 {
 	struct list_head *free_list;
 	struct bpf_lru_node *node = NULL;
@@ -415,7 +412,6 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 
 	if (!list_empty(free_list)) {
 		node = list_first_entry(free_list, struct bpf_lru_node, list);
-		*(u32 *)((void *)node + lru->hash_offset) = hash;
 		bpf_lru_node_clear_ref(node);
 		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
 	}
@@ -425,8 +421,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 	return node;
 }
 
-static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
-						    u32 hash)
+static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru)
 {
 	struct bpf_lru_locallist *loc_l, *steal_loc_l;
 	struct bpf_common_lru *clru = &lru->common_lru;
@@ -446,7 +441,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 	}
 
 	if (node)
-		__local_list_add_pending(lru, loc_l, cpu, node, hash);
+		__local_list_add_pending(lru, loc_l, cpu, node);
 
 	raw_spin_unlock_irqrestore(&loc_l->lock, flags);
 
@@ -481,19 +476,19 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 
 	if (node) {
 		raw_spin_lock_irqsave(&loc_l->lock, flags);
-		__local_list_add_pending(lru, loc_l, cpu, node, hash);
+		__local_list_add_pending(lru, loc_l, cpu, node);
 		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
 	}
 
 	return node;
 }
 
-struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
+struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru)
 {
 	if (lru->percpu)
-		return bpf_percpu_lru_pop_free(lru, hash);
+		return bpf_percpu_lru_pop_free(lru);
 	else
-		return bpf_common_lru_pop_free(lru, hash);
+		return bpf_common_lru_pop_free(lru);
 }
 
 static void bpf_common_lru_push_free(struct bpf_lru *lru,
@@ -643,7 +638,7 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
 	raw_spin_lock_init(&l->lock);
 }
 
-int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+int bpf_lru_init(struct bpf_lru *lru, bool percpu,
 		 del_from_htab_func del_from_htab, void *del_arg)
 {
 	int cpu;
@@ -681,7 +676,6 @@ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
 	lru->percpu = percpu;
 	lru->del_from_htab = del_from_htab;
 	lru->del_arg = del_arg;
-	lru->hash_offset = hash_offset;
 
 	return 0;
 }
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index fe2661a58ea9..29e8300e0fd1 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -57,7 +57,6 @@ struct bpf_lru {
 	};
 	del_from_htab_func del_from_htab;
 	void *del_arg;
-	unsigned int hash_offset;
 	unsigned int target_free;
 	unsigned int nr_scans;
 	bool percpu;
@@ -69,12 +68,12 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 	WRITE_ONCE(node->ref, 1);
 }
 
-int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+int bpf_lru_init(struct bpf_lru *lru, bool percpu,
 		 del_from_htab_func del_from_htab, void *delete_arg);
 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
 		      u32 elem_size, u32 nr_elems);
 void bpf_lru_destroy(struct bpf_lru *lru);
-struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash);
+struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru);
 void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node);
 
 #endif
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index c8a9b27f8663..d029690246f8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -296,12 +296,13 @@ static void htab_free_elems(struct bpf_htab *htab)
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 					  u32 hash)
 {
-	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
+	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru);
 	struct htab_elem *l;
 
 	if (node) {
 		bpf_map_inc_elem_count(&htab->map);
 		l = container_of(node, struct htab_elem, lru_node);
+		l->hash = hash;
 		memcpy(l->key, key, htab->map.key_size);
 		return l;
 	}
@@ -342,8 +343,6 @@ static int prealloc_init(struct bpf_htab *htab)
 	if (htab_is_lru(htab))
 		err = bpf_lru_init(&htab->lru,
 				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
-				   offsetof(struct htab_elem, hash) -
-				   offsetof(struct htab_elem, lru_node),
 				   htab_lru_map_delete_node,
 				   htab);
 	else
-- 
2.52.0

From nobody Sat Feb 7 17:55:01 2026
From: Leon Hwang
To: bpf@vger.kernel.org
Subject: [PATCH bpf-next v2 2/5] bpf: lru: Factor out bpf_lru_node_reset_state helper
Date: Mon, 5 Jan 2026 23:18:10 +0800
Message-ID: <20260105151813.6968-3-leon.hwang@linux.dev>
In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev>
References: <20260105151813.6968-1-leon.hwang@linux.dev>

Introduce the helper bpf_lru_node_reset_state() to set a node's type
and clear its ref bit in one place. No functional change intended.
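The helper folds a recurring two-line pattern into a single call;
sketched from the diff below:

	static void bpf_lru_node_reset_state(struct bpf_lru_node *node,
					     enum bpf_lru_list_type type)
	{
		node->type = type;
		bpf_lru_node_clear_ref(node);
	}

Every call site that used to assign node->type and then call
bpf_lru_node_clear_ref(node) becomes one line, e.g.
bpf_lru_node_reset_state(node, tgt_type).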
Signed-off-by: Leon Hwang
---
 kernel/bpf/bpf_lru_list.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index f4e183a9c28f..b17b05f41900 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -41,6 +41,12 @@ static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
 	WRITE_ONCE(node->ref, 0);
 }
 
+static void bpf_lru_node_reset_state(struct bpf_lru_node *node, enum bpf_lru_list_type type)
+{
+	node->type = type;
+	bpf_lru_node_clear_ref(node);
+}
+
 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
 				   enum bpf_lru_list_type type)
 {
@@ -85,8 +91,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
 		return;
 
 	bpf_lru_list_count_inc(l, tgt_type);
-	node->type = tgt_type;
-	bpf_lru_node_clear_ref(node);
+	bpf_lru_node_reset_state(node, tgt_type);
 	list_move(&node->list, &l->lists[tgt_type]);
 }
 
@@ -347,8 +352,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
 				     struct bpf_lru_node *node)
 {
 	node->cpu = cpu;
-	node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
-	bpf_lru_node_clear_ref(node);
+	bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_PENDING);
 	list_add(&node->list, local_pending_list(loc_l));
 }
 
@@ -513,8 +517,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
 		goto check_lru_list;
 	}
 
-	node->type = BPF_LRU_LOCAL_LIST_T_FREE;
-	bpf_lru_node_clear_ref(node);
+	bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_FREE);
 	list_move(&node->list, local_free_list(loc_l));
 
 	raw_spin_unlock_irqrestore(&loc_l->lock, flags);
@@ -559,8 +562,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 		struct bpf_lru_node *node;
 
 		node = (struct bpf_lru_node *)(buf + node_offset);
-		node->type = BPF_LRU_LIST_T_FREE;
-		bpf_lru_node_clear_ref(node);
+		bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 		buf += elem_size;
 	}
@@ -588,8 +590,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
 again:
 		node = (struct bpf_lru_node *)(buf + node_offset);
 		node->cpu = cpu;
-		node->type = BPF_LRU_LIST_T_FREE;
-		bpf_lru_node_clear_ref(node);
+		bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 		i++;
 		buf += elem_size;
-- 
2.52.0

From nobody Sat Feb 7 17:55:01 2026
From: Leon Hwang
To: bpf@vger.kernel.org
Subject: [PATCH bpf-next v2 3/5] bpf: lru: Factor out bpf_lru_move_next_inactive_rotation helper
Date: Mon, 5 Jan 2026 23:18:11 +0800
Message-ID: <20260105151813.6968-4-leon.hwang@linux.dev>
In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev>
References: <20260105151813.6968-1-leon.hwang@linux.dev>

Factor out a bpf_lru_move_next_inactive_rotation() helper that moves
next_inactive_rotation off a node that is about to be removed or moved,
so a later patch can reuse it when handling the extra-node case. No
functional change intended.
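For context (my illustration of the existing logic, not new behaviour):
next_inactive_rotation is a cursor pointing at a list_head inside the
inactive list, so before the node it designates is unlinked or moved,
the cursor must be stepped back to the previous entry; otherwise the
next rotation pass would start from a list_head that is no longer on
the list:

	/* If the node being removed or moved is the rotation cursor,
	 * back the cursor up so it keeps pointing into the list.
	 */
	if (&node->list == l->next_inactive_rotation)
		l->next_inactive_rotation = l->next_inactive_rotation->prev;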
Miller" , linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org, kernel-patches-bot@fb.com Subject: [PATCH bpf-next v2 3/5] bpf: lru: Factor out bpf_lru_move_next_inactive_rotation helper Date: Mon, 5 Jan 2026 23:18:11 +0800 Message-ID: <20260105151813.6968-4-leon.hwang@linux.dev> In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev> References: <20260105151813.6968-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Factor out a bpf_lru_move_next_inactive_rotation() helper to update next_inactive_rotation when handling the extra-node case. No functional change intended. Signed-off-by: Leon Hwang --- kernel/bpf/bpf_lru_list.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index b17b05f41900..563707af8035 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -61,6 +61,15 @@ static void bpf_lru_list_count_dec(struct bpf_lru_list *= l, l->counts[type]--; } =20 +static void bpf_lru_move_next_inactive_rotation(struct bpf_lru_list *l, st= ruct bpf_lru_node *node) +{ + /* If the removing node is the next_inactive_rotation candidate, + * move the next_inactive_rotation pointer also. + */ + if (&node->list =3D=3D l->next_inactive_rotation) + l->next_inactive_rotation =3D l->next_inactive_rotation->prev; +} + static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, struct bpf_lru_node *node, struct list_head *free_list, @@ -69,11 +78,7 @@ static void __bpf_lru_node_move_to_free(struct bpf_lru_l= ist *l, if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; =20 - /* If the removing node is the next_inactive_rotation candidate, - * move the next_inactive_rotation pointer also. - */ - if (&node->list =3D=3D l->next_inactive_rotation) - l->next_inactive_rotation =3D l->next_inactive_rotation->prev; + bpf_lru_move_next_inactive_rotation(l, node); =20 bpf_lru_list_count_dec(l, node->type); =20 @@ -114,11 +119,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l, } bpf_lru_node_clear_ref(node); =20 - /* If the moving node is the next_inactive_rotation candidate, - * move the next_inactive_rotation pointer also. 
- */ - if (&node->list =3D=3D l->next_inactive_rotation) - l->next_inactive_rotation =3D l->next_inactive_rotation->prev; + bpf_lru_move_next_inactive_rotation(l, node); =20 list_move(&node->list, &l->lists[tgt_type]); } --=20 2.52.0 From nobody Sat Feb 7 17:55:01 2026 Received: from out-184.mta0.migadu.com (out-184.mta0.migadu.com [91.218.175.184]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4E24131D375 for ; Mon, 5 Jan 2026 15:19:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.184 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767626361; cv=none; b=I8mPkcXRkujwQJGb/ERz3iMfabjK/uOP/ru5w5vStU+HO/XHOEGPTBL+yR+dgqQ+6qQueNEojjWOXyLCFGQzWC/75aZG2cpUqrVzaXnmSMBvU96pzC9SpIM7KfgFToee0icSLCmHBW4qiXo5JqcG/6yIja4TcL/jTNe4WKf1Ev8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767626361; c=relaxed/simple; bh=+KT7JF0EIzpSFpd1ROZ8gO6Vb7gT98yry6iHpcryvoM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=nv2rwbD3xtakpokXWvfqQQln183gyekrjqzloPs5ws0H7Ixqvps9EPXX6QVdW2bGG13pZfgmKZgTz02BalnbszaEYTUtJ3HqAfi6qSG+iLla/q/pWjpNip+WxQCkWiB1zzhKV6F1d7vzuyNjxOk6Bj2vMt6a4L0+KRc/gzAKfSo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=bNAtpMpx; arc=none smtp.client-ip=91.218.175.184 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="bNAtpMpx" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1767626356; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=HwgBU8MpNNJnauOwNwlhb3lqNANLCRpDxCkizS3TfMU=; b=bNAtpMpx2tcUlzeNZwmZ5pZMBpmmgYyYN/b2+5/Ol80riGSVgv36SrE39uh+YkGpfbC+g4 TgwCLI349DQZAmC72nTqhIkszoLiey70LKOBRoPk/oWeujhcwSoSzSgjej9qvgq8VS1KXl z7NlS2BNVpCAJoWPcYdz0fKrZ1Rwg6M= From: Leon Hwang To: bpf@vger.kernel.org Cc: Martin KaFai Lau , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Shuah Khan , Leon Hwang , Saket Kumar Bhaskar , "David S . 
Miller" , linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org, kernel-patches-bot@fb.com Subject: [PATCH bpf-next v2 4/5] bpf: lru: Fix unintended eviction when updating lru hash maps Date: Mon, 5 Jan 2026 23:18:12 +0800 Message-ID: <20260105151813.6968-5-leon.hwang@linux.dev> In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev> References: <20260105151813.6968-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" When updating an existing element in lru_[percpu_,]hash maps, the current implementation always calls prealloc_lru_pop() to get a new node before checking if the key already exists. If the map is full, this triggers LRU eviction and removes an existing element, even though the update operation only needs to modify the value of an existing key in-place. This is problematic because: 1. Users may unexpectedly lose entries when doing simple value updates 2. The eviction overhead is unnecessary for existing key updates Fix this by first checking if the key exists before allocating a new node. If the key is found, update the value using the extra lru node without triggering any eviction. Fixes: 29ba732acbee ("bpf: Add BPF_MAP_TYPE_LRU_HASH") Fixes: 8f8449384ec3 ("bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH") Signed-off-by: Leon Hwang --- kernel/bpf/bpf_lru_list.c | 164 +++++++++++++++++++++++++++++++++++--- kernel/bpf/bpf_lru_list.h | 5 +- kernel/bpf/hashtab.c | 85 ++++++++++++++++++-- 3 files changed, 239 insertions(+), 15 deletions(-) diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 563707af8035..142b0f10b011 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -124,6 +124,41 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l, list_move(&node->list, &l->lists[tgt_type]); } =20 +static struct bpf_lru_node *__bpf_lru_node_move_from_extra(struct bpf_lru_= list *l, + enum bpf_lru_list_type tgt_type) +{ + struct bpf_lru_node *node; + + node =3D list_first_entry_or_null(&l->extra, struct bpf_lru_node, list); + if (!node) + return NULL; + + if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) + return NULL; + + bpf_lru_list_count_inc(l, tgt_type); + bpf_lru_node_reset_state(node, tgt_type); + list_move(&node->list, &l->lists[tgt_type]); + return node; +} + +static bool __bpf_lru_node_move_to_extra(struct bpf_lru_list *l, + struct bpf_lru_node *node) +{ + if (!list_empty(&l->extra)) + return false; + + if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) + return false; + + bpf_lru_move_next_inactive_rotation(l, node); + + bpf_lru_list_count_dec(l, node->type); + bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE); + list_move(&node->list, &l->extra); + return true; +} + static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) { return l->counts[BPF_LRU_LIST_T_INACTIVE] < @@ -305,6 +340,69 @@ static void __local_list_flush(struct bpf_lru_list *l, } } =20 +static struct bpf_lru_node *bpf_percpu_lru_pop_extra(struct bpf_lru *lru) +{ + int cpu =3D raw_smp_processor_id(); + struct bpf_lru_node *node; + struct bpf_lru_list *l; + unsigned long flags; + + l =3D per_cpu_ptr(lru->percpu_lru, cpu); + + raw_spin_lock_irqsave(&l->lock, flags); + + node =3D __bpf_lru_node_move_from_extra(l, BPF_LRU_LIST_T_ACTIVE); + + raw_spin_unlock_irqrestore(&l->lock, flags); + + return node; +} + +static struct bpf_lru_node 
*bpf_lru_locallist_extra_pop(struct bpf_lru_loc= allist *l) +{ + struct bpf_lru_node *node; + + node =3D list_first_entry_or_null(&l->extra, struct bpf_lru_node, list); + if (node) + list_del(&node->list); + + return node; +} + +static void __local_list_add_pending(struct bpf_lru *lru, + struct bpf_lru_locallist *loc_l, + int cpu, + struct bpf_lru_node *node); + +static struct bpf_lru_node *bpf_common_lru_pop_extra(struct bpf_lru *lru) +{ + struct bpf_common_lru *clru =3D &lru->common_lru; + int cpu =3D raw_smp_processor_id(); + struct bpf_lru_locallist *loc_l; + struct bpf_lru_node *node; + unsigned long flags; + + loc_l =3D per_cpu_ptr(clru->local_list, cpu); + + raw_spin_lock_irqsave(&loc_l->lock, flags); + + node =3D bpf_lru_locallist_extra_pop(loc_l); + if (node) + __local_list_add_pending(lru, loc_l, cpu, node); + + raw_spin_unlock_irqrestore(&loc_l->lock, flags); + + return node; +} + +struct bpf_lru_node *bpf_lru_pop_extra(struct bpf_lru *lru) +{ + if (lru->percpu) + return bpf_percpu_lru_pop_extra(lru); + else + return bpf_common_lru_pop_extra(lru); +} + static void bpf_lru_list_push_free(struct bpf_lru_list *l, struct bpf_lru_node *node) { @@ -496,6 +594,16 @@ struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *= lru) return bpf_common_lru_pop_free(lru); } =20 +static bool bpf_lru_locallist_extra_push(struct bpf_lru_locallist *loc_l, = struct bpf_lru_node *node) +{ + if (!list_empty(&loc_l->extra)) + return false; + + bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE); + list_move(&node->list, &loc_l->extra); + return true; +} + static void bpf_common_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { @@ -518,8 +626,10 @@ static void bpf_common_lru_push_free(struct bpf_lru *l= ru, goto check_lru_list; } =20 - bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_FREE); - list_move(&node->list, local_free_list(loc_l)); + if (!bpf_lru_locallist_extra_push(loc_l, node)) { + bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_FREE); + list_move(&node->list, local_free_list(loc_l)); + } =20 raw_spin_unlock_irqrestore(&loc_l->lock, flags); return; @@ -539,7 +649,8 @@ static void bpf_percpu_lru_push_free(struct bpf_lru *lr= u, =20 raw_spin_lock_irqsave(&l->lock, flags); =20 - __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); + if (!__bpf_lru_node_move_to_extra(l, node)) + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); =20 raw_spin_unlock_irqrestore(&l->lock, flags); } @@ -554,9 +665,11 @@ void bpf_lru_push_free(struct bpf_lru *lru, struct bpf= _lru_node *node) =20 static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, - u32 nr_elems) + u32 nr_elems, u32 nr_extra_elems) { - struct bpf_lru_list *l =3D &lru->common_lru.lru_list; + struct bpf_common_lru *clru =3D &lru->common_lru; + struct bpf_lru_list *l =3D &clru->lru_list; + int cpu; u32 i; =20 for (i =3D 0; i < nr_elems; i++) { @@ -570,11 +683,26 @@ static void bpf_common_lru_populate(struct bpf_lru *l= ru, void *buf, =20 lru->target_free =3D clamp((nr_elems / num_possible_cpus()) / 2, 1, LOCAL_FREE_TARGET); + + if (WARN_ON_ONCE(nr_extra_elems !=3D num_possible_cpus())) + return; + + for_each_possible_cpu(cpu) { + struct bpf_lru_locallist *loc_l; + struct bpf_lru_node *node; + + loc_l =3D per_cpu_ptr(clru->local_list, cpu); + node =3D (struct bpf_lru_node *)(buf + node_offset); + node->cpu =3D cpu; + bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE); + list_add(&node->list, &loc_l->extra); + buf +=3D elem_size; + } } =20 static void bpf_percpu_lru_populate(struct bpf_lru 
*lru, void *buf, u32 node_offset, u32 elem_size, - u32 nr_elems) + u32 nr_elems, u32 nr_extra_elems) { u32 i, pcpu_entries; int cpu; @@ -600,17 +728,31 @@ static void bpf_percpu_lru_populate(struct bpf_lru *l= ru, void *buf, if (i % pcpu_entries) goto again; } + + if (WARN_ON_ONCE(nr_extra_elems !=3D num_possible_cpus())) + return; + + for_each_possible_cpu(cpu) { + struct bpf_lru_node *node; + + l =3D per_cpu_ptr(lru->percpu_lru, cpu); + node =3D (struct bpf_lru_node *)(buf + node_offset); + node->cpu =3D cpu; + bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE); + list_add(&node->list, &l->extra); + buf +=3D elem_size; + } } =20 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, - u32 elem_size, u32 nr_elems) + u32 elem_size, u32 nr_elems, u32 nr_extra_elems) { if (lru->percpu) bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, - nr_elems); + nr_elems, nr_extra_elems); else bpf_common_lru_populate(lru, buf, node_offset, elem_size, - nr_elems); + nr_elems, nr_extra_elems); } =20 static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cp= u) @@ -620,6 +762,8 @@ static void bpf_lru_locallist_init(struct bpf_lru_local= list *loc_l, int cpu) for (i =3D 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) INIT_LIST_HEAD(&loc_l->lists[i]); =20 + INIT_LIST_HEAD(&loc_l->extra); + loc_l->next_steal =3D cpu; =20 raw_spin_lock_init(&loc_l->lock); @@ -637,6 +781,8 @@ static void bpf_lru_list_init(struct bpf_lru_list *l) =20 l->next_inactive_rotation =3D &l->lists[BPF_LRU_LIST_T_INACTIVE]; =20 + INIT_LIST_HEAD(&l->extra); + raw_spin_lock_init(&l->lock); } =20 diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 29e8300e0fd1..446779341b34 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -33,12 +33,14 @@ struct bpf_lru_list { unsigned int counts[NR_BPF_LRU_LIST_COUNT]; /* The next inactive list rotation starts from here */ struct list_head *next_inactive_rotation; + struct list_head extra; /* for percpu lru */ =20 raw_spinlock_t lock ____cacheline_aligned_in_smp; }; =20 struct bpf_lru_locallist { struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; + struct list_head extra; /* for common lru */ u16 next_steal; raw_spinlock_t lock; }; @@ -71,9 +73,10 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_n= ode *node) int bpf_lru_init(struct bpf_lru *lru, bool percpu, del_from_htab_func del_from_htab, void *delete_arg); void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, - u32 elem_size, u32 nr_elems); + u32 elem_size, u32 nr_elems, u32 nr_extra_elems); void bpf_lru_destroy(struct bpf_lru *lru); struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru); void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node); +struct bpf_lru_node *bpf_lru_pop_extra(struct bpf_lru *lru); =20 #endif diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d029690246f8..8665eb6b8a7d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -207,12 +207,12 @@ static struct htab_elem *get_htab_elem(struct bpf_hta= b *htab, int i) } =20 /* Both percpu and fd htab support in-place update, so no need for - * extra elem. LRU itself can remove the least used element, so - * there is no need for an extra elem during map_update. + * extra elem. LRU requires extra elems to avoid unintended eviction when + * updating the existing elems. 
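As a concrete illustration of the failure mode, here is a minimal
userspace sketch using standard libbpf calls (map size and key values
are arbitrary; error handling is elided; requires CAP_BPF or root):

	#include <bpf/bpf.h>
	#include <stdio.h>

	int main(void)
	{
		__u64 val = 1;
		int key, fd;

		/* A small LRU hash map, filled to capacity. */
		fd = bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "lru", sizeof(int),
				    sizeof(__u64), 128, NULL);
		for (key = 0; key < 128; key++)
			bpf_map_update_elem(fd, &key, &val, BPF_ANY);

		/* In-place value update of an existing key: before this fix
		 * it could evict an unrelated entry; with the fix every
		 * other key must still be present afterwards.
		 */
		key = 0;
		val = 2;
		bpf_map_update_elem(fd, &key, &val, BPF_ANY);

		for (key = 1; key < 128; key++)
			if (bpf_map_lookup_elem(fd, &key, &val))
				printf("key %d was evicted\n", key);
		return 0;
	}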
Fixes: 29ba732acbee ("bpf: Add BPF_MAP_TYPE_LRU_HASH")
Fixes: 8f8449384ec3 ("bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH")
Signed-off-by: Leon Hwang
---
 kernel/bpf/bpf_lru_list.c | 164 +++++++++++++++++++++++++++++++++++---
 kernel/bpf/bpf_lru_list.h |   5 +-
 kernel/bpf/hashtab.c      |  85 ++++++++++++++++++--
 3 files changed, 239 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 563707af8035..142b0f10b011 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -124,6 +124,41 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
 	list_move(&node->list, &l->lists[tgt_type]);
 }
 
+static struct bpf_lru_node *__bpf_lru_node_move_from_extra(struct bpf_lru_list *l,
+							   enum bpf_lru_list_type tgt_type)
+{
+	struct bpf_lru_node *node;
+
+	node = list_first_entry_or_null(&l->extra, struct bpf_lru_node, list);
+	if (!node)
+		return NULL;
+
+	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
+		return NULL;
+
+	bpf_lru_list_count_inc(l, tgt_type);
+	bpf_lru_node_reset_state(node, tgt_type);
+	list_move(&node->list, &l->lists[tgt_type]);
+	return node;
+}
+
+static bool __bpf_lru_node_move_to_extra(struct bpf_lru_list *l,
+					 struct bpf_lru_node *node)
+{
+	if (!list_empty(&l->extra))
+		return false;
+
+	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
+		return false;
+
+	bpf_lru_move_next_inactive_rotation(l, node);
+
+	bpf_lru_list_count_dec(l, node->type);
+	bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
+	list_move(&node->list, &l->extra);
+	return true;
+}
+
 static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l)
 {
 	return l->counts[BPF_LRU_LIST_T_INACTIVE] <
@@ -305,6 +340,69 @@ static void __local_list_flush(struct bpf_lru_list *l,
 	}
 }
 
+static struct bpf_lru_node *bpf_percpu_lru_pop_extra(struct bpf_lru *lru)
+{
+	int cpu = raw_smp_processor_id();
+	struct bpf_lru_node *node;
+	struct bpf_lru_list *l;
+	unsigned long flags;
+
+	l = per_cpu_ptr(lru->percpu_lru, cpu);
+
+	raw_spin_lock_irqsave(&l->lock, flags);
+
+	node = __bpf_lru_node_move_from_extra(l, BPF_LRU_LIST_T_ACTIVE);
+
+	raw_spin_unlock_irqrestore(&l->lock, flags);
+
+	return node;
+}
+
+static struct bpf_lru_node *bpf_lru_locallist_extra_pop(struct bpf_lru_locallist *l)
+{
+	struct bpf_lru_node *node;
+
+	node = list_first_entry_or_null(&l->extra, struct bpf_lru_node, list);
+	if (node)
+		list_del(&node->list);
+
+	return node;
+}
+
+static void __local_list_add_pending(struct bpf_lru *lru,
+				     struct bpf_lru_locallist *loc_l,
+				     int cpu,
+				     struct bpf_lru_node *node);
+
+static struct bpf_lru_node *bpf_common_lru_pop_extra(struct bpf_lru *lru)
+{
+	struct bpf_common_lru *clru = &lru->common_lru;
+	int cpu = raw_smp_processor_id();
+	struct bpf_lru_locallist *loc_l;
+	struct bpf_lru_node *node;
+	unsigned long flags;
+
+	loc_l = per_cpu_ptr(clru->local_list, cpu);
+
+	raw_spin_lock_irqsave(&loc_l->lock, flags);
+
+	node = bpf_lru_locallist_extra_pop(loc_l);
+	if (node)
+		__local_list_add_pending(lru, loc_l, cpu, node);
+
+	raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+
+	return node;
+}
+
+struct bpf_lru_node *bpf_lru_pop_extra(struct bpf_lru *lru)
+{
+	if (lru->percpu)
+		return bpf_percpu_lru_pop_extra(lru);
+	else
+		return bpf_common_lru_pop_extra(lru);
+}
+
 static void bpf_lru_list_push_free(struct bpf_lru_list *l,
 				   struct bpf_lru_node *node)
 {
@@ -496,6 +594,16 @@ struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru)
 	return bpf_common_lru_pop_free(lru);
 }
 
+static bool bpf_lru_locallist_extra_push(struct bpf_lru_locallist *loc_l, struct bpf_lru_node *node)
+{
+	if (!list_empty(&loc_l->extra))
+		return false;
+
+	bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
+	list_move(&node->list, &loc_l->extra);
+	return true;
+}
+
 static void bpf_common_lru_push_free(struct bpf_lru *lru,
 				     struct bpf_lru_node *node)
 {
@@ -518,8 +626,10 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
 		goto check_lru_list;
 	}
 
-	bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_FREE);
-	list_move(&node->list, local_free_list(loc_l));
+	if (!bpf_lru_locallist_extra_push(loc_l, node)) {
+		bpf_lru_node_reset_state(node, BPF_LRU_LOCAL_LIST_T_FREE);
+		list_move(&node->list, local_free_list(loc_l));
+	}
 
 	raw_spin_unlock_irqrestore(&loc_l->lock, flags);
 	return;
@@ -539,7 +649,8 @@ static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
 
 	raw_spin_lock_irqsave(&l->lock, flags);
 
-	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
+	if (!__bpf_lru_node_move_to_extra(l, node))
+		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
 
 	raw_spin_unlock_irqrestore(&l->lock, flags);
 }
@@ -554,9 +665,11 @@ void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
 
 static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 				    u32 node_offset, u32 elem_size,
-				    u32 nr_elems)
+				    u32 nr_elems, u32 nr_extra_elems)
 {
-	struct bpf_lru_list *l = &lru->common_lru.lru_list;
+	struct bpf_common_lru *clru = &lru->common_lru;
+	struct bpf_lru_list *l = &clru->lru_list;
+	int cpu;
 	u32 i;
 
 	for (i = 0; i < nr_elems; i++) {
@@ -570,11 +683,26 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 
 	lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2,
 				 1, LOCAL_FREE_TARGET);
+
+	if (WARN_ON_ONCE(nr_extra_elems != num_possible_cpus()))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct bpf_lru_locallist *loc_l;
+		struct bpf_lru_node *node;
+
+		loc_l = per_cpu_ptr(clru->local_list, cpu);
+		node = (struct bpf_lru_node *)(buf + node_offset);
+		node->cpu = cpu;
+		bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
+		list_add(&node->list, &loc_l->extra);
+		buf += elem_size;
+	}
 }
 
 static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
 				    u32 node_offset, u32 elem_size,
-				    u32 nr_elems)
+				    u32 nr_elems, u32 nr_extra_elems)
 {
 	u32 i, pcpu_entries;
 	int cpu;
@@ -600,17 +728,31 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
 		if (i % pcpu_entries)
 			goto again;
 	}
+
+	if (WARN_ON_ONCE(nr_extra_elems != num_possible_cpus()))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct bpf_lru_node *node;
+
+		l = per_cpu_ptr(lru->percpu_lru, cpu);
+		node = (struct bpf_lru_node *)(buf + node_offset);
+		node->cpu = cpu;
+		bpf_lru_node_reset_state(node, BPF_LRU_LIST_T_FREE);
+		list_add(&node->list, &l->extra);
+		buf += elem_size;
+	}
 }
 
 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
-		      u32 elem_size, u32 nr_elems)
+		      u32 elem_size, u32 nr_elems, u32 nr_extra_elems)
 {
 	if (lru->percpu)
 		bpf_percpu_lru_populate(lru, buf, node_offset, elem_size,
-					nr_elems);
+					nr_elems, nr_extra_elems);
 	else
 		bpf_common_lru_populate(lru, buf, node_offset, elem_size,
-					nr_elems);
+					nr_elems, nr_extra_elems);
 }
 
 static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
@@ -620,6 +762,8 @@ static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
 	for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++)
 		INIT_LIST_HEAD(&loc_l->lists[i]);
 
+	INIT_LIST_HEAD(&loc_l->extra);
+
 	loc_l->next_steal = cpu;
 
 	raw_spin_lock_init(&loc_l->lock);
@@ -637,6 +781,8 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
 
 	l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
 
+	INIT_LIST_HEAD(&l->extra);
+
 	raw_spin_lock_init(&l->lock);
 }
 
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 29e8300e0fd1..446779341b34 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -33,12 +33,14 @@ struct bpf_lru_list {
 	unsigned int counts[NR_BPF_LRU_LIST_COUNT];
 	/* The next inactive list rotation starts from here */
 	struct list_head *next_inactive_rotation;
+	struct list_head extra; /* for percpu lru */
 
 	raw_spinlock_t lock ____cacheline_aligned_in_smp;
};
 
 struct bpf_lru_locallist {
 	struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T];
+	struct list_head extra; /* for common lru */
 	u16 next_steal;
 	raw_spinlock_t lock;
 };
@@ -71,9 +73,10 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 int bpf_lru_init(struct bpf_lru *lru, bool percpu,
 		 del_from_htab_func del_from_htab, void *delete_arg);
 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
-		      u32 elem_size, u32 nr_elems);
+		      u32 elem_size, u32 nr_elems, u32 nr_extra_elems);
 void bpf_lru_destroy(struct bpf_lru *lru);
 struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru);
 void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node);
+struct bpf_lru_node *bpf_lru_pop_extra(struct bpf_lru *lru);
 
 #endif
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d029690246f8..8665eb6b8a7d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -207,12 +207,12 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
 }
 
 /* Both percpu and fd htab support in-place update, so no need for
- * extra elem. LRU itself can remove the least used element, so
- * there is no need for an extra elem during map_update.
+ * extra elem. LRU requires extra elems to avoid unintended eviction when
+ * updating the existing elems.
 */
 static bool htab_has_extra_elems(struct bpf_htab *htab)
 {
-	return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
+	return htab_is_lru(htab) || (!htab_is_percpu(htab) && !is_fd_htab(htab));
 }
 
 static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
@@ -313,6 +313,7 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 static int prealloc_init(struct bpf_htab *htab)
 {
 	u32 num_entries = htab->map.max_entries;
+	u32 lru_num_entries = num_entries;
 	int err = -ENOMEM, i;
 
 	if (htab_has_extra_elems(htab))
@@ -354,7 +355,8 @@ static int prealloc_init(struct bpf_htab *htab)
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, num_entries);
+				 htab->elem_size, lru_num_entries,
+				 num_entries - lru_num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
@@ -557,7 +559,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_map_locked;
 
-	if (htab_has_extra_elems(htab)) {
+	if (htab_has_extra_elems(htab) && !htab_is_lru(htab)) {
 		err = alloc_extra_elems(htab);
 		if (err)
 			goto free_prealloc;
@@ -1182,6 +1184,69 @@ static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
 	bpf_lru_push_free(&htab->lru, &elem->lru_node);
 }
 
+static int htab_lru_map_update_elem_in_place(struct bpf_htab *htab, void *key, void *value,
+					     u64 map_flags, struct bucket *b,
+					     struct hlist_nulls_head *head, u32 hash,
+					     bool percpu, bool onallcpus)
+{
+	struct htab_elem *l_new, *l_old, *l_free;
+	struct bpf_map *map = &htab->map;
+	u32 key_size = map->key_size;
+	struct bpf_lru_node *node;
+	unsigned long flags;
+	void *l_val;
+	int ret;
+
+	node = bpf_lru_pop_extra(&htab->lru);
+	if (!node)
+		return -ENOENT;
+
+	l_new = container_of(node, struct htab_elem, lru_node);
+	l_new->hash = hash;
+	memcpy(l_new->key, key, key_size);
+	if (!percpu) {
+		l_val = htab_elem_value(l_new, map->key_size);
+		copy_map_value(map, l_val, value);
+		bpf_obj_free_fields(map->record, l_val);
+	}
+
+	ret = htab_lock_bucket(b, &flags);
+	if (ret)
+		goto err_lock_bucket;
+
+	l_old = lookup_elem_raw(head, hash, key, key_size);
+
+	ret = check_flags(htab, l_old, map_flags);
+	if (ret)
+		goto err;
+
+	if (l_old) {
+		bpf_lru_node_set_ref(&l_new->lru_node);
+		if (percpu) {
+			/* per-cpu hash map can update value in-place.
+			 * Keep the same logic in __htab_lru_percpu_map_update_elem().
+			 */
+			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+					value, onallcpus);
+			l_free = l_new;
+		} else {
+			hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+			hlist_nulls_del_rcu(&l_old->hash_node);
+			l_free = l_old;
+		}
+	} else {
+		ret = -ENOENT;
+	}
+
+err:
+	htab_unlock_bucket(b, flags);
+
+err_lock_bucket:
+	bpf_lru_push_free(&htab->lru, ret ? node : &l_free->lru_node);
+
+	return ret;
+}
+
 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 				     u64 map_flags)
 {
@@ -1206,6 +1271,11 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	ret = htab_lru_map_update_elem_in_place(htab, key, value, map_flags, b, head, hash, false,
+						false);
+	if (!ret)
+		return 0;
+
 	/* For LRU, we need to alloc before taking bucket's
 	 * spinlock because getting free nodes from LRU may need
 	 * to remove older elements from htab and this removal
@@ -1336,6 +1406,11 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	ret = htab_lru_map_update_elem_in_place(htab, key, value, map_flags, b, head, hash, true,
+						onallcpus);
+	if (!ret)
+		return 0;
+
 	/* For LRU, we need to alloc before taking bucket's
 	 * spinlock because LRU's elem alloc may need
 	 * to remove older elem from htab and this removal
-- 
2.52.0

From nobody Sat Feb 7 17:55:01 2026
From: Leon Hwang
To: bpf@vger.kernel.org
Subject: [PATCH bpf-next v2 5/5] selftests/bpf: Add tests to verify no unintended eviction when updating lru_[percpu_,]hash maps
Date: Mon, 5 Jan 2026 23:18:13 +0800
Message-ID: <20260105151813.6968-6-leon.hwang@linux.dev>
In-Reply-To: <20260105151813.6968-1-leon.hwang@linux.dev>
References: <20260105151813.6968-1-leon.hwang@linux.dev>

Add four subtests to verify that updating an existing element in LRU
hash maps does not cause unintended eviction of other elements.

Each subtest creates an lru_hash or lru_percpu_hash map with
max_entries slots and populates all of them. It then updates an
existing key and verifies that:

1. The update succeeds without error
2. The updated key has the new value
3. All other keys still exist with their original values

This validates the fix that prevents unnecessary LRU eviction when
updating existing elements in full LRU hash maps.
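Assuming the standard test_progs conventions in
tools/testing/selftests/bpf, the new subtests can be exercised with
something like:

	cd tools/testing/selftests/bpf
	make
	sudo ./test_progs -t update_lru_hash_maps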
Signed-off-by: Leon Hwang
---
 .../selftests/bpf/prog_tests/htab_update.c | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index d0b405eb2966..a0c93aae2b99 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -143,3 +143,132 @@ void test_htab_update(void)
 	if (test__start_subtest("concurrent_update"))
 		test_concurrent_update();
 }
+
+static void __setaffinity(cpu_set_t *cpus, int cpu)
+{
+	CPU_ZERO(cpus);
+	CPU_SET(cpu, cpus);
+	pthread_setaffinity_np(pthread_self(), sizeof(*cpus), cpus);
+}
+
+static void test_lru_hash_map_update_elem(enum bpf_map_type map_type, u64 map_flags)
+{
+	bool percpu = map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+	int err, map_fd, i, key, nr_cpus, max_entries = 128;
+	u64 *values, value = 0xDEADC0DE;
+	cpu_set_t cpus;
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		.map_flags = map_flags,
+	);
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	values = calloc(nr_cpus, sizeof(u64));
+	if (!ASSERT_OK_PTR(values, "calloc values"))
+		return;
+	for (i = 0; i < nr_cpus; i++)
+		values[i] = value;
+
+	map_fd = bpf_map_create(map_type, "test_lru", sizeof(int), sizeof(u64), max_entries, &opts);
+	if (!ASSERT_GE(map_fd, 0, "bpf_map_create")) {
+		free(values);
+		return;
+	}
+
+	/* populate all slots */
+	for (key = 0; key < max_entries; key++) {
+		__setaffinity(&cpus, key%nr_cpus);
+		err = bpf_map_update_elem(map_fd, &key, values, 0);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			goto out;
+	}
+
+	/* LRU eviction should not happen */
+
+#define CHECK_OTHER_CPUS_VALUES(__val)						\
+	do {									\
+		if (!percpu)							\
+			break;							\
+		for (i = 1; i < nr_cpus; i++)					\
+			if (!ASSERT_EQ(values[i], __val, "bpf_map_lookup_elem value")) \
+				goto out;					\
+	} while (0)
+
+	__setaffinity(&cpus, 0);
+	key = 0;
+	memset(values, 0, nr_cpus * sizeof(u64));
+	err = bpf_map_update_elem(map_fd, &key, values, 0);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto out;
+
+	err = bpf_map_lookup_elem(map_fd, &key, values);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+	if (!ASSERT_EQ(*values, 0, "bpf_map_lookup_elem value"))
+		goto out;
+	CHECK_OTHER_CPUS_VALUES(0);
+
+	for (key = 1; key < max_entries; key++) {
+		err = bpf_map_lookup_elem(map_fd, &key, values);
+		if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+			goto out;
+		if (!ASSERT_EQ(*values, value, "bpf_map_lookup_elem value"))
+			goto out;
+		CHECK_OTHER_CPUS_VALUES(value);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		values[i] = value;
+
+	key = max_entries;
+	err = bpf_map_update_elem(map_fd, &key, values, 0);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto out;
+
+	err = bpf_map_lookup_elem(map_fd, &key, values);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+	if (!ASSERT_EQ(*values, value, "bpf_map_lookup_elem value"))
+		goto out;
+	CHECK_OTHER_CPUS_VALUES(value);
+
+#undef CHECK_OTHER_CPUS_VALUES
+
+out:
+	close(map_fd);
+	free(values);
+}
+
+static void test_update_lru_hash_map_common_lru(void)
+{
+	test_lru_hash_map_update_elem(BPF_MAP_TYPE_LRU_HASH, 0);
+}
+
+static void test_update_lru_hash_map_percpu_lru(void)
+{
+	test_lru_hash_map_update_elem(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU);
+}
+
+static void test_update_lru_percpu_hash_map_common_lru(void)
+{
+	test_lru_hash_map_update_elem(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0);
+}
+
+static void test_update_lru_percpu_hash_map_percpu_lru(void)
+{
+	test_lru_hash_map_update_elem(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU);
+}
+
+void test_update_lru_hash_maps(void)
+{
+	if (test__start_subtest("lru_hash/common_lru"))
+		test_update_lru_hash_map_common_lru();
+	if (test__start_subtest("lru_hash/percpu_lru"))
+		test_update_lru_hash_map_percpu_lru();
+	if (test__start_subtest("lru_percpu_hash/common_lru"))
+		test_update_lru_percpu_hash_map_common_lru();
+	if (test__start_subtest("lru_percpu_hash/percpu_lru"))
+		test_update_lru_percpu_hash_map_percpu_lru();
+}
-- 
2.52.0