Sometimes, the result of rhashtable_lookup() is expected to be found,
or expected not to be found. Therefore, we can use likely() or
unlikely() for such cases.

The following new functions are introduced, which use likely() or
unlikely() during the lookup:

rhashtable_lookup_likely
rhashtable_lookup_unlikely
rhltable_lookup_likely
rhltable_lookup_unlikely

A micro-benchmark was run for these new functions: look up an existing
entry repeatedly 100,000,000 times, and rhashtable_lookup_likely()
gets a ~30% speedup.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
This patch is based on a patch that I submitted earlier:

  rhashtable: use __always_inline for rhashtable

The new functions introduced here can be used by other modules, but I'm
not sure it is a good idea to convert those callers in this series, as
they belong to different trees. So I have decided to do that in the
target trees after this patch is merged. A minimal usage sketch of the
new helpers is included below for illustration.
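
For illustration, here is a minimal sketch of how a caller that expects
its lookups to almost always hit (e.g. an established-flow fast path)
might use rhashtable_lookup_likely(). The struct flow_entry, its
fields, flow_params and flow_get_packets() are hypothetical names
invented for this example; only the rhashtable API calls come from the
header touched by this patch.

#include <linux/rhashtable.h>
#include <linux/rcupdate.h>

/* Hypothetical object stored in the table. */
struct flow_entry {
	u32 key;			/* lookup key (hypothetical) */
	struct rhash_head node;		/* linkage used by the rhashtable */
	u64 packets;			/* example payload */
};

/* Hypothetical table parameters describing the object above. */
static const struct rhashtable_params flow_params = {
	.key_len		= sizeof(u32),
	.key_offset		= offsetof(struct flow_entry, key),
	.head_offset		= offsetof(struct flow_entry, node),
	.automatic_shrinking	= true,
};

/*
 * Fast-path lookup: the caller expects the entry to exist almost every
 * time, so the "likely" variant is used.  The plain lookup helpers do
 * not take the RCU read lock themselves, so the caller must hold it
 * while touching the returned entry.
 */
static u64 flow_get_packets(struct rhashtable *ht, u32 key)
{
	struct flow_entry *fe;
	u64 packets = 0;

	rcu_read_lock();
	fe = rhashtable_lookup_likely(ht, &key, flow_params);
	if (fe)
		packets = fe->packets;
	rcu_read_unlock();

	return packets;
}

If the caller does not want to manage RCU itself, rhashtable_lookup_fast()
is the existing helper that takes the read lock internally; this patch
does not add a "_likely" counterpart for it.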
---
include/linux/rhashtable.h | 94 +++++++++++++++++++++++++++++++++-----
1 file changed, 82 insertions(+), 12 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index e740157f3cd7..a4953ab334c5 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -355,12 +355,29 @@ static inline void rht_unlock(struct bucket_table *tbl,
local_irq_restore(flags);
}
-static inline struct rhash_head *__rht_ptr(
- struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
+enum rht_lookup_freq {
+ RHT_LOOKUP_NORMAL,
+ RHT_LOOKUP_LIKELY,
+ RHT_LOOKUP_UNLIKELY,
+};
+
+static __always_inline struct rhash_head *__rht_ptr(
+ struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt,
+ const enum rht_lookup_freq freq)
{
- return (struct rhash_head *)
- ((unsigned long)p & ~BIT(0) ?:
- (unsigned long)RHT_NULLS_MARKER(bkt));
+ unsigned long p_val = (unsigned long)p & ~BIT(0);
+
+ BUILD_BUG_ON(!__builtin_constant_p(freq));
+
+ if (freq == RHT_LOOKUP_LIKELY)
+ return (struct rhash_head *)
+ (likely(p_val) ? p_val : (unsigned long)RHT_NULLS_MARKER(bkt));
+ else if (freq == RHT_LOOKUP_UNLIKELY)
+ return (struct rhash_head *)
+ (unlikely(p_val) ? p_val : (unsigned long)RHT_NULLS_MARKER(bkt));
+ else
+ return (struct rhash_head *)
+ (p_val ?: (unsigned long)RHT_NULLS_MARKER(bkt));
}
/*
@@ -370,10 +387,17 @@ static inline struct rhash_head *__rht_ptr(
* rht_ptr_exclusive() dereferences in a context where exclusive
* access is guaranteed, such as when destroying the table.
*/
+static __always_inline struct rhash_head *__rht_ptr_rcu(
+ struct rhash_lock_head __rcu *const *bkt,
+ const enum rht_lookup_freq freq)
+{
+ return __rht_ptr(rcu_dereference(*bkt), bkt, freq);
+}
+
static inline struct rhash_head *rht_ptr_rcu(
struct rhash_lock_head __rcu *const *bkt)
{
- return __rht_ptr(rcu_dereference(*bkt), bkt);
+ return __rht_ptr_rcu(bkt, RHT_LOOKUP_NORMAL);
}
static inline struct rhash_head *rht_ptr(
@@ -381,13 +405,15 @@ static inline struct rhash_head *rht_ptr(
struct bucket_table *tbl,
unsigned int hash)
{
- return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
+ return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt,
+ RHT_LOOKUP_NORMAL);
}
static inline struct rhash_head *rht_ptr_exclusive(
struct rhash_lock_head __rcu *const *bkt)
{
- return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt);
+ return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt,
+ RHT_LOOKUP_NORMAL);
}
static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
@@ -588,7 +614,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
/* Internal function, do not use. */
static __always_inline struct rhash_head *__rhashtable_lookup(
struct rhashtable *ht, const void *key,
- const struct rhashtable_params params)
+ const struct rhashtable_params params,
+ const enum rht_lookup_freq freq)
{
struct rhashtable_compare_arg arg = {
.ht = ht,
@@ -599,12 +626,13 @@ static __always_inline struct rhash_head *__rhashtable_lookup(
struct rhash_head *he;
unsigned int hash;
+ BUILD_BUG_ON(!__builtin_constant_p(freq));
tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
- rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
+ rht_for_each_rcu_from(he, __rht_ptr_rcu(bkt, freq), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
@@ -643,11 +671,32 @@ static __always_inline void *rhashtable_lookup(
struct rhashtable *ht, const void *key,
const struct rhashtable_params params)
{
- struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params,
+ RHT_LOOKUP_NORMAL);
return he ? rht_obj(ht, he) : NULL;
}
+static __always_inline void *rhashtable_lookup_likely(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params,
+ RHT_LOOKUP_LIKELY);
+
+ return likely(he) ? rht_obj(ht, he) : NULL;
+}
+
+static __always_inline void *rhashtable_lookup_unlikely(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params,
+ RHT_LOOKUP_UNLIKELY);
+
+ return unlikely(he) ? rht_obj(ht, he) : NULL;
+}
+
/**
* rhashtable_lookup_fast - search hash table, without RCU read lock
* @ht: hash table
@@ -693,11 +742,32 @@ static __always_inline struct rhlist_head *rhltable_lookup(
struct rhltable *hlt, const void *key,
const struct rhashtable_params params)
{
- struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
+ struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params,
+ RHT_LOOKUP_NORMAL);
return he ? container_of(he, struct rhlist_head, rhead) : NULL;
}
+static __always_inline struct rhlist_head *rhltable_lookup_likely(
+ struct rhltable *hlt, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params,
+ RHT_LOOKUP_LIKELY);
+
+ return likely(he) ? container_of(he, struct rhlist_head, rhead) : NULL;
+}
+
+static __always_inline struct rhlist_head *rhltable_lookup_unlikely(
+ struct rhltable *hlt, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params,
+ RHT_LOOKUP_UNLIKELY);
+
+ return unlikely(he) ? container_of(he, struct rhlist_head, rhead) : NULL;
+}
+
/* Internal function, please use rhashtable_insert_fast() instead. This
* function returns the existing element already in hashes if there is a clash,
* otherwise it returns an error via ERR_PTR().
--
2.51.0
On Sun, 28 Sep 2025, Menglong Dong wrote:
> Sometimes, the result of rhashtable_lookup() is expected to be found,
> or expected not to be found. Therefore, we can use likely() or
> unlikely() for such cases.
>
> The following new functions are introduced, which use likely() or
> unlikely() during the lookup:
>
> rhashtable_lookup_likely
> rhashtable_lookup_unlikely
> rhltable_lookup_likely
> rhltable_lookup_unlikely
>
> A micro-benchmark was run for these new functions: look up an existing
> entry repeatedly 100,000,000 times, and rhashtable_lookup_likely()
> gets a ~30% speedup.

I generally like this patch - it seems well structured and leaves the
code easy to maintain.

I think you have made a good case for rhashtable_lookup_likely() and it
seems sensible to optimise that case.

I'm less sure of rhashtable_lookup_unlikely() - you have provided no
measurements for that.

In general we expect an rhashtable to be between 33% and 75% full. The
inevitable hash collisions will mean that the number of used slots in
the bucket table will be a little less than this. But let's assume 50%
of the buckets are in use at any time on average.

If you are using rhashtable_lookup_likely() you expect to find the
target, so you expect the bucket not to be empty, so it is reasonable
to tell the compiler that it is "likely" that the pointer isn't NULL.

However, if you don't expect to find the target, that DOESN'T mean you
expect the bucket to be empty - it could have another item in it. All
you can really say is that the probability of an empty bucket matches
the degree of utilisation of the table - so about 50% as discussed
above.

So I don't think it is reasonable to ever tell the compiler that a
bucket being empty is "likely". You also use "likely()" for deciding
whether or not to subtract the key offset from the address before
returning a pointer. This is a valid thing to tell the compiler, but we
would need numbers to confirm whether or not it was worth adding to the
API.

If, however, you could provide numbers showing that, in an rhashtable
with lots of entries, a lookup for a non-existing key was faster with
the rhashtable_lookup_unlikely() code, then I would find that
interesting and worth pursuing.

In general it would be interesting to know the numbers for both
successful lookups and unsuccessful lookups across all three proposed
lookup versions. I don't know how helpful it would be, but I would find
it interesting...

Thanks,
NeilBrown
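
To put rough numbers on the occupancy argument above (a back-of-the-
envelope estimate assuming uniform hashing, not a measurement from this
thread): with n entries hashed into s buckets, the chance that any
particular bucket is empty is about

  P(empty) = (1 - 1/s)^n ≈ exp(-n/s)

which gives roughly exp(-0.5) ≈ 0.61 at a load factor of 0.5 and
exp(-0.75) ≈ 0.47 at 0.75. A miss hashes to an effectively random
bucket, so whether that bucket is empty is close to a coin flip either
way, which is why annotating the empty-bucket check for the
miss-expected case has little basis.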
On Mon, Sep 29, 2025 at 11:41 AM NeilBrown <neilb@ownmail.net> wrote:
>
> On Sun, 28 Sep 2025, Menglong Dong wrote:
> > [...]
>
> I generally like this patch - it seems well structured and leaves the
> code easy to maintain.
>
> I think you have made a good case for rhashtable_lookup_likely() and it
> seems sensible to optimise that case.
>
> I'm less sure of rhashtable_lookup_unlikely() - you have provided no
> measurements for that.
>
> In general we expect an rhashtable to be between 33% and 75% full. The
> inevitable hash collisions will mean that the number of used slots in
> the bucket table will be a little less than this. But let's assume 50%
> of the buckets are in use at any time on average.
>
> If you are using rhashtable_lookup_likely() you expect to find the
> target, so you expect the bucket not to be empty, so it is reasonable
> to tell the compiler that it is "likely" that the pointer isn't NULL.
>
> However, if you don't expect to find the target, that DOESN'T mean you
> expect the bucket to be empty - it could have another item in it. All
> you can really say is that the probability of an empty bucket matches
> the degree of utilisation of the table - so about 50% as discussed
> above.
>
> So I don't think it is reasonable to ever tell the compiler that a
> bucket being empty is "likely". You also use "likely()" for deciding
> whether or not to subtract the key offset from the address before
> returning a pointer. This is a valid thing to tell the compiler, but we
> would need numbers to confirm whether or not it was worth adding to the
> API.
>
> If, however, you could provide numbers showing that, in an rhashtable
> with lots of entries, a lookup for a non-existing key was faster with
> the rhashtable_lookup_unlikely() code, then I would find that
> interesting and worth pursuing.

Ah, you are right, I wasn't aware of this point. I think it makes no
sense to use unlikely() if the probability of hitting the bucket is
~50%. The only thing that may still matter is the use of unlikely() in
rhashtable_lookup() before returning the pointer. I'll run a benchmark
on that part, and I'll remove the unlikely variants in the next version
if it doesn't help.

Thanks a lot!
Menglong Dong

> In general it would be interesting to know the numbers for both
> successful lookups and unsuccessful lookups across all three proposed
> lookup versions. I don't know how helpful it would be, but I would find
> it interesting...
>
> Thanks,
> NeilBrown