The existing min_heap_sift_down() uses the bottom-up heapify variant,
which reduces the number of comparisons from ~2 * log2(n) to
~1 * log2(n) when all elements are distinct. However, in workloads
where the heap contains many equal elements, this bottom-up variant
can degenerate and perform up to 2 * log2(n) comparisons, while the
traditional top-down variant needs only O(1) comparisons in such cases.
To address this, introduce min_heap_sift_down_eqaware(), a top-down
heapify variant optimized for scenarios with many equal elements. This
variant avoids unnecessary comparisons and swaps when elements are
already equal or in the correct position.
Cc: stable@vger.kernel.org # 6.11+
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
include/linux/min_heap.h | 51 ++++++++++++++++++++++++++++++++++++++++
lib/min_heap.c | 7 ++++++
2 files changed, 58 insertions(+)
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 79ddc0adbf2b..b0d603fe5379 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -292,6 +292,52 @@ void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_si
__min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
__minheap_obj_size(_heap), _func, _args)
+/*
+ * Sift the element at pos down the heap using an equal-elements-aware,
+ * top-down sift_down: at each level the parent is compared with its children
+ * and swapped with the smallest one; the loop ends at a leaf or as soon as no
+ * child is less than the parent. May perform better with many equal elements.
+ */
+static __always_inline
+void __min_heap_sift_down_eqaware_inline(min_heap_char * heap, size_t pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
+{
+ void *data = heap->data;
+ void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
+ /* a, b, c, smallest and n are pre-scaled byte offsets (index * elem_size) */
+ size_t a = pos * elem_size;
+ size_t b, c, smallest;
+ size_t n = heap->nr * elem_size;
+
+ if (!swp) /* no swap callback supplied by the caller */
+ swp = select_swap_func(data, elem_size);
+
+ for (;;) {
+ b = 2 * a + elem_size; /* left child of a */
+ c = b + elem_size; /* right child of a */
+ smallest = a;
+
+ if (b >= n) /* a has no children: it is a leaf */
+ break;
+
+ if (func->less(data + b, data + smallest, args))
+ smallest = b;
+
+ if (c < n && func->less(data + c, data + smallest, args))
+ smallest = c;
+
+ if (smallest == a) /* neither child is less than the parent: done */
+ break;
+
+ do_swap(data + a, data + smallest, elem_size, swp, args);
+ a = smallest;
+ }
+}
+
+#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \
+ __min_heap_sift_down_eqaware_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
+ __minheap_obj_size(_heap), _func, _args)
+
/* Sift up ith element from the heap, O(log2(nr)). */
static __always_inline
void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
@@ -433,6 +479,8 @@ void *__min_heap_peek(struct min_heap_char *heap);
bool __min_heap_full(min_heap_char *heap);
void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size,
const struct min_heap_callbacks *func, void *args);
+void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args);
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args);
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
@@ -455,6 +503,9 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
#define min_heap_sift_down(_heap, _pos, _func, _args) \
__min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
__minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_down_eqaware(_heap, _pos, _func, _args) \
+ __min_heap_sift_down_eqaware(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
+ __minheap_obj_size(_heap), _func, _args)
#define min_heap_sift_up(_heap, _idx, _func, _args) \
__min_heap_sift_up(container_of(&(_heap)->nr, min_heap_char, nr), \
__minheap_obj_size(_heap), _idx, _func, _args)
diff --git a/lib/min_heap.c b/lib/min_heap.c
index 96f01a4c5fb6..2225f40d0d7a 100644
--- a/lib/min_heap.c
+++ b/lib/min_heap.c
@@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size,
}
EXPORT_SYMBOL(__min_heap_sift_down);
+void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
+{
+ __min_heap_sift_down_eqaware_inline(heap, pos, elem_size, func, args);
+}
+EXPORT_SYMBOL(__min_heap_sift_down_eqaware);
+
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args)
{
--
2.34.1
Hi Kuan-Wei Thanks for this patch series to address the bcache latency regression. I tested it but results show regression still remains. Upon review of the patch changes, I notice that the min_heap_sift_down_eqaware_inline #define macro in this patch may have been mapped incorrectly: +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) I changed it to map to its "eqaware" counterpart like this and the regression does not happen again. +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_eqaware_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) Do you think this correction is appropriate? Best regards Robert Pang On Wed, Jun 11, 2025 at 6:55 AM Kuan-Wei Chiu <visitorckw@gmail.com> wrote: > > The existing min_heap_sift_down() uses the bottom-up heapify variant, > which reduces the number of comparisons from ~2 * log2(n) to > ~1 * log2(n) when all elements are distinct. However, in workloads > where the heap contains many equal elements, this bottom-up variant > can degenerate and perform up to 2 * log2(n) comparisons, while the > traditional top-down variant needs only O(1) comparisons in such cases. > > To address this, introduce min_heap_sift_down_eqaware(), a top-down > heapify variant optimized for scenarios with many equal elements. This > variant avoids unnecessary comparisons and swaps when elements are > already equal or in the correct position. 
> > Cc: stable@vger.kernel.org # 6.11+ > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com> > --- > include/linux/min_heap.h | 51 ++++++++++++++++++++++++++++++++++++++++ > lib/min_heap.c | 7 ++++++ > 2 files changed, 58 insertions(+) > > diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h > index 79ddc0adbf2b..b0d603fe5379 100644 > --- a/include/linux/min_heap.h > +++ b/include/linux/min_heap.h > @@ -292,6 +292,52 @@ void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_si > __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > __minheap_obj_size(_heap), _func, _args) > > +/* > + * Sift the element at pos down the heap. > + * > + * Variants of heap functions using an equal-elements-aware sift_down. > + * These may perform better when the heap contains many equal elements. > + */ > +static __always_inline > +void __min_heap_sift_down_eqaware_inline(min_heap_char * heap, size_t pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args) > +{ > + void *data = heap->data; > + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; > + /* pre-scale counters for performance */ > + size_t a = pos * elem_size; > + size_t b, c, smallest; > + size_t n = heap->nr * elem_size; > + > + if (!swp) > + swp = select_swap_func(data, elem_size); > + > + for (;;) { > + b = 2 * a + elem_size; > + c = b + elem_size; > + smallest = a; > + > + if (b >= n) > + break; > + > + if (func->less(data + b, data + smallest, args)) > + smallest = b; > + > + if (c < n && func->less(data + c, data + smallest, args)) > + smallest = c; > + > + if (smallest == a) > + break; > + > + do_swap(data + a, data + smallest, elem_size, swp, args); > + a = smallest; > + } > +} > + > +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ > + __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > + __minheap_obj_size(_heap), _func, _args) > + > /* Sift up ith element from the 
heap, O(log2(nr)). */ > static __always_inline > void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, > @@ -433,6 +479,8 @@ void *__min_heap_peek(struct min_heap_char *heap); > bool __min_heap_full(min_heap_char *heap); > void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, > const struct min_heap_callbacks *func, void *args); > +void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args); > void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, > const struct min_heap_callbacks *func, void *args); > void __min_heapify_all(min_heap_char *heap, size_t elem_size, > @@ -455,6 +503,9 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, > #define min_heap_sift_down(_heap, _pos, _func, _args) \ > __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > __minheap_obj_size(_heap), _func, _args) > +#define min_heap_sift_down_eqaware(_heap, _pos, _func, _args) \ > + __min_heap_sift_down_eqaware(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > + __minheap_obj_size(_heap), _func, _args) > #define min_heap_sift_up(_heap, _idx, _func, _args) \ > __min_heap_sift_up(container_of(&(_heap)->nr, min_heap_char, nr), \ > __minheap_obj_size(_heap), _idx, _func, _args) > diff --git a/lib/min_heap.c b/lib/min_heap.c > index 96f01a4c5fb6..2225f40d0d7a 100644 > --- a/lib/min_heap.c > +++ b/lib/min_heap.c > @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, > } > EXPORT_SYMBOL(__min_heap_sift_down); > > +void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args) > +{ > + __min_heap_sift_down_eqaware_inline(heap, pos, elem_size, func, args); > +} > +EXPORT_SYMBOL(__min_heap_sift_down_eqaware); > + > void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t 
idx, > const struct min_heap_callbacks *func, void *args) > { > -- > 2.34.1 >
On Thu, Jun 12, 2025 at 10:00:14PM +0900, Robert Pang wrote: > Hi Kuan-Wei > > Thanks for this patch series to address the bcache latency regression. > I tested it but results show regression still remains. Upon review of > the patch changes, I notice that the min_heap_sift_down_eqaware_inline > #define macro in this patch may have been mapped incorrectly: > > +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ > + __min_heap_sift_down_inline(container_of(&(_heap)->nr, > min_heap_char, nr), _pos, \ > + __minheap_obj_size(_heap), _func, _args) > > I changed it to map to its "eqaware" counterpart like this and the > regression does not happen again. > > +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ > + __min_heap_sift_down_eqaware_inline(container_of(&(_heap)->nr, > min_heap_char, nr), _pos, \ > + __minheap_obj_size(_heap), _func, _args) > > Do you think this correction is appropriate? > That's definitely my mistake. Thanks for testing and pointing it out. I'll fix the typo in the next version. Regards, Kuan-Wei > Best regards > Robert Pang > > On Wed, Jun 11, 2025 at 6:55 AM Kuan-Wei Chiu <visitorckw@gmail.com> wrote: > > > > The existing min_heap_sift_down() uses the bottom-up heapify variant, > > which reduces the number of comparisons from ~2 * log2(n) to > > ~1 * log2(n) when all elements are distinct. However, in workloads > > where the heap contains many equal elements, this bottom-up variant > > can degenerate and perform up to 2 * log2(n) comparisons, while the > > traditional top-down variant needs only O(1) comparisons in such cases. > > > > To address this, introduce min_heap_sift_down_eqaware(), a top-down > > heapify variant optimized for scenarios with many equal elements. This > > variant avoids unnecessary comparisons and swaps when elements are > > already equal or in the correct position. 
> > > > Cc: stable@vger.kernel.org # 6.11+ > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com> > > --- > > include/linux/min_heap.h | 51 ++++++++++++++++++++++++++++++++++++++++ > > lib/min_heap.c | 7 ++++++ > > 2 files changed, 58 insertions(+) > > > > diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h > > index 79ddc0adbf2b..b0d603fe5379 100644 > > --- a/include/linux/min_heap.h > > +++ b/include/linux/min_heap.h > > @@ -292,6 +292,52 @@ void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_si > > __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > > __minheap_obj_size(_heap), _func, _args) > > > > +/* > > + * Sift the element at pos down the heap. > > + * > > + * Variants of heap functions using an equal-elements-aware sift_down. > > + * These may perform better when the heap contains many equal elements. > > + */ > > +static __always_inline > > +void __min_heap_sift_down_eqaware_inline(min_heap_char * heap, size_t pos, size_t elem_size, > > + const struct min_heap_callbacks *func, void *args) > > +{ > > + void *data = heap->data; > > + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; > > + /* pre-scale counters for performance */ > > + size_t a = pos * elem_size; > > + size_t b, c, smallest; > > + size_t n = heap->nr * elem_size; > > + > > + if (!swp) > > + swp = select_swap_func(data, elem_size); > > + > > + for (;;) { > > + b = 2 * a + elem_size; > > + c = b + elem_size; > > + smallest = a; > > + > > + if (b >= n) > > + break; > > + > > + if (func->less(data + b, data + smallest, args)) > > + smallest = b; > > + > > + if (c < n && func->less(data + c, data + smallest, args)) > > + smallest = c; > > + > > + if (smallest == a) > > + break; > > + > > + do_swap(data + a, data + smallest, elem_size, swp, args); > > + a = smallest; > > + } > > +} > > + > > +#define min_heap_sift_down_eqaware_inline(_heap, _pos, _func, _args) \ > > + 
__min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > > + __minheap_obj_size(_heap), _func, _args) > > + > > /* Sift up ith element from the heap, O(log2(nr)). */ > > static __always_inline > > void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, > > @@ -433,6 +479,8 @@ void *__min_heap_peek(struct min_heap_char *heap); > > bool __min_heap_full(min_heap_char *heap); > > void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, > > const struct min_heap_callbacks *func, void *args); > > +void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size, > > + const struct min_heap_callbacks *func, void *args); > > void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, > > const struct min_heap_callbacks *func, void *args); > > void __min_heapify_all(min_heap_char *heap, size_t elem_size, > > @@ -455,6 +503,9 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, > > #define min_heap_sift_down(_heap, _pos, _func, _args) \ > > __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > > __minheap_obj_size(_heap), _func, _args) > > +#define min_heap_sift_down_eqaware(_heap, _pos, _func, _args) \ > > + __min_heap_sift_down_eqaware(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > > + __minheap_obj_size(_heap), _func, _args) > > #define min_heap_sift_up(_heap, _idx, _func, _args) \ > > __min_heap_sift_up(container_of(&(_heap)->nr, min_heap_char, nr), \ > > __minheap_obj_size(_heap), _idx, _func, _args) > > diff --git a/lib/min_heap.c b/lib/min_heap.c > > index 96f01a4c5fb6..2225f40d0d7a 100644 > > --- a/lib/min_heap.c > > +++ b/lib/min_heap.c > > @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, > > } > > EXPORT_SYMBOL(__min_heap_sift_down); > > > > +void __min_heap_sift_down_eqaware(min_heap_char *heap, size_t pos, size_t elem_size, > > + const struct 
min_heap_callbacks *func, void *args) > > +{ > > + __min_heap_sift_down_eqaware_inline(heap, pos, elem_size, func, args); > > +} > > +EXPORT_SYMBOL(__min_heap_sift_down_eqaware); > > + > > void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, > > const struct min_heap_callbacks *func, void *args) > > { > > -- > > 2.34.1 > >
© 2016 - 2025 Red Hat, Inc.