include/linux/sort.h | 11 +++++++++++ lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 5 deletions(-)
Andrew - if you're ok with this patch I'd like to get it in soon as a
bugfix, I've been getting quite a few reports on this one.
I don't much care for the naming though, thoughts there?
-- >8 --
bcachefs calls sort() during recovery to sort all keys it found in the
journal, and this may be very large - gigabytes on large machines.
This has been causing "task blocked" warnings, so needs a
cond_resched().
Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/sort.h | 11 +++++++++++
lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/include/linux/sort.h b/include/linux/sort.h
index e163287ac6c1..8e5603b10941 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -13,4 +13,15 @@ void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func);
+/* Versions that periodically call cond_resched(): */
+
+void sort_r_nonatomic(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv);
+
+void sort_nonatomic(void *base, size_t num, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func);
+
#endif
diff --git a/lib/sort.c b/lib/sort.c
index 8e73dc55476b..60b51dac00ec 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -186,6 +186,8 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
return i / 2;
}
+#include <linux/sched.h>
+
/**
* sort_r - sort an array of elements
* @base: pointer to data to sort
@@ -212,10 +214,11 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
* O(n*n) worst-case behavior and extra memory requirements that make
* it less suitable for kernel use.
*/
-void sort_r(void *base, size_t num, size_t size,
- cmp_r_func_t cmp_func,
- swap_r_func_t swap_func,
- const void *priv)
+static void __sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv,
+ bool may_schedule)
{
/* pre-scale counters for performance */
size_t n = num * size, a = (num/2) * size;
@@ -286,6 +289,9 @@ void sort_r(void *base, size_t num, size_t size,
b = parent(b, lsbit, size);
do_swap(base + b, base + c, size, swap_func, priv);
}
+
+ if (may_schedule)
+ cond_resched();
}
n -= size;
@@ -293,8 +299,25 @@ void sort_r(void *base, size_t num, size_t size,
if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0)
do_swap(base, base + size, size, swap_func, priv);
}
+
+void sort_r(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ __sort_r(base, num, size, cmp_func, swap_func, priv, false);
+}
EXPORT_SYMBOL(sort_r);
+void sort_r_nonatomic(void *base, size_t num, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ __sort_r(base, num, size, cmp_func, swap_func, priv, true);
+}
+EXPORT_SYMBOL(sort_r_nonatomic);
+
void sort(void *base, size_t num, size_t size,
cmp_func_t cmp_func,
swap_func_t swap_func)
@@ -304,6 +327,19 @@ void sort(void *base, size_t num, size_t size,
.swap = swap_func,
};
- return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+ return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, false);
}
EXPORT_SYMBOL(sort);
+
+void sort_nonatomic(void *base, size_t num, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap = swap_func,
+ };
+
+ return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, true);
+}
+EXPORT_SYMBOL(sort_nonatomic);
--
2.49.0
On Wed, 26 Mar 2025 11:26:06 -0400 Kent Overstreet <kent.overstreet@linux.dev> wrote: > bcachefs calls sort() during recovery to sort all keys it found in the > journal, and this may be very large - gigabytes on large machines. > > This has been causing "task blocked" warnings, so needs a > cond_resched(). I assume this has been happening for a while, so a cc:stable is appropriate?
On Thu, Mar 27, 2025 at 11:09:23AM -0700, Andrew Morton wrote: > On Wed, 26 Mar 2025 11:26:06 -0400 Kent Overstreet <kent.overstreet@linux.dev> wrote: > > > bcachefs calls sort() during recovery to sort all keys it found in the > > journal, and this may be very large - gigabytes on large machines. > > > > This has been causing "task blocked" warnings, so needs a > > cond_resched(). > > I assume this has been happening for a while, so a cc:stable is appropriate? Well, right now I'm only doing backports for the most critical stuff, and this doesn't meet that bar. I'm planning on doing normal backports after I take the experimental label off, in at most another year. And I've told the stable team I don't want them touching fs/bcachefs/, I want to be doing those backports and running them through my normal QA process. We could for this, but perhaps better not to let the process get confused :)
On Wed, Mar 26, 2025 at 11:26:06AM -0400, Kent Overstreet wrote: > Andrew - if you're ok with this patch I'd like to get it in soon as a > bugfix, I've been getting quite a few reports on this one. > > I don't much care for the naming though, thoughts there? > > -- >8 -- > > bcachefs calls sort() during recovery to sort all keys it found in the > journal, and this may be very large - gigabytes on large machines. > > This has been causing "task blocked" warnings, so needs a > cond_resched(). > > Cc: Kuan-Wei Chiu <visitorckw@gmail.com> > Cc: Andrew Morton <akpm@linux-foundation.org> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > --- > include/linux/sort.h | 11 +++++++++++ > lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++----- > 2 files changed, 52 insertions(+), 5 deletions(-) > I don't have strong opinions on this, but I recall that UBIFS had a similar issue with list_sort(), and they addressed it by calling cond_resched() within the compare function. Would that approach be simpler and more appropriate than introducing a new API in the library code? Regards, Kuan-Wei
On Wed, Mar 26, 2025 at 11:54:15PM +0800, Kuan-Wei Chiu wrote: > On Wed, Mar 26, 2025 at 11:26:06AM -0400, Kent Overstreet wrote: > > Andrew - if you're ok with this patch I'd like to get it in soon as a > > bugfix, I've been getting quite a few reports on this one. > > > > I don't much care for the naming though, thoughts there? > > > > -- >8 -- > > > > bcachefs calls sort() during recovery to sort all keys it found in the > > journal, and this may be very large - gigabytes on large machines. > > > > This has been causing "task blocked" warnings, so needs a > > cond_resched(). > > > > Cc: Kuan-Wei Chiu <visitorckw@gmail.com> > > Cc: Andrew Morton <akpm@linux-foundation.org> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > > --- > > include/linux/sort.h | 11 +++++++++++ > > lib/sort.c | 46 +++++++++++++++++++++++++++++++++++++++----- > > 2 files changed, 52 insertions(+), 5 deletions(-) > > > > I don't have strong opinions on this, but I recall that UBIFS had a > similar issue with list_sort(), and they addressed it by calling > cond_resched() within the compare function. Would that approach be > simpler and more appropriate than introducing a new API in the library > code? That'd be an option, but it would be heavier; sort() has nested loops so it has a more natural place to put it. And I'd say the nonatomic scheduling version should likely be the default, anyways; even if other users aren't hitting the 10 second warning, going 1 second without scheduling isn't good. Not going to audit all the existing callers, but we should probably provide it.
© 2016 - 2025 Red Hat, Inc.