Real-time (rt) threads can be delayed for 5 seconds in mempool_alloc(),
which seriously affects the responsiveness of front-end applications
and causes user-visible lag.

Real-time (rt) threads should retry the mempool allocation without
delay, in order to obtain the required memory resources as soon as
possible.
The following example shows the real-time (rt) thread QoSCoreThread
(prio=98) blocking for 5 seconds in mempool_alloc(), seriously
affecting the user experience.
Running process: system_server (pid 2245)
Running thread: QoSCoreThread 2529
State: Uninterruptible Sleep - Block I/O
Start: 12,859.616 ms
Systrace Time: 100,063.057104
Duration: 5,152.591 ms
On CPU:
Running instead: kswapd0
Args: {kernel callsite when blocked:: "mempool_alloc+0x130/0x1e8"}
QoSCoreThread-2529 ( 2245) [000] d..2. 100063.057104: sched_switch:
prev_comm=QoSCoreThread prev_pid=2529 prev_prio=000255001000098
prev_state=D ==> next_comm=kswapd0 next_pid=107
next_prio=000063310000120
[GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209675: sched_waking:
comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
[GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209676:
sched_blocked_reason: pid=2529 iowait=1 caller=mempool_alloc+0x130/0x1e8
[GT]ColdPool#14-23937 ( 23854) [000] dNs3. 100068.209695: sched_wakeup:
comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
[GT]ColdPool#14-23937 ( 23854) [000] d..2. 100068.209732: sched_switch:
prev_comm=[GT]ColdPool#14 prev_pid=23937 prev_prio=000003010342130
prev_state=R ==> next_comm=QoSCoreThread next_pid=2529
next_prio=000255131000098
Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
mm/mempool.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/mm/mempool.c b/mm/mempool.c
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -18,6 +18,7 @@
 #include <linux/export.h>
 #include <linux/mempool.h>
 #include <linux/writeback.h>
+#include <linux/sched/prio.h>
 #include "slab.h"
 
 #ifdef CONFIG_SLUB_DEBUG_ON
@@ -386,7 +387,7 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 	void *element;
 	unsigned long flags;
 	wait_queue_entry_t wait;
-	gfp_t gfp_temp;
+	gfp_t gfp_temp, gfp_src = gfp_mask;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
 	might_alloc(gfp_mask);
@@ -433,6 +434,16 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 		return NULL;
 	}
 
+	/*
+	 * We will try to direct reclaim cyclically, if the rt-thread
+	 * is without __GFP_NORETRY.
+	 */
+	if (!(gfp_src & __GFP_NORETRY) && current->prio < MAX_RT_PRIO) {
+		spin_unlock_irqrestore(&pool->lock, flags);
+		gfp_temp = gfp_src;
+		goto repeat_alloc;
+	}
+
 	/* Let's wait for someone else to return an element to @pool */
 	init_wait(&wait);
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
--
2.48.1
On Tue, 17 Jun 2025 17:10:44 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:

> The real-time(rt) threads are delayed for 5 seconds in mempool_alloc,
> which will seriously affect the timeliness of front-end applications
> and the user experience lag issues.

Oh God, do we really do that?

Yes we do!  I'm surprised this wasn't reported some time over the
intervening 13 years.

Yes, a hard-coded 5 second delay might be a slight problem in a
realtime kernel.

> The real-time(rt) threads should retry mempool allocation without
> delay and in order to obtain the required memory resources as soon as
> possible.

Well, does this actually work in your testing?

I guess it can improve the situation, some of the time.  If it's a
uniprocessor non-preemptible then perhaps interrupt-time writeback
completion might save us, otherwise it's time to hit the power button.

> The following example shows that the real-time(rt) QoSCoreThread
> prio=98 blocks 5 seconds in mempool_alloc, seriously affecting the
> user experience.
>
> Running process: system_server (pid 2245)
> Running thread: QoSCoreThread 2529
> State: Uninterruptible Sleep - Block I/O
> Start: 12,859.616 ms
> Systrace Time: 100,063.057104
> Duration: 5,152.591 ms
> On CPU:
> Running instead: kswapd0
> Args: {kernel callsite when blocked:: "mempool_alloc+0x130/0x1e8"}
>
> QoSCoreThread-2529 ( 2245) [000] d..2. 100063.057104: sched_switch:
> prev_comm=QoSCoreThread prev_pid=2529 prev_prio=000255001000098
> prev_state=D ==> next_comm=kswapd0 next_pid=107
> next_prio=000063310000120
> [GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209675: sched_waking:
> comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
> [GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209676:
> sched_blocked_reason: pid=2529 iowait=1 caller=mempool_alloc+0x130/0x1e8
> [GT]ColdPool#14-23937 ( 23854) [000] dNs3. 100068.209695: sched_wakeup:
> comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
> [GT]ColdPool#14-23937 ( 23854) [000] d..2. 100068.209732: sched_switch:
> prev_comm=[GT]ColdPool#14 prev_pid=23937 prev_prio=000003010342130
> prev_state=R ==> next_comm=QoSCoreThread next_pid=2529
> next_prio=000255131000098

Do you have a call trace for these stalls?  I'm interested to see who
is calling mempool_alloc() here.  Perhaps a suitable solution is to
teach the caller(s) to stop passing __GFP_DIRECT_RECLAIM and to handle
the NULL return.

> --- a/mm/mempool.c
> +++ b/mm/mempool.c
> @@ -18,6 +18,7 @@
>  #include <linux/export.h>
>  #include <linux/mempool.h>
>  #include <linux/writeback.h>
> +#include <linux/sched/prio.h>
>  #include "slab.h"
>  
>  #ifdef CONFIG_SLUB_DEBUG_ON
> @@ -386,7 +387,7 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
>  	void *element;
>  	unsigned long flags;
>  	wait_queue_entry_t wait;
> -	gfp_t gfp_temp;
> +	gfp_t gfp_temp, gfp_src = gfp_mask;
>  
>  	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
>  	might_alloc(gfp_mask);
> @@ -433,6 +434,16 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
>  		return NULL;
>  	}
>  
> +	/*
> +	 * We will try to direct reclaim cyclically, if the rt-thread

"synchronously"

> +	 * is without __GFP_NORETRY.
> +	 */
> +	if (!(gfp_src & __GFP_NORETRY) && current->prio < MAX_RT_PRIO) {
> +		spin_unlock_irqrestore(&pool->lock, flags);
> +		gfp_temp = gfp_src;
> +		goto repeat_alloc;
> +	}
> +
>  	/* Let's wait for someone else to return an element to @pool */
>  	init_wait(&wait);
>  	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
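[For readers following along: the caller-side alternative suggested above would
look roughly like the sketch below.  This is a minimal illustration, not code
from the thread; the mempool user (struct request_ctx, alloc_request_ctx())
is hypothetical, since the actual callers behind the QoSCoreThread stall were
not identified in this discussion.]

/*
 * Hypothetical caller-side sketch: clear __GFP_DIRECT_RECLAIM on the
 * latency-sensitive path so that mempool_alloc() returns NULL instead
 * of sleeping, and handle that NULL explicitly.
 */
static struct request_ctx *alloc_request_ctx(mempool_t *pool, gfp_t gfp_mask)
{
	struct request_ctx *ctx;

	/* Never enter direct reclaim or the mempool wait loop here. */
	ctx = mempool_alloc(pool, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	if (!ctx) {
		/*
		 * Allocation failed without sleeping: fall back to a
		 * caller-specific slow path, e.g. defer the work to a
		 * non-rt worker or use a preallocated emergency object.
		 */
		return NULL;
	}

	return ctx;
}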
On 2025/6/18 8:25, Andrew Morton wrote:
> On Tue, 17 Jun 2025 17:10:44 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
>
>> The real-time(rt) threads are delayed for 5 seconds in mempool_alloc,
>> which will seriously affect the timeliness of front-end applications
>> and the user experience lag issues.
> Oh God, do we really do that?
>
> Yes we do!  I'm surprised this wasn't reported some time over the
> intervening 13 years.
>
> Yes, a hard-coded 5 second delay might be a slight problem in a
> realtime kernel.
>
>> The real-time(rt) threads should retry mempool allocation without
>> delay and in order to obtain the required memory resources as soon as
>> possible.
> Well, does this actually work in your testing?
>
> I guess it can improve the situation, some of the time.  If it's a
> uniprocessor non-preemptible then perhaps interrupt-time writeback
> completion might save us, otherwise it's time to hit the power button.

Hi Andrew Morton,

It should be solved.  We conducted the same test but did not reproduce
the issue.

I added trace_mm_mempool_alloc_start() in mempool_alloc_noprof():

void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
{
	void *element;
	unsigned long flags;
	wait_queue_entry_t wait;
	gfp_t gfp_temp, gfp_src = gfp_mask;

	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
	might_alloc(gfp_mask);

+	trace_mm_mempool_alloc_start(gfp_mask, current->prio);
	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */

and we can see that the gfp masks used by the QoSCoreThread thread are
as follows:

QoSCoreThread-2421 [005] ..... 120.217517: mm_mempool_alloc_start: prio=98, gfp_flags=GFP_ATOMIC
QoSCoreThread-2421 [005] ..... 120.217512: mm_mempool_alloc_start: prio=98, gfp_flags=GFP_NOFS
QoSCoreThread-2421 [005] ..... 120.217513: mm_mempool_alloc_start: prio=98, gfp_flags=GFP_NOIO
QoSCoreThread-2421 [005] ..... 120.217524: mm_mempool_alloc_start: prio=98, gfp_flags=GFP_KERNEL

1) GFP_ATOMIC can directly return NULL;
2) GFP_NOFS, GFP_NOIO and GFP_KERNEL all contain __GFP_DIRECT_RECLAIM,
   and if the allocation fails, they will enter sleep to wait.

So, regarding situation 2), my modification can solve the issue.

>> The following example shows that the real-time(rt) QoSCoreThread
>> prio=98 blocks 5 seconds in mempool_alloc, seriously affecting the
>> user experience.
>>
>> Running process: system_server (pid 2245)
>> Running thread: QoSCoreThread 2529
>> State: Uninterruptible Sleep - Block I/O
>> Start: 12,859.616 ms
>> Systrace Time: 100,063.057104
>> Duration: 5,152.591 ms
>> On CPU:
>> Running instead: kswapd0
>> Args: {kernel callsite when blocked:: "mempool_alloc+0x130/0x1e8"}
>>
>> QoSCoreThread-2529 ( 2245) [000] d..2. 100063.057104: sched_switch:
>> prev_comm=QoSCoreThread prev_pid=2529 prev_prio=000255001000098
>> prev_state=D ==> next_comm=kswapd0 next_pid=107
>> next_prio=000063310000120
>> [GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209675: sched_waking:
>> comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
>> [GT]ColdPool#14-23937 ( 23854) [000] dNs2. 100068.209676:
>> sched_blocked_reason: pid=2529 iowait=1 caller=mempool_alloc+0x130/0x1e8
>> [GT]ColdPool#14-23937 ( 23854) [000] dNs3. 100068.209695: sched_wakeup:
>> comm=QoSCoreThread pid=2529 prio=98 target_cpu=000
>> [GT]ColdPool#14-23937 ( 23854) [000] d..2. 100068.209732: sched_switch:
>> prev_comm=[GT]ColdPool#14 prev_pid=23937 prev_prio=000003010342130
>> prev_state=R ==> next_comm=QoSCoreThread next_pid=2529
>> next_prio=000255131000098
> Do you have a call trace for these stalls?  I'm interested to see who
> is calling mempool_alloc() here.  Perhaps a suitable solution is to
> teach the caller(s) to stop passing __GFP_DIRECT_RECLAIM and to handle
> the NULL return.

Sorry, we conducted the same test but did not reproduce the issue, so
the probability is relatively low.  I think there are multiple
possibilities for this situation, and different rt-threads may sleep in
different situations.  So some improvement strategy should be
implemented in mempool_alloc() itself.

>> --- a/mm/mempool.c
>> +++ b/mm/mempool.c
>> @@ -18,6 +18,7 @@
>>  #include <linux/export.h>
>>  #include <linux/mempool.h>
>>  #include <linux/writeback.h>
>> +#include <linux/sched/prio.h>
>>  #include "slab.h"
>>  
>>  #ifdef CONFIG_SLUB_DEBUG_ON
>> @@ -386,7 +387,7 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
>>  	void *element;
>>  	unsigned long flags;
>>  	wait_queue_entry_t wait;
>> -	gfp_t gfp_temp;
>> +	gfp_t gfp_temp, gfp_src = gfp_mask;
>>  
>>  	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
>>  	might_alloc(gfp_mask);
>> @@ -433,6 +434,16 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
>>  		return NULL;
>>  	}
>>  
>> +	/*
>> +	 * We will try to direct reclaim cyclically, if the rt-thread
> "synchronously"
>
>> +	 * is without __GFP_NORETRY.
>> +	 */
>> +	if (!(gfp_src & __GFP_NORETRY) && current->prio < MAX_RT_PRIO) {
>> +		spin_unlock_irqrestore(&pool->lock, flags);
>> +		gfp_temp = gfp_src;
>> +		goto repeat_alloc;
>> +	}
>> +
>>  	/* Let's wait for someone else to return an element to @pool */
>>  	init_wait(&wait);
>>  	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);

Thanks
Zhiguo
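[Note: trace_mm_mempool_alloc_start() in the reply above is a local debugging
tracepoint added for this investigation; it is not an upstream event.  A
minimal sketch of how such a tracepoint might be defined is shown below.  The
event name, field layout and header location are assumptions, chosen only to
mirror the "prio=..., gfp_flags=..." trace output quoted in the reply.]

/* Hypothetical debug-only tracepoint, e.g. in a local trace header. */
TRACE_EVENT(mm_mempool_alloc_start,

	TP_PROTO(gfp_t gfp_flags, int prio),

	TP_ARGS(gfp_flags, prio),

	TP_STRUCT__entry(
		__field(unsigned long, gfp_flags)
		__field(int, prio)
	),

	TP_fast_assign(
		__entry->gfp_flags = (__force unsigned long)gfp_flags;
		__entry->prio = prio;
	),

	/* show_gfp_flags() pretty-prints the mask, e.g. "GFP_KERNEL" */
	TP_printk("prio=%d, gfp_flags=%s",
		  __entry->prio, show_gfp_flags(__entry->gfp_flags))
);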