(was regression: mm: vmscan: - size XL irqoff time increase v6.10+)
Break off queueing of IO once we've been at it for a ms or so and a
preemption is due, to keep the latency impact of writeback at least
reasonable. The IO we queue under the spinlock still has to be started
under that same lock.
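For illustration, the same bounded-batching idea in isolation, as a
minimal sketch rather than the real thing; the foo_queue type and the
foo_queue_one() helper are made up for the example:

#include <linux/list.h>
#include <linux/sched.h>		/* need_resched() */
#include <linux/sched/clock.h>		/* local_clock() */
#include <linux/spinlock.h>
#include <linux/time64.h>		/* NSEC_PER_MSEC */

/*
 * Drain a list under a spinlock, but break off once we've been at it
 * for ~1ms and a preemption is due.  Checking the clock only every
 * 100 items keeps local_clock() out of the common case.
 */
static int foo_drain_batch(struct foo_queue *q)	/* hypothetical type */
{
	u64 then = local_clock();
	int iter = 0, moved = 0;

	spin_lock(&q->lock);
	while (!list_empty(&q->head)) {
		foo_queue_one(q);		/* hypothetical per-item work */
		moved++;

		if (iter++ < 100 || !need_resched())
			continue;
		if (local_clock() - then > NSEC_PER_MSEC)
			break;			/* caller picks up the remainder later */
		iter = 0;
	}
	spin_unlock(&q->lock);

	return moved;
}

The hunk below does exactly this inside move_expired_inodes(), under
->list_lock.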
wakeup_rt tracing caught move_expired_inodes() spanning 66ms on an i7-4790 box.
With this patch applied on top of one that mitigates even worse
IRQ-holdoff-induced hits (78ms) in isolate_lru_folios(), the same trivial
load that leads to this and worse (osc kernel package build + bonnie):
T: 1 ( 6211) P:99 I:1500 C: 639971 Min: 1 Act: 7 Avg: 12 Max: 66696
resulted in this perfectly reasonable max:
T: 0 ( 6078) P:99 I:1000 C:1031230 Min: 1 Act: 7 Avg: 4 Max: 4449
Note: cyclictest -Smp99 is only the messenger. This is not an RT issue;
RT is merely fingering bad generic behavior.
Signed-off-by: Mike Galbraith <efault@gmx.de>
---
fs/fs-writeback.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -29,6 +29,7 @@
 #include <linux/tracepoint.h>
 #include <linux/device.h>
 #include <linux/memcontrol.h>
+#include <linux/sched/clock.h>
 #include "internal.h"
 
 /*
@@ -1424,6 +1425,10 @@ static int move_expired_inodes(struct li
 	struct inode *inode;
 	int do_sb_sort = 0;
 	int moved = 0;
+#ifndef CONFIG_PREEMPT_RT
+	u64 then = local_clock();
+	int iter = 0;
+#endif
 
 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
@@ -1439,6 +1444,19 @@ static int move_expired_inodes(struct li
 		if (sb && sb != inode->i_sb)
 			do_sb_sort = 1;
 		sb = inode->i_sb;
+#ifndef CONFIG_PREEMPT_RT
+		/*
+		 * We're under ->list_lock here, and the IO being queued
+		 * still has to be started. Stop queueing when we've been
+		 * at it for a ms or so and a preemption is due, to keep
+		 * latency impact reasonable.
+		 */
+		if (iter++ < 100 || !need_resched())
+			continue;
+		if (local_clock() - then > NSEC_PER_MSEC)
+			break;
+		iter = 0;
+#endif
 	}
 
 	/* just one sb in list, splice to dispatch_queue and we're done */