Address the feedback from "mm/page-writeback: raise wb_thresh to prevent
write blocking with strictlimit" (39ac99852fca98ca44d52716d792dfaf24981f53).
The wb_thresh bumping logic is scattered across wb_position_ratio,
__wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency,
consolidate all wb_thresh bumping logic into __wb_calc_thresh.
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com>
---
mm/page-writeback.c | 53 ++++++++++++++-------------------------------
1 file changed, 16 insertions(+), 37 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d213ead95675..8b13bcb42de3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -936,26 +936,25 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
wb_min_max_ratio(wb, &wb_min_ratio, &wb_max_ratio);
wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
- wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
- if (wb_thresh > wb_max_thresh)
- wb_thresh = wb_max_thresh;
/*
- * With strictlimit flag, the wb_thresh is treated as
- * a hard limit in balance_dirty_pages() and wb_position_ratio().
- * It's possible that wb_thresh is close to zero, not because
- * the device is slow, but because it has been inactive.
- * To prevent occasional writes from being blocked, we raise wb_thresh.
+ * It's very possible that wb_thresh is close to 0 not because the
+ * device is slow, but that it has remained inactive for long time.
+ * Honour such devices a reasonable good (hopefully IO efficient)
+ * threshold, so that the occasional writes won't be blocked and active
+ * writes can rampup the threshold quickly.
*/
- if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
- unsigned long limit = hard_dirty_limit(dom, dtc->thresh);
- u64 wb_scale_thresh = 0;
-
- if (limit > dtc->dirty)
- wb_scale_thresh = (limit - dtc->dirty) / 100;
- wb_thresh = max(wb_thresh, min(wb_scale_thresh, wb_max_thresh / 4));
+ if (thresh > dtc->dirty) {
+ if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT))
+ wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100);
+ else
+ wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8);
}
+ wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
+ if (wb_thresh > wb_max_thresh)
+ wb_thresh = wb_max_thresh;
+
return wb_thresh;
}
@@ -963,6 +962,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
+ domain_dirty_avail(&gdtc, true);
return __wb_calc_thresh(&gdtc, thresh);
}
@@ -1139,12 +1139,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
long long wb_pos_ratio;
- if (dtc->wb_dirty < 8) {
- dtc->pos_ratio = min_t(long long, pos_ratio * 2,
- 2 << RATELIMIT_CALC_SHIFT);
- return;
- }
-
if (dtc->wb_dirty >= wb_thresh)
return;
@@ -1215,14 +1209,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
*/
if (unlikely(wb_thresh > dtc->thresh))
wb_thresh = dtc->thresh;
- /*
- * It's very possible that wb_thresh is close to 0 not because the
- * device is slow, but that it has remained inactive for long time.
- * Honour such devices a reasonable good (hopefully IO efficient)
- * threshold, so that the occasional writes won't be blocked and active
- * writes can rampup the threshold quickly.
- */
- wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
/*
* scale global setpoint to wb's:
* wb_setpoint = setpoint * wb_thresh / thresh
@@ -1478,17 +1464,10 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
* balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
* Hence, to calculate "step" properly, we have to use wb_dirty as
* "dirty" and wb_setpoint as "setpoint".
- *
- * We rampup dirty_ratelimit forcibly if wb_dirty is low because
- * it's possible that wb_thresh is close to zero due to inactivity
- * of backing device.
*/
if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
dirty = dtc->wb_dirty;
- if (dtc->wb_dirty < 8)
- setpoint = dtc->wb_dirty + 1;
- else
- setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
+ setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
}
if (dirty < setpoint) {
--
2.20.1
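
For reference in the discussion that follows, the net effect of the diff above can be summarized by the simplified sketch below. This is an illustration only, not the kernel code: the min_ratio contribution, the BDI_RATIO_SCALE fixed-point scaling and all locking are omitted, and the helper names (bump_wb_thresh, max_ul) are invented for the sketch.

/*
 * Simplified illustration of the wb_thresh bumping consolidated into
 * __wb_calc_thresh() by this patch.  The structure of the max()/cap steps
 * mirrors the diff above; everything else is left out.
 */
#include <stdbool.h>

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

static unsigned long bump_wb_thresh(unsigned long wb_thresh,
				    unsigned long thresh,       /* dtc->thresh */
				    unsigned long dirty,        /* dtc->dirty  */
				    unsigned long wb_max_ratio, /* percent     */
				    bool strictlimit)
{
	unsigned long wb_max_thresh;

	/*
	 * A long-idle device can end up with wb_thresh close to 0.  Grant it
	 * a share of the remaining headroom so occasional writes are not
	 * blocked: 1/100 of (thresh - dirty) under strictlimit, 1/8 otherwise.
	 */
	if (thresh > dirty)
		wb_thresh = max_ul(wb_thresh,
				   (thresh - dirty) / (strictlimit ? 100 : 8));

	/* The bump is still capped by this writeback's max_ratio share. */
	wb_max_thresh = thresh * wb_max_ratio / 100;
	if (wb_thresh > wb_max_thresh)
		wb_thresh = wb_max_thresh;

	return wb_thresh;
}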
From: Joshua Watt <jpewhacker@gmail.com>

This patch strangely breaks NFS 4 clients for me. The behavior is that a
client will start getting an I/O error, which in turn is caused by the client
getting a NFS3ERR_BADSESSION when attempting to write data to the server. I
bisected the kernel from the latest master
(9029dc666353504ea7c1ebfdf09bc1aab40f6147) to this commit (log below). Also,
when I revert this commit on master the bug disappears.

The server is running kernel 5.4.161, and the client that exhibits the
behavior is running in qemux86, and has mounted the server with the options
rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,port=52049,timeo=600,retrans=2,sec=null,clientaddr=172.16.6.90,local_lock=none,addr=172.16.6.0

The program that I wrote to reproduce this is pretty simple; it does a file
lock over NFS, then writes data to the file once per second. After about 32
seconds, it receives the I/O error, and this reproduced every time. I can
provide the sample program if necessary.

I also captured the NFS traffic both in the passing case and the failure
case, and can provide them if useful. I did look at the two dumps and I'm not
exactly sure what the difference is, other than with this patch the client
tries to write every 30 seconds (and fails), whereas without it the client
attempts to write back every 5 seconds. I have no idea why this patch would
cause this problem. Any help is appreciated. Thank you.

git bisect start
# status: waiting for both good and bad commits
# good: [4fe89d07dcc2804c8b562f6c7896a45643d34b2f] Linux 6.0
git bisect good 4fe89d07dcc2804c8b562f6c7896a45643d34b2f
# status: waiting for bad commit, 1 good commit known
# bad: [e5f0a698b34ed76002dc5cff3804a61c80233a7a] Linux 6.17
git bisect bad e5f0a698b34ed76002dc5cff3804a61c80233a7a
# good: [5f20e6ab1f65aaaaae248e6946d5cb6d039e7de8] Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
git bisect good 5f20e6ab1f65aaaaae248e6946d5cb6d039e7de8
# good: [28eb75e178d389d325f1666e422bc13bbbb9804c] Merge tag 'drm-next-2024-11-21' of https://gitlab.freedesktop.org/drm/kernel
git bisect good 28eb75e178d389d325f1666e422bc13bbbb9804c
# bad: [1afba39f9305fe4061a4e70baa6ebab9d41459da] Merge drm/drm-next into drm-misc-next
git bisect bad 1afba39f9305fe4061a4e70baa6ebab9d41459da
# bad: [350130afc22bd083ea18e17452dd3979c88b08ff] Merge tag 'ubifs-for-linus-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs
git bisect bad 350130afc22bd083ea18e17452dd3979c88b08ff
# good: [cf33d96f50903214226b379b3f10d1f262dae018] Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
git bisect good cf33d96f50903214226b379b3f10d1f262dae018
# good: [c9c0543b52d8cfe3a3b15d1e39ab9dbc91be6df4] Merge tag 'platform-drivers-x86-v6.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
git bisect good c9c0543b52d8cfe3a3b15d1e39ab9dbc91be6df4
# good: [40648d246fa4307ef11d185933cb0d79fc9ff46c] Merge tag 'trace-tools-v6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
git bisect good 40648d246fa4307ef11d185933cb0d79fc9ff46c
# bad: [125ca745467d4f87ae58e671a4a5714e024d2908] Merge tag 'staging-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
git bisect bad 125ca745467d4f87ae58e671a4a5714e024d2908
# bad: [d1366e74342e75555af2648a2964deb2d5c92200] mm/compaction: fix UBSAN shift-out-of-bounds warning
git bisect bad d1366e74342e75555af2648a2964deb2d5c92200
# good: [553e77529fb61e5520b839a0ce412a46cba996e0] mm: pgtable: introduce generic pagetable_dtor_free()
git bisect good 553e77529fb61e5520b839a0ce412a46cba996e0
# good: [65a1cf15802c7a571299df507b1b72ba89ef1da8] mm/zsmalloc: convert migrate_zspage() to use zpdesc
git bisect good 65a1cf15802c7a571299df507b1b72ba89ef1da8
# good: [136c5b40e0ad84f4b4a38584089cd565b97f799c] selftests/mm: use selftests framework to print test result
git bisect good 136c5b40e0ad84f4b4a38584089cd565b97f799c
# good: [fb7d3bc4149395c1ae99029c852eab6c28fc3c88] mm/filemap: drop streaming/uncached pages when writeback completes
git bisect good fb7d3bc4149395c1ae99029c852eab6c28fc3c88
# good: [7882d8fc8fe0c2b2a01f09e56edf82df6b3013fd] selftests/mm/mkdirty: fix memory leak in test_uffdio_copy()
git bisect good 7882d8fc8fe0c2b2a01f09e56edf82df6b3013fd
# bad: [6aeb991c54b281710591ce388158bc1739afc227] mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
git bisect bad 6aeb991c54b281710591ce388158bc1739afc227
# good: [f752e677f85993c812fe9de7b4427f3f18408a11] mm: separate move/undo parts from migrate_pages_batch()
git bisect good f752e677f85993c812fe9de7b4427f3f18408a11
# good: [686fa9537d78d2f1bea42bf3891828510202be14] mm/page_alloc: remove the incorrect and misleading comment
git bisect good 686fa9537d78d2f1bea42bf3891828510202be14
# first bad commit: [6aeb991c54b281710591ce388158bc1739afc227] mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
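
The sample program is offered but not included in the thread; the following is only a guess at what such a reproducer might look like, a minimal sketch matching the description above (file lock over NFS, one write per second). The path and iteration count are assumptions, not taken from the report.

/* Hypothetical reproducer sketch: lock a file on an NFS mount, then write
 * to it once per second.  Path and loop length are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/mnt/nfs/testfile";	/* assumed NFS mount */
	int fd = open(path, O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct flock fl = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,	/* l_start = l_len = 0: whole file */
	};
	if (fcntl(fd, F_SETLKW, &fl) < 0) {
		perror("fcntl(F_SETLKW)");
		return 1;
	}

	for (int i = 0; i < 120; i++) {		/* failure reported after ~32s */
		char buf[64];
		int len = snprintf(buf, sizeof(buf), "tick %d\n", i);
		if (write(fd, buf, len) < 0) {	/* the EIO shows up here */
			perror("write");
			return 1;
		}
		sleep(1);
	}

	close(fd);
	return 0;
}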
Hello!

On Tue 07-10-25 10:17:11, Joshua Watt wrote:
> From: Joshua Watt <jpewhacker@gmail.com>
>
> This patch strangely breaks NFS 4 clients for me. The behavior is that a
> client will start getting an I/O error which in turn is caused by the client
> getting a NFS3ERR_BADSESSION when attempting to write data to the server. I
> bisected the kernel from the latest master
> (9029dc666353504ea7c1ebfdf09bc1aab40f6147) to this commit (log below). Also,
> when I revert this commit on master the bug disappears.
>
> The server is running kernel 5.4.161, and the client that exhibits the
> behavior is running in qemux86, and has mounted the server with the options
> rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,port=52049,timeo=600,retrans=2,sec=null,clientaddr=172.16.6.90,local_lock=none,addr=172.16.6.0
>
> The program that I wrote to reproduce this is pretty simple; it does a file
> lock over NFS, then writes data to the file once per second. After about 32
> seconds, it receives the I/O error, and this reproduced every time. I can
> provide the sample program if necessary.

This is indeed rather curious.

> I also captured the NFS traffic both in the passing case and the failure case,
> and can provide them if useful.
>
> I did look at the two dumps and I'm not exactly sure what the difference is,
> other than with this patch the client tries to write every 30 seconds (and
> fails), where as without it attempts to write back every 5 seconds. I have no
> idea why this patch would cause this problem.

So the change in writeback behavior is not surprising. The commit does
modify the logic computing dirty limits in some corner cases and your
description matches the fact that previously the computed limits were lower
so we've started writeback after 5s (dirty_writeback_interval) while with
the patch we didn't cross the threshold and thus started writeback only
once the dirty data was old enough, which is 30s (dirty_expire_interval).

But that's all, you should be able to observe exactly the same writeback
behavior if you write less even without this patch. So I suspect that the
different writeback behavior is just triggering some bug in the NFS (either
on the client or the server side). The NFS3ERR_BADSESSION error you're
getting back sounds like something times out somewhere, falls out of cache
and reports this error (which doesn't happen if we writeback after 5s
instead of 30s). NFS guys maybe have better idea what's going on here.

You could possibly workaround this problem (and verify my theory) by tuning
/proc/sys/vm/dirty_expire_centisecs to a lower value (say 500). This will
make inode writeback start earlier and thus should effectively mask the
problem again.

Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
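
The suggested workaround is a single sysctl write; a trivial sketch of doing it programmatically, equivalent to writing 500 into /proc/sys/vm/dirty_expire_centisecs (the value 500 is taken from the suggestion above; the file path is the standard procfs sysctl location):

/* Minimal sketch: lower vm.dirty_expire_centisecs to 500 to make inode
 * writeback start earlier, as suggested above.  Requires root. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/dirty_expire_centisecs", "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "500\n");	/* centiseconds, i.e. 5 seconds */
	fclose(f);
	return 0;
}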
On Wed, Oct 8, 2025 at 5:14 AM Jan Kara <jack@suse.cz> wrote: > > Hello! > > On Tue 07-10-25 10:17:11, Joshua Watt wrote: > > From: Joshua Watt <jpewhacker@gmail.com> > > > > This patch strangely breaks NFS 4 clients for me. The behavior is that a > > client will start getting an I/O error which in turn is caused by the client > > getting a NFS3ERR_BADSESSION when attempting to write data to the server. I > > bisected the kernel from the latest master > > (9029dc666353504ea7c1ebfdf09bc1aab40f6147) to this commit (log below). Also, > > when I revert this commit on master the bug disappears. > > > > The server is running kernel 5.4.161, and the client that exhibits the > > behavior is running in qemux86, and has mounted the server with the options > > rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,port=52049,timeo=600,retrans=2,sec=null,clientaddr=172.16.6.90,local_lock=none,addr=172.16.6.0 > > > > The program that I wrote to reproduce this is pretty simple; it does a file > > lock over NFS, then writes data to the file once per second. After about 32 > > seconds, it receives the I/O error, and this reproduced every time. I can > > provide the sample program if necessary. > > This is indeed rather curious. > > > I also captured the NFS traffic both in the passing case and the failure case, > > and can provide them if useful. > > > > I did look at the two dumps and I'm not exactly sure what the difference is, > > other than with this patch the client tries to write every 30 seconds (and > > fails), where as without it attempts to write back every 5 seconds. I have no > > idea why this patch would cause this problem. > > So the change in writeback behavior is not surprising. The commit does > modify the logic computing dirty limits in some corner cases and your > description matches the fact that previously the computed limits were lower > so we've started writeback after 5s (dirty_writeback_interval) while with > the patch we didn't cross the threshold and thus started writeback only > once the dirty data was old enough, which is 30s (dirty_expire_interval). > > But that's all, you should be able to observe exactly the same writeback > behavior if you write less even without this patch. So I suspect that the > different writeback behavior is just triggering some bug in the NFS (either > on the client or the server side). The NFS3ERR_BADSESSION error you're > getting back sounds like something times out somewhere, falls out of cache > and reports this error (which doesn't happen if we writeback after 5s > instead of 30s). NFS guys maybe have better idea what's going on here. > > You could possibly workaround this problem (and verify my theory) by tuning > /proc/sys/vm/dirty_expire_centisecs to a lower value (say 500). This will > make inode writeback start earlier and thus should effectively mask the > problem again. Changing /proc/sys/vm/dirty_expire_centisecs did indeed prevent the issue from occurring. As an experiment, I tried to see what the lowest value I could use that worked, and it was also 500. Even setting it to 600 would cause it to error out eventually. This would indicate to me a server problem (which is unfortunate because that's much harder for me to debug), but perhaps the NFS folks could weigh in. > > Honza > -- > Jan Kara <jack@suse.com> > SUSE Labs, CR
On Wed, Oct 8, 2025 at 8:49 AM Joshua Watt <jpewhacker@gmail.com> wrote: > > On Wed, Oct 8, 2025 at 5:14 AM Jan Kara <jack@suse.cz> wrote: > > > > Hello! > > > > On Tue 07-10-25 10:17:11, Joshua Watt wrote: > > > From: Joshua Watt <jpewhacker@gmail.com> > > > > > > This patch strangely breaks NFS 4 clients for me. The behavior is that a > > > client will start getting an I/O error which in turn is caused by the client > > > getting a NFS3ERR_BADSESSION when attempting to write data to the server. I > > > bisected the kernel from the latest master > > > (9029dc666353504ea7c1ebfdf09bc1aab40f6147) to this commit (log below). Also, > > > when I revert this commit on master the bug disappears. > > > > > > The server is running kernel 5.4.161, and the client that exhibits the > > > behavior is running in qemux86, and has mounted the server with the options > > > rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,port=52049,timeo=600,retrans=2,sec=null,clientaddr=172.16.6.90,local_lock=none,addr=172.16.6.0 > > > > > > The program that I wrote to reproduce this is pretty simple; it does a file > > > lock over NFS, then writes data to the file once per second. After about 32 > > > seconds, it receives the I/O error, and this reproduced every time. I can > > > provide the sample program if necessary. > > > > This is indeed rather curious. > > > > > I also captured the NFS traffic both in the passing case and the failure case, > > > and can provide them if useful. > > > > > > I did look at the two dumps and I'm not exactly sure what the difference is, > > > other than with this patch the client tries to write every 30 seconds (and > > > fails), where as without it attempts to write back every 5 seconds. I have no > > > idea why this patch would cause this problem. > > > > So the change in writeback behavior is not surprising. The commit does > > modify the logic computing dirty limits in some corner cases and your > > description matches the fact that previously the computed limits were lower > > so we've started writeback after 5s (dirty_writeback_interval) while with > > the patch we didn't cross the threshold and thus started writeback only > > once the dirty data was old enough, which is 30s (dirty_expire_interval). > > > > But that's all, you should be able to observe exactly the same writeback > > behavior if you write less even without this patch. So I suspect that the > > different writeback behavior is just triggering some bug in the NFS (either > > on the client or the server side). The NFS3ERR_BADSESSION error you're > > getting back sounds like something times out somewhere, falls out of cache > > and reports this error (which doesn't happen if we writeback after 5s > > instead of 30s). NFS guys maybe have better idea what's going on here. > > > > You could possibly workaround this problem (and verify my theory) by tuning > > /proc/sys/vm/dirty_expire_centisecs to a lower value (say 500). This will > > make inode writeback start earlier and thus should effectively mask the > > problem again. > > Changing /proc/sys/vm/dirty_expire_centisecs did indeed prevent the > issue from occurring. As an experiment, I tried to see what the lowest > value I could use that worked, and it was also 500. Even setting it to > 600 would cause it to error out eventually. This would indicate to me > a server problem (which is unfortunate because that's much harder for > me to debug), but perhaps the NFS folks could weigh in. I figured out the problem. 
There was a bug in the NFS client where it would not send state renewals within the first 5 minutes after booting; prior to this change, that was masked in my test case because the 5 second dirty writeback interval would keep the connection alive without needing the state renewals (and my test always did a reboot). I've submitted a patch to fix the NFS client to the mailing list [1]. Sorry for the noise, and thanks for your help. [1]: https://lore.kernel.org/linux-nfs/20251008230935.738405-1-JPEWhacker@gmail.com/T/#u > > > > > Honza > > -- > > Jan Kara <jack@suse.com> > > SUSE Labs, CR
On Wed 08-10-25 17:14:31, Joshua Watt wrote: > On Wed, Oct 8, 2025 at 8:49 AM Joshua Watt <jpewhacker@gmail.com> wrote: > > On Wed, Oct 8, 2025 at 5:14 AM Jan Kara <jack@suse.cz> wrote: > > > Hello! > > > > > > On Tue 07-10-25 10:17:11, Joshua Watt wrote: > > > > From: Joshua Watt <jpewhacker@gmail.com> > > > > > > > > This patch strangely breaks NFS 4 clients for me. The behavior is that a > > > > client will start getting an I/O error which in turn is caused by the client > > > > getting a NFS3ERR_BADSESSION when attempting to write data to the server. I > > > > bisected the kernel from the latest master > > > > (9029dc666353504ea7c1ebfdf09bc1aab40f6147) to this commit (log below). Also, > > > > when I revert this commit on master the bug disappears. > > > > > > > > The server is running kernel 5.4.161, and the client that exhibits the > > > > behavior is running in qemux86, and has mounted the server with the options > > > > rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,port=52049,timeo=600,retrans=2,sec=null,clientaddr=172.16.6.90,local_lock=none,addr=172.16.6.0 > > > > > > > > The program that I wrote to reproduce this is pretty simple; it does a file > > > > lock over NFS, then writes data to the file once per second. After about 32 > > > > seconds, it receives the I/O error, and this reproduced every time. I can > > > > provide the sample program if necessary. > > > > > > This is indeed rather curious. > > > > > > > I also captured the NFS traffic both in the passing case and the failure case, > > > > and can provide them if useful. > > > > > > > > I did look at the two dumps and I'm not exactly sure what the difference is, > > > > other than with this patch the client tries to write every 30 seconds (and > > > > fails), where as without it attempts to write back every 5 seconds. I have no > > > > idea why this patch would cause this problem. > > > > > > So the change in writeback behavior is not surprising. The commit does > > > modify the logic computing dirty limits in some corner cases and your > > > description matches the fact that previously the computed limits were lower > > > so we've started writeback after 5s (dirty_writeback_interval) while with > > > the patch we didn't cross the threshold and thus started writeback only > > > once the dirty data was old enough, which is 30s (dirty_expire_interval). > > > > > > But that's all, you should be able to observe exactly the same writeback > > > behavior if you write less even without this patch. So I suspect that the > > > different writeback behavior is just triggering some bug in the NFS (either > > > on the client or the server side). The NFS3ERR_BADSESSION error you're > > > getting back sounds like something times out somewhere, falls out of cache > > > and reports this error (which doesn't happen if we writeback after 5s > > > instead of 30s). NFS guys maybe have better idea what's going on here. > > > > > > You could possibly workaround this problem (and verify my theory) by tuning > > > /proc/sys/vm/dirty_expire_centisecs to a lower value (say 500). This will > > > make inode writeback start earlier and thus should effectively mask the > > > problem again. > > > > Changing /proc/sys/vm/dirty_expire_centisecs did indeed prevent the > > issue from occurring. As an experiment, I tried to see what the lowest > > value I could use that worked, and it was also 500. Even setting it to > > 600 would cause it to error out eventually. 
This would indicate to me > > a server problem (which is unfortunate because that's much harder for > > me to debug), but perhaps the NFS folks could weigh in. > > I figured out the problem. There was a bug in the NFS client where it > would not send state renewals within the first 5 minutes after > booting; prior to this change, that was masked in my test case because > the 5 second dirty writeback interval would keep the connection alive > without needing the state renewals (and my test always did a reboot). > I've submitted a patch to fix the NFS client to the mailing list [1]. Cool that you've nailed it down :). > Sorry for the noise, and thanks for your help. You're welcome. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
Hi,

On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote:
> Address the feedback from "mm/page-writeback: raise wb_thresh to prevent
> write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53).
> The wb_thresh bumping logic is scattered across wb_position_ratio,
> __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency,
> consolidate all wb_thresh bumping logic into __wb_calc_thresh.
>
> Reviewed-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com>

This patch triggers a boot failure with one of my 'sheb' boot tests.
It is seen when trying to boot from flash (mtd). The log says

...
Starting network: 8139cp 0000:00:02.0 eth0: link down
udhcpc: started, v1.33.0
EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2
udhcpc: sending discover
udhcpc: sending discover
udhcpc: sending discover
EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2
udhcpc: no lease, failing
FAIL
/etc/init.d/S55runtest: line 29: awk: Permission denied
Found console ttySC1
/etc/init.d/S55runtest: line 45: awk: Permission denied
can't run '/sbin/getty': No such device or address

and boot stalls from there. There is no obvious kernel log message
associated with the problem. Reverting this patch fixes the problem.

Bisect log is attached for reference.

Guenter

---
# bad: [37136bf5c3a6f6b686d74f41837a6406bec6b7bc] Add linux-next specific files for 20250113
# good: [9d89551994a430b50c4fffcb1e617a057fa76e20] Linux 6.13-rc6
git bisect start 'HEAD~1' 'v6.13-rc6'
# bad: [25dcaaf9b3bdaa117b8eb722ebde76ec9ed30038] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
git bisect bad 25dcaaf9b3bdaa117b8eb722ebde76ec9ed30038
# bad: [435091688c6715e614213d84b1426dfb86fb1c11] Merge branch 'clk-next' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
git bisect bad 435091688c6715e614213d84b1426dfb86fb1c11
# bad: [51bdf4c7090c8ab260e3a7e7ddaa5d9a7303f541] Merge branch 'perf-tools-next' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
git bisect bad 51bdf4c7090c8ab260e3a7e7ddaa5d9a7303f541
# bad: [972ccc84da30e3fbab197c91caa10fbb04e487c8] foo
git bisect bad 972ccc84da30e3fbab197c91caa10fbb04e487c8
# bad: [243dd93e678dd4638b1005a13b085c3c5439447c] mm, swap: simplify percpu cluster updating
git bisect bad 243dd93e678dd4638b1005a13b085c3c5439447c
# bad: [09a3762697e810e311af23d10d79587da440e9dd] mm/hugetlb: support FOLL_FORCE|FOLL_WRITE
git bisect bad 09a3762697e810e311af23d10d79587da440e9dd
# bad: [6976848d304ef960e5ac5032611cfd8a9b1b6b01] docs: tmpfs: update the large folios policy for tmpfs and shmem
git bisect bad 6976848d304ef960e5ac5032611cfd8a9b1b6b01
# good: [1d3d61aef8467ce44ab3a06e6a0e3fcd930590a7] mailmap, docs: update email to carlos.bilbao@kernel.org
git bisect good 1d3d61aef8467ce44ab3a06e6a0e3fcd930590a7
# good: [4d9d1429f6deb91c66591074bbd8ca6aa4cba4dc] mm/page_alloc: move set_page_refcounted() to end of __alloc_pages()
git bisect good 4d9d1429f6deb91c66591074bbd8ca6aa4cba4dc
# good: [c1bc8fd460ebce85d7a768d8226861438e28bd53] mm: pgtable: make ptep_clear() non-atomic
git bisect good c1bc8fd460ebce85d7a768d8226861438e28bd53
# bad: [f8db55561f1b5c70ba2dd260206f295ffee9d1c9] kasan: make kasan_record_aux_stack_noalloc() the default behaviour
git bisect bad f8db55561f1b5c70ba2dd260206f295ffee9d1c9
# bad: [a82412684eaeda1ef8201472107de6a40843beec] mm: change type of cma_area_count to unsigned int
git bisect bad a82412684eaeda1ef8201472107de6a40843beec
# bad: [fcd31b7c35949323434d50416f896bc881a5c397] mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
git bisect bad fcd31b7c35949323434d50416f896bc881a5c397
# first bad commit: [fcd31b7c35949323434d50416f896bc881a5c397] mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
On Mon 13-01-25 15:05:25, Guenter Roeck wrote: > Hi, > > On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: > > Address the feedback from "mm/page-writeback: raise wb_thresh to prevent > > write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). > > The wb_thresh bumping logic is scattered across wb_position_ratio, > > __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, > > consolidate all wb_thresh bumping logic into __wb_calc_thresh. > > > > Reviewed-by: Jan Kara <jack@suse.cz> > > Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> > > This patch triggers a boot failure with one of my 'sheb' boot tests. > It is seen when trying to boot from flash (mtd). The log says > > ... > Starting network: 8139cp 0000:00:02.0 eth0: link down > udhcpc: started, v1.33.0 > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > udhcpc: sending discover > udhcpc: sending discover > udhcpc: sending discover > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 Thanks for report! Uh, I have to say I'm very confused by this. It is clear than when ext2 detects the directory corruption (we fail checking directory inode 363 which is likely /etc/init.d/), the boot fails in interesting ways. What is unclear is how the commit can possibly cause ext2 directory corruption. If you didn't verify reverting the commit fixes the issue, I'd be suspecting bad bisection but that obviously isn't the case :-) Ext2 is storing directory data in the page cache so at least it uses the subsystem which the patch impacts but how writeback throttling can cause ext2 directory corruption is beyond me. BTW, do you recreate the root filesystem before each boot? How exactly? Honza > udhcpc: no lease, failing > FAIL > /etc/init.d/S55runtest: line 29: awk: Permission denied > Found console ttySC1 > /etc/init.d/S55runtest: line 45: awk: Permission denied > can't run '/sbin/getty': No such device or address > > and boot stalls from there. There is no obvious kernel log message > associated with the problem. Reverting this patch fixes the problem. > > Bisect log is attached for reference. > > Guenter > -- Jan Kara <jack@suse.com> SUSE Labs, CR
On 1/14/25 05:19, Jan Kara wrote: > On Mon 13-01-25 15:05:25, Guenter Roeck wrote: >> Hi, >> >> On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: >>> Address the feedback from "mm/page-writeback: raise wb_thresh to prevent >>> write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). >>> The wb_thresh bumping logic is scattered across wb_position_ratio, >>> __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, >>> consolidate all wb_thresh bumping logic into __wb_calc_thresh. >>> >>> Reviewed-by: Jan Kara <jack@suse.cz> >>> Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> >> >> This patch triggers a boot failure with one of my 'sheb' boot tests. >> It is seen when trying to boot from flash (mtd). The log says >> >> ... >> Starting network: 8139cp 0000:00:02.0 eth0: link down >> udhcpc: started, v1.33.0 >> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 >> udhcpc: sending discover >> udhcpc: sending discover >> udhcpc: sending discover >> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > > Thanks for report! Uh, I have to say I'm very confused by this. It is clear > than when ext2 detects the directory corruption (we fail checking directory > inode 363 which is likely /etc/init.d/), the boot fails in interesting > ways. What is unclear is how the commit can possibly cause ext2 directory > corruption. If you didn't verify reverting the commit fixes the issue, I'd > be suspecting bad bisection but that obviously isn't the case :-) > > Ext2 is storing directory data in the page cache so at least it uses the > subsystem which the patch impacts but how writeback throttling can cause > ext2 directory corruption is beyond me. BTW, do you recreate the root > filesystem before each boot? How exactly? > I use pre-built root file systems. For sheb, they are at https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb I don't think this is related to ext2 itself. Booting an ext2 image from ata/ide drive works. Guenter
On Tue 14-01-25 07:01:08, Guenter Roeck wrote: > On 1/14/25 05:19, Jan Kara wrote: > > On Mon 13-01-25 15:05:25, Guenter Roeck wrote: > > > On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: > > > > Address the feedback from "mm/page-writeback: raise wb_thresh to prevent > > > > write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). > > > > The wb_thresh bumping logic is scattered across wb_position_ratio, > > > > __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, > > > > consolidate all wb_thresh bumping logic into __wb_calc_thresh. > > > > > > > > Reviewed-by: Jan Kara <jack@suse.cz> > > > > Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> > > > > > > This patch triggers a boot failure with one of my 'sheb' boot tests. > > > It is seen when trying to boot from flash (mtd). The log says > > > > > > ... > > > Starting network: 8139cp 0000:00:02.0 eth0: link down > > > udhcpc: started, v1.33.0 > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > > > udhcpc: sending discover > > > udhcpc: sending discover > > > udhcpc: sending discover > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > > > > Thanks for report! Uh, I have to say I'm very confused by this. It is clear > > than when ext2 detects the directory corruption (we fail checking directory > > inode 363 which is likely /etc/init.d/), the boot fails in interesting > > ways. What is unclear is how the commit can possibly cause ext2 directory > > corruption. If you didn't verify reverting the commit fixes the issue, I'd > > be suspecting bad bisection but that obviously isn't the case :-) > > > > Ext2 is storing directory data in the page cache so at least it uses the > > subsystem which the patch impacts but how writeback throttling can cause > > ext2 directory corruption is beyond me. BTW, do you recreate the root > > filesystem before each boot? How exactly? > > I use pre-built root file systems. For sheb, they are at > https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb Thanks. So the problematic directory is /usr/share/udhcpc/ where we read apparently bogus metadata at the beginning of that directory. > I don't think this is related to ext2 itself. Booting an ext2 image from > ata/ide drive works. Interesting this is specific to mtd. I'll read the patch carefully again if something rings a bell. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
On 1/15/25 08:07, Jan Kara wrote: > On Tue 14-01-25 07:01:08, Guenter Roeck wrote: >> On 1/14/25 05:19, Jan Kara wrote: >>> On Mon 13-01-25 15:05:25, Guenter Roeck wrote: >>>> On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: >>>>> Address the feedback from "mm/page-writeback: raise wb_thresh to prevent >>>>> write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). >>>>> The wb_thresh bumping logic is scattered across wb_position_ratio, >>>>> __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, >>>>> consolidate all wb_thresh bumping logic into __wb_calc_thresh. >>>>> >>>>> Reviewed-by: Jan Kara <jack@suse.cz> >>>>> Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> >>>> >>>> This patch triggers a boot failure with one of my 'sheb' boot tests. >>>> It is seen when trying to boot from flash (mtd). The log says >>>> >>>> ... >>>> Starting network: 8139cp 0000:00:02.0 eth0: link down >>>> udhcpc: started, v1.33.0 >>>> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 >>>> udhcpc: sending discover >>>> udhcpc: sending discover >>>> udhcpc: sending discover >>>> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 >>> >>> Thanks for report! Uh, I have to say I'm very confused by this. It is clear >>> than when ext2 detects the directory corruption (we fail checking directory >>> inode 363 which is likely /etc/init.d/), the boot fails in interesting >>> ways. What is unclear is how the commit can possibly cause ext2 directory >>> corruption. If you didn't verify reverting the commit fixes the issue, I'd >>> be suspecting bad bisection but that obviously isn't the case :-) >>> >>> Ext2 is storing directory data in the page cache so at least it uses the >>> subsystem which the patch impacts but how writeback throttling can cause >>> ext2 directory corruption is beyond me. BTW, do you recreate the root >>> filesystem before each boot? How exactly? >> >> I use pre-built root file systems. For sheb, they are at >> https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb > > Thanks. So the problematic directory is /usr/share/udhcpc/ where we > read apparently bogus metadata at the beginning of that directory. > >> I don't think this is related to ext2 itself. Booting an ext2 image from >> ata/ide drive works. > > Interesting this is specific to mtd. I'll read the patch carefully again if > something rings a bell. > Interesting. Is there some endianness issue, by any chance ? I only see the problem with sheb (big endian), not with sh (little endian). I'd suspect that it is an emulation bug, but it is odd that the problem did not show up before. Guenter
On Wed 15-01-25 08:41:43, Guenter Roeck wrote:
> On 1/15/25 08:07, Jan Kara wrote:
> > On Tue 14-01-25 07:01:08, Guenter Roeck wrote:
> > > On 1/14/25 05:19, Jan Kara wrote:
> > > > On Mon 13-01-25 15:05:25, Guenter Roeck wrote:
> > > > > On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote:
> > > > > > Address the feedback from "mm/page-writeback: raise wb_thresh to prevent
> > > > > > write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53).
> > > > > > The wb_thresh bumping logic is scattered across wb_position_ratio,
> > > > > > __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency,
> > > > > > consolidate all wb_thresh bumping logic into __wb_calc_thresh.
> > > > > >
> > > > > > Reviewed-by: Jan Kara <jack@suse.cz>
> > > > > > Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com>
> > > > >
> > > > > This patch triggers a boot failure with one of my 'sheb' boot tests.
> > > > > It is seen when trying to boot from flash (mtd). The log says
> > > > >
> > > > > ...
> > > > > Starting network: 8139cp 0000:00:02.0 eth0: link down
> > > > > udhcpc: started, v1.33.0
> > > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2
> > > > > udhcpc: sending discover
> > > > > udhcpc: sending discover
> > > > > udhcpc: sending discover
> > > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2
> > > >
> > > > Thanks for report! Uh, I have to say I'm very confused by this. It is clear
> > > > than when ext2 detects the directory corruption (we fail checking directory
> > > > inode 363 which is likely /etc/init.d/), the boot fails in interesting
> > > > ways. What is unclear is how the commit can possibly cause ext2 directory
> > > > corruption. If you didn't verify reverting the commit fixes the issue, I'd
> > > > be suspecting bad bisection but that obviously isn't the case :-)
> > > >
> > > > Ext2 is storing directory data in the page cache so at least it uses the
> > > > subsystem which the patch impacts but how writeback throttling can cause
> > > > ext2 directory corruption is beyond me. BTW, do you recreate the root
> > > > filesystem before each boot? How exactly?
> > >
> > > I use pre-built root file systems. For sheb, they are at
> > > https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb
> >
> > Thanks. So the problematic directory is /usr/share/udhcpc/ where we
> > read apparently bogus metadata at the beginning of that directory.
> >
> > > I don't think this is related to ext2 itself. Booting an ext2 image from
> > > ata/ide drive works.
> >
> > Interesting this is specific to mtd. I'll read the patch carefully again if
> > something rings a bell.
> >
>
> Interesting. Is there some endianness issue, by any chance ? I only see the problem
> with sheb (big endian), not with sh (little endian). I'd suspect that it is an
> emulation bug, but it is odd that the problem did not show up before.
So far I don't have a good explanation. Let me write down here the facts,
maybe it will trigger the aha effect.
1) Ext2 stores the metadata in little endian ordering. We observe the
problem with the first directory entry in the folio. Both entry->rec_len
(16-bit) and entry->inode (32-bit) appear to be seen in the wrong endianness
2) The function that fails is ext2_check_folio(). We kmap_local() the folio
in ext2_get_folio(), then in ext2_check_folio() we do:
ext2_dirent *p;
p = (ext2_dirent *)(kaddr + 0);
rec_len = ext2_rec_len_from_disk(p->rec_len);
^^^ value 3072 == 0x0c00 seen here instead of correct 0x000c
this value is invalid so we go to:
ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
"offset=%llu, inode=%lu, rec_len=%d, name_len=%d",
dir->i_ino, error, folio_pos(folio) + offs,
(unsigned long) le32_to_cpu(p->inode),
rec_len, p->name_len);
Here rec_len is printed so we see the wrong value. Also
le32_to_cpu(p->inode) is printed which also shows number with swapped byte
ordering (the message contains inode number 27393 == 0x00006b01 but the
proper inode number is 363 == 0x0000016b). This actually reveals more about
the problem because only the two bytes were swapped in the inode number
although we always treat it as a 32-bit entity. So this would indeed point
more at some architectural issue rather than a problem in the filesystem /
MM.
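
The two corrupted values quoted above are consistent with the bytes inside each 16-bit halfword of the on-disk little-endian data being swapped. A small sketch, not from the thread (the helper name is invented), reproducing just the arithmetic:

/* Demonstrates that swapping the bytes within each 16-bit halfword of the
 * correct on-disk values yields exactly the corrupted numbers seen in the
 * ext2_error() message above. */
#include <stdint.h>
#include <stdio.h>

/* Swap bytes inside each 16-bit half of a 32-bit value. */
static uint32_t swap_halfwords_bytewise(uint32_t v)
{
	return ((v & 0x00ff00ffu) << 8) | ((v & 0xff00ff00u) >> 8);
}

int main(void)
{
	uint32_t inode = 363;	/* 0x0000016b, the correct inode number  */
	uint16_t rec_len = 12;	/* 0x000c, the correct directory rec_len */

	/* Prints 27393 (0x00006b01) and 3072 (0x0c00), as in the log. */
	printf("inode:   %u -> %u\n", inode, swap_halfwords_bytewise(inode));
	printf("rec_len: %u -> %u\n", rec_len,
	       (uint16_t)swap_halfwords_bytewise(rec_len));
	return 0;
}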
Note that to get at this point in the boot we must have correctly
byteswapped many other directory entries in the filesystem. So the problem
must be triggered by some parallel activity happening in the system or
something like that.
3) The problem appears only with MTD storage, not with IDE/SATA on the same
system + filesystem image. It is unclear how the storage influences the
reproduction, other than that it influences the timing of events in the
system.
4) The problem reliably happens with "mm/page-writeback: Consolidate wb_thresh
bumping logic into __wb_calc_thresh", not without it. All this patch does
is that it possibly changes a limit at which processes dirtying pages in
the page cache get throttled. Thus there are fairly limited opportunities
for how it can cause damage (I've checked for possible UAF issues or memory
corruption but I don't really see any such possibility there, it is just
crunching numbers from the mm counters and takes decision based on the
result). This change doesn't have a direct effect on the ext2 directory code.
The only thing it does is that it possibly changes the code alignment of ext2
code if it gets linked afterwards into the vmlinux image (provided ext2 is
built in). Another
possibility is again that it changes timing of events in the system due to
differences in throttling of processes dirtying page cache.
So at this point I don't have a better explanation than to blame the HW. What
really tipped my conviction in this direction is the 16-bit byteswap in a
32-bit entity. Hence I guess I'll ask Andrew to put Jim's patch back into the
tree if you don't object.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On Thu, Jan 16, 2025 at 03:56:57PM +0100, Jan Kara wrote: ... > > > > Interesting. Is there some endianness issue, by any chance ? I only see the problem > > with sheb (big endian), not with sh (little endian). I'd suspect that it is an > > emulation bug, but it is odd that the problem did not show up before. > > So far I don't have a good explanation. Let me write down here the facts, > maybe it will trigger the aha effect. > > 1) Ext2 stores the metadata in little endian ordering. We observe the > problem with the first directory entry in the folio. Both entry->rec_len > (16-bit) and entry->inode (32-bit) appear to be seen in wrong endianity > > 2) The function that fails is ext2_check_folio(). We kmap_local() the folio > in ext2_get_folio(), then in ext2_check_folio() we do: > > ext2_dirent *p; > > p = (ext2_dirent *)(kaddr + 0); > rec_len = ext2_rec_len_from_disk(p->rec_len); > ^^^ value 3072 == 0x0c00 seen here instead of correct 0x000c > this value is invalid so we go to: > ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " > "offset=%llu, inode=%lu, rec_len=%d, name_len=%d", > dir->i_ino, error, folio_pos(folio) + offs, > (unsigned long) le32_to_cpu(p->inode), > rec_len, p->name_len); > > Here rec_len is printed so we see the wrong value. Also > le32_to_cpu(p->inode) is printed which also shows number with swapped byte > ordering (the message contains inode number 27393 == 0x00006b01 but the > proper inode number is 363 == 0x0000016b). This actually releals more about > the problem because only the two bytes were swapped in the inode number > although we always treat it as 32-bit entity. So this would indeed point > more at some architectural issue rather than a problem in the filesystem / > MM. > > Note that to get at this point in the boot we must have correctly > byteswapped many other directory entries in the filesystem. So the problem > must be triggered by some parallel activity happening in the system or > something like that. > > 3) The problem appears only with MTD storage, not with IDE/SATA on the same > system + filesystem image. It it unclear how the storage influences the > reproduction, rather than that it influences timing of events in the > system. > > 4) The problem reliably happens with "mm/page-writeback: Consolidate wb_thresh > bumping logic into __wb_calc_thresh", not without it. All this patch does > is that it possibly changes a limit at which processes dirtying pages in > the page cache get throttled. Thus there are fairly limited opportunities > for how it can cause damage (I've checked for possible UAF issues or memory > corruption but I don't really see any such possibility there, it is just > crunching numbers from the mm counters and takes decision based on the > result). This change doesn't have direct on the directory ext2 code. The only > thing it does is that it possibly changes code alignment of ext2 code if it > gets linked afterwards into vmlinux image (provided ext2 is built in). Another > possibility is again that it changes timing of events in the system due to > differences in throttling of processes dirtying page cache. > > So at this point I don't have a better explanation than blame the HW. What > really tipped my conviction in this direction is the 16-bit byteswap in a > 32-bit entity. Hence I guess I'll ask Andrew to put Jim's patch back into > tree if you don't object. I agree that this is most likely an emulation problem (it would not be the first one), so please go ahead. Thanks, Guenter
On Wed 15-01-25 17:07:36, Jan Kara wrote: > On Tue 14-01-25 07:01:08, Guenter Roeck wrote: > > On 1/14/25 05:19, Jan Kara wrote: > > > On Mon 13-01-25 15:05:25, Guenter Roeck wrote: > > > > On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: > > > > > Address the feedback from "mm/page-writeback: raise wb_thresh to prevent > > > > > write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). > > > > > The wb_thresh bumping logic is scattered across wb_position_ratio, > > > > > __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, > > > > > consolidate all wb_thresh bumping logic into __wb_calc_thresh. > > > > > > > > > > Reviewed-by: Jan Kara <jack@suse.cz> > > > > > Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> > > > > > > > > This patch triggers a boot failure with one of my 'sheb' boot tests. > > > > It is seen when trying to boot from flash (mtd). The log says > > > > > > > > ... > > > > Starting network: 8139cp 0000:00:02.0 eth0: link down > > > > udhcpc: started, v1.33.0 > > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > > > > udhcpc: sending discover > > > > udhcpc: sending discover > > > > udhcpc: sending discover > > > > EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 > > > > > > Thanks for report! Uh, I have to say I'm very confused by this. It is clear > > > than when ext2 detects the directory corruption (we fail checking directory > > > inode 363 which is likely /etc/init.d/), the boot fails in interesting > > > ways. What is unclear is how the commit can possibly cause ext2 directory > > > corruption. If you didn't verify reverting the commit fixes the issue, I'd > > > be suspecting bad bisection but that obviously isn't the case :-) > > > > > > Ext2 is storing directory data in the page cache so at least it uses the > > > subsystem which the patch impacts but how writeback throttling can cause > > > ext2 directory corruption is beyond me. BTW, do you recreate the root > > > filesystem before each boot? How exactly? > > > > I use pre-built root file systems. For sheb, they are at > > https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb > > Thanks. So the problematic directory is /usr/share/udhcpc/ where we > read apparently bogus metadata at the beginning of that directory. Ah, the metadata isn't bogus. But the entries in the directory are apparently byte-swapped (little vs big endian). Is the machine actually little or big endian? Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
On 1/15/25 08:28, Jan Kara wrote: > On Wed 15-01-25 17:07:36, Jan Kara wrote: >> On Tue 14-01-25 07:01:08, Guenter Roeck wrote: >>> On 1/14/25 05:19, Jan Kara wrote: >>>> On Mon 13-01-25 15:05:25, Guenter Roeck wrote: >>>>> On Thu, Nov 21, 2024 at 06:05:39PM +0800, Jim Zhao wrote: >>>>>> Address the feedback from "mm/page-writeback: raise wb_thresh to prevent >>>>>> write blocking with strictlimit"(39ac99852fca98ca44d52716d792dfaf24981f53). >>>>>> The wb_thresh bumping logic is scattered across wb_position_ratio, >>>>>> __wb_calc_thresh, and wb_update_dirty_ratelimit. For consistency, >>>>>> consolidate all wb_thresh bumping logic into __wb_calc_thresh. >>>>>> >>>>>> Reviewed-by: Jan Kara <jack@suse.cz> >>>>>> Signed-off-by: Jim Zhao <jimzhao.ai@gmail.com> >>>>> >>>>> This patch triggers a boot failure with one of my 'sheb' boot tests. >>>>> It is seen when trying to boot from flash (mtd). The log says >>>>> >>>>> ... >>>>> Starting network: 8139cp 0000:00:02.0 eth0: link down >>>>> udhcpc: started, v1.33.0 >>>>> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 >>>>> udhcpc: sending discover >>>>> udhcpc: sending discover >>>>> udhcpc: sending discover >>>>> EXT2-fs (mtdblock3): error: ext2_check_folio: bad entry in directory #363: : directory entry across blocks - offset=0, inode=27393, rec_len=3072, name_len=2 >>>> >>>> Thanks for report! Uh, I have to say I'm very confused by this. It is clear >>>> than when ext2 detects the directory corruption (we fail checking directory >>>> inode 363 which is likely /etc/init.d/), the boot fails in interesting >>>> ways. What is unclear is how the commit can possibly cause ext2 directory >>>> corruption. If you didn't verify reverting the commit fixes the issue, I'd >>>> be suspecting bad bisection but that obviously isn't the case :-) >>>> >>>> Ext2 is storing directory data in the page cache so at least it uses the >>>> subsystem which the patch impacts but how writeback throttling can cause >>>> ext2 directory corruption is beyond me. BTW, do you recreate the root >>>> filesystem before each boot? How exactly? >>> >>> I use pre-built root file systems. For sheb, they are at >>> https://github.com/groeck/linux-build-test/tree/master/rootfs/sheb >> >> Thanks. So the problematic directory is /usr/share/udhcpc/ where we >> read apparently bogus metadata at the beginning of that directory. > > Ah, the metadata isn't bogus. But the entries in the directory are > apparently byte-swapped (little vs big endian). Is the machine actually > little or big endian? > sheb is big endian. I only see the problem there, not with the little endian emulation. But that uses a different root file system. As I just mentioned in the other reply, it might well be an emulation bug, but it is odd that your patch would be required to expose that. Guenter