From: Tang Yizhou <yizhou.tang@shopee.com>
Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half
device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the
writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in
xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear
outdated.
In addition, Christoph mentioned that the xfs iomap process should be
similar to writeback, so xfs_max_map_length() was written following the
logic of writeback_chunk_size().
v2: Thanks for Christoph's advice. Resync with the writeback code.
Signed-off-by: Tang Yizhou <yizhou.tang@shopee.com>
---
fs/fs-writeback.c | 5 ----
fs/xfs/xfs_iomap.c | 52 ++++++++++++++++++++++++---------------
include/linux/writeback.h | 5 ++++
3 files changed, 37 insertions(+), 25 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d8bec3c1bb1f..31c72e207e1b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -31,11 +31,6 @@
#include <linux/memcontrol.h>
#include "internal.h"
-/*
- * 4MB minimal write chunk size
- */
-#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
-
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1e11f48814c0..80f759fa9534 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,6 +4,8 @@
* Copyright (c) 2016-2018 Christoph Hellwig.
* All Rights Reserved.
*/
+#include <linux/writeback.h>
+
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -744,6 +746,34 @@ xfs_ilock_for_iomap(
return 0;
}
+/*
+ * We cap the maximum length we map to a sane size to keep the chunks
+ * of work done where somewhat symmetric with the work writeback does.
+ * This is a completely arbitrary number pulled out of thin air as a
+ * best guess for initial testing.
+ *
+ * Following the logic of writeback_chunk_size(), the length will be
+ * rounded to the nearest 4MB boundary.
+ *
+ * Note that the values needs to be less than 32-bits wide until the
+ * lower level functions are updated.
+ */
+static loff_t
+xfs_max_map_length(struct inode *inode, loff_t length)
+{
+ struct bdi_writeback *wb;
+ long pages;
+
+ spin_lock(&inode->i_lock);
+ wb = inode_to_wb(wb);
+ pages = min(wb->avg_write_bandwidth / 2,
+ global_wb_domain.dirty_limit / DIRTY_SCOPE);
+ spin_unlock(&inode->i_lock);
+ pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES);
+
+ return min_t(loff_t, length, pages * PAGE_SIZE);
+}
+
/*
* Check that the imap we are going to return to the caller spans the entire
* range that the caller requested for the IO.
@@ -878,16 +908,7 @@ xfs_direct_write_iomap_begin(
if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
goto out_unlock;
- /*
- * We cap the maximum length we map to a sane size to keep the chunks
- * of work done where somewhat symmetric with the work writeback does.
- * This is a completely arbitrary number pulled out of thin air as a
- * best guess for initial testing.
- *
- * Note that the values needs to be less than 32-bits wide until the
- * lower level functions are updated.
- */
- length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+ length = xfs_max_map_length(inode, length);
end_fsb = xfs_iomap_end_fsb(mp, offset, length);
if (offset + length > XFS_ISIZE(ip))
@@ -1096,16 +1117,7 @@ xfs_buffered_write_iomap_begin(
allocfork = XFS_COW_FORK;
end_fsb = imap.br_startoff + imap.br_blockcount;
} else {
- /*
- * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
- * pages to keep the chunks of work done where somewhat
- * symmetric with the work writeback does. This is a completely
- * arbitrary number pulled out of thin air.
- *
- * Note that the values needs to be less than 32-bits wide until
- * the lower level functions are updated.
- */
- count = min_t(loff_t, count, 1024 * PAGE_SIZE);
+ count = xfs_max_map_length(inode, count);
end_fsb = xfs_iomap_end_fsb(mp, offset, count);
if (xfs_is_always_cow_inode(ip))
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..657bc4dd22d0 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -17,6 +17,11 @@ struct bio;
DECLARE_PER_CPU(int, dirty_throttle_leaks);
+/*
+ * 4MB minimal write chunk size
+ */
+#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
+
/*
* The global dirty threshold is normally equal to the global dirty limit,
* except when the system suddenly allocates a lot of anonymous memory and
--
2.25.1
Hi Tang, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] [also build test ERROR on brauner-vfs/vfs.all xfs-linux/for-next linus/master v6.12-rc2 next-20241008] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Tang-Yizhou/mm-page-writeback-c-Rename-BANDWIDTH_INTERVAL-to-BW_DIRTYLIMIT_INTERVAL/20241006-225445 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20241006152849.247152-4-yizhou.tang%40shopee.com patch subject: [PATCH v2 3/3] xfs: Let the max iomap length be consistent with the writeback code config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20241009/202410091647.eyo14ayt-lkp@intel.com/config) compiler: m68k-linux-gcc (GCC) 14.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241009/202410091647.eyo14ayt-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410091647.eyo14ayt-lkp@intel.com/ All errors (new ones prefixed by >>): fs/xfs/xfs_iomap.c: In function 'xfs_max_map_length': fs/xfs/xfs_iomap.c:768:14: error: implicit declaration of function 'inode_to_wb' [-Wimplicit-function-declaration] 768 | wb = inode_to_wb(wb); | ^~~~~~~~~~~ >> fs/xfs/xfs_iomap.c:768:12: error: assignment to 'struct bdi_writeback *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 768 | wb = inode_to_wb(wb); | ^ Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for GET_FREE_REGION Depends on [n]: SPARSEMEM [=n] Selected by [m]: - RESOURCE_KUNIT_TEST [=m] && RUNTIME_TESTING_MENU [=y] && KUNIT [=m] vim +768 fs/xfs/xfs_iomap.c 748 749 /* 750 * We cap the maximum length we map to a sane size to keep the chunks 751 * of work done where somewhat symmetric with the work writeback does. 752 * This is a completely arbitrary number pulled out of thin air as a 753 * best guess for initial testing. 754 * 755 * Following the logic of writeback_chunk_size(), the length will be 756 * rounded to the nearest 4MB boundary. 757 * 758 * Note that the values needs to be less than 32-bits wide until the 759 * lower level functions are updated. 760 */ 761 static loff_t 762 xfs_max_map_length(struct inode *inode, loff_t length) 763 { 764 struct bdi_writeback *wb; 765 long pages; 766 767 spin_lock(&inode->i_lock); > 768 wb = inode_to_wb(wb); 769 pages = min(wb->avg_write_bandwidth / 2, 770 global_wb_domain.dirty_limit / DIRTY_SCOPE); 771 spin_unlock(&inode->i_lock); 772 pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); 773 774 return min_t(loff_t, length, pages * PAGE_SIZE); 775 } 776 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Tang, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] [also build test ERROR on brauner-vfs/vfs.all xfs-linux/for-next linus/master v6.12-rc2 next-20241008] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Tang-Yizhou/mm-page-writeback-c-Rename-BANDWIDTH_INTERVAL-to-BW_DIRTYLIMIT_INTERVAL/20241006-225445 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20241006152849.247152-4-yizhou.tang%40shopee.com patch subject: [PATCH v2 3/3] xfs: Let the max iomap length be consistent with the writeback code config: x86_64-kexec (https://download.01.org/0day-ci/archive/20241009/202410091559.uWiy0Oj0-lkp@intel.com/config) compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241009/202410091559.uWiy0Oj0-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410091559.uWiy0Oj0-lkp@intel.com/ All errors (new ones prefixed by >>): >> fs/xfs/xfs_iomap.c:768:7: error: call to undeclared function 'inode_to_wb'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 768 | wb = inode_to_wb(wb); | ^ >> fs/xfs/xfs_iomap.c:768:5: error: incompatible integer to pointer conversion assigning to 'struct bdi_writeback *' from 'int' [-Wint-conversion] 768 | wb = inode_to_wb(wb); | ^ ~~~~~~~~~~~~~~~ 2 errors generated. vim +/inode_to_wb +768 fs/xfs/xfs_iomap.c 748 749 /* 750 * We cap the maximum length we map to a sane size to keep the chunks 751 * of work done where somewhat symmetric with the work writeback does. 752 * This is a completely arbitrary number pulled out of thin air as a 753 * best guess for initial testing. 754 * 755 * Following the logic of writeback_chunk_size(), the length will be 756 * rounded to the nearest 4MB boundary. 757 * 758 * Note that the values needs to be less than 32-bits wide until the 759 * lower level functions are updated. 760 */ 761 static loff_t 762 xfs_max_map_length(struct inode *inode, loff_t length) 763 { 764 struct bdi_writeback *wb; 765 long pages; 766 767 spin_lock(&inode->i_lock); > 768 wb = inode_to_wb(wb); 769 pages = min(wb->avg_write_bandwidth / 2, 770 global_wb_domain.dirty_limit / DIRTY_SCOPE); 771 spin_unlock(&inode->i_lock); 772 pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); 773 774 return min_t(loff_t, length, pages * PAGE_SIZE); 775 } 776 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Sun, Oct 06, 2024 at 11:28:49PM +0800, Tang Yizhou wrote: > From: Tang Yizhou <yizhou.tang@shopee.com> > > Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half > device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the > writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in > xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear > outdated. > > In addition, Christoph mentioned that the xfs iomap process should be > similar to writeback, so xfs_max_map_length() was written following the > logic of writeback_chunk_size(). > > v2: Thanks for Christoph's advice. Resync with the writeback code. > > Signed-off-by: Tang Yizhou <yizhou.tang@shopee.com> > --- > fs/fs-writeback.c | 5 ---- > fs/xfs/xfs_iomap.c | 52 ++++++++++++++++++++++++--------------- > include/linux/writeback.h | 5 ++++ > 3 files changed, 37 insertions(+), 25 deletions(-) > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index d8bec3c1bb1f..31c72e207e1b 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -31,11 +31,6 @@ > #include <linux/memcontrol.h> > #include "internal.h" > > -/* > - * 4MB minimal write chunk size > - */ > -#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) > - > /* > * Passed into wb_writeback(), essentially a subset of writeback_control > */ > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 1e11f48814c0..80f759fa9534 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -4,6 +4,8 @@ > * Copyright (c) 2016-2018 Christoph Hellwig. > * All Rights Reserved. > */ > +#include <linux/writeback.h> > + > #include "xfs.h" > #include "xfs_fs.h" > #include "xfs_shared.h" > @@ -744,6 +746,34 @@ xfs_ilock_for_iomap( > return 0; > } > > +/* > + * We cap the maximum length we map to a sane size to keep the chunks > + * of work done where somewhat symmetric with the work writeback does. > + * This is a completely arbitrary number pulled out of thin air as a > + * best guess for initial testing. > + * > + * Following the logic of writeback_chunk_size(), the length will be > + * rounded to the nearest 4MB boundary. > + * > + * Note that the values needs to be less than 32-bits wide until the > + * lower level functions are updated. > + */ > +static loff_t > +xfs_max_map_length(struct inode *inode, loff_t length) > +{ > + struct bdi_writeback *wb; > + long pages; > + > + spin_lock(&inode->i_lock); > + wb = inode_to_wb(wb); > + pages = min(wb->avg_write_bandwidth / 2, > + global_wb_domain.dirty_limit / DIRTY_SCOPE); > + spin_unlock(&inode->i_lock); > + pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); > + > + return min_t(loff_t, length, pages * PAGE_SIZE); > +} I think this map size limiting is completely unnecessary for buffered writeback - buffered writes are throttled against writeback by balance_dirty_pages(), not by extent allocation size. The size of the delayed allocation or the overwrite map is largely irrelevant - we're going to map the entire range during a write, do it just doesn't matter what size the mapping is... > /* > * Check that the imap we are going to return to the caller spans the entire > * range that the caller requested for the IO. > @@ -878,16 +908,7 @@ xfs_direct_write_iomap_begin( > if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) > goto out_unlock; > > - /* > - * We cap the maximum length we map to a sane size to keep the chunks > - * of work done where somewhat symmetric with the work writeback does. > - * This is a completely arbitrary number pulled out of thin air as a > - * best guess for initial testing. > - * > - * Note that the values needs to be less than 32-bits wide until the > - * lower level functions are updated. > - */ > - length = min_t(loff_t, length, 1024 * PAGE_SIZE); > + length = xfs_max_map_length(inode, length); > end_fsb = xfs_iomap_end_fsb(mp, offset, length); And I'd just remove this altogether from the direct IO path. The DIO code will chain as many bios as it takes to issue Io over the entire mapping that is returned. Given that buffered writeback has done arbitrarily large writeback for quite some time now, I don't think there is any need to limit the bio chain lengths in the DIO path like this anymore, either. i.e. I'd be looking at removing the "arbitrary size limit" code, not making it more complex. -Dave. -- Dave Chinner david@fromorbit.com
On Tue, Oct 08, 2024 at 08:36:02AM +1100, Dave Chinner wrote: > I think this map size limiting is completely unnecessary for > buffered writeback - buffered writes are throttled against writeback > by balance_dirty_pages(), not by extent allocation size. The size of > the delayed allocation or the overwrite map is largely irrelevant - > we're going to map the entire range during a write, do it just > doesn't matter what size the mapping is... Yes. This goes back all the way to your original iomap prototype, but even back then balance_dirty_pages should have done all the work.
On Sun 06-10-24 23:28:49, Tang Yizhou wrote: > From: Tang Yizhou <yizhou.tang@shopee.com> > > Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half > device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the > writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in > xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear > outdated. > > In addition, Christoph mentioned that the xfs iomap process should be > similar to writeback, so xfs_max_map_length() was written following the > logic of writeback_chunk_size(). Well, I'd defer to XFS maintainers here but at least to me it does not make a huge amount of sense to scale mapping size with the device writeback throughput. E.g. if the device writeback throughput is low, it does not mean that it is good to perform current write(2) in small chunks... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
On Mon, Oct 07, 2024 at 06:36:09PM +0200, Jan Kara wrote: > On Sun 06-10-24 23:28:49, Tang Yizhou wrote: > > From: Tang Yizhou <yizhou.tang@shopee.com> > > > > Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half > > device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the > > writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in > > xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear > > outdated. > > > > In addition, Christoph mentioned that the xfs iomap process should be > > similar to writeback, so xfs_max_map_length() was written following the > > logic of writeback_chunk_size(). > > Well, I'd defer to XFS maintainers here but at least to me it does not make > a huge amount of sense to scale mapping size with the device writeback > throughput. E.g. if the device writeback throughput is low, it does not > mean that it is good to perform current write(2) in small chunks... Yeah, I was wondering if it still makes sense to throttle incoming writes given that iomap will just call back for more mappings anyway. --D > Honza > > -- > Jan Kara <jack@suse.com> > SUSE Labs, CR >
On Sun, Oct 06, 2024 at 11:28:49PM +0800, Tang Yizhou wrote: > From: Tang Yizhou <yizhou.tang@shopee.com> > > Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half > device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the > writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in > xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear > outdated. > > In addition, Christoph mentioned that the xfs iomap process should be > similar to writeback, so xfs_max_map_length() was written following the > logic of writeback_chunk_size(). > > v2: Thanks for Christoph's advice. Resync with the writeback code. > > Signed-off-by: Tang Yizhou <yizhou.tang@shopee.com> > --- > fs/fs-writeback.c | 5 ---- > fs/xfs/xfs_iomap.c | 52 ++++++++++++++++++++++++--------------- > include/linux/writeback.h | 5 ++++ > 3 files changed, 37 insertions(+), 25 deletions(-) > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index d8bec3c1bb1f..31c72e207e1b 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -31,11 +31,6 @@ > #include <linux/memcontrol.h> > #include "internal.h" > > -/* > - * 4MB minimal write chunk size > - */ > -#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) > - > /* > * Passed into wb_writeback(), essentially a subset of writeback_control > */ > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 1e11f48814c0..80f759fa9534 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -4,6 +4,8 @@ > * Copyright (c) 2016-2018 Christoph Hellwig. > * All Rights Reserved. > */ > +#include <linux/writeback.h> > + > #include "xfs.h" > #include "xfs_fs.h" > #include "xfs_shared.h" > @@ -744,6 +746,34 @@ xfs_ilock_for_iomap( > return 0; > } > > +/* > + * We cap the maximum length we map to a sane size to keep the chunks > + * of work done where somewhat symmetric with the work writeback does. > + * This is a completely arbitrary number pulled out of thin air as a > + * best guess for initial testing. > + * > + * Following the logic of writeback_chunk_size(), the length will be > + * rounded to the nearest 4MB boundary. > + * > + * Note that the values needs to be less than 32-bits wide until the > + * lower level functions are updated. > + */ > +static loff_t > +xfs_max_map_length(struct inode *inode, loff_t length) > +{ > + struct bdi_writeback *wb; > + long pages; > + > + spin_lock(&inode->i_lock); Why's it necessary to hold a spinlock? AFAICT writeback_chunk_size doesn't hold it. > + wb = inode_to_wb(wb); Hmm, it looks like you're trying to cap writes based on storage device bandwidth instead of a static limit. That could be nifty, but does this work for a file on the realtime device? Or any device that isn't the super_block s_bdi? > + pages = min(wb->avg_write_bandwidth / 2, > + global_wb_domain.dirty_limit / DIRTY_SCOPE); > + spin_unlock(&inode->i_lock); > + pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); > + > + return min_t(loff_t, length, pages * PAGE_SIZE); > +} There's nothing in here that's xfs-specific, shouldn't this be a fs-writeback.c function for any other filesystem that might want to cap the size of a write? --D > + > /* > * Check that the imap we are going to return to the caller spans the entire > * range that the caller requested for the IO. > @@ -878,16 +908,7 @@ xfs_direct_write_iomap_begin( > if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) > goto out_unlock; > > - /* > - * We cap the maximum length we map to a sane size to keep the chunks > - * of work done where somewhat symmetric with the work writeback does. > - * This is a completely arbitrary number pulled out of thin air as a > - * best guess for initial testing. > - * > - * Note that the values needs to be less than 32-bits wide until the > - * lower level functions are updated. > - */ > - length = min_t(loff_t, length, 1024 * PAGE_SIZE); > + length = xfs_max_map_length(inode, length); > end_fsb = xfs_iomap_end_fsb(mp, offset, length); > > if (offset + length > XFS_ISIZE(ip)) > @@ -1096,16 +1117,7 @@ xfs_buffered_write_iomap_begin( > allocfork = XFS_COW_FORK; > end_fsb = imap.br_startoff + imap.br_blockcount; > } else { > - /* > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > - * pages to keep the chunks of work done where somewhat > - * symmetric with the work writeback does. This is a completely > - * arbitrary number pulled out of thin air. > - * > - * Note that the values needs to be less than 32-bits wide until > - * the lower level functions are updated. > - */ > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > + count = xfs_max_map_length(inode, count); > end_fsb = xfs_iomap_end_fsb(mp, offset, count); > > if (xfs_is_always_cow_inode(ip)) > diff --git a/include/linux/writeback.h b/include/linux/writeback.h > index d6db822e4bb3..657bc4dd22d0 100644 > --- a/include/linux/writeback.h > +++ b/include/linux/writeback.h > @@ -17,6 +17,11 @@ struct bio; > > DECLARE_PER_CPU(int, dirty_throttle_leaks); > > +/* > + * 4MB minimal write chunk size > + */ > +#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) > + > /* > * The global dirty threshold is normally equal to the global dirty limit, > * except when the system suddenly allocates a lot of anonymous memory and > -- > 2.25.1 > >
On Mon, Oct 7, 2024 at 12:30 AM Darrick J. Wong <djwong@kernel.org> wrote: > > On Sun, Oct 06, 2024 at 11:28:49PM +0800, Tang Yizhou wrote: > > From: Tang Yizhou <yizhou.tang@shopee.com> > > > > Since commit 1a12d8bd7b29 ("writeback: scale IO chunk size up to half > > device bandwidth"), macro MAX_WRITEBACK_PAGES has been removed from the > > writeback path. Therefore, the MAX_WRITEBACK_PAGES comments in > > xfs_direct_write_iomap_begin() and xfs_buffered_write_iomap_begin() appear > > outdated. > > > > In addition, Christoph mentioned that the xfs iomap process should be > > similar to writeback, so xfs_max_map_length() was written following the > > logic of writeback_chunk_size(). > > > > v2: Thanks for Christoph's advice. Resync with the writeback code. > > > > Signed-off-by: Tang Yizhou <yizhou.tang@shopee.com> > > --- > > fs/fs-writeback.c | 5 ---- > > fs/xfs/xfs_iomap.c | 52 ++++++++++++++++++++++++--------------- > > include/linux/writeback.h | 5 ++++ > > 3 files changed, 37 insertions(+), 25 deletions(-) > > > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > > index d8bec3c1bb1f..31c72e207e1b 100644 > > --- a/fs/fs-writeback.c > > +++ b/fs/fs-writeback.c > > @@ -31,11 +31,6 @@ > > #include <linux/memcontrol.h> > > #include "internal.h" > > > > -/* > > - * 4MB minimal write chunk size > > - */ > > -#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) > > - > > /* > > * Passed into wb_writeback(), essentially a subset of writeback_control > > */ > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > > index 1e11f48814c0..80f759fa9534 100644 > > --- a/fs/xfs/xfs_iomap.c > > +++ b/fs/xfs/xfs_iomap.c > > @@ -4,6 +4,8 @@ > > * Copyright (c) 2016-2018 Christoph Hellwig. > > * All Rights Reserved. > > */ > > +#include <linux/writeback.h> > > + > > #include "xfs.h" > > #include "xfs_fs.h" > > #include "xfs_shared.h" > > @@ -744,6 +746,34 @@ xfs_ilock_for_iomap( > > return 0; > > } > > > > +/* > > + * We cap the maximum length we map to a sane size to keep the chunks > > + * of work done where somewhat symmetric with the work writeback does. > > + * This is a completely arbitrary number pulled out of thin air as a > > + * best guess for initial testing. > > + * > > + * Following the logic of writeback_chunk_size(), the length will be > > + * rounded to the nearest 4MB boundary. > > + * > > + * Note that the values needs to be less than 32-bits wide until the > > + * lower level functions are updated. > > + */ > > +static loff_t > > +xfs_max_map_length(struct inode *inode, loff_t length) > > +{ > > + struct bdi_writeback *wb; > > + long pages; > > + > > + spin_lock(&inode->i_lock); > > Why's it necessary to hold a spinlock? AFAICT writeback_chunk_size > doesn't hold it. > Since the caller of writeback_chunk_size(), writeback_sb_inodes(), already holds wb->list_lock. According to the function comments of inode_to_wb(), holding either inode->i_lock or the associated wb's list_lock is acceptable. > > + wb = inode_to_wb(wb); > > Hmm, it looks like you're trying to cap writes based on storage device > bandwidth instead of a static limit. That could be nifty, but does this > work for a file on the realtime device? Or any device that isn't the > super_block s_bdi? > I'm not very sure. Considering that the implementation of writeback_chunk_size() in the writeback path has remained unchanged for many years, I believe its logic works well in various scenarios. > > + pages = min(wb->avg_write_bandwidth / 2, > > + global_wb_domain.dirty_limit / DIRTY_SCOPE); > > + spin_unlock(&inode->i_lock); > > + pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); > > + > > + return min_t(loff_t, length, pages * PAGE_SIZE); > > +} > > There's nothing in here that's xfs-specific, shouldn't this be a > fs-writeback.c function for any other filesystem that might want to cap > the size of a write? > These logics are indeed not xfs-specific. However, I checked the related implementations in ext4 and btrfs, and it seems that these file systems do not require similar logic to cap the size. If we move the implementation of this function to fs-writeback.c, the only user would still be xfs. Additionally, there are some differences in the implementation details between this function and writeback_chunk_size(), so it doesn't seem convenient to reuse the code. Yi > --D > > > + > > /* > > * Check that the imap we are going to return to the caller spans the entire > > * range that the caller requested for the IO. > > @@ -878,16 +908,7 @@ xfs_direct_write_iomap_begin( > > if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) > > goto out_unlock; > > > > - /* > > - * We cap the maximum length we map to a sane size to keep the chunks > > - * of work done where somewhat symmetric with the work writeback does. > > - * This is a completely arbitrary number pulled out of thin air as a > > - * best guess for initial testing. > > - * > > - * Note that the values needs to be less than 32-bits wide until the > > - * lower level functions are updated. > > - */ > > - length = min_t(loff_t, length, 1024 * PAGE_SIZE); > > + length = xfs_max_map_length(inode, length); > > end_fsb = xfs_iomap_end_fsb(mp, offset, length); > > > > if (offset + length > XFS_ISIZE(ip)) > > @@ -1096,16 +1117,7 @@ xfs_buffered_write_iomap_begin( > > allocfork = XFS_COW_FORK; > > end_fsb = imap.br_startoff + imap.br_blockcount; > > } else { > > - /* > > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > > - * pages to keep the chunks of work done where somewhat > > - * symmetric with the work writeback does. This is a completely > > - * arbitrary number pulled out of thin air. > > - * > > - * Note that the values needs to be less than 32-bits wide until > > - * the lower level functions are updated. > > - */ > > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > > + count = xfs_max_map_length(inode, count); > > end_fsb = xfs_iomap_end_fsb(mp, offset, count); > > > > if (xfs_is_always_cow_inode(ip)) > > diff --git a/include/linux/writeback.h b/include/linux/writeback.h > > index d6db822e4bb3..657bc4dd22d0 100644 > > --- a/include/linux/writeback.h > > +++ b/include/linux/writeback.h > > @@ -17,6 +17,11 @@ struct bio; > > > > DECLARE_PER_CPU(int, dirty_throttle_leaks); > > > > +/* > > + * 4MB minimal write chunk size > > + */ > > +#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) > > + > > /* > > * The global dirty threshold is normally equal to the global dirty limit, > > * except when the system suddenly allocates a lot of anonymous memory and > > -- > > 2.25.1 > > > >
© 2016 - 2024 Red Hat, Inc.