fs/iomap/direct-io.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-)
From: Pankaj Raghav <p.raghav@samsung.com>
iomap_dio_zero() uses custom-allocated memory of zeroes for padding
zeroes. This was a temporary solution until there was a way to request a
zero folio larger than PAGE_SIZE.
Use the largest_zero_folio() function instead of the custom-allocated
memory of zeroes. There is no guarantee that largest_zero_folio() will
always return a PMD-sized folio. Adapt the code so that it also works if
largest_zero_folio() returns the ZERO_PAGE.
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
fs/iomap/direct-io.c | 38 +++++++++++++++-----------------------
1 file changed, 15 insertions(+), 23 deletions(-)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b84f6af2eb4c..a7a281ea3e50 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -24,13 +24,6 @@
#define IOMAP_DIO_WRITE (1U << 30)
#define IOMAP_DIO_DIRTY (1U << 31)
-/*
- * Used for sub block zeroing in iomap_dio_zero()
- */
-#define IOMAP_ZERO_PAGE_SIZE (SZ_64K)
-#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE))
-static struct page *zero_page;
-
struct iomap_dio {
struct kiocb *iocb;
const struct iomap_dio_ops *dops;
@@ -285,24 +278,35 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
{
struct inode *inode = file_inode(dio->iocb->ki_filp);
struct bio *bio;
+ struct folio *zero_folio = largest_zero_folio();
+ int nr_vecs = max(1, i_blocksize(inode) / folio_size(zero_folio));
if (!len)
return 0;
+
/*
- * Max block size supported is 64k
+ * This limit shall never be reached as most filesystems have a
+ * maximum blocksize of 64k.
*/
- if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE))
+ if (WARN_ON_ONCE(nr_vecs > BIO_MAX_VECS))
return -EINVAL;
- bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
+ bio = iomap_dio_alloc_bio(iter, dio, nr_vecs,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- __bio_add_page(bio, zero_page, len, 0);
+ while (len > 0) {
+ unsigned int io_len = min(len, folio_size(zero_folio));
+
+ bio_add_folio_nofail(bio, zero_folio, io_len, 0);
+ len -= io_len;
+ }
iomap_dio_submit_bio(iter, dio, bio, pos);
+
return 0;
}
@@ -822,15 +826,3 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return iomap_dio_complete(dio);
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);
-
-static int __init iomap_dio_init(void)
-{
- zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
- IOMAP_ZERO_PAGE_ORDER);
-
- if (!zero_page)
- return -ENOMEM;
-
- return 0;
-}
-fs_initcall(iomap_dio_init);
base-commit: 931e46dcbc7e6035a90e9c4a27a84b660e083f0a
--
2.50.1
On Thu, 14 Aug 2025 16:21:37 +0200, Pankaj Raghav (Samsung) wrote: > iomap_dio_zero() uses a custom allocated memory of zeroes for padding > zeroes. This was a temporary solution until there was a way to request a > zero folio that was greater than the PAGE_SIZE. > > Use largest_zero_folio() function instead of using the custom allocated > memory of zeroes. There is no guarantee from largest_zero_folio() > function that it will always return a PMD sized folio. Adapt the code so > that it can also work if largest_zero_folio() returns a ZERO_PAGE. > > [...] Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. Patches in the vfs-6.18.iomap branch should appear in linux-next soon. Please report any outstanding bugs that were missed during review in a new review to the original patch series allowing us to drop it. It's encouraged to provide Acked-bys and Reviewed-bys even though the patch has now been applied. If possible patch trailers will be updated. Note that commit hashes shown below are subject to change due to rebase, trailer updates or similar. If in doubt, please check the listed branch. tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git branch: vfs-6.18.iomap [1/1] iomap: use largest_zero_folio() in iomap_dio_zero() https://git.kernel.org/vfs/vfs/c/5589673e8d8d
On Fri, Aug 15, 2025 at 04:02:58PM +0200, Christian Brauner wrote: > On Thu, 14 Aug 2025 16:21:37 +0200, Pankaj Raghav (Samsung) wrote: > > iomap_dio_zero() uses a custom allocated memory of zeroes for padding > > zeroes. This was a temporary solution until there was a way to request a > > zero folio that was greater than the PAGE_SIZE. > > > > Use largest_zero_folio() function instead of using the custom allocated > > memory of zeroes. There is no guarantee from largest_zero_folio() > > function that it will always return a PMD sized folio. Adapt the code so > > that it can also work if largest_zero_folio() returns a ZERO_PAGE. > > > > [...] > > Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. > Patches in the vfs-6.18.iomap branch should appear in linux-next soon. Hmm, AFAIK largest_zero_folio just showed up in mm.git a few days ago. Wouldn't it be better to queue up this change there?
Em 18/08/2025 01:41, Christoph Hellwig escreveu: > On Fri, Aug 15, 2025 at 04:02:58PM +0200, Christian Brauner wrote: >> On Thu, 14 Aug 2025 16:21:37 +0200, Pankaj Raghav (Samsung) wrote: >>> iomap_dio_zero() uses a custom allocated memory of zeroes for padding >>> zeroes. This was a temporary solution until there was a way to request a >>> zero folio that was greater than the PAGE_SIZE. >>> >>> Use largest_zero_folio() function instead of using the custom allocated >>> memory of zeroes. There is no guarantee from largest_zero_folio() >>> function that it will always return a PMD sized folio. Adapt the code so >>> that it can also work if largest_zero_folio() returns a ZERO_PAGE. >>> >>> [...] >> >> Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. >> Patches in the vfs-6.18.iomap branch should appear in linux-next soon. > > Hmm, AFAIK largest_zero_folio just showed up in mm.git a few days ago. > Wouldn't it be better to queue up this change there? > > Indeed, compiling vfs/vfs.all as of today fails with: fs/iomap/direct-io.c:281:36: error: implicit declaration of function ‘largest_zero_folio’; did you mean ‘is_zero_folio’? [-Wimplicit-function-declaration] Reverting "iomap: use largest_zero_folio() in iomap_dio_zero()" fixes the compilation.
On 18/08/2025 16:12, André Almeida wrote: > Em 18/08/2025 01:41, Christoph Hellwig escreveu: >> On Fri, Aug 15, 2025 at 04:02:58PM +0200, Christian Brauner wrote: >>> On Thu, 14 Aug 2025 16:21:37 +0200, Pankaj Raghav (Samsung) wrote: >>>> iomap_dio_zero() uses a custom allocated memory of zeroes for padding >>>> zeroes. This was a temporary solution until there was a way to >>>> request a >>>> zero folio that was greater than the PAGE_SIZE. >>>> >>>> Use largest_zero_folio() function instead of using the custom allocated >>>> memory of zeroes. There is no guarantee from largest_zero_folio() >>>> function that it will always return a PMD sized folio. Adapt the >>>> code so >>>> that it can also work if largest_zero_folio() returns a ZERO_PAGE. >>>> >>>> [...] >>> >>> Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. >>> Patches in the vfs-6.18.iomap branch should appear in linux-next soon. >> >> Hmm, AFAIK largest_zero_folio just showed up in mm.git a few days ago. >> Wouldn't it be better to queue up this change there? >> >> > > Indeed, compiling vfs/vfs.all as of today fails with: > > fs/iomap/direct-io.c:281:36: error: implicit declaration of function > ‘largest_zero_folio’; did you mean ‘is_zero_folio’? [-Wimplicit- > function-declaration] > > Reverting "iomap: use largest_zero_folio() in iomap_dio_zero()" fixes > the compilation. > I also got some reports from Stephen in linux-next. As Christoph suggested, maybe we drop the patches from Christian's tree and queue it up via Andrew's tree -- Pankaj
On Mon, 18 Aug 2025 16:35:04 +0200 Pankaj Raghav <p.raghav@samsung.com> wrote: > >>> Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. > >>> Patches in the vfs-6.18.iomap branch should appear in linux-next soon. > >> > >> Hmm, AFAIK largest_zero_folio just showed up in mm.git a few days ago. > >> Wouldn't it be better to queue up this change there? > >> > >> > > > > Indeed, compiling vfs/vfs.all as of today fails with: > > > > fs/iomap/direct-io.c:281:36: error: implicit declaration of function > > ‘largest_zero_folio’; did you mean ‘is_zero_folio’? [-Wimplicit- > > function-declaration] > > > > Reverting "iomap: use largest_zero_folio() in iomap_dio_zero()" fixes > > the compilation. > > > > I also got some reports from Stephen in linux-next. As Christoph > suggested, maybe we drop the patches from Christian's tree and queue it > up via Andrew's tree Thanks, I added it to mm.git.
On Mon, Aug 18, 2025 at 08:14:03PM -0700, Andrew Morton wrote: > On Mon, 18 Aug 2025 16:35:04 +0200 Pankaj Raghav <p.raghav@samsung.com> wrote: > > > >>> Applied to the vfs-6.18.iomap branch of the vfs/vfs.git tree. > > >>> Patches in the vfs-6.18.iomap branch should appear in linux-next soon. > > >> > > >> Hmm, AFAIK largest_zero_folio just showed up in mm.git a few days ago. > > >> Wouldn't it be better to queue up this change there? > > >> > > >> > > > > > > Indeed, compiling vfs/vfs.all as of today fails with: > > > > > > fs/iomap/direct-io.c:281:36: error: implicit declaration of function > > > ‘largest_zero_folio’; did you mean ‘is_zero_folio’? [-Wimplicit- > > > function-declaration] > > > > > > Reverting "iomap: use largest_zero_folio() in iomap_dio_zero()" fixes > > > the compilation. > > > > > > > I also got some reports from Stephen in linux-next. As Christoph > > suggested, maybe we drop the patches from Christian's tree and queue it > > up via Andrew's tree > > Thanks, I added it to mm.git. Please ask before you move stuff around between trees. You've complained to me before about this before too. I haven't agreed to that at all. There's a bunch more iomap work coming and this will most certainly not start going through mm trees. So if there's merge conflicts where we rely on a helper that's in mm-next the good thing would simply to provide a branch for us with that helper that we can base this off of.
On Thu, Aug 14, 2025 at 04:21:37PM +0200, Pankaj Raghav (Samsung) wrote: > From: Pankaj Raghav <p.raghav@samsung.com> > > iomap_dio_zero() uses a custom allocated memory of zeroes for padding > zeroes. This was a temporary solution until there was a way to request a > zero folio that was greater than the PAGE_SIZE. > > Use largest_zero_folio() function instead of using the custom allocated > memory of zeroes. There is no guarantee from largest_zero_folio() > function that it will always return a PMD sized folio. Adapt the code so > that it can also work if largest_zero_folio() returns a ZERO_PAGE. > > Signed-off-by: Pankaj Raghav <p.raghav@samsung.com> Seems fine to me, though I wonder if this oughn't go along with the rest of the largest_zero_folio changes? Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> --D > --- > fs/iomap/direct-io.c | 38 +++++++++++++++----------------------- > 1 file changed, 15 insertions(+), 23 deletions(-) > > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c > index b84f6af2eb4c..a7a281ea3e50 100644 > --- a/fs/iomap/direct-io.c > +++ b/fs/iomap/direct-io.c > @@ -24,13 +24,6 @@ > #define IOMAP_DIO_WRITE (1U << 30) > #define IOMAP_DIO_DIRTY (1U << 31) > > -/* > - * Used for sub block zeroing in iomap_dio_zero() > - */ > -#define IOMAP_ZERO_PAGE_SIZE (SZ_64K) > -#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE)) > -static struct page *zero_page; > - > struct iomap_dio { > struct kiocb *iocb; > const struct iomap_dio_ops *dops; > @@ -285,24 +278,35 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, > { > struct inode *inode = file_inode(dio->iocb->ki_filp); > struct bio *bio; > + struct folio *zero_folio = largest_zero_folio(); > + int nr_vecs = max(1, i_blocksize(inode) / folio_size(zero_folio)); > > if (!len) > return 0; > + > /* > - * Max block size supported is 64k > + * This limit shall never be reached as most filesystems have a > + * maximum blocksize of 64k. 
> */ > - if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE)) > + if (WARN_ON_ONCE(nr_vecs > BIO_MAX_VECS)) > return -EINVAL; > > - bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); > + bio = iomap_dio_alloc_bio(iter, dio, nr_vecs, > + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); > fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, > GFP_KERNEL); > bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); > bio->bi_private = dio; > bio->bi_end_io = iomap_dio_bio_end_io; > > - __bio_add_page(bio, zero_page, len, 0); > + while (len > 0) { > + unsigned int io_len = min(len, folio_size(zero_folio)); > + > + bio_add_folio_nofail(bio, zero_folio, io_len, 0); > + len -= io_len; > + } > iomap_dio_submit_bio(iter, dio, bio, pos); > + > return 0; > } > > @@ -822,15 +826,3 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > return iomap_dio_complete(dio); > } > EXPORT_SYMBOL_GPL(iomap_dio_rw); > - > -static int __init iomap_dio_init(void) > -{ > - zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, > - IOMAP_ZERO_PAGE_ORDER); > - > - if (!zero_page) > - return -ENOMEM; > - > - return 0; > -} > -fs_initcall(iomap_dio_init); > > base-commit: 931e46dcbc7e6035a90e9c4a27a84b660e083f0a > -- > 2.50.1 > >
On Thu, Aug 14, 2025 at 11:27:13AM -0700, Darrick J. Wong wrote: > On Thu, Aug 14, 2025 at 04:21:37PM +0200, Pankaj Raghav (Samsung) wrote: > > From: Pankaj Raghav <p.raghav@samsung.com> > > > > iomap_dio_zero() uses a custom allocated memory of zeroes for padding > > zeroes. This was a temporary solution until there was a way to request a > > zero folio that was greater than the PAGE_SIZE. > > > > Use largest_zero_folio() function instead of using the custom allocated > > memory of zeroes. There is no guarantee from largest_zero_folio() > > function that it will always return a PMD sized folio. Adapt the code so > > that it can also work if largest_zero_folio() returns a ZERO_PAGE. > > > > Signed-off-by: Pankaj Raghav <p.raghav@samsung.com> > > Seems fine to me, though I wonder if this oughn't go along with the > rest of the largest_zero_folio changes? I included them in one of the early versions but later removed as we had to rework the implementation multiple times. I just wanted to reduce the scope of the series and send out changes that uses the API separately :). > > Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> Thanks! -- Pankaj Raghav
© 2016 - 2025 Red Hat, Inc.