From: Taotao Chen <chentaotao@didiglobal.com>
Implement IOCB_DONTCACHE flag handling in ext4 write paths:
1. Check IOCB_DONTCACHE flag passed via fsdata
2. Propagate FGP_DONTCACHE to page cache operations
Existing write behavior remains unchanged when IOCB_DONTCACHE is not set.
Signed-off-by: Taotao Chen <chentaotao@didiglobal.com>
---
fs/ext4/inode.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 94c7d2d828a6..787dd152a47e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,16 +1147,22 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
int ret, needed_blocks;
+ int iocb_flag;
handle_t *handle;
int retries = 0;
struct folio *folio;
pgoff_t index;
+ fgf_t fgp = FGP_WRITEBEGIN;
unsigned from, to;
ret = ext4_emergency_state(inode->i_sb);
if (unlikely(ret))
return ret;
+ iocb_flag = (int)(uintptr_t)(*fsdata);
+ if (iocb_flag & IOCB_DONTCACHE)
+ fgp |= FGP_DONTCACHE;
+
trace_ext4_write_begin(inode, pos, len);
/*
* Reserve one block more for addition to orphan list in case
@@ -1184,7 +1190,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
* the folio (if needed) without using GFP_NOFS.
*/
retry_grab:
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, index, fgp,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -2917,6 +2923,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct folio **foliop, void **fsdata)
{
int ret, retries = 0;
+ int iocb_flag;
+ fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
pgoff_t index;
struct inode *inode = mapping->host;
@@ -2928,10 +2936,15 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
+ ret = ext4_write_begin(file, mapping, pos, len, foliop, fsdata);
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
- return ext4_write_begin(file, mapping, pos,
- len, foliop, fsdata);
+ return ret;
}
+
+ iocb_flag = (int)(uintptr_t)(*fsdata);
+ if (iocb_flag & IOCB_DONTCACHE)
+ fgp |= FGP_DONTCACHE;
+
*fsdata = (void *)0;
trace_ext4_da_write_begin(inode, pos, len);
@@ -2945,7 +2958,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
}
retry:
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, index, fgp,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
--
2.34.1
On Mon, Apr 21, 2025 at 10:50:30AM +0000, 陈涛涛 Taotao Chen wrote:
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 94c7d2d828a6..787dd152a47e 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1147,16 +1147,22 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
> {
> struct inode *inode = mapping->host;
> int ret, needed_blocks;
> + int iocb_flag;
> handle_t *handle;
> int retries = 0;
> struct folio *folio;
> pgoff_t index;
> + fgf_t fgp = FGP_WRITEBEGIN;
> unsigned from, to;
>
> ret = ext4_emergency_state(inode->i_sb);
> if (unlikely(ret))
> return ret;
>
> + iocb_flag = (int)(uintptr_t)(*fsdata);
> + if (iocb_flag & IOCB_DONTCACHE)
> + fgp |= FGP_DONTCACHE;
> +
See my comment against the first patch in this series. It *should* be
possible to solve the problem just for ext4 by adding this line here:
*fsdata = (void *)0;
The problem is that it's super-fragile, since how *fsdata gets used
changes at different points in time, so it makes code review and
maintenance more difficult. (As evidenced by the fact that you missed
this; this is not a criticism on your programming ability, but rather
for the design choise of overloading the use of *fsdata. This is a
trap that someone else might fall into when doing future code
changes.)
And of course, the question is whether PATCH 1/3 could potentially
break other file systems. We would need audit all of the other
*_write_begin() functions, and then document this for the sake of
future file system developers that might want to change their
write_begin() function.
This is why my preference would be to add an extra flags paramter to
write_begin(), but that is going to be a lot more work.
Cheers,
- Ted
© 2016 - 2026 Red Hat, Inc.