From: Zhang Yi <yi.zhang@huawei.com>
In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
and zeroing a partial block, only data=journal modes guarantee that the
zeroed data is synchronously persisted after the operation completes.
For data=ordered/writeback mode and non-journal modes, this guarantee is
missing.
Introduce a partial_zero parameter to explicitly trigger writeback for
all scenarios where a partial block is zeroed, ensuring the zeroed data
is durably persisted.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/extents.c | 9 ++++++++-
fs/ext4/inode.c | 19 ++++++++++++++-----
3 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 859ae05339ad..bfe86479a83c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
- loff_t length);
+ loff_t length, bool *did_zero);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 002b1ec8cee2..16386f499138 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t align_start, align_end, new_size = 0;
loff_t end = offset + len;
unsigned int blocksize = i_blocksize(inode);
+ bool partial_zeroed = false;
int ret, flags;
trace_ext4_zero_range(inode, offset, len, mode);
@@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
return ret;
/* Zero out partial block at the edges of the range */
- ret = ext4_zero_partial_blocks(inode, offset, len);
+ ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
if (ret)
return ret;
+ if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset,
+ end - 1);
+ if (ret)
+ return ret;
+ }
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8aa4369e3150..b934ad86a96d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
return 0;
}
-int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
+int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
+ bool *did_zero)
{
struct super_block *sb = inode->i_sb;
unsigned partial_start, partial_end;
@@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
/* Handle partial zero within the single block */
if (start == end &&
(partial_start || (partial_end != sb->s_blocksize - 1))) {
- err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
+ err = ext4_block_zero_range(inode, lstart, length, did_zero,
+ NULL);
return err;
}
/* Handle partial zero out on the start of the range */
if (partial_start) {
err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
- NULL, NULL);
+ did_zero, NULL);
if (err)
return err;
}
/* Handle partial zero out on the end of the range */
if (partial_end != sb->s_blocksize - 1)
err = ext4_block_zero_range(inode, byte_end - partial_end,
- partial_end + 1, NULL, NULL);
+ partial_end + 1, did_zero, NULL);
return err;
}
@@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
loff_t end = offset + length;
handle_t *handle;
unsigned int credits;
+ bool partial_zeroed = false;
int ret;
trace_ext4_punch_hole(inode, offset, length, 0);
@@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
if (ret)
return ret;
- ret = ext4_zero_partial_blocks(inode, offset, length);
+ ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
if (ret)
return ret;
+ if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset,
+ end - 1);
+ if (ret)
+ return ret;
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_chunk_trans_extent(inode, 0);
--
2.52.0
On Fri 27-03-26 18:29:35, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
> and zeroing a partial block, only data=journal modes guarantee that the
> zeroed data is synchronously persisted after the operation completes.
> For data=ordered/writeback mode and non-journal modes, this guarantee is
> missing.
>
> Introduce a partial_zero parameter to explicitly trigger writeback for
> all scenarios where a partial block is zeroed, ensuring the zeroed data
> is durably persisted.
>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
> fs/ext4/ext4.h | 2 +-
> fs/ext4/extents.c | 9 ++++++++-
> fs/ext4/inode.c | 19 ++++++++++++++-----
> 3 files changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 859ae05339ad..bfe86479a83c 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
> int pextents);
> extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
> extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
> - loff_t length);
> + loff_t length, bool *did_zero);
> extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
> extern qsize_t *ext4_get_reserved_space(struct inode *inode);
> extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 002b1ec8cee2..16386f499138 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> loff_t align_start, align_end, new_size = 0;
> loff_t end = offset + len;
> unsigned int blocksize = i_blocksize(inode);
> + bool partial_zeroed = false;
> int ret, flags;
>
> trace_ext4_zero_range(inode, offset, len, mode);
> @@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> return ret;
>
> /* Zero out partial block at the edges of the range */
> - ret = ext4_zero_partial_blocks(inode, offset, len);
> + ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
> if (ret)
> return ret;
> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> + end - 1);
> + if (ret)
> + return ret;
> + }
Shouldn't we handle this somewhat below, where we do:
if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);
It would be logical to keep these together... Similarly for
ext4_punch_hole() below. An yes, the check when calling ext4_handle_sync()
should be extended with an IS_SYNC() check, which is a preexisting bug.
Honza
>
> handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
> if (IS_ERR(handle)) {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 8aa4369e3150..b934ad86a96d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
> return 0;
> }
>
> -int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
> +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
> + bool *did_zero)
> {
> struct super_block *sb = inode->i_sb;
> unsigned partial_start, partial_end;
> @@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
> /* Handle partial zero within the single block */
> if (start == end &&
> (partial_start || (partial_end != sb->s_blocksize - 1))) {
> - err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
> + err = ext4_block_zero_range(inode, lstart, length, did_zero,
> + NULL);
> return err;
> }
> /* Handle partial zero out on the start of the range */
> if (partial_start) {
> err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
> - NULL, NULL);
> + did_zero, NULL);
> if (err)
> return err;
> }
> /* Handle partial zero out on the end of the range */
> if (partial_end != sb->s_blocksize - 1)
> err = ext4_block_zero_range(inode, byte_end - partial_end,
> - partial_end + 1, NULL, NULL);
> + partial_end + 1, did_zero, NULL);
> return err;
> }
>
> @@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
> loff_t end = offset + length;
> handle_t *handle;
> unsigned int credits;
> + bool partial_zeroed = false;
> int ret;
>
> trace_ext4_punch_hole(inode, offset, length, 0);
> @@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
> if (ret)
> return ret;
>
> - ret = ext4_zero_partial_blocks(inode, offset, length);
> + ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
> if (ret)
> return ret;
> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> + end - 1);
> + if (ret)
> + return ret;
> + }
>
> if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
> credits = ext4_chunk_trans_extent(inode, 0);
> --
> 2.52.0
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On 4/2/2026 1:06 AM, Jan Kara wrote:
> On Fri 27-03-26 18:29:35, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
>> and zeroing a partial block, only data=journal modes guarantee that the
>> zeroed data is synchronously persisted after the operation completes.
>> For data=ordered/writeback mode and non-journal modes, this guarantee is
>> missing.
>>
>> Introduce a partial_zero parameter to explicitly trigger writeback for
>> all scenarios where a partial block is zeroed, ensuring the zeroed data
>> is durably persisted.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>> fs/ext4/ext4.h | 2 +-
>> fs/ext4/extents.c | 9 ++++++++-
>> fs/ext4/inode.c | 19 ++++++++++++++-----
>> 3 files changed, 23 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 859ae05339ad..bfe86479a83c 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
>> int pextents);
>> extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
>> extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
>> - loff_t length);
>> + loff_t length, bool *did_zero);
>> extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
>> extern qsize_t *ext4_get_reserved_space(struct inode *inode);
>> extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index 002b1ec8cee2..16386f499138 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>> loff_t align_start, align_end, new_size = 0;
>> loff_t end = offset + len;
>> unsigned int blocksize = i_blocksize(inode);
>> + bool partial_zeroed = false;
>> int ret, flags;
>>
>> trace_ext4_zero_range(inode, offset, len, mode);
>> @@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>> return ret;
>>
>> /* Zero out partial block at the edges of the range */
>> - ret = ext4_zero_partial_blocks(inode, offset, len);
>> + ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
>> if (ret)
>> return ret;
>> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
>> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
>> + end - 1);
>> + if (ret)
>> + return ret;
>> + }
>
> Shouldn't we handle this somewhat below, where we do:
>
> if (file->f_flags & O_SYNC)
> ext4_handle_sync(handle);
>
> It would be logical to keep these together... Similarly for
> ext4_punch_hole() below. An yes, the check when calling ext4_handle_sync()
> should be extended with an IS_SYNC() check, which is a preexisting bug.
>
> Honza
Yeah, right, I've done this in the next patch. Thank you for your review!
Cheers,
Yi.
>
>>
>> handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
>> if (IS_ERR(handle)) {
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 8aa4369e3150..b934ad86a96d 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
>> return 0;
>> }
>>
>> -int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
>> +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
>> + bool *did_zero)
>> {
>> struct super_block *sb = inode->i_sb;
>> unsigned partial_start, partial_end;
>> @@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
>> /* Handle partial zero within the single block */
>> if (start == end &&
>> (partial_start || (partial_end != sb->s_blocksize - 1))) {
>> - err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
>> + err = ext4_block_zero_range(inode, lstart, length, did_zero,
>> + NULL);
>> return err;
>> }
>> /* Handle partial zero out on the start of the range */
>> if (partial_start) {
>> err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
>> - NULL, NULL);
>> + did_zero, NULL);
>> if (err)
>> return err;
>> }
>> /* Handle partial zero out on the end of the range */
>> if (partial_end != sb->s_blocksize - 1)
>> err = ext4_block_zero_range(inode, byte_end - partial_end,
>> - partial_end + 1, NULL, NULL);
>> + partial_end + 1, did_zero, NULL);
>> return err;
>> }
>>
>> @@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>> loff_t end = offset + length;
>> handle_t *handle;
>> unsigned int credits;
>> + bool partial_zeroed = false;
>> int ret;
>>
>> trace_ext4_punch_hole(inode, offset, length, 0);
>> @@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>> if (ret)
>> return ret;
>>
>> - ret = ext4_zero_partial_blocks(inode, offset, length);
>> + ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
>> if (ret)
>> return ret;
>> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
>> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
>> + end - 1);
>> + if (ret)
>> + return ret;
>> + }
>>
>> if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
>> credits = ext4_chunk_trans_extent(inode, 0);
>> --
>> 2.52.0
>>
© 2016 - 2026 Red Hat, Inc.