fs/9p/vfs_inode.c | 11 ++++++++--- fs/9p/vfs_inode_dotl.c | 13 +++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-)
When using writeback caching (cache=mmap), v9fs_vfs_getattr/setattr have
two issues that can cause data corruption:
1. filemap_fdatawrite() initiates writeback but doesn't wait for
completion. The subsequent server stat sees stale file size.
2. v9fs_stat2inode()/v9fs_stat2inode_dotl() unconditionally overwrite
i_size from the server response, even when dirty pages exist locally.
This causes processes using lseek(SEEK_END) to see incorrect file
sizes.
Fix by using filemap_write_and_wait() instead of filemap_fdatawrite(),
and passing V9FS_STAT2INODE_KEEP_ISIZE when CACHE_WRITEBACK is enabled
to preserve the local i_size.
Also fix v9fs_vfs_getattr_dotl() to check for CACHE_WRITEBACK specifically
rather than any cache mode.
Signed-off-by: Pierre Barre <pierre@barre.sh>
---
fs/9p/vfs_inode.c | 11 ++++++++---
fs/9p/vfs_inode_dotl.c | 13 +++++++++----
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 97abe65bf7c1..f4c294ca759b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -977,7 +977,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
return 0;
} else if (v9ses->cache & CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode)) {
- int retval = filemap_fdatawrite(inode->i_mapping);
+ int retval = filemap_write_and_wait(inode->i_mapping);
if (retval)
p9_debug(P9_DEBUG_ERROR,
@@ -993,7 +993,12 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
+ /*
+ * With writeback caching, the client is authoritative for i_size.
+ * Don't let the server overwrite it with a potentially stale value.
+ */
+ v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb,
+ (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
p9stat_free(st);
@@ -1058,7 +1063,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
/* Write all dirty data */
if (d_is_reg(dentry)) {
- retval = filemap_fdatawrite(inode->i_mapping);
+ retval = filemap_write_and_wait(inode->i_mapping);
if (retval)
p9_debug(P9_DEBUG_ERROR,
"flushing writeback during setattr returned %d\n", retval);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 643e759eacb2..362a68a2bca3 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -431,9 +431,9 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
- } else if (v9ses->cache) {
+ } else if (v9ses->cache & CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode)) {
- int retval = filemap_fdatawrite(inode->i_mapping);
+ int retval = filemap_write_and_wait(inode->i_mapping);
if (retval)
p9_debug(P9_DEBUG_ERROR,
@@ -453,7 +453,12 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
+ /*
+ * With writeback caching, the client is authoritative for i_size.
+ * Don't let the server overwrite it with a potentially stale value.
+ */
+ v9fs_stat2inode_dotl(st, d_inode(dentry),
+ (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -561,7 +566,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
/* Write all dirty data */
if (S_ISREG(inode->i_mode)) {
- retval = filemap_fdatawrite(inode->i_mapping);
+ retval = filemap_write_and_wait(inode->i_mapping);
if (retval < 0)
p9_debug(P9_DEBUG_ERROR,
"Flushing file prior to setattr failed: %d\n", retval);
--
2.43.0
On Saturday, 27 December 2025 09:37:51 CET Pierre Barre wrote:
> When using writeback caching (cache=mmap), v9fs_vfs_getattr/setattr have
> two issues that can cause data corruption:
>
> 1. filemap_fdatawrite() initiates writeback but doesn't wait for
> completion. The subsequent server stat sees stale file size.
>
> 2. v9fs_stat2inode()/v9fs_stat2inode_dotl() unconditionally overwrite
> i_size from the server response, even when dirty pages exist locally.
> This causes processes using lseek(SEEK_END) to see incorrect file
> sizes.
>
> Fix by using filemap_write_and_wait() instead of filemap_fdatawrite(),
> and passing V9FS_STAT2INODE_KEEP_ISIZE when CACHE_WRITEBACK is enabled
> to preserve the local i_size.
>
> Also fix v9fs_vfs_getattr_dotl() to check for CACHE_WRITEBACK specifically
> rather than any cache mode.
>
> Signed-off-by: Pierre Barre <pierre@barre.sh>
> ---
> fs/9p/vfs_inode.c | 11 ++++++++---
> fs/9p/vfs_inode_dotl.c | 13 +++++++++----
> 2 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index 97abe65bf7c1..f4c294ca759b 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -977,7 +977,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct
> path *path, return 0;
> } else if (v9ses->cache & CACHE_WRITEBACK) {
> if (S_ISREG(inode->i_mode)) {
> - int retval = filemap_fdatawrite(inode->i_mapping);
> + int retval = filemap_write_and_wait(inode->i_mapping);
Haven't reviewed thorougly, but this looks wrong to me. The point about write-
back is not having to wait for completion.
> if (retval)
> p9_debug(P9_DEBUG_ERROR,
> @@ -993,7 +993,12 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct
> path *path, if (IS_ERR(st))
> return PTR_ERR(st);
>
> - v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
> + /*
> + * With writeback caching, the client is authoritative for i_size.
> + * Don't let the server overwrite it with a potentially stale value.
> + */
> + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb,
> + (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
And this measure alone (along with the same change in v9fs_vfs_getattr_dotl()
that is) would not fix the misbehavior you encountered?
> generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
>
> p9stat_free(st);
> @@ -1058,7 +1063,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
>
> /* Write all dirty data */
> if (d_is_reg(dentry)) {
> - retval = filemap_fdatawrite(inode->i_mapping);
> + retval = filemap_write_and_wait(inode->i_mapping);
> if (retval)
> p9_debug(P9_DEBUG_ERROR,
> "flushing writeback during setattr returned %d\n", retval);
> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
> index 643e759eacb2..362a68a2bca3 100644
> --- a/fs/9p/vfs_inode_dotl.c
> +++ b/fs/9p/vfs_inode_dotl.c
> @@ -431,9 +431,9 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
> if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
> generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
> return 0;
> - } else if (v9ses->cache) {
> + } else if (v9ses->cache & CACHE_WRITEBACK) {
OK, so here is an inconsistency between v9fs_vfs_getattr_dotl() and
v9fs_vfs_getattr(). But to me it should be the other way around, i.e.
v9fs_vfs_getattr() should do it any cache mode, not only for write-back?
/Christian
> if (S_ISREG(inode->i_mode)) {
> - int retval = filemap_fdatawrite(inode->i_mapping);
> + int retval = filemap_write_and_wait(inode->i_mapping);
>
> if (retval)
> p9_debug(P9_DEBUG_ERROR,
> @@ -453,7 +453,12 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
> if (IS_ERR(st))
> return PTR_ERR(st);
>
> - v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
> + /*
> + * With writeback caching, the client is authoritative for i_size.
> + * Don't let the server overwrite it with a potentially stale value.
> + */
> + v9fs_stat2inode_dotl(st, d_inode(dentry),
> + (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
> generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
> /* Change block size to what the server returned */
> stat->blksize = st->st_blksize;
> @@ -561,7 +566,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
>
> /* Write all dirty data */
> if (S_ISREG(inode->i_mode)) {
> - retval = filemap_fdatawrite(inode->i_mapping);
> + retval = filemap_write_and_wait(inode->i_mapping);
> if (retval < 0)
> p9_debug(P9_DEBUG_ERROR,
> "Flushing file prior to setattr failed: %d\n", retval);
Hi Christian,
On Sat, Dec 27, 2025, at 12:30, Christian Schoenebeck wrote:
> On Saturday, 27 December 2025 09:37:51 CET Pierre Barre wrote:
>> When using writeback caching (cache=mmap), v9fs_vfs_getattr/setattr have
>> two issues that can cause data corruption:
>>
>> 1. filemap_fdatawrite() initiates writeback but doesn't wait for
>> completion. The subsequent server stat sees stale file size.
>>
>> 2. v9fs_stat2inode()/v9fs_stat2inode_dotl() unconditionally overwrite
>> i_size from the server response, even when dirty pages exist locally.
>> This causes processes using lseek(SEEK_END) to see incorrect file
>> sizes.
>>
>> Fix by using filemap_write_and_wait() instead of filemap_fdatawrite(),
>> and passing V9FS_STAT2INODE_KEEP_ISIZE when CACHE_WRITEBACK is enabled
>> to preserve the local i_size.
>>
>> Also fix v9fs_vfs_getattr_dotl() to check for CACHE_WRITEBACK specifically
>> rather than any cache mode.
>>
>> Signed-off-by: Pierre Barre <pierre@barre.sh>
>> ---
>> fs/9p/vfs_inode.c | 11 ++++++++---
>> fs/9p/vfs_inode_dotl.c | 13 +++++++++----
>> 2 files changed, 17 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
>> index 97abe65bf7c1..f4c294ca759b 100644
>> --- a/fs/9p/vfs_inode.c
>> +++ b/fs/9p/vfs_inode.c
>> @@ -977,7 +977,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct
>> path *path, return 0;
>> } else if (v9ses->cache & CACHE_WRITEBACK) {
>> if (S_ISREG(inode->i_mode)) {
>> - int retval = filemap_fdatawrite(inode->i_mapping);
>> + int retval = filemap_write_and_wait(inode->i_mapping);
>
> Haven't reviewed thorougly, but this looks wrong to me. The point about write-
> back is not having to wait for completion.
>
You're right, apologies for not thinking that through properly.
>> if (retval)
>> p9_debug(P9_DEBUG_ERROR,
>> @@ -993,7 +993,12 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct
>> path *path, if (IS_ERR(st))
>> return PTR_ERR(st);
>>
>> - v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
>> + /*
>> + * With writeback caching, the client is authoritative for i_size.
>> + * Don't let the server overwrite it with a potentially stale value.
>> + */
>> + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb,
>> + (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
>
> And this measure alone (along with the same change in v9fs_vfs_getattr_dotl()
> that is) would not fix the misbehavior you encountered?
Indeed, I tested with only that change and the same workload, and this fixes it.
>> generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
>>
>> p9stat_free(st);
>> @@ -1058,7 +1063,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
>>
>> /* Write all dirty data */
>> if (d_is_reg(dentry)) {
>> - retval = filemap_fdatawrite(inode->i_mapping);
>> + retval = filemap_write_and_wait(inode->i_mapping);
>> if (retval)
>> p9_debug(P9_DEBUG_ERROR,
>> "flushing writeback during setattr returned %d\n", retval);
>> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
>> index 643e759eacb2..362a68a2bca3 100644
>> --- a/fs/9p/vfs_inode_dotl.c
>> +++ b/fs/9p/vfs_inode_dotl.c
>> @@ -431,9 +431,9 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
>> if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
>> generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
>> return 0;
>> - } else if (v9ses->cache) {
>> + } else if (v9ses->cache & CACHE_WRITEBACK) {
>
> OK, so here is an inconsistency between v9fs_vfs_getattr_dotl() and
> v9fs_vfs_getattr(). But to me it should be the other way around, i.e.
> v9fs_vfs_getattr() should do it any cache mode, not only for write-back?
My understanding was that dirty pages only exist with CACHE_WRITEBACK, so filemap_fdatawrite() would be a no-op for other modes. Is there a case I'm missing? Anyhow, even if this understanding is correct, this should probably be a separate patch.
Thanks for the review,
Pierre
> /Christian
>
>> if (S_ISREG(inode->i_mode)) {
>> - int retval = filemap_fdatawrite(inode->i_mapping);
>> + int retval = filemap_write_and_wait(inode->i_mapping);
>>
>> if (retval)
>> p9_debug(P9_DEBUG_ERROR,
>> @@ -453,7 +453,12 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
>> if (IS_ERR(st))
>> return PTR_ERR(st);
>>
>> - v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
>> + /*
>> + * With writeback caching, the client is authoritative for i_size.
>> + * Don't let the server overwrite it with a potentially stale value.
>> + */
>> + v9fs_stat2inode_dotl(st, d_inode(dentry),
>> + (v9ses->cache & CACHE_WRITEBACK) ? V9FS_STAT2INODE_KEEP_ISIZE : 0);
>> generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
>> /* Change block size to what the server returned */
>> stat->blksize = st->st_blksize;
>> @@ -561,7 +566,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
>>
>> /* Write all dirty data */
>> if (S_ISREG(inode->i_mode)) {
>> - retval = filemap_fdatawrite(inode->i_mapping);
>> + retval = filemap_write_and_wait(inode->i_mapping);
>> if (retval < 0)
>> p9_debug(P9_DEBUG_ERROR,
>> "Flushing file prior to setattr failed: %d\n", retval);
© 2016 - 2026 Red Hat, Inc.