[PATCH 01/27] ext4: remove writable userspace mappings before truncating page cache

Zhang Yi posted 27 patches 1 month ago
[PATCH 01/27] ext4: remove writable userspace mappings before truncating page cache
Posted by Zhang Yi 1 month ago
From: Zhang Yi <yi.zhang@huawei.com>

When zeroing a range of folios on the filesystem which block size is
less than the page size, the file's mapped partial blocks within one
page will be marked as unwritten, we should remove writable userspace
mappings to ensure that ext4_page_mkwrite() can be called during
subsequent write access to these folios. Otherwise, data written by
subsequent mmap writes may not be saved to disk.

 $mkfs.ext4 -b 1024 /dev/vdb
 $mount /dev/vdb /mnt
 $xfs_io -t -f -c "pwrite -S 0x58 0 4096" -c "mmap -rw 0 4096" \
               -c "mwrite -S 0x5a 2048 2048" -c "fzero 2048 2048" \
               -c "mwrite -S 0x59 2048 2048" -c "close" /mnt/foo

 $od -Ax -t x1z /mnt/foo
 000000 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58
 *
 000800 59 59 59 59 59 59 59 59 59 59 59 59 59 59 59 59
 *
 001000

 $umount /mnt && mount /dev/vdb /mnt
 $od -Ax -t x1z /mnt/foo
 000000 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58
 *
 000800 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 *
 001000

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/ext4.h    |  2 ++
 fs/ext4/extents.c |  1 +
 fs/ext4/inode.c   | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 44b0d418143c..6d0267afd4c1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3020,6 +3020,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
+extern void ext4_truncate_folios_range(struct inode *inode, loff_t start,
+				       loff_t end);
 extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 34e25eee6521..2a054c3689f0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4677,6 +4677,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		}
 
 		/* Now release the pages and zero block aligned part of pages */
+		ext4_truncate_folios_range(inode, start, end);
 		truncate_pagecache_range(inode, start, end - 1);
 		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 54bdd4884fe6..8b34e79112d5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -31,6 +31,7 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
+#include <linux/rmap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
@@ -3870,6 +3871,46 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
 	return ret;
 }
 
+static inline void ext4_truncate_folio(struct inode *inode,
+				       loff_t start, loff_t end)
+{
+	unsigned long blocksize = i_blocksize(inode);
+	struct folio *folio;
+
+	if (round_up(start, blocksize) >= round_down(end, blocksize))
+		return;
+
+	folio = filemap_lock_folio(inode->i_mapping, start >> PAGE_SHIFT);
+	if (IS_ERR(folio))
+		return;
+
+	if (folio_mkclean(folio))
+		folio_mark_dirty(folio);
+	folio_unlock(folio);
+	folio_put(folio);
+}
+
+/*
+ * When truncating a range of folios, if the block size is less than the
+ * page size, the file's mapped partial blocks within one page could be
+ * freed or converted to unwritten. We should call this function to remove
+ * writable userspace mappings so that ext4_page_mkwrite() can be called
+ * during subsequent write access to these folios.
+ */
+void ext4_truncate_folios_range(struct inode *inode, loff_t start, loff_t end)
+{
+	unsigned long blocksize = i_blocksize(inode);
+
+	if (end > inode->i_size)
+		end = inode->i_size;
+	if (start >= end || blocksize >= PAGE_SIZE)
+		return;
+
+	ext4_truncate_folio(inode, start, min(round_up(start, PAGE_SIZE), end));
+	if (end > round_up(start, PAGE_SIZE))
+		ext4_truncate_folio(inode, round_down(end, PAGE_SIZE), end);
+}
+
 static void ext4_wait_dax_page(struct inode *inode)
 {
 	filemap_invalidate_unlock(inode->i_mapping);
-- 
2.46.1