[PATCH v2] f2fs: avoid cp_wait use-after-free in f2fs_write_end_io()

Wenjie Qi posted 1 patch 1 week, 6 days ago
fs/f2fs/checkpoint.c | 20 ++++++++++++++++++--
fs/f2fs/data.c       | 25 +++++++++++++++++--------
2 files changed, 35 insertions(+), 10 deletions(-)
[PATCH v2] f2fs: avoid cp_wait use-after-free in f2fs_write_end_io()
Posted by Wenjie Qi 1 week, 6 days ago
f2fs_write_end_io() decrements the writeback page counter before waking
sbi->cp_wait for the last F2FS_WB_CP_DATA completion.

That decrement can drop the F2FS_WB_CP_DATA count to zero. It can unblock
a concurrent unmount path waiting in f2fs_wait_on_all_pages(). Unmount can
then continue through f2fs_put_super() and free sbi while the end_io
callback is still about to evaluate wq_has_sleeper() and wake_up() on
sbi->cp_wait.

Commit 2d9c4a4ed4ee ("f2fs: fix UAF caused by decrementing
sbi->nr_pages[] in f2fs_write_end_io()") fixed one post-decrement sbi
access by moving the warm-node-list handling before dec_page_count(). The
compressed writeback path follows the same rule and documents that
sbi accesses must happen before dec_page_count() can drop the
F2FS_WB_CP_DATA count to zero.

Use atomic_dec_and_lock_irqsave() for F2FS_WB_CP_DATA completions so the
zero transition is serialized with cp_wait.lock. When the count reaches
zero, wake waiters while holding the same lock.

In f2fs_wait_on_all_pages(), prepare the waiter and recheck the page count
while holding cp_wait.lock before sleeping. This keeps the wakeup visible
to waiters without requiring the end_io callback to access sbi after the
F2FS_WB_CP_DATA count has reached zero. It also avoids a missed wakeup that
would otherwise make the waiter sleep until DEFAULT_SCHEDULE_TIMEOUT.

Fixes: ce2739e482bc ("f2fs: fix to avoid UAF in f2fs_write_end_io()")
Cc: stable@kernel.org
Signed-off-by: Wenjie Qi <qiwenjie@xiaomi.com>
---
 fs/f2fs/checkpoint.c | 20 ++++++++++++++++++--
 fs/f2fs/data.c       | 25 +++++++++++++++++--------
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c00a6b6ebcbd..b16d2d30ec6a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1497,24 +1497,40 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
 	f2fs_unlock_all(sbi);
 }
 
+static bool f2fs_prepare_cp_wait(struct f2fs_sb_info *sbi,
+				 struct wait_queue_entry *wait, int type)
+{
+	unsigned long flags;
+	bool wait_more;
+
+	prepare_to_wait(&sbi->cp_wait, wait, TASK_UNINTERRUPTIBLE);
+	spin_lock_irqsave(&sbi->cp_wait.lock, flags);
+	wait_more = get_pages(sbi, type);
+	spin_unlock_irqrestore(&sbi->cp_wait.lock, flags);
+
+	return wait_more;
+}
+
 void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
 {
 	DEFINE_WAIT(wait);
 
 	for (;;) {
-		if (!get_pages(sbi, type))
+		if (!f2fs_prepare_cp_wait(sbi, &wait, type))
 			break;
 
 		if (unlikely(f2fs_cp_error(sbi) &&
 			!is_sbi_flag_set(sbi, SBI_IS_CLOSE)))
 			break;
+		finish_wait(&sbi->cp_wait, &wait);
 
 		if (type == F2FS_DIRTY_META)
 			f2fs_sync_meta_pages(sbi, LONG_MAX, FS_CP_META_IO);
 		else if (type == F2FS_WB_CP_DATA)
 			f2fs_submit_merged_write(sbi, DATA);
 
-		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (!f2fs_prepare_cp_wait(sbi, &wait, type))
+			break;
 		io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT);
 	}
 	finish_wait(&sbi->cp_wait, &wait);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d83a21998ec2..d92f0b70ba2f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -392,15 +392,24 @@ static void f2fs_write_end_io(struct bio *bio)
 		if (f2fs_in_warm_node_list(folio))
 			f2fs_del_fsync_node_entry(sbi, folio);
 
-		dec_page_count(sbi, type);
+		if (type == F2FS_WB_CP_DATA) {
+			unsigned long flags;
 
-		/*
-		 * we should access sbi before folio_end_writeback() to
-		 * avoid racing w/ kill_f2fs_super()
-		 */
-		if (type == F2FS_WB_CP_DATA && !get_pages(sbi, type) &&
-				wq_has_sleeper(&sbi->cp_wait))
-			wake_up(&sbi->cp_wait);
+			/*
+			 * Hold cp_wait.lock across the zero transition and the
+			 * wakeup so f2fs_wait_on_all_pages() cannot miss it or
+			 * free sbi before this callback stops touching cp_wait.
+			 */
+			if (atomic_dec_and_lock_irqsave(&sbi->nr_pages[type],
+							&sbi->cp_wait.lock,
+							flags)) {
+				wake_up_locked(&sbi->cp_wait);
+				spin_unlock_irqrestore(&sbi->cp_wait.lock,
+						       flags);
+			}
+		} else {
+			dec_page_count(sbi, type);
+		}
 
 		folio_clear_f2fs_gcing(folio);
 		folio_end_writeback(folio);
-- 
2.43.0