[PATCH 07/13] mm, swap: hold a reference of si during scan and clean up flags

Kairui Song posted 13 patches 1 month ago
[PATCH 07/13] mm, swap: hold a reference of si during scan and clean up flags
Posted by Kairui Song 1 month ago
From: Kairui Song <kasong@tencent.com>

The flag SWP_SCANNING was used as an indicator of whether a device
is being scanned, and prevents swap off. But it's already no longer
used.  The only thing protects the scanning now is the si lock.

However allocation path may drop the si lock, in theory this could
leaf to UAF.

So clean this up, just hold a reference for whole allocation path.
So per CPU counter killing will wait for existing scan and other
usage. The flag SWP_SCANNING can also be dropped.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 include/linux/swap.h |  1 -
 mm/swapfile.c        | 62 +++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 16dcf8bd1a4e..1651174959c8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -219,7 +219,6 @@ enum {
 	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
 	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 14),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4e629536a07c..d6b6e71ccc19 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1088,6 +1088,21 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
 	return cluster_alloc_swap(si, usage, nr, slots, order);
 }
 
+static bool get_swap_device_info(struct swap_info_struct *si)
+{
+	if (!percpu_ref_tryget_live(&si->users))
+		return false;
+	/*
+	 * Guarantee the si->users are checked before accessing other
+	 * fields of swap_info_struct.
+	 *
+	 * Paired with the spin_unlock() after setup_swap_info() in
+	 * enable_swap_info().
+	 */
+	smp_rmb();
+	return true;
+}
+
 int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
 {
 	int order = swap_entry_order(entry_order);
@@ -1115,13 +1130,16 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
 		/* requeue si to after same-priority siblings */
 		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
 		spin_unlock(&swap_avail_lock);
-		spin_lock(&si->lock);
-		n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
-					    n_goal, swp_entries, order);
-		spin_unlock(&si->lock);
-		if (n_ret || size > 1)
-			goto check_out;
-		cond_resched();
+		if (get_swap_device_info(si)) {
+			spin_lock(&si->lock);
+			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+					n_goal, swp_entries, order);
+			spin_unlock(&si->lock);
+			put_swap_device(si);
+			if (n_ret || size > 1)
+				goto check_out;
+			cond_resched();
+		}
 
 		spin_lock(&swap_avail_lock);
 		/*
@@ -1272,16 +1290,8 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
 	si = swp_swap_info(entry);
 	if (!si)
 		goto bad_nofile;
-	if (!percpu_ref_tryget_live(&si->users))
+	if (!get_swap_device_info(si))
 		goto out;
-	/*
-	 * Guarantee the si->users are checked before accessing other
-	 * fields of swap_info_struct.
-	 *
-	 * Paired with the spin_unlock() after setup_swap_info() in
-	 * enable_swap_info().
-	 */
-	smp_rmb();
 	offset = swp_offset(entry);
 	if (offset >= si->max)
 		goto put_out;
@@ -1761,10 +1771,13 @@ swp_entry_t get_swap_page_of_type(int type)
 		goto fail;
 
 	/* This is called for allocating swap entry, not cache */
-	spin_lock(&si->lock);
-	if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0))
-		atomic_long_dec(&nr_swap_pages);
-	spin_unlock(&si->lock);
+	if (get_swap_device_info(si)) {
+		spin_lock(&si->lock);
+		if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0))
+			atomic_long_dec(&nr_swap_pages);
+		spin_unlock(&si->lock);
+		put_swap_device(si);
+	}
 fail:
 	return entry;
 }
@@ -2650,15 +2663,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_lock(&p->lock);
 	drain_mmlist();
 
-	/* wait for anyone still in scan_swap_map_slots */
-	while (p->flags >= SWP_SCANNING) {
-		spin_unlock(&p->lock);
-		spin_unlock(&swap_lock);
-		schedule_timeout_uninterruptible(1);
-		spin_lock(&swap_lock);
-		spin_lock(&p->lock);
-	}
-
 	swap_file = p->swap_file;
 	p->swap_file = NULL;
 	p->max = 0;
-- 
2.47.0