From: Kairui Song <kasong@tencent.com>
There are currently three swap cache users that are trying to replace an
existing folio with a new one: huge memory splitting, migration, and
shmem replacement. What they are doing is quite similar.
Introduce a common helper for this. In later commits, this can be easily
switched to use the swap table by updating this helper.
The newly added helper also makes the swap cache API better defined, and
makes debugging easier by adding a few more debug checks.
Migration and shmem replacement are meant to clone the folio, including
content, swap entry value, and flags. Splitting, in contrast, adjusts each
sub folio's swap entry according to order, which could be non-uniform in
the future. So document clearly that it is the caller's responsibility
to set up the new folio's swap entries and flags before calling the helper.
The helper will just follow the new folio's entry value.
This also prepares for replacing high-order folios in the swap cache.
Currently, only splitting to order 0 is allowed for swap cache folios.
Using the new helper, we can handle high-order folio splitting better.
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
mm/huge_memory.c | 4 +---
mm/migrate.c | 11 +++--------
mm/shmem.c | 10 ++--------
mm/swap.h | 5 +++++
mm/swap_state.c | 33 +++++++++++++++++++++++++++++++++
5 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 26cedfcd7418..4c66e358685b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3798,9 +3798,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* NOTE: shmem in swap cache is not supported yet.
*/
if (swap_cache) {
- __xa_store(&swap_cache->i_pages,
- swap_cache_index(new_folio->swap),
- new_folio, 0);
+ __swap_cache_replace_folio(folio, new_folio);
continue;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 8e435a078fc3..c69cc13db692 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -566,7 +566,6 @@ static int __folio_migrate_mapping(struct address_space *mapping,
struct zone *oldzone, *newzone;
int dirty;
long nr = folio_nr_pages(folio);
- long entries, i;
if (!mapping) {
/* Take off deferred split queue while frozen and memcg set */
@@ -615,9 +614,6 @@ static int __folio_migrate_mapping(struct address_space *mapping,
if (folio_test_swapcache(folio)) {
folio_set_swapcache(newfolio);
newfolio->private = folio_get_private(folio);
- entries = nr;
- } else {
- entries = 1;
}
/* Move dirty while folio refs frozen and newfolio not yet exposed */
@@ -627,11 +623,10 @@ static int __folio_migrate_mapping(struct address_space *mapping,
folio_set_dirty(newfolio);
}
- /* Swap cache still stores N entries instead of a high-order entry */
- for (i = 0; i < entries; i++) {
+ if (folio_test_swapcache(folio))
+ __swap_cache_replace_folio(folio, newfolio);
+ else
xas_store(&xas, newfolio);
- xas_next(&xas);
- }
/*
* Drop cache reference from old folio by unfreezing
diff --git a/mm/shmem.c b/mm/shmem.c
index 5f395fab489c..8930780325da 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2086,10 +2086,8 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
struct folio *new, *old = *foliop;
swp_entry_t entry = old->swap;
struct address_space *swap_mapping = swap_address_space(entry);
- pgoff_t swap_index = swap_cache_index(entry);
- XA_STATE(xas, &swap_mapping->i_pages, swap_index);
int nr_pages = folio_nr_pages(old);
- int error = 0, i;
+ int error = 0;
/*
* We have arrived here because our zones are constrained, so don't
@@ -2118,12 +2116,8 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
new->swap = entry;
folio_set_swapcache(new);
- /* Swap cache still stores N entries instead of a high-order entry */
xa_lock_irq(&swap_mapping->i_pages);
- for (i = 0; i < nr_pages; i++) {
- WARN_ON_ONCE(xas_store(&xas, new));
- xas_next(&xas);
- }
+ __swap_cache_replace_folio(old, new);
xa_unlock_irq(&swap_mapping->i_pages);
mem_cgroup_replace_folio(old, new);
diff --git a/mm/swap.h b/mm/swap.h
index 6c4acb549bec..fe579c81c6c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -185,6 +185,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
void swap_cache_del_folio(struct folio *folio);
void __swap_cache_del_folio(struct folio *folio,
swp_entry_t entry, void *shadow);
+void __swap_cache_replace_folio(struct folio *old, struct folio *new);
void swap_cache_clear_shadow(int type, unsigned long begin,
unsigned long end);
@@ -336,6 +337,10 @@ static inline void __swap_cache_del_folio(struct folio *folio, swp_entry_t entry
{
}
+static inline void __swap_cache_replace_folio(struct folio *old, struct folio *new)
+{
+}
+
static inline unsigned int folio_swap_flags(struct folio *folio)
{
return 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f3a32a06a950..d1f5b8fa52fc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -234,6 +234,39 @@ void swap_cache_del_folio(struct folio *folio)
folio_ref_sub(folio, folio_nr_pages(folio));
}
+/**
+ * __swap_cache_replace_folio - Replace a folio in the swap cache.
+ * @old: The old folio to be replaced.
+ * @new: The new folio, with its swap entry and flags already set up.
+ *
+ * Replace an existing folio in the swap cache with a new folio. The
+ * caller is responsible for setting up the new folio's flags and swap
+ * entries. Replacement will take the new folio's swap entry value as
+ * the starting offset to override all slots covered by the new folio.
+ *
+ * Context: Caller must ensure both folios are locked, and must also
+ * lock the swap address_space that holds the old folio to avoid races.
+ */
+void __swap_cache_replace_folio(struct folio *old, struct folio *new)
+{
+ swp_entry_t entry = new->swap;
+ unsigned long nr_pages = folio_nr_pages(new);
+ unsigned long offset = swap_cache_index(entry);
+ unsigned long end = offset + nr_pages;
+
+ XA_STATE(xas, &swap_address_space(entry)->i_pages, offset);
+
+ VM_WARN_ON_ONCE(!folio_test_swapcache(old) || !folio_test_swapcache(new));
+ VM_WARN_ON_ONCE(!folio_test_locked(old) || !folio_test_locked(new));
+ VM_WARN_ON_ONCE(!entry.val);
+
+ /* Swap cache still stores N entries instead of a high-order entry */
+ do {
+ WARN_ON_ONCE(xas_store(&xas, new) != old);
+ xas_next(&xas);
+ } while (++offset < end);
+}
+
/**
* swap_cache_clear_shadow - Clears a set of shadows in the swap cache.
* @type: Indicates the swap device.
--
2.51.0