[PATCH v2 08/15] mm/shmem, swap: remove redundant error handling for replacing folio

Kairui Song posted 15 patches 4 days, 10 hours ago
[PATCH v2 08/15] mm/shmem, swap: remove redundant error handling for replacing folio
Posted by Kairui Song 4 days, 10 hours ago
From: Kairui Song <kasong@tencent.com>

Shmem may replace a folio in the swap cache if the cached one doesn't
fit the swapin's GFP zone. When doing so, shmem has already double
checked that the swap cache folio is locked, still has the swap cache
flag set, and contains the wanted swap entry. So it is impossible to
fail due to an XArray mismatch. There is even a comment about that.

Delete the defensive error handling path, and add a WARN_ON instead:
if that happens, something has broken the basic principle of how the
swap cache works, and we should catch and fix it.

Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
 mm/shmem.c | 42 ++++++++++++------------------------------
 1 file changed, 12 insertions(+), 30 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 4e27e8e5da3b..cc6a0007c7a6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1698,13 +1698,13 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
 		}
 
 		/*
-		 * The delete_from_swap_cache() below could be left for
+		 * The swap_cache_del_folio() below could be left for
 		 * shrink_folio_list()'s folio_free_swap() to dispose of;
 		 * but I'm a little nervous about letting this folio out of
 		 * shmem_writeout() in a hybrid half-tmpfs-half-swap state
 		 * e.g. folio_mapping(folio) might give an unexpected answer.
 		 */
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 		goto redirty;
 	}
 	if (nr_pages > 1)
@@ -2082,7 +2082,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
 	new->swap = entry;
 
 	memcg1_swapin(entry, nr_pages);
-	shadow = get_shadow_from_swap_cache(entry);
+	shadow = swap_cache_get_shadow(entry);
 	if (shadow)
 		workingset_refault(new, shadow);
 	folio_add_lru(new);
@@ -2158,35 +2158,17 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 	/* Swap cache still stores N entries instead of a high-order entry */
 	xa_lock_irq(&swap_mapping->i_pages);
 	for (i = 0; i < nr_pages; i++) {
-		void *item = xas_load(&xas);
-
-		if (item != old) {
-			error = -ENOENT;
-			break;
-		}
-
-		xas_store(&xas, new);
+		WARN_ON_ONCE(xas_store(&xas, new));
 		xas_next(&xas);
 	}
-	if (!error) {
-		mem_cgroup_replace_folio(old, new);
-		shmem_update_stats(new, nr_pages);
-		shmem_update_stats(old, -nr_pages);
-	}
 	xa_unlock_irq(&swap_mapping->i_pages);
 
-	if (unlikely(error)) {
-		/*
-		 * Is this possible?  I think not, now that our callers
-		 * check both the swapcache flag and folio->private
-		 * after getting the folio lock; but be defensive.
-		 * Reverse old to newpage for clear and free.
-		 */
-		old = new;
-	} else {
-		folio_add_lru(new);
-		*foliop = new;
-	}
+	mem_cgroup_replace_folio(old, new);
+	shmem_update_stats(new, nr_pages);
+	shmem_update_stats(old, -nr_pages);
+
+	folio_add_lru(new);
+	*foliop = new;
 
 	folio_clear_swapcache(old);
 	old->private = NULL;
@@ -2220,7 +2202,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 	nr_pages = folio_nr_pages(folio);
 	folio_wait_writeback(folio);
 	if (!skip_swapcache)
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 	/*
 	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
 	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
@@ -2459,7 +2441,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		folio->swap.val = 0;
 		swapcache_clear(si, swap, nr_pages);
 	} else {
-		delete_from_swap_cache(folio);
+		swap_cache_del_folio(folio);
 	}
 	folio_mark_dirty(folio);
 	swap_free_nr(swap, nr_pages);
-- 
2.51.0
Re: [PATCH v2 08/15] mm/shmem, swap: remove redundant error handling for replacing folio
Posted by Baolin Wang 2 days, 2 hours ago

On 2025/9/6 03:13, Kairui Song wrote:
> From: Kairui Song <kasong@tencent.com>
> 
> Shmem may replace a folio in the swap cache if the cached one doesn't
> fit the swapin's GFP zone. When doing so, shmem has already double
> checked that the swap cache folio is locked, still has the swap cache
> flag set, and contains the wanted swap entry. So it is impossible to
> fail due to an Xarray mismatch. There is even a comment for that.
> 
> Delete the defensive error handling path, and add a WARN_ON instead:
> if that happened, something has broken the basic principle of how the
> swap cache works, we should catch and fix that.
> 
> Signed-off-by: Kairui Song <kasong@tencent.com>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> ---
>   mm/shmem.c | 42 ++++++++++++------------------------------
>   1 file changed, 12 insertions(+), 30 deletions(-)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 4e27e8e5da3b..cc6a0007c7a6 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1698,13 +1698,13 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
>   		}
>   
>   		/*
> -		 * The delete_from_swap_cache() below could be left for
> +		 * The swap_cache_del_folio() below could be left for
>   		 * shrink_folio_list()'s folio_free_swap() to dispose of;
>   		 * but I'm a little nervous about letting this folio out of
>   		 * shmem_writeout() in a hybrid half-tmpfs-half-swap state
>   		 * e.g. folio_mapping(folio) might give an unexpected answer.
>   		 */
> -		delete_from_swap_cache(folio);
> +		swap_cache_del_folio(folio);
>   		goto redirty;
>   	}

You should reorganize your patch set, as the swap_cache_del_folio() 
function is introduced in patch 9.

>   	if (nr_pages > 1)
> @@ -2082,7 +2082,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>   	new->swap = entry;
>   
>   	memcg1_swapin(entry, nr_pages);
> -	shadow = get_shadow_from_swap_cache(entry);
> +	shadow = swap_cache_get_shadow(entry);

Ditto.

>   	if (shadow)
>   		workingset_refault(new, shadow);
>   	folio_add_lru(new);
> @@ -2158,35 +2158,17 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
>   	/* Swap cache still stores N entries instead of a high-order entry */
>   	xa_lock_irq(&swap_mapping->i_pages);
>   	for (i = 0; i < nr_pages; i++) {
> -		void *item = xas_load(&xas);
> -
> -		if (item != old) {
> -			error = -ENOENT;
> -			break;
> -		}
> -
> -		xas_store(&xas, new);
> +		WARN_ON_ONCE(xas_store(&xas, new));
>   		xas_next(&xas);
>   	}
> -	if (!error) {
> -		mem_cgroup_replace_folio(old, new);
> -		shmem_update_stats(new, nr_pages);
> -		shmem_update_stats(old, -nr_pages);
> -	}
>   	xa_unlock_irq(&swap_mapping->i_pages);
>   
> -	if (unlikely(error)) {
> -		/*
> -		 * Is this possible?  I think not, now that our callers
> -		 * check both the swapcache flag and folio->private
> -		 * after getting the folio lock; but be defensive.
> -		 * Reverse old to newpage for clear and free.
> -		 */
> -		old = new;
> -	} else {
> -		folio_add_lru(new);
> -		*foliop = new;
> -	}
> +	mem_cgroup_replace_folio(old, new);
> +	shmem_update_stats(new, nr_pages);
> +	shmem_update_stats(old, -nr_pages);
> +
> +	folio_add_lru(new);
> +	*foliop = new;
>   
>   	folio_clear_swapcache(old);
>   	old->private = NULL;
> @@ -2220,7 +2202,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
>   	nr_pages = folio_nr_pages(folio);
>   	folio_wait_writeback(folio);
>   	if (!skip_swapcache)
> -		delete_from_swap_cache(folio);
> +		swap_cache_del_folio(folio);
>   	/*
>   	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
>   	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
> @@ -2459,7 +2441,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>   		folio->swap.val = 0;
>   		swapcache_clear(si, swap, nr_pages);
>   	} else {
> -		delete_from_swap_cache(folio);
> +		swap_cache_del_folio(folio);
>   	}
>   	folio_mark_dirty(folio);
>   	swap_free_nr(swap, nr_pages);
Re: [PATCH v2 08/15] mm/shmem, swap: remove redundant error handling for replacing folio
Posted by Kairui Song 1 day, 20 hours ago
On Mon, Sep 8, 2025 at 2:04 PM Baolin Wang
<baolin.wang@linux.alibaba.com> wrote:
>
>
>
> On 2025/9/6 03:13, Kairui Song wrote:
> > From: Kairui Song <kasong@tencent.com>
> >
> > Shmem may replace a folio in the swap cache if the cached one doesn't
> > fit the swapin's GFP zone. When doing so, shmem has already double
> > checked that the swap cache folio is locked, still has the swap cache
> > flag set, and contains the wanted swap entry. So it is impossible to
> > fail due to an Xarray mismatch. There is even a comment for that.
> >
> > Delete the defensive error handling path, and add a WARN_ON instead:
> > if that happened, something has broken the basic principle of how the
> > swap cache works, we should catch and fix that.
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > Reviewed-by: David Hildenbrand <david@redhat.com>
> > ---
> >   mm/shmem.c | 42 ++++++++++++------------------------------
> >   1 file changed, 12 insertions(+), 30 deletions(-)
> >
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 4e27e8e5da3b..cc6a0007c7a6 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -1698,13 +1698,13 @@ int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
> >               }
> >
> >               /*
> > -              * The delete_from_swap_cache() below could be left for
> > +              * The swap_cache_del_folio() below could be left for
> >                * shrink_folio_list()'s folio_free_swap() to dispose of;
> >                * but I'm a little nervous about letting this folio out of
> >                * shmem_writeout() in a hybrid half-tmpfs-half-swap state
> >                * e.g. folio_mapping(folio) might give an unexpected answer.
> >                */
> > -             delete_from_swap_cache(folio);
> > +             swap_cache_del_folio(folio);
> >               goto redirty;
> >       }
>
> You should reorganize your patch set, as the swap_cache_del_folio()
> function is introduced in patch 9.
>
> >       if (nr_pages > 1)
> > @@ -2082,7 +2082,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
> >       new->swap = entry;
> >
> >       memcg1_swapin(entry, nr_pages);
> > -     shadow = get_shadow_from_swap_cache(entry);
> > +     shadow = swap_cache_get_shadow(entry);
>
> Ditto.
>
> >       if (shadow)
> >               workingset_refault(new, shadow);
> >       folio_add_lru(new);
> > @@ -2158,35 +2158,17 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
> >       /* Swap cache still stores N entries instead of a high-order entry */
> >       xa_lock_irq(&swap_mapping->i_pages);
> >       for (i = 0; i < nr_pages; i++) {
> > -             void *item = xas_load(&xas);
> > -
> > -             if (item != old) {
> > -                     error = -ENOENT;
> > -                     break;
> > -             }
> > -
> > -             xas_store(&xas, new);
> > +             WARN_ON_ONCE(xas_store(&xas, new));
> >               xas_next(&xas);
> >       }
> > -     if (!error) {
> > -             mem_cgroup_replace_folio(old, new);
> > -             shmem_update_stats(new, nr_pages);
> > -             shmem_update_stats(old, -nr_pages);
> > -     }
> >       xa_unlock_irq(&swap_mapping->i_pages);
> >
> > -     if (unlikely(error)) {
> > -             /*
> > -              * Is this possible?  I think not, now that our callers
> > -              * check both the swapcache flag and folio->private
> > -              * after getting the folio lock; but be defensive.
> > -              * Reverse old to newpage for clear and free.
> > -              */
> > -             old = new;
> > -     } else {
> > -             folio_add_lru(new);
> > -             *foliop = new;
> > -     }
> > +     mem_cgroup_replace_folio(old, new);
> > +     shmem_update_stats(new, nr_pages);
> > +     shmem_update_stats(old, -nr_pages);
> > +
> > +     folio_add_lru(new);
> > +     *foliop = new;
> >
> >       folio_clear_swapcache(old);
> >       old->private = NULL;
> > @@ -2220,7 +2202,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
> >       nr_pages = folio_nr_pages(folio);
> >       folio_wait_writeback(folio);
> >       if (!skip_swapcache)
> > -             delete_from_swap_cache(folio);
> > +             swap_cache_del_folio(folio);
> >       /*
> >        * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
> >        * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
> > @@ -2459,7 +2441,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
> >               folio->swap.val = 0;
> >               swapcache_clear(si, swap, nr_pages);
> >       } else {
> > -             delete_from_swap_cache(folio);
> > +             swap_cache_del_folio(folio);

Oh, you are right — either that, or I should keep delete_from_swap_cache() here.

Let me just rebase and move this patch later then. Thanks!

> >       }
> >       folio_mark_dirty(folio);
> >       swap_free_nr(swap, nr_pages);
>
>