While we handle pte_lockptr() == pmd_lockptr() correctly in
zap_pte_table_if_empty(), we don't handle it in zap_empty_pte_table():
the caller already holds the PTE lock, so when it is the very same lock
as the PMD lock, the spin_trylock() always fails, forcing us onto the
slow path.
So let's properly handle the scenario where pte_lockptr() == pmd_lockptr(),
which can only happen if CONFIG_SPLIT_PTE_PTLOCKS is not set.
This is only relevant once we unlock CONFIG_PT_RECLAIM on architectures
that are not x86-64.
Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org>
---
mm/memory.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index c3055b2577c27..3852075ea62d4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1833,16 +1833,18 @@ static bool pte_table_reclaim_possible(unsigned long start, unsigned long end,
return details && details->reclaim_pt && (end - start >= PMD_SIZE);
}
-static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval)
+static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd,
+ spinlock_t *ptl, pmd_t *pmdval)
{
spinlock_t *pml = pmd_lockptr(mm, pmd);
- if (!spin_trylock(pml))
+ if (ptl != pml && !spin_trylock(pml))
return false;
*pmdval = pmdp_get(pmd);
pmd_clear(pmd);
- spin_unlock(pml);
+ if (ptl != pml)
+ spin_unlock(pml);
return true;
}
@@ -1934,7 +1936,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
* from being repopulated by another thread.
*/
if (can_reclaim_pt && direct_reclaim && addr == end)
- direct_reclaim = zap_empty_pte_table(mm, pmd, &pmdval);
+ direct_reclaim = zap_empty_pte_table(mm, pmd, ptl, &pmdval);
add_mm_rss_vec(mm, rss);
lazy_mmu_mode_disable();
--
2.52.0
On 1/20/26 6:07 AM, David Hildenbrand (Red Hat) wrote:
> While we handle pte_lockptr() == pmd_lockptr() correctly in
> zap_pte_table_if_empty(), we don't handle it in zap_empty_pte_table(),
> making the spin_trylock() always fail and forcing us onto the slow path.
>
> So let's handle the scenario where pte_lockptr() == pmd_lockptr()
> better, which can only happen if CONFIG_SPLIT_PTE_PTLOCKS is not set.
>
> This is only relevant once we unlock CONFIG_PT_RECLAIM on architectures
> that are not x86-64.
>
> Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org>
> ---
> mm/memory.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
Thanks!
>
> diff --git a/mm/memory.c b/mm/memory.c
> index c3055b2577c27..3852075ea62d4 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1833,16 +1833,18 @@ static bool pte_table_reclaim_possible(unsigned long start, unsigned long end,
> return details && details->reclaim_pt && (end - start >= PMD_SIZE);
> }
>
> -static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval)
> +static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd,
> + spinlock_t *ptl, pmd_t *pmdval)
> {
> spinlock_t *pml = pmd_lockptr(mm, pmd);
>
> - if (!spin_trylock(pml))
> + if (ptl != pml && !spin_trylock(pml))
> return false;
>
> *pmdval = pmdp_get(pmd);
> pmd_clear(pmd);
> - spin_unlock(pml);
> + if (ptl != pml)
> + spin_unlock(pml);
> return true;
> }
>
> @@ -1934,7 +1936,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
> * from being repopulated by another thread.
> */
> if (can_reclaim_pt && direct_reclaim && addr == end)
> - direct_reclaim = zap_empty_pte_table(mm, pmd, &pmdval);
> + direct_reclaim = zap_empty_pte_table(mm, pmd, ptl, &pmdval);
>
> add_mm_rss_vec(mm, rss);
> lazy_mmu_mode_disable();
© 2016 - 2026 Red Hat, Inc.