In move_pages_pte(), we may modify the dst_pte and src_pte after acquiring
the ptl, so convert it to using pte_offset_map_rw_nolock(). But since we
will use pte_same() to detect the change of the pte entry, there is no
need to get pmdval, so just pass a dummy variable to it.
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
---
mm/userfaultfd.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index ce13c40626472..48b87c62fc3dd 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1135,7 +1135,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
spinlock_t *src_ptl, *dst_ptl;
pte_t *src_pte = NULL;
pte_t *dst_pte = NULL;
-
+ pmd_t dummy_pmdval;
struct folio *src_folio = NULL;
struct anon_vma *src_anon_vma = NULL;
struct mmu_notifier_range range;
@@ -1146,7 +1146,14 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
src_addr, src_addr + PAGE_SIZE);
mmu_notifier_invalidate_range_start(&range);
retry:
- dst_pte = pte_offset_map_nolock(mm, dst_pmd, dst_addr, &dst_ptl);
+ /*
+ * Use the maywrite version to indicate that dst_pte will be modified,
+ * but since we will use pte_same() to detect the change of the pte
+ * entry, there is no need to get pmdval, so just pass a dummy variable
+ * to it.
+ */
+ dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dummy_pmdval,
+ &dst_ptl);
/* Retry if a huge pmd materialized from under us */
if (unlikely(!dst_pte)) {
@@ -1154,7 +1161,9 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
goto out;
}
- src_pte = pte_offset_map_nolock(mm, src_pmd, src_addr, &src_ptl);
+ /* same as dst_pte */
+ src_pte = pte_offset_map_rw_nolock(mm, src_pmd, src_addr, &dummy_pmdval,
+ &src_ptl);
/*
* We held the mmap_lock for reading so MADV_DONTNEED
--
2.20.1