[PATCH v3] mm/userfaultfd: detect VMA replacement after copy retry in mfill_copy_folio_retry()

David Carlier posted 1 patch 1 day, 10 hours ago
There is a newer version of this series
mm/userfaultfd.c | 64 ++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 57 insertions(+), 7 deletions(-)
[PATCH v3] mm/userfaultfd: detect VMA replacement after copy retry in mfill_copy_folio_retry()
Posted by David Carlier 1 day, 10 hours ago
In mfill_copy_folio_retry(), all locks are dropped to retry
copy_from_user() with page faults enabled. During this window, the VMA
can be replaced entirely (e.g. munmap + mmap + UFFDIO_REGISTER by
another thread), but the caller proceeds with a folio allocated from the
original VMA's backing store.

Checking ops alone is insufficient: the replacement VMA could be the
same type (e.g. shmem -> shmem) with identical flags but a different
backing inode. Take a snapshot of the VMA's inode and flags before
dropping locks, and compare after re-acquiring them; a reference is
held on the inode across the window so the pointer comparison cannot
be confused by the inode being freed and reallocated. If anything
changed, bail out with -EAGAIN.

Fixes: 56a3706fd7f9 ("shmem, userfaultfd: implement shmem uffd operations using vm_uffd_ops")
Suggested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Carlier <devnexen@gmail.com>
---
 mm/userfaultfd.c | 64 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 481ec7eb4442..d10eb81dc3b2 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -443,33 +443,83 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
 	return ret;
 }
 
+/*
+ * Identity snapshot of a VMA, taken before the locks protecting it are
+ * dropped, so that a wholesale replacement of the VMA can be detected
+ * once the locks are re-acquired.
+ */
+struct vma_snapshot {
+	struct inode *inode;	/* referenced via ihold(); NULL if anonymous */
+	vma_flags_t flags;
+};
+
+/*
+ * Record the identity of @vma (flags + backing inode) into @s.
+ *
+ * A reference is taken on the inode so that a later pointer comparison in
+ * vma_snapshot_changed() cannot be fooled by the inode being freed and its
+ * memory reused for a new inode; pair with vma_snapshot_release().
+ */
+static void vma_snapshot_take(struct vm_area_struct *vma,
+			      struct vma_snapshot *s)
+{
+	/* NOTE(review): memcpy/memcmp assumes the flags type is not directly
+	 * comparable/assignable (e.g. a bitmap) — confirm against vma_flags_t. */
+	memcpy(&s->flags, &vma->flags, sizeof(s->flags));
+	if (vma->vm_file) {
+		s->inode = vma->vm_file->f_inode;
+		ihold(s->inode);
+	} else {
+		s->inode = NULL;
+	}
+}
+
+/*
+ * Return true if @vma no longer matches the snapshot taken before the
+ * locks were dropped, i.e. the VMA was replaced or modified meanwhile.
+ */
+static bool vma_snapshot_changed(struct vm_area_struct *vma,
+				 struct vma_snapshot *s)
+{
+	if (memcmp(&s->flags, &vma->flags, sizeof(s->flags)))
+		return true;
+
+	/*
+	 * The replacement VMA may be anonymous (vm_file == NULL); check
+	 * vm_file before dereferencing it to avoid a NULL pointer deref.
+	 */
+	if (s->inode && (!vma->vm_file || vma->vm_file->f_inode != s->inode))
+		return true;
+
+	if (!s->inode && !vma_is_anonymous(vma))
+		return true;
+
+	return false;
+}
+
+/*
+ * Drop the inode reference taken by vma_snapshot_take().  Clearing the
+ * pointer makes the release idempotent, so it is safe on every exit path.
+ */
+static void vma_snapshot_release(struct vma_snapshot *s)
+{
+	if (s->inode) {
+		iput(s->inode);
+		s->inode = NULL;
+	}
+}
+
 static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
 {
 	unsigned long src_addr = state->src_addr;
+	struct vma_snapshot s;
 	void *kaddr;
 	int err;
 
+	/* Take a quick snapshot of the current vma */
+	vma_snapshot_take(state->vma, &s);
+
 	/* retry copying with mm_lock dropped */
 	mfill_put_vma(state);
 
 	kaddr = kmap_local_folio(folio, 0);
 	err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE);
 	kunmap_local(kaddr);
-	if (unlikely(err))
-		return -EFAULT;
+	if (unlikely(err)) {
+		err = -EFAULT;
+		goto out;
+	}
 
 	flush_dcache_folio(folio);
 
 	/* reget VMA and PMD, they could change underneath us */
 	err = mfill_get_vma(state);
 	if (err)
-		return err;
+		goto out;
 
-	err = mfill_establish_pmd(state);
-	if (err)
-		return err;
+	/*
+	 * The VMA may have been replaced wholesale (e.g. munmap + mmap +
+	 * UFFDIO_REGISTER by another thread) while all locks were dropped;
+	 * -EAGAIN tells the caller to restart against the new VMA.
+	 */
+	if (vma_snapshot_changed(state->vma, &s)) {
+		err = -EAGAIN;
+		goto out;
+	}
 
-	return 0;
+	err = mfill_establish_pmd(state);
+out:
+	/* Drops the inode reference on every path, success or failure. */
+	vma_snapshot_release(&s);
+	return err;
 }
 
 static int __mfill_atomic_pte(struct mfill_state *state,
-- 
2.53.0