mm/userfaultfd.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
During this window, the VMA can be replaced with a different type (e.g.
hugetlb), making the caller's ops pointer stale. Subsequent use of the
stale ops can lead to incorrect folio handling or a kernel crash.
Pass the caller's ops into mfill_copy_folio_retry() and compare against
the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
if they differ so the operation can be retried.
Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
Signed-off-by: David Carlier <devnexen@gmail.com>
---
mm/userfaultfd.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 481ec7eb4442..214923a411c1 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
return ret;
}
-static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
+static int mfill_copy_folio_retry(struct mfill_state *state,
+ const struct vm_uffd_ops *ops,
+ struct folio *folio)
{
unsigned long src_addr = state->src_addr;
void *kaddr;
@@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
if (err)
return err;
+ /*
+ * The VMA type may have changed while the lock was dropped
+ * (e.g. replaced with a hugetlb mapping), making the caller's
+ * ops pointer stale.
+ */
+ if (vma_uffd_ops(state->vma) != ops)
+ return -EAGAIN;
+
err = mfill_establish_pmd(state);
if (err)
return err;
@@ -495,7 +505,7 @@ static int __mfill_atomic_pte(struct mfill_state *state,
* will take care of unlocking if needed.
*/
if (unlikely(ret)) {
- ret = mfill_copy_folio_retry(state, folio);
+ ret = mfill_copy_folio_retry(state, ops, folio);
if (ret)
goto err_folio_put;
}
--
2.53.0
On Thu, 9 Apr 2026 13:06:53 +0100 David Carlier <devnexen@gmail.com> wrote:
> mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> During this window, the VMA can be replaced with a different type (e.g.
> hugetlb), making the caller's ops pointer stale. Subsequent use of the
> stale ops can lead to incorrect folio handling or a kernel crash.
>
> Pass the caller's ops into mfill_copy_folio_retry() and compare against
> the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> if they differ so the operation can be retried.
>
> Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
> Signed-off-by: David Carlier <devnexen@gmail.com>
> ---
> mm/userfaultfd.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 481ec7eb4442..214923a411c1 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
> return ret;
> }
>
> -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
> +static int mfill_copy_folio_retry(struct mfill_state *state,
> + const struct vm_uffd_ops *ops,
> + struct folio *folio)
> {
> unsigned long src_addr = state->src_addr;
> void *kaddr;
> @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
> if (err)
> return err;
>
> + /*
> + * The VMA type may have changed while the lock was dropped
> + * (e.g. replaced with a hugetlb mapping), making the caller's
> + * ops pointer stale.
> + */
> + if (vma_uffd_ops(state->vma) != ops)
> + return -EAGAIN;
> +
hmm I am not sure if this is correct for shmem MAP_PRIVATE.
mfill_atomic_pte_copy() overrides ops to &anon_uffd_ops for MAP_PRIVATE
mappings:
if (!(state->vma->vm_flags & VM_SHARED))
ops = &anon_uffd_ops;
This overridden ops pointer propagates through __mfill_atomic_pte() into
mfill_copy_folio_retry(). But the new check here calls vma_uffd_ops()
which returns the original file-backed ops (e.g. &shmem_uffd_ops).
For shmem MAP_PRIVATE VMAs, the comparison always fails even when
the VMA type has not changed.
Maybe save the original (non-overridden) ops before the MAP_PRIVATE override
and compare against that?
Hi Usama,
On Fri, Apr 10, 2026 at 04:48:08AM -0700, Usama Arif wrote:
> On Thu, 9 Apr 2026 13:06:53 +0100 David Carlier <devnexen@gmail.com> wrote:
>
> > mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> > During this window, the VMA can be replaced with a different type (e.g.
> > hugetlb), making the caller's ops pointer stale. Subsequent use of the
> > stale ops can lead to incorrect folio handling or a kernel crash.
> >
> > Pass the caller's ops into mfill_copy_folio_retry() and compare against
> > the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> > if they differ so the operation can be retried.
> >
> > Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
> > Signed-off-by: David Carlier <devnexen@gmail.com>
> > ---
> > mm/userfaultfd.c | 14 ++++++++++++--
> > 1 file changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > index 481ec7eb4442..214923a411c1 100644
> > --- a/mm/userfaultfd.c
> > +++ b/mm/userfaultfd.c
> > @@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
> > return ret;
> > }
> >
> > -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
> > +static int mfill_copy_folio_retry(struct mfill_state *state,
> > + const struct vm_uffd_ops *ops,
> > + struct folio *folio)
> > {
> > unsigned long src_addr = state->src_addr;
> > void *kaddr;
> > @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
> > if (err)
> > return err;
> >
> > + /*
> > + * The VMA type may have changed while the lock was dropped
> > + * (e.g. replaced with a hugetlb mapping), making the caller's
> > + * ops pointer stale.
> > + */
> > + if (vma_uffd_ops(state->vma) != ops)
> > + return -EAGAIN;
> > +
>
> hmm I am not sure if this is correct for shmem MAP_PRIVATE.
>
> mfill_atomic_pte_copy() overrides ops to &anon_uffd_ops for MAP_PRIVATE
> mappings:
>
> if (!(state->vma->vm_flags & VM_SHARED))
> ops = &anon_uffd_ops;
>
> This overridden ops pointer propagates through __mfill_atomic_pte() into
> mfill_copy_folio_retry(). But the new check here calls vma_uffd_ops()
> which returns the original file-backed ops (e.g. &shmem_uffd_ops).
> For shmem MAP_PRIVATE VMAs, the comparison always fails even when
> the VMA type has not changed.
Good catch.
@Andrew, can you please drop the patch for now?
> Maybe save the original (non-overridden) ops before the MAP_PRIVATE override
> and compare against that?
--
Sincerely yours,
Mike.
On Thu, Apr 09, 2026 at 01:06:53PM +0100, David Carlier wrote:
> mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> During this window, the VMA can be replaced with a different type (e.g.
> hugetlb), making the caller's ops pointer stale. Subsequent use of the
> stale ops can lead to incorrect folio handling or a kernel crash.
>
> Pass the caller's ops into mfill_copy_folio_retry() and compare against
> the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> if they differ so the operation can be retried.
>
> Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
> Signed-off-by: David Carlier <devnexen@gmail.com>
> ---
> mm/userfaultfd.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 481ec7eb4442..214923a411c1 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
> return ret;
> }
>
> -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
> +static int mfill_copy_folio_retry(struct mfill_state *state,
> + const struct vm_uffd_ops *ops,
> + struct folio *folio)
> {
> unsigned long src_addr = state->src_addr;
> void *kaddr;
> @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
> if (err)
> return err;
>
> + /*
> + * The VMA type may have changed while the lock was dropped
> + * (e.g. replaced with a hugetlb mapping), making the caller's
> + * ops pointer stale.
> + */
> + if (vma_uffd_ops(state->vma) != ops)
> + return -EAGAIN;
I agree with -EAGAIN here, but we discussed over all the things on possible
inode change and I don't know why we don't consider that.
I still think those should be considered.
If the vma snapshot idea is not welcomed, fine. We need to think of
something to cover those too. Current patch won't cover "ops unchaged" but
"inode changed", or offset changed, for example.
Thanks,
> +
> err = mfill_establish_pmd(state);
> if (err)
> return err;
> @@ -495,7 +505,7 @@ static int __mfill_atomic_pte(struct mfill_state *state,
> * will take care of unlocking if needed.
> */
> if (unlikely(ret)) {
> - ret = mfill_copy_folio_retry(state, folio);
> + ret = mfill_copy_folio_retry(state, ops, folio);
> if (ret)
> goto err_folio_put;
> }
> --
> 2.53.0
>
--
Peter Xu
On Thu, Apr 09, 2026 at 01:09:41PM -0400, Peter Xu wrote: > On Thu, Apr 09, 2026 at 01:06:53PM +0100, David Carlier wrote: > > @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio > > if (err) > > return err; > > > > + /* > > + * The VMA type may have changed while the lock was dropped > > + * (e.g. replaced with a hugetlb mapping), making the caller's > > + * ops pointer stale. > > + */ > > + if (vma_uffd_ops(state->vma) != ops) > > + return -EAGAIN; > > I agree with -EAGAIN here, but we discussed over all the things on possible > inode change and I don't know why we don't consider that. > > I still think those should be considered. > > If the vma snapshot idea is not welcomed, fine. We need to think of > something to cover those too. Current patch won't cover "ops unchaged" but > "inode changed", or offset changed, for example. This patch is enough to fix the regression introduced by my refactoring. The inode/file/vma_snapshot checks are needed to solve the issue that existed roughly for a decade. This should be a separate patch and it's really not urgent. > Thanks, > -- > Peter Xu > -- Sincerely yours, Mike.
On 4/9/26 14:06, David Carlier wrote:
> mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> During this window, the VMA can be replaced with a different type (e.g.
> hugetlb), making the caller's ops pointer stale. Subsequent use of the
> stale ops can lead to incorrect folio handling or a kernel crash.
>
> Pass the caller's ops into mfill_copy_folio_retry() and compare against
> the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> if they differ so the operation can be retried.
>
> Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
I don't have such sha1, is it a stale mm-unstable commit? Seems to be
4974a6aaa768 ("userfaultfd: mfill_atomic(): remove retry logic") now in
mm-unstable (and can further change)
> Signed-off-by: David Carlier <devnexen@gmail.com>
> ---
> mm/userfaultfd.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 481ec7eb4442..214923a411c1 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
> return ret;
> }
>
> -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
> +static int mfill_copy_folio_retry(struct mfill_state *state,
> + const struct vm_uffd_ops *ops,
> + struct folio *folio)
> {
> unsigned long src_addr = state->src_addr;
> void *kaddr;
> @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
> if (err)
> return err;
>
> + /*
> + * The VMA type may have changed while the lock was dropped
> + * (e.g. replaced with a hugetlb mapping), making the caller's
> + * ops pointer stale.
> + */
> + if (vma_uffd_ops(state->vma) != ops)
> + return -EAGAIN;
> +
> err = mfill_establish_pmd(state);
> if (err)
> return err;
> @@ -495,7 +505,7 @@ static int __mfill_atomic_pte(struct mfill_state *state,
> * will take care of unlocking if needed.
> */
> if (unlikely(ret)) {
> - ret = mfill_copy_folio_retry(state, folio);
> + ret = mfill_copy_folio_retry(state, ops, folio);
> if (ret)
> goto err_folio_put;
> }
On Thu, 9 Apr 2026 19:04:49 +0200 "Vlastimil Babka (SUSE)" <vbabka@kernel.org> wrote:
> On 4/9/26 14:06, David Carlier wrote:
> > mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> > During this window, the VMA can be replaced with a different type (e.g.
> > hugetlb), making the caller's ops pointer stale. Subsequent use of the
> > stale ops can lead to incorrect folio handling or a kernel crash.
> >
> > Pass the caller's ops into mfill_copy_folio_retry() and compare against
> > the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> > if they differ so the operation can be retried.
> >
> > Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
>
> I don't have such sha1, is it a stale mm-unstable commit? Seems to be
> 4974a6aaa768 ("userfaultfd: mfill_atomic(): remove retry logic") now in
> mm-unstable (and can further change)
Yup. I queued this as a squashable fix
"userfaultfd-mfill_atomic-remove-retry-logic-fix.patch". Against
Mike's "userfaultfd: mfill_atomic(): remove retry logic", queued for
2nd week of he merge window.
I'll remove that Fixes: tag - it changes every day and results in
"invalid Fixes:" emails from Mark.
On Thu, Apr 09, 2026 at 01:06:53PM +0100, David Carlier wrote:
> mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call.
> During this window, the VMA can be replaced with a different type (e.g.
> hugetlb), making the caller's ops pointer stale. Subsequent use of the
> stale ops can lead to incorrect folio handling or a kernel crash.
>
> Pass the caller's ops into mfill_copy_folio_retry() and compare against
> the current vma_uffd_ops() after re-acquiring the lock. Return -EAGAIN
> if they differ so the operation can be retried.
>
> Fixes: 59da5c32ffa3 ("userfaultfd: mfill_atomic(): remove retry logic")
> Signed-off-by: David Carlier <devnexen@gmail.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
> mm/userfaultfd.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 481ec7eb4442..214923a411c1 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -443,7 +443,9 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
> return ret;
> }
>
> -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
> +static int mfill_copy_folio_retry(struct mfill_state *state,
> + const struct vm_uffd_ops *ops,
> + struct folio *folio)
> {
> unsigned long src_addr = state->src_addr;
> void *kaddr;
> @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio
> if (err)
> return err;
>
> + /*
> + * The VMA type may have changed while the lock was dropped
> + * (e.g. replaced with a hugetlb mapping), making the caller's
> + * ops pointer stale.
> + */
> + if (vma_uffd_ops(state->vma) != ops)
> + return -EAGAIN;
> +
> err = mfill_establish_pmd(state);
> if (err)
> return err;
> @@ -495,7 +505,7 @@ static int __mfill_atomic_pte(struct mfill_state *state,
> * will take care of unlocking if needed.
> */
> if (unlikely(ret)) {
> - ret = mfill_copy_folio_retry(state, folio);
> + ret = mfill_copy_folio_retry(state, ops, folio);
> if (ret)
> goto err_folio_put;
> }
> --
> 2.53.0
>
--
Sincerely yours,
Mike.
© 2016 - 2026 Red Hat, Inc.