BUG_ON() is deprecated [1]. Convert all the BUG_ON()s and VM_BUG_ON()s
to use VM_WARN_ON_ONCE().
While at it, also convert the WARN_ON_ONCE()s in move_pages() to use
VM_WARN_ON_ONCE(), as the relevant conditions are already checked in
validate_range() in move_pages()'s caller.
[1] https://www.kernel.org/doc/html/v6.15/process/coding-style.html#use-warn-rather-than-bug
Signed-off-by: Tal Zussman <tz2294@columbia.edu>
---
fs/userfaultfd.c | 59 +++++++++++++++++++++++++-------------------------
mm/userfaultfd.c | 66 +++++++++++++++++++++++++++-----------------------------
2 files changed, 61 insertions(+), 64 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 22f4bf956ba1..80c95c712266 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -165,14 +165,14 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
{
if (refcount_dec_and_test(&ctx->refcount)) {
- VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
- VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
- VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
- VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
- VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
- VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
- VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
- VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
+ VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock));
+ VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_pending_wqh));
+ VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_wqh.lock));
+ VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_wqh));
+ VM_WARN_ON_ONCE(spin_is_locked(&ctx->event_wqh.lock));
+ VM_WARN_ON_ONCE(waitqueue_active(&ctx->event_wqh));
+ VM_WARN_ON_ONCE(spin_is_locked(&ctx->fd_wqh.lock));
+ VM_WARN_ON_ONCE(waitqueue_active(&ctx->fd_wqh));
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
}
@@ -383,12 +383,12 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (!ctx)
goto out;
- BUG_ON(ctx->mm != mm);
+ VM_WARN_ON_ONCE(ctx->mm != mm);
/* Any unrecognized flag is a bug. */
- VM_BUG_ON(reason & ~__VM_UFFD_FLAGS);
+ VM_WARN_ON_ONCE(reason & ~__VM_UFFD_FLAGS);
/* 0 or > 1 flags set is a bug; we expect exactly 1. */
- VM_BUG_ON(!reason || (reason & (reason - 1)));
+ VM_WARN_ON_ONCE(!reason || (reason & (reason - 1)));
if (ctx->features & UFFD_FEATURE_SIGBUS)
goto out;
@@ -411,12 +411,11 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* to be sure not to return SIGBUS erroneously on
* nowait invocations.
*/
- BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
+ VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM
if (printk_ratelimit()) {
- printk(KERN_WARNING
- "FAULT_FLAG_ALLOW_RETRY missing %x\n",
- vmf->flags);
+ pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n",
+ vmf->flags);
dump_stack();
}
#endif
@@ -602,7 +601,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
*/
out:
atomic_dec(&ctx->mmap_changing);
- VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
+ VM_WARN_ON_ONCE(atomic_read(&ctx->mmap_changing) < 0);
userfaultfd_ctx_put(ctx);
}
@@ -710,7 +709,7 @@ void dup_userfaultfd_fail(struct list_head *fcs)
struct userfaultfd_ctx *ctx = fctx->new;
atomic_dec(&octx->mmap_changing);
- VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);
+ VM_WARN_ON_ONCE(atomic_read(&octx->mmap_changing) < 0);
userfaultfd_ctx_put(octx);
userfaultfd_ctx_put(ctx);
@@ -1317,8 +1316,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
do {
cond_resched();
- BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
- !!(cur->vm_flags & __VM_UFFD_FLAGS));
+ VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^
+ !!(cur->vm_flags & __VM_UFFD_FLAGS));
/* check not compatible vmas */
ret = -EINVAL;
@@ -1372,7 +1371,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
found = true;
} for_each_vma_range(vmi, cur, end);
- BUG_ON(!found);
+ VM_WARN_ON_ONCE(!found);
ret = userfaultfd_register_range(ctx, vma, vm_flags, start, end,
wp_async);
@@ -1464,8 +1463,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
do {
cond_resched();
- BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
- !!(cur->vm_flags & __VM_UFFD_FLAGS));
+ VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^
+ !!(cur->vm_flags & __VM_UFFD_FLAGS));
/*
* Check not compatible vmas, not strictly required
@@ -1479,7 +1478,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
found = true;
} for_each_vma_range(vmi, cur, end);
- BUG_ON(!found);
+ VM_WARN_ON_ONCE(!found);
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
@@ -1490,7 +1489,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
for_each_vma_range(vmi, vma, end) {
cond_resched();
- BUG_ON(!vma_can_userfault(vma, vma->vm_flags, wp_async));
+ VM_WARN_ON_ONCE(!vma_can_userfault(vma, vma->vm_flags, wp_async));
/*
* Nothing to do: this vma is already registered into this
@@ -1564,7 +1563,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
* len == 0 means wake all and we don't want to wake all here,
* so check it again to be sure.
*/
- VM_BUG_ON(!range.len);
+ VM_WARN_ON_ONCE(!range.len);
wake_userfault(ctx, &range);
ret = 0;
@@ -1621,7 +1620,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
return -EFAULT;
if (ret < 0)
goto out;
- BUG_ON(!ret);
+ VM_WARN_ON_ONCE(!ret);
/* len == 0 would wake all */
range.len = ret;
if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE)) {
@@ -1676,7 +1675,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
if (ret < 0)
goto out;
/* len == 0 would wake all */
- BUG_ON(!ret);
+ VM_WARN_ON_ONCE(!ret);
range.len = ret;
if (!(uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_DONTWAKE)) {
range.start = uffdio_zeropage.range.start;
@@ -1788,7 +1787,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
goto out;
/* len == 0 would wake all */
- BUG_ON(!ret);
+ VM_WARN_ON_ONCE(!ret);
range.len = ret;
if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) {
range.start = uffdio_continue.range.start;
@@ -1845,7 +1844,7 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
goto out;
/* len == 0 would wake all */
- BUG_ON(!ret);
+ VM_WARN_ON_ONCE(!ret);
range.len = ret;
if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) {
range.start = uffdio_poison.range.start;
@@ -2106,7 +2105,7 @@ static int new_userfaultfd(int flags)
struct file *file;
int fd;
- BUG_ON(!current->mm);
+ VM_WARN_ON_ONCE(!current->mm);
/* Check the UFFD_* constants for consistency. */
BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index bc473ad21202..41e67ded5a6e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -561,7 +561,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
}
while (src_addr < src_start + len) {
- BUG_ON(dst_addr >= dst_start + len);
+ VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
/*
* Serialize via vma_lock and hugetlb_fault_mutex.
@@ -602,7 +602,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
if (unlikely(err == -ENOENT)) {
up_read(&ctx->map_changing_lock);
uffd_mfill_unlock(dst_vma);
- BUG_ON(!folio);
+ VM_WARN_ON_ONCE(!folio);
err = copy_folio_from_user(folio,
(const void __user *)src_addr, true);
@@ -614,7 +614,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
dst_vma = NULL;
goto retry;
} else
- BUG_ON(folio);
+ VM_WARN_ON_ONCE(folio);
if (!err) {
dst_addr += vma_hpagesize;
@@ -635,9 +635,9 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
out:
if (folio)
folio_put(folio);
- BUG_ON(copied < 0);
- BUG_ON(err > 0);
- BUG_ON(!copied && !err);
+ VM_WARN_ON_ONCE(copied < 0);
+ VM_WARN_ON_ONCE(err > 0);
+ VM_WARN_ON_ONCE(!copied && !err);
return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
@@ -711,12 +711,12 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
/*
* Sanitize the command parameters:
*/
- BUG_ON(dst_start & ~PAGE_MASK);
- BUG_ON(len & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(dst_start & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(len & ~PAGE_MASK);
/* Does the address range wrap, or is the span zero-sized? */
- BUG_ON(src_start + len <= src_start);
- BUG_ON(dst_start + len <= dst_start);
+ VM_WARN_ON_ONCE(src_start + len <= src_start);
+ VM_WARN_ON_ONCE(dst_start + len <= dst_start);
src_addr = src_start;
dst_addr = dst_start;
@@ -775,7 +775,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
while (src_addr < src_start + len) {
pmd_t dst_pmdval;
- BUG_ON(dst_addr >= dst_start + len);
+ VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
if (unlikely(!dst_pmd)) {
@@ -818,7 +818,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
up_read(&ctx->map_changing_lock);
uffd_mfill_unlock(dst_vma);
- BUG_ON(!folio);
+ VM_WARN_ON_ONCE(!folio);
kaddr = kmap_local_folio(folio, 0);
err = copy_from_user(kaddr,
@@ -832,7 +832,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
flush_dcache_folio(folio);
goto retry;
} else
- BUG_ON(folio);
+ VM_WARN_ON_ONCE(folio);
if (!err) {
dst_addr += PAGE_SIZE;
@@ -852,9 +852,9 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
out:
if (folio)
folio_put(folio);
- BUG_ON(copied < 0);
- BUG_ON(err > 0);
- BUG_ON(!copied && !err);
+ VM_WARN_ON_ONCE(copied < 0);
+ VM_WARN_ON_ONCE(err > 0);
+ VM_WARN_ON_ONCE(!copied && !err);
return copied ? copied : err;
}
@@ -940,11 +940,11 @@ int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
/*
* Sanitize the command parameters:
*/
- BUG_ON(start & ~PAGE_MASK);
- BUG_ON(len & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(start & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(len & ~PAGE_MASK);
/* Does the address range wrap, or is the span zero-sized? */
- BUG_ON(start + len <= start);
+ VM_WARN_ON_ONCE(start + len <= start);
mmap_read_lock(dst_mm);
@@ -1709,15 +1709,13 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
ssize_t moved = 0;
/* Sanitize the command parameters. */
- if (WARN_ON_ONCE(src_start & ~PAGE_MASK) ||
- WARN_ON_ONCE(dst_start & ~PAGE_MASK) ||
- WARN_ON_ONCE(len & ~PAGE_MASK))
- goto out;
+ VM_WARN_ON_ONCE(src_start & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(dst_start & ~PAGE_MASK);
+ VM_WARN_ON_ONCE(len & ~PAGE_MASK);
/* Does the address range wrap, or is the span zero-sized? */
- if (WARN_ON_ONCE(src_start + len <= src_start) ||
- WARN_ON_ONCE(dst_start + len <= dst_start))
- goto out;
+ VM_WARN_ON_ONCE(src_start + len <= src_start);
+ VM_WARN_ON_ONCE(dst_start + len <= dst_start);
err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
if (err)
@@ -1867,9 +1865,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
up_read(&ctx->map_changing_lock);
uffd_move_unlock(dst_vma, src_vma);
out:
- VM_WARN_ON(moved < 0);
- VM_WARN_ON(err > 0);
- VM_WARN_ON(!moved && !err);
+ VM_WARN_ON_ONCE(moved < 0);
+ VM_WARN_ON_ONCE(err > 0);
+ VM_WARN_ON_ONCE(!moved && !err);
return moved ? moved : err;
}
@@ -1956,9 +1954,9 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
for_each_vma_range(vmi, vma, end) {
cond_resched();
- BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
- BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
- vma->vm_userfaultfd_ctx.ctx != ctx);
+ VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async));
+ VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx &&
+ vma->vm_userfaultfd_ctx.ctx != ctx);
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
@@ -2035,8 +2033,8 @@ void userfaultfd_release_all(struct mm_struct *mm,
prev = NULL;
for_each_vma(vmi, vma) {
cond_resched();
- BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
- !!(vma->vm_flags & __VM_UFFD_FLAGS));
+ VM_WARN_ON_ONCE(!!vma->vm_userfaultfd_ctx.ctx ^
+ !!(vma->vm_flags & __VM_UFFD_FLAGS));
if (vma->vm_userfaultfd_ctx.ctx != ctx) {
prev = vma;
continue;
--
2.39.5
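[Editor's note: for readers less familiar with these macros, here is roughly
what the conversion trades, paraphrased from include/asm-generic/bug.h and
include/linux/mmdebug.h (not verbatim; details vary by kernel version,
config, and architecture):

	/* BUG_ON() is always compiled in and kills the task/machine. */
	#define BUG_ON(cond)		do { if (unlikely(cond)) BUG(); } while (0)

	#ifdef CONFIG_DEBUG_VM
	#define VM_BUG_ON(cond)		BUG_ON(cond)
	/* Warn (taint + backtrace) at most once per call site, then continue. */
	#define VM_WARN_ON_ONCE(cond)	(void)WARN_ON_ONCE(cond)
	#else
	/* Without CONFIG_DEBUG_VM the VM_* checks compile away entirely;
	 * BUILD_BUG_ON_INVALID() only type-checks cond, never evaluates it. */
	#define VM_BUG_ON(cond)		BUILD_BUG_ON_INVALID(cond)
	#define VM_WARN_ON_ONCE(cond)	BUILD_BUG_ON_INVALID(cond)
	#endif

So converting BUG_ON() both downgrades the failure mode (warn instead of
panic) and makes the check debug-only.]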
On Sat, Jun 07, 2025 at 02:40:01AM -0400, Tal Zussman wrote:
> BUG_ON() is deprecated [1]. Convert all the BUG_ON()s and VM_BUG_ON()s
> to use VM_WARN_ON_ONCE().
>
> While at it, also convert the WARN_ON_ONCE()s in move_pages() to use
> VM_WARN_ON_ONCE(), as the relevant conditions are already checked in
> validate_range() in move_pages()'s caller.
>
> [1] https://www.kernel.org/doc/html/v6.15/process/coding-style.html#use-warn-rather-than-bug
>
> Signed-off-by: Tal Zussman <tz2294@columbia.edu>
> ---
> fs/userfaultfd.c | 59 +++++++++++++++++++++++++-------------------------
> mm/userfaultfd.c | 66 +++++++++++++++++++++++++++-----------------------------
> 2 files changed, 61 insertions(+), 64 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 22f4bf956ba1..80c95c712266 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -165,14 +165,14 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
> static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
> {
> if (refcount_dec_and_test(&ctx->refcount)) {
> - VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
> - VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
> - VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
> - VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
> - VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
> - VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
> - VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
> - VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock));
> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_pending_wqh));
> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_wqh.lock));
> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_wqh));
> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->event_wqh.lock));
> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->event_wqh));
> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fd_wqh.lock));
> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fd_wqh));
> mmdrop(ctx->mm);
> kmem_cache_free(userfaultfd_ctx_cachep, ctx);
I didn't follow the latest discussions on BUG_ON closely, but I just
stumbled on this chunk, and it does look like slight overkill to use a
separate once-bool for each of them, even if the doc suggests
WARN_ON_ONCE().
David might have a better picture of what's our plan for mm to properly
assert while reducing the overhead as much as possible.
For this specific one, if we really want to convert, we could also merge
them into a single check, so one bool covers them all.
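[Editor's note: a sketch of that merged form (hypothetical, not part of the
patch):

	/* One VM_WARN_ON_ONCE(), so a single once-flag covers all eight
	 * conditions, at the cost of not knowing which one fired. */
	VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock) ||
			waitqueue_active(&ctx->fault_pending_wqh) ||
			spin_is_locked(&ctx->fault_wqh.lock) ||
			waitqueue_active(&ctx->fault_wqh) ||
			spin_is_locked(&ctx->event_wqh.lock) ||
			waitqueue_active(&ctx->event_wqh) ||
			spin_is_locked(&ctx->fd_wqh.lock) ||
			waitqueue_active(&ctx->fd_wqh));]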
--
Peter Xu
On 10.06.25 15:11, Peter Xu wrote:
> On Sat, Jun 07, 2025 at 02:40:01AM -0400, Tal Zussman wrote:
>> BUG_ON() is deprecated [1]. Convert all the BUG_ON()s and VM_BUG_ON()s
>> to use VM_WARN_ON_ONCE().
>>
>> While at it, also convert the WARN_ON_ONCE()s in move_pages() to use
>> VM_WARN_ON_ONCE(), as the relevant conditions are already checked in
>> validate_range() in move_pages()'s caller.
>>
>> [1] https://www.kernel.org/doc/html/v6.15/process/coding-style.html#use-warn-rather-than-bug
>>
>> Signed-off-by: Tal Zussman <tz2294@columbia.edu>
>> ---
>> fs/userfaultfd.c | 59 +++++++++++++++++++++++++-------------------------
>> mm/userfaultfd.c | 66 +++++++++++++++++++++++++++-----------------------------
>> 2 files changed, 61 insertions(+), 64 deletions(-)
>>
>> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
>> index 22f4bf956ba1..80c95c712266 100644
>> --- a/fs/userfaultfd.c
>> +++ b/fs/userfaultfd.c
>> @@ -165,14 +165,14 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
>> static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
>> {
>> if (refcount_dec_and_test(&ctx->refcount)) {
>> - VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock));
>> - VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
>> - VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
>> - VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
>> - VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
>> - VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
>> - VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
>> - VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
>> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock));
>> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_pending_wqh));
>> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_wqh.lock));
>> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_wqh));
>> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->event_wqh.lock));
>> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->event_wqh));
>> + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fd_wqh.lock));
>> + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fd_wqh));
>> mmdrop(ctx->mm);
>> kmem_cache_free(userfaultfd_ctx_cachep, ctx);
>
> I didn't follow the latest discussions on BUG_ON closely, but I just
> stumbled on this chunk, and it does look like slight overkill to use a
> separate once-bool for each of them, even if the doc suggests
> WARN_ON_ONCE().
>
> David might have a better picture of what's our plan for mm to properly
> assert while reducing the overhead as much as possible.
There is currently still a discussion about whether VM_WARN_ON and
VM_WARN_ON_ONCE could be unified.
In a CONFIG_DEBUG_VM kernel, the overhead of a couple of booleans is
usually the least concern (everything is big and slow already) :)
>
> For this specific one, if we really want to convert, we could also merge
> them into a single check, so one bool covers them all.
One loses precision, but yeah, they are supposed to be found during
early testing, in which case one can usually reproduce + debug fairly
easily.
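[Editor's note: the per-callsite state being discussed comes from the ONCE
machinery behind WARN_ON_ONCE(); roughly (a paraphrased sketch of
include/linux/once_lite.h, not the verbatim definition):

	#define WARN_ON_ONCE(cond) ({					\
		static bool __warned;	/* one flag per call site */	\
		bool __ret = !!(cond);					\
									\
		if (unlikely(__ret) && !__warned) {			\
			__warned = true;				\
			WARN_ON(1);	/* taint + backtrace, once */	\
		}							\
		unlikely(__ret);					\
	})]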
--
Cheers,
David / dhildenb
On 07.06.25 08:40, Tal Zussman wrote:
> BUG_ON() is deprecated [1]. Convert all the BUG_ON()s and VM_BUG_ON()s
> to use VM_WARN_ON_ONCE().
>
> While at it, also convert the WARN_ON_ONCE()s in move_pages() to use
> VM_WARN_ON_ONCE(), as the relevant conditions are already checked in
> validate_range() in move_pages()'s caller.
>
> [1] https://www.kernel.org/doc/html/v6.15/process/coding-style.html#use-warn-rather-than-bug
>
> Signed-off-by: Tal Zussman <tz2294@columbia.edu>
> ---
[...]
> if (ctx->features & UFFD_FEATURE_SIGBUS)
> goto out;
> @@ -411,12 +411,11 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
> * to be sure not to return SIGBUS erroneously on
> * nowait invocations.
> */
> - BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
> + VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
> #ifdef CONFIG_DEBUG_VM
> if (printk_ratelimit()) {
> - printk(KERN_WARNING
> - "FAULT_FLAG_ALLOW_RETRY missing %x\n",
> - vmf->flags);
> + pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n",
> + vmf->flags);
You didn't cover that in the patch description.
I do wonder if we really still want the dump_stack() here and could
simplify to
pr_warn_ratelimited().
But I recall that the stack was helpful at least once for me (well, I
was able to reproduce and could have figured it out differently.).
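[Editor's note: a sketch of that simplification (hypothetical, not what the
patch does; it would lose the backtrace):

	#ifdef CONFIG_DEBUG_VM
		pr_warn_ratelimited("FAULT_FLAG_ALLOW_RETRY missing %x\n",
				    vmf->flags);
	#endif]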
[...]
> err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
> if (err)
> @@ -1867,9 +1865,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
> up_read(&ctx->map_changing_lock);
> uffd_move_unlock(dst_vma, src_vma);
> out:
> - VM_WARN_ON(moved < 0);
> - VM_WARN_ON(err > 0);
> - VM_WARN_ON(!moved && !err);
> + VM_WARN_ON_ONCE(moved < 0);
> + VM_WARN_ON_ONCE(err > 0);
> + VM_WARN_ON_ONCE(!moved && !err);
> return moved ? moved : err;
Here you convert VM_WARN_ON to VM_WARN_ON_ONCE without stating it in the
description (including the why).
> @@ -1956,9 +1954,9 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
> for_each_vma_range(vmi, vma, end) {
> cond_resched();
>
> - BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
> - BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
> - vma->vm_userfaultfd_ctx.ctx != ctx);
> + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async));
> + VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx &&
> + vma->vm_userfaultfd_ctx.ctx != ctx);
> WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
Which raises the question of why this one should still be a WARN.
--
Cheers,
David / dhildenb
On Tue, Jun 10, 2025 at 3:26 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 07.06.25 08:40, Tal Zussman wrote:
> >
> > if (ctx->features & UFFD_FEATURE_SIGBUS)
> > goto out;
> > @@ -411,12 +411,11 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
> > * to be sure not to return SIGBUS erroneously on
> > * nowait invocations.
> > */
> > - BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
> > + VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
> > #ifdef CONFIG_DEBUG_VM
> > if (printk_ratelimit()) {
> > - printk(KERN_WARNING
> > - "FAULT_FLAG_ALLOW_RETRY missing %x\n",
> > - vmf->flags);
> > + pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n",
> > + vmf->flags);
>
> You didn't cover that in the patch description.
>
> I do wonder if we really still want the dump_stack() here and could
> simplify to
>
> pr_warn_ratelimited().
>
> But I recall that the stack was helpful at least once for me (well, I
> was able to reproduce and could have figured it out differently.).
I'll include this in the description as well. Given that you found the stack
helpful before, I'll leave it as-is for now.
> [...]
>
> > err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
> > if (err)
> > @@ -1867,9 +1865,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
> > up_read(&ctx->map_changing_lock);
> > uffd_move_unlock(dst_vma, src_vma);
> > out:
> > - VM_WARN_ON(moved < 0);
> > - VM_WARN_ON(err > 0);
> > - VM_WARN_ON(!moved && !err);
> > + VM_WARN_ON_ONCE(moved < 0);
> > + VM_WARN_ON_ONCE(err > 0);
> > + VM_WARN_ON_ONCE(!moved && !err);
> > return moved ? moved : err;
>
>
> Here you convert VM_WARN_ON to VM_WARN_ON_ONCE without stating it in the
> description (including the why).
Will update the description. These checks represent impossible conditions
and are analogous to the BUG_ON()s in move_pages(), but were added later
and used VM_WARN_ON() in place of BUG_ON(). VM_WARN_ON_ONCE() is likely
a better fit, as with the other conversions.
> > @@ -1956,9 +1954,9 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
> > for_each_vma_range(vmi, vma, end) {
> > cond_resched();
> >
> > - BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
> > - BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
> > - vma->vm_userfaultfd_ctx.ctx != ctx);
> > + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async));
> > + VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx &&
> > + vma->vm_userfaultfd_ctx.ctx != ctx);
> > WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
>
> Which raises the question of why this one should still be a WARN.
After checking it, it looks like the relevant condition is enforced in the
registration path, so this can be converted to a debug check. I'll update
the patch accordingly.
However, I think the checks in userfaultfd_register() may be redundant.
First, it checks !(cur->vm_flags & VM_MAYWRITE). Then, after a hugetlb
check, it checks
((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE)), which should
never be hit.
Am I missing something?
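[Editor's note: the two checks in question look roughly like this
(paraphrased from the userfaultfd_register() loop, not verbatim):

	ret = -EPERM;
	/* First check: rejects every vma without VM_MAYWRITE. */
	if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
		goto out_unlock;

	/* ... hugetlb checks ... */

	/* Second check: can never trigger, given the first one above. */
	if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE))
		goto out_unlock;]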
Additionally, the comment above the first !(cur->vm_flags & VM_MAYWRITE)
check is a bit confusing. It seems to be based on the fact that file-backed
MAP_SHARED mappings without write permissions will not have VM_MAYWRITE set,
while MAP_PRIVATE mappings will always(?) have it set, but doesn't say so
explicitly. Am I understanding this check correctly?
Thanks for the review!
> --
> Cheers,
>
> David / dhildenb
>
On 18.06.25 01:10, Tal Zussman wrote:
> On Tue, Jun 10, 2025 at 3:26 AM David Hildenbrand <david@redhat.com> wrote:
>>
>> On 07.06.25 08:40, Tal Zussman wrote:
>>>
>>> if (ctx->features & UFFD_FEATURE_SIGBUS)
>>> goto out;
>>> @@ -411,12 +411,11 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
>>> * to be sure not to return SIGBUS erroneously on
>>> * nowait invocations.
>>> */
>>> - BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
>>> + VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
>>> #ifdef CONFIG_DEBUG_VM
>>> if (printk_ratelimit()) {
>>> - printk(KERN_WARNING
>>> - "FAULT_FLAG_ALLOW_RETRY missing %x\n",
>>> - vmf->flags);
>>> + pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n",
>>> + vmf->flags);
>>
>> You didn't cover that in the patch description.
>>
>> I do wonder if we really still want the dump_stack() here and could
>> simplify to
>>
>> pr_warn_ratelimited().
>>
>> But I recall that the stack was helpful at least once for me (well, I
>> was able to reproduce and could have figured it out differently.).
>
> I'll include this in the description as well. Given that you found the stack
> helpful before, I'll leave it as-is for now.
>
>> [...]
>>
>>> err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
>>> if (err)
>>> @@ -1867,9 +1865,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
>>> up_read(&ctx->map_changing_lock);
>>> uffd_move_unlock(dst_vma, src_vma);
>>> out:
>>> - VM_WARN_ON(moved < 0);
>>> - VM_WARN_ON(err > 0);
>>> - VM_WARN_ON(!moved && !err);
>>> + VM_WARN_ON_ONCE(moved < 0);
>>> + VM_WARN_ON_ONCE(err > 0);
>>> + VM_WARN_ON_ONCE(!moved && !err);
>>> return moved ? moved : err;
>>
>>
>> Here you convert VM_WARN_ON to VM_WARN_ON_ONCE without stating it in the
>> description (including the why).
>
> Will update the description. These checks represent impossible conditions
> and are analogous to the BUG_ON()s in move_pages(), but were added later
> and used VM_WARN_ON() in place of BUG_ON(). VM_WARN_ON_ONCE() is likely
> a better fit, as with the other conversions.
>
>>> @@ -1956,9 +1954,9 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
>>> for_each_vma_range(vmi, vma, end) {
>>> cond_resched();
>>>
>>> - BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
>>> - BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
>>> - vma->vm_userfaultfd_ctx.ctx != ctx);
>>> + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async));
>>> + VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx &&
>>> + vma->vm_userfaultfd_ctx.ctx != ctx);
>>> WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
>>
>> Which raises the question of why this one should still be a WARN.
>
> After checking it, it looks like the relevant condition is enforced in the
> registration path, so this can be converted to a debug check. I'll update
> the patch accordingly.
Sorry for the late reply. Yeah, a lot of that probably needs a cleanup.
>
> However, I think the checks in userfaultfd_register() may be redundant.
> First, it checks !(cur->vm_flags & VM_MAYWRITE). Then, after a hugetlb
> check, it checks
> ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE)), which should
> never be hit.
Yes, likely.
>
> Am I missing something?
>
> Additionally, the comment above the first !(cur->vm_flags & VM_MAYWRITE)
> check is a bit confusing. It seems to be based on the fact that file-backed
> MAP_SHARED mappings without write permissions will not have VM_MAYWRITE set,
> while MAP_PRIVATE mappings will always(?) have it set, but doesn't say so
> explicitly. Am I understanding this check correctly?
Private anon mappings should always have VM_MAYWRITE (there is no
driver that really changes that).
For other mappings (MAP_SHARED), VM_MAYWRITE is cleared for R/O files I
think. VM_MAYWRITE really just tells you that you can do
mprotect(PROT_WRITE) later, to effectively set VM_WRITE.
So if VM_MAYWRITE is clear, we cannot possibly write to such mappings
later (not even after mprotect()).
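[Editor's note: a small userspace demo of that distinction (illustrative
sketch, minimal error handling; assumes any readable file, /etc/hostname
here):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pg = sysconf(_SC_PAGESIZE);
		int fd = open("/etc/hostname", O_RDONLY);

		/* MAP_SHARED of an fd opened O_RDONLY: VM_MAYWRITE is clear,
		 * so upgrading to PROT_WRITE must fail (EACCES). */
		void *shared = mmap(NULL, pg, PROT_READ, MAP_SHARED, fd, 0);
		printf("shared:  mprotect(PROT_WRITE) -> %d\n",
		       mprotect(shared, pg, PROT_READ | PROT_WRITE));

		/* MAP_PRIVATE keeps VM_MAYWRITE (writes would go to a private
		 * copy), so the same upgrade succeeds. */
		void *priv = mmap(NULL, pg, PROT_READ, MAP_PRIVATE, fd, 0);
		printf("private: mprotect(PROT_WRITE) -> %d\n",
		       mprotect(priv, pg, PROT_READ | PROT_WRITE));
		return 0;
	}

It prints -1 for the shared case and 0 for the private one.]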
--
Cheers,
David / dhildenb