[PATCH v3 1/5] mm,hugetlb: Change mechanism to detect a COW on private mapping

Oscar Salvador posted 5 patches 3 months, 1 week ago
There is a newer version of this series
Posted by Oscar Salvador 3 months, 1 week ago
hugetlb_wp() checks whether the process is trying to COW on a private mapping
in order to know whether the reservation for that address was already consumed.
If it was consumed and we are the owner of the mapping, the folio will have to
be unmapped from the other processes.

Currently, that check is done by looking up the folio in the pagecache and
comparing it to the folio which is mapped in our pagetables.
If they differ, it means we already mapped it privately before, consuming a
reservation on the way.
All we are interested in is whether the mapped folio is anonymous, so we can
simplify and check for that instead.
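
Condensed, the check in hugetlb_wp() changes as follows (a sketch lifted from
the diff below, shown here only for contrast):

	/* before: compare against the folio looked up in the pagecache */
	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
			old_folio != pagecache_folio)
		cow_from_owner = true;

	/* after: an anonymous old_folio already means the reserve was consumed */
	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
	    folio_test_anon(old_folio))
		cow_from_owner = true;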

Also, we transition from a trylock to folio_lock(), since the trylock was only
needed when hugetlb_fault() had to lock both folios, in order to avoid deadlock.
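
The resulting locking sequence in hugetlb_fault(), again condensed from the
diff below (drop the PTL while sleeping on the folio lock, then revalidate
the PTE before calling hugetlb_wp()):

	folio = page_folio(pte_page(vmf.orig_pte));
	folio_get(folio);
	spin_unlock(vmf.ptl);
	folio_lock(folio);
	spin_lock(vmf.ptl);
	if (likely(pte_same(vmf.orig_pte,
		      huge_ptep_get(mm, vmf.address, vmf.pte))))
		ret = hugetlb_wp(&vmf);
	folio_unlock(folio);
	folio_put(folio);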

Fixes: 40549ba8f8e0 ("hugetlb: use new vma_lock for pmd sharing synchronization")
Reported-by: Gavin Guo <gavinguo@igalia.com>
Closes: https://lore.kernel.org/lkml/20250513093448.592150-1-gavinguo@igalia.com/
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Suggested-by: Peter Xu <peterx@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
---
 mm/hugetlb.c | 82 +++++++++++++++-------------------------------------
 1 file changed, 24 insertions(+), 58 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8746ed2fec13..87f2d8acdc8a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6152,8 +6152,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * cannot race with other handlers or page migration.
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
-static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
-		       struct vm_fault *vmf)
+static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -6215,16 +6214,17 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 		       PageAnonExclusive(&old_folio->page), &old_folio->page);
 
 	/*
-	 * If the process that created a MAP_PRIVATE mapping is about to
-	 * perform a COW due to a shared page count, attempt to satisfy
-	 * the allocation without using the existing reserves. The pagecache
-	 * page is used to determine if the reserve at this address was
-	 * consumed or not. If reserves were used, a partial faulted mapping
-	 * at the time of fork() could consume its reserves on COW instead
-	 * of the full address range.
+	 * If the process that created a MAP_PRIVATE mapping is about to perform
+	 * a COW due to a shared page count, attempt to satisfy the allocation
+	 * without using the existing reserves.
+	 * In order to determine whether this is a COW on a MAP_PRIVATE mapping it
+	 * is enough to check whether the old_folio is anonymous. This means that
+	 * the reserve for this address was consumed. If reserves were used, a
+	 * partial faulted mapping at the time of fork() could consume its reserves
+	 * on COW instead of the full address range.
 	 */
 	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
-			old_folio != pagecache_folio)
+	    folio_test_anon(old_folio))
 		cow_from_owner = true;
 
 	folio_get(old_folio);
@@ -6603,7 +6603,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	hugetlb_count_add(pages_per_huge_page(h), mm);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_wp(folio, vmf);
+		ret = hugetlb_wp(vmf);
 	}
 
 	spin_unlock(vmf->ptl);
@@ -6671,10 +6671,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	vm_fault_t ret;
 	u32 hash;
 	struct folio *folio = NULL;
-	struct folio *pagecache_folio = NULL;
 	struct hstate *h = hstate_vma(vma);
 	struct address_space *mapping;
-	int need_wait_lock = 0;
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address & huge_page_mask(h),
@@ -6769,8 +6767,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * If we are going to COW/unshare the mapping later, we examine the
 	 * pending reservations for this page now. This will ensure that any
 	 * allocations necessary to record that reservation occur outside the
-	 * spinlock. Also lookup the pagecache page now as it is used to
-	 * determine if a reservation has been consumed.
+	 * spinlock.
 	 */
 	if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
 	    !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) {
@@ -6780,11 +6777,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		/* Just decrements count, does not deallocate */
 		vma_end_reservation(h, vma, vmf.address);
-
-		pagecache_folio = filemap_lock_hugetlb_folio(h, mapping,
-							     vmf.pgoff);
-		if (IS_ERR(pagecache_folio))
-			pagecache_folio = NULL;
 	}
 
 	vmf.ptl = huge_pte_lock(h, mm, vmf.pte);
@@ -6798,10 +6790,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	    (flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
 		if (!userfaultfd_wp_async(vma)) {
 			spin_unlock(vmf.ptl);
-			if (pagecache_folio) {
-				folio_unlock(pagecache_folio);
-				folio_put(pagecache_folio);
-			}
 			hugetlb_vma_unlock_read(vma);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			return handle_userfault(&vmf, VM_UFFD_WP);
@@ -6813,24 +6801,20 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* Fallthrough to CoW */
 	}
 
-	/*
-	 * hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) and
-	 * pagecache_folio, so here we need take the former one
-	 * when folio != pagecache_folio or !pagecache_folio.
-	 */
-	folio = page_folio(pte_page(vmf.orig_pte));
-	if (folio != pagecache_folio)
-		if (!folio_trylock(folio)) {
-			need_wait_lock = 1;
-			goto out_ptl;
-		}
-
-	folio_get(folio);
-
 	if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
 		if (!huge_pte_write(vmf.orig_pte)) {
-			ret = hugetlb_wp(pagecache_folio, &vmf);
-			goto out_put_page;
+			/* hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) */
+			folio = page_folio(pte_page(vmf.orig_pte));
+			folio_get(folio);
+			spin_unlock(vmf.ptl);
+			folio_lock(folio);
+			spin_lock(vmf.ptl);
+			if (likely(pte_same(vmf.orig_pte,
+				      huge_ptep_get(mm, vmf.address, vmf.pte))))
+				ret = hugetlb_wp(&vmf);
+			folio_unlock(folio);
+			folio_put(folio);
+			goto out_ptl;
 		} else if (likely(flags & FAULT_FLAG_WRITE)) {
 			vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
 		}
@@ -6839,17 +6823,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (huge_ptep_set_access_flags(vma, vmf.address, vmf.pte, vmf.orig_pte,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, vmf.address, vmf.pte);
-out_put_page:
-	if (folio != pagecache_folio)
-		folio_unlock(folio);
-	folio_put(folio);
 out_ptl:
 	spin_unlock(vmf.ptl);
-
-	if (pagecache_folio) {
-		folio_unlock(pagecache_folio);
-		folio_put(pagecache_folio);
-	}
 out_mutex:
 	hugetlb_vma_unlock_read(vma);
 
@@ -6861,15 +6836,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma_end_read(vma);
 
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-	/*
-	 * Generally it's safe to hold refcount during waiting page lock. But
-	 * here we just wait to defer the next page fault to avoid busy loop and
-	 * the page is not used after unlocked before returning from the current
-	 * page fault. So we are safe from accessing freed page, even if we wait
-	 * here without taking refcount.
-	 */
-	if (need_wait_lock)
-		folio_wait_locked(folio);
 	return ret;
 }
 
-- 
2.50.0
Re: [PATCH v3 1/5] mm,hugetlb: Change mechanism to detect a COW on private mapping
Posted by Gavin Guo 3 months, 1 week ago
Hi Oscar,

On 6/27/25 18:29, Oscar Salvador wrote:
> [...]

Sorry for the late response. I've moved to a different project and won't
have time to work on this bug anymore, but I finally found time over the
weekend to run the test. It appears that the ABBA deadlock still exists.
Please refer to the log:

https://drive.google.com/file/d/1xep11ULPoB8Ttv0C0VxDoL7FPdOOccR7/view

The reproducer is here:
https://drive.google.com/file/d/1DVRnIW-vSayU5J1re9Ct_br3jJQU6Vpb/view?usp=drive_link

This callstack is particularly suspicious:
[  858.623348][   T34] INFO: task repro_20250402_:6302 blocked for more than 143 seconds.
[  858.624082][   T34]       Not tainted 6.16.0-rc3+ #37
[  858.624546][   T34] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  858.625308][   T34] task:repro_20250402_ state:D stack:28288 pid:6302  tgid:6272  ppid:3494   task_flags:0x400040 flags:0x00004006
[  858.626361][   T34] Call Trace:
[  858.626660][   T34]  <TASK>
[  858.626932][   T34]  __schedule+0x17ac/0x4f90
[  858.627347][   T34]  ? lockdep_unlock+0x74/0x100
[  858.627807][   T34]  ? schedule+0x158/0x330
[  858.628211][   T34]  ? __pfx___schedule+0x10/0x10
[  858.628660][   T34]  ? lock_acquire+0xf5/0x290
[  858.629104][   T34]  ? schedule+0x96/0x330
[  858.629500][   T34]  schedule+0x158/0x330
[  858.629899][   T34]  io_schedule+0x92/0x110
[  858.630294][   T34]  folio_wait_bit_common+0x69a/0xba0
[  858.630792][   T34]  ? __pfx_folio_wait_bit_common+0x10/0x10
[  858.631337][   T34]  ? __pfx_wake_page_function+0x10/0x10
[  858.631837][   T34]  ? do_raw_spin_lock+0x126/0x2a0
[  858.632291][   T34]  ? lock_acquire+0xf5/0x290
[  858.632711][   T34]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  858.633225][   T34]  hugetlb_fault+0x204e/0x2b40
[  858.633705][   T34]  ? __pfx_hugetlb_fault+0x10/0x10
[  858.634203][   T34]  handle_mm_fault+0x17b3/0x1c80
[  858.634658][   T34]  ? handle_mm_fault+0xdb/0x1c80
[  858.635102][   T34]  ? lock_vma_under_rcu+0xfe/0x770
[  858.635573][   T34]  ? lock_vma_under_rcu+0x6d7/0x770
[  858.636046][   T34]  ? __pfx_handle_mm_fault+0x10/0x10
[  858.636524][   T34]  ? __pfx_lock_vma_under_rcu+0x10/0x10
[  858.637042][   T34]  do_user_addr_fault+0xace/0x1490
[  858.637517][   T34]  ? __pfx_do_user_addr_fault+0x10/0x10
[  858.638033][   T34]  ? trace_page_fault_user+0xb9/0x260
[  858.638525][   T34]  exc_page_fault+0x75/0xe0
[  858.638943][   T34]  asm_exc_page_fault+0x26/0x30

I had a quick glance at that point in hugetlb_fault():
$ addr2line hugetlb_fault+0x204e/0x2b40 -e vmlinux -f -i
spin_lock
/home/gavin/os/work_kernel/./include/linux/spinlock.h:351
hugetlb_fault
/home/gavin/os/work_kernel/mm/hugetlb.c:6801

6787         if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
6788                 if (!huge_pte_write(vmf.orig_pte)) {
6789                         /*
6790                          * Anonymous folios need to be lock since hugetlb_wp()
6791                          * checks whether we can re-use the folio exclusively
6792                          * for us in case we are the only user of it.
6793                          */
6794                         folio = page_folio(pte_page(vmf.orig_pte));
6795                         folio_get(folio);
6796                         if (!folio_test_anon(folio))
6797                                 goto lock_unneeded;
6798
6799                         spin_unlock(vmf.ptl);
6800                         folio_lock(folio);
6801                         spin_lock(vmf.ptl);
6802                         if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(mm,
6803                                                vmf.address, vmf.pte))))
6804                                 goto unlock_folio;
6805 lock_unneeded:
6806                         ret = hugetlb_wp(&vmf);
6807 unlock_folio:
6808                         if (folio_test_anon(folio))
6809                                 folio_unlock(folio);
6810                         folio_put(folio);
6811                         goto out_ptl;

It appears that folio_lock() is the culprit, interacting with the
mutex_lock:

[  858.533925][   T34] INFO: task repro_20250402_:6273 blocked for more than 143 seconds.
[  858.535565][   T34]       Not tainted 6.16.0-rc3+ #37
[  858.536520][   T34] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  858.538045][   T34] task:repro_20250402_ state:D stack:25856 pid:6273  tgid:6272  ppid:3494   task_flags:0x400040 flags:0x00004006
[  858.540214][   T34] Call Trace:
[  858.540857][   T34]  <TASK>
[  858.541413][   T34]  __schedule+0x17ac/0x4f90
[  858.542298][   T34]  ? schedule+0x158/0x330
[  858.543116][   T34]  ? __pfx___schedule+0x10/0x10
[  858.544970][   T34]  ? lock_acquire+0xf5/0x290
[  858.545864][   T34]  ? schedule+0x96/0x330
[  858.546652][   T34]  ? schedule+0x96/0x330
[  858.547466][   T34]  schedule+0x158/0x330
[  858.548258][   T34]  schedule_preempt_disabled+0x15/0x30
[  858.549258][   T34]  __mutex_lock+0x61d/0xdb0
[  858.550099][   T34]  ? __mutex_lock+0x417/0xdb0
[  858.550988][   T34]  ? hugetlb_wp+0xfe2/0x3220
[  858.551829][   T34]  ? __pfx___mutex_lock+0x10/0x10
[  858.552754][   T34]  ? up_write+0x132/0x420
[  858.553555][   T34]  ? vma_interval_tree_iter_next+0x1a4/0x300
[  858.554751][   T34]  hugetlb_wp+0xfe2/0x3220
[  858.555650][   T34]  ? __pfx_hugetlb_wp+0x10/0x10
[  858.556542][   T34]  ? do_raw_spin_lock+0x126/0x2a0
[  858.557456][   T34]  ? lock_acquire+0xf5/0x290
[  858.558317][   T34]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  858.559321][   T34]  hugetlb_fault+0x20b6/0x2b40
[  858.560245][   T34]  ? __pfx_hugetlb_fault+0x10/0x10
[  858.561225][   T34]  ? mt_find+0x15a/0x5f0
[  858.562026][   T34]  handle_mm_fault+0x17b3/0x1c80