 include/linux/hugetlb.h |  8 --------
 include/linux/mm.h      |  8 --------
 mm/hugetlb.c            | 11 -----------
 mm/memory-failure.c     |  8 ++++----
 4 files changed, 4 insertions(+), 31 deletions(-)
Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
can trigger a recursive spinlock self-deadlock (AA deadlock) on
hugetlb_lock when racing with a concurrent unmap:
    thread#0                                 thread#1
    --------                                 --------
madvise(folio, MADV_HWPOISON)
  -> poisons the folio successfully
madvise(folio, MADV_HWPOISON)                unmap(folio)
  try_memory_failure_hugetlb
    get_huge_page_for_hwpoison
      spin_lock_irq(&hugetlb_lock)  <- held
      __get_huge_page_for_hwpoison
        hugetlb_update_hwpoison()
          -> MF_HUGETLB_FOLIO_PRE_POISONED
        goto out:
          folio_put()
            refcount: 1 -> 0
              free_huge_folio()
                spin_lock_irqsave(&hugetlb_lock)
                  -> AA DEADLOCK!
The out: path in __get_huge_page_for_hwpoison() calls folio_put() to
drop the GUP reference while the hugetlb_lock is still held by the
hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap
has released the page table mapping reference, folio_put() drops the
folio refcount to zero, triggering free_huge_folio() which attempts
to re-acquire the non-recursive hugetlb_lock.
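Fix this by removing the hugetlb.c wrapper and taking hugetlb_lock
directly in the memory-failure.c helper, which is renamed from
__get_huge_page_for_hwpoison() to get_huge_page_for_hwpoison() and made
static. Call spin_unlock_irq() before the folio_put() at the out: label
so the folio reference is always dropped outside the lock.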
Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
---
Changelog since v3[1]:
- update commit message to fit current issue
[1]: https://lore.kernel.org/linux-mm/20260520020128.3506168-1-mawupeng1@huawei.com/
---
include/linux/hugetlb.h | 8 --------
include/linux/mm.h | 8 --------
mm/hugetlb.c | 11 -----------
mm/memory-failure.c | 8 ++++----
4 files changed, 4 insertions(+), 31 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 93418625d3c5..059749ed519f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
@@ -422,12 +420,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
return 0;
}
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void folio_putback_hugetlb(struct folio *folio)
{
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0b776907152e..4c4d1a61a6a7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4975,8 +4975,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
*/
extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
@@ -4984,12 +4982,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
{
}
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f24bf49be047..67243923fa24 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7154,17 +7154,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
return ret;
}
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- int ret;
-
- spin_lock_irq(&hugetlb_lock);
- ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
- spin_unlock_irq(&hugetlb_lock);
- return ret;
-}
-
/**
* folio_putback_hugetlb - unisolate a hugetlb folio
* @folio: the isolated hugetlb folio
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ee42d4361309..28522180cf7f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1966,10 +1966,7 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
folio_free_raw_hwp(folio, true);
}
-/*
- * Called from hugetlb code with hugetlb_lock held.
- */
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
@@ -1977,6 +1974,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool count_increased = false;
int ret, rc;
+ spin_lock_irq(&hugetlb_lock);
if (!folio_test_hugetlb(folio)) {
ret = MF_HUGETLB_NON_HUGEPAGE;
goto out;
@@ -2013,8 +2011,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
*migratable_cleared = true;
}
+ spin_unlock_irq(&hugetlb_lock);
return ret;
out:
+ spin_unlock_irq(&hugetlb_lock);
if (count_increased)
folio_put(folio);
return ret;
--
2.43.0
On Fri, 22 May 2026 09:03:05 +0800 Wupeng Ma <mawupeng1@huawei.com> wrote:
> Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
> can trigger a recursive spinlock self-deadlock (AA deadlock) on
> hugetlb_lock when racing with a concurrent unmap:
Well we don't want that.
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
So I'll add cc:stable here.
AI review didn't like the unlocked page_folio():
https://sashiko.dev/#/patchset/20260522010305.4099834-1-mawupeng1@huawei.com
So I'll add a followup patch which addresses that (and which addresses
Miaohe's naming nit).
Please let's check this - perhaps the locking alteration isn't needed.
From: Andrew Morton <akpm@linux-foundation.org>
Subject: mm-memory-failure-fix-hugetlb_lock-aa-deadlock-in-get_huge_page_for_hwpoison-fix
Date: Fri May 22 08:44:25 PM PDT 2026
- address possible race identified by Sashiko
- s/out/out_unlock/, per Miaohe
Link: https://sashiko.dev/#/patchset/20260522010305.4099834-1-mawupeng1@huawei.com
Link: https://lore.kernel.org/f39f405e-4b4b-8f79-70fe-a2b5b62114eb@huawei.com
Cc: David Hildenbrand <david@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador (SUSE) <osalvador@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wupeng Ma <mawupeng1@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/memory-failure.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-hugetlb_lock-aa-deadlock-in-get_huge_page_for_hwpoison-fix
+++ a/mm/memory-failure.c
@@ -1970,14 +1970,15 @@ static int get_huge_page_for_hwpoison(un
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
- struct folio *folio = page_folio(page);
+ struct folio *folio;
bool count_increased = false;
int ret, rc;
spin_lock_irq(&hugetlb_lock);
+ folio = page_folio(page);
if (!folio_test_hugetlb(folio)) {
ret = MF_HUGETLB_NON_HUGEPAGE;
- goto out;
+ goto out_unlock;
} else if (flags & MF_COUNT_INCREASED) {
ret = MF_HUGETLB_IN_USED;
count_increased = true;
@@ -1993,13 +1994,13 @@ static int get_huge_page_for_hwpoison(un
} else {
ret = MF_HUGETLB_RETRY;
if (!(flags & MF_NO_RETRY))
- goto out;
+ goto out_unlock;
}
rc = hugetlb_update_hwpoison(folio, page);
if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
ret = rc;
- goto out;
+ goto out_unlock;
}
/*
@@ -2013,7 +2014,7 @@ static int get_huge_page_for_hwpoison(un
spin_unlock_irq(&hugetlb_lock);
return ret;
-out:
+out_unlock:
spin_unlock_irq(&hugetlb_lock);
if (count_increased)
folio_put(folio);
_
On 2026/5/22 9:03, Wupeng Ma wrote:
> Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
> can trigger a recursive spinlock self-deadlock (AA deadlock) on
> hugetlb_lock when racing with a concurrent unmap:
>
> thread#0 thread#1
> -------- --------
> madvise(folio, MADV_HWPOISON)
> -> poisons the folio successfully
> madvise(folio, MADV_HWPOISON) unmap(folio)
> try_memory_failure_hugetlb
> get_huge_page_for_hwpoison
> spin_lock_irq(&hugetlb_lock) <- held
> __get_huge_page_for_hwpoison
> hugetlb_update_hwpoison()
> -> MF_HUGETLB_FOLIO_PRE_POISONED
> goto out:
> folio_put()
> refcount: 1 -> 0
> free_huge_folio()
> spin_lock_irqsave(&hugetlb_lock)
> -> AA DEADLOCK!
>
> The out: path in __get_huge_page_for_hwpoison() calls folio_put() to
> drop the GUP reference while the hugetlb_lock is still held by the
> hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap
> has released the page table mapping reference, folio_put() drops the
> folio refcount to zero, triggering free_huge_folio() which attempts
> to re-acquire the non-recursive hugetlb_lock.
>
> Fix this by moving hugetlb_lock acquisition from the hugetlb.c wrapper
> into get_huge_page_for_hwpoison(). Place spin_unlock_irq() before the
> folio_put() at the out: label so the folio is always released outside
> the lock.
>
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
Thanks for your patch.
> ---
> Changelog since v3[1]:
> - update commit message to fit current issue
>
> [1]: https://lore.kernel.org/linux-mm/20260520020128.3506168-1-mawupeng1@huawei.com/
> ---
> include/linux/hugetlb.h | 8 --------
> include/linux/mm.h | 8 --------
> mm/hugetlb.c | 11 -----------
> mm/memory-failure.c | 8 ++++----
> 4 files changed, 4 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 93418625d3c5..059749ed519f 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
> long freed);
> bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
> int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> - bool *migratable_cleared);
> void folio_putback_hugetlb(struct folio *folio);
> void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
> void hugetlb_fix_reserve_counts(struct inode *inode);
> @@ -422,12 +420,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
> return 0;
> }
>
> -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> - bool *migratable_cleared)
> -{
> - return 0;
> -}
> -
> static inline void folio_putback_hugetlb(struct folio *folio)
> {
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0b776907152e..4c4d1a61a6a7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -4975,8 +4975,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
> */
> extern const struct attribute_group memory_failure_attr_group;
> extern void memory_failure_queue(unsigned long pfn, int flags);
> -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> - bool *migratable_cleared);
> void num_poisoned_pages_inc(unsigned long pfn);
> void num_poisoned_pages_sub(unsigned long pfn, long i);
> #else
> @@ -4984,12 +4982,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
> {
> }
>
> -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> - bool *migratable_cleared)
> -{
> - return 0;
> -}
> -
> static inline void num_poisoned_pages_inc(unsigned long pfn)
> {
> }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index f24bf49be047..67243923fa24 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7154,17 +7154,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
> return ret;
> }
>
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> - bool *migratable_cleared)
> -{
> - int ret;
> -
> - spin_lock_irq(&hugetlb_lock);
> - ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
> - spin_unlock_irq(&hugetlb_lock);
> - return ret;
> -}
> -
> /**
> * folio_putback_hugetlb - unisolate a hugetlb folio
> * @folio: the isolated hugetlb folio
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index ee42d4361309..28522180cf7f 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1966,10 +1966,7 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
> folio_free_raw_hwp(folio, true);
> }
>
> -/*
> - * Called from hugetlb code with hugetlb_lock held.
> - */
> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> bool *migratable_cleared)
> {
> struct page *page = pfn_to_page(pfn);
> @@ -1977,6 +1974,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> bool count_increased = false;
> int ret, rc;
>
> + spin_lock_irq(&hugetlb_lock);
> if (!folio_test_hugetlb(folio)) {
> ret = MF_HUGETLB_NON_HUGEPAGE;
> goto out;
> @@ -2013,8 +2011,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> *migratable_cleared = true;
> }
>
> + spin_unlock_irq(&hugetlb_lock);
> return ret;
> out:
It might be better to rename out: as out_unlock. But that's trivial.
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Thanks.
.
On 5/22/2026 9:03 AM, Wupeng Ma wrote:
> Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
> can trigger a recursive spinlock self-deadlock (AA deadlock) on
> hugetlb_lock when racing with a concurrent unmap:
>
> thread#0 thread#1
> -------- --------
> madvise(folio, MADV_HWPOISON)
> -> poisons the folio successfully
> madvise(folio, MADV_HWPOISON) unmap(folio)
> try_memory_failure_hugetlb
> get_huge_page_for_hwpoison
> spin_lock_irq(&hugetlb_lock) <- held
> __get_huge_page_for_hwpoison
> hugetlb_update_hwpoison()
> -> MF_HUGETLB_FOLIO_PRE_POISONED
> goto out:
> folio_put()
> refcount: 1 -> 0
> free_huge_folio()
> spin_lock_irqsave(&hugetlb_lock)
> -> AA DEADLOCK!
>
> The out: path in __get_huge_page_for_hwpoison() calls folio_put() to
> drop the GUP reference while the hugetlb_lock is still held by the
> hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap
> has released the page table mapping reference, folio_put() drops the
> folio refcount to zero, triggering free_huge_folio() which attempts
> to re-acquire the non-recursive hugetlb_lock.
>
> Fix this by moving hugetlb_lock acquisition from the hugetlb.c wrapper
> into get_huge_page_for_hwpoison(). Place spin_unlock_irq() before the
> folio_put() at the out: label so the folio is always released outside
> the lock.
>
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> On May 22, 2026, at 09:03, Wupeng Ma <mawupeng1@huawei.com> wrote:
>
> Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
> can trigger a recursive spinlock self-deadlock (AA deadlock) on
> hugetlb_lock when racing with a concurrent unmap:
>
> thread#0 thread#1
> -------- --------
> madvise(folio, MADV_HWPOISON)
> -> poisons the folio successfully
> madvise(folio, MADV_HWPOISON) unmap(folio)
> try_memory_failure_hugetlb
> get_huge_page_for_hwpoison
> spin_lock_irq(&hugetlb_lock) <- held
> __get_huge_page_for_hwpoison
> hugetlb_update_hwpoison()
> -> MF_HUGETLB_FOLIO_PRE_POISONED
> goto out:
> folio_put()
> refcount: 1 -> 0
> free_huge_folio()
> spin_lock_irqsave(&hugetlb_lock)
> -> AA DEADLOCK!
>
> The out: path in __get_huge_page_for_hwpoison() calls folio_put() to
> drop the GUP reference while the hugetlb_lock is still held by the
> hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap
> has released the page table mapping reference, folio_put() drops the
> folio refcount to zero, triggering free_huge_folio() which attempts
> to re-acquire the non-recursive hugetlb_lock.
>
> Fix this by moving hugetlb_lock acquisition from the hugetlb.c wrapper
> into get_huge_page_for_hwpoison(). Place spin_unlock_irq() before the
> folio_put() at the out: label so the folio is always released outside
> the lock.
>
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Thanks.
On Fri, May 22, 2026 at 09:03:05AM +0800, Wupeng Ma wrote:
> Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page
> can trigger a recursive spinlock self-deadlock (AA deadlock) on
> hugetlb_lock when racing with a concurrent unmap:
>
> thread#0 thread#1
> -------- --------
> madvise(folio, MADV_HWPOISON)
> -> poisons the folio successfully
> madvise(folio, MADV_HWPOISON) unmap(folio)
> try_memory_failure_hugetlb
> get_huge_page_for_hwpoison
> spin_lock_irq(&hugetlb_lock) <- held
> __get_huge_page_for_hwpoison
> hugetlb_update_hwpoison()
> -> MF_HUGETLB_FOLIO_PRE_POISONED
> goto out:
> folio_put()
> refcount: 1 -> 0
> free_huge_folio()
> spin_lock_irqsave(&hugetlb_lock)
> -> AA DEADLOCK!
>
> The out: path in __get_huge_page_for_hwpoison() calls folio_put() to
> drop the GUP reference while the hugetlb_lock is still held by the
> hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap
> has released the page table mapping reference, folio_put() drops the
> folio refcount to zero, triggering free_huge_folio() which attempts
> to re-acquire the non-recursive hugetlb_lock.
>
> Fix this by moving hugetlb_lock acquisition from the hugetlb.c wrapper
> into get_huge_page_for_hwpoison(). Place spin_unlock_irq() before the
> folio_put() at the out: label so the folio is always released outside
> the lock.
>
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
I was also able to hit this with two threads and adding some delays.
Acked-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
--
Oscar Salvador
SUSE Labs
© 2016 - 2026 Red Hat, Inc.