Now that HWPoison page(s) within a HugeTLB page will be rejected by
the buddy allocator during dissolve_free_hugetlb_folio(), there is no
need to drain_all_pages() and take_page_off_buddy() anymore. In fact,
calling take_page_off_buddy() after dissolve_free_hugetlb_folio() has
succeeded returns false, making the caller think page_handle_poison()
failed.
On the other hand, for hardware corrupted pages in buddy allocator,
take_page_off_buddy() is still a must-have.
Given that hugepages and free buddy pages should be treated
differently, refactor page_handle_poison() and __page_handle_poison():
- __page_handle_poison() is unwound into page_handle_poison().
- Callers of page_handle_poison() now need to explicitly indicate
  whether the page is a HugeTLB hugepage or a free buddy page.
- Add a helper, hugepage_handle_poison(), for several existing
  HugeTLB-specific callsites.
Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
mm/memory-failure.c | 84 ++++++++++++++++++++++-----------------------
1 file changed, 41 insertions(+), 43 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d204de6c9792a..1fdaee1e48bb8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -162,54 +162,48 @@ static struct rb_root_cached pfn_space_itree = RB_ROOT_CACHED;
static DEFINE_MUTEX(pfn_space_lock);
-/*
- * Return values:
- * 1: the page is dissolved (if needed) and taken off from buddy,
- * 0: the page is dissolved (if needed) and not taken off from buddy,
- * < 0: failed to dissolve.
+/**
+ * Handle the HugeTLB hugepage that @page belongs to. Return values:
+ * = 0: the hugepage is free hugepage and is dissolved.
+ * < 0: hugepage is in-use or failed to dissolve.
*/
-static int __page_handle_poison(struct page *page)
+static int hugepage_handle_poison(struct page *page)
{
- int ret;
+ return dissolve_free_hugetlb_folio(page_folio(page));
+}
+
+/**
+ * Helper at the end of handling @page having hardware errors.
+ * @huge: @page is part of a HugeTLB hugepage.
+ * @free: @page is free buddy page.
+ * @release: memory-failure module should release a pending refcount.
+ */
+static bool page_handle_poison(struct page *page, bool huge, bool free,
+ bool release)
+{
+ int ret = 0;
/*
- * zone_pcp_disable() can't be used here. It will
- * hold pcp_batch_high_lock and dissolve_free_hugetlb_folio() might hold
- * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
- * optimization is enabled. This will break current lock dependency
- * chain and leads to deadlock.
- * Disabling pcp before dissolving the page was a deterministic
- * approach because we made sure that those pages cannot end up in any
- * PCP list. Draining PCP lists expels those pages to the buddy system,
- * but nothing guarantees that those pages do not get back to a PCP
- * queue if we need to refill those.
+ * Buddy allocator will exclude the HWPoison page after hugepage
+ * is successfully dissolved.
*/
- ret = dissolve_free_hugetlb_folio(page_folio(page));
- if (!ret) {
+ if (huge)
+ ret = hugepage_handle_poison(page);
+
+ if (free) {
drain_all_pages(page_zone(page));
- ret = take_page_off_buddy(page);
+ ret = take_page_off_buddy(page) ? 0 : -1;
}
- return ret;
-}
-
-static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
-{
- if (hugepage_or_freepage) {
+ if ((huge || free) && ret < 0)
/*
- * Doing this check for free pages is also fine since
- * dissolve_free_hugetlb_folio() returns 0 for non-hugetlb folios as well.
+ * We could fail to take off the target page from buddy
+ * for example due to racy page allocation, but that's
+ * acceptable because soft-offlined page is not broken
+ * and if someone really want to use it, they should
+ * take it.
*/
- if (__page_handle_poison(page) <= 0)
- /*
- * We could fail to take off the target page from buddy
- * for example due to racy page allocation, but that's
- * acceptable because soft-offlined page is not broken
- * and if someone really want to use it, they should
- * take it.
- */
- return false;
- }
+ return false;
SetPageHWPoison(page);
if (release)
@@ -1174,7 +1168,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
* subpages.
*/
folio_put(folio);
- if (__page_handle_poison(p) > 0) {
+ if (!hugepage_handle_poison(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
} else {
@@ -2067,7 +2061,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
*/
if (res == 0) {
folio_unlock(folio);
- if (__page_handle_poison(p) > 0) {
+ if (!hugepage_handle_poison(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
} else {
@@ -2815,7 +2809,7 @@ static int soft_offline_in_use_page(struct page *page)
if (ret) {
pr_info("%#lx: invalidated\n", pfn);
- page_handle_poison(page, false, true);
+ page_handle_poison(page, false, false, true);
return 0;
}
@@ -2836,7 +2830,7 @@ static int soft_offline_in_use_page(struct page *page)
if (!ret) {
bool release = !huge;
- if (!page_handle_poison(page, huge, release))
+ if (!page_handle_poison(page, huge, false, release))
ret = -EBUSY;
} else {
if (!list_empty(&pagelist))
@@ -2884,6 +2878,8 @@ int soft_offline_page(unsigned long pfn, int flags)
{
int ret;
bool try_again = true;
+ bool huge;
+ bool free;
struct page *page;
if (!pfn_valid(pfn)) {
@@ -2929,7 +2925,9 @@ int soft_offline_page(unsigned long pfn, int flags)
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
- if (!page_handle_poison(page, true, false)) {
+ huge = folio_test_hugetlb(page_folio(page));
+ free = is_free_buddy_page(page);
+ if (!page_handle_poison(page, huge, free, false)) {
if (try_again) {
try_again = false;
flags &= ~MF_COUNT_INCREASED;
--
2.52.0.457.g6b5491de43-goog
On 2026/1/12 8:49, Jiaqi Yan wrote:
> Now that HWPoison page(s) within HugeTLB page will be rejected by
> buddy allocator during dissolve_free_hugetlb_folio(), there is no
> need to drain_all_pages() and take_page_off_buddy() anymore. In fact,
> calling take_page_off_buddy() after dissolve_free_hugetlb_folio()
> succeeded returns false, making caller think page_handl_poion() failed.
s/page_handl_poion/page_handle_poison/
>
> On the other hand, for hardware corrupted pages in buddy allocator,
> take_page_off_buddy() is still a must-have.
>
> Given hugepage and free buddy page should be treated differently,
> refactor page_handle_poison() and __page_handle_poison():
>
> - __page_handle_poison() is unwind into page_handle_poison().
>
> - Callers of page_handle_poison() also need to explicitly tell if
> page is HugeTLB hugepage or free buddy page.
>
> - Add helper hugepage_handle_poison() for several existing HugeTLB
> specific callsites.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> ---
> mm/memory-failure.c | 84 ++++++++++++++++++++++-----------------------
> 1 file changed, 41 insertions(+), 43 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index d204de6c9792a..1fdaee1e48bb8 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -162,54 +162,48 @@ static struct rb_root_cached pfn_space_itree = RB_ROOT_CACHED;
>
> static DEFINE_MUTEX(pfn_space_lock);
>
> -/*
> - * Return values:
> - * 1: the page is dissolved (if needed) and taken off from buddy,
> - * 0: the page is dissolved (if needed) and not taken off from buddy,
> - * < 0: failed to dissolve.
> +/**
> + * Handle the HugeTLB hugepage that @page belongs to. Return values:
> + * = 0: the hugepage is free hugepage and is dissolved.
In the soft offline scenario, dissolve_free_hugetlb_folio() would also return 0 when
the page has become a normal page due to a race, so returning 0 does not guarantee
the hugepage was dissolved.
> + * < 0: hugepage is in-use or failed to dissolve.
> */
> -static int __page_handle_poison(struct page *page)
> +static int hugepage_handle_poison(struct page *page)
> {
> - int ret;
> + return dissolve_free_hugetlb_folio(page_folio(page));
> +}
> +
> +/**
> + * Helper at the end of handling @page having hardware errors.
> + * @huge: @page is part of a HugeTLB hugepage.
> + * @free: @page is free buddy page.
> + * @release: memory-failure module should release a pending refcount.
> + */
> +static bool page_handle_poison(struct page *page, bool huge, bool free,
> + bool release)
> +{
> + int ret = 0;
>
> /*
> - * zone_pcp_disable() can't be used here. It will
> - * hold pcp_batch_high_lock and dissolve_free_hugetlb_folio() might hold
> - * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
> - * optimization is enabled. This will break current lock dependency
> - * chain and leads to deadlock.
> - * Disabling pcp before dissolving the page was a deterministic
> - * approach because we made sure that those pages cannot end up in any
> - * PCP list. Draining PCP lists expels those pages to the buddy system,
> - * but nothing guarantees that those pages do not get back to a PCP
> - * queue if we need to refill those.
> + * Buddy allocator will exclude the HWPoison page after hugepage
> + * is successfully dissolved.
> */
> - ret = dissolve_free_hugetlb_folio(page_folio(page));
> - if (!ret) {
> + if (huge)
> + ret = hugepage_handle_poison(page);
> +
> + if (free) {
Nit: huge and free can't both be true. So we could write it as:
if (huge) {
...
} else if (free) {
> drain_all_pages(page_zone(page));
> - ret = take_page_off_buddy(page);
> + ret = take_page_off_buddy(page) ? 0 : -1;
> }
>
> - return ret;
> -}
> -
> -static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
> -{
> - if (hugepage_or_freepage) {
> + if ((huge || free) && ret < 0)
Nit: ret won't be <0 if both huge and free are false. So I think we might simplify it as:
if (ret < 0)
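Putting the two nits together, the tail of page_handle_poison() could look
something like this (untested sketch, just to illustrate):

	if (huge) {
		ret = hugepage_handle_poison(page);
	} else if (free) {
		drain_all_pages(page_zone(page));
		ret = take_page_off_buddy(page) ? 0 : -1;
	}

	if (ret < 0)
		return false;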
> /*
> - * Doing this check for free pages is also fine since
> - * dissolve_free_hugetlb_folio() returns 0 for non-hugetlb folios as well.
> + * We could fail to take off the target page from buddy
> + * for example due to racy page allocation, but that's
> + * acceptable because soft-offlined page is not broken
> + * and if someone really want to use it, they should
> + * take it.
> */
> - if (__page_handle_poison(page) <= 0)
> - /*
> - * We could fail to take off the target page from buddy
> - * for example due to racy page allocation, but that's
> - * acceptable because soft-offlined page is not broken
> - * and if someone really want to use it, they should
> - * take it.
> - */
> - return false;
> - }
> + return false;
>
> SetPageHWPoison(page);
> if (release)
> @@ -1174,7 +1168,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
> * subpages.
> */
> folio_put(folio);
> - if (__page_handle_poison(p) > 0) {
> + if (!hugepage_handle_poison(p)) {
> page_ref_inc(p);
> res = MF_RECOVERED;
> } else {
> @@ -2067,7 +2061,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
> */
> if (res == 0) {
> folio_unlock(folio);
> - if (__page_handle_poison(p) > 0) {
> + if (!hugepage_handle_poison(p)) {
> page_ref_inc(p);
> res = MF_RECOVERED;
> } else {
> @@ -2815,7 +2809,7 @@ static int soft_offline_in_use_page(struct page *page)
>
> if (ret) {
> pr_info("%#lx: invalidated\n", pfn);
> - page_handle_poison(page, false, true);
> + page_handle_poison(page, false, false, true);
> return 0;
> }
>
> @@ -2836,7 +2830,7 @@ static int soft_offline_in_use_page(struct page *page)
> if (!ret) {
> bool release = !huge;
>
> - if (!page_handle_poison(page, huge, release))
> + if (!page_handle_poison(page, huge, false, release))
This might not work for soft offline: PageHWPoison is not yet set, so folio_clear_hugetlb_hwpoison()
won't be called when dissolving hugetlb hugepages...
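IIUC, the sequence for a hugetlb page under soft offline would be roughly:

soft_offline_in_use_page
  page_handle_poison(page, /* huge */ true, /* free */ false, ...)
    hugepage_handle_poison
      dissolve_free_hugetlb_folio -- PageHWPoison not set yet, so
                                     folio_clear_hugetlb_hwpoison won't
                                     be called and the target subpage
                                     reaches buddy unmarked
    SetPageHWPoison(page) -- set on a page that may already be free in buddy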
> ret = -EBUSY;
> } else {
> if (!list_empty(&pagelist))
> @@ -2884,6 +2878,8 @@ int soft_offline_page(unsigned long pfn, int flags)
> {
> int ret;
> bool try_again = true;
> + bool huge;
> + bool free;
> struct page *page;
>
> if (!pfn_valid(pfn)) {
> @@ -2929,7 +2925,9 @@ int soft_offline_page(unsigned long pfn, int flags)
> if (ret > 0) {
> ret = soft_offline_in_use_page(page);
> } else if (ret == 0) {
> - if (!page_handle_poison(page, true, false)) {
> + huge = folio_test_hugetlb(page_folio(page));
The folio_test_hugetlb() check is racy because there's no guarantee that the hugetlb hugepage
won't be dissolved before page_handle_poison() is called. That will lead to problems...
soft_offline_page
folio_test_hugetlb -- true now
page_handle_poison
/* Hugepage is dissolved somewhere. */
hugepage_handle_poison -- return 0 because page is normal page or free buddy page.
SetPageHWPoison(page);
page_ref_inc(page); -- refcnt is increased while page might be on buddy...
> + free = is_free_buddy_page(page);
> + if (!page_handle_poison(page, huge, free, false)) {
We can assume free is always true because ret is 0. So we can write it as:
if (!page_handle_poison(page, huge, true, false)) {
> if (try_again) {
> try_again = false;
> flags &= ~MF_COUNT_INCREASED;
>
Thanks.
.
On Wed, Jan 14, 2026 at 7:41 PM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2026/1/12 8:49, Jiaqi Yan wrote:
> > Now that HWPoison page(s) within HugeTLB page will be rejected by
> > buddy allocator during dissolve_free_hugetlb_folio(), there is no
> > need to drain_all_pages() and take_page_off_buddy() anymore. In fact,
> > calling take_page_off_buddy() after dissolve_free_hugetlb_folio()
> > succeeded returns false, making caller think page_handl_poion() failed.
>
> s/page_handl_poion/page_handle_poison/
>
> >
> > On the other hand, for hardware corrupted pages in buddy allocator,
> > take_page_off_buddy() is still a must-have.
> >
> > Given hugepage and free buddy page should be treated differently,
> > refactor page_handle_poison() and __page_handle_poison():
> >
> > - __page_handle_poison() is unwind into page_handle_poison().
> >
> > - Callers of page_handle_poison() also need to explicitly tell if
> > page is HugeTLB hugepage or free buddy page.
> >
> > - Add helper hugepage_handle_poison() for several existing HugeTLB
> > specific callsites.
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> > ---
> > mm/memory-failure.c | 84 ++++++++++++++++++++++-----------------------
> > 1 file changed, 41 insertions(+), 43 deletions(-)
> >
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index d204de6c9792a..1fdaee1e48bb8 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -162,54 +162,48 @@ static struct rb_root_cached pfn_space_itree = RB_ROOT_CACHED;
> >
> > static DEFINE_MUTEX(pfn_space_lock);
> >
> > -/*
> > - * Return values:
> > - * 1: the page is dissolved (if needed) and taken off from buddy,
> > - * 0: the page is dissolved (if needed) and not taken off from buddy,
> > - * < 0: failed to dissolve.
> > +/**
> > + * Handle the HugeTLB hugepage that @page belongs to. Return values:
> > + * = 0: the hugepage is free hugepage and is dissolved.
>
> In the soft offline scenario, dissolve_free_hugetlb_folio() would also return 0 when
> the page has become a normal page due to a race, so returning 0 does not guarantee
> the hugepage was dissolved.
>
> > + * < 0: hugepage is in-use or failed to dissolve.
> > */
> > -static int __page_handle_poison(struct page *page)
> > +static int hugepage_handle_poison(struct page *page)
> > {
> > - int ret;
> > + return dissolve_free_hugetlb_folio(page_folio(page));
> > +}
> > +
> > +/**
> > + * Helper at the end of handling @page having hardware errors.
> > + * @huge: @page is part of a HugeTLB hugepage.
> > + * @free: @page is free buddy page.
> > + * @release: memory-failure module should release a pending refcount.
> > + */
> > +static bool page_handle_poison(struct page *page, bool huge, bool free,
> > + bool release)
> > +{
> > + int ret = 0;
> >
> > /*
> > - * zone_pcp_disable() can't be used here. It will
> > - * hold pcp_batch_high_lock and dissolve_free_hugetlb_folio() might hold
> > - * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
> > - * optimization is enabled. This will break current lock dependency
> > - * chain and leads to deadlock.
> > - * Disabling pcp before dissolving the page was a deterministic
> > - * approach because we made sure that those pages cannot end up in any
> > - * PCP list. Draining PCP lists expels those pages to the buddy system,
> > - * but nothing guarantees that those pages do not get back to a PCP
> > - * queue if we need to refill those.
> > + * Buddy allocator will exclude the HWPoison page after hugepage
> > + * is successfully dissolved.
> > */
> > - ret = dissolve_free_hugetlb_folio(page_folio(page));
> > - if (!ret) {
> > + if (huge)
> > + ret = hugepage_handle_poison(page);
> > +
> > + if (free) {
>
> Nit: huge and free can't both be true. So we could write it as:
> if (huge) {
> ...
> } else if (free) {
>
> > drain_all_pages(page_zone(page));
> > - ret = take_page_off_buddy(page);
> > + ret = take_page_off_buddy(page) ? 0 : -1;
> > }
> >
> > - return ret;
> > -}
> > -
> > -static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
> > -{
> > - if (hugepage_or_freepage) {
> > + if ((huge || free) && ret < 0)
>
> Nit: ret won't be <0 if both huge and free are false. So I think we might simplify it as:
>
> if (ret < 0)
>
> > /*
> > - * Doing this check for free pages is also fine since
> > - * dissolve_free_hugetlb_folio() returns 0 for non-hugetlb folios as well.
> > + * We could fail to take off the target page from buddy
> > + * for example due to racy page allocation, but that's
> > + * acceptable because soft-offlined page is not broken
> > + * and if someone really want to use it, they should
> > + * take it.
> > */
> > - if (__page_handle_poison(page) <= 0)
> > - /*
> > - * We could fail to take off the target page from buddy
> > - * for example due to racy page allocation, but that's
> > - * acceptable because soft-offlined page is not broken
> > - * and if someone really want to use it, they should
> > - * take it.
> > - */
> > - return false;
> > - }
> > + return false;
> >
> > SetPageHWPoison(page);
> > if (release)
> > @@ -1174,7 +1168,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
> > * subpages.
> > */
> > folio_put(folio);
> > - if (__page_handle_poison(p) > 0) {
> > + if (!hugepage_handle_poison(p)) {
> > page_ref_inc(p);
> > res = MF_RECOVERED;
> > } else {
> > @@ -2067,7 +2061,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
> > */
> > if (res == 0) {
> > folio_unlock(folio);
> > - if (__page_handle_poison(p) > 0) {
> > + if (!hugepage_handle_poison(p)) {
> > page_ref_inc(p);
> > res = MF_RECOVERED;
> > } else {
> > @@ -2815,7 +2809,7 @@ static int soft_offline_in_use_page(struct page *page)
> >
> > if (ret) {
> > pr_info("%#lx: invalidated\n", pfn);
> > - page_handle_poison(page, false, true);
> > + page_handle_poison(page, false, false, true);
> > return 0;
> > }
> >
> > @@ -2836,7 +2830,7 @@ static int soft_offline_in_use_page(struct page *page)
> > if (!ret) {
> > bool release = !huge;
> >
> > - if (!page_handle_poison(page, huge, release))
> > + if (!page_handle_poison(page, huge, false, release))
>
> This might not work for soft offline: PageHWPoison is not yet set, so folio_clear_hugetlb_hwpoison()
> won't be called when dissolving hugetlb hugepages...
Thanks for pointing this problem (and the later problem) out, Miaohe!
You are right, and I think the reason for both problems is that soft
offline is a totally different case from memory_failure(): there is no
PG_HWPoison until the end of page_handle_poison(). So
free_has_hwpoisoned() can't help dissolve_free_hugetlb_folio()
exclude the page that triggered soft_offline_page().
For free_has_hwpoisoned(), I should only change the call sites in the
memory_failure() path, and leave
soft_offline_page()/page_handle_poison()/__page_handle_poison()
alone. Looking at the current code, HWPoison hugetlb pages happen to
be handled by __page_handle_poison() in either me_huge_page() or
try_memory_failure_hugetlb(). So I think I can replace those call
sites with a new function that doesn't do take_page_off_buddy(),
something like:
+/*
+ * Only for a HugeTLB page being handled by memory_failure(). The key
+ * difference to soft_offline() is that, no HWPoison subpage will make
+ * into buddy allocator after a successful dissolve_free_hugetlb_folio(),
+ * so take_page_off_buddy() is unnecessary.
+ */
+static int __hugepage_handle_poison(struct page *page)
+{
+ struct folio *folio = page_folio(page);
+
+ VM_WARN_ON_FOLIO(!folio_test_hwpoison(folio), folio);
+
+ /*
+ * Can't use dissolve_free_hugetlb_folio() without a reliable
+ * raw_hwp_list telling which subpage is HWPoison.
+ */
+ if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+ /* raw_hwp_list becomes unreliable when kmalloc() fails. */
+ return -ENOMEM;
+
+ return dissolve_free_hugetlb_folio(folio);
+}
+
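The two memory_failure() call sites then become simply (same shape as
in this patch, just with the renamed helper):

	if (!__hugepage_handle_poison(p)) {
		page_ref_inc(p);
		res = MF_RECOVERED;
	}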
On the other hand, just leave __page_handle_poison() and
page_handle_poison() as is, so that take_page_off_buddy() is still
done for the soft offline case.
>
> > ret = -EBUSY;
> > } else {
> > if (!list_empty(&pagelist))
> > @@ -2884,6 +2878,8 @@ int soft_offline_page(unsigned long pfn, int flags)
> > {
> > int ret;
> > bool try_again = true;
> > + bool huge;
> > + bool free;
> > struct page *page;
> >
> > if (!pfn_valid(pfn)) {
> > @@ -2929,7 +2925,9 @@ int soft_offline_page(unsigned long pfn, int flags)
> > if (ret > 0) {
> > ret = soft_offline_in_use_page(page);
> > } else if (ret == 0) {
> > - if (!page_handle_poison(page, true, false)) {
> > + huge = folio_test_hugetlb(page_folio(page));
>
> The folio_test_hugetlb() check is racy because there's no guarantee that the hugetlb hugepage
> won't be dissolved before page_handle_poison() is called. That will lead to problems...
>
> soft_offline_page
> folio_test_hugetlb -- true now
> page_handle_poison
> /* Hugepage is dissolved somewhere. */
> hugepage_handle_poison -- return 0 because page is normal page or free buddy page.
> SetPageHWPoison(page);
> page_ref_inc(page); -- refcnt is increased while page might be on buddy...
>
> > + free = is_free_buddy_page(page);
> > + if (!page_handle_poison(page, huge, free, false)) {
>
> We can assume free is always true because ret is 0. So we can write it as:
> if (!page_handle_poison(page, huge, true, false)) {
>
> > if (try_again) {
> > try_again = false;
> > flags &= ~MF_COUNT_INCREASED;
> >
>
> Thanks.
> .
>