[PATCH v3 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter

Naoya Horiguchi posted 4 patches 3 years, 6 months ago
There is a newer version of this series
[PATCH v3 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter
Posted by Naoya Horiguchi 3 years, 6 months ago
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

Currently PageHWPoison flag does not behave well when experiencing memory
hotremove/hotplug.  Any data field in struct page is unreliable when the
associated memory is offlined, and the current mechanism can't tell whether
a memory section is onlined because a new memory devices is installed or
because previous failed offline operations are undone.  Especially if
there's a hwpoisoned memory, it's unclear what the best option is.

So introduce a new mechanism to make struct memory_block remember that
a memory block has hwpoisoned memory inside it. And make any online event
fail if the onlined memory block contains hwpoison.  struct memory_block
is freed and reallocated over ACPI-based hotremove/hotplug, but not over
sysfs-based hotremove/hotplug.  So it's desirable to implement hwpoison
counter on this struct.

Note that clear_hwpoisoned_pages() is relocated to be called earlier than
now, just before unregistering struct memory_block.  Otherwise, the
per-memory_block hwpoison counter is freed and we fail to adjust global
hwpoison counter properly.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
 drivers/base/memory.c  | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/memory.h |  3 +++
 include/linux/mm.h     |  8 ++++++++
 mm/internal.h          |  8 --------
 mm/memory-failure.c    | 31 ++++++++++---------------------
 mm/sparse.c            |  2 --
 6 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9aa0da991cfb..c9bde4c4ffdf 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -183,6 +183,9 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
+	if (atomic_long_read(&mem->nr_hwpoison))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		clear_hwpoisoned_pages(atomic_long_read(&mem->nr_hwpoison));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1168,35 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		return atomic_long_read(&mem->nr_hwpoison);
+	return 0;
+}
+
+#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index aa619464a1df..74e6b3ad947f 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -85,6 +85,9 @@ struct memory_block {
 	unsigned long nr_vmemmap_pages;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+#ifdef CONFIG_MEMORY_FAILURE
+	atomic_long_t nr_hwpoison;
+#endif
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2bb5d1596041..2fe42bb9a517 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3280,6 +3280,10 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 extern void num_poisoned_pages_inc(unsigned long pfn);
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+extern unsigned long memblk_nr_poison(unsigned long pfn);
+extern void clear_hwpoisoned_pages(long nr_poison);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 {
@@ -3289,6 +3293,10 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
+
+static inline void clear_hwpoisoned_pages(long nr_poison)
+{
+}
 #endif
 
 #ifndef arch_memory_failure
diff --git a/mm/internal.h b/mm/internal.h
index b3002e03c28f..42ba8b96cab5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 
-#ifdef CONFIG_MEMORY_FAILURE
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
 extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a069d43bc87f..03479895086d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,14 +74,17 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
-static inline void num_poisoned_pages_inc(unsigned long pfn)
+void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
+	memblk_nr_poison_inc(pfn);
 }
 
 static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
+	if (pfn != -1UL)
+		memblk_nr_poison_sub(pfn, i);
 }
 
 /*
@@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
+		/*
+		 * TODO: per-memory_block counter might break when the page
+		 * size to be unpoisoned is larger than a memory_block.
+		 */
 		num_poisoned_pages_sub(pfn, count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
@@ -2618,25 +2625,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 	return ret;
 }
 
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+void clear_hwpoisoned_pages(long nr_poison)
 {
-	int i, total = 0;
-
-	/*
-	 * A further optimization is to have per section refcounted
-	 * num_poisoned_pages.  But that would need more space per memmap, so
-	 * for now just do a quick global check to speed up this routine in the
-	 * absence of bad pages.
-	 */
-	if (atomic_long_read(&num_poisoned_pages) == 0)
-		return;
-
-	for (i = 0; i < nr_pages; i++) {
-		if (PageHWPoison(&memmap[i])) {
-			total++;
-			ClearPageHWPoison(&memmap[i]);
-		}
-	}
-	if (total)
-		num_poisoned_pages_sub(total);
+	num_poisoned_pages_sub(-1UL, nr_poison);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index e5a8a3a0edd7..2779b419ef2a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
 		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
-			nr_pages - map_offset);
 	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-- 
2.25.1
[PATCH v4 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by Naoya Horiguchi 3 years, 6 months ago
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

Currently PageHWPoison flag does not behave well when experiencing memory
hotremove/hotplug.  Any data field in struct page is unreliable when the
associated memory is offlined, and the current mechanism can't tell whether
a memory section is onlined because a new memory devices is installed or
because previous failed offline operations are undone.  Especially if
there's a hwpoisoned memory, it's unclear what the best option is.

So introduce a new mechanism to make struct memory_block remember that
a memory block has hwpoisoned memory inside it. And make any online event
fail if the onlined memory block contains hwpoison.  struct memory_block
is freed and reallocated over ACPI-based hotremove/hotplug, but not over
sysfs-based hotremove/hotplug.  So it's desirable to implement hwpoison
counter on this struct.

Note that clear_hwpoisoned_pages() is relocated to be called earlier than
now, just before unregistering struct memory_block.  Otherwise, the
per-memory_block hwpoison counter is freed and we fail to adjust global
hwpoison counter properly.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
ChangeLog v3 -> v4:
- fixed build error (https://lore.kernel.org/linux-mm/202209231134.tnhKHRfg-lkp@intel.com/)
  by using memblk_nr_poison() to access to the member ->nr_hwpoison
---
 drivers/base/memory.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/memory.h |  3 +++
 include/linux/mm.h     |  8 ++++++++
 mm/internal.h          |  8 --------
 mm/memory-failure.c    | 31 ++++++++++---------------------
 mm/sparse.c            |  2 --
 6 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9aa0da991cfb..f470bbfc68d0 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -183,6 +183,9 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
+	if (memblk_nr_poison(start_pfn))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		clear_hwpoisoned_pages(memblk_nr_poison(start));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1168,40 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		return atomic_long_read(&mem->nr_hwpoison);
+	return 0;
+}
+
+#else
+unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	return 0;
+}
+#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index aa619464a1df..74e6b3ad947f 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -85,6 +85,9 @@ struct memory_block {
 	unsigned long nr_vmemmap_pages;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+#ifdef CONFIG_MEMORY_FAILURE
+	atomic_long_t nr_hwpoison;
+#endif
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2bb5d1596041..5445943bbb4b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3280,6 +3280,9 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 extern void num_poisoned_pages_inc(unsigned long pfn);
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+extern void clear_hwpoisoned_pages(long nr_poison);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 {
@@ -3289,7 +3292,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
+
+static inline void clear_hwpoisoned_pages(long nr_poison)
+{
+}
 #endif
+extern unsigned long memblk_nr_poison(unsigned long pfn);
 
 #ifndef arch_memory_failure
 static inline int arch_memory_failure(unsigned long pfn, int flags)
diff --git a/mm/internal.h b/mm/internal.h
index b3002e03c28f..42ba8b96cab5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 
-#ifdef CONFIG_MEMORY_FAILURE
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
 extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a069d43bc87f..03479895086d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,14 +74,17 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
-static inline void num_poisoned_pages_inc(unsigned long pfn)
+void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
+	memblk_nr_poison_inc(pfn);
 }
 
 static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
+	if (pfn != -1UL)
+		memblk_nr_poison_sub(pfn, i);
 }
 
 /*
@@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
+		/*
+		 * TODO: per-memory_block counter might break when the page
+		 * size to be unpoisoned is larger than a memory_block.
+		 */
 		num_poisoned_pages_sub(pfn, count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
@@ -2618,25 +2625,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 	return ret;
 }
 
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+void clear_hwpoisoned_pages(long nr_poison)
 {
-	int i, total = 0;
-
-	/*
-	 * A further optimization is to have per section refcounted
-	 * num_poisoned_pages.  But that would need more space per memmap, so
-	 * for now just do a quick global check to speed up this routine in the
-	 * absence of bad pages.
-	 */
-	if (atomic_long_read(&num_poisoned_pages) == 0)
-		return;
-
-	for (i = 0; i < nr_pages; i++) {
-		if (PageHWPoison(&memmap[i])) {
-			total++;
-			ClearPageHWPoison(&memmap[i]);
-		}
-	}
-	if (total)
-		num_poisoned_pages_sub(total);
+	num_poisoned_pages_sub(-1UL, nr_poison);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index e5a8a3a0edd7..2779b419ef2a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
 		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
-			nr_pages - map_offset);
 	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-- 
2.37.3.518.g79f2338b37
[PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by Naoya Horiguchi 3 years, 6 months ago
There seems another build error in aarch64 with MEMORY_HOTPLUG disabled.
https://lore.kernel.org/lkml/20220923110144.GA1413812@ik1-406-35019.vs.sakura.ne.jp/
, so let me revise this patch again to handle it.

- Naoya Horiguchi

---
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Fri, 23 Sep 2022 22:51:20 +0900
Subject: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter

Currently PageHWPoison flag does not behave well when experiencing memory
hotremove/hotplug.  Any data field in struct page is unreliable when the
associated memory is offlined, and the current mechanism can't tell whether
a memory section is onlined because a new memory devices is installed or
because previous failed offline operations are undone.  Especially if
there's a hwpoisoned memory, it's unclear what the best option is.

So introduce a new mechanism to make struct memory_block remember that
a memory block has hwpoisoned memory inside it. And make any online event
fail if the onlined memory block contains hwpoison.  struct memory_block
is freed and reallocated over ACPI-based hotremove/hotplug, but not over
sysfs-based hotremove/hotplug.  So it's desirable to implement hwpoison
counter on this struct.

Note that clear_hwpoisoned_pages() is relocated to be called earlier than
now, just before unregistering struct memory_block.  Otherwise, the
per-memory_block hwpoison counter is freed and we fail to adjust global
hwpoison counter properly.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: kernel test robot <lkp@intel.com>
---
ChangeLog v4 -> v5:
- add Reported-by of lkp bot,
- check both CONFIG_MEMORY_FAILURE and CONFIG_MEMORY_HOTPLUG in introduced #ifdefs,
  intending to fix "undefined reference" errors in aarch64.

ChangeLog v3 -> v4:
- fix build error (https://lore.kernel.org/linux-mm/202209231134.tnhKHRfg-lkp@intel.com/)
  by using memblk_nr_poison() to access to the member ->nr_hwpoison
---
 drivers/base/memory.c  | 34 ++++++++++++++++++++++++++++++++++
 include/linux/memory.h |  3 +++
 include/linux/mm.h     | 24 ++++++++++++++++++++++++
 mm/internal.h          |  8 --------
 mm/memory-failure.c    | 31 ++++++++++---------------------
 mm/sparse.c            |  2 --
 6 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9aa0da991cfb..99e0e789616c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -183,6 +183,9 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
+	if (memblk_nr_poison(start_pfn))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		clear_hwpoisoned_pages(memblk_nr_poison(start));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1168,33 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 	}
 	return ret;
 }
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		return atomic_long_read(&mem->nr_hwpoison);
+	return 0;
+}
+#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index aa619464a1df..ad8cd9bb3239 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -85,6 +85,9 @@ struct memory_block {
 	unsigned long nr_vmemmap_pages;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+	atomic_long_t nr_hwpoison;
+#endif
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2bb5d1596041..936864d6f8be 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3280,6 +3280,7 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 extern void num_poisoned_pages_inc(unsigned long pfn);
+extern void clear_hwpoisoned_pages(long nr_poison);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 {
@@ -3289,6 +3290,29 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
+
+static inline void clear_hwpoisoned_pages(long nr_poison)
+{
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+extern unsigned long memblk_nr_poison(unsigned long pfn);
+#else
+static inline void memblk_nr_poison_inc(unsigned long pfn)
+{
+}
+
+static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+}
+
+static inline unsigned long memblk_nr_poison(unsigned long pfn)
+{
+	return 0;
+}
 #endif
 
 #ifndef arch_memory_failure
diff --git a/mm/internal.h b/mm/internal.h
index b3002e03c28f..42ba8b96cab5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 
-#ifdef CONFIG_MEMORY_FAILURE
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
 extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a069d43bc87f..03479895086d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,14 +74,17 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
-static inline void num_poisoned_pages_inc(unsigned long pfn)
+void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
+	memblk_nr_poison_inc(pfn);
 }
 
 static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
+	if (pfn != -1UL)
+		memblk_nr_poison_sub(pfn, i);
 }
 
 /*
@@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
+		/*
+		 * TODO: per-memory_block counter might break when the page
+		 * size to be unpoisoned is larger than a memory_block.
+		 */
 		num_poisoned_pages_sub(pfn, count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
@@ -2618,25 +2625,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 	return ret;
 }
 
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+void clear_hwpoisoned_pages(long nr_poison)
 {
-	int i, total = 0;
-
-	/*
-	 * A further optimization is to have per section refcounted
-	 * num_poisoned_pages.  But that would need more space per memmap, so
-	 * for now just do a quick global check to speed up this routine in the
-	 * absence of bad pages.
-	 */
-	if (atomic_long_read(&num_poisoned_pages) == 0)
-		return;
-
-	for (i = 0; i < nr_pages; i++) {
-		if (PageHWPoison(&memmap[i])) {
-			total++;
-			ClearPageHWPoison(&memmap[i]);
-		}
-	}
-	if (total)
-		num_poisoned_pages_sub(total);
+	num_poisoned_pages_sub(-1UL, nr_poison);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index e5a8a3a0edd7..2779b419ef2a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
 		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
-			nr_pages - map_offset);
 	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-- 
2.37.3.518.g79f2338b37
Re: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by David Hildenbrand 3 years, 6 months ago
>   /*
> @@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
>   unlock_mutex:
>   	mutex_unlock(&mf_mutex);
>   	if (!ret || freeit) {
> +		/*
> +		 * TODO: per-memory_block counter might break when the page
> +		 * size to be unpoisoned is larger than a memory_block.
> +		 */

Hmm, but that happens easily e.g., with 1 GiB hugetlb page and 128 MiB 
memory section/block size. What would be the right thing to do here? The 
TODO should rather spell that out instead of just stating the problem.

-- 
Thanks,

David / dhildenb
Re: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by HORIGUCHI NAOYA(堀口 直也) 3 years, 5 months ago
On Mon, Sep 26, 2022 at 10:05:05AM +0200, David Hildenbrand wrote:
> >   /*
> > @@ -2414,6 +2417,10 @@ int unpoison_memory(unsigned long pfn)
> >   unlock_mutex:
> >   	mutex_unlock(&mf_mutex);
> >   	if (!ret || freeit) {
> > +		/*
> > +		 * TODO: per-memory_block counter might break when the page
> > +		 * size to be unpoisoned is larger than a memory_block.
> > +		 */
> 
> Hmm, but that happens easily e.g., with 1 GiB hugetlb page and 128 MiB
> memory section/block size. What would be the right thing to do here? The
> TODO should rather spell that out instead of just stating the problem.

What should we need here is to cancel the per-memory_block hwpoison counts
in each memory block associated with the hugepage to be unpoisoned.
I found that it can be done with additional several lines of code, so
v6 will contain them.  Then, this TODO comment is no longer needed.

Thanks,
Naoya Horiguchi
Re: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by Miaohe Lin 3 years, 6 months ago
On 2022/9/23 22:12, Naoya Horiguchi wrote:
> There seems another build error in aarch64 with MEMORY_HOTPLUG disabled.
> https://lore.kernel.org/lkml/20220923110144.GA1413812@ik1-406-35019.vs.sakura.ne.jp/
> , so let me revise this patch again to handle it.
> 
> - Naoya Horiguchi
> 
> ---
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> Date: Fri, 23 Sep 2022 22:51:20 +0900
> Subject: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter
> 
> Currently PageHWPoison flag does not behave well when experiencing memory
> hotremove/hotplug.  Any data field in struct page is unreliable when the
> associated memory is offlined, and the current mechanism can't tell whether
> a memory section is onlined because a new memory devices is installed or
> because previous failed offline operations are undone.  Especially if
> there's a hwpoisoned memory, it's unclear what the best option is.
> 
> So introduce a new mechanism to make struct memory_block remember that
> a memory block has hwpoisoned memory inside it. And make any online event
> fail if the onlined memory block contains hwpoison.  struct memory_block
> is freed and reallocated over ACPI-based hotremove/hotplug, but not over
> sysfs-based hotremove/hotplug.  So it's desirable to implement hwpoison
> counter on this struct.
> 
> Note that clear_hwpoisoned_pages() is relocated to be called earlier than
> now, just before unregistering struct memory_block.  Otherwise, the
> per-memory_block hwpoison counter is freed and we fail to adjust global
> hwpoison counter properly.
> 
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> Reported-by: kernel test robot <lkp@intel.com>

LGTM with some nits below. Thanks.

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

> ---
> ChangeLog v4 -> v5:
> - add Reported-by of lkp bot,
> - check both CONFIG_MEMORY_FAILURE and CONFIG_MEMORY_HOTPLUG in introduced #ifdefs,
>   intending to fix "undefined reference" errors in aarch64.
> 
> ChangeLog v3 -> v4:
> - fix build error (https://lore.kernel.org/linux-mm/202209231134.tnhKHRfg-lkp@intel.com/)
>   by using memblk_nr_poison() to access to the member ->nr_hwpoison
> ---
>  drivers/base/memory.c  | 34 ++++++++++++++++++++++++++++++++++
>  include/linux/memory.h |  3 +++
>  include/linux/mm.h     | 24 ++++++++++++++++++++++++
>  mm/internal.h          |  8 --------
>  mm/memory-failure.c    | 31 ++++++++++---------------------
>  mm/sparse.c            |  2 --
>  6 files changed, 71 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 9aa0da991cfb..99e0e789616c 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -183,6 +183,9 @@ static int memory_block_online(struct memory_block *mem)
>  	struct zone *zone;
>  	int ret;
>  
> +	if (memblk_nr_poison(start_pfn))
> +		return -EHWPOISON;
> +
>  	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
>  				  start_pfn, nr_pages);
>  
> @@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
>  		mem = find_memory_block_by_id(block_id);
>  		if (WARN_ON_ONCE(!mem))
>  			continue;
> +		clear_hwpoisoned_pages(memblk_nr_poison(start));

clear_hwpoisoned_pages seems not a proper name now? PageHWPoison info is kept now. But this should be trivial.

>  		unregister_memory_block_under_nodes(mem);
>  		remove_memory_block(mem);
>  	}
> @@ -1164,3 +1168,33 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
>  	}
>  	return ret;
>  }
> +
> +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
> +void memblk_nr_poison_inc(unsigned long pfn)
> +{
> +	const unsigned long block_id = pfn_to_block_id(pfn);
> +	struct memory_block *mem = find_memory_block_by_id(block_id);
> +
> +	if (mem)
> +		atomic_long_inc(&mem->nr_hwpoison);
> +}
> +
> +void memblk_nr_poison_sub(unsigned long pfn, long i)
> +{
> +	const unsigned long block_id = pfn_to_block_id(pfn);
> +	struct memory_block *mem = find_memory_block_by_id(block_id);
> +
> +	if (mem)
> +		atomic_long_sub(i, &mem->nr_hwpoison);
> +}
> +
> +unsigned long memblk_nr_poison(unsigned long pfn)

memblk_nr_poison() is only used inside this file. Make it static?

Thanks,
Miaohe Lin
Re: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter counter
Posted by HORIGUCHI NAOYA(堀口 直也) 3 years, 5 months ago
On Sat, Sep 24, 2022 at 08:27:35PM +0800, Miaohe Lin wrote:
> On 2022/9/23 22:12, Naoya Horiguchi wrote:
> > There seems another build error in aarch64 with MEMORY_HOTPLUG disabled.
> > https://lore.kernel.org/lkml/20220923110144.GA1413812@ik1-406-35019.vs.sakura.ne.jp/
> > , so let me revise this patch again to handle it.
> > 
> > - Naoya Horiguchi
> > 
> > ---
> > From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > Date: Fri, 23 Sep 2022 22:51:20 +0900
> > Subject: [PATCH v5 4/4] mm/hwpoison: introduce per-memory_block hwpoison counter
> > 
> > Currently PageHWPoison flag does not behave well when experiencing memory
> > hotremove/hotplug.  Any data field in struct page is unreliable when the
> > associated memory is offlined, and the current mechanism can't tell whether
> > a memory section is onlined because a new memory devices is installed or
> > because previous failed offline operations are undone.  Especially if
> > there's a hwpoisoned memory, it's unclear what the best option is.
> > 
> > So introduce a new mechanism to make struct memory_block remember that
> > a memory block has hwpoisoned memory inside it. And make any online event
> > fail if the onlined memory block contains hwpoison.  struct memory_block
> > is freed and reallocated over ACPI-based hotremove/hotplug, but not over
> > sysfs-based hotremove/hotplug.  So it's desirable to implement hwpoison
> > counter on this struct.
> > 
> > Note that clear_hwpoisoned_pages() is relocated to be called earlier than
> > now, just before unregistering struct memory_block.  Otherwise, the
> > per-memory_block hwpoison counter is freed and we fail to adjust global
> > hwpoison counter properly.
> > 
> > Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > Reported-by: kernel test robot <lkp@intel.com>
> 
> LGTM with some nits below. Thanks.
> 
> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thank you.

> 
> > ---
> > ChangeLog v4 -> v5:
> > - add Reported-by of lkp bot,
> > - check both CONFIG_MEMORY_FAILURE and CONFIG_MEMORY_HOTPLUG in introduced #ifdefs,
> >   intending to fix "undefined reference" errors in aarch64.
> > 
> > ChangeLog v3 -> v4:
> > - fix build error (https://lore.kernel.org/linux-mm/202209231134.tnhKHRfg-lkp@intel.com/)
> >   by using memblk_nr_poison() to access to the member ->nr_hwpoison
> > ---
> >  drivers/base/memory.c  | 34 ++++++++++++++++++++++++++++++++++
> >  include/linux/memory.h |  3 +++
> >  include/linux/mm.h     | 24 ++++++++++++++++++++++++
> >  mm/internal.h          |  8 --------
> >  mm/memory-failure.c    | 31 ++++++++++---------------------
> >  mm/sparse.c            |  2 --
> >  6 files changed, 71 insertions(+), 31 deletions(-)
> > 
> > diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> > index 9aa0da991cfb..99e0e789616c 100644
> > --- a/drivers/base/memory.c
> > +++ b/drivers/base/memory.c
> > @@ -183,6 +183,9 @@ static int memory_block_online(struct memory_block *mem)
> >  	struct zone *zone;
> >  	int ret;
> >  
> > +	if (memblk_nr_poison(start_pfn))
> > +		return -EHWPOISON;
> > +
> >  	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
> >  				  start_pfn, nr_pages);
> >  
> > @@ -864,6 +867,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
> >  		mem = find_memory_block_by_id(block_id);
> >  		if (WARN_ON_ONCE(!mem))
> >  			continue;
> > +		clear_hwpoisoned_pages(memblk_nr_poison(start));
> 
> clear_hwpoisoned_pages seems not a proper name now? PageHWPoison info is kept now. But this should be trivial.
> 

Right, I think that the name num_poisoned_pages_sub() is clear enough, so
I'll open this function.

> >  		unregister_memory_block_under_nodes(mem);
> >  		remove_memory_block(mem);
> >  	}
> > @@ -1164,3 +1168,33 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
> >  	}
> >  	return ret;
> >  }
> > +
> > +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
> > +void memblk_nr_poison_inc(unsigned long pfn)
> > +{
> > +	const unsigned long block_id = pfn_to_block_id(pfn);
> > +	struct memory_block *mem = find_memory_block_by_id(block_id);
> > +
> > +	if (mem)
> > +		atomic_long_inc(&mem->nr_hwpoison);
> > +}
> > +
> > +void memblk_nr_poison_sub(unsigned long pfn, long i)
> > +{
> > +	const unsigned long block_id = pfn_to_block_id(pfn);
> > +	struct memory_block *mem = find_memory_block_by_id(block_id);
> > +
> > +	if (mem)
> > +		atomic_long_sub(i, &mem->nr_hwpoison);
> > +}
> > +
> > +unsigned long memblk_nr_poison(unsigned long pfn)
> 
> memblk_nr_poison() is only used inside this file. Make it static?

Thanks, I'll add it.

Thanks,
Naoya Horiguchi