Perform real-time memory usage monitoring on the slub page
allocation paths, i.e., kmalloc_large_alloced and alloc_slab_page.
When the usage exceeds the set threshold value, the panic function
will be triggered.
Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com>
---
mm/Kconfig | 11 ++++++++
mm/slub.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 87 insertions(+)
diff --git a/mm/Kconfig b/mm/Kconfig
index 09aebca1cae3..60cf72d4f0da 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -255,6 +255,17 @@ config SLUB_TINY
If unsure, say N.
+config SLUB_LEAK_PANIC
+ bool "Trigger panic when slub leaks"
+ default y
+ help
+ Detect slub leaks by monitoring its usage in real time on the page
+ allocation path of the slub. When the slub occupancy exceeds the
+ user-set value, it is considered that the slub is leaking at this
+ time, and a panic operation will be triggered immediately. Users
+ can enable and set leak threshold by using the kernel command line
+ parameters "slub.leak_panic" and "slub.leak_panic_threshold".
+
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
diff --git a/mm/slub.c b/mm/slub.c
index 21f71cb6cc06..91049f87ab98 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -42,6 +42,9 @@
#include <kunit/test.h>
#include <kunit/test-bug.h>
#include <linux/sort.h>
+#ifdef CONFIG_SLUB_LEAK_PANIC
+#include <linux/vmstat.h>
+#endif
#include <linux/debugfs.h>
#include <trace/events/kmem.h>
@@ -218,6 +221,15 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
#endif /* CONFIG_SLUB_DEBUG */
+/* Internal slub_leak_panic definitions */
+#ifdef CONFIG_SLUB_LEAK_PANIC
+#define K(x) ((x) << (PAGE_SHIFT-10))
+static bool __read_mostly slub_leak_panic_enabled;
+static unsigned int __read_mostly slub_leak_panic_threshold;
+static long max_slab_count, temp_slab_count;
+#endif
+
+
/* Structure holding parameters for get_partial() call chain */
struct partial_context {
gfp_t flags;
@@ -2424,6 +2436,21 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
if (folio_is_pfmemalloc(folio))
slab_set_pfmemalloc(slab);
+#ifdef CONFIG_SLUB_LEAK_PANIC
+ if (likely(slub_leak_panic_enabled) && slub_leak_panic_threshold > 0) {
+ max_slab_count = K(totalram_pages()) * slub_leak_panic_threshold / 100;
+ temp_slab_count = K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B))
+ + K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B))
+ + K(1 << order);
+ if (temp_slab_count > max_slab_count)
+ panic("SLAB LEAK: %s(temp_count %6luKB > max_count %6luKB):\n"
+ "%s gfp_mask=%#x(%pGg), order=%d kB, oom_score_adj=%d\n",
+ __func__, temp_slab_count, max_slab_count,
+ current->comm, flags, &flags, order,
+ current->signal->oom_score_adj);
+ }
+#endif
+
return slab;
}
@@ -4212,6 +4239,19 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
+#ifdef CONFIG_SLUB_LEAK_PANIC
+ if (likely(slub_leak_panic_enabled) && slub_leak_panic_threshold > 0) {
+ max_slab_count = K(totalram_pages()) * slub_leak_panic_threshold / 100;
+ temp_slab_count = K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B))
+ + K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B));
+ if (temp_slab_count > max_slab_count)
+ panic("SLAB LEAK: %s(temp_count %6luKB > max_count %6luKB):\n"
+ "%s gfp_mask=%#x(%pGg), order=%d kB, oom_score_adj=%d\n",
+ __func__, temp_slab_count, max_slab_count,
+ current->comm, flags, &flags, order,
+ current->signal->oom_score_adj);
+ }
+#endif
}
ptr = kasan_kmalloc_large(ptr, size, flags);
@@ -7443,3 +7483,39 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
sinfo->cache_order = oo_order(s->oo);
}
#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * The /sys/module/slub ABI
+ */
+#ifdef CONFIG_SLUB_LEAK_PANIC
+/*
+ * What: /sys/module/slub/parameters/leak_panic
+ * /sys/module/slub/parameters/leak_panic_threshold
+ * Date: Sep 2024
+ * KernelVersion: v6.6+
+ * Description: Used for slub memory leak check. When the user
+ * successfully allocates the slub page, it also performs
+ * statistics on the total slub usage in the system.
+ * When the usage exceeds the set value
+ * (threshold * memtotal / 100), it is considered that
+ * there is a risk of slub leakage in the system at this time.
+ * A panic operation will be triggered.
+ * Users: userspace
+ */
+MODULE_PARM_DESC(leak_panic, "Disable/Enable slub_leak_panic");
+module_param_named(leak_panic, slub_leak_panic_enabled, bool, 0644);
+
+static int slub_leak_panic_threshold_set(const char *val, const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(val, kp, 0, 100);
+}
+
+static const struct kernel_param_ops slub_leak_panic_threshold_ops = {
+ .set = slub_leak_panic_threshold_set,
+ .get = param_get_uint,
+};
+
+MODULE_PARM_DESC(leak_panic_threshold,
+ "Upper limit value of slub, expressed as a percentage of memtotal (0 ~ 100)");
+module_param_cb(leak_panic_threshold,
+ &slub_leak_panic_threshold_ops, &slub_leak_panic_threshold, 0644);
+#endif /* CONFIG_SLUB_LEAK_PANIC */
--
2.17.1
Hi Fangzheng, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] [also build test ERROR on linus/master v6.11 next-20240925] [cannot apply to vbabka-slab/for-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Fangzheng-Zhang/mm-slub-Add-panic-function-when-slub-leaks/20240925-112601 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20240925032256.1782-2-fangzheng.zhang%40unisoc.com patch subject: [PATCH 1/2] mm/slub: Add panic function when slub leaks config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20240925/202409251929.fHee67vM-lkp@intel.com/config) compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240925/202409251929.fHee67vM-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202409251929.fHee67vM-lkp@intel.com/ All errors (new ones prefixed by >>): >> mm/slub.c:7354:2: error: unterminated conditional directive 7354 | #ifdef CONFIG_SLUB_LEAK_PANIC | ^ 1 error generated. vim +7354 mm/slub.c 7350 7351 /* 7352 * The /sys/module/slub ABI 7353 */ > 7354 #ifdef CONFIG_SLUB_LEAK_PANIC 7355 /* 7356 * What: /sys/module/slub/parameters/leak_panic 7357 * /sys/module/slub/parameters/leak_panic_threshold 7358 * Date: Sep 2024 7359 * KernelVersion: v6.6+ 7360 * Description: Used for slub memory leak check. 
When the user 7361 * successfully allocates the slub page, it also performs 7362 * statistics on the total slub usage in the system. 7363 * When the usage exceeds the set value 7364 * (threshold * memtotal / 100), it is considered that 7365 * there is a risk of slub leakage in the system at this time. 7366 * A panic operation will be triggered. 7367 * Users: userspace 7368 */ 7369 MODULE_PARM_DESC(leak_panic, "Disable/Enable slub_leak_panic"); 7370 module_param_named(leak_panic, slub_leak_panic_enabled, bool, 0644); 7371 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Fangzheng, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] [also build test ERROR on linus/master v6.11 next-20240925] [cannot apply to vbabka-slab/for-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Fangzheng-Zhang/mm-slub-Add-panic-function-when-slub-leaks/20240925-112601 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20240925032256.1782-2-fangzheng.zhang%40unisoc.com patch subject: [PATCH 1/2] mm/slub: Add panic function when slub leaks config: alpha-allnoconfig (https://download.01.org/0day-ci/archive/20240925/202409251901.XFGbDalC-lkp@intel.com/config) compiler: alpha-linux-gcc (GCC) 13.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240925/202409251901.XFGbDalC-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202409251901.XFGbDalC-lkp@intel.com/ All errors (new ones prefixed by >>): >> mm/slub.c:7354: error: unterminated #ifdef 7354 | #ifdef CONFIG_SLUB_LEAK_PANIC | vim +7354 mm/slub.c 7350 7351 /* 7352 * The /sys/module/slub ABI 7353 */ > 7354 #ifdef CONFIG_SLUB_LEAK_PANIC 7355 /* 7356 * What: /sys/module/slub/parameters/leak_panic 7357 * /sys/module/slub/parameters/leak_panic_threshold 7358 * Date: Sep 2024 7359 * KernelVersion: v6.6+ 7360 * Description: Used for slub memory leak check. When the user 7361 * successfully allocates the slub page, it also performs 7362 * statistics on the total slub usage in the system. 
7363 * When the usage exceeds the set value 7364 * (threshold * memtotal / 100), it is considered that 7365 * there is a risk of slub leakage in the system at this time. 7366 * A panic operation will be triggered. 7367 * Users: userspace 7368 */ 7369 MODULE_PARM_DESC(leak_panic, "Disable/Enable slub_leak_panic"); 7370 module_param_named(leak_panic, slub_leak_panic_enabled, bool, 0644); 7371 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Wed, Sep 25, 2024 at 11:22:55AM +0800, Fangzheng Zhang wrote: > Perform real-time memory usage monitoring on the slub page > allocation paths, ie, kmalloc_large_alloced and alloc_slab_page. > When the usage exceeds the set threshole value, the panic function > will be triggered. > > Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com> > --- > mm/Kconfig | 11 ++++++++ > mm/slub.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 87 insertions(+) > > diff --git a/mm/Kconfig b/mm/Kconfig > index 09aebca1cae3..60cf72d4f0da 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -255,6 +255,17 @@ config SLUB_TINY > > If unsure, say N. > > +config SLUB_LEAK_PANIC > + bool "Trigger panic when slub leaks" > + default y > + help > + Detect slub leaks by monitoring its usage in real time on the page > + allocation path of the slub. When the slub occupancy exceeds the > + user-set value, it is considered that the slub is leaking at this > + time, and a panic operation will be triggered immediately. Uers > + can enable and set leak threshold by using the kernel command line > + parameters "slub.leak_panic" and "slub.leak_panic_threshold". > + > config SLAB_MERGE_DEFAULT > bool "Allow slab caches to be merged" > default y > diff --git a/mm/slub.c b/mm/slub.c > index 21f71cb6cc06..91049f87ab98 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -42,6 +42,9 @@ > #include <kunit/test.h> > #include <kunit/test-bug.h> > #include <linux/sort.h> > +#ifdef CONFIG_SLUB_LEAK_PANIC > +#include <linux/vmstat.h> > +#endif Please redo this to not require #ifdef in .c files, otherwise it gets very unmaintainable over time. thanks, greg k-h
On Wed, Sep 25, 2024 at 11:22:55AM +0800, Fangzheng Zhang wrote: > Perform real-time memory usage monitoring on the slub page > allocation paths, ie, kmalloc_large_alloced and alloc_slab_page. > When the usage exceeds the set threshole value, the panic function > will be triggered. > > Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com> > --- > mm/Kconfig | 11 ++++++++ > mm/slub.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 87 insertions(+) > > diff --git a/mm/Kconfig b/mm/Kconfig > index 09aebca1cae3..60cf72d4f0da 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -255,6 +255,17 @@ config SLUB_TINY > > If unsure, say N. > > +config SLUB_LEAK_PANIC > + bool "Trigger panic when slub leaks" > + default y "default y" is only there if the feature is required to boot the machine properly. That's not the case here, sorry. thanks, greg k-h
On Wed, Sep 25, 2024 at 4:10 PM Greg KH <gregkh@linuxfoundation.org> wrote: > > On Wed, Sep 25, 2024 at 11:22:55AM +0800, Fangzheng Zhang wrote: > > Perform real-time memory usage monitoring on the slub page > > allocation paths, ie, kmalloc_large_alloced and alloc_slab_page. > > When the usage exceeds the set threshole value, the panic function > > will be triggered. > > > > Signed-off-by: Fangzheng Zhang <fangzheng.zhang@unisoc.com> > > --- > > mm/Kconfig | 11 ++++++++ > > mm/slub.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 87 insertions(+) > > > > diff --git a/mm/Kconfig b/mm/Kconfig > > index 09aebca1cae3..60cf72d4f0da 100644 > > --- a/mm/Kconfig > > +++ b/mm/Kconfig > > @@ -255,6 +255,17 @@ config SLUB_TINY > > > > If unsure, say N. > > > > +config SLUB_LEAK_PANIC > > + bool "Trigger panic when slub leaks" > > + default y > > "default y" is only there if the feature is required to boot the machine > properly. That's not the case here, sorry. > > thanks, > > greg k-h Hi Greg, The reason why the "default y" is set here is that config is only valid for the functional code that works. To truly implement slub_leak_panic, the parameters must be set effectively. And, the current parameters are all in the default n. Thanks.
© 2016 - 2024 Red Hat, Inc.