Having the accounting in the data structure is better in terms of cache
lines and allows more optimizations later on.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/debugobjects.c | 55 +++++++++++++++++++++++++++++------------------------
1 file changed, 31 insertions(+), 24 deletions(-)
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -46,9 +46,14 @@ struct debug_bucket {
struct obj_pool {
struct hlist_head objects;
unsigned int cnt;
+ unsigned int min_cnt;
+ unsigned int max_cnt;
} ____cacheline_aligned;
-static DEFINE_PER_CPU(struct obj_pool, pool_pcpu);
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
+ .max_cnt = ODEBUG_POOL_PERCPU_SIZE,
+};
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
@@ -56,8 +61,14 @@ static struct debug_obj obj_static_pool
static DEFINE_RAW_SPINLOCK(pool_lock);
-static struct obj_pool pool_global;
-static struct obj_pool pool_to_free;
+static struct obj_pool pool_global = {
+ .min_cnt = ODEBUG_POOL_MIN_LEVEL,
+ .max_cnt = ODEBUG_POOL_SIZE,
+};
+
+static struct obj_pool pool_to_free = {
+ .max_cnt = UINT_MAX,
+};
static HLIST_HEAD(pool_boot);
@@ -79,13 +90,9 @@ static int __data_racy debug_objects_f
static int __data_racy debug_objects_warnings __read_mostly;
static bool __data_racy debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __ro_after_init
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __ro_after_init
- = ODEBUG_POOL_MIN_LEVEL;
-static const struct debug_obj_descr *descr_test __read_mostly;
-static struct kmem_cache *obj_cache __ro_after_init;
+static const struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __ro_after_init;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -124,14 +131,14 @@ static __always_inline unsigned int pool
return READ_ONCE(pool->cnt);
}
-static inline bool pool_global_should_refill(void)
+static __always_inline bool pool_should_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < debug_objects_pool_min_level;
+ return pool_count(pool) < pool->min_cnt;
}
-static inline bool pool_global_must_refill(void)
+static __always_inline bool pool_must_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < (debug_objects_pool_min_level / 2);
+ return pool_count(pool) < pool->min_cnt / 2;
}
static void free_object_list(struct hlist_head *head)
@@ -178,7 +185,7 @@ static void fill_pool_from_freelist(void
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- while (pool_to_free.cnt && (pool_global.cnt < debug_objects_pool_min_level)) {
+ while (pool_to_free.cnt && (pool_global.cnt < pool_global.min_cnt)) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
WRITE_ONCE(pool_to_free.cnt, pool_to_free.cnt - 1);
@@ -197,11 +204,11 @@ static void fill_pool(void)
* - One other CPU is already allocating
* - the global pool has not reached the critical level yet
*/
- if (!pool_global_must_refill() && atomic_read(&cpus_allocating))
+ if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
return;
atomic_inc(&cpus_allocating);
- while (pool_global_should_refill()) {
+ while (pool_should_refill(&pool_global)) {
struct debug_obj *new, *last = NULL;
HLIST_HEAD(head);
int cnt;
@@ -337,7 +344,7 @@ static void free_obj_work(struct work_st
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
- if (pool_global.cnt >= debug_objects_pool_size)
+ if (pool_global.cnt >= pool_global.max_cnt)
goto free_objs;
/*
@@ -347,7 +354,7 @@ static void free_obj_work(struct work_st
* may be gearing up to use more and more objects, don't free any
* of them until the next round.
*/
- while (pool_to_free.cnt && pool_global.cnt < debug_objects_pool_size) {
+ while (pool_to_free.cnt && pool_global.cnt < pool_global.max_cnt) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
hlist_add_head(&obj->node, &pool_global.objects);
@@ -408,7 +415,7 @@ static void __free_object(struct debug_o
}
raw_spin_lock(&pool_lock);
- work = (pool_global.cnt > debug_objects_pool_size) && obj_cache &&
+ work = (pool_global.cnt > pool_global.max_cnt) && obj_cache &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
@@ -424,7 +431,7 @@ static void __free_object(struct debug_o
}
}
- if ((pool_global.cnt > debug_objects_pool_size) &&
+ if ((pool_global.cnt > pool_global.max_cnt) &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX)) {
int i;
@@ -629,13 +636,13 @@ static void debug_objects_fill_pool(void
if (unlikely(!obj_cache))
return;
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/* Try reusing objects from obj_to_free_list */
fill_pool_from_freelist();
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/*
@@ -1427,8 +1434,8 @@ void __init debug_objects_mem_init(void)
* system.
*/
extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
- debug_objects_pool_size += extras;
- debug_objects_pool_min_level += extras;
+ pool_global.max_cnt += extras;
+ pool_global.min_cnt += extras;
/* Everything worked. Expose the cache */
obj_cache = cache;
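[ Editor's aside: for readers following along outside the tree, here is a
  minimal userspace sketch of the pattern the patch introduces — an
  illustration, not the kernel code. The per-pool limits travel with the
  counter they guard, and the global-only refill predicates become generic
  helpers. The hlist, locking and per-CPU machinery are omitted, and the
  literal limits below are hypothetical stand-ins. ]

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Standalone model of the refactored obj_pool: the fill limits sit next
 * to the counter, so a refill check touches one cache line instead of
 * the pool counter plus a separate global limit variable.
 */
struct obj_pool {
	unsigned int cnt;	/* current number of pooled objects */
	unsigned int min_cnt;	/* refill when cnt drops below this */
	unsigned int max_cnt;	/* upper fill limit */
};

/* Generic predicates replacing pool_global_should_refill() and
 * pool_global_must_refill(); any pool can now be asked. */
static bool pool_should_refill(struct obj_pool *pool)
{
	return pool->cnt < pool->min_cnt;
}

static bool pool_must_refill(struct obj_pool *pool)
{
	/* Critical level: less than half of the minimum fill */
	return pool->cnt < pool->min_cnt / 2;
}

int main(void)
{
	/* Hypothetical limits standing in for ODEBUG_POOL_MIN_LEVEL/SIZE */
	struct obj_pool pool_global = { .cnt = 100, .min_cnt = 256, .max_cnt = 1024 };

	printf("min_cnt sits %zu bytes after cnt\n",
	       offsetof(struct obj_pool, min_cnt) - offsetof(struct obj_pool, cnt));
	printf("should refill: %d\n", pool_should_refill(&pool_global)); /* 1 */
	printf("must refill:   %d\n", pool_must_refill(&pool_global));   /* 1: 100 < 128 */
	return 0;
}

[ With the old layout, every pool_global_should_refill() call read
  pool_global.cnt plus the separate debug_objects_pool_min_level global;
  after the change both operands live in the same ____cacheline_aligned
  structure. ]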
On 2024/10/8 0:50, Thomas Gleixner wrote:
> Having the accounting in the data structure is better in terms of cache
> lines and allows more optimizations later on.

Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>

> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
[...]

--
Regards,
Zhen Lei
The following commit has been merged into the core/debugobjects branch of tip:
Commit-ID: 96a9a0421c77301f9b551f3460ac04471a3c0612
Gitweb: https://git.kernel.org/tip/96a9a0421c77301f9b551f3460ac04471a3c0612
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 07 Oct 2024 18:50:08 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 15 Oct 2024 17:30:32 +02:00
debugobjects: Move min/max count into pool struct
Having the accounting in the data structure is better in terms of cache
lines and allows more optimizations later on.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/all/20241007164913.831908427@linutronix.de
---
lib/debugobjects.c | 55 +++++++++++++++++++++++++--------------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 3d1d973..fbe8f26 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -46,9 +46,14 @@ struct debug_bucket {
struct obj_pool {
struct hlist_head objects;
unsigned int cnt;
+ unsigned int min_cnt;
+ unsigned int max_cnt;
} ____cacheline_aligned;
-static DEFINE_PER_CPU(struct obj_pool, pool_pcpu);
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
+ .max_cnt = ODEBUG_POOL_PERCPU_SIZE,
+};
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
@@ -56,8 +61,14 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
static DEFINE_RAW_SPINLOCK(pool_lock);
-static struct obj_pool pool_global;
-static struct obj_pool pool_to_free;
+static struct obj_pool pool_global = {
+ .min_cnt = ODEBUG_POOL_MIN_LEVEL,
+ .max_cnt = ODEBUG_POOL_SIZE,
+};
+
+static struct obj_pool pool_to_free = {
+ .max_cnt = UINT_MAX,
+};
static HLIST_HEAD(pool_boot);
@@ -79,13 +90,9 @@ static int __data_racy debug_objects_fixups __read_mostly;
static int __data_racy debug_objects_warnings __read_mostly;
static bool __data_racy debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __ro_after_init
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __ro_after_init
- = ODEBUG_POOL_MIN_LEVEL;
-static const struct debug_obj_descr *descr_test __read_mostly;
-static struct kmem_cache *obj_cache __ro_after_init;
+static const struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __ro_after_init;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -124,14 +131,14 @@ static __always_inline unsigned int pool_count(struct obj_pool *pool)
return READ_ONCE(pool->cnt);
}
-static inline bool pool_global_should_refill(void)
+static __always_inline bool pool_should_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < debug_objects_pool_min_level;
+ return pool_count(pool) < pool->min_cnt;
}
-static inline bool pool_global_must_refill(void)
+static __always_inline bool pool_must_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < (debug_objects_pool_min_level / 2);
+ return pool_count(pool) < pool->min_cnt / 2;
}
static void free_object_list(struct hlist_head *head)
@@ -178,7 +185,7 @@ static void fill_pool_from_freelist(void)
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- while (pool_to_free.cnt && (pool_global.cnt < debug_objects_pool_min_level)) {
+ while (pool_to_free.cnt && (pool_global.cnt < pool_global.min_cnt)) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
WRITE_ONCE(pool_to_free.cnt, pool_to_free.cnt - 1);
@@ -197,11 +204,11 @@ static void fill_pool(void)
* - One other CPU is already allocating
* - the global pool has not reached the critical level yet
*/
- if (!pool_global_must_refill() && atomic_read(&cpus_allocating))
+ if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
return;
atomic_inc(&cpus_allocating);
- while (pool_global_should_refill()) {
+ while (pool_should_refill(&pool_global)) {
struct debug_obj *new, *last = NULL;
HLIST_HEAD(head);
int cnt;
@@ -337,7 +344,7 @@ static void free_obj_work(struct work_struct *work)
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
- if (pool_global.cnt >= debug_objects_pool_size)
+ if (pool_global.cnt >= pool_global.max_cnt)
goto free_objs;
/*
@@ -347,7 +354,7 @@ static void free_obj_work(struct work_struct *work)
* may be gearing up to use more and more objects, don't free any
* of them until the next round.
*/
- while (pool_to_free.cnt && pool_global.cnt < debug_objects_pool_size) {
+ while (pool_to_free.cnt && pool_global.cnt < pool_global.max_cnt) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
hlist_add_head(&obj->node, &pool_global.objects);
@@ -408,7 +415,7 @@ static void __free_object(struct debug_obj *obj)
}
raw_spin_lock(&pool_lock);
- work = (pool_global.cnt > debug_objects_pool_size) && obj_cache &&
+ work = (pool_global.cnt > pool_global.max_cnt) && obj_cache &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
@@ -424,7 +431,7 @@ static void __free_object(struct debug_obj *obj)
}
}
- if ((pool_global.cnt > debug_objects_pool_size) &&
+ if ((pool_global.cnt > pool_global.max_cnt) &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX)) {
int i;
@@ -629,13 +636,13 @@ static void debug_objects_fill_pool(void)
if (unlikely(!obj_cache))
return;
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/* Try reusing objects from obj_to_free_list */
fill_pool_from_freelist();
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/*
@@ -1427,8 +1434,8 @@ void __init debug_objects_mem_init(void)
* system.
*/
extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
- debug_objects_pool_size += extras;
- debug_objects_pool_min_level += extras;
+ pool_global.max_cnt += extras;
+ pool_global.min_cnt += extras;
/* Everything worked. Expose the cache */
obj_cache = cache;
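[ Editor's aside: the final hunk scales the limits by CPU count at boot.
  A hedged sketch of that arithmetic follows, using stand-in values for
  the ODEBUG_* constants — the authoritative values are defined in
  lib/debugobjects.c and may differ. ]

#include <stdio.h>

/* Stand-in values for illustration only. */
#define ODEBUG_POOL_SIZE	1024
#define ODEBUG_POOL_MIN_LEVEL	256
#define ODEBUG_BATCH_SIZE	16

int main(void)
{
	/* Mirrors the debug_objects_mem_init() hunk: grow both limits of
	 * pool_global by one batch per possible CPU, so the per-CPU
	 * caches cannot drain the global pool below its working minimum. */
	for (unsigned int cpus = 1; cpus <= 64; cpus *= 4) {
		unsigned int extras = cpus * ODEBUG_BATCH_SIZE;

		printf("%3u CPUs: max_cnt=%u min_cnt=%u\n", cpus,
		       ODEBUG_POOL_SIZE + extras,
		       ODEBUG_POOL_MIN_LEVEL + extras);
	}
	return 0;
}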