Having the accounting in the datastructure is better in terms of cache
lines and allows more optimizations later on.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/debugobjects.c | 55 +++++++++++++++++++++++++++++------------------------
1 file changed, 31 insertions(+), 24 deletions(-)
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -46,9 +46,14 @@ struct debug_bucket {
struct obj_pool {
struct hlist_head objects;
unsigned int cnt;
+ unsigned int min_cnt;
+ unsigned int max_cnt;
} ____cacheline_aligned;
-static DEFINE_PER_CPU(struct obj_pool, pool_pcpu);
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
+ .max_cnt = ODEBUG_POOL_PERCPU_SIZE,
+};
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
@@ -56,8 +61,14 @@ static struct debug_obj obj_static_pool
static DEFINE_RAW_SPINLOCK(pool_lock);
-static struct obj_pool pool_global;
-static struct obj_pool pool_to_free;
+static struct obj_pool pool_global = {
+ .min_cnt = ODEBUG_POOL_MIN_LEVEL,
+ .max_cnt = ODEBUG_POOL_SIZE,
+};
+
+static struct obj_pool pool_to_free = {
+ .max_cnt = UINT_MAX,
+};
static HLIST_HEAD(pool_boot);
@@ -79,13 +90,9 @@ static int __data_racy debug_objects_f
static int __data_racy debug_objects_warnings __read_mostly;
static bool __data_racy debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __ro_after_init
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __ro_after_init
- = ODEBUG_POOL_MIN_LEVEL;
-static const struct debug_obj_descr *descr_test __read_mostly;
-static struct kmem_cache *obj_cache __ro_after_init;
+static const struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __ro_after_init;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -124,14 +131,14 @@ static __always_inline unsigned int pool
return READ_ONCE(pool->cnt);
}
-static inline bool pool_global_should_refill(void)
+static __always_inline bool pool_should_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < debug_objects_pool_min_level;
+ return pool_count(pool) < pool->min_cnt;
}
-static inline bool pool_global_must_refill(void)
+static __always_inline bool pool_must_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < (debug_objects_pool_min_level / 2);
+ return pool_count(pool) < pool->min_cnt / 2;
}
static void free_object_list(struct hlist_head *head)
@@ -178,7 +185,7 @@ static void fill_pool_from_freelist(void
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- while (pool_to_free.cnt && (pool_global.cnt < debug_objects_pool_min_level)) {
+ while (pool_to_free.cnt && (pool_global.cnt < pool_global.min_cnt)) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
WRITE_ONCE(pool_to_free.cnt, pool_to_free.cnt - 1);
@@ -197,11 +204,11 @@ static void fill_pool(void)
* - One other CPU is already allocating
* - the global pool has not reached the critical level yet
*/
- if (!pool_global_must_refill() && atomic_read(&cpus_allocating))
+ if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
return;
atomic_inc(&cpus_allocating);
- while (pool_global_should_refill()) {
+ while (pool_should_refill(&pool_global)) {
struct debug_obj *new, *last = NULL;
HLIST_HEAD(head);
int cnt;
@@ -337,7 +344,7 @@ static void free_obj_work(struct work_st
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
- if (pool_global.cnt >= debug_objects_pool_size)
+ if (pool_global.cnt >= pool_global.max_cnt)
goto free_objs;
/*
@@ -347,7 +354,7 @@ static void free_obj_work(struct work_st
* may be gearing up to use more and more objects, don't free any
* of them until the next round.
*/
- while (pool_to_free.cnt && pool_global.cnt < debug_objects_pool_size) {
+ while (pool_to_free.cnt && pool_global.cnt < pool_global.max_cnt) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
hlist_add_head(&obj->node, &pool_global.objects);
@@ -408,7 +415,7 @@ static void __free_object(struct debug_o
}
raw_spin_lock(&pool_lock);
- work = (pool_global.cnt > debug_objects_pool_size) && obj_cache &&
+ work = (pool_global.cnt > pool_global.max_cnt) && obj_cache &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
@@ -424,7 +431,7 @@ static void __free_object(struct debug_o
}
}
- if ((pool_global.cnt > debug_objects_pool_size) &&
+ if ((pool_global.cnt > pool_global.max_cnt) &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX)) {
int i;
@@ -629,13 +636,13 @@ static void debug_objects_fill_pool(void
if (unlikely(!obj_cache))
return;
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/* Try reusing objects from obj_to_free_list */
fill_pool_from_freelist();
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/*
@@ -1427,8 +1434,8 @@ void __init debug_objects_mem_init(void)
* system.
*/
extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
- debug_objects_pool_size += extras;
- debug_objects_pool_min_level += extras;
+ pool_global.max_cnt += extras;
+ pool_global.min_cnt += extras;
/* Everything worked. Expose the cache */
obj_cache = cache;
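
[ Not part of the patch: a minimal user-space sketch illustrating the
  cache-line argument from the changelog. It mirrors the new struct
  obj_pool layout and assumes a 64-byte cache line and a pointer-sized
  hlist_head; on a typical 64-bit build all three counters land within
  the first 64 bytes, so the refill/free checks read a single cache
  line instead of touching separate global variables. ]

#include <stdio.h>
#include <stddef.h>

/* Stand-in for the kernel's hlist_head: a single pointer. */
struct hlist_head { void *first; };

/* Mirrors struct obj_pool after this patch; 64-byte line size assumed. */
struct obj_pool {
	struct hlist_head objects;
	unsigned int cnt;
	unsigned int min_cnt;
	unsigned int max_cnt;
} __attribute__((aligned(64)));

int main(void)
{
	/* Prints cnt@8 min_cnt@12 max_cnt@16 size=64 on a common 64-bit build. */
	printf("cnt@%zu min_cnt@%zu max_cnt@%zu size=%zu\n",
	       offsetof(struct obj_pool, cnt),
	       offsetof(struct obj_pool, min_cnt),
	       offsetof(struct obj_pool, max_cnt),
	       sizeof(struct obj_pool));
	return 0;
}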
On 2024/10/8 0:50, Thomas Gleixner wrote:
> Having the accounting in the datastructure is better in terms of cache
> lines and allows more optimizations later on.
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
--
Regards,
Zhen Lei

The following commit has been merged into the core/debugobjects branch of tip:
Commit-ID: 96a9a0421c77301f9b551f3460ac04471a3c0612
Gitweb: https://git.kernel.org/tip/96a9a0421c77301f9b551f3460ac04471a3c0612
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Mon, 07 Oct 2024 18:50:08 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 15 Oct 2024 17:30:32 +02:00
debugobjects: Move min/max count into pool struct
Having the accounting in the datastructure is better in terms of cache
lines and allows more optimizations later on.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/all/20241007164913.831908427@linutronix.de
---
lib/debugobjects.c | 55 +++++++++++++++++++++++++--------------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 3d1d973..fbe8f26 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -46,9 +46,14 @@ struct debug_bucket {
struct obj_pool {
struct hlist_head objects;
unsigned int cnt;
+ unsigned int min_cnt;
+ unsigned int max_cnt;
} ____cacheline_aligned;
-static DEFINE_PER_CPU(struct obj_pool, pool_pcpu);
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
+ .max_cnt = ODEBUG_POOL_PERCPU_SIZE,
+};
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
@@ -56,8 +61,14 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
static DEFINE_RAW_SPINLOCK(pool_lock);
-static struct obj_pool pool_global;
-static struct obj_pool pool_to_free;
+static struct obj_pool pool_global = {
+ .min_cnt = ODEBUG_POOL_MIN_LEVEL,
+ .max_cnt = ODEBUG_POOL_SIZE,
+};
+
+static struct obj_pool pool_to_free = {
+ .max_cnt = UINT_MAX,
+};
static HLIST_HEAD(pool_boot);
@@ -79,13 +90,9 @@ static int __data_racy debug_objects_fixups __read_mostly;
static int __data_racy debug_objects_warnings __read_mostly;
static bool __data_racy debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __ro_after_init
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __ro_after_init
- = ODEBUG_POOL_MIN_LEVEL;
-static const struct debug_obj_descr *descr_test __read_mostly;
-static struct kmem_cache *obj_cache __ro_after_init;
+static const struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __ro_after_init;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -124,14 +131,14 @@ static __always_inline unsigned int pool_count(struct obj_pool *pool)
return READ_ONCE(pool->cnt);
}
-static inline bool pool_global_should_refill(void)
+static __always_inline bool pool_should_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < debug_objects_pool_min_level;
+ return pool_count(pool) < pool->min_cnt;
}
-static inline bool pool_global_must_refill(void)
+static __always_inline bool pool_must_refill(struct obj_pool *pool)
{
- return READ_ONCE(pool_global.cnt) < (debug_objects_pool_min_level / 2);
+ return pool_count(pool) < pool->min_cnt / 2;
}
static void free_object_list(struct hlist_head *head)
@@ -178,7 +185,7 @@ static void fill_pool_from_freelist(void)
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- while (pool_to_free.cnt && (pool_global.cnt < debug_objects_pool_min_level)) {
+ while (pool_to_free.cnt && (pool_global.cnt < pool_global.min_cnt)) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
WRITE_ONCE(pool_to_free.cnt, pool_to_free.cnt - 1);
@@ -197,11 +204,11 @@ static void fill_pool(void)
* - One other CPU is already allocating
* - the global pool has not reached the critical level yet
*/
- if (!pool_global_must_refill() && atomic_read(&cpus_allocating))
+ if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
return;
atomic_inc(&cpus_allocating);
- while (pool_global_should_refill()) {
+ while (pool_should_refill(&pool_global)) {
struct debug_obj *new, *last = NULL;
HLIST_HEAD(head);
int cnt;
@@ -337,7 +344,7 @@ static void free_obj_work(struct work_struct *work)
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
- if (pool_global.cnt >= debug_objects_pool_size)
+ if (pool_global.cnt >= pool_global.max_cnt)
goto free_objs;
/*
@@ -347,7 +354,7 @@ static void free_obj_work(struct work_struct *work)
* may be gearing up to use more and more objects, don't free any
* of them until the next round.
*/
- while (pool_to_free.cnt && pool_global.cnt < debug_objects_pool_size) {
+ while (pool_to_free.cnt && pool_global.cnt < pool_global.max_cnt) {
obj = hlist_entry(pool_to_free.objects.first, typeof(*obj), node);
hlist_del(&obj->node);
hlist_add_head(&obj->node, &pool_global.objects);
@@ -408,7 +415,7 @@ static void __free_object(struct debug_obj *obj)
}
raw_spin_lock(&pool_lock);
- work = (pool_global.cnt > debug_objects_pool_size) && obj_cache &&
+ work = (pool_global.cnt > pool_global.max_cnt) && obj_cache &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
@@ -424,7 +431,7 @@ static void __free_object(struct debug_obj *obj)
}
}
- if ((pool_global.cnt > debug_objects_pool_size) &&
+ if ((pool_global.cnt > pool_global.max_cnt) &&
(pool_to_free.cnt < ODEBUG_FREE_WORK_MAX)) {
int i;
@@ -629,13 +636,13 @@ static void debug_objects_fill_pool(void)
if (unlikely(!obj_cache))
return;
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/* Try reusing objects from obj_to_free_list */
fill_pool_from_freelist();
- if (likely(!pool_global_should_refill()))
+ if (likely(!pool_should_refill(&pool_global)))
return;
/*
@@ -1427,8 +1434,8 @@ void __init debug_objects_mem_init(void)
* system.
*/
extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
- debug_objects_pool_size += extras;
- debug_objects_pool_min_level += extras;
+ pool_global.max_cnt += extras;
+ pool_global.min_cnt += extras;
/* Everything worked. Expose the cache */
obj_cache = cache;