[patch 25/25] debugobjects: Track object usage to avoid premature freeing of objects

Thomas Gleixner posted 25 patches 1 month, 3 weeks ago
Posted by Thomas Gleixner 1 month, 3 weeks ago
The freelist is freed at a constant rate independent of the actual usage
requirements. That's bad in scenarios where usage comes in bursts. The end
of a burst puts the objects on the free list, and freeing proceeds even when
the next burst, which requires objects, has already started.

Keep track of the usage with an exponentially weighted moving average and
take that into account in the worker function which frees objects from the
free list.

This further reduces the kmem_cache allocation/free rate for a full kernel
compile:

   	    kmem_cache_alloc()	kmem_cache_free()
Baseline:   225k		245k
Usage:	    170k		117k

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 5 deletions(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -13,6 +13,7 @@
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
 #include <linux/sched.h>
+#include <linux/sched/loadavg.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -86,6 +87,7 @@ static struct obj_pool pool_to_free = {
 
 static HLIST_HEAD(pool_boot);
 
+static unsigned long		avg_usage;
 static bool			obj_freeing;
 
 static int __data_racy			debug_objects_maxchain __read_mostly;
@@ -382,11 +384,28 @@ static bool kmem_alloc_batch(struct hlis
 	return true;
 }
 
+static bool pool_can_fill(struct obj_pool *dst, struct obj_pool *src)
+{
+	unsigned int cnt = pool_count(dst);
+
+	if (cnt >= dst->min_cnt)
+		return true;
+
+	return READ_ONCE(src->cnt) >= cnt - dst->min_cnt;
+}
+
 static void fill_pool(void)
 {
 	static atomic_t cpus_allocating;
 
 	/*
+	 * If the free pool has enough capacity, then don't try allocating
+	 * unless the global pool has reached the critical level.
+	 */
+	if (!pool_must_refill(&pool_global) && pool_can_fill(&pool_global, &pool_to_free))
+		return;
+
+	/*
 	 * Avoid allocation and lock contention when:
 	 *   - One other CPU is already allocating
 	 *   - the global pool has not reached the critical level yet
@@ -427,11 +446,31 @@ static struct debug_obj *lookup_object(v
 	return NULL;
 }
 
+static void calc_usage(void)
+{
+	static DEFINE_RAW_SPINLOCK(avg_lock);
+	static unsigned long avg_period;
+	unsigned long cur, now = jiffies;
+
+	if (!time_after_eq(now, READ_ONCE(avg_period)))
+		return;
+
+	if (!raw_spin_trylock(&avg_lock))
+		return;
+
+	WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
+	cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
+	WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
+	raw_spin_unlock(&avg_lock);
+}
+
 static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
 				      const struct debug_obj_descr *descr)
 {
 	struct debug_obj *obj;
 
+	calc_usage();
+
 	if (static_branch_likely(&obj_cache_enabled))
 		obj = pcpu_alloc();
 	else
@@ -450,14 +489,26 @@ static struct debug_obj *alloc_object(vo
 /* workqueue function to free objects. */
 static void free_obj_work(struct work_struct *work)
 {
-	bool free = true;
+	static unsigned long last_use_avg;
+	unsigned long cur_used, last_used, delta;
+	unsigned int max_free = 0;
 
 	WRITE_ONCE(obj_freeing, false);
 
+	/* Rate limit freeing based on current use average */
+	cur_used = READ_ONCE(avg_usage);
+	last_used = last_use_avg;
+	last_use_avg = cur_used;
+
 	if (!pool_count(&pool_to_free))
 		return;
 
-	for (unsigned int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
+	if (cur_used <= last_used) {
+		delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
+		max_free = min(delta, ODEBUG_FREE_WORK_MAX);
+	}
+
+	for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
 		HLIST_HEAD(tofree);
 
 		/* Acquire and drop the lock for each batch */
@@ -468,9 +519,10 @@ static void free_obj_work(struct work_st
 			/* Refill the global pool if possible */
 			if (pool_move_batch(&pool_global, &pool_to_free)) {
 				/* Don't free as there seems to be demand */
-				free = false;
-			} else if (free) {
+				max_free = 0;
+			} else if (max_free) {
 				pool_pop_batch(&tofree, &pool_to_free);
+				max_free--;
 			} else {
 				return;
 			}
@@ -1110,7 +1162,7 @@ static int debug_stats_show(struct seq_f
 	for_each_possible_cpu(cpu)
 		pcp_free += per_cpu(pool_pcpu.cnt, cpu);
 
-	pool_used = data_race(pool_global.stats.cur_used);
+	pool_used = READ_ONCE(pool_global.stats.cur_used);
 	pcp_free = min(pool_used, pcp_free);
 	pool_used -= pcp_free;
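
avg_usage is a fixed-point EWMA: every 10ms sample calc_load() blends the
previous average with the current number of used objects (scaled by
ODEBUG_FREE_WORK_MAX), so the average rises quickly during a burst and
decays over the following seconds once the burst ends. free_obj_work()
then converts the decline of that average since its last run into a batch
budget (max_free). A minimal userspace sketch of this interplay, assuming
the loadavg constants FIXED_1 = 2048 and EXP_5 = 2014 and an illustrative
ODEBUG_FREE_WORK_MAX of 64 (not necessarily the value debugobjects uses):

/*
 * Minimal userspace sketch of the usage tracking above.  calc_load()
 * mirrors the fixed-point helper from <linux/sched/loadavg.h>; the burst
 * shape and ODEBUG_FREE_WORK_MAX are illustrative.  The budget is
 * evaluated here once per simulated second (the real worker runs on its
 * own delayed-work schedule): max_free stays 0 while the average is flat
 * or rising and only opens up, tracking the decay, after the burst ends.
 */
#include <stdio.h>

#define FSHIFT			11
#define FIXED_1			(1UL << FSHIFT)
#define EXP_5			2014UL
#define ODEBUG_FREE_WORK_MAX	64UL	/* assumed for illustration */

static unsigned long calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;
	return newload / FIXED_1;
}

int main(void)
{
	unsigned long avg_usage = 0, last_use_avg = 0;

	for (int tick = 0; tick < 800; tick++) {
		/* Burst: 200 objects in use for three seconds, then back to 50 */
		unsigned long cur_used = tick < 300 ? 200 : 50;

		/* calc_usage(): one sample per 10ms period */
		avg_usage = calc_load(avg_usage, EXP_5, cur_used * ODEBUG_FREE_WORK_MAX);

		/* free_obj_work(): budget derived from the decline of the average */
		if (tick % 100 == 99) {
			unsigned long max_free = 0;

			if (avg_usage <= last_use_avg) {
				unsigned long delta = (last_use_avg - avg_usage) / ODEBUG_FREE_WORK_MAX;

				max_free = delta < ODEBUG_FREE_WORK_MAX ? delta : ODEBUG_FREE_WORK_MAX;
			}
			printf("t=%ds avg=%lu max_free=%lu batches\n",
			       (tick + 1) / 100, avg_usage / ODEBUG_FREE_WORK_MAX, max_free);
			last_use_avg = avg_usage;
		}
	}
	return 0;
}
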
Re: [patch 25/25] debugobjects: Track object usage to avoid premature freeing of objects
Posted by Leizhen (ThunderTown) 1 month, 2 weeks ago

On 2024/10/8 0:50, Thomas Gleixner wrote:
> The freelist is freed at a constant rate independent of the actual usage
> requirements. That's bad in scenarios where usage comes in bursts. The end
> of a burst puts the objects on the free list, and freeing proceeds even when
> the next burst, which requires objects, has already started.
> 
> Keep track of the usage with an exponentially weighted moving average and
> take that into account in the worker function which frees objects from the
> free list.
> 
> This further reduces the kmem_cache allocation/free rate for a full kernel
> compile:
> 
>    	    kmem_cache_alloc()	kmem_cache_free()
> Baseline:   225k		245k
> Usage:	    170k		117k
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  lib/debugobjects.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 57 insertions(+), 5 deletions(-)
> 
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -13,6 +13,7 @@
>  #include <linux/hash.h>
>  #include <linux/kmemleak.h>
>  #include <linux/sched.h>
> +#include <linux/sched/loadavg.h>
>  #include <linux/sched/task_stack.h>
>  #include <linux/seq_file.h>
>  #include <linux/slab.h>
> @@ -86,6 +87,7 @@ static struct obj_pool pool_to_free = {
>  
>  static HLIST_HEAD(pool_boot);
>  
> +static unsigned long		avg_usage;
>  static bool			obj_freeing;
>  
>  static int __data_racy			debug_objects_maxchain __read_mostly;
> @@ -382,11 +384,28 @@ static bool kmem_alloc_batch(struct hlis
>  	return true;
>  }
>  
> +static bool pool_can_fill(struct obj_pool *dst, struct obj_pool *src)
> +{
> +	unsigned int cnt = pool_count(dst);
> +
> +	if (cnt >= dst->min_cnt)
> +		return true;

There's already a check in function debug_objects_fill_pool().
It's unlikely to be true here.

debug_objects_fill_pool() --> fill_pool() --> pool_can_fill()
:
	if (likely(!pool_should_refill(&pool_global)))
		return;

> +
> +	return READ_ONCE(src->cnt) >= cnt - dst->min_cnt;

I don't understand this one. However, similar to the above, fill_pool_from_freelist()
has already been called before fill_pool() is called.

> +}
> +
>  static void fill_pool(void)
>  {
>  	static atomic_t cpus_allocating;
>  
>  	/*
> +	 * If the free pool has enough capacity, then don't try allocating
> +	 * unless the global pool has reached the critical level.
> +	 */
> +	if (!pool_must_refill(&pool_global) && pool_can_fill(&pool_global, &pool_to_free))
> +		return;

As above, this code seems unnecessary.

> +
> +	/*
>  	 * Avoid allocation and lock contention when:
>  	 *   - One other CPU is already allocating
>  	 *   - the global pool has not reached the critical level yet
> @@ -427,11 +446,31 @@ static struct debug_obj *lookup_object(v
>  	return NULL;
>  }
>  
> +static void calc_usage(void)
> +{
> +	static DEFINE_RAW_SPINLOCK(avg_lock);
> +	static unsigned long avg_period;
> +	unsigned long cur, now = jiffies;
> +
> +	if (!time_after_eq(now, READ_ONCE(avg_period)))
> +		return;
> +
> +	if (!raw_spin_trylock(&avg_lock))
> +		return;
> +
> +	WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
> +	cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
> +	WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
> +	raw_spin_unlock(&avg_lock);
> +}
> +
>  static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
>  				      const struct debug_obj_descr *descr)
>  {
>  	struct debug_obj *obj;
>  
> +	calc_usage();
> +
>  	if (static_branch_likely(&obj_cache_enabled))
>  		obj = pcpu_alloc();
>  	else
> @@ -450,14 +489,26 @@ static struct debug_obj *alloc_object(vo
>  /* workqueue function to free objects. */
>  static void free_obj_work(struct work_struct *work)
>  {
> -	bool free = true;
> +	static unsigned long last_use_avg;
> +	unsigned long cur_used, last_used, delta;
> +	unsigned int max_free = 0;
>  
>  	WRITE_ONCE(obj_freeing, false);
>  
> +	/* Rate limit freeing based on current use average */
> +	cur_used = READ_ONCE(avg_usage);
> +	last_used = last_use_avg;
> +	last_use_avg = cur_used;
> +
>  	if (!pool_count(&pool_to_free))
>  		return;
>  
> -	for (unsigned int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
> +	if (cur_used <= last_used) {
> +		delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
> +		max_free = min(delta, ODEBUG_FREE_WORK_MAX);
> +	}
> +
> +	for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
>  		HLIST_HEAD(tofree);
>  
>  		/* Acquire and drop the lock for each batch */
> @@ -468,9 +519,10 @@ static void free_obj_work(struct work_st
>  			/* Refill the global pool if possible */
>  			if (pool_move_batch(&pool_global, &pool_to_free)) {
>  				/* Don't free as there seems to be demand */
> -				free = false;
> -			} else if (free) {
> +				max_free = 0;
> +			} else if (max_free) {
>  				pool_pop_batch(&tofree, &pool_to_free);
> +				max_free--;
>  			} else {
>  				return;
>  			}
> @@ -1110,7 +1162,7 @@ static int debug_stats_show(struct seq_f
>  	for_each_possible_cpu(cpu)
>  		pcp_free += per_cpu(pool_pcpu.cnt, cpu);
>  
> -	pool_used = data_race(pool_global.stats.cur_used);
> +	pool_used = READ_ONCE(pool_global.stats.cur_used);
>  	pcp_free = min(pool_used, pcp_free);
>  	pool_used -= pcp_free;
>  
> 
> .
> 

-- 
Regards,
  Zhen Lei
Re: [patch 25/25] debugobjects: Track object usage to avoid premature freeing of objects
Posted by Thomas Gleixner 1 month, 2 weeks ago
On Thu, Oct 10 2024 at 21:13, Leizhen wrote:
>> +static bool pool_can_fill(struct obj_pool *dst, struct obj_pool *src)
>> +{
>> +	unsigned int cnt = pool_count(dst);
>> +
>> +	if (cnt >= dst->min_cnt)
>> +		return true;
>
> There's already a check in function debug_objects_fill_pool().
> It's unlikely to be true here.
>
> debug_objects_fill_pool() --> fill_pool() --> pool_can_fill()
> :
> 	if (likely(!pool_should_refill(&pool_global)))
> 		return;

While they are different checks, you're right.

If fill_pool_from_freelist() reused objects and was not able to refill
the global pool above the threshold level, then fill_pool() won't find
enough objects in the to-free pool to refill it, so it's just checking
for a completely unlikely corner case.
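
A stubbed sketch of that gating order, with the pools reduced to plain
counters (the function names follow the thread; the bodies are simplified
stand-ins, not the kernel code): fill_pool() is only reached when the
global pool is still below its minimum after fill_pool_from_freelist()
has taken whatever the to-free pool could deliver, so a "can the free
pool still fill us?" check inside fill_pool() covers only an unlikely
race window.

#include <stdbool.h>
#include <stdio.h>

struct obj_pool { unsigned int cnt, min_cnt; };

static struct obj_pool pool_global  = { .cnt = 40, .min_cnt = 256 };
static struct obj_pool pool_to_free = { .cnt = 96 };

static bool pool_should_refill(struct obj_pool *p)
{
	return p->cnt < p->min_cnt;
}

/* Reuse to-free objects until that pool is empty or the global pool is topped up */
static void fill_pool_from_freelist(void)
{
	while (pool_to_free.cnt && pool_should_refill(&pool_global)) {
		pool_to_free.cnt--;
		pool_global.cnt++;
	}
}

static void fill_pool(void)
{
	/*
	 * The dropped pool_can_fill() check would sit here.  At this point
	 * pool_to_free is almost always empty, so it could only trigger if
	 * objects were freed concurrently in between.
	 */
	printf("fill_pool(): to_free=%u global=%u -> allocate from kmem_cache\n",
	       pool_to_free.cnt, pool_global.cnt);
}

static void debug_objects_fill_pool(void)
{
	if (!pool_should_refill(&pool_global))
		return;

	fill_pool_from_freelist();

	if (!pool_should_refill(&pool_global))
		return;

	fill_pool();
}

int main(void)
{
	debug_objects_fill_pool();
	return 0;
}
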

I just validated that it does not make a difference. Updated patch below

Thanks for spotting this!

       tglx
---
Subject: debugobjects: Track object usage to avoid premature freeing of objects
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 14 Sep 2024 21:33:19 +0200

The freelist is freed at a constant rate independent of the actual usage
requirements. That's bad in scenarios where usage comes in bursts. The end
of a burst puts the objects on the free list, and freeing proceeds even when
the next burst, which requires objects, has already started.

Keep track of the usage with an exponentially weighted moving average and
take that into account in the worker function which frees objects from the
free list.

This further reduces the kmem_cache allocation/free rate for a full kernel
compile:

   	    kmem_cache_alloc()	kmem_cache_free()
Baseline:   225k		245k
Usage:	    170k		117k

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c |   45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -13,6 +13,7 @@
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
 #include <linux/sched.h>
+#include <linux/sched/loadavg.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -86,6 +87,7 @@ static struct obj_pool pool_to_free = {
 
 static HLIST_HEAD(pool_boot);
 
+static unsigned long		avg_usage;
 static bool			obj_freeing;
 
 static int __data_racy			debug_objects_maxchain __read_mostly;
@@ -427,11 +429,31 @@ static struct debug_obj *lookup_object(v
 	return NULL;
 }
 
+static void calc_usage(void)
+{
+	static DEFINE_RAW_SPINLOCK(avg_lock);
+	static unsigned long avg_period;
+	unsigned long cur, now = jiffies;
+
+	if (!time_after_eq(now, READ_ONCE(avg_period)))
+		return;
+
+	if (!raw_spin_trylock(&avg_lock))
+		return;
+
+	WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
+	cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
+	WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
+	raw_spin_unlock(&avg_lock);
+}
+
 static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
 				      const struct debug_obj_descr *descr)
 {
 	struct debug_obj *obj;
 
+	calc_usage();
+
 	if (static_branch_likely(&obj_cache_enabled))
 		obj = pcpu_alloc();
 	else
@@ -450,14 +472,26 @@ static struct debug_obj *alloc_object(vo
 /* workqueue function to free objects. */
 static void free_obj_work(struct work_struct *work)
 {
-	bool free = true;
+	static unsigned long last_use_avg;
+	unsigned long cur_used, last_used, delta;
+	unsigned int max_free = 0;
 
 	WRITE_ONCE(obj_freeing, false);
 
+	/* Rate limit freeing based on current use average */
+	cur_used = READ_ONCE(avg_usage);
+	last_used = last_use_avg;
+	last_use_avg = cur_used;
+
 	if (!pool_count(&pool_to_free))
 		return;
 
-	for (unsigned int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
+	if (cur_used <= last_used) {
+		delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
+		max_free = min(delta, ODEBUG_FREE_WORK_MAX);
+	}
+
+	for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
 		HLIST_HEAD(tofree);
 
 		/* Acquire and drop the lock for each batch */
@@ -468,9 +502,10 @@ static void free_obj_work(struct work_st
 			/* Refill the global pool if possible */
 			if (pool_move_batch(&pool_global, &pool_to_free)) {
 				/* Don't free as there seems to be demand */
-				free = false;
-			} else if (free) {
+				max_free = 0;
+			} else if (max_free) {
 				pool_pop_batch(&tofree, &pool_to_free);
+				max_free--;
 			} else {
 				return;
 			}
@@ -1110,7 +1145,7 @@ static int debug_stats_show(struct seq_f
 	for_each_possible_cpu(cpu)
 		pcp_free += per_cpu(pool_pcpu.cnt, cpu);
 
-	pool_used = data_race(pool_global.stats.cur_used);
+	pool_used = READ_ONCE(pool_global.stats.cur_used);
 	pcp_free = min(pool_used, pcp_free);
 	pool_used -= pcp_free;
Re: [patch 25/25] debugobjects: Track object usage to avoid premature freeing of objects
Posted by Leizhen (ThunderTown) 1 month, 2 weeks ago

On 2024/10/14 2:45, Thomas Gleixner wrote:
> On Thu, Oct 10 2024 at 21:13, Leizhen wrote:
>>> +static bool pool_can_fill(struct obj_pool *dst, struct obj_pool *src)
>>> +{
>>> +	unsigned int cnt = pool_count(dst);
>>> +
>>> +	if (cnt >= dst->min_cnt)
>>> +		return true;
>>
>> There's already a check in function debug_objects_fill_pool().
>> It's unlikely to be true here.
>>
>> debug_objects_fill_pool() --> fill_pool() --> pool_can_fill()
>> :
>> 	if (likely(!pool_should_refill(&pool_global)))
>> 		return;
> 
> While they are different checks, you're right.
> 
> If fill_pool_from_freelist() reused objects and was not able to refill
> the global pool above the threshold level, then fill_pool() won't find
> enough objects in the to-free pool to refill it, so it's just checking
> for a completely unlikely corner case.
> 
> I just validated that it does not make a difference. Updated patch below
> 
> Thanks for spotting this!
> 
>        tglx
> ---
> Subject: debugobjects: Track object usage to avoid premature freeing of objects
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Sat, 14 Sep 2024 21:33:19 +0200
> 
> The freelist is freed at a constant rate independent of the actual usage
> requirements. That's bad in scenarios where usage comes in bursts. The end
> of a burst puts the objects on the free list, and freeing proceeds even when
> the next burst, which requires objects, has already started.
> 
> Keep track of the usage with an exponentially weighted moving average and
> take that into account in the worker function which frees objects from the
> free list.
> 
> This further reduces the kmem_cache allocation/free rate for a full kernel
> compile:
> 
>    	    kmem_cache_alloc()	kmem_cache_free()
> Baseline:   225k		245k
> Usage:	    170k		117k

Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>

> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  lib/debugobjects.c |   45 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 40 insertions(+), 5 deletions(-)
> 
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -13,6 +13,7 @@
>  #include <linux/hash.h>
>  #include <linux/kmemleak.h>
>  #include <linux/sched.h>
> +#include <linux/sched/loadavg.h>
>  #include <linux/sched/task_stack.h>
>  #include <linux/seq_file.h>
>  #include <linux/slab.h>
> @@ -86,6 +87,7 @@ static struct obj_pool pool_to_free = {
>  
>  static HLIST_HEAD(pool_boot);
>  
> +static unsigned long		avg_usage;
>  static bool			obj_freeing;
>  
>  static int __data_racy			debug_objects_maxchain __read_mostly;
> @@ -427,11 +429,31 @@ static struct debug_obj *lookup_object(v
>  	return NULL;
>  }
>  
> +static void calc_usage(void)
> +{
> +	static DEFINE_RAW_SPINLOCK(avg_lock);
> +	static unsigned long avg_period;
> +	unsigned long cur, now = jiffies;
> +
> +	if (!time_after_eq(now, READ_ONCE(avg_period)))
> +		return;
> +
> +	if (!raw_spin_trylock(&avg_lock))
> +		return;
> +
> +	WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
> +	cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
> +	WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
> +	raw_spin_unlock(&avg_lock);
> +}
> +
>  static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
>  				      const struct debug_obj_descr *descr)
>  {
>  	struct debug_obj *obj;
>  
> +	calc_usage();
> +
>  	if (static_branch_likely(&obj_cache_enabled))
>  		obj = pcpu_alloc();
>  	else
> @@ -450,14 +472,26 @@ static struct debug_obj *alloc_object(vo
>  /* workqueue function to free objects. */
>  static void free_obj_work(struct work_struct *work)
>  {
> -	bool free = true;
> +	static unsigned long last_use_avg;
> +	unsigned long cur_used, last_used, delta;
> +	unsigned int max_free = 0;
>  
>  	WRITE_ONCE(obj_freeing, false);
>  
> +	/* Rate limit freeing based on current use average */
> +	cur_used = READ_ONCE(avg_usage);
> +	last_used = last_use_avg;
> +	last_use_avg = cur_used;
> +
>  	if (!pool_count(&pool_to_free))
>  		return;
>  
> -	for (unsigned int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
> +	if (cur_used <= last_used) {
> +		delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
> +		max_free = min(delta, ODEBUG_FREE_WORK_MAX);
> +	}
> +
> +	for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
>  		HLIST_HEAD(tofree);
>  
>  		/* Acquire and drop the lock for each batch */
> @@ -468,9 +502,10 @@ static void free_obj_work(struct work_st
>  			/* Refill the global pool if possible */
>  			if (pool_move_batch(&pool_global, &pool_to_free)) {
>  				/* Don't free as there seems to be demand */
> -				free = false;
> -			} else if (free) {
> +				max_free = 0;
> +			} else if (max_free) {
>  				pool_pop_batch(&tofree, &pool_to_free);
> +				max_free--;
>  			} else {
>  				return;
>  			}
> @@ -1110,7 +1145,7 @@ static int debug_stats_show(struct seq_f
>  	for_each_possible_cpu(cpu)
>  		pcp_free += per_cpu(pool_pcpu.cnt, cpu);
>  
> -	pool_used = data_race(pool_global.stats.cur_used);
> +	pool_used = READ_ONCE(pool_global.stats.cur_used);
>  	pcp_free = min(pool_used, pcp_free);
>  	pool_used -= pcp_free;
>  
> .
> 

-- 
Regards,
  Zhen Lei
[tip: core/debugobjects] debugobjects: Track object usage to avoid premature freeing of objects
Posted by tip-bot2 for Thomas Gleixner 1 month, 1 week ago
The following commit has been merged into the core/debugobjects branch of tip:

Commit-ID:     ff8d523cc4520a5ce86cde0fd57c304e2b4f61b3
Gitweb:        https://git.kernel.org/tip/ff8d523cc4520a5ce86cde0fd57c304e2b4f61b3
Author:        Thomas Gleixner <tglx@linutronix.de>
AuthorDate:    Sun, 13 Oct 2024 20:45:57 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 15 Oct 2024 17:30:33 +02:00

debugobjects: Track object usage to avoid premature freeing of objects

The freelist is freed at a constant rate independent of the actual usage
requirements. That's bad in scenarios where usage comes in bursts. The end
of a burst puts the objects on the free list, and freeing proceeds even when
the next burst, which requires objects, has already started.

Keep track of the usage with an exponentially weighted moving average and
take that into account in the worker function which frees objects from the
free list.

This further reduces the kmem_cache allocation/free rate for a full kernel
compile:

   	    kmem_cache_alloc()	kmem_cache_free()
Baseline:   225k		173k
Usage:	    170k		117k

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/all/87bjznhme2.ffs@tglx

---
 lib/debugobjects.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index cc32844..7f50c44 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -13,6 +13,7 @@
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
 #include <linux/sched.h>
+#include <linux/sched/loadavg.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -86,6 +87,7 @@ static struct obj_pool pool_to_free = {
 
 static HLIST_HEAD(pool_boot);
 
+static unsigned long		avg_usage;
 static bool			obj_freeing;
 
 static int __data_racy			debug_objects_maxchain __read_mostly;
@@ -427,11 +429,31 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
 	return NULL;
 }
 
+static void calc_usage(void)
+{
+	static DEFINE_RAW_SPINLOCK(avg_lock);
+	static unsigned long avg_period;
+	unsigned long cur, now = jiffies;
+
+	if (!time_after_eq(now, READ_ONCE(avg_period)))
+		return;
+
+	if (!raw_spin_trylock(&avg_lock))
+		return;
+
+	WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
+	cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
+	WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
+	raw_spin_unlock(&avg_lock);
+}
+
 static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
 				      const struct debug_obj_descr *descr)
 {
 	struct debug_obj *obj;
 
+	calc_usage();
+
 	if (static_branch_likely(&obj_cache_enabled))
 		obj = pcpu_alloc();
 	else
@@ -450,14 +472,26 @@ static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
 /* workqueue function to free objects. */
 static void free_obj_work(struct work_struct *work)
 {
-	bool free = true;
+	static unsigned long last_use_avg;
+	unsigned long cur_used, last_used, delta;
+	unsigned int max_free = 0;
 
 	WRITE_ONCE(obj_freeing, false);
 
+	/* Rate limit freeing based on current use average */
+	cur_used = READ_ONCE(avg_usage);
+	last_used = last_use_avg;
+	last_use_avg = cur_used;
+
 	if (!pool_count(&pool_to_free))
 		return;
 
-	for (unsigned int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
+	if (cur_used <= last_used) {
+		delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
+		max_free = min(delta, ODEBUG_FREE_WORK_MAX);
+	}
+
+	for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
 		HLIST_HEAD(tofree);
 
 		/* Acquire and drop the lock for each batch */
@@ -468,9 +502,10 @@ static void free_obj_work(struct work_struct *work)
 			/* Refill the global pool if possible */
 			if (pool_move_batch(&pool_global, &pool_to_free)) {
 				/* Don't free as there seems to be demand */
-				free = false;
-			} else if (free) {
+				max_free = 0;
+			} else if (max_free) {
 				pool_pop_batch(&tofree, &pool_to_free);
+				max_free--;
 			} else {
 				return;
 			}
@@ -1110,7 +1145,7 @@ static int debug_stats_show(struct seq_file *m, void *v)
 	for_each_possible_cpu(cpu)
 		pcp_free += per_cpu(pool_pcpu.cnt, cpu);
 
-	pool_used = data_race(pool_global.stats.cur_used);
+	pool_used = READ_ONCE(pool_global.stats.cur_used);
 	pcp_free = min(pool_used, pcp_free);
 	pool_used -= pcp_free;