[patch 20/25] debugobjects: Prepare kmem_cache allocations for batching

Posted by Thomas Gleixner 1 month, 3 weeks ago
Allocate a batch and then push it into the pool. Utilize the
debug_obj::batch_last pointer for keeping track of the batch boundary.
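
Each object in a freshly allocated batch stores a pointer to the batch's
last list node, so a complete batch can later be spliced into a pool in
O(1) without walking the list. As an illustration, a minimal userspace
sketch of the idea (simplified stand-ins, not the kernel hlist API):

  #include <stdbool.h>
  #include <stdlib.h>

  #define BATCH_SIZE 16

  struct node {
          struct node *next;
          struct node *batch_last;        /* tail node of this batch */
  };

  /*
   * Build one batch. Nodes are added at the head, so the first
   * allocation ends up as the list tail; every node records a
   * pointer to that tail in ->batch_last.
   */
  static bool alloc_batch(struct node **head)
  {
          struct node *last = NULL;

          for (int i = 0; i < BATCH_SIZE; i++) {
                  struct node *n = calloc(1, sizeof(*n));

                  if (!n)
                          return false;   /* real code frees the partial batch */
                  if (!last)
                          last = n;
                  n->batch_last = last;
                  n->next = *head;
                  *head = n;
          }
          return true;
  }

  /* Splice a complete batch onto a pool list in O(1) via the tail. */
  static void push_batch(struct node **pool, struct node **batch)
  {
          struct node *tail = (*batch)->batch_last;

          tail->next = *pool;
          *pool = *batch;
          *batch = NULL;
  }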

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c |   80 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 31 deletions(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -164,6 +164,22 @@ static bool pool_move_batch(struct obj_p
 	return true;
 }
 
+static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head)
+{
+	struct hlist_node *last;
+	struct debug_obj *obj;
+
+	if (dst->cnt >= dst->max_cnt)
+		return false;
+
+	obj = hlist_entry(head->first, typeof(*obj), node);
+	last = obj->batch_last;
+
+	hlist_splice_init(head, last, &dst->objects);
+	WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
+	return true;
+}
+
 static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src)
 {
 	if (!src->cnt)
@@ -288,6 +304,28 @@ static void fill_pool_from_freelist(void
 	clear_bit(0, &state);
 }
 
+static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
+{
+	struct hlist_node *last = NULL;
+	struct debug_obj *obj;
+
+	for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
+		obj = kmem_cache_zalloc(cache, gfp);
+		if (!obj) {
+			free_object_list(head);
+			return false;
+		}
+		debug_objects_allocated++;
+
+		if (!last)
+			last = &obj->node;
+		obj->batch_last = last;
+
+		hlist_add_head(&obj->node, head);
+	}
+	return true;
+}
+
 static void fill_pool(void)
 {
 	static atomic_t cpus_allocating;
@@ -302,25 +340,14 @@ static void fill_pool(void)
 
 	atomic_inc(&cpus_allocating);
 	while (pool_should_refill(&pool_global)) {
-		struct debug_obj *new, *last = NULL;
 		HLIST_HEAD(head);
-		int cnt;
 
-		for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
-			new = kmem_cache_zalloc(obj_cache, __GFP_HIGH | __GFP_NOWARN);
-			if (!new)
-				break;
-			hlist_add_head(&new->node, &head);
-			if (!last)
-				last = new;
-		}
-		if (!cnt)
+		if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN))
 			break;
 
 		guard(raw_spinlock_irqsave)(&pool_lock);
-		hlist_splice_init(&head, &last->node, &pool_global.objects);
-		debug_objects_allocated += cnt;
-		WRITE_ONCE(pool_global.cnt, pool_global.cnt + cnt);
+		if (!pool_push_batch(&pool_global, &head))
+			pool_push_batch(&pool_to_free, &head);
 	}
 	atomic_dec(&cpus_allocating);
 }
@@ -1302,26 +1329,18 @@ void __init debug_objects_early_init(voi
 static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache)
 {
 	struct debug_bucket *db = obj_hash;
-	struct debug_obj *obj, *new;
 	struct hlist_node *tmp;
+	struct debug_obj *obj;
 	HLIST_HEAD(objects);
 	int i;
 
-	for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
-		obj = kmem_cache_zalloc(cache, GFP_KERNEL);
-		if (!obj)
+	for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) {
+		if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL))
 			goto free;
-		hlist_add_head(&obj->node, &objects);
+		pool_push_batch(&pool_global, &objects);
 	}
 
-	debug_objects_allocated = ODEBUG_POOL_SIZE;
-	pool_global.cnt = ODEBUG_POOL_SIZE;
-
-	/*
-	 * Move the allocated objects to the global pool and disconnect the
-	 * boot pool.
-	 */
-	hlist_move_list(&objects, &pool_global.objects);
+	/* Disconnect the boot pool. */
 	pool_boot.first = NULL;
 
 	/* Replace the active object references */
@@ -1329,9 +1348,8 @@ static bool __init debug_objects_replace
 		hlist_move_list(&db->list, &objects);
 
 		hlist_for_each_entry(obj, &objects, node) {
-			new = hlist_entry(pool_global.objects.first, typeof(*obj), node);
-			hlist_del(&new->node);
-			pool_global.cnt--;
+			struct debug_obj *new = pcpu_alloc();
+
 			/* copy object data */
 			*new = *obj;
 			hlist_add_head(&new->node, &db->list);
@@ -1340,7 +1358,7 @@ static bool __init debug_objects_replace
 	return true;
 free:
 	/* Can't use free_object_list() as the cache is not populated yet */
-	hlist_for_each_entry_safe(obj, tmp, &objects, node) {
+	hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) {
 		hlist_del(&obj->node);
 		kmem_cache_free(cache, obj);
 	}
Re: [patch 20/25] debugobjects: Prepare kmem_cache allocations for batching
Posted by Leizhen (ThunderTown) 1 month, 2 weeks ago

On 2024/10/8 0:50, Thomas Gleixner wrote:
> Allocate a batch and then push it into the pool. Utilize the
> debug_obj::batch_last pointer for keeping track of the batch boundary.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  lib/debugobjects.c |   80 ++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 49 insertions(+), 31 deletions(-)
> 
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -164,6 +164,22 @@ static bool pool_move_batch(struct obj_p
>  	return true;
>  }
>  
> +static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head)
> +{
> +	struct hlist_node *last;
> +	struct debug_obj *obj;
> +
> +	if (dst->cnt >= dst->max_cnt)
> +		return false;
> +
> +	obj = hlist_entry(head->first, typeof(*obj), node);
> +	last = obj->batch_last;
> +
> +	hlist_splice_init(head, last, &dst->objects);
> +	WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
> +	return true;
> +}
> +
>  static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src)
>  {
>  	if (!src->cnt)
> @@ -288,6 +304,28 @@ static void fill_pool_from_freelist(void
>  	clear_bit(0, &state);
>  }
>  
> +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
> +{
> +	struct hlist_node *last = NULL;
> +	struct debug_obj *obj;
> +
> +	for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
> +		obj = kmem_cache_zalloc(cache, gfp);
> +		if (!obj) {
> +			free_object_list(head);
> +			return false;
> +		}
> +		debug_objects_allocated++;
> +
> +		if (!last)
> +			last = &obj->node;
> +		obj->batch_last = last;
> +
> +		hlist_add_head(&obj->node, head);

It seems hlist_splice_init() should be used outside the loop, as in patch
2/25: prepare a local list first and, once all ODEBUG_BATCH_SIZE objects
have been allocated successfully, attach it to 'head' in one go, accounting
debug_objects_allocated at the same time. Then, going back to patch 8/25,
debug_objects_freed could stay under lock protection instead of being moved
into free_object_list().
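
I.e. something like this untested sketch, reusing the patch's names:

  static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
  {
          struct hlist_node *last = NULL, *tmp;
          struct debug_obj *obj;
          HLIST_HEAD(list);

          for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
                  obj = kmem_cache_zalloc(cache, gfp);
                  if (!obj) {
                          /* Nothing was accounted yet, plain freeing suffices */
                          hlist_for_each_entry_safe(obj, tmp, &list, node) {
                                  hlist_del(&obj->node);
                                  kmem_cache_free(cache, obj);
                          }
                          return false;
                  }
                  if (!last)
                          last = &obj->node;
                  obj->batch_last = last;
                  hlist_add_head(&obj->node, &list);
          }
          hlist_splice_init(&list, last, head);
          debug_objects_allocated += ODEBUG_BATCH_SIZE;
          return true;
  }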

> +	}
> +	return true;
> +}
> +
>  static void fill_pool(void)
>  {
>  	static atomic_t cpus_allocating;
> @@ -302,25 +340,14 @@ static void fill_pool(void)
>  
>  	atomic_inc(&cpus_allocating);
>  	while (pool_should_refill(&pool_global)) {
> -		struct debug_obj *new, *last = NULL;
>  		HLIST_HEAD(head);
> -		int cnt;
>  
> -		for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
> -			new = kmem_cache_zalloc(obj_cache, __GFP_HIGH | __GFP_NOWARN);
> -			if (!new)
> -				break;
> -			hlist_add_head(&new->node, &head);
> -			if (!last)
> -				last = new;
> -		}
> -		if (!cnt)
> +		if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN))
>  			break;
>  
>  		guard(raw_spinlock_irqsave)(&pool_lock);
> -		hlist_splice_init(&head, &last->node, &pool_global.objects);
> -		debug_objects_allocated += cnt;
> -		WRITE_ONCE(pool_global.cnt, pool_global.cnt + cnt);
> +		if (!pool_push_batch(&pool_global, &head))
> +			pool_push_batch(&pool_to_free, &head);
>  	}
>  	atomic_dec(&cpus_allocating);
>  }
> @@ -1302,26 +1329,18 @@ void __init debug_objects_early_init(voi
>  static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache)
>  {
>  	struct debug_bucket *db = obj_hash;
> -	struct debug_obj *obj, *new;
>  	struct hlist_node *tmp;
> +	struct debug_obj *obj;
>  	HLIST_HEAD(objects);
>  	int i;
>  
> -	for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
> -		obj = kmem_cache_zalloc(cache, GFP_KERNEL);
> -		if (!obj)
> +	for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) {
> +		if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL))
>  			goto free;
> -		hlist_add_head(&obj->node, &objects);
> +		pool_push_batch(&pool_global, &objects);
>  	}
>  
> -	debug_objects_allocated = ODEBUG_POOL_SIZE;
> -	pool_global.cnt = ODEBUG_POOL_SIZE;
> -
> -	/*
> -	 * Move the allocated objects to the global pool and disconnect the
> -	 * boot pool.
> -	 */
> -	hlist_move_list(&objects, &pool_global.objects);
> +	/* Disconnect the boot pool. */
>  	pool_boot.first = NULL;
>  
>  	/* Replace the active object references */
> @@ -1329,9 +1348,8 @@ static bool __init debug_objects_replace
>  		hlist_move_list(&db->list, &objects);
>  
>  		hlist_for_each_entry(obj, &objects, node) {
> -			new = hlist_entry(pool_global.objects.first, typeof(*obj), node);
> -			hlist_del(&new->node);
> -			pool_global.cnt--;
> +			struct debug_obj *new = pcpu_alloc();
> +
>  			/* copy object data */
>  			*new = *obj;
>  			hlist_add_head(&new->node, &db->list);
> @@ -1340,7 +1358,7 @@ static bool __init debug_objects_replace
>  	return true;
>  free:
>  	/* Can't use free_object_list() as the cache is not populated yet */
> -	hlist_for_each_entry_safe(obj, tmp, &objects, node) {
> +	hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) {
>  		hlist_del(&obj->node);
>  		kmem_cache_free(cache, obj);
>  	}
> 

-- 
Regards,
  Zhen Lei
Re: [patch 20/25] debugobjects: Prepare kmem_cache allocations for batching
Posted by Thomas Gleixner 1 month, 2 weeks ago
On Thu, Oct 10 2024 at 16:40, Leizhen wrote:
> On 2024/10/8 0:50, Thomas Gleixner wrote:
>> +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
>> +{
>> +	struct hlist_node *last = NULL;
>> +	struct debug_obj *obj;
>> +
>> +	for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
>> +		obj = kmem_cache_zalloc(cache, gfp);
>> +		if (!obj) {
>> +			free_object_list(head);
>> +			return false;
>> +		}
>> +		debug_objects_allocated++;
>> +
>> +		if (!last)
>> +			last = &obj->node;
>> +		obj->batch_last = last;
>> +
>> +		hlist_add_head(&obj->node, head);
>
> It seems hlist_splice_init() should be used outside the loop, as in patch
> 2/25: prepare a local list first and, once all ODEBUG_BATCH_SIZE objects
> have been allocated successfully, attach it to 'head' in one go, accounting
> debug_objects_allocated at the same time. Then, going back to patch 8/25,
> debug_objects_freed could stay under lock protection instead of being moved
> into free_object_list().

In the case that the allocation of one of the 16 objects fails, the
accounting concurrency problem vs. free_object_list() is pretty
irrelevant.

It's debug information, and moving it under lock protection for such a
corner case just makes the code more complex.

Thanks,

        tglx
Re: [patch 20/25] debugobjects: Prepare kmem_cache allocations for batching
Posted by Leizhen (ThunderTown) 1 month, 2 weeks ago

On 2024/10/12 4:47, Thomas Gleixner wrote:
> On Thu, Oct 10 2024 at 16:40, Leizhen wrote:
>> On 2024/10/8 0:50, Thomas Gleixner wrote:
>>> +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
>>> +{
>>> +	struct hlist_node *last = NULL;
>>> +	struct debug_obj *obj;
>>> +
>>> +	for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
>>> +		obj = kmem_cache_zalloc(cache, gfp);
>>> +		if (!obj) {
>>> +			free_object_list(head);
>>> +			return false;
>>> +		}
>>> +		debug_objects_allocated++;
>>> +
>>> +		if (!last)
>>> +			last = &obj->node;
>>> +		obj->batch_last = last;
>>> +
>>> +		hlist_add_head(&obj->node, head);
>>
>> It seems hlist_splice_init() should be used outside the loop, as in patch
>> 2/25: prepare a local list first and, once all ODEBUG_BATCH_SIZE objects
>> have been allocated successfully, attach it to 'head' in one go, accounting
>> debug_objects_allocated at the same time. Then, going back to patch 8/25,
>> debug_objects_freed could stay under lock protection instead of being moved
>> into free_object_list().
> 
> In the case that the allocation of one of the 16 objects fails, the
> accounting concurrency problem vs. free_object_list() is pretty
> irrelevant.
> 
> It's debug information, and moving it under lock protection for such a
> corner case just makes the code more complex.

OK, got it, and the same applies to 8/25.

> 
> Thanks,
> 
>         tglx
> 

-- 
Regards,
  Zhen Lei
[tip: core/debugobjects] debugobjects: Prepare kmem_cache allocations for batching
Posted by tip-bot2 for Thomas Gleixner 1 month, 1 week ago
The following commit has been merged into the core/debugobjects branch of tip:

Commit-ID:     aebbfe0779b271c099cc80c5e2995c2087b28dcf
Gitweb:        https://git.kernel.org/tip/aebbfe0779b271c099cc80c5e2995c2087b28dcf
Author:        Thomas Gleixner <tglx@linutronix.de>
AuthorDate:    Mon, 07 Oct 2024 18:50:15 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 15 Oct 2024 17:30:32 +02:00

debugobjects: Prepare kmem_cache allocations for batching

Allocate a batch and then push it into the pool. Utilize the
debug_obj::batch_last pointer for keeping track of the batch boundary.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20241007164914.198647184@linutronix.de

---
 lib/debugobjects.c | 80 +++++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2fed7b8..cdd5d23 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -164,6 +164,22 @@ static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src)
 	return true;
 }
 
+static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head)
+{
+	struct hlist_node *last;
+	struct debug_obj *obj;
+
+	if (dst->cnt >= dst->max_cnt)
+		return false;
+
+	obj = hlist_entry(head->first, typeof(*obj), node);
+	last = obj->batch_last;
+
+	hlist_splice_init(head, last, &dst->objects);
+	WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
+	return true;
+}
+
 static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src)
 {
 	if (!src->cnt)
@@ -288,6 +304,28 @@ static void fill_pool_from_freelist(void)
 	clear_bit(0, &state);
 }
 
+static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
+{
+	struct hlist_node *last = NULL;
+	struct debug_obj *obj;
+
+	for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
+		obj = kmem_cache_zalloc(cache, gfp);
+		if (!obj) {
+			free_object_list(head);
+			return false;
+		}
+		debug_objects_allocated++;
+
+		if (!last)
+			last = &obj->node;
+		obj->batch_last = last;
+
+		hlist_add_head(&obj->node, head);
+	}
+	return true;
+}
+
 static void fill_pool(void)
 {
 	static atomic_t cpus_allocating;
@@ -302,25 +340,14 @@ static void fill_pool(void)
 
 	atomic_inc(&cpus_allocating);
 	while (pool_should_refill(&pool_global)) {
-		struct debug_obj *new, *last = NULL;
 		HLIST_HEAD(head);
-		int cnt;
 
-		for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
-			new = kmem_cache_zalloc(obj_cache, __GFP_HIGH | __GFP_NOWARN);
-			if (!new)
-				break;
-			hlist_add_head(&new->node, &head);
-			if (!last)
-				last = new;
-		}
-		if (!cnt)
+		if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN))
 			break;
 
 		guard(raw_spinlock_irqsave)(&pool_lock);
-		hlist_splice_init(&head, &last->node, &pool_global.objects);
-		debug_objects_allocated += cnt;
-		WRITE_ONCE(pool_global.cnt, pool_global.cnt + cnt);
+		if (!pool_push_batch(&pool_global, &head))
+			pool_push_batch(&pool_to_free, &head);
 	}
 	atomic_dec(&cpus_allocating);
 }
@@ -1302,26 +1329,18 @@ void __init debug_objects_early_init(void)
 static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache)
 {
 	struct debug_bucket *db = obj_hash;
-	struct debug_obj *obj, *new;
 	struct hlist_node *tmp;
+	struct debug_obj *obj;
 	HLIST_HEAD(objects);
 	int i;
 
-	for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
-		obj = kmem_cache_zalloc(cache, GFP_KERNEL);
-		if (!obj)
+	for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) {
+		if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL))
 			goto free;
-		hlist_add_head(&obj->node, &objects);
+		pool_push_batch(&pool_global, &objects);
 	}
 
-	debug_objects_allocated = ODEBUG_POOL_SIZE;
-	pool_global.cnt = ODEBUG_POOL_SIZE;
-
-	/*
-	 * Move the allocated objects to the global pool and disconnect the
-	 * boot pool.
-	 */
-	hlist_move_list(&objects, &pool_global.objects);
+	/* Disconnect the boot pool. */
 	pool_boot.first = NULL;
 
 	/* Replace the active object references */
@@ -1329,9 +1348,8 @@ static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache
 		hlist_move_list(&db->list, &objects);
 
 		hlist_for_each_entry(obj, &objects, node) {
-			new = hlist_entry(pool_global.objects.first, typeof(*obj), node);
-			hlist_del(&new->node);
-			pool_global.cnt--;
+			struct debug_obj *new = pcpu_alloc();
+
 			/* copy object data */
 			*new = *obj;
 			hlist_add_head(&new->node, &db->list);
@@ -1340,7 +1358,7 @@ static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache
 	return true;
 free:
 	/* Can't use free_object_list() as the cache is not populated yet */
-	hlist_for_each_entry_safe(obj, tmp, &objects, node) {
+	hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) {
 		hlist_del(&obj->node);
 		kmem_cache_free(cache, obj);
 	}