[PATCH RFC v2 10/10] maple_tree: use percpu sheaves for maple_node_cache

Vlastimil Babka posted 10 patches 10 months, 1 week ago
There is a newer version of this series
[PATCH RFC v2 10/10] maple_tree: use percpu sheaves for maple_node_cache
Posted by Vlastimil Babka 10 months, 1 week ago
Set up the maple_node_cache with percpu sheaves of size 32 to hopefully
improve its performance. Change the single-node RCU freeing in
ma_free_rcu() to use kfree_rcu() instead of the custom callback, which
allows the rcu_free sheaf batching to be used. Note that there are other
users of mt_free_rcu() where larger parts of a maple tree are submitted to
call_rcu() as a whole, and those cannot use the rcu_free sheaf. It is
still possible for maple nodes freed this way to be reused via the barn,
even if only some CPUs are allowed to process RCU callbacks.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 lib/maple_tree.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index f7153ade1be5f16423f0ca073846a7f3dfa60523..56e7a00f6f0941bff163091c999a873e4273f071 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -208,7 +208,7 @@ static void mt_free_rcu(struct rcu_head *head)
 static void ma_free_rcu(struct maple_node *node)
 {
 	WARN_ON(node->parent != ma_parent_ptr(node));
-	call_rcu(&node->rcu, mt_free_rcu);
+	kfree_rcu(node, rcu);
 }
 
 static void mas_set_height(struct ma_state *mas)
@@ -6258,9 +6258,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
 
 void __init maple_tree_init(void)
 {
+	struct kmem_cache_args args = {
+		.align  = sizeof(struct maple_node),
+		.sheaf_capacity = 32,
+	};
+
 	maple_node_cache = kmem_cache_create("maple_node",
-			sizeof(struct maple_node), sizeof(struct maple_node),
-			SLAB_PANIC, NULL);
+			sizeof(struct maple_node), &args,
+			SLAB_PANIC);
 }
 
 /**

-- 
2.48.1
Re: [PATCH RFC v2 10/10] maple_tree: use percpu sheaves for maple_node_cache
Posted by Suren Baghdasaryan 9 months, 4 weeks ago
On Fri, Feb 14, 2025 at 8:28 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> Setup the maple_node_cache with percpu sheaves of size 32 to hopefully
> improve its performance.

I guess 32 might change in the future based on further testing?

> Change the single node rcu freeing in
> ma_free_rcu() to use kfree_rcu() instead of the custom callback, which
> allows the rcu_free sheaf batching to be used. Note there are other
> users of mt_free_rcu() where larger parts of maple tree are submitted to
> call_rcu() as a whole, and that cannot use the rcu_free sheaf, but it's
> still possible for maple nodes freed this way to be reused via the barn,
> even if only some cpus are allowed to process rcu callbacks.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

> ---
>  lib/maple_tree.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index f7153ade1be5f16423f0ca073846a7f3dfa60523..56e7a00f6f0941bff163091c999a873e4273f071 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -208,7 +208,7 @@ static void mt_free_rcu(struct rcu_head *head)
>  static void ma_free_rcu(struct maple_node *node)
>  {
>         WARN_ON(node->parent != ma_parent_ptr(node));
> -       call_rcu(&node->rcu, mt_free_rcu);
> +       kfree_rcu(node, rcu);
>  }
>
>  static void mas_set_height(struct ma_state *mas)
> @@ -6258,9 +6258,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
>
>  void __init maple_tree_init(void)
>  {
> +       struct kmem_cache_args args = {
> +               .align  = sizeof(struct maple_node),
> +               .sheaf_capacity = 32,
> +       };
> +
>         maple_node_cache = kmem_cache_create("maple_node",
> -                       sizeof(struct maple_node), sizeof(struct maple_node),
> -                       SLAB_PANIC, NULL);
> +                       sizeof(struct maple_node), &args,
> +                       SLAB_PANIC);
>  }
>
>  /**
>
> --
> 2.48.1
>