[PATCH v5 10/14] mm, slab: allow NUMA restricted allocations to use percpu sheaves

Posted by Vlastimil Babka 2 months, 2 weeks ago
Currently allocations asking for a specific node explicitly or via
mempolicy in strict_numa mode bypass percpu sheaves. Since sheaves
contain mostly local objects, we can try allocating from them if the
local node happens to be the requested node or allowed by the mempolicy.
If we find the object from percpu sheaves is not from the expected node,
we skip the sheaves - this should be rare.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 50fc35b8fc9b3101821c338e9469c134677ded51..b98983b8d2e3e04ea256d91efcf0215ff0ae7e38 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4765,18 +4765,42 @@ __pcs_handle_empty(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, gfp_t
 }
 
 static __fastpath_inline
-void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp)
+void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
 {
 	struct slub_percpu_sheaves *pcs;
 	void *object;
 
 #ifdef CONFIG_NUMA
-	if (static_branch_unlikely(&strict_numa)) {
-		if (current->mempolicy)
-			return NULL;
+	if (static_branch_unlikely(&strict_numa) &&
+			 node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If the local node
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND ||
+					!node_isset(numa_mem_id(), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
 	}
 #endif
 
+	if (unlikely(node != NUMA_NO_NODE)) {
+		/*
+		 * We assume the percpu sheaves contain only local objects
+		 * although it's not completely guaranteed, so we verify later.
+		 */
+		if (node != numa_mem_id())
+			return NULL;
+	}
+
 	if (!local_trylock(&s->cpu_sheaves->lock))
 		return NULL;
 
@@ -4788,7 +4812,21 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp)
 			return NULL;
 	}
 
-	object = pcs->main->objects[--pcs->main->size];
+	object = pcs->main->objects[pcs->main->size - 1];
+
+	if (unlikely(node != NUMA_NO_NODE)) {
+		/*
+		 * Verify that the object was from the node we want. This could
+		 * be false because of cpu migration during an unlocked part of
+		 * the current allocation or previous freeing process.
+		 */
+		if (folio_nid(virt_to_folio(object)) != node) {
+			local_unlock(&s->cpu_sheaves->lock);
+			return NULL;
+		}
+	}
+
+	pcs->main->size--;
 
 	local_unlock(&s->cpu_sheaves->lock);
 
@@ -4888,8 +4926,8 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves && node == NUMA_NO_NODE)
-		object = alloc_from_pcs(s, gfpflags);
+	if (s->cpu_sheaves)
+		object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);

-- 
2.50.1
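
For illustration, here is a minimal hypothetical caller showing what the patch
enables: an allocation that explicitly requests the local node can now be
served from the percpu sheaves instead of always taking the slower path. The
wrapper and cache names below are made up; kmem_cache_alloc_node() and
numa_mem_id() are the existing kernel APIs.

	/*
	 * Hypothetical example, not part of the patch. Explicitly requesting
	 * the local memory node used to bypass the percpu sheaves entirely;
	 * with this patch the sheaf fast path can serve the request, and the
	 * object's node is verified before it is handed out.
	 */
	static void *my_obj_alloc_local(struct kmem_cache *my_cache)
	{
		/* numa_mem_id() is the nearest memory node of the current CPU */
		return kmem_cache_alloc_node(my_cache, GFP_KERNEL, numa_mem_id());
	}

Allocations that request a remote node still fall back to __slab_alloc_node(),
as do the rare cases where the object at the top of the sheaf turns out to be
from a different node.
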
Re: [PATCH v5 10/14] mm, slab: allow NUMA restricted allocations to use percpu sheaves
Posted by Harry Yoo 1 month, 1 week ago
On Wed, Jul 23, 2025 at 03:34:43PM +0200, Vlastimil Babka wrote:
> Currently allocations asking for a specific node explicitly or via
> mempolicy in strict_numa mode bypass percpu sheaves. Since sheaves
> contain mostly local objects, we can try allocating from them if the
> local node happens to be the requested node or allowed by the mempolicy.
> If we find the object from percpu sheaves is not from the expected node,
> we skip the sheaves - this should be rare.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---

With or without the ifdeffery suggested by Suren
(or probably IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE?),

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH v5 10/14] mm, slab: allow NUMA restricted allocations to use percpu sheaves
Posted by Vlastimil Babka 1 month, 1 week ago
On 8/25/25 08:52, Harry Yoo wrote:
> On Wed, Jul 23, 2025 at 03:34:43PM +0200, Vlastimil Babka wrote:
>> Currently allocations asking for a specific node explicitly or via
>> mempolicy in strict_numa mode bypass percpu sheaves. Since sheaves
>> contain mostly local objects, we can try allocating from them if the
>> local node happens to be the requested node or allowed by the mempolicy.
>> If we find the object from percpu sheaves is not from the expected node,
>> we skip the sheaves - this should be rare.
>> 
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
> 
> With or without ifdeffery suggested by Suren
> (or probably IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE?),
> 
> Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

Thanks both, I've extracted IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE
to a local bool variable.
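
For readers of the archive, a rough sketch of how that local bool might look;
this is a guess at the shape of the follow-up based on the patch above, not
the actual code of the next revision:

	static __fastpath_inline
	void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
	{
		struct slub_percpu_sheaves *pcs;
		bool node_requested;
		void *object;

		/* strict_numa / mempolicy handling as in the patch; may set node */

		/* evaluated only after the mempolicy handling, which can change node */
		node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;

		/* sheaves hold mostly local objects, so bail out early otherwise */
		if (unlikely(node_requested) && node != numa_mem_id())
			return NULL;

		/* trylock, refill if empty and peek at the top object, as in the patch */

		/* second user of the bool: verify the object's node before taking it */
		if (unlikely(node_requested) &&
		    folio_nid(virt_to_folio(object)) != node) {
			local_unlock(&s->cpu_sheaves->lock);
			return NULL;
		}

		/* pop the object, unlock and return, as in the patch */
	}

Compared to wrapping the checks in #ifdef CONFIG_NUMA, IS_ENABLED() keeps the
code visible to the compiler on !CONFIG_NUMA builds while still letting the
branches be optimized away, which is what the review suggestion was aiming at.
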
Re: [PATCH v5 10/14] mm, slab: allow NUMA restricted allocations to use percpu sheaves
Posted by Suren Baghdasaryan 1 month, 2 weeks ago
On Wed, Jul 23, 2025 at 6:35 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> Currently allocations asking for a specific node explicitly or via
> mempolicy in strict_numa mode bypass percpu sheaves. Since sheaves
> contain mostly local objects, we can try allocating from them if the
> local node happens to be the requested node or allowed by the mempolicy.
> If we find the object from percpu sheaves is not from the expected node,
> we skip the sheaves - this should be rare.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 45 insertions(+), 7 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 50fc35b8fc9b3101821c338e9469c134677ded51..b98983b8d2e3e04ea256d91efcf0215ff0ae7e38 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4765,18 +4765,42 @@ __pcs_handle_empty(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, gfp_t
>  }
>
>  static __fastpath_inline
> -void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp)
> +void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
>  {
>         struct slub_percpu_sheaves *pcs;
>         void *object;
>
>  #ifdef CONFIG_NUMA
> -       if (static_branch_unlikely(&strict_numa)) {
> -               if (current->mempolicy)
> -                       return NULL;
> +       if (static_branch_unlikely(&strict_numa) &&
> +                        node == NUMA_NO_NODE) {
> +
> +               struct mempolicy *mpol = current->mempolicy;
> +
> +               if (mpol) {
> +                       /*
> +                        * Special BIND rule support. If the local node
> +                        * is in permitted set then do not redirect
> +                        * to a particular node.
> +                        * Otherwise we apply the memory policy to get
> +                        * the node we need to allocate on.
> +                        */
> +                       if (mpol->mode != MPOL_BIND ||
> +                                       !node_isset(numa_mem_id(), mpol->nodes))
> +
> +                               node = mempolicy_slab_node();
> +               }
>         }
>  #endif
>
> +       if (unlikely(node != NUMA_NO_NODE)) {

Should this and the later (node != NUMA_NO_NODE) checks still be under
#ifdef CONFIG_NUMA?

> +               /*
> +                * We assume the percpu sheaves contain only local objects
> +                * although it's not completely guaranteed, so we verify later.
> +                */
> +               if (node != numa_mem_id())
> +                       return NULL;
> +       }
> +
>         if (!local_trylock(&s->cpu_sheaves->lock))
>                 return NULL;
>
> @@ -4788,7 +4812,21 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp)
>                         return NULL;
>         }
>
> -       object = pcs->main->objects[--pcs->main->size];
> +       object = pcs->main->objects[pcs->main->size - 1];
> +
> +       if (unlikely(node != NUMA_NO_NODE)) {
> +               /*
> +                * Verify that the object was from the node we want. This could
> +                * be false because of cpu migration during an unlocked part of
> +                * the current allocation or previous freeing process.
> +                */
> +               if (folio_nid(virt_to_folio(object)) != node) {
> +                       local_unlock(&s->cpu_sheaves->lock);
> +                       return NULL;
> +               }
> +       }
> +
> +       pcs->main->size--;
>
>         local_unlock(&s->cpu_sheaves->lock);
>
> @@ -4888,8 +4926,8 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
>         if (unlikely(object))
>                 goto out;
>
> -       if (s->cpu_sheaves && node == NUMA_NO_NODE)
> -               object = alloc_from_pcs(s, gfpflags);
> +       if (s->cpu_sheaves)
> +               object = alloc_from_pcs(s, gfpflags, node);
>
>         if (!object)
>                 object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
>
> --
> 2.50.1
>