[PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc

Vitaly Wool posted 4 patches 2 months, 4 weeks ago
There is a newer version of this series
[PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vitaly Wool 2 months, 4 weeks ago
Reimplement k[v]realloc_node() to be able to set node and
alignment should a user need to do so. In order to do that while
retaining the maximal backward compatibility, add
k[v]realloc_node_align() functions and redefine the rest of API
using these new ones.

While doing that, we also keep the number of _noprof variants to a
minimum, which implies some changes to the existing users of older
_noprof functions — basically bcachefs.

With that change we also provide the ability for the Rust part of
the kernel to set node and alignment in its K[v]xxx
[re]allocations.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
---
 fs/bcachefs/darray.c   |  2 +-
 fs/bcachefs/util.h     |  2 +-
 include/linux/bpfptr.h |  2 +-
 include/linux/slab.h   | 38 +++++++++++++++----------
 lib/rhashtable.c       |  4 +--
 mm/slub.c              | 64 +++++++++++++++++++++++++++++-------------
 6 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
index e86d36d23e9e..928e83a1ce42 100644
--- a/fs/bcachefs/darray.c
+++ b/fs/bcachefs/darray.c
@@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
 			return -ENOMEM;
 
 		void *data = likely(bytes < INT_MAX)
-			? kvmalloc_noprof(bytes, gfp)
+			? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
 			: vmalloc_noprof(bytes);
 		if (!data)
 			return -ENOMEM;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 6488f098d140..7112fd40ee21 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
 {
 	void *p = unlikely(n >= INT_MAX)
 		? vmalloc_noprof(n)
-		: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
+		: kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
 	if (p && (flags & __GFP_ZERO))
 		memset(p, 0, n);
 	return p;
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 1af241525a17..f6e0795db484 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
 
 static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len)
 {
-	void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN);
+	void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE);
 
 	if (!p)
 		return ERR_PTR(-ENOMEM);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d5a8ab98035c..2877900cb9a7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -465,9 +465,13 @@ int kmem_cache_shrink(struct kmem_cache *s);
 /*
  * Common kmalloc functions provided by all allocators
  */
-void * __must_check krealloc_noprof(const void *objp, size_t new_size,
-				    gfp_t flags) __realloc_size(2);
-#define krealloc(...)				alloc_hooks(krealloc_noprof(__VA_ARGS__))
+void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size,
+					       unsigned long align,
+					       gfp_t flags, int nid) __realloc_size(2);
+#define krealloc_noprof(_o, _s, _f)	krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE)
+#define krealloc_node_align(...)	alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__))
+#define krealloc_node(_o, _s, _f, _n)	krealloc_node_align(_o, _s, 1, _f, _n)
+#define krealloc(...)			krealloc_node(__VA_ARGS__, NUMA_NO_NODE)
 
 void kfree(const void *objp);
 void kfree_sensitive(const void *objp);
@@ -1041,18 +1045,20 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
 #define kzalloc(...)				alloc_hooks(kzalloc_noprof(__VA_ARGS__))
 #define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
 
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
-#define kvmalloc_node_noprof(size, flags, node)	\
-	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
-#define kvmalloc_node(...)			alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
-
-#define kvmalloc(_size, _flags)			kvmalloc_node(_size, _flags, NUMA_NO_NODE)
-#define kvmalloc_noprof(_size, _flags)		kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+			     gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node_align_noprof(_size, _align, _flags, _node)	\
+	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node)
+#define kvmalloc_node_align(...)		\
+	alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__))
+#define kvmalloc_node(_s, _f, _n)		kvmalloc_node_align(_s, 1, _f, _n)
+#define kvmalloc(...)				kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE)
 #define kvzalloc(_size, _flags)			kvmalloc(_size, (_flags)|__GFP_ZERO)
 
 #define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
+
 #define kmem_buckets_valloc(_b, _size, _flags)	\
-	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE))
 
 static inline __alloc_size(1, 2) void *
 kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
@@ -1062,7 +1068,7 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 
-	return kvmalloc_node_noprof(bytes, flags, node);
+	return kvmalloc_node_align_noprof(bytes, 1, flags, node);
 }
 
 #define kvmalloc_array_noprof(...)		kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
@@ -1073,9 +1079,11 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
 #define kvcalloc_node(...)			alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
 #define kvcalloc(...)				alloc_hooks(kvcalloc_noprof(__VA_ARGS__))
 
-void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
-		__realloc_size(2);
-#define kvrealloc(...)				alloc_hooks(kvrealloc_noprof(__VA_ARGS__))
+void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+				  gfp_t flags, int nid) __realloc_size(2);
+#define kvrealloc_node_align(...)		alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__))
+#define kvrealloc_node(_p, _s, _f, _n)		kvrealloc_node_align(_p, _s, 1, _f, _n)
+#define kvrealloc(...)				kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE)
 
 extern void kvfree(const void *addr);
 DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 3e555d012ed6..fde0f0e556f8 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -184,8 +184,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	static struct lock_class_key __key;
 
 	tbl = alloc_hooks_tag(ht->alloc_tag,
-			kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets),
-					     gfp|__GFP_ZERO, NUMA_NO_NODE));
+			kvmalloc_node_align_noprof(struct_size(tbl, buckets, nbuckets),
+					     1, gfp|__GFP_ZERO, NUMA_NO_NODE));
 
 	size = nbuckets;
 
diff --git a/mm/slub.c b/mm/slub.c
index c4b64821e680..6fad4cdea6c4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4845,7 +4845,7 @@ void kfree(const void *object)
 EXPORT_SYMBOL(kfree);
 
 static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
 {
 	void *ret;
 	size_t ks = 0;
@@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
 	if (!kasan_check_byte(p))
 		return NULL;
 
+	/* refuse to proceed if alignment is bigger than what kmalloc() provides */
+	if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
+		return NULL;
+
+	/*
+	 * If reallocation is not necessary (e. g. the new size is less
+	 * than the current allocated size), the current allocation will be
+	 * preserved unless __GFP_THISNODE is set. In the latter case a new
+	 * allocation on the requested node will be attempted.
+	 */
+	if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE &&
+		     nid != page_to_nid(virt_to_page(p)))
+		goto alloc_new;
+
 	if (is_kfence_address(p)) {
 		ks = orig_size = kfence_ksize(p);
 	} else {
@@ -4903,7 +4917,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
 	return (void *)p;
 
 alloc_new:
-	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_);
 	if (ret && p) {
 		/* Disable KASAN checks as the object's redzone is accessed. */
 		kasan_disable_current();
@@ -4915,10 +4929,12 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
 }
 
 /**
- * krealloc - reallocate memory. The contents will remain unchanged.
+ * krealloc_node_align - reallocate memory. The contents will remain unchanged.
  * @p: object to reallocate memory for.
  * @new_size: how many bytes of memory are required.
+ * @align: desired alignment.
  * @flags: the type of memory to allocate.
+ * @nid: NUMA node or NUMA_NO_NODE
  *
  * If @p is %NULL, krealloc() behaves exactly like kmalloc().  If @new_size
  * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
@@ -4947,7 +4963,8 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
  *
  * Return: pointer to the allocated memory or %NULL in case of error
  */
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align,
+				 gfp_t flags, int nid)
 {
 	void *ret;
 
@@ -4956,13 +4973,13 @@ void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
 		return ZERO_SIZE_PTR;
 	}
 
-	ret = __do_krealloc(p, new_size, flags);
+	ret = __do_krealloc(p, new_size, align, flags, nid);
 	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
 		kfree(p);
 
 	return ret;
 }
-EXPORT_SYMBOL(krealloc_noprof);
+EXPORT_SYMBOL(krealloc_node_align_noprof);
 
 static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
 {
@@ -4993,6 +5010,7 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
  * failure, fall back to non-contiguous (vmalloc) allocation.
  * @size: size of the request.
  * @b: which set of kmalloc buckets to allocate from.
+ * @align: desired alignment.
  * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
  * @node: numa node to allocate from
  *
@@ -5005,19 +5023,22 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
  *
  * Return: pointer to the allocated memory of %NULL in case of failure
  */
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+			     gfp_t flags, int node)
 {
 	void *ret;
 
 	/*
 	 * It doesn't really make sense to fallback to vmalloc for sub page
-	 * requests
+	 * requests and small alignments
 	 */
-	ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
-				kmalloc_gfp_adjust(flags, size),
-				node, _RET_IP_);
-	if (ret || size <= PAGE_SIZE)
-		return ret;
+	if (size >= align) {
+		ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
+					kmalloc_gfp_adjust(flags, size),
+					node, _RET_IP_);
+		if (ret || size <= PAGE_SIZE)
+			return ret;
+	}
 
 	/* non-sleeping allocations are not supported by vmalloc */
 	if (!gfpflags_allow_blocking(flags))
@@ -5035,7 +5056,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
 	 * about the resulting pointer, and cannot play
 	 * protection games.
 	 */
-	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
 			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
 			node, __builtin_return_address(0));
 }
@@ -5079,10 +5100,12 @@ void kvfree_sensitive(const void *addr, size_t len)
 EXPORT_SYMBOL(kvfree_sensitive);
 
 /**
- * kvrealloc - reallocate memory; contents remain unchanged
+ * kvrealloc_node_align - reallocate memory; contents remain unchanged
  * @p: object to reallocate memory for
  * @size: the size to reallocate
+ * @align: desired alignment
  * @flags: the flags for the page level allocator
+ * @nid: NUMA node id
  *
  * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
  * and @p is not a %NULL pointer, the object pointed to is freed.
@@ -5100,17 +5123,18 @@ EXPORT_SYMBOL(kvfree_sensitive);
  *
  * Return: pointer to the allocated memory or %NULL in case of error
  */
-void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
+void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+				  gfp_t flags, int nid)
 {
 	void *n;
 
 	if (is_vmalloc_addr(p))
-		return vrealloc_noprof(p, size, flags);
+		return vrealloc_node_align_noprof(p, size, align, flags, nid);
 
-	n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size));
+	n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid);
 	if (!n) {
 		/* We failed to krealloc(), fall back to kvmalloc(). */
-		n = kvmalloc_noprof(size, flags);
+		n = kvmalloc_node_align_noprof(size, align, flags, nid);
 		if (!n)
 			return NULL;
 
@@ -5126,7 +5150,7 @@ void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
 
 	return n;
 }
-EXPORT_SYMBOL(kvrealloc_noprof);
+EXPORT_SYMBOL(kvrealloc_node_align_noprof);
 
 struct detached_freelist {
 	struct slab *slab;
-- 
2.39.2
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Harry Yoo 2 months, 4 weeks ago
On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
> Reimplement k[v]realloc_node() to be able to set node and
> alignment should a user need to do so. In order to do that while
> retaining the maximal backward compatibility, add
> k[v]realloc_node_align() functions and redefine the rest of API
> using these new ones.
> 
> While doing that, we also keep the number of  _noprof variants to a
> minimum, which implies some changes to the existing users of older
> _noprof functions, that basically being bcachefs.
> 
> With that change we also provide the ability for the Rust part of
> the kernel to set node and alignment in its K[v]xxx
> [re]allocations.
> 
> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
> ---
>  fs/bcachefs/darray.c   |  2 +-
>  fs/bcachefs/util.h     |  2 +-
>  include/linux/bpfptr.h |  2 +-
>  include/linux/slab.h   | 38 +++++++++++++++----------
>  lib/rhashtable.c       |  4 +--
>  mm/slub.c              | 64 +++++++++++++++++++++++++++++-------------
>  6 files changed, 72 insertions(+), 40 deletions(-)
 
> diff --git a/mm/slub.c b/mm/slub.c
> index c4b64821e680..6fad4cdea6c4 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4845,7 +4845,7 @@ void kfree(const void *object)
>  EXPORT_SYMBOL(kfree);
>  
>  static __always_inline __realloc_size(2) void *
> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
>  {
>  	void *ret;
>  	size_t ks = 0;
> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>  	if (!kasan_check_byte(p))
>  		return NULL;
>  
> +	/* refuse to proceed if alignment is bigger than what kmalloc() provides */
> +	if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
> +		return NULL;

Hmm but what happens if `p` is aligned to `align`, but the new object is not?

For example, what will happen if we allocate an object with size=64, align=64
and then do krealloc with size=96, align=64...

Or am I missing something?

> +	/*
> +	 * If reallocation is not necessary (e. g. the new size is less
> +	 * than the current allocated size), the current allocation will be
> +	 * preserved unless __GFP_THISNODE is set. In the latter case a new
> +	 * allocation on the requested node will be attempted.
> +	 */
> +	if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE &&
> +		     nid != page_to_nid(virt_to_page(p)))
> +		goto alloc_new;
> +
>  	if (is_kfence_address(p)) {
>  		ks = orig_size = kfence_ksize(p);
>  	} else {

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vlastimil Babka 2 months, 3 weeks ago
On 7/11/25 10:58, Harry Yoo wrote:
> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
>> Reimplement k[v]realloc_node() to be able to set node and
>> alignment should a user need to do so. In order to do that while
>> retaining the maximal backward compatibility, add
>> k[v]realloc_node_align() functions and redefine the rest of API
>> using these new ones.
>> 
>> While doing that, we also keep the number of  _noprof variants to a
>> minimum, which implies some changes to the existing users of older
>> _noprof functions, that basically being bcachefs.
>> 
>> With that change we also provide the ability for the Rust part of
>> the kernel to set node and alignment in its K[v]xxx
>> [re]allocations.
>> 
>> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
>> ---
>>  fs/bcachefs/darray.c   |  2 +-
>>  fs/bcachefs/util.h     |  2 +-
>>  include/linux/bpfptr.h |  2 +-
>>  include/linux/slab.h   | 38 +++++++++++++++----------
>>  lib/rhashtable.c       |  4 +--
>>  mm/slub.c              | 64 +++++++++++++++++++++++++++++-------------
>>  6 files changed, 72 insertions(+), 40 deletions(-)
>  
>> diff --git a/mm/slub.c b/mm/slub.c
>> index c4b64821e680..6fad4cdea6c4 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -4845,7 +4845,7 @@ void kfree(const void *object)
>>  EXPORT_SYMBOL(kfree);
>>  
>>  static __always_inline __realloc_size(2) void *
>> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
>> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
>>  {
>>  	void *ret;
>>  	size_t ks = 0;
>> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>  	if (!kasan_check_byte(p))
>>  		return NULL;
>>  
>> +	/* refuse to proceed if alignment is bigger than what kmalloc() provides */
>> +	if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
>> +		return NULL;
> 
> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
> 
> For example, what will happen if we  allocate object with size=64, align=64
> and then do krealloc with size=96, align=64...
> 
> Or am I missing something?

Good point. We extended the alignment guarantees in commit ad59baa31695
("slab, rust: extend kmalloc() alignment guarantees to remove Rust padding")
for rust in a way that size 96 gives you alignment of 32. It assumes that
rust side will ask for alignments that are power-of-two and sizes that are
multiples of alignment. I think if that assumption is still honored than
this will keep working, but the check added above (is it just a sanity check
or something the rust side relies on?) doesn't seem correct?

>> +	/*
>> +	 * If reallocation is not necessary (e. g. the new size is less
>> +	 * than the current allocated size), the current allocation will be
>> +	 * preserved unless __GFP_THISNODE is set. In the latter case a new
>> +	 * allocation on the requested node will be attempted.
>> +	 */
>> +	if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE &&
>> +		     nid != page_to_nid(virt_to_page(p)))
>> +		goto alloc_new;
>> +
>>  	if (is_kfence_address(p)) {
>>  		ks = orig_size = kfence_ksize(p);
>>  	} else {
>
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vitaly Wool 2 months, 3 weeks ago

> On Jul 11, 2025, at 5:43 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
> 
> On 7/11/25 10:58, Harry Yoo wrote:
>> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
>>> Reimplement k[v]realloc_node() to be able to set node and
>>> alignment should a user need to do so. In order to do that while
>>> retaining the maximal backward compatibility, add
>>> k[v]realloc_node_align() functions and redefine the rest of API
>>> using these new ones.
>>> 
>>> While doing that, we also keep the number of  _noprof variants to a
>>> minimum, which implies some changes to the existing users of older
>>> _noprof functions, that basically being bcachefs.
>>> 
>>> With that change we also provide the ability for the Rust part of
>>> the kernel to set node and alignment in its K[v]xxx
>>> [re]allocations.
>>> 
>>> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
>>> ---
>>> fs/bcachefs/darray.c   |  2 +-
>>> fs/bcachefs/util.h     |  2 +-
>>> include/linux/bpfptr.h |  2 +-
>>> include/linux/slab.h   | 38 +++++++++++++++----------
>>> lib/rhashtable.c       |  4 +--
>>> mm/slub.c              | 64 +++++++++++++++++++++++++++++-------------
>>> 6 files changed, 72 insertions(+), 40 deletions(-)
>> 
>>> diff --git a/mm/slub.c b/mm/slub.c
>>> index c4b64821e680..6fad4cdea6c4 100644
>>> --- a/mm/slub.c
>>> +++ b/mm/slub.c
>>> @@ -4845,7 +4845,7 @@ void kfree(const void *object)
>>> EXPORT_SYMBOL(kfree);
>>> 
>>> static __always_inline __realloc_size(2) void *
>>> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
>>> {
>>> void *ret;
>>> size_t ks = 0;
>>> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>> if (!kasan_check_byte(p))
>>> return NULL;
>>> 
>>> + /* refuse to proceed if alignment is bigger than what kmalloc() provides */
>>> + if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
>>> + return NULL;
>> 
>> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
>> 
>> For example, what will happen if we  allocate object with size=64, align=64
>> and then do krealloc with size=96, align=64...
>> 
>> Or am I missing something?
> 
> Good point. We extended the alignment guarantees in commit ad59baa31695
> ("slab, rust: extend kmalloc() alignment guarantees to remove Rust padding")
> for rust in a way that size 96 gives you alignment of 32. It assumes that
> rust side will ask for alignments that are power-of-two and sizes that are
> multiples of alignment. I think if that assumption is still honored than
> this will keep working, but the check added above (is it just a sanity check
> or something the rust side relies on?) doesn't seem correct?
> 

It is a sanity check and it should have looked like this:

        if (!IS_ALIGNED((unsigned long)p, align) && new_size <= ks)
                return NULL;

and the reasoning for this is the following: if we don’t intend to reallocate (new size is not bigger than the original size), but the user requests a larger alignment, it’s a miss. Does that sound reasonable?

~Vitaly
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vlastimil Babka 2 months, 3 weeks ago
On 7/12/25 14:43, Vitaly Wool wrote:
> 
> 
>> On Jul 11, 2025, at 5:43 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
>> 
>> On 7/11/25 10:58, Harry Yoo wrote:
>>> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
>>>> static __always_inline __realloc_size(2) void *
>>>> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>>> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
>>>> {
>>>> void *ret;
>>>> size_t ks = 0;
>>>> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>>> if (!kasan_check_byte(p))
>>>> return NULL;
>>>> 
>>>> + /* refuse to proceed if alignment is bigger than what kmalloc() provides */
>>>> + if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
>>>> + return NULL;
>>> 
>>> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
>>> 
>>> For example, what will happen if we  allocate object with size=64, align=64
>>> and then do krealloc with size=96, align=64...
>>> 
>>> Or am I missing something?
>> 
>> Good point. We extended the alignment guarantees in commit ad59baa31695
>> ("slab, rust: extend kmalloc() alignment guarantees to remove Rust padding")
>> for rust in a way that size 96 gives you alignment of 32. It assumes that
>> rust side will ask for alignments that are power-of-two and sizes that are
>> multiples of alignment. I think if that assumption is still honored than
>> this will keep working, but the check added above (is it just a sanity check
>> or something the rust side relies on?) doesn't seem correct?
>> 
> 
> It is a sanity check and it should have looked like this:
> 
>         if (!IS_ALIGNED((unsigned long)p, align) && new_size <= ks)
>                 return NULL;
> 
> and the reasoning for this is the following: if we don’t intend to reallocate (new size is not bigger than the original size), but the user requests a larger alignment, it’s a miss. Does that sound reasonable?

So taking a step back indeed the align passed to krealloc is indeed used
only for this check. If it's really just a sanity check, then I'd rather not
add this parameter to krealloc functions at all - kmalloc() itself also
doesn't have it, so it's inconsistent that krealloc() would have it - but
only to return NULL and not e.g. try to reallocate for alignment.

If it's not just a sanity check, it means it's expected that for some
sequence of valid kvrealloc_node_align() calls it can return NULL and then
rely on the fallback to vmalloc. That would be rather wasteful for the cases
like going from 64 to 96 bytes etc. So in that case it would be better if
krealloc did the reallocation, same as in cases when size increases. Of
course it would still have to rely on the documented alignment guarantees
only and not provide anything arbitrary. aligned_size() in
rust/kernel/alloc/allocator.rs is responsible for that, AFAIK.

And I think it's not a sanity check but the latter - if the following is a
valid k(v)realloc sequence (from Rust POV). The individual size+align
combinations AFAIK are, but if it's valid to make them follow one another
them like this, I don't know.

krealloc(size=96, align=32) -> can give object with 32 alignment only
krealloc(size=64, align=64) -> doesn't increase size but wants alignment 64

> ~Vitaly
> 

Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Alice Ryhl 2 months, 3 weeks ago
On Mon, Jul 14, 2025 at 10:14 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/12/25 14:43, Vitaly Wool wrote:
> >
> >
> >> On Jul 11, 2025, at 5:43 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
> >>
> >> On 7/11/25 10:58, Harry Yoo wrote:
> >>> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
> >>>> static __always_inline __realloc_size(2) void *
> >>>> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
> >>>> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
> >>>> {
> >>>> void *ret;
> >>>> size_t ks = 0;
> >>>> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
> >>>> if (!kasan_check_byte(p))
> >>>> return NULL;
> >>>>
> >>>> + /* refuse to proceed if alignment is bigger than what kmalloc() provides */
> >>>> + if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
> >>>> + return NULL;
> >>>
> >>> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
> >>>
> >>> For example, what will happen if we  allocate object with size=64, align=64
> >>> and then do krealloc with size=96, align=64...
> >>>
> >>> Or am I missing something?
> >>
> >> Good point. We extended the alignment guarantees in commit ad59baa31695
> >> ("slab, rust: extend kmalloc() alignment guarantees to remove Rust padding")
> >> for rust in a way that size 96 gives you alignment of 32. It assumes that
> >> rust side will ask for alignments that are power-of-two and sizes that are
> >> multiples of alignment. I think if that assumption is still honored than
> >> this will keep working, but the check added above (is it just a sanity check
> >> or something the rust side relies on?) doesn't seem correct?
> >>
> >
> > It is a sanity check and it should have looked like this:
> >
> >         if (!IS_ALIGNED((unsigned long)p, align) && new_size <= ks)
> >                 return NULL;
> >
> > and the reasoning for this is the following: if we don’t intend to reallocate (new size is not bigger than the original size), but the user requests a larger alignment, it’s a miss. Does that sound reasonable?
>
> So taking a step back indeed the align passed to krealloc is indeed used
> only for this check. If it's really just a sanity check, then I'd rather not
> add this parameter to krealloc functions at all - kmalloc() itself also
> doesn't have it, so it's inconsistent that krealloc() would have it - but
> only to return NULL and not e.g. try to reallocate for alignment.
>
> If it's not just a sanity check, it means it's expected that for some
> sequence of valid kvrealloc_node_align() calls it can return NULL and then
> rely on the fallback to vmalloc. That would be rather wasteful for the cases
> like going from 64 to 96 bytes etc. So in that case it would be better if
> krealloc did the reallocation, same as in cases when size increases. Of
> course it would still have to rely on the documented alignment guarantees
> only and not provide anything arbitrary. aligned_size() in
> rust/kernel/alloc/allocator.rs is responsible for that, AFAIK.
>
> And I think it's not a sanity check but the latter - if the following is a
> valid k(v)realloc sequence (from Rust POV). The individual size+align
> combinations AFAIK are, but if it's valid to make them follow one another
> them like this, I don't know.
>
> krealloc(size=96, align=32) -> can give object with 32 alignment only
> krealloc(size=64, align=64) -> doesn't increase size but wants alignment 64

On the Rust side, you specify what the old size/align was, and what
you which size/align you want after the call, and they can be anything
including that combination.

Alice
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vitaly Wool 2 months, 3 weeks ago

On 7/14/25 10:14, Vlastimil Babka wrote:
> On 7/12/25 14:43, Vitaly Wool wrote:
>>
>>
>>> On Jul 11, 2025, at 5:43 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
>>>
>>> On 7/11/25 10:58, Harry Yoo wrote:
>>>> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
>>>>> static __always_inline __realloc_size(2) void *
>>>>> -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>>>> +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
>>>>> {
>>>>> void *ret;
>>>>> size_t ks = 0;
>>>>> @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>>>>> if (!kasan_check_byte(p))
>>>>> return NULL;
>>>>>
>>>>> + /* refuse to proceed if alignment is bigger than what kmalloc() provides */
>>>>> + if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
>>>>> + return NULL;
>>>>
>>>> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
>>>>
>>>> For example, what will happen if we  allocate object with size=64, align=64
>>>> and then do krealloc with size=96, align=64...
>>>>
>>>> Or am I missing something?
>>>
>>> Good point. We extended the alignment guarantees in commit ad59baa31695
>>> ("slab, rust: extend kmalloc() alignment guarantees to remove Rust padding")
>>> for rust in a way that size 96 gives you alignment of 32. It assumes that
>>> rust side will ask for alignments that are power-of-two and sizes that are
>>> multiples of alignment. I think if that assumption is still honored than
>>> this will keep working, but the check added above (is it just a sanity check
>>> or something the rust side relies on?) doesn't seem correct?
>>>
>>
>> It is a sanity check and it should have looked like this:
>>
>>          if (!IS_ALIGNED((unsigned long)p, align) && new_size <= ks)
>>                  return NULL;
>>
>> and the reasoning for this is the following: if we don’t intend to reallocate (new size is not bigger than the original size), but the user requests a larger alignment, it’s a miss. Does that sound reasonable?
> 
> So taking a step back indeed the align passed to krealloc is indeed used
> only for this check. If it's really just a sanity check, then I'd rather not
> add this parameter to krealloc functions at all - kmalloc() itself also
> doesn't have it, so it's inconsistent that krealloc() would have it - but
> only to return NULL and not e.g. try to reallocate for alignment.
> 
> If it's not just a sanity check, it means it's expected that for some
> sequence of valid kvrealloc_node_align() calls it can return NULL and then
> rely on the fallback to vmalloc. That would be rather wasteful for the cases
> like going from 64 to 96 bytes etc. So in that case it would be better if
> krealloc did the reallocation, same as in cases when size increases. Of
> course it would still have to rely on the documented alignment guarantees
> only and not provide anything arbitrary. aligned_size() in
> rust/kernel/alloc/allocator.rs is responsible for that, AFAIK.
> 
> And I think it's not a sanity check but the latter - if the following is a
> valid k(v)realloc sequence (from Rust POV). The individual size+align
> combinations AFAIK are, but if it's valid to make them follow one another
> them like this, I don't know.
> 
> krealloc(size=96, align=32) -> can give object with 32 alignment only
> krealloc(size=64, align=64) -> doesn't increase size but wants alignment 64
> 

We should be able to correctly process these. I agree that making such 
cases fall back to vrealloc is suboptimal but it's a technically correct 
behavior. I understand that you would rather have a reallocation on the 
slub side in these cases, so this will look as follows

           if (!IS_ALIGNED((unsigned long)p, align) && new_size <= ks)
                   goto alloc_new;

I'll modify/retest for the next patchset iteration.

~Vitaly
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Kent Overstreet 2 months, 4 weeks ago
On Fri, Jul 11, 2025 at 05:58:23PM +0900, Harry Yoo wrote:
> On Wed, Jul 09, 2025 at 07:24:41PM +0200, Vitaly Wool wrote:
> > Reimplement k[v]realloc_node() to be able to set node and
> > alignment should a user need to do so. In order to do that while
> > retaining the maximal backward compatibility, add
> > k[v]realloc_node_align() functions and redefine the rest of API
> > using these new ones.
> > 
> > While doing that, we also keep the number of  _noprof variants to a
> > minimum, which implies some changes to the existing users of older
> > _noprof functions, that basically being bcachefs.
> > 
> > With that change we also provide the ability for the Rust part of
> > the kernel to set node and alignment in its K[v]xxx
> > [re]allocations.
> > 
> > Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
> > ---
> >  fs/bcachefs/darray.c   |  2 +-
> >  fs/bcachefs/util.h     |  2 +-
> >  include/linux/bpfptr.h |  2 +-
> >  include/linux/slab.h   | 38 +++++++++++++++----------
> >  lib/rhashtable.c       |  4 +--
> >  mm/slub.c              | 64 +++++++++++++++++++++++++++++-------------
> >  6 files changed, 72 insertions(+), 40 deletions(-)
>  
> > diff --git a/mm/slub.c b/mm/slub.c
> > index c4b64821e680..6fad4cdea6c4 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -4845,7 +4845,7 @@ void kfree(const void *object)
> >  EXPORT_SYMBOL(kfree);
> >  
> >  static __always_inline __realloc_size(2) void *
> > -__do_krealloc(const void *p, size_t new_size, gfp_t flags)
> > +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
> >  {
> >  	void *ret;
> >  	size_t ks = 0;
> > @@ -4859,6 +4859,20 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
> >  	if (!kasan_check_byte(p))
> >  		return NULL;
> >  
> > +	/* refuse to proceed if alignment is bigger than what kmalloc() provides */
> > +	if (!IS_ALIGNED((unsigned long)p, align) || new_size < align)
> > +		return NULL;
> 
> Hmm but what happens if `p` is aligned to `align`, but the new object is not?
> 
> For example, what will happen if we  allocate object with size=64, align=64
> and then do krealloc with size=96, align=64...
> 
> Or am I missing something?

You generally need size to be a multiple of align...
Re: [PATCH v12 2/4] mm/slub: allow to set node and align in k[v]realloc
Posted by Vlastimil Babka 2 months, 4 weeks ago
On 7/9/25 19:24, Vitaly Wool wrote:
> Reimplement k[v]realloc_node() to be able to set node and
> alignment should a user need to do so. In order to do that while
> retaining the maximal backward compatibility, add
> k[v]realloc_node_align() functions and redefine the rest of API
> using these new ones.
> 
> While doing that, we also keep the number of  _noprof variants to a
> minimum, which implies some changes to the existing users of older
> _noprof functions, that basically being bcachefs.
> 
> With that change we also provide the ability for the Rust part of
> the kernel to set node and alignment in its K[v]xxx
> [re]allocations.
> 
> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>