Reimplement k[v]realloc_node() to be able to set the NUMA node and
alignment should a user need to do so. In order to do that while
retaining maximal backward compatibility, add
k[v]realloc_node_align() functions and redefine the rest of the API
using these new ones.

While doing that, we also keep the number of _noprof variants to a
minimum, which implies some changes to the existing users of the
older _noprof functions, essentially just bcachefs.

With that change we also give the Rust part of the kernel the
ability to set the node and alignment in its K[v]xxx
[re]allocations.
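
For example, with this in place a caller can pick both the node and
the alignment at realloc time. A minimal illustrative sketch (the
64-byte alignment and the helper function are made-up example values,
not taken from this patch):

	#include <linux/slab.h>

	static void *resize_buf(void *buf, size_t new_size, int nid)
	{
		/* example values: 64-byte alignment, caller-chosen node */
		return kvrealloc_node_align(buf, new_size, 64,
					    GFP_KERNEL, nid);
	}

Existing callers keep compiling unchanged: kvrealloc(p, size, flags)
now expands to kvrealloc_node(p, size, flags, NUMA_NO_NODE), which in
turn expands to kvrealloc_node_align(p, size, 1, flags, NUMA_NO_NODE).
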
Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
---
fs/bcachefs/darray.c | 2 +-
fs/bcachefs/util.h | 2 +-
include/linux/bpfptr.h | 2 +-
include/linux/slab.h | 39 +++++++++++++++----------
lib/rhashtable.c | 4 +--
mm/slub.c | 64 +++++++++++++++++++++++++++++-------------
6 files changed, 73 insertions(+), 40 deletions(-)
diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
index e86d36d23e9e..928e83a1ce42 100644
--- a/fs/bcachefs/darray.c
+++ b/fs/bcachefs/darray.c
@@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
return -ENOMEM;
void *data = likely(bytes < INT_MAX)
- ? kvmalloc_noprof(bytes, gfp)
+ ? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
: vmalloc_noprof(bytes);
if (!data)
return -ENOMEM;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 0a4b1d433621..2d6d4b547db8 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
{
void *p = unlikely(n >= INT_MAX)
? vmalloc_noprof(n)
- : kvmalloc_noprof(n, flags & ~__GFP_ZERO);
+ : kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
if (p && (flags & __GFP_ZERO))
memset(p, 0, n);
return p;
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 1af241525a17..f6e0795db484 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len)
{
- void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN);
+ void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE);
if (!p)
return ERR_PTR(-ENOMEM);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d5a8ab98035c..6dc300bac2a1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -465,9 +465,13 @@ int kmem_cache_shrink(struct kmem_cache *s);
/*
* Common kmalloc functions provided by all allocators
*/
-void * __must_check krealloc_noprof(const void *objp, size_t new_size,
- gfp_t flags) __realloc_size(2);
-#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__))
+void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size,
+ unsigned long align,
+ gfp_t flags, int nid) __realloc_size(2);
+#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE)
+#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__))
+#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n)
+#define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE)
void kfree(const void *objp);
void kfree_sensitive(const void *objp);
@@ -1041,18 +1045,20 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
-#define kvmalloc_node_noprof(size, flags, node) \
- __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
-#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
-
-#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE)
-#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+ gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \
+ __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node)
+#define kvmalloc_node_align(...) \
+ alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__))
+#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n)
+#define kvmalloc(...) kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE)
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
+
#define kmem_buckets_valloc(_b, _size, _flags) \
- alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+ alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE))
static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
@@ -1062,7 +1068,7 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- return kvmalloc_node_noprof(bytes, flags, node);
+ return kvmalloc_node_align_noprof(bytes, 1, flags, node);
}
#define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
@@ -1073,9 +1079,12 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
#define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
#define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__))
-void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
- __realloc_size(2);
-#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__))
+void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+ gfp_t flags, int nid) __realloc_size(2);
+#define kvrealloc_node_align(...) \
+ alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__))
+#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n)
+#define kvrealloc(...) kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE)
extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 3e555d012ed6..fde0f0e556f8 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -184,8 +184,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
static struct lock_class_key __key;
tbl = alloc_hooks_tag(ht->alloc_tag,
- kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets),
- gfp|__GFP_ZERO, NUMA_NO_NODE));
+ kvmalloc_node_align_noprof(struct_size(tbl, buckets, nbuckets),
+ 1, gfp|__GFP_ZERO, NUMA_NO_NODE));
size = nbuckets;
diff --git a/mm/slub.c b/mm/slub.c
index 31e11ef256f9..80f7b609a18d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4844,7 +4844,7 @@ void kfree(const void *object)
EXPORT_SYMBOL(kfree);
static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
{
void *ret;
size_t ks = 0;
@@ -4858,6 +4858,16 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
if (!kasan_check_byte(p))
return NULL;
+ /*
+ * If reallocation is not necessary (e.g. the new size is less
+ * than the current allocated size), the current allocation will be
+ * preserved unless __GFP_THISNODE is set. In the latter case a new
+ * allocation on the requested node will be attempted.
+ */
+ if (unlikely(flags & __GFP_THISNODE) && nid != NUMA_NO_NODE &&
+ nid != page_to_nid(virt_to_page(p)))
+ goto alloc_new;
+
if (is_kfence_address(p)) {
ks = orig_size = kfence_ksize(p);
} else {
@@ -4880,6 +4890,10 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
if (new_size > ks)
goto alloc_new;
+ /* If the old object doesn't satisfy the new alignment, allocate a new one */
+ if (!IS_ALIGNED((unsigned long)p, align))
+ goto alloc_new;
+
/* Zero out spare memory. */
if (want_init_on_alloc(flags)) {
kasan_disable_current();
@@ -4902,7 +4916,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
return (void *)p;
alloc_new:
- ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+ ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_);
if (ret && p) {
/* Disable KASAN checks as the object's redzone is accessed. */
kasan_disable_current();
@@ -4914,10 +4928,12 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
}
/**
- * krealloc - reallocate memory. The contents will remain unchanged.
+ * krealloc_node_align - reallocate memory. The contents will remain unchanged.
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
+ * @align: desired alignment.
* @flags: the type of memory to allocate.
+ * @nid: NUMA node or NUMA_NO_NODE
*
* If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
* is 0 and @p is not a %NULL pointer, the object pointed to is freed.
@@ -4946,7 +4962,8 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
*
* Return: pointer to the allocated memory or %NULL in case of error
*/
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align,
+ gfp_t flags, int nid)
{
void *ret;
@@ -4955,13 +4972,13 @@ void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
return ZERO_SIZE_PTR;
}
- ret = __do_krealloc(p, new_size, flags);
+ ret = __do_krealloc(p, new_size, align, flags, nid);
if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
kfree(p);
return ret;
}
-EXPORT_SYMBOL(krealloc_noprof);
+EXPORT_SYMBOL(krealloc_node_align_noprof);
static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
{
@@ -4992,6 +5009,7 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
* failure, fall back to non-contiguous (vmalloc) allocation.
* @size: size of the request.
* @b: which set of kmalloc buckets to allocate from.
+ * @align: desired alignment.
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
* @node: numa node to allocate from
*
@@ -5004,19 +5022,22 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
*
* Return: pointer to the allocated memory of %NULL in case of failure
*/
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+ gfp_t flags, int node)
{
void *ret;
/*
* It doesn't really make sense to fallback to vmalloc for sub page
- * requests
+ * requests and small alignments
*/
- ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
- kmalloc_gfp_adjust(flags, size),
- node, _RET_IP_);
- if (ret || size <= PAGE_SIZE)
- return ret;
+ if (size >= align) {
+ ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
+ kmalloc_gfp_adjust(flags, size),
+ node, _RET_IP_);
+ if (ret || size <= PAGE_SIZE)
+ return ret;
+ }
/* non-sleeping allocations are not supported by vmalloc */
if (!gfpflags_allow_blocking(flags))
@@ -5034,7 +5055,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
* about the resulting pointer, and cannot play
* protection games.
*/
- return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
@@ -5078,10 +5099,12 @@ void kvfree_sensitive(const void *addr, size_t len)
EXPORT_SYMBOL(kvfree_sensitive);
/**
- * kvrealloc - reallocate memory; contents remain unchanged
+ * kvrealloc_node_align - reallocate memory; contents remain unchanged
* @p: object to reallocate memory for
* @size: the size to reallocate
+ * @align: desired alignment
* @flags: the flags for the page level allocator
+ * @nid: NUMA node id
*
* If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
* and @p is not a %NULL pointer, the object pointed to is freed.
@@ -5099,17 +5122,18 @@ EXPORT_SYMBOL(kvfree_sensitive);
*
* Return: pointer to the allocated memory or %NULL in case of error
*/
-void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
+void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+ gfp_t flags, int nid)
{
void *n;
if (is_vmalloc_addr(p))
- return vrealloc_noprof(p, size, flags);
+ return vrealloc_node_align_noprof(p, size, align, flags, nid);
- n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size));
+ n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid);
if (!n) {
/* We failed to krealloc(), fall back to kvmalloc(). */
- n = kvmalloc_noprof(size, flags);
+ n = kvmalloc_node_align_noprof(size, align, flags, nid);
if (!n)
return NULL;
@@ -5125,7 +5149,7 @@ void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
return n;
}
-EXPORT_SYMBOL(kvrealloc_noprof);
+EXPORT_SYMBOL(kvrealloc_node_align_noprof);
struct detached_freelist {
struct slab *slab;
--
2.39.2
On 7/15/25 15:58, Vitaly Wool wrote:
> Reimplement k[v]realloc_node() to be able to set the NUMA node and
> alignment should a user need to do so. In order to do that while
> retaining maximal backward compatibility, add
> k[v]realloc_node_align() functions and redefine the rest of the API
> using these new ones.
>
> While doing that, we also keep the number of _noprof variants to a
> minimum, which implies some changes to the existing users of the
> older _noprof functions, essentially just bcachefs.
>
> With that change we also give the Rust part of the kernel the
> ability to set the node and alignment in its K[v]xxx
> [re]allocations.
>
> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
>
> /**
> - * krealloc - reallocate memory. The contents will remain unchanged.
> + * krealloc_node_align - reallocate memory. The contents will remain unchanged.
>   * @p: object to reallocate memory for.
>   * @new_size: how many bytes of memory are required.
> + * @align: desired alignment.

It should be noted more clearly that only alignments up to those
guaranteed by kmalloc() (with a link to its doc where that's
described) are expected, not arbitrary ones. So we don't give the
wrong impression here.

>   * @flags: the type of memory to allocate.
> + * @nid: NUMA node or NUMA_NO_NODE
>   *
>   * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
>   * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
> @@ -4946,7 +4962,8 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
>   *
>   * Return: pointer to the allocated memory or %NULL in case of error
>   */
> -void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
> +void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align,
> +				 gfp_t flags, int nid)
> {
> 	void *ret;
>
> @@ -4955,13 +4972,13 @@ void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
> 		return ZERO_SIZE_PTR;
> 	}
>
> -	ret = __do_krealloc(p, new_size, flags);
> +	ret = __do_krealloc(p, new_size, align, flags, nid);
> 	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
> 		kfree(p);
>
> 	return ret;
> }
> -EXPORT_SYMBOL(krealloc_noprof);
> +EXPORT_SYMBOL(krealloc_node_align_noprof);
>
> static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
> {
> @@ -4992,6 +5009,7 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
>   * failure, fall back to non-contiguous (vmalloc) allocation.
>   * @size: size of the request.
>   * @b: which set of kmalloc buckets to allocate from.
> + * @align: desired alignment.

I guess here we should say something similar to what I suggested for
krealloc(). The "size >= align" check below doesn't mean the
alignment can be arbitrary. It would be incompatible with how
kvrealloc_node_align_noprof() works anyway.

>   * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
>   * @node: numa node to allocate from
>   *
> @@ -5004,19 +5022,22 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
>   *
>   * Return: pointer to the allocated memory of %NULL in case of failure
>   */
> -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
> +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
> +			     gfp_t flags, int node)
> {
> 	void *ret;
>
> 	/*
> 	 * It doesn't really make sense to fallback to vmalloc for sub page
> -	 * requests
> +	 * requests and small alignments
> 	 */
> -	ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
> -				kmalloc_gfp_adjust(flags, size),
> -				node, _RET_IP_);
> -	if (ret || size <= PAGE_SIZE)
> -		return ret;
> +	if (size >= align) {

I think it's similar to the check in __do_krealloc() in v12 and not
particularly useful. We don't need to support align > size for Rust
anyway? Does vmalloc even honor that?

It would also be inconsistent that kvmalloc() would give you this
possibility of size < align, but starting with a small size and
size >= align and then kvrealloc() to size < align wouldn't actually
work.

> +		ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
> +					kmalloc_gfp_adjust(flags, size),
> +					node, _RET_IP_);
> +		if (ret || size <= PAGE_SIZE)
> +			return ret;
> +	}
>
> 	/* non-sleeping allocations are not supported by vmalloc */
> 	if (!gfpflags_allow_blocking(flags))
> @@ -5034,7 +5055,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
> 	 * about the resulting pointer, and cannot play
> 	 * protection games.
> 	 */
> -	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
> +	return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
> 			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
> 			node, __builtin_return_address(0));
> }
> @@ -5078,10 +5099,12 @@ void kvfree_sensitive(const void *addr, size_t len)
> EXPORT_SYMBOL(kvfree_sensitive);
>
> /**
> - * kvrealloc - reallocate memory; contents remain unchanged
> + * kvrealloc_node_align - reallocate memory; contents remain unchanged
>   * @p: object to reallocate memory for
>   * @size: the size to reallocate
> + * @align: desired alignment

Ditto.

>   * @flags: the flags for the page level allocator
> + * @nid: NUMA node id
>   *
>   * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
>   * and @p is not a %NULL pointer, the object pointed to is freed.
> @@ -5099,17 +5122,18 @@ EXPORT_SYMBOL(kvfree_sensitive);
>   *
>   * Return: pointer to the allocated memory or %NULL in case of error
>   */
> -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
> +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
> +				  gfp_t flags, int nid)
> {
> 	void *n;
>
> 	if (is_vmalloc_addr(p))
> -		return vrealloc_noprof(p, size, flags);
> +		return vrealloc_node_align_noprof(p, size, align, flags, nid);
>
> -	n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size));
> +	n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid);
> 	if (!n) {
> 		/* We failed to krealloc(), fall back to kvmalloc(). */
> -		n = kvmalloc_noprof(size, flags);
> +		n = kvmalloc_node_align_noprof(size, align, flags, nid);
> 		if (!n)
> 			return NULL;
>
> @@ -5125,7 +5149,7 @@ void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
>
> 	return n;
> }
> -EXPORT_SYMBOL(kvrealloc_noprof);
> +EXPORT_SYMBOL(kvrealloc_node_align_noprof);
>
> struct detached_freelist {
> 	struct slab *slab;
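
To make "alignments up to those guaranteed by kmalloc()" concrete,
the kind of check I mean would be roughly (an illustrative sketch
only, not a concrete proposal; the exact constraint would have to
match kmalloc()'s documented guarantees):

	/* sketch: refuse alignments kmalloc() doesn't guarantee */
	if (WARN_ON_ONCE(!is_power_of_2(align)))
		return NULL;

or at least a kernel-doc note that @align must be a power of two no
larger than what kmalloc() guarantees for the given size.
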
Hi Kent,

On Tue Jul 15, 2025 at 3:58 PM CEST, Vitaly Wool wrote:
> diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
> index e86d36d23e9e..928e83a1ce42 100644
> --- a/fs/bcachefs/darray.c
> +++ b/fs/bcachefs/darray.c
> @@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
> 		return -ENOMEM;
>
> 	void *data = likely(bytes < INT_MAX)
> -		? kvmalloc_noprof(bytes, gfp)
> +		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
> 		: vmalloc_noprof(bytes);
> 	if (!data)
> 		return -ENOMEM;
> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
> index 0a4b1d433621..2d6d4b547db8 100644
> --- a/fs/bcachefs/util.h
> +++ b/fs/bcachefs/util.h
> @@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
> {
> 	void *p = unlikely(n >= INT_MAX)
> 		? vmalloc_noprof(n)
> -		: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
> +		: kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
> 	if (p && (flags & __GFP_ZERO))
> 		memset(p, 0, n);
> 	return p;

I assume this is because kvmalloc(), and hence kvrealloc(), does this:

	/* Don't even allow crazy sizes */
	if (unlikely(size > INT_MAX)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

Do we still consider this a "crazy size"? :)
On 7/15/25 16:33, Danilo Krummrich wrote:
> On Tue Jul 15, 2025 at 3:58 PM CEST, Vitaly Wool wrote:
>> diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
>> index e86d36d23e9e..928e83a1ce42 100644
>> --- a/fs/bcachefs/darray.c
>> +++ b/fs/bcachefs/darray.c
>> @@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
>> 		return -ENOMEM;
>>
>> 	void *data = likely(bytes < INT_MAX)
>> -		? kvmalloc_noprof(bytes, gfp)
>> +		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
>> 		: vmalloc_noprof(bytes);
>> 	if (!data)
>> 		return -ENOMEM;
>> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
>> index 0a4b1d433621..2d6d4b547db8 100644
>> --- a/fs/bcachefs/util.h
>> +++ b/fs/bcachefs/util.h
>> @@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
>> {
>> 	void *p = unlikely(n >= INT_MAX)
>> 		? vmalloc_noprof(n)
>> -		: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
>> +		: kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
>> 	if (p && (flags & __GFP_ZERO))
>> 		memset(p, 0, n);
>> 	return p;
>
> I assume this is because kvmalloc(), and hence kvrealloc(), does this:
>
> 	/* Don't even allow crazy sizes */
> 	if (unlikely(size > INT_MAX)) {
> 		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
> 		return NULL;
> 	}
>
> Do we still consider this a "crazy size"? :)

Yeah, with "we" including Linus:
https://lore.kernel.org/all/CAHk-=wi=PrbZnwnvhKEF6UUQNCZdNsUbr+hk-jOWGr-q4Mmz=Q@mail.gmail.com/
On Tue Jul 15, 2025 at 5:34 PM CEST, Vlastimil Babka wrote:
> On 7/15/25 16:33, Danilo Krummrich wrote:
>> On Tue Jul 15, 2025 at 3:58 PM CEST, Vitaly Wool wrote:
>>> diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
>>> index e86d36d23e9e..928e83a1ce42 100644
>>> --- a/fs/bcachefs/darray.c
>>> +++ b/fs/bcachefs/darray.c
>>> @@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
>>> 		return -ENOMEM;
>>>
>>> 	void *data = likely(bytes < INT_MAX)
>>> -		? kvmalloc_noprof(bytes, gfp)
>>> +		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
>>> 		: vmalloc_noprof(bytes);
>>> 	if (!data)
>>> 		return -ENOMEM;
>>> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
>>> index 0a4b1d433621..2d6d4b547db8 100644
>>> --- a/fs/bcachefs/util.h
>>> +++ b/fs/bcachefs/util.h
>>> @@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
>>> {
>>> 	void *p = unlikely(n >= INT_MAX)
>>> 		? vmalloc_noprof(n)
>>> -		: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
>>> +		: kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
>>> 	if (p && (flags & __GFP_ZERO))
>>> 		memset(p, 0, n);
>>> 	return p;
>>
>> I assume this is because kvmalloc(), and hence kvrealloc(), does this:
>>
>> 	/* Don't even allow crazy sizes */
>> 	if (unlikely(size > INT_MAX)) {
>> 		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
>> 		return NULL;
>> 	}
>>
>> Do we still consider this a "crazy size"? :)
>
> Yeah, with "we" including Linus:
> https://lore.kernel.org/all/CAHk-=wi=PrbZnwnvhKEF6UUQNCZdNsUbr+hk-jOWGr-q4Mmz=Q@mail.gmail.com/

I don't know why bcachefs needs this, hence my question. But I agree
that this clearly raises an eyebrow. :)
On Tue Jul 15, 2025 at 5:48 PM CEST, Danilo Krummrich wrote:
> On Tue Jul 15, 2025 at 5:34 PM CEST, Vlastimil Babka wrote:
>> On 7/15/25 16:33, Danilo Krummrich wrote:
>>> On Tue Jul 15, 2025 at 3:58 PM CEST, Vitaly Wool wrote:
>>>> diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c
>>>> index e86d36d23e9e..928e83a1ce42 100644
>>>> --- a/fs/bcachefs/darray.c
>>>> +++ b/fs/bcachefs/darray.c
>>>> @@ -21,7 +21,7 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
>>>> 		return -ENOMEM;
>>>>
>>>> 	void *data = likely(bytes < INT_MAX)
>>>> -		? kvmalloc_noprof(bytes, gfp)
>>>> +		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
>>>> 		: vmalloc_noprof(bytes);
>>>> 	if (!data)
>>>> 		return -ENOMEM;
>>>> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
>>>> index 0a4b1d433621..2d6d4b547db8 100644
>>>> --- a/fs/bcachefs/util.h
>>>> +++ b/fs/bcachefs/util.h
>>>> @@ -61,7 +61,7 @@ static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
>>>> {
>>>> 	void *p = unlikely(n >= INT_MAX)
>>>> 		? vmalloc_noprof(n)
>>>> -		: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
>>>> +		: kvmalloc_node_align_noprof(n, 1, flags & ~__GFP_ZERO, NUMA_NO_NODE);
>>>> 	if (p && (flags & __GFP_ZERO))
>>>> 		memset(p, 0, n);
>>>> 	return p;
>>>
>>> I assume this is because kvmalloc(), and hence kvrealloc(), does this:
>>>
>>> 	/* Don't even allow crazy sizes */
>>> 	if (unlikely(size > INT_MAX)) {
>>> 		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
>>> 		return NULL;
>>> 	}
>>>
>>> Do we still consider this a "crazy size"? :)
>>
>> Yeah, with "we" including Linus:
>> https://lore.kernel.org/all/CAHk-=wi=PrbZnwnvhKEF6UUQNCZdNsUbr+hk-jOWGr-q4Mmz=Q@mail.gmail.com/
>
> I don't know why bcachefs needs this, hence my question. But I agree that this
> clearly raises an eyebrow. :)

I.e. this is me noting that we're considering anything larger to be
unreasonable while having an in-tree user bypassing this check
intentionally.