mm/percpu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-)
The 'allocation failed, ...' warning messages can cause unlimited log
spam, contrary to the implementation's intent.
The warn_limit variable is accessed without synchronization. If more
than <warn_limit> threads enter the warning path at the same time, the
variable will get decremented past 0. Once it becomes negative, the
non-zero check will always return true leading to unlimited log spam.
Use atomic operations to access warn_limit and change the check to test
for positive (> 0) as it can still become negative.
While the change cited in Fixes is only adjacent, the warning limit
implementation was correct before it. Only non-atomic allocations were
considered for warnings, and those happened to hold pcpu_alloc_mutex
while accessing warn_limit.
Fixes: f7d77dfc91f7 ("mm/percpu.c: print error message too if atomic alloc failed")
Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
---
mm/percpu.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index a56f35dcc417..c1a4089eb4c3 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1734,7 +1734,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
bool is_atomic;
bool do_warn;
struct obj_cgroup *objcg = NULL;
- static int warn_limit = 10;
+ static atomic_t warn_limit = ATOMIC_INIT(10);
struct pcpu_chunk *chunk, *next;
const char *err;
int slot, off, cpu, ret;
@@ -1904,13 +1904,17 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
- if (do_warn && warn_limit) {
- pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
- size, align, is_atomic, err);
- if (!is_atomic)
- dump_stack();
- if (!--warn_limit)
- pr_info("limit reached, disable warning\n");
+ if (do_warn && atomic_read(&warn_limit) > 0) {
+ int remaining = atomic_dec_return(&warn_limit);
+
+ if (remaining >= 0) {
+ pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
+ size, align, is_atomic, err);
+ if (!is_atomic)
+ dump_stack();
+ if (remaining == 0)
+ pr_info("limit reached, disable warning\n");
+ }
}
if (is_atomic) {
--
2.49.0
On Fri, 22 Aug 2025, Vlad Dumitrescu wrote:
> + if (do_warn && atomic_read(&warn_limit) > 0) {
> + int remaining = atomic_dec_return(&warn_limit);

The code creates a race condition since another atomic_dec_return() can happen on another cpu between these two lines. warn_limit can go negative.

Use a single atomic operation instead?
On 9/2/25 10:39, Christoph Lameter (Ampere) wrote:
> On Fri, 22 Aug 2025, Vlad Dumitrescu wrote:
>
>> + if (do_warn && atomic_read(&warn_limit) > 0) {
>> + int remaining = atomic_dec_return(&warn_limit);
>
>
> The code creates a race condition since another atomic_dec_return() can
> happen on another cpu between these two lines. warn_limit can go negative.

Yes, which is why I mentioned it in the description. But compared to before, it should be benign.

> Use a single atomic operation instead?

Did you have something like this in mind?

- if (do_warn && atomic_read(&warn_limit) > 0) {
- int remaining = atomic_dec_return(&warn_limit);
+ if (do_warn) {
+ int remaining = atomic_dec_if_positive(&warn_limit);

Should end up with the same visible result, but w/o going negative.

Would you like me to send v2?

Vlad
On Tue, 2 Sep 2025, Vlad Dumitrescu wrote:
> On 9/2/25 10:39, Christoph Lameter (Ampere) wrote:
> > On Fri, 22 Aug 2025, Vlad Dumitrescu wrote:
> >
> >> + if (do_warn && atomic_read(&warn_limit) > 0) {
> >> + int remaining = atomic_dec_return(&warn_limit);
> >
> >
> > The code creates a race condition since another atomic_dec_return() can
> > happen on another cpu between these two lines. warn_limit can go negative.
>
> Yes, which is why I mentioned it in the description. But compared to before,
> it should be benign.
>
> > Use a single atomic operation instead?
>
> Did you have something like this in mind?
>
> - if (do_warn && atomic_read(&warn_limit) > 0) {
> - int remaining = atomic_dec_return(&warn_limit);
> + if (do_warn) {
> + int remaining = atomic_dec_if_positive(&warn_limit);

Something like it... Maybe

if (do_warn && (atomic_dec_if_positive(&warn_limit)) ) {
pr_warn ...
}
On 08/22/25 at 03:55pm, Vlad Dumitrescu wrote:
> The 'allocation failed, ...' warning messages can cause unlimited log
> spam, contrary to the implementation's intent.
>
> The warn_limit variable is accessed without synchronization. If more
> than <warn_limit> threads enter the warning path at the same time, the
> variable will get decremented past 0. Once it becomes negative, the
> non-zero check will always return true leading to unlimited log spam.
>
> Use atomic operations to access warn_limit and change the check to test
> for positive (> 0) as it can still become negative.
>
> While the change cited in Fixes is only adjacent, the warning limit
> implementation was correct before it. Only non-atomic allocations were
> considered for warnings, and those happened to hold pcpu_alloc_mutex
> while accessing warn_limit.
>
> Fixes: f7d77dfc91f7 ("mm/percpu.c: print error message too if atomic alloc failed")
> Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
> ---
> mm/percpu.c | 20 ++++++++++++--------
> 1 file changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/mm/percpu.c b/mm/percpu.c
> index a56f35dcc417..c1a4089eb4c3 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1734,7 +1734,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
> bool is_atomic;
> bool do_warn;
> struct obj_cgroup *objcg = NULL;
> - static int warn_limit = 10;
> + static atomic_t warn_limit = ATOMIC_INIT(10);
> struct pcpu_chunk *chunk, *next;
> const char *err;
> int slot, off, cpu, ret;
> @@ -1904,13 +1904,17 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
> fail:
> trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
>
> - if (do_warn && warn_limit) {
> - pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
> - size, align, is_atomic, err);
> - if (!is_atomic)
> - dump_stack();
> - if (!--warn_limit)
> - pr_info("limit reached, disable warning\n");
> + if (do_warn && atomic_read(&warn_limit) > 0) {
> + int remaining = atomic_dec_return(&warn_limit);
> +
> + if (remaining >= 0) {
> + pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
> + size, align, is_atomic, err);
> + if (!is_atomic)
> + dump_stack();
> + if (remaining == 0)
> + pr_info("limit reached, disable warning\n");
> + }

A good catch, and the new code logic makes code more robust, thanks for the fix.

Reviewed-by: Baoquan He <bhe@redhat.com>

> }
>
> if (is_atomic) {
> --
> 2.49.0
>
© 2016 - 2025 Red Hat, Inc.