From: Steven Rostedt <rostedt@goodmis.org>
Subject: [PATCH] ring-buffer: Add helper functions for allocations
The allocation of the per CPU buffer descriptor, the buffer page
descriptors and the buffer page data itself can be pretty ugly:

	kzalloc_node(ALIGN(sizeof(struct buffer_page), cache_line_size()),
		     GFP_KERNEL, cpu_to_node(cpu));

And the data pages:

	page = alloc_pages_node(cpu_to_node(cpu),
				GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO,
				order);
	if (!page)
		return NULL;
	bpage->page = page_address(page);
	rb_init_page(bpage->page);
Add helper functions to make the code easier to read.
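With the helpers, the call sites in the diff below reduce to:

	cpu_buffer = alloc_cpu_buffer(cpu);

	bpage = alloc_cpu_page(cpu);

	bpage->page = alloc_cpu_data(cpu, order);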
This makes all allocations of the data page (bpage->page) use the
__GFP_RETRY_MAYFAIL flag (not just the bulk allocator), which is
actually better: allocating the data pages for ring buffer tracing
should try hard but not trigger the OOM killer.
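For illustration only (this snippet is not part of the patch, and
"node" stands in for cpu_to_node(cpu)), the difference is in how the
page allocator reacts to memory pressure:

	/* May invoke the OOM killer when memory is tight: */
	page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
				order);

	/* Retries hard, but returns NULL instead of OOM killing: */
	page = alloc_pages_node(node, GFP_KERNEL | __GFP_RETRY_MAYFAIL |
				__GFP_COMP | __GFP_ZERO, order);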
Link: https://lore.kernel.org/all/CAHk-=wjMMSAaqTjBSfYenfuzE1bMjLj+2DLtLWJuGt07UGCH_Q@mail.gmail.com/
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 86 +++++++++++++++++++++++---------------
1 file changed, 53 insertions(+), 33 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1244d2c5c384..36330986b604 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -401,6 +401,41 @@ static void free_buffer_page(struct buffer_page *bpage)
kfree(bpage);
}
+/*
+ * For best performance, allocate cpu buffer data cache line sized
+ * and per CPU.
+ */
+#define alloc_cpu_buffer(cpu) (struct ring_buffer_per_cpu *) \
+ kzalloc_node(ALIGN(sizeof(struct ring_buffer_per_cpu), \
+ cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
+
+#define alloc_cpu_page(cpu) (struct buffer_page *) \
+ kzalloc_node(ALIGN(sizeof(struct buffer_page), \
+ cache_line_size()), GFP_KERNEL, cpu_to_node(cpu));
+
+static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
+{
+ struct buffer_data_page *dpage;
+ struct page *page;
+ gfp_t mflags;
+
+ /*
+ * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+ * gracefully without invoking oom-killer and the system is not
+ * destabilized.
+ */
+ mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO;
+
+ page = alloc_pages_node(cpu_to_node(cpu), mflags, order);
+ if (!page)
+ return NULL;
+
+ dpage = page_address(page);
+ rb_init_page(dpage);
+
+ return dpage;
+}
+
/*
* We need to fit the time_stamp delta into 27 bits.
*/
@@ -2243,8 +2278,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
for (i = 0; i < nr_pages; i++) {
struct page *page;
- bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- mflags, cpu_to_node(cpu_buffer->cpu));
+ bpage = alloc_cpu_page(cpu_buffer->cpu);
if (!bpage)
goto free_pages;
@@ -2267,13 +2301,10 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
bpage->range = 1;
bpage->id = i + 1;
} else {
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
- mflags | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page)
+ int order = cpu_buffer->buffer->subbuf_order;
+ bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
+ if (!bpage->page)
goto free_pages;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
}
bpage->order = cpu_buffer->buffer->subbuf_order;
@@ -2324,14 +2355,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
{
- struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL;
+ struct ring_buffer_per_cpu *cpu_buffer __free(kfree) =
+ alloc_cpu_buffer(cpu);
struct ring_buffer_cpu_meta *meta;
struct buffer_page *bpage;
struct page *page;
int ret;
- cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu));
if (!cpu_buffer)
return NULL;
@@ -2347,8 +2377,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
mutex_init(&cpu_buffer->mapping_lock);
- bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- GFP_KERNEL, cpu_to_node(cpu));
+ bpage = alloc_cpu_page(cpu);
if (!bpage)
return NULL;
@@ -2370,13 +2399,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
rb_meta_buffer_update(cpu_buffer, bpage);
bpage->range = 1;
} else {
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page)
+ int order = cpu_buffer->buffer->subbuf_order;
+ bpage->page = alloc_cpu_data(cpu, order);
+ if (!bpage->page)
goto fail_free_reader;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
}
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -6486,22 +6512,16 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
- if (bpage->data)
- goto out;
-
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_NORETRY | __GFP_COMP | __GFP_ZERO,
- cpu_buffer->buffer->subbuf_order);
- if (!page) {
- kfree(bpage);
- return ERR_PTR(-ENOMEM);
+ if (bpage->data) {
+ rb_init_page(bpage->data);
+ } else {
+ bpage->data = alloc_cpu_data(cpu, cpu_buffer->buffer->subbuf_order);
+ if (!bpage->data) {
+ kfree(bpage);
+ return ERR_PTR(-ENOMEM);
+ }
}
- bpage->data = page_address(page);
-
- out:
- rb_init_page(bpage->data);
-
return bpage;
}
EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
--
2.51.0
Hi Steven,
kernel test robot noticed the following build warnings:
[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.18-rc7 next-20251125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patches, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/ring-buffer-Add-helper-functions-for-allocations/20251125-031044
base: https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link: https://lore.kernel.org/r/20251124140906.71a2abf6%40gandalf.local.home
patch subject: [PATCH] ring-buffer: Add helper functions for allocations
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20251125/202511252040.Jny2Yxxn-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251125/202511252040.Jny2Yxxn-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511252040.Jny2Yxxn-lkp@intel.com/
All warnings (new ones prefixed by >>):
kernel/trace/ring_buffer.c: In function '__rb_allocate_pages':
>> kernel/trace/ring_buffer.c:2279:30: warning: unused variable 'page' [-Wunused-variable]
2279 | struct page *page;
| ^~~~
>> kernel/trace/ring_buffer.c:2242:15: warning: variable 'mflags' set but not used [-Wunused-but-set-variable]
2242 | gfp_t mflags;
| ^~~~~~
kernel/trace/ring_buffer.c: In function 'rb_allocate_cpu_buffer':
kernel/trace/ring_buffer.c:2362:22: warning: unused variable 'page' [-Wunused-variable]
2362 | struct page *page;
| ^~~~
kernel/trace/ring_buffer.c: In function 'ring_buffer_alloc_read_page':
kernel/trace/ring_buffer.c:6493:22: warning: unused variable 'page' [-Wunused-variable]
6493 | struct page *page;
| ^~~~
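All three warnings are leftovers from the conversion: the GFP flags
now live inside the helpers, so the local declarations are no longer
referenced. A minimal follow-up (a sketch, not part of this report)
would simply drop them, along with the mflags comment that is now
duplicated in alloc_cpu_data(), roughly:

	@@ __rb_allocate_pages()
	-	gfp_t mflags;
	...
	-	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
	...
	-		struct page *page;
	@@ rb_allocate_cpu_buffer()
	-	struct page *page;
	@@ ring_buffer_alloc_read_page()
	-	struct page *page;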
vim +/page +2279 kernel/trace/ring_buffer.c
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2234)
74e2afc6df5782 Qiujun Huang 2020-10-15 2235 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
74e2afc6df5782 Qiujun Huang 2020-10-15 2236 long nr_pages, struct list_head *pages)
7a8e76a3829f10 Steven Rostedt 2008-09-29 2237 {
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2238) struct trace_buffer *buffer = cpu_buffer->buffer;
4009cc31e7813e Steven Rostedt 2025-03-05 2239 struct ring_buffer_cpu_meta *meta = NULL;
044fa782ebb947 Steven Rostedt 2008-12-02 2240 struct buffer_page *bpage, *tmp;
927e56db625322 Steven Rostedt (VMware 2018-04-04 2241) bool user_thread = current->mm != NULL;
927e56db625322 Steven Rostedt (VMware 2018-04-04 @2242) gfp_t mflags;
9b94a8fba501f3 Steven Rostedt (Red Hat 2016-05-12 2243) long i;
3adc54fa82a68b Steven Rostedt 2009-03-30 2244
927e56db625322 Steven Rostedt (VMware 2018-04-04 2245) /*
927e56db625322 Steven Rostedt (VMware 2018-04-04 2246) * Check if the available memory is there first.
927e56db625322 Steven Rostedt (VMware 2018-04-04 2247) * Note, si_mem_available() only gives us a rough estimate of available
927e56db625322 Steven Rostedt (VMware 2018-04-04 2248) * memory. It may not be accurate. But we don't care, we just want
927e56db625322 Steven Rostedt (VMware 2018-04-04 2249) * to prevent doing any allocation when it is obvious that it is
927e56db625322 Steven Rostedt (VMware 2018-04-04 2250) * not going to succeed.
927e56db625322 Steven Rostedt (VMware 2018-04-04 2251) */
2a872fa4e9c8ad Steven Rostedt (VMware 2018-04-02 2252) i = si_mem_available();
2a872fa4e9c8ad Steven Rostedt (VMware 2018-04-02 2253) if (i < nr_pages)
2a872fa4e9c8ad Steven Rostedt (VMware 2018-04-02 2254) return -ENOMEM;
2a872fa4e9c8ad Steven Rostedt (VMware 2018-04-02 2255)
d7ec4bfed6c974 Vaibhav Nagarnaik 2011-06-07 2256 /*
848618857d2535 Joel Fernandes 2017-07-12 2257 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
848618857d2535 Joel Fernandes 2017-07-12 2258 * gracefully without invoking oom-killer and the system is not
848618857d2535 Joel Fernandes 2017-07-12 2259 * destabilized.
d7ec4bfed6c974 Vaibhav Nagarnaik 2011-06-07 2260 */
927e56db625322 Steven Rostedt (VMware 2018-04-04 2261) mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
927e56db625322 Steven Rostedt (VMware 2018-04-04 2262)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2263) /*
927e56db625322 Steven Rostedt (VMware 2018-04-04 2264) * If a user thread allocates too much, and si_mem_available()
927e56db625322 Steven Rostedt (VMware 2018-04-04 2265) * reports there's enough memory, even though there is not.
927e56db625322 Steven Rostedt (VMware 2018-04-04 2266) * Make sure the OOM killer kills this thread. This can happen
927e56db625322 Steven Rostedt (VMware 2018-04-04 2267) * even with RETRY_MAYFAIL because another task may be doing
927e56db625322 Steven Rostedt (VMware 2018-04-04 2268) * an allocation after this task has taken all memory.
927e56db625322 Steven Rostedt (VMware 2018-04-04 2269) * This is the task the OOM killer needs to take out during this
927e56db625322 Steven Rostedt (VMware 2018-04-04 2270) * loop, even if it was triggered by an allocation somewhere else.
927e56db625322 Steven Rostedt (VMware 2018-04-04 2271) */
927e56db625322 Steven Rostedt (VMware 2018-04-04 2272) if (user_thread)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2273) set_current_oom_origin();
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2274)
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2275) if (buffer->range_addr_start)
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2276) meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2277)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2278) for (i = 0; i < nr_pages; i++) {
927e56db625322 Steven Rostedt (VMware 2018-04-04 @2279) struct page *page;
927e56db625322 Steven Rostedt (VMware 2018-04-04 2280)
1e3d56c5556b8d Steven Rostedt 2025-11-24 2281 bpage = alloc_cpu_page(cpu_buffer->cpu);
044fa782ebb947 Steven Rostedt 2008-12-02 2282 if (!bpage)
e4c2ce82ca2710 Steven Rostedt 2008-10-01 2283 goto free_pages;
77ae365eca8950 Steven Rostedt 2009-03-27 2284
74e2afc6df5782 Qiujun Huang 2020-10-15 2285 rb_check_bpage(cpu_buffer, bpage);
74e2afc6df5782 Qiujun Huang 2020-10-15 2286
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2287) /*
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2288) * Append the pages as for mapped buffers we want to keep
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2289) * the order
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2290) */
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2291) list_add_tail(&bpage->list, pages);
e4c2ce82ca2710 Steven Rostedt 2008-10-01 2292
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2293) if (meta) {
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2294) /* A range was given. Use that for the buffer page */
b14d032973d4e6 Steven Rostedt (Google 2024-06-12 2295) bpage->page = rb_range_buffer(cpu_buffer, i + 1);
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2296) if (!bpage->page)
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2297) goto free_pages;
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2298) /* If this is valid from a previous boot */
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2299) if (meta->head_buffer)
c76883f18e59b7 Steven Rostedt (Google 2024-06-12 2300) rb_meta_buffer_update(cpu_buffer, bpage);
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2301) bpage->range = 1;
b14d032973d4e6 Steven Rostedt (Google 2024-06-12 2302) bpage->id = i + 1;
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2303) } else {
1e3d56c5556b8d Steven Rostedt 2025-11-24 2304 int order = cpu_buffer->buffer->subbuf_order;
1e3d56c5556b8d Steven Rostedt 2025-11-24 2305 bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
1e3d56c5556b8d Steven Rostedt 2025-11-24 2306 if (!bpage->page)
7a8e76a3829f10 Steven Rostedt 2008-09-29 2307 goto free_pages;
be68d63a139bd4 Steven Rostedt (Google 2024-06-12 2308) }
f9b94daa542a8d Tzvetomir Stoyanov (VMware 2023-12-19 2309) bpage->order = cpu_buffer->buffer->subbuf_order;
927e56db625322 Steven Rostedt (VMware 2018-04-04 2310)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2311) if (user_thread && fatal_signal_pending(current))
927e56db625322 Steven Rostedt (VMware 2018-04-04 2312) goto free_pages;
7a8e76a3829f10 Steven Rostedt 2008-09-29 2313 }
927e56db625322 Steven Rostedt (VMware 2018-04-04 2314) if (user_thread)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2315) clear_current_oom_origin();
7a8e76a3829f10 Steven Rostedt 2008-09-29 2316
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2317 return 0;
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2318
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2319 free_pages:
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2320 list_for_each_entry_safe(bpage, tmp, pages, list) {
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2321 list_del_init(&bpage->list);
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2322 free_buffer_page(bpage);
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2323 }
927e56db625322 Steven Rostedt (VMware 2018-04-04 2324) if (user_thread)
927e56db625322 Steven Rostedt (VMware 2018-04-04 2325) clear_current_oom_origin();
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2326
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2327 return -ENOMEM;
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2328 }
438ced1720b584 Vaibhav Nagarnaik 2012-02-02 2329
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki