Previously, the mapped ring-buffer layout caused misalignment between
the meta-page and the sub-buffers whenever the sub-buffer size was
bigger than PAGE_SIZE: the meta-page spans only a single page, so the
sub-buffers following it were not aligned on their own size. This
prevented hardware with larger TLB entries from utilizing them
effectively.

Add zero-page padding between the meta-page and the sub-buffers so that
each sub-buffer stays aligned on its size. Also update the ring-buffer
map_test to verify that padding.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
---
This is based on the mm-unstable branch [1] as it depends on David's work [2]
for allowing the zero-page in vm_insert_page().
[1] https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git
[2] https://lore.kernel.org/all/20240522125713.775114-1-david@redhat.com
v1 -> v2:
* Fix unsequenced modification and access to 'p' (s390 build)
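
Not part of the patch, but to make the new layout concrete, here is a
minimal userspace sketch of a consumer. It is an illustration only: it
assumes nothing beyond the trace_buffer_meta fields used in the diff
below, plus the uapi header being installed as <linux/trace_mmap.h>.

#include <sys/mman.h>
#include <unistd.h>
#include <linux/trace_mmap.h>

/* Map the data region of a per-CPU ring-buffer, now sub-buffer aligned. */
static void *map_cpu_data(int cpu_fd, struct trace_buffer_meta **out_meta)
{
        struct trace_buffer_meta *meta;
        unsigned long data_len;
        void *data;

        /* The meta-page still starts at file offset 0. */
        meta = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, cpu_fd, 0);
        if (meta == MAP_FAILED)
                return NULL;

        /*
         * meta_page_size now equals subbuf_size, so the data region begins
         * at a sub-buffer aligned file offset. Bytes between meta_struct_len
         * and meta_page_size are zero-page padding.
         */
        data_len = (unsigned long)meta->subbuf_size * meta->nr_subbufs;
        data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, cpu_fd,
                    meta->meta_page_size);
        if (data == MAP_FAILED) {
                munmap(meta, getpagesize());
                return NULL;
        }

        *out_meta = meta;
        return data;
}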
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7345a8b625fb..c1116e76fe17 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6148,10 +6148,10 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
 	/* install subbuf ID to kern VA translation */
 	cpu_buffer->subbuf_ids = subbuf_ids;
 
-	meta->meta_page_size = PAGE_SIZE;
 	meta->meta_struct_len = sizeof(*meta);
 	meta->nr_subbufs = nr_subbufs;
 	meta->subbuf_size = cpu_buffer->buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
+	meta->meta_page_size = meta->subbuf_size;
 
 	rb_update_meta_page(cpu_buffer);
 }
@@ -6238,6 +6238,12 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 	    !(vma->vm_flags & VM_MAYSHARE))
 		return -EPERM;
 
+	subbuf_order = cpu_buffer->buffer->subbuf_order;
+	subbuf_pages = 1 << subbuf_order;
+
+	if (subbuf_order && pgoff % subbuf_pages)
+		return -EINVAL;
+
 	/*
 	 * Make sure the mapping cannot become writable later. Also tell the VM
 	 * to not touch these pages (VM_DONTCOPY | VM_DONTEXPAND).
@@ -6247,11 +6253,8 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 
 	lockdep_assert_held(&cpu_buffer->mapping_lock);
 
-	subbuf_order = cpu_buffer->buffer->subbuf_order;
-	subbuf_pages = 1 << subbuf_order;
-
 	nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
-	nr_pages = ((nr_subbufs) << subbuf_order) - pgoff + 1; /* + meta-page */
+	nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
 
 	vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	if (!vma_pages || vma_pages > nr_pages)
@@ -6264,20 +6267,24 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 		return -ENOMEM;
 
 	if (!pgoff) {
+		unsigned long meta_page_padding;
+
 		pages[p++] = virt_to_page(cpu_buffer->meta_page);
 
 		/*
-		 * TODO: Align sub-buffers on their size, once
-		 * vm_insert_pages() supports the zero-page.
+		 * Pad with the zero-page to align the meta-page with the
+		 * sub-buffers.
 		 */
-	} else {
-		/* Skip the meta-page */
-		pgoff--;
+		meta_page_padding = subbuf_pages - 1;
+		while (meta_page_padding-- && p < nr_pages) {
+			unsigned long __maybe_unused zero_addr =
+				vma->vm_start + (PAGE_SIZE * p);
 
-		if (pgoff % subbuf_pages) {
-			err = -EINVAL;
-			goto out;
+			pages[p++] = ZERO_PAGE(zero_addr);
 		}
+	} else {
+		/* Skip the meta-page */
+		pgoff -= subbuf_pages;
 
 		s += pgoff / subbuf_pages;
 	}
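
Not part of the patch, but to make the new page accounting concrete, a
worked example (the numbers are hypothetical):

/*
 * subbuf_order = 2 on a 4K-page system, i.e. 16K sub-buffers and
 * subbuf_pages = 4, with nr_subbufs = 8 (7 + the reader sub-buffer).
 *
 * Mapping from pgoff = 0:
 *   nr_pages = ((8 + 1) << 2) - 0 = 36
 *   page 0        the meta-page
 *   pages 1-3     zero-page padding
 *   pages 4-35    sub-buffers, each one starting 16K-aligned
 *
 * Mapping the data only now means pgoff = subbuf_pages = 4 (one full
 * sub-buffer worth of pages instead of the single meta-page before),
 * hence the new "pgoff -= subbuf_pages" when skipping the meta-page.
 */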
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index a9006fa7097e..4bb0192e43f3 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -228,6 +228,20 @@ TEST_F(map, data_mmap)
 	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
 		    desc->cpu_fd, meta_len);
 	ASSERT_EQ(data, MAP_FAILED);
+
+	/* Verify meta-page padding */
+	if (desc->meta->meta_page_size > getpagesize()) {
+		void *addr;
+
+		data_len = desc->meta->meta_page_size;
+		data = mmap(NULL, data_len,
+			    PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
+		ASSERT_NE(data, MAP_FAILED);
+
+		addr = (void *)((unsigned long)data + getpagesize());
+		ASSERT_EQ(*((int *)addr), 0);
+		munmap(data, data_len);
+	}
 }
 
 FIXTURE(snapshot) {
base-commit: c65920c76a977c2b73c3a8b03b4c0c00cc1285ed
--
2.45.2.803.g4e1b14247a-goog
On Fri, 28 Jun 2024 11:46:11 +0100
Vincent Donnefort <vdonnefort@google.com> wrote:
> diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
> index a9006fa7097e..4bb0192e43f3 100644
> --- a/tools/testing/selftests/ring-buffer/map_test.c
> +++ b/tools/testing/selftests/ring-buffer/map_test.c
> @@ -228,6 +228,20 @@ TEST_F(map, data_mmap)
>  	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
>  		    desc->cpu_fd, meta_len);
>  	ASSERT_EQ(data, MAP_FAILED);
> +
> +	/* Verify meta-page padding */
> +	if (desc->meta->meta_page_size > getpagesize()) {
> +		void *addr;
> +
> +		data_len = desc->meta->meta_page_size;
> +		data = mmap(NULL, data_len,
> +			    PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
> +		ASSERT_NE(data, MAP_FAILED);
> +
> +		addr = (void *)((unsigned long)data + getpagesize());
> +		ASSERT_EQ(*((int *)addr), 0);

Should we make this a test that the entire page is zero?

	for (int i = desc->meta->meta_struct_len;
	     i < desc->meta->meta_page_size; i += sizeof(int))
		ASSERT_EQ(*(int *)(data + i), 0);

?

> +		munmap(data, data_len);
> +	}
>  }

Also, looking at the init: if for some reason (I highly doubt it will
happen) meta_struct_len becomes bigger than page_size, we should update
the init section to:

	/* Handle the case where meta_struct_len is greater than page size */
	if (page_size < desc->meta->meta_struct_len) {
		/* meta_page_size is >= meta_struct_len */
		int meta_page_size = desc->meta->meta_page_size;

		munmap(desc->meta, page_size);
		page_size = meta_page_size;

		map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
		if (map == MAP_FAILED)
			return -errno;

		desc->meta = (struct trace_buffer_meta *)map;
	}

-- Steve
Improve the ring-buffer meta-page test coverage by checking that the
entire padding region is 0, instead of just looking at the first 4
bytes.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
---
Hi,

I saw you sent "Align meta-page to sub-buffers for improved TLB usage" to
linux-next, so here's a follow-up patch addressing your comments. I'm not
sure whether you want to squash it or put it on top.
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index 4bb0192e43f3..ba12fd31de87 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -231,15 +231,15 @@ TEST_F(map, data_mmap)
 
 	/* Verify meta-page padding */
 	if (desc->meta->meta_page_size > getpagesize()) {
-		void *addr;
-
 		data_len = desc->meta->meta_page_size;
 		data = mmap(NULL, data_len,
 			    PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
 		ASSERT_NE(data, MAP_FAILED);
 
-		addr = (void *)((unsigned long)data + getpagesize());
-		ASSERT_EQ(*((int *)addr), 0);
+		for (int i = desc->meta->meta_struct_len;
+		     i < desc->meta->meta_page_size; i += sizeof(int))
+			ASSERT_EQ(*(int *)(data + i), 0);
+
 		munmap(data, data_len);
 	}
 }
base-commit: 2a07e30c19f391af26517c409fd66e401c6f4ee7
prerequisite-patch-id: 16b79d676c5faf3b57443b576976c7522fcd5a4b
--
2.46.0.295.g3b9ea8a38a-goog
Handle the case where the meta-page content is bigger than the system
page-size. This lays the groundwork for extending the features covered
by the meta-page.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
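
Not part of the patch, but as an illustration of the remap path in the
diff below (the numbers are hypothetical):

/*
 * On a 4K-page system with 16K sub-buffers, meta_page_size is 16K.
 * Should meta_struct_len ever grow past 4K:
 *
 *   1st pass: mmap(page_size = 4K)  -> meta_struct_len > 4K, munmap()
 *   2nd pass: mmap(page_size = 16K) -> the whole meta-page content
 *             fits, return 0
 */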
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index ba12fd31de87..d10a847130fb 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -92,12 +92,22 @@ int tracefs_cpu_map(struct tracefs_cpu_map_desc *desc, int cpu)
 	if (desc->cpu_fd < 0)
 		return -ENODEV;
 
+again:
 	map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
 	if (map == MAP_FAILED)
 		return -errno;
 
 	desc->meta = (struct trace_buffer_meta *)map;
 
+	/* the meta-page is bigger than the original mapping */
+	if (page_size < desc->meta->meta_struct_len) {
+		int meta_page_size = desc->meta->meta_page_size;
+
+		munmap(desc->meta, page_size);
+		page_size = meta_page_size;
+		goto again;
+	}
+
 	return 0;
 }
--
2.46.0.295.g3b9ea8a38a-goog
On Fri, 28 Jun 2024 11:46:11 +0100
Vincent Donnefort <vdonnefort@google.com> wrote:

> This is based on the mm-unstable branch [1] as it depends on David's work [2]
> for allowing the zero-page in vm_insert_page().
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git
> [2] https://lore.kernel.org/all/20240522125713.775114-1-david@redhat.com

I'll hold off until the 6.12 merge window before pushing this, so that the
above is guaranteed to be there.

-- Steve