From nobody Wed May  7 21:15:26 2025
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 99205207662;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743539550; cv=none;
 b=Leim1qXfHPIn9HqnOBobgs/BihTFbXKfyD6he59gX6TcaOrO1w3thePGtDF+fcUaqSZarE+bNRUfDavdNcOhkZLGnOsezitbq/75b2t/tBiN+X/yFcsPCHqNurSzvL/MgT3LfDwp39JYBLbiY8xti8ZwOJkMlILM+j2TuiESzxE=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743539550; c=relaxed/simple;
	bh=WuikE8wOn9kp2dLyXBHxz62U7JuITFmA6597hRZHqUI=;
	h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version:
	 Content-Type;
 b=Kd8/dVHIEEk4szp7np2kmkUGsWFsP7T0KYozCzDU7cs2mOki/F+xgbE7Mo8VJHFOLc5nyTXMB5WUyPqZm3VpF+XsI7FzSDyPzHf+vT+hT3ZVeyk5ZQ1S4R4P2CgluNxsNzlUjdaX6mSETnbcztOvCJj3B+S18meWIp42DT+RhWY=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0FAEAC4CEE4;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Received: from rostedt by gandalf with local (Exim 4.98)
	(envelope-from <rostedt@goodmis.org>)
	id 1tziId-00000006IdJ-2xXy;
	Tue, 01 Apr 2025 16:33:31 -0400
Message-ID: <20250401203331.559044804@goodmis.org>
User-Agent: quilt/0.68
Date: Tue, 01 Apr 2025 16:25:50 -0400
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
 linux-trace-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
 Masami Hiramatsu <mhiramat@kernel.org>,
 Mark Rutland <mark.rutland@arm.com>,
 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
 Andrew Morton <akpm@linux-foundation.org>,
 Vincent Donnefort <vdonnefort@google.com>,
 Vlastimil Babka <vbabka@suse.cz>,
 Mike Rapoport <rppt@kernel.org>,
 Jann Horn <jannh@google.com>
Subject: [PATCH v3 1/5] tracing: Enforce the persistent ring buffer to be page
 aligned
References: <20250401202549.409271454@goodmis.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

From: Steven Rostedt <rostedt@goodmis.org>

Enforce that the address and the size of the memory used by the persistent
ring buffer are page aligned. Also update the documentation to reflect this
requirement.

Link: https://lore.kernel.org/all/CAHk-=3DwhUOfVucfJRt7E0AH+GV41ELmS4wJqxHD=
nui6Giddfkzw@mail.gmail.com/

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 ++
 Documentation/trace/debugging.rst               |  2 ++
 kernel/trace/trace.c                            | 12 ++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentatio=
n/admin-guide/kernel-parameters.txt
index fb8752b42ec8..71861643ef14 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7241,6 +7241,8 @@
 			This is just one of many ways that can clear memory. Make sure your sys=
tem
 			keeps the content of memory across reboots before relying on this optio=
n.
=20
+			NB: Both the mapped address and size must be page aligned for the archi=
tecture.
+
 			See also Documentation/trace/debugging.rst
=20
=20
diff --git a/Documentation/trace/debugging.rst b/Documentation/trace/debugg=
ing.rst
index 54fb16239d70..d54bc500af80 100644
--- a/Documentation/trace/debugging.rst
+++ b/Documentation/trace/debugging.rst
@@ -136,6 +136,8 @@ kernel, so only the same kernel is guaranteed to work i=
f the mapping is
 preserved. Switching to a different kernel version may find a different
 layout and mark the buffer as invalid.
=20
+NB: Both the mapped address and size must be page aligned for the architec=
ture.
+
 Using trace_printk() in the boot instance
 -----------------------------------------
 By default, the content of trace_printk() goes into the top level tracing
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 14c38fcd6f9e..1288e0aaadf8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10774,6 +10774,18 @@ __init static void enable_instances(void)
 		}
=20
 		if (start) {
+			/* Start and size must be page aligned */
+			if (start & ~PAGE_MASK) {
+				pr_warn("Tracing: mapping start addr %lx is not page aligned\n",
+					(unsigned long)start);
+				continue;
+			}
+			if (size & ~PAGE_MASK) {
+				pr_warn("Tracing: mapping size %lx is not page aligned\n",
+					(unsigned long)size);
+				continue;
+			}
+
 			addr =3D map_pages(start, size);
 			if (addr) {
 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of si=
ze 0x%lx\n",
--=20
2.47.2
From nobody Wed May  7 21:15:26 2025
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id AAE5720C002;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743539550; cv=none;
 b=RKxCS7KhNkPOjE5Rf/e8pXVbnnDPONXCvG8tGJMgGAUXykFT0WmnpTdXMYICmX4M4R8URRaPw4KF1uSHGwh1W6E9EI90YIGwm3m7A4J9hFOj/tnLRNQvWc2a4YfZBUl1FxHUIvkKC/+Fn+FZfLDsEz/C61jlLLe+PIpskNIytqU=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743539550; c=relaxed/simple;
	bh=eWruYTEMFqsg9MgWbzhFWe9qMXUyoTYzvL3kERLjFFU=;
	h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version:
	 Content-Type;
 b=uaEkP320qQ+pbdC8OMJF99ipun1+6jocmMzePq4ZavxZHA2u/PcS3PivTv+N+Sj/NsCyluQpIFmDTCz1zNIJFF2OPBi/PWzAqpUMpsGWGu+DkRN1XQx7JRQkrIbNWfEBSFBVJYNzD/NaSSBDB4oyNCF5CQuLqyxwb1T4W8bahFk=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 42F91C4CEED;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Received: from rostedt by gandalf with local (Exim 4.98)
	(envelope-from <rostedt@goodmis.org>)
	id 1tziId-00000006Ido-3guf;
	Tue, 01 Apr 2025 16:33:31 -0400
Message-ID: <20250401203331.728113626@goodmis.org>
User-Agent: quilt/0.68
Date: Tue, 01 Apr 2025 16:25:51 -0400
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
 linux-trace-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
 Masami Hiramatsu <mhiramat@kernel.org>,
 Mark Rutland <mark.rutland@arm.com>,
 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
 Andrew Morton <akpm@linux-foundation.org>,
 Vincent Donnefort <vdonnefort@google.com>,
 Vlastimil Babka <vbabka@suse.cz>,
 Mike Rapoport <rppt@kernel.org>,
 Jann Horn <jannh@google.com>
Subject: [PATCH v3 2/5] tracing: Have reserve_mem use phys_to_virt() and
 separate from memmap
 buffer
References: <20250401202549.409271454@goodmis.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

From: Steven Rostedt <rostedt@goodmis.org>

The reserve_mem kernel command line option may pass back a physical
address, but the memory is still part of the normal memory just like
using memblock_reserve() would be. This means that the physical memory
returned by the reserve_mem command line option can be converted directly
to virtual memory by simply using phys_to_virt().

When freeing the buffer allocated by reserve_mem, use free_reserved_area().

Because the persistent ring buffer can also be allocated via the memmap
option, which *is* different from normal memory as it cannot be added back
to the buddy system, it must be treated differently. It still needs to be
virtually mapped to have access to it. It also can not be freed nor can it
ever be memory mapped to user space.

Create a new trace_array flag called TRACE_ARRAY_FL_MEMMAP which gets set
if the buffer is created by the memmap option, and this will prevent the
buffer from being memory mapped by user space.

Also increment the ref count for memmap'ed buffers so that they can never
be freed.

Link: https://lore.kernel.org/all/Z-wFszhJ_9o4dc8O@kernel.org/

Suggested-by: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 28 ++++++++++++++++++++++------
 kernel/trace/trace.h |  1 +
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1288e0aaadf8..e33f3b092e2e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8492,6 +8492,10 @@ static int tracing_buffers_mmap(struct file *filp, s=
truct vm_area_struct *vma)
 	struct trace_iterator *iter =3D &info->iter;
 	int ret =3D 0;
=20
+	/* A memmap'ed buffer is not supported for user space mmap */
+	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+		return -ENODEV;
+
 	/* Currently the boot mapped buffer is not supported for mmap */
 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
 		return -ENODEV;
@@ -9601,8 +9605,12 @@ static void free_trace_buffers(struct trace_array *t=
r)
 	free_trace_buffer(&tr->max_buffer);
 #endif
=20
-	if (tr->range_addr_start)
-		vunmap((void *)tr->range_addr_start);
+	if (tr->range_addr_start) {
+		void *start =3D (void *)tr->range_addr_start;
+		void *end =3D start + tr->range_addr_size;
+
+		free_reserved_area(start, end, 0, tr->range_name);
+	}
 }
=20
 static void init_trace_flags_index(struct trace_array *tr)
@@ -10696,6 +10704,7 @@ static inline void do_allocate_snapshot(const char =
*name) { }
 __init static void enable_instances(void)
 {
 	struct trace_array *tr;
+	bool memmap_area =3D false;
 	char *curr_str;
 	char *name;
 	char *str;
@@ -10764,6 +10773,7 @@ __init static void enable_instances(void)
 					name);
 				continue;
 			}
+			memmap_area =3D true;
 		} else if (tok) {
 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
 				start =3D 0;
@@ -10786,7 +10796,10 @@ __init static void enable_instances(void)
 				continue;
 			}
=20
-			addr =3D map_pages(start, size);
+			if (memmap_area)
+				addr =3D map_pages(start, size);
+			else
+				addr =3D (unsigned long)phys_to_virt(start);
 			if (addr) {
 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of si=
ze 0x%lx\n",
 					name, &start, (unsigned long)size);
@@ -10813,10 +10826,13 @@ __init static void enable_instances(void)
 			update_printk_trace(tr);
=20
 		/*
-		 * If start is set, then this is a mapped buffer, and
-		 * cannot be deleted by user space, so keep the reference
-		 * to it.
+		 * memmap'd buffers can not be freed.
 		 */
+		if (memmap_area) {
+			tr->flags |=3D TRACE_ARRAY_FL_MEMMAP;
+			tr->ref++;
+		}
+
 		if (start) {
 			tr->flags |=3D TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
 			tr->range_name =3D no_free_ptr(rname);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ab7c7a1930cc..9d9dcfad6269 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -446,6 +446,7 @@ enum {
 	TRACE_ARRAY_FL_BOOT		=3D BIT(1),
 	TRACE_ARRAY_FL_LAST_BOOT	=3D BIT(2),
 	TRACE_ARRAY_FL_MOD_INIT		=3D BIT(3),
+	TRACE_ARRAY_FL_MEMMAP		=3D BIT(4),
 };
=20
 #ifdef CONFIG_MODULES
--=20
2.47.2
From nobody Wed May  7 21:15:26 2025
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id AAEC120E32B;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743539550; cv=none;
 b=EOglPOhRwRiDDv5pwXehJl0Ylo5mWv4UkhhA+tKVtJARqhEvvvYKhTj1AfzWt00CI1HmThuUXq9DOM1jqcjD5xbXHTmjb4A1j4kGiR6yyJX3VpBWjzuy8h5z8D6NZgN7Hg6Ly8HEHo7E79aQJ5rNc7juAxR8fpdoHV5BNwl/AMk=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743539550; c=relaxed/simple;
	bh=ZFM9eZYL0PvWeKjx6LDsMsM3kKFi6vLW7UHPBAVvvts=;
	h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version:
	 Content-Type;
 b=a5h5y52L4b7xTkimNULRA/OGCtdvnVheWds3W0vzw9jlji2h5kn7TjK/9J08kWFeD2tRClir0F5tBv6PbKKKoHrH80vTu2VkavFYYMZqxDCTACr2lVLMxTDU9tH9ec7FDrTXOxuVFzlgB+z5iO7uhX2KJ7e6RRnZv3JCYhBLv1k=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 521E6C4CEF1;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Received: from rostedt by gandalf with local (Exim 4.98)
	(envelope-from <rostedt@goodmis.org>)
	id 1tziIe-00000006IeK-0CvO;
	Tue, 01 Apr 2025 16:33:32 -0400
Message-ID: <20250401203331.901028151@goodmis.org>
User-Agent: quilt/0.68
Date: Tue, 01 Apr 2025 16:25:52 -0400
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
 linux-trace-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
 Masami Hiramatsu <mhiramat@kernel.org>,
 Mark Rutland <mark.rutland@arm.com>,
 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
 Andrew Morton <akpm@linux-foundation.org>,
 Vincent Donnefort <vdonnefort@google.com>,
 Vlastimil Babka <vbabka@suse.cz>,
 Mike Rapoport <rppt@kernel.org>,
 Jann Horn <jannh@google.com>
Subject: [PATCH v3 3/5] tracing: Use vmap_page_range() to map memmap ring
 buffer
References: <20250401202549.409271454@goodmis.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

From: Steven Rostedt <rostedt@goodmis.org>

The code to map the physical memory retrieved by memmap currently
allocates an array of pages to cover the physical memory and then calls
vmap() to map it to a virtual address. Instead of using this temporary
array of struct page descriptors, simply use vmap_page_range() that can
directly map the contiguous physical memory to a virtual address.

Link: https://lore.kernel.org/all/CAHk-=3DwhUOfVucfJRt7E0AH+GV41ELmS4wJqxHD=
nui6Giddfkzw@mail.gmail.com/

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e33f3b092e2e..1d7d2b772a74 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -50,6 +50,7 @@
 #include <linux/irq_work.h>
 #include <linux/workqueue.h>
 #include <linux/sort.h>
+#include <linux/io.h> /* vmap_page_range() */
=20
 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
=20
@@ -9803,29 +9804,27 @@ static int instance_mkdir(const char *name)
 	return ret;
 }
=20
-static u64 map_pages(u64 start, u64 size)
+static u64 map_pages(unsigned long start, unsigned long size)
 {
-	struct page **pages;
-	phys_addr_t page_start;
-	unsigned int page_count;
-	unsigned int i;
-	void *vaddr;
+        unsigned long vmap_start, vmap_end;
+	struct vm_struct *area;
+	int ret;
=20
-	page_count =3D DIV_ROUND_UP(size, PAGE_SIZE);
+        area =3D get_vm_area(size, VM_IOREMAP);
+        if (!area)
+                return 0;
=20
-	page_start =3D start;
-	pages =3D kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
-	if (!pages)
-		return 0;
+        vmap_start =3D (unsigned long) area->addr;
+        vmap_end =3D vmap_start + size;
=20
-	for (i =3D 0; i < page_count; i++) {
-		phys_addr_t addr =3D page_start + i * PAGE_SIZE;
-		pages[i] =3D pfn_to_page(addr >> PAGE_SHIFT);
-	}
-	vaddr =3D vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
-	kfree(pages);
+        ret =3D vmap_page_range(vmap_start, vmap_end,
+			      start, pgprot_nx(PAGE_KERNEL));
+        if (ret < 0) {
+                free_vm_area(area);
+                return 0;
+        }
=20
-	return (u64)(unsigned long)vaddr;
+	return (u64)vmap_start;
 }
=20
 /**
--=20
2.47.2
From nobody Wed May  7 21:15:26 2025
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DF7A02144BB;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743539550; cv=none;
 b=Pxadi8OiciUuDgltcyTpaCXaNXQl0dFZVqbR4xM0cuio8kJFhmly1h0L1O52QbUsZsmLUKatO07rsfjrFy4cQ7yGsr6x6xDQgHbpcW+jR/nfEaWpk68nYhJujqjUO92WVc+HlKOXYooM/OWXL/gzc55HREZQT+CwYnYqX9FcSZQ=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743539550; c=relaxed/simple;
	bh=W0oIFGkZsh/VYsAVVtrN6VmCzdve/U9Qslfp6t8425I=;
	h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version:
	 Content-Type;
 b=G/YhEyBKtwnWwvGAKv/ZnHIXoamDA6jr1Mc/pISyUkKyX1YpejNCs8roLD3D7nspl3+TzQl9+01RdAjqP1JA5FFexEezrIm32QB5dTGcCDlk0Pebq2ZPxFn8/ePZgmhXqAafKspN0o66MCGwaX4qeDo01RUay5OJEgSx6qTuymk=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
Received: by smtp.kernel.org (Postfix) with ESMTPSA id 97BFAC4CEF4;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Received: from rostedt by gandalf with local (Exim 4.98)
	(envelope-from <rostedt@goodmis.org>)
	id 1tziIe-00000006Ieo-0wNL;
	Tue, 01 Apr 2025 16:33:32 -0400
Message-ID: <20250401203332.072456470@goodmis.org>
User-Agent: quilt/0.68
Date: Tue, 01 Apr 2025 16:25:53 -0400
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
 linux-trace-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
 Masami Hiramatsu <mhiramat@kernel.org>,
 Mark Rutland <mark.rutland@arm.com>,
 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
 Andrew Morton <akpm@linux-foundation.org>,
 Vincent Donnefort <vdonnefort@google.com>,
 Vlastimil Babka <vbabka@suse.cz>,
 Mike Rapoport <rppt@kernel.org>,
 Jann Horn <jannh@google.com>
Subject: [PATCH v3 4/5] ring-buffer: Use flush_kernel_vmap_range() over
 flush_dcache_folio()
References: <20250401202549.409271454@goodmis.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

From: Steven Rostedt <rostedt@goodmis.org>

Some architectures do not have data cache coherency between user and
kernel space. For these architectures, the cache needs to be flushed on
both the kernel and user addresses so that user space can see the updates
the kernel has made.

Instead of using flush_dcache_folio() and playing with virt_to_folio()
within the call to that function, use flush_kernel_vmap_range() which
takes the virtual address and does the work for those architectures that
need it.

Link: https://lore.kernel.org/all/CAG48ez3w0my4Rwttbc5tEbNsme6tc0mrSN95thjX=
UFaJ3aQ6SA@mail.gmail.com/

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f25966b3a1fc..d4b0f7b55cce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6016,7 +6016,7 @@ static void rb_update_meta_page(struct ring_buffer_pe=
r_cpu *cpu_buffer)
 	meta->read =3D cpu_buffer->read;
=20
 	/* Some archs do not have data cache coherency between kernel and user-sp=
ace */
-	flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page));
+	flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE);
 }
=20
 static void
@@ -7319,7 +7319,8 @@ int ring_buffer_map_get_reader(struct trace_buffer *b=
uffer, int cpu)
=20
 out:
 	/* Some archs do not have data cache coherency between kernel and user-sp=
ace */
-	flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page));
+	flush_kernel_vmap_range(cpu_buffer->reader_page->page,
+				buffer->subbuf_size + BUF_PAGE_HDR_SIZE);
=20
 	rb_update_meta_page(cpu_buffer);
=20
--=20
2.47.2
From nobody Wed May  7 21:15:26 2025
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
 [10.30.226.201])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id CF319214221;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743539550; cv=none;
 b=TaXtyx4HU6N171CHkg5KNF1eRiJfPV6VCF90nDPXLnk5DPtdRd1pnVMqvdBpZuUBzkjA4EJw6/qzCSQ2tO5mQhULTWyuQSQoU1KDuln4b3FRqZE8BbD8N46h4iqcsKkE2AkC5yVFagAB5hKEWRqmyoV/LhGDzzMii1Uei7b9hCo=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743539550; c=relaxed/simple;
	bh=Gp13WAunse4VyJM266TXO63VppQOLLyoxgk8UnWLdwU=;
	h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version:
	 Content-Type;
 b=bNByDEs1OSwv1/71huR0jAsDIEu3AMD1brlomXgPKAN4Aq7Pnz065OpbQMZt2kfJXMdWltEPqVk4majIDLZaPtse0C/GQVTENignWsgKKQjmh3eECUayzP6aWy+bHoErRC2trXMcca4Vb2na9Rv7zqWLVTzS4GHqsgvk8bCaUVs=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 arc=none smtp.client-ip=10.30.226.201
Received: by smtp.kernel.org (Postfix) with ESMTPSA id B5F47C4CEE4;
	Tue,  1 Apr 2025 20:32:30 +0000 (UTC)
Received: from rostedt by gandalf with local (Exim 4.98)
	(envelope-from <rostedt@goodmis.org>)
	id 1tziIe-00000006IfI-1fDZ;
	Tue, 01 Apr 2025 16:33:32 -0400
Message-ID: <20250401203332.246646011@goodmis.org>
User-Agent: quilt/0.68
Date: Tue, 01 Apr 2025 16:25:54 -0400
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
 linux-trace-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
 Masami Hiramatsu <mhiramat@kernel.org>,
 Mark Rutland <mark.rutland@arm.com>,
 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
 Andrew Morton <akpm@linux-foundation.org>,
 Vincent Donnefort <vdonnefort@google.com>,
 Vlastimil Babka <vbabka@suse.cz>,
 Mike Rapoport <rppt@kernel.org>,
 Jann Horn <jannh@google.com>
Subject: [PATCH v3 5/5] ring-buffer: Allow reserve_mem persistent ring buffers
 to be mmapped
References: <20250401202549.409271454@goodmis.org>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

From: Steven Rostedt <rostedt@goodmis.org>

When the persistent ring buffer is created from the memory returned by
reserve_mem there is nothing prohibiting it to be memory mapped to user
space. The memory is the same as the pages allocated by alloc_page().

The way the memory is managed by the ring buffer code is slightly
different though and needs to be addressed.

The persistent memory uses the page->id for its own purpose, whereas the
user mmap buffer currently uses that for the subbuf array mapped to user
space. If the buffer is a persistent buffer, use the page index into that
buffer as the identifier instead of the page->id.

That is, the page->id for a persistent buffer represents the order in
which the buffer appears in the linked list. ->id =3D=3D 0 means it is
the reader page.
When a reader page is swapped, the new reader page's ->id gets zero, and
the old reader page gets the ->id of the page that it swapped with.

For the user space mapping, the ->id is the index of where the buffer was
mapped in user space, and it does not change while it is mapped.

Since the persistent buffer is fixed in its location, the index of where
a page is in the memory range can be used as the "id" to put in the meta
page array, and it can be mapped in the same order to user space as it is
in the persistent memory.

A new rb_page_id() helper function is used to get and set the id depending
on if the page is a normal memory allocated buffer or a physical memory
mapped buffer.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 49 ++++++++++++++++++++++++++++++++++----
 kernel/trace/trace.c       |  4 ----
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d4b0f7b55cce..f7a10f754066 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6000,6 +6000,39 @@ static void rb_clear_buffer_page(struct buffer_page =
*page)
 	page->read =3D 0;
 }
=20
+/*
+ * When the buffer is memory mapped to user space, each sub buffer
+ * has a unique id that is used by the meta data to tell the user
+ * where the current reader page is.
+ *
+ * For a normal allocated ring buffer, the id is saved in the buffer page
+ * id field, and updated via this function.
+ *
+ * But for a fixed memory mapped buffer, the id is already assigned for
+ * fixed memory ordering in the memory layout and can not be used. Instead
+ * the index of where the page lies in the memory layout is used.
+ *
+ * For the normal pages, set the buffer page id with the passed in @id
+ * value and return that.
+ *
+ * For fixed memory mapped pages, get the page index in the memory layout
+ * and return that as the id.
+ */
+static int rb_page_id(struct ring_buffer_per_cpu *cpu_buffer,
+		      struct buffer_page *bpage, int id)
+{
+	/*
+	 * For boot buffers, the id is the index,
+	 * otherwise, set the buffer page with this id
+	 */
+	if (cpu_buffer->ring_meta)
+		id =3D rb_meta_subbuf_idx(cpu_buffer->ring_meta, bpage->page);
+	else
+		bpage->id =3D id;
+
+	return id;
+}
+
 static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct trace_buffer_meta *meta =3D cpu_buffer->meta_page;
@@ -6008,7 +6041,9 @@ static void rb_update_meta_page(struct ring_buffer_pe=
r_cpu *cpu_buffer)
 		return;
=20
 	meta->reader.read =3D cpu_buffer->reader_page->read;
-	meta->reader.id =3D cpu_buffer->reader_page->id;
+	meta->reader.id =3D rb_page_id(cpu_buffer, cpu_buffer->reader_page,
+				     cpu_buffer->reader_page->id);
+
 	meta->reader.lost_events =3D cpu_buffer->lost_events;
=20
 	meta->entries =3D local_read(&cpu_buffer->entries);
@@ -6924,23 +6959,29 @@ static void rb_setup_ids_meta_page(struct ring_buff=
er_per_cpu *cpu_buffer,
 	struct trace_buffer_meta *meta =3D cpu_buffer->meta_page;
 	unsigned int nr_subbufs =3D cpu_buffer->nr_pages + 1;
 	struct buffer_page *first_subbuf, *subbuf;
+	int cnt =3D 0;
 	int id =3D 0;
=20
-	subbuf_ids[id] =3D (unsigned long)cpu_buffer->reader_page->page;
-	cpu_buffer->reader_page->id =3D id++;
+	id =3D rb_page_id(cpu_buffer, cpu_buffer->reader_page, id);
+	subbuf_ids[id++] =3D (unsigned long)cpu_buffer->reader_page->page;
+	cnt++;
=20
 	first_subbuf =3D subbuf =3D rb_set_head_page(cpu_buffer);
 	do {
+		id =3D rb_page_id(cpu_buffer, subbuf, id);
+
 		if (WARN_ON(id >=3D nr_subbufs))
 			break;
=20
 		subbuf_ids[id] =3D (unsigned long)subbuf->page;
-		subbuf->id =3D id;
=20
 		rb_inc_page(&subbuf);
 		id++;
+		cnt++;
 	} while (subbuf !=3D first_subbuf);
=20
+	WARN_ON(cnt !=3D nr_subbufs);
+
 	/* install subbuf ID to kern VA translation */
 	cpu_buffer->subbuf_ids =3D subbuf_ids;
=20
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1d7d2b772a74..5e7f8113c024 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8497,10 +8497,6 @@ static int tracing_buffers_mmap(struct file *filp, s=
truct vm_area_struct *vma)
 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
 		return -ENODEV;
=20
-	/* Currently the boot mapped buffer is not supported for mmap */
-	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
-		return -ENODEV;
-
 	ret =3D get_snapshot_map(iter->tr);
 	if (ret)
 		return ret;
--=20
2.47.2