[PATCH bpf-next RFC 1/2] perf: Use extern perf_callchain_entry for get_perf_callchain

Tao Chen posted 2 patches 2 months ago
There is a newer version of this series
[PATCH bpf-next RFC 1/2] perf: Use extern perf_callchain_entry for get_perf_callchain
Posted by Tao Chen 2 months ago
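The BPF stack map wants to supply its own buffer to get_perf_callchain()
so that it can avoid an unnecessary copy. Add an external_entry parameter
that lets the caller pass a perf_callchain_entry directly. When
external_entry is NULL, the entry is obtained from get_callchain_entry()
and released with put_callchain_entry() as before, so existing callers
simply pass NULL.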

Signed-off-by: Tao Chen <chen.dylane@linux.dev>
---
 include/linux/perf_event.h |  5 +++--
 kernel/bpf/stackmap.c      |  4 ++--
 kernel/events/callchain.c  | 18 ++++++++++++------
 kernel/events/core.c       |  2 +-
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ec9d9602568..ca69ad2723c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1719,8 +1719,9 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
 extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
 extern struct perf_callchain_entry *
-get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark);
+get_perf_callchain(struct pt_regs *regs, struct perf_callchain_entry *external_entry,
+		   u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask,
+		   bool add_mark);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 2e182a3ac4c..e6e40f22826 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -314,7 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	if (max_depth > sysctl_perf_event_max_stack)
 		max_depth = sysctl_perf_event_max_stack;
 
-	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+	trace = get_perf_callchain(regs, NULL, 0, kernel, user, max_depth,
 				   false, false);
 
 	if (unlikely(!trace))
@@ -451,7 +451,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	else if (kernel && task)
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
-		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+		trace = get_perf_callchain(regs, NULL, 0, kernel, user, max_depth,
 					   crosstask, false);
 
 	if (unlikely(!trace) || trace->nr < skip) {
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 6c83ad674d0..fe5d2d58deb 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -217,16 +217,21 @@ static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr
 }
 
 struct perf_callchain_entry *
-get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark)
+get_perf_callchain(struct pt_regs *regs, struct perf_callchain_entry *external_entry,
+		   u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask,
+		   bool add_mark)
 {
 	struct perf_callchain_entry *entry;
 	struct perf_callchain_entry_ctx ctx;
 	int rctx, start_entry_idx;
 
-	entry = get_callchain_entry(&rctx);
-	if (!entry)
-		return NULL;
+	if (external_entry) {
+		entry = external_entry;
+	} else {
+		entry = get_callchain_entry(&rctx);
+		if (!entry)
+			return NULL;
+	}
 
 	ctx.entry     = entry;
 	ctx.max_stack = max_stack;
@@ -262,7 +267,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 	}
 
 exit_put:
-	put_callchain_entry(rctx);
+	if (!external_entry)
+		put_callchain_entry(rctx);
 
 	return entry;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1d354778dcd..08ce44db18f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8204,7 +8204,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	if (!kernel && !user)
 		return &__empty_callchain;
 
-	callchain = get_perf_callchain(regs, 0, kernel, user,
+	callchain = get_perf_callchain(regs, NULL, 0, kernel, user,
 				       max_stack, crosstask, true);
 	return callchain ?: &__empty_callchain;
 }
-- 
2.48.1