[PATCH v6 15/22] perf lock: Move common lock contention code to new file
Posted by Ian Rogers 2 weeks ago
Avoid references from util code to builtin-lock that require python
stubs. Move the functions and related variables to
util/lock-contention.c. Add max_stack_depth parameter to
match_callstack_filter to avoid sharing a global variable.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c             | 137 +--------------------
 tools/perf/util/Build                 |   1 +
 tools/perf/util/bpf_lock_contention.c |   2 +-
 tools/perf/util/lock-contention.c     | 170 ++++++++++++++++++++++++++
 tools/perf/util/lock-contention.h     |  37 ++----
 tools/perf/util/python.c              |  17 ---
 6 files changed, 185 insertions(+), 179 deletions(-)
 create mode 100644 tools/perf/util/lock-contention.c

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 062e2b56a2ab..f66948b1fbed 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -46,15 +46,6 @@
 static struct perf_session *session;
 static struct target target;
 
-/* based on kernel/lockdep.c */
-#define LOCKHASH_BITS		12
-#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
-
-static struct hlist_head *lockhash_table;
-
-#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
-#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
-
 static struct rb_root		thread_stats;
 
 static bool combine_locks;
@@ -67,24 +58,13 @@ static unsigned long bpf_map_entries = MAX_ENTRIES;
 static int max_stack_depth = CONTENTION_STACK_DEPTH;
 static int stack_skip = CONTENTION_STACK_SKIP;
 static int print_nr_entries = INT_MAX / 2;
-static LIST_HEAD(callstack_filters);
 static const char *output_name = NULL;
 static FILE *lock_output;
 
-struct callstack_filter {
-	struct list_head list;
-	char name[];
-};
-
 static struct lock_filter filters;
 
 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
 
-static bool needs_callstack(void)
-{
-	return !list_empty(&callstack_filters);
-}
-
 static struct thread_stat *thread_stat_find(u32 tid)
 {
 	struct rb_node *node;
@@ -477,93 +457,6 @@ static struct lock_stat *pop_from_result(void)
 	return container_of(node, struct lock_stat, rb);
 }
 
-struct lock_stat *lock_stat_find(u64 addr)
-{
-	struct hlist_head *entry = lockhashentry(addr);
-	struct lock_stat *ret;
-
-	hlist_for_each_entry(ret, entry, hash_entry) {
-		if (ret->addr == addr)
-			return ret;
-	}
-	return NULL;
-}
-
-struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
-{
-	struct hlist_head *entry = lockhashentry(addr);
-	struct lock_stat *ret, *new;
-
-	hlist_for_each_entry(ret, entry, hash_entry) {
-		if (ret->addr == addr)
-			return ret;
-	}
-
-	new = zalloc(sizeof(struct lock_stat));
-	if (!new)
-		goto alloc_failed;
-
-	new->addr = addr;
-	new->name = strdup(name);
-	if (!new->name) {
-		free(new);
-		goto alloc_failed;
-	}
-
-	new->flags = flags;
-	new->wait_time_min = ULLONG_MAX;
-
-	hlist_add_head(&new->hash_entry, entry);
-	return new;
-
-alloc_failed:
-	pr_err("memory allocation failed\n");
-	return NULL;
-}
-
-bool match_callstack_filter(struct machine *machine, u64 *callstack)
-{
-	struct map *kmap;
-	struct symbol *sym;
-	u64 ip;
-	const char *arch = perf_env__arch(machine->env);
-
-	if (list_empty(&callstack_filters))
-		return true;
-
-	for (int i = 0; i < max_stack_depth; i++) {
-		struct callstack_filter *filter;
-
-		/*
-		 * In powerpc, the callchain saved by kernel always includes
-		 * first three entries as the NIP (next instruction pointer),
-		 * LR (link register), and the contents of LR save area in the
-		 * second stack frame. In certain scenarios its possible to have
-		 * invalid kernel instruction addresses in either LR or the second
-		 * stack frame's LR. In that case, kernel will store that address as
-		 * zero.
-		 *
-		 * The below check will continue to look into callstack,
-		 * incase first or second callstack index entry has 0
-		 * address for powerpc.
-		 */
-		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
-						(i != 1 && i != 2))))
-			break;
-
-		ip = callstack[i];
-		sym = machine__find_kernel_symbol(machine, ip, &kmap);
-		if (sym == NULL)
-			continue;
-
-		list_for_each_entry(filter, &callstack_filters, list) {
-			if (strstr(sym->name, filter->name))
-				return true;
-		}
-	}
-	return false;
-}
-
 struct trace_lock_handler {
 	/* it's used on CONFIG_LOCKDEP */
 	int (*acquire_event)(struct evsel *evsel,
@@ -1165,7 +1058,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
 		if (callstack == NULL)
 			return -ENOMEM;
 
-		if (!match_callstack_filter(machine, callstack)) {
+		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
 			free(callstack);
 			return 0;
 		}
@@ -2449,34 +2342,6 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
 	return ret;
 }
 
-static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
-			   int unset __maybe_unused)
-{
-	char *s, *tmp, *tok;
-	int ret = 0;
-
-	s = strdup(str);
-	if (s == NULL)
-		return -1;
-
-	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		struct callstack_filter *entry;
-
-		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
-		if (entry == NULL) {
-			pr_err("Memory allocation failure\n");
-			free(s);
-			return -1;
-		}
-
-		strcpy(entry->name, tok);
-		list_add_tail(&entry->list, &callstack_filters);
-	}
-
-	free(s);
-	return ret;
-}
-
 static int parse_output(const struct option *opt __maybe_unused, const char *str,
 			int unset __maybe_unused)
 {
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 340544a6f5ec..3c6cd8d81d88 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -121,6 +121,7 @@ perf-util-y += topdown.o
 perf-util-y += iostat.o
 perf-util-y += stream.o
 perf-util-y += kvm-stat.o
+perf-util-y += lock-contention.o
 perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
 perf-util-y += intel-pt-decoder/
 perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 41a1ad087895..37e17c56f106 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -458,7 +458,7 @@ int lock_contention_read(struct lock_contention *con)
 		if (con->save_callstack) {
 			bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
 
-			if (!match_callstack_filter(machine, stack_trace)) {
+			if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
 				con->nr_filtered += data.count;
 				goto next;
 			}
diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c
new file mode 100644
index 000000000000..841bb18b1f06
--- /dev/null
+++ b/tools/perf/util/lock-contention.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "env.h"
+#include "lock-contention.h"
+#include "machine.h"
+#include "symbol.h"
+
+#include <limits.h>
+#include <string.h>
+
+#include <linux/hash.h>
+#include <linux/zalloc.h>
+
+#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
+
+struct callstack_filter {
+	struct list_head list;
+	char name[];
+};
+
+static LIST_HEAD(callstack_filters);
+struct hlist_head *lockhash_table;
+
+int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
+		     int unset __maybe_unused)
+{
+	char *s, *tmp, *tok;
+	int ret = 0;
+
+	s = strdup(str);
+	if (s == NULL)
+		return -1;
+
+	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		struct callstack_filter *entry;
+
+		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
+		if (entry == NULL) {
+			pr_err("Memory allocation failure\n");
+			free(s);
+			return -1;
+		}
+
+		strcpy(entry->name, tok);
+		list_add_tail(&entry->list, &callstack_filters);
+	}
+
+	free(s);
+	return ret;
+}
+
+bool needs_callstack(void)
+{
+	return !list_empty(&callstack_filters);
+}
+
+struct lock_stat *lock_stat_find(u64 addr)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+	return NULL;
+}
+
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret, *new;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+
+	new = zalloc(sizeof(struct lock_stat));
+	if (!new)
+		goto alloc_failed;
+
+	new->addr = addr;
+	new->name = strdup(name);
+	if (!new->name) {
+		free(new);
+		goto alloc_failed;
+	}
+
+	new->flags = flags;
+	new->wait_time_min = ULLONG_MAX;
+
+	hlist_add_head(&new->hash_entry, entry);
+	return new;
+
+alloc_failed:
+	pr_err("memory allocation failed\n");
+	return NULL;
+}
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth)
+{
+	struct map *kmap;
+	struct symbol *sym;
+	u64 ip;
+	const char *arch = perf_env__arch(machine->env);
+
+	if (list_empty(&callstack_filters))
+		return true;
+
+	for (int i = 0; i < max_stack_depth; i++) {
+		struct callstack_filter *filter;
+
+		/*
+		 * In powerpc, the callchain saved by kernel always includes
+		 * first three entries as the NIP (next instruction pointer),
+		 * LR (link register), and the contents of LR save area in the
+		 * second stack frame. In certain scenarios its possible to have
+		 * invalid kernel instruction addresses in either LR or the second
+		 * stack frame's LR. In that case, kernel will store that address as
+		 * zero.
+		 *
+		 * The below check will continue to look into callstack,
+		 * incase first or second callstack index entry has 0
+		 * address for powerpc.
+		 */
+		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
+						(i != 1 && i != 2))))
+			break;
+
+		ip = callstack[i];
+		sym = machine__find_kernel_symbol(machine, ip, &kmap);
+		if (sym == NULL)
+			continue;
+
+		list_for_each_entry(filter, &callstack_filters, list) {
+			if (strstr(sym->name, filter->name))
+				return true;
+		}
+	}
+	return false;
+}
+
+#ifndef HAVE_BPF_SKEL
+int lock_contention_prepare(struct lock_contention *con __maybe_unused)
+{
+	return 0;
+}
+
+int lock_contention_start(void)
+{
+	return 0;
+}
+
+int lock_contention_stop(void)
+{
+	return 0;
+}
+
+int lock_contention_finish(struct lock_contention *con __maybe_unused)
+{
+	return 0;
+}
+
+int lock_contention_read(struct lock_contention *con __maybe_unused)
+{
+	return 0;
+}
+#endif  /* !HAVE_BPF_SKEL */
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 1a7248ff3889..bfa5c7db0a5d 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -67,10 +67,11 @@ struct lock_stat {
  */
 #define MAX_LOCK_DEPTH 48
 
-struct lock_stat *lock_stat_find(u64 addr);
-struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS		12
+#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
 
-bool match_callstack_filter(struct machine *machine, u64 *callstack);
+extern struct hlist_head *lockhash_table;
 
 /*
  * struct lock_seq_stat:
@@ -148,7 +149,14 @@ struct lock_contention {
 	bool save_callstack;
 };
 
-#ifdef HAVE_BPF_SKEL
+struct option;
+int parse_call_stack(const struct option *opt, const char *str, int unset);
+bool needs_callstack(void);
+
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth);
 
 int lock_contention_prepare(struct lock_contention *con);
 int lock_contention_start(void);
@@ -156,25 +164,4 @@ int lock_contention_stop(void);
 int lock_contention_read(struct lock_contention *con);
 int lock_contention_finish(struct lock_contention *con);
 
-#else  /* !HAVE_BPF_SKEL */
-
-static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
-{
-	return 0;
-}
-
-static inline int lock_contention_start(void) { return 0; }
-static inline int lock_contention_stop(void) { return 0; }
-static inline int lock_contention_finish(struct lock_contention *con __maybe_unused)
-{
-	return 0;
-}
-
-static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
-{
-	return 0;
-}
-
-#endif  /* HAVE_BPF_SKEL */
-
 #endif  /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 35d84a96dbec..91fd444615cd 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -18,7 +18,6 @@
 #include "mmap.h"
 #include "util/kwork.h"
 #include "util/sample.h"
-#include "util/lock-contention.h"
 #include <internal/lib.h>
 #include "../builtin.h"
 
@@ -1311,22 +1310,6 @@ struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork __maybe_unused,
 	return NULL;
 }
 
-bool match_callstack_filter(struct machine *machine __maybe_unused, u64 *callstack __maybe_unused)
-{
-	return false;
-}
-
-struct lock_stat *lock_stat_find(u64 addr __maybe_unused)
-{
-	return NULL;
-}
-
-struct lock_stat *lock_stat_findnew(u64 addr __maybe_unused, const char *name __maybe_unused,
-				int flags __maybe_unused)
-{
-	return NULL;
-}
-
 int cmd_inject(int argc __maybe_unused, const char *argv[] __maybe_unused)
 {
 	return -1;
-- 
2.47.0.277.g8800431eea-goog
Re: [PATCH v6 15/22] perf lock: Move common lock contention code to new file
Posted by Namhyung Kim 4 days, 21 hours ago
On Fri, Nov 08, 2024 at 10:18:02PM -0800, Ian Rogers wrote:
> Avoid references from util code to builtin-lock that require python
> stubs. Move the functions and related variables to
> util/lock-contention.c. Add max_stack_depth parameter to
> match_callstack_filter to avoid sharing a global variable.
> 
[SNIP]
> +#ifndef HAVE_BPF_SKEL
> +int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> +{
> +	return 0;
> +}
> +
> +int lock_contention_start(void)
> +{
> +	return 0;
> +}
> +
> +int lock_contention_stop(void)
> +{
> +	return 0;
> +}
> +
> +int lock_contention_finish(struct lock_contention *con __maybe_unused)
> +{
> +	return 0;
> +}
> +
> +int lock_contention_read(struct lock_contention *con __maybe_unused)
> +{
> +	return 0;
> +}
> +#endif  /* !HAVE_BPF_SKEL */

I still think it's the convention to have them in a header file as
static inline functions and reduce the #ifdef in the .c file.
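
For reference, this is roughly the shape the header had before this
patch (a minimal sketch of the pre-patch lock-contention.h, trimmed to
a few of the declarations):

#ifdef HAVE_BPF_SKEL
int lock_contention_prepare(struct lock_contention *con);
int lock_contention_start(void);
int lock_contention_stop(void);
#else  /* !HAVE_BPF_SKEL */
/* No-op stubs so callers compile unchanged without BPF skeletons. */
static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
{
	return 0;
}
static inline int lock_contention_start(void) { return 0; }
static inline int lock_contention_stop(void) { return 0; }
#endif  /* HAVE_BPF_SKEL */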

Thanks,
Namhyung


Re: [PATCH v6 15/22] perf lock: Move common lock contention code to new file
Posted by Ian Rogers 4 days, 21 hours ago
On Mon, Nov 18, 2024 at 4:23 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, Nov 08, 2024 at 10:18:02PM -0800, Ian Rogers wrote:
[SNIP]
> > +#ifndef HAVE_BPF_SKEL
> > +int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> > +{
> > +     return 0;
> > +}
> > +
> > +int lock_contention_start(void)
> > +{
> > +     return 0;
> > +}
> > +
> > +int lock_contention_stop(void)
> > +{
> > +     return 0;
> > +}
> > +
> > +int lock_contention_finish(struct lock_contention *con __maybe_unused)
> > +{
> > +     return 0;
> > +}
> > +
> > +int lock_contention_read(struct lock_contention *con __maybe_unused)
> > +{
> > +     return 0;
> > +}
> > +#endif  /* !HAVE_BPF_SKEL */
>
> I still think it's the convention to have them in a header file as
> static inline functions and reduce the #ifdef in the .c file.

Shouldn't minimizing ifdefs, and the associated cognitive load, in
header files be the priority, given that headers are #included many
times while the .c file is compiled only once?
Shouldn't a goal of the header file be to abstract away things like
HAVE_BPF_SKEL?
I'm not clear what the goal of having the functions in the header
file is. Performance? The code isn't going to run anyway. I feel
lock-contention.h is smaller and easier to read like this, but I also
don't care enough to fight. I made this change here because
lock-contention.h was being brought into python.c for the sake of
stubbing out functions that the header file was also stubbing out for
!HAVE_BPF_SKEL. A single stub felt like progress.
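
With the move, the header keeps a single unconditional set of
prototypes, along the lines of:

/* lock-contention.h: same prototypes whether or not HAVE_BPF_SKEL is set */
int lock_contention_prepare(struct lock_contention *con);
int lock_contention_start(void);
int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(struct lock_contention *con);

and the one #ifndef HAVE_BPF_SKEL block in lock-contention.c supplies
the no-op definitions.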

Thanks,
Ian

Re: [PATCH v6 15/22] perf lock: Move common lock contention code to new file
Posted by Namhyung Kim 4 days, 5 hours ago
On Mon, Nov 18, 2024 at 05:03:41PM -0800, Ian Rogers wrote:
> On Mon, Nov 18, 2024 at 4:23 PM Namhyung Kim <namhyung@kernel.org> wrote:
[SNIP]
> > On Fri, Nov 08, 2024 at 10:18:02PM -0800, Ian Rogers wrote:
> > > +#ifndef HAVE_BPF_SKEL
> > > +int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> > > +{
> > > +     return 0;
> > > +}
> > > +
> > > +int lock_contention_start(void)
> > > +{
> > > +     return 0;
> > > +}
> > > +
> > > +int lock_contention_stop(void)
> > > +{
> > > +     return 0;
> > > +}
> > > +
> > > +int lock_contention_finish(struct lock_contention *con __maybe_unused)
> > > +{
> > > +     return 0;
> > > +}
> > > +
> > > +int lock_contention_read(struct lock_contention *con __maybe_unused)
> > > +{
> > > +     return 0;
> > > +}
> > > +#endif  /* !HAVE_BPF_SKEL */
> >
> > I still think it's the convention to have them in a header file as
> > static inline functions and reduce the #ifdef in the .c file.
> 
> Shouldn't minimizing ifdefs, and the associated cognitive load, in
> header files be the priority, given that headers are #included many
> times while the .c file is compiled only once?
> Shouldn't a goal of the header file be to abstract away things like
> HAVE_BPF_SKEL?
> I'm not clear what the goal of having the functions in the header
> file is. Performance? The code isn't going to run anyway. I feel
> lock-contention.h is smaller and easier to read like this, but I also
> don't care enough to fight. I made this change here because
> lock-contention.h was being brought into python.c for the sake of
> stubbing out functions that the header file was also stubbing out for
> !HAVE_BPF_SKEL. A single stub felt like progress.

I think the binary may end up carrying the empty functions if we keep
them in the .c file, whereas compilers would optimize them away if
they were static inline functions.
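
For example, with a static inline stub a call site like the following
(a hypothetical caller, just to illustrate):

	if (lock_contention_prepare(&con) < 0)
		return -1;

can be folded away entirely at compile time, whereas the out-of-line
stub leaves a call instruction and an empty function body in the
object file unless the linker strips them.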

Thanks,
Namhyung

Re: [PATCH v6 15/22] perf lock: Move common lock contention code to new file
Posted by Ian Rogers 4 days, 4 hours ago
On Tue, Nov 19, 2024 at 9:00 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Mon, Nov 18, 2024 at 05:03:41PM -0800, Ian Rogers wrote:
> > On Mon, Nov 18, 2024 at 4:23 PM Namhyung Kim <namhyung@kernel.org> wrote:
> [SNIP]
> > > On Fri, Nov 08, 2024 at 10:18:02PM -0800, Ian Rogers wrote:
> > > > +#ifndef HAVE_BPF_SKEL
> > > > +int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> > > > +{
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +int lock_contention_start(void)
> > > > +{
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +int lock_contention_stop(void)
> > > > +{
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +int lock_contention_finish(struct lock_contention *con __maybe_unused)
> > > > +{
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +int lock_contention_read(struct lock_contention *con __maybe_unused)
> > > > +{
> > > > +     return 0;
> > > > +}
> > > > +#endif  /* !HAVE_BPF_SKEL */
> > >
> > > I still think it's the convention to have them in a header file as
> > > static inline functions and reduce the #ifdef in the .c file.
> >
> > Shouldn't minimizing ifdefs, and the associated cognitive load, in
> > header files be the priority, given that headers are #included many
> > times while the .c file is compiled only once?
> > Shouldn't a goal of the header file be to abstract away things like
> > HAVE_BPF_SKEL?
> > I'm not clear what the goal of having the functions in the header
> > file is. Performance? The code isn't going to run anyway. I feel
> > lock-contention.h is smaller and easier to read like this, but I also
> > don't care enough to fight. I made this change here because
> > lock-contention.h was being brought into python.c for the sake of
> > stubbing out functions that the header file was also stubbing out for
> > !HAVE_BPF_SKEL. A single stub felt like progress.
>
> I think the binary may end up carrying the empty functions if we keep
> them in the .c file, whereas compilers would optimize them away if
> they were static inline functions.

The functions will be 1 or 2 bytes each and can all be deduplicated,
and the linker can also garbage-collect them. It is better to optimize
code for readability than for wins like this.
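
For instance, assuming a build that opts in to per-function sections
(illustrative flags, not necessarily what the perf Makefile does):

	cc -ffunction-sections -c lock-contention.c
	cc -Wl,--gc-sections -o perf ...

any stubs that end up unreferenced are dropped at link time, and
gold's or lld's --icf=all can fold the remaining identical "return 0"
bodies into one.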

Thanks,
Ian