[PATCH 1/3] perf disasm: Introduce symbol__disassemble_objdump()

Arnaldo Carvalho de Melo posted 3 patches 1 week, 5 days ago
[PATCH 1/3] perf disasm: Introduce symbol__disassemble_objdump()
Posted by Arnaldo Carvalho de Melo 1 week, 5 days ago
From: Arnaldo Carvalho de Melo <acme@redhat.com>

With the first disassemble method in perf, the parsing of objdump
output, just like we have for llvm and capstone.

This paves the way to allow the user to specify what disassemblers are
preferred and to also to at some point allow building without the
objdump method.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/disasm.c | 168 ++++++++++++++++++++-------------------
 1 file changed, 88 insertions(+), 80 deletions(-)

diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index a525b80b934fdb5f..36cf61602c17fe69 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -2045,17 +2045,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
 	return new_line;
 }
 
-int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
+				       struct annotate_args *args)
 {
 	struct annotation_options *opts = &annotate_opts;
 	struct map *map = args->ms.map;
 	struct dso *dso = map__dso(map);
 	char *command;
 	FILE *file;
-	char symfs_filename[PATH_MAX];
-	struct kcore_extract kce;
-	bool delete_extract = false;
-	bool decomp = false;
 	int lineno = 0;
 	char *fileloc = NULL;
 	int nline;
@@ -2070,77 +2067,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		NULL,
 	};
 	struct child_process objdump_process;
-	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
-
-	if (err)
-		return err;
-
-	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
-		 symfs_filename, sym->name, map__unmap_ip(map, sym->start),
-		 map__unmap_ip(map, sym->end));
-
-	pr_debug("annotating [%p] %30s : [%p] %30s\n",
-		 dso, dso__long_name(dso), sym, sym->name);
-
-	if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
-		return symbol__disassemble_bpf(sym, args);
-	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
-		return symbol__disassemble_bpf_image(sym, args);
-	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
-		return -1;
-	} else if (dso__is_kcore(dso)) {
-		kce.kcore_filename = symfs_filename;
-		kce.addr = map__rip_2objdump(map, sym->start);
-		kce.offs = sym->start;
-		kce.len = sym->end - sym->start;
-		if (!kcore_extract__create(&kce)) {
-			delete_extract = true;
-			strlcpy(symfs_filename, kce.extract_filename,
-				sizeof(symfs_filename));
-		}
-	} else if (dso__needs_decompress(dso)) {
-		char tmp[KMOD_DECOMP_LEN];
-
-		if (dso__decompress_kmodule_path(dso, symfs_filename,
-						 tmp, sizeof(tmp)) < 0)
-			return -1;
-
-		decomp = true;
-		strcpy(symfs_filename, tmp);
-	}
-
-	/*
-	 * For powerpc data type profiling, use the dso__data_read_offset
-	 * to read raw instruction directly and interpret the binary code
-	 * to understand instructions and register fields. For sort keys as
-	 * type and typeoff, disassemble to mnemonic notation is
-	 * not required in case of powerpc.
-	 */
-	if (arch__is(args->arch, "powerpc")) {
-		extern const char *sort_order;
-
-		if (sort_order && !strstr(sort_order, "sym")) {
-			err = symbol__disassemble_raw(symfs_filename, sym, args);
-			if (err == 0)
-				goto out_remove_tmp;
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-			err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
-			if (err == 0)
-				goto out_remove_tmp;
-#endif
-		}
-	}
-
-#ifdef HAVE_LIBLLVM_SUPPORT
-	err = symbol__disassemble_llvm(symfs_filename, sym, args);
-	if (err == 0)
-		goto out_remove_tmp;
-#endif
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-	err = symbol__disassemble_capstone(symfs_filename, sym, args);
-	if (err == 0)
-		goto out_remove_tmp;
-#endif
+	int err;
 
 	err = asprintf(&command,
 		 "%s %s%s --start-address=0x%016" PRIx64
@@ -2163,13 +2090,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 
 	if (err < 0) {
 		pr_err("Failure allocating memory for the command to run\n");
-		goto out_remove_tmp;
+		return err;
 	}
 
 	pr_debug("Executing: %s\n", command);
 
 	objdump_argv[2] = command;
-	objdump_argv[4] = symfs_filename;
+	objdump_argv[4] = filename;
 
 	/* Create a pipe to read from for stdout */
 	memset(&objdump_process, 0, sizeof(objdump_process));
@@ -2207,8 +2134,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 			break;
 
 		/* Skip lines containing "filename:" */
-		match = strstr(line, symfs_filename);
-		if (match && match[strlen(symfs_filename)] == ':')
+		match = strstr(line, filename);
+		if (match && match[strlen(filename)] == ':')
 			continue;
 
 		expanded_line = strim(line);
@@ -2253,6 +2180,87 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 
 out_free_command:
 	free(command);
+	return err;
+}
+
+int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+	struct map *map = args->ms.map;
+	struct dso *dso = map__dso(map);
+	char symfs_filename[PATH_MAX];
+	bool delete_extract = false;
+	struct kcore_extract kce;
+	bool decomp = false;
+	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+	if (err)
+		return err;
+
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+		 symfs_filename, sym->name, map__unmap_ip(map, sym->start),
+		 map__unmap_ip(map, sym->end));
+
+	pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
+
+	if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+		return symbol__disassemble_bpf(sym, args);
+	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
+		return symbol__disassemble_bpf_image(sym, args);
+	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
+		return -1;
+	} else if (dso__is_kcore(dso)) {
+		kce.addr = map__rip_2objdump(map, sym->start);
+		kce.kcore_filename = symfs_filename;
+		kce.len = sym->end - sym->start;
+		kce.offs = sym->start;
+
+		if (!kcore_extract__create(&kce)) {
+			delete_extract = true;
+			strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
+		}
+	} else if (dso__needs_decompress(dso)) {
+		char tmp[KMOD_DECOMP_LEN];
+
+		if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
+			return -1;
+
+		decomp = true;
+		strcpy(symfs_filename, tmp);
+	}
+
+	/*
+	 * For powerpc data type profiling, use the dso__data_read_offset to
+	 * read raw instruction directly and interpret the binary code to
+	 * understand instructions and register fields. For sort keys as type
+	 * and typeoff, disassemble to mnemonic notation is not required in
+	 * case of powerpc.
+	 */
+	if (arch__is(args->arch, "powerpc")) {
+		extern const char *sort_order;
+
+		if (sort_order && !strstr(sort_order, "sym")) {
+			err = symbol__disassemble_raw(symfs_filename, sym, args);
+			if (err == 0)
+				goto out_remove_tmp;
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+			err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
+			if (err == 0)
+				goto out_remove_tmp;
+#endif
+		}
+	}
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+	err = symbol__disassemble_llvm(symfs_filename, sym, args);
+	if (err == 0)
+		goto out_remove_tmp;
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+	err = symbol__disassemble_capstone(symfs_filename, sym, args);
+	if (err == 0)
+		goto out_remove_tmp;
+#endif
+	err = symbol__disassemble_objdump(symfs_filename, sym, args);
 
 out_remove_tmp:
 	if (decomp)
-- 
2.47.0
Re: [PATCH 1/3] perf disasm: Introduce symbol__disassemble_objdump()
Posted by Ian Rogers 1 week, 5 days ago
On Mon, Nov 11, 2024 at 7:17 AM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> From: Arnaldo Carvalho de Melo <acme@redhat.com>
>
> With the first disassemble method in perf, the parsing of objdump
> output, just like we have for llvm and capstone.
>
> This paves the way to allow the user to specify what disassemblers are
> preferred and to also to at some point allow building without the
> objdump method.
>
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Cc: Ian Rogers <irogers@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Kan Liang <kan.liang@linux.intel.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Steinar H. Gunderson <sesse@google.com>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Acked-by: Ian Rogers <irogers@google.com>

Nit below relating to a pre-existing condition in the code.

> ---
>  tools/perf/util/disasm.c | 168 ++++++++++++++++++++-------------------
>  1 file changed, 88 insertions(+), 80 deletions(-)
>
> diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
> index a525b80b934fdb5f..36cf61602c17fe69 100644
> --- a/tools/perf/util/disasm.c
> +++ b/tools/perf/util/disasm.c
> @@ -2045,17 +2045,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
>         return new_line;
>  }
>
> -int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
> +                                      struct annotate_args *args)
>  {
>         struct annotation_options *opts = &annotate_opts;
>         struct map *map = args->ms.map;
>         struct dso *dso = map__dso(map);
>         char *command;
>         FILE *file;
> -       char symfs_filename[PATH_MAX];
> -       struct kcore_extract kce;
> -       bool delete_extract = false;
> -       bool decomp = false;
>         int lineno = 0;
>         char *fileloc = NULL;
>         int nline;
> @@ -2070,77 +2067,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>                 NULL,
>         };
>         struct child_process objdump_process;
> -       int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
> -
> -       if (err)
> -               return err;
> -
> -       pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
> -                symfs_filename, sym->name, map__unmap_ip(map, sym->start),
> -                map__unmap_ip(map, sym->end));
> -
> -       pr_debug("annotating [%p] %30s : [%p] %30s\n",
> -                dso, dso__long_name(dso), sym, sym->name);
> -
> -       if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
> -               return symbol__disassemble_bpf(sym, args);
> -       } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
> -               return symbol__disassemble_bpf_image(sym, args);
> -       } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
> -               return -1;
> -       } else if (dso__is_kcore(dso)) {
> -               kce.kcore_filename = symfs_filename;
> -               kce.addr = map__rip_2objdump(map, sym->start);
> -               kce.offs = sym->start;
> -               kce.len = sym->end - sym->start;
> -               if (!kcore_extract__create(&kce)) {
> -                       delete_extract = true;
> -                       strlcpy(symfs_filename, kce.extract_filename,
> -                               sizeof(symfs_filename));
> -               }
> -       } else if (dso__needs_decompress(dso)) {
> -               char tmp[KMOD_DECOMP_LEN];
> -
> -               if (dso__decompress_kmodule_path(dso, symfs_filename,
> -                                                tmp, sizeof(tmp)) < 0)
> -                       return -1;
> -
> -               decomp = true;
> -               strcpy(symfs_filename, tmp);
> -       }
> -
> -       /*
> -        * For powerpc data type profiling, use the dso__data_read_offset
> -        * to read raw instruction directly and interpret the binary code
> -        * to understand instructions and register fields. For sort keys as
> -        * type and typeoff, disassemble to mnemonic notation is
> -        * not required in case of powerpc.
> -        */
> -       if (arch__is(args->arch, "powerpc")) {
> -               extern const char *sort_order;
> -
> -               if (sort_order && !strstr(sort_order, "sym")) {
> -                       err = symbol__disassemble_raw(symfs_filename, sym, args);
> -                       if (err == 0)
> -                               goto out_remove_tmp;
> -#ifdef HAVE_LIBCAPSTONE_SUPPORT
> -                       err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
> -                       if (err == 0)
> -                               goto out_remove_tmp;
> -#endif
> -               }
> -       }
> -
> -#ifdef HAVE_LIBLLVM_SUPPORT
> -       err = symbol__disassemble_llvm(symfs_filename, sym, args);
> -       if (err == 0)
> -               goto out_remove_tmp;
> -#endif
> -#ifdef HAVE_LIBCAPSTONE_SUPPORT
> -       err = symbol__disassemble_capstone(symfs_filename, sym, args);
> -       if (err == 0)
> -               goto out_remove_tmp;
> -#endif
> +       int err;
>
>         err = asprintf(&command,
>                  "%s %s%s --start-address=0x%016" PRIx64
> @@ -2163,13 +2090,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>
>         if (err < 0) {
>                 pr_err("Failure allocating memory for the command to run\n");
> -               goto out_remove_tmp;
> +               return err;
>         }
>
>         pr_debug("Executing: %s\n", command);
>
>         objdump_argv[2] = command;
> -       objdump_argv[4] = symfs_filename;
> +       objdump_argv[4] = filename;
>
>         /* Create a pipe to read from for stdout */
>         memset(&objdump_process, 0, sizeof(objdump_process));
> @@ -2207,8 +2134,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>                         break;
>
>                 /* Skip lines containing "filename:" */
> -               match = strstr(line, symfs_filename);
> -               if (match && match[strlen(symfs_filename)] == ':')
> +               match = strstr(line, filename);
> +               if (match && match[strlen(filename)] == ':')
>                         continue;
>
>                 expanded_line = strim(line);
> @@ -2253,6 +2180,87 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>
>  out_free_command:
>         free(command);
> +       return err;
> +}
> +
> +int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> +{
> +       struct map *map = args->ms.map;
> +       struct dso *dso = map__dso(map);
> +       char symfs_filename[PATH_MAX];
> +       bool delete_extract = false;
> +       struct kcore_extract kce;
> +       bool decomp = false;
> +       int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
> +
> +       if (err)
> +               return err;
> +
> +       pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
> +                symfs_filename, sym->name, map__unmap_ip(map, sym->start),
> +                map__unmap_ip(map, sym->end));
> +
> +       pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
> +
> +       if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
> +               return symbol__disassemble_bpf(sym, args);
> +       } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
> +               return symbol__disassemble_bpf_image(sym, args);
> +       } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
> +               return -1;
> +       } else if (dso__is_kcore(dso)) {
> +               kce.addr = map__rip_2objdump(map, sym->start);
> +               kce.kcore_filename = symfs_filename;
> +               kce.len = sym->end - sym->start;
> +               kce.offs = sym->start;
> +
> +               if (!kcore_extract__create(&kce)) {
> +                       delete_extract = true;
> +                       strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
> +               }
> +       } else if (dso__needs_decompress(dso)) {
> +               char tmp[KMOD_DECOMP_LEN];
> +
> +               if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
> +                       return -1;
> +
> +               decomp = true;
> +               strcpy(symfs_filename, tmp);
> +       }
> +
> +       /*
> +        * For powerpc data type profiling, use the dso__data_read_offset to
> +        * read raw instruction directly and interpret the binary code to
> +        * understand instructions and register fields. For sort keys as type
> +        * and typeoff, disassemble to mnemonic notation is not required in
> +        * case of powerpc.
> +        */
> +       if (arch__is(args->arch, "powerpc")) {
> +               extern const char *sort_order;
> +
> +               if (sort_order && !strstr(sort_order, "sym")) {
> +                       err = symbol__disassemble_raw(symfs_filename, sym, args);
> +                       if (err == 0)
> +                               goto out_remove_tmp;
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> +                       err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
> +                       if (err == 0)
> +                               goto out_remove_tmp;
> +#endif
> +               }
> +       }
> +
> +#ifdef HAVE_LIBLLVM_SUPPORT
> +       err = symbol__disassemble_llvm(symfs_filename, sym, args);
> +       if (err == 0)
> +               goto out_remove_tmp;
> +#endif
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> +       err = symbol__disassemble_capstone(symfs_filename, sym, args);
> +       if (err == 0)
> +               goto out_remove_tmp;
> +#endif
> +       err = symbol__disassemble_objdump(symfs_filename, sym, args);

This sure does read like the symbol will be disassembled 3 times if
those ifdefs are defined. Is there anyway to make the code look more
intuitive?

Thanks,
Ian

>
>  out_remove_tmp:
>         if (decomp)
> --
> 2.47.0
>
Re: [PATCH 1/3] perf disasm: Introduce symbol__disassemble_objdump()
Posted by Arnaldo Carvalho de Melo 1 week, 5 days ago
On Mon, Nov 11, 2024 at 08:15:53AM -0800, Ian Rogers wrote:
> On Mon, Nov 11, 2024 at 7:17 AM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > From: Arnaldo Carvalho de Melo <acme@redhat.com>
> >
> > With the first disassemble method in perf, the parsing of objdump
> > output, just like we have for llvm and capstone.
> >
> > This paves the way to allow the user to specify what disassemblers are
> > preferred and to also to at some point allow building without the
> > objdump method.
> >
> > Cc: Adrian Hunter <adrian.hunter@intel.com>
> > Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Cc: Kan Liang <kan.liang@linux.intel.com>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Cc: Steinar H. Gunderson <sesse@google.com>
> > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> 
> Acked-by: Ian Rogers <irogers@google.com>
> 
> Nit below relating to a pre-existing condition in the code.

<SNIP>

> > +#ifdef HAVE_LIBLLVM_SUPPORT
> > +       err = symbol__disassemble_llvm(symfs_filename, sym, args);
> > +       if (err == 0)
> > +               goto out_remove_tmp;
> > +#endif
> > +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> > +       err = symbol__disassemble_capstone(symfs_filename, sym, args);
> > +       if (err == 0)
> > +               goto out_remove_tmp;
> > +#endif
> > +       err = symbol__disassemble_objdump(symfs_filename, sym, args);

> This sure does read like the symbol will be disassembled 3 times if
> those ifdefs are defined. Is there anyway to make the code look more
> intuitive?

This will end up being rewritten, the end result is a loop where the
list of disassemblers to try is iterated and as soon as one succeeeds
(returns zero), the loop is exited.

- Arnaldo