[RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump

Alexandre Chartre posted 13 patches 8 months ago
There is a newer version of this series
[RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump
Posted by Alexandre Chartre 8 months ago
objtool executes the objdump command to disassemble code. Use libopcodes
instead to have more control over the disassembly scope and output.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 tools/objtool/Makefile                  |   2 +-
 tools/objtool/arch/loongarch/decode.c   |   6 +
 tools/objtool/arch/powerpc/decode.c     |   6 +
 tools/objtool/arch/x86/decode.c         |   7 +
 tools/objtool/check.c                   |   4 +-
 tools/objtool/disas.c                   | 186 +++++++++++++++---------
 tools/objtool/include/objtool/arch.h    |   5 +
 tools/objtool/include/objtool/check.h   |   5 +
 tools/objtool/include/objtool/objtool.h |   4 +
 9 files changed, 154 insertions(+), 71 deletions(-)

diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 8c20361dd100..00350fc7c662 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -34,7 +34,7 @@ INCLUDES := -I$(srctree)/tools/include \
 # is passed here to match a legacy behavior.
 WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
 OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
-OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) -lopcodes
 
 # Allow old libelf to be used:
 elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index b6fdc68053cc..bf5ac6750512 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -386,4 +386,10 @@ unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *tabl
 	default:
 		return reloc->sym->offset + reloc_addend(reloc);
 	}
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_loongarch,
+			       bfd_mach_loongarch32, bfd_mach_loongarch64,
+			       NULL);
 }
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index c851c51d4bd3..c0fcab2d643c 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -127,4 +127,10 @@ unsigned int arch_reloc_size(struct reloc *reloc)
 	default:
 		return 8;
 	}
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_powerpc,
+			       bfd_mach_ppc, bfd_mach_ppc64,
+			       NULL);
 }
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..6c13c67ed9b9 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -880,3 +880,10 @@ unsigned int arch_reloc_size(struct reloc *reloc)
 		return 8;
 	}
 }
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_i386,
+			       bfd_mach_i386_i386, bfd_mach_x86_64,
+			       "att");
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 085fcc1b643b..9cfac23185b8 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4701,8 +4701,6 @@ int check(struct objtool_file *file)
 			goto out;
 	}
 
-	free_insns(file);
-
 	if (opts.stats) {
 		printf("nr_insns_visited: %ld\n", nr_insns_visited);
 		printf("nr_cfi: %ld\n", nr_cfi);
@@ -4726,5 +4724,7 @@ int check(struct objtool_file *file)
 		disas_context_destroy(disas_ctx);
 	}
 
+	free_insns(file);
+
 	return ret;
 }
diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c
index ed74554bccbf..f2eb1050ce11 100644
--- a/tools/objtool/disas.c
+++ b/tools/objtool/disas.c
@@ -4,17 +4,52 @@
  */
 
 #include <objtool/arch.h>
+#include <objtool/check.h>
 #include <objtool/warn.h>
 
+#include <bfd.h>
 #include <linux/string.h>
+#include <tools/dis-asm-compat.h>
 
 struct disas_context {
 	struct objtool_file *file;
+	disassembler_ftype disassembler;
+	struct disassemble_info info;
 };
 
+/*
+ * Initialize disassemble info arch, mach (32 or 64-bit) and options.
+ */
+int disas_info_init(struct disassemble_info *dinfo,
+		    int arch, int mach32, int mach64,
+		    const char *options)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct objtool_file *file = dctx->file;
+
+	dinfo->arch = arch;
+
+	switch (file->elf->ehdr.e_ident[EI_CLASS]) {
+	case ELFCLASS32:
+		dinfo->mach = mach32;
+		break;
+	case ELFCLASS64:
+		dinfo->mach = mach64;
+		break;
+	default:
+		return -1;
+	}
+
+	dinfo->disassembler_options = options;
+
+	return 0;
+}
+
 struct disas_context *disas_context_create(struct objtool_file *file)
 {
 	struct disas_context *dctx;
+	struct disassemble_info *dinfo;
+	int err;
 
 	dctx = malloc(sizeof(*dctx));
 	if (!dctx) {
@@ -23,8 +58,49 @@ struct disas_context *disas_context_create(struct objtool_file *file)
 	}
 
 	dctx->file = file;
+	dinfo = &dctx->info;
+
+	init_disassemble_info_compat(dinfo, stdout,
+				     (fprintf_ftype)fprintf,
+				     fprintf_styled);
+
+	dinfo->read_memory_func = buffer_read_memory;
+	dinfo->application_data = dctx;
+
+	/*
+	 * bfd_openr() is not used to avoid doing ELF data processing
+	 * and caching that has already been done. Here, we just need
+	 * to identify the target file so we call an arch specific
+	 * function to fill some disassemble info (arch, mach).
+	 */
+
+	dinfo->arch = bfd_arch_unknown;
+	dinfo->mach = 0;
+
+	err = arch_disas_info_init(dinfo);
+	if (err || dinfo->arch == bfd_arch_unknown || dinfo->mach == 0) {
+		WARN("failed to init disassembly arch\n");
+		goto error;
+	}
+
+	dinfo->endian = (file->elf->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) ?
+		BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
+
+	disassemble_init_for_target(dinfo);
+
+	dctx->disassembler = disassembler(dinfo->arch,
+					       dinfo->endian == BFD_ENDIAN_BIG,
+					       dinfo->mach, NULL);
+	if (!dctx->disassembler) {
+		WARN("failed to create disassembler function\n");
+		goto error;
+	}
 
 	return dctx;
+
+error:
+	free(dctx);
+	return NULL;
 }
 
 void disas_context_destroy(struct disas_context *dctx)
@@ -32,60 +108,54 @@ void disas_context_destroy(struct disas_context *dctx)
 	free(dctx);
 }
 
-/* 'funcs' is a space-separated list of function names */
-static void disas_funcs(const char *funcs)
+/*
+ * Disassemble a single instruction. Return the size of the instruction.
+ */
+static size_t disas_insn(struct disas_context *dctx,
+			 struct instruction *insn)
 {
-	const char *objdump_str, *cross_compile;
-	int size, ret;
-	char *cmd;
-
-	cross_compile = getenv("CROSS_COMPILE");
-	if (!cross_compile)
-		cross_compile = "";
-
-	objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
-			"BEGIN { split(_funcs, funcs); }"
-			"/^$/ { func_match = 0; }"
-			"/<.*>:/ { "
-				"f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
-				"for (i in funcs) {"
-					"if (funcs[i] == f) {"
-						"func_match = 1;"
-						"base = strtonum(\"0x\" $1);"
-						"break;"
-					"}"
-				"}"
-			"}"
-			"{"
-				"if (func_match) {"
-					"addr = strtonum(\"0x\" $1);"
-					"printf(\"%%04x \", addr - base);"
-					"print;"
-				"}"
-			"}' 1>&2";
-
-	/* fake snprintf() to calculate the size */
-	size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
-	if (size <= 0) {
-		WARN("objdump string size calculation failed");
-		return;
-	}
-
-	cmd = malloc(size);
+	disassembler_ftype disasm = dctx->disassembler;
+	struct disassemble_info *dinfo = &dctx->info;
+
+	/*
+	 * Set the disassembler buffer to read data from the section
+	 * containing the instruction to disassemble.
+	 */
+	dinfo->buffer = insn->sec->data->d_buf;
+	dinfo->buffer_vma = 0;
+	dinfo->buffer_length = insn->sec->sh.sh_size;
+
+	return disasm(insn->offset, &dctx->info);
+}
 
-	/* real snprintf() */
-	snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
-	ret = system(cmd);
-	if (ret) {
-		WARN("disassembly failed: %d", ret);
-		return;
+/*
+ * Disassemble a function.
+ */
+static void disas_func(struct disas_context *dctx, struct symbol *func)
+{
+	struct instruction *insn;
+	size_t addr, size;
+
+	printf("%s:\n", func->name);
+	sym_for_each_insn(dctx->file, func, insn) {
+
+		addr = insn->offset;
+		printf(" %6lx:  %s+0x%-6lx      ",
+		       addr, func->name, addr - func->offset);
+		size = disas_insn(dctx, insn);
+		printf("\n");
+		if (size != insn->len)
+			WARN("inconsistent insn size (%ld and %d)\n", size, insn->len);
 	}
+	printf("\n");
 }
 
+/*
+ * Disassemble all warned functions.
+ */
 void disas_warned_funcs(struct disas_context *dctx)
 {
 	struct symbol *sym;
-	char *funcs = NULL, *tmp;
 
 	if (!dctx) {
 		ERROR("disassembly context is not defined");
@@ -93,27 +163,7 @@ void disas_warned_funcs(struct disas_context *dctx)
 	}
 
 	for_each_sym(dctx->file, sym) {
-		if (sym->warned) {
-			if (!funcs) {
-				funcs = malloc(strlen(sym->name) + 1);
-				if (!funcs) {
-					ERROR_GLIBC("malloc");
-					return;
-				}
-				strcpy(funcs, sym->name);
-			} else {
-				tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
-				if (!tmp) {
-					ERROR_GLIBC("malloc");
-					return;
-				}
-				sprintf(tmp, "%s %s", funcs, sym->name);
-				free(funcs);
-				funcs = tmp;
-			}
-		}
+		if (sym->warned)
+			disas_func(dctx, sym);
 	}
-
-	if (funcs)
-		disas_funcs(funcs);
 }
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..aecf8fc29571 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -6,6 +6,8 @@
 #ifndef _ARCH_H
 #define _ARCH_H
 
+#include <bfd.h>
+#include <dis-asm.h>
 #include <stdbool.h>
 #include <linux/list.h>
 #include <objtool/objtool.h>
@@ -98,7 +100,10 @@ int arch_rewrite_retpolines(struct objtool_file *file);
 
 bool arch_pc_relative_reloc(struct reloc *reloc);
 
+
 unsigned int arch_reloc_size(struct reloc *reloc);
 unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
 
+int arch_disas_info_init(struct disassemble_info *dinfo);
+
 #endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 00fb745e7233..5290ac1ebbc1 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -125,4 +125,9 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruc
 	     insn && insn->sec == _sec;					\
 	     insn = next_insn_same_sec(file, insn))
 
+#define sym_for_each_insn(file, sym, insn)				\
+	for (insn = find_insn(file, sym->sec, sym->offset);		\
+	     insn && insn->offset < sym->offset + sym->len;		\
+	     insn = next_insn_same_sec(file, insn))
+
 #endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index f5ab71f07f5c..0b404cfd81c0 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -48,8 +48,12 @@ int orc_dump(const char *objname);
 int orc_create(struct objtool_file *file);
 
 struct disas_context;
+struct disassemble_info;
 struct disas_context *disas_context_create(struct objtool_file *file);
 void disas_context_destroy(struct disas_context *dctx);
 void disas_warned_funcs(struct disas_context *dctx);
+int disas_info_init(struct disassemble_info *dinfo,
+		    int arch, int mach32, int mach64,
+		    const char *options);
 
 #endif /* _OBJTOOL_H */
-- 
2.43.5
Re: [RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump
Posted by Peter Zijlstra 8 months ago
On Fri, Jun 06, 2025 at 05:34:30PM +0200, Alexandre Chartre wrote:
> objtool executes the objdump command to disassemble code. Use libopcodes
> instead to have more control about the disassembly scope and output.
> 
> Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
> ---
>  tools/objtool/Makefile                  |   2 +-
>  tools/objtool/arch/loongarch/decode.c   |   6 +
>  tools/objtool/arch/powerpc/decode.c     |   6 +
>  tools/objtool/arch/x86/decode.c         |   7 +
>  tools/objtool/check.c                   |   4 +-
>  tools/objtool/disas.c                   | 186 +++++++++++++++---------
>  tools/objtool/include/objtool/arch.h    |   5 +
>  tools/objtool/include/objtool/check.h   |   5 +
>  tools/objtool/include/objtool/objtool.h |   4 +
>  9 files changed, 154 insertions(+), 71 deletions(-)
> 
> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
> index 8c20361dd100..00350fc7c662 100644
> --- a/tools/objtool/Makefile
> +++ b/tools/objtool/Makefile
> @@ -34,7 +34,7 @@ INCLUDES := -I$(srctree)/tools/include \
>  # is passed here to match a legacy behavior.
>  WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
>  OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
> -OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
> +OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) -lopcodes

Would it be possible to make this optional? Such that when people do not
have libopcodes installed they can still build the kernel.

Or is libopcodes a mandatory part of any binutils installation?
Re: [RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump
Posted by Alexandre Chartre 8 months ago
On 6/11/25 14:23, Peter Zijlstra wrote:
> On Fri, Jun 06, 2025 at 05:34:30PM +0200, Alexandre Chartre wrote:
>> objtool executes the objdump command to disassemble code. Use libopcodes
>> instead to have more control about the disassembly scope and output.
>>
>> Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
>> ---
>>   tools/objtool/Makefile                  |   2 +-
>>   tools/objtool/arch/loongarch/decode.c   |   6 +
>>   tools/objtool/arch/powerpc/decode.c     |   6 +
>>   tools/objtool/arch/x86/decode.c         |   7 +
>>   tools/objtool/check.c                   |   4 +-
>>   tools/objtool/disas.c                   | 186 +++++++++++++++---------
>>   tools/objtool/include/objtool/arch.h    |   5 +
>>   tools/objtool/include/objtool/check.h   |   5 +
>>   tools/objtool/include/objtool/objtool.h |   4 +
>>   9 files changed, 154 insertions(+), 71 deletions(-)
>>
>> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
>> index 8c20361dd100..00350fc7c662 100644
>> --- a/tools/objtool/Makefile
>> +++ b/tools/objtool/Makefile
>> @@ -34,7 +34,7 @@ INCLUDES := -I$(srctree)/tools/include \
>>   # is passed here to match a legacy behavior.
>>   WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
>>   OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
>> -OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
>> +OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) -lopcodes
> 
> Would it be possible to make this optional? Such that when people do not
> have libopcodes installed they can still build the kernel.
> 
> Or is libopcodes a mandatory part of any binutils installation?

I guess that libopcodes is mandatory because it is used by 'as' (and also objdump).
But I can check whether it is actually present anyway.

alex.
Re: [RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump
Posted by Josh Poimboeuf 8 months ago
On Wed, Jun 11, 2025 at 03:35:38PM +0200, Alexandre Chartre wrote:
> 
> On 6/11/25 14:23, Peter Zijlstra wrote:
> > On Fri, Jun 06, 2025 at 05:34:30PM +0200, Alexandre Chartre wrote:
> > > objtool executes the objdump command to disassemble code. Use libopcodes
> > > instead to have more control about the disassembly scope and output.
> > > 
> > > Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
> > > ---
> > >   tools/objtool/Makefile                  |   2 +-
> > >   tools/objtool/arch/loongarch/decode.c   |   6 +
> > >   tools/objtool/arch/powerpc/decode.c     |   6 +
> > >   tools/objtool/arch/x86/decode.c         |   7 +
> > >   tools/objtool/check.c                   |   4 +-
> > >   tools/objtool/disas.c                   | 186 +++++++++++++++---------
> > >   tools/objtool/include/objtool/arch.h    |   5 +
> > >   tools/objtool/include/objtool/check.h   |   5 +
> > >   tools/objtool/include/objtool/objtool.h |   4 +
> > >   9 files changed, 154 insertions(+), 71 deletions(-)
> > > 
> > > diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
> > > index 8c20361dd100..00350fc7c662 100644
> > > --- a/tools/objtool/Makefile
> > > +++ b/tools/objtool/Makefile
> > > @@ -34,7 +34,7 @@ INCLUDES := -I$(srctree)/tools/include \
> > >   # is passed here to match a legacy behavior.
> > >   WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
> > >   OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
> > > -OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
> > > +OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) -lopcodes
> > 
> > Would it be possible to make this optional? Such that when people do not
> > have libopcodes installed they can still build the kernel.
> > 
> > Or is libopcodes a mandatory part of any binutils installation?
> 
> I guess that libopcodes is mandatory because it is used by as (and also objdump).
> But I can check if it is effectively present anyway.

libopcodes should always be present, but compiling/linking against it
shouldn't be possible unless a binutils devel pkg is installed.

So I think Peter's basically asking for what I asked for: make all this
optional depending on whether the binutils devel pkg is installed, and
print a helpful error message if somebody tries to use it without that package.

-- 
Josh
Re: [RFC 03/13] objtool: Disassemble code with libopcodes instead of running objdump
Posted by Josh Poimboeuf 8 months ago
On Fri, Jun 06, 2025 at 05:34:30PM +0200, Alexandre Chartre wrote:
> +	dctx->disassembler = disassembler(dinfo->arch,
> +					       dinfo->endian == BFD_ENDIAN_BIG,
> +					       dinfo->mach, NULL);

These lines should be aligned like:

	dctx->disassembler = disassembler(dinfo->arch,
					  dinfo->endian == BFD_ENDIAN_BIG,
					  dinfo->mach, NULL);

-- 
Josh