From nobody Mon Feb 9 23:44:17 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E8D4313D251; Fri, 29 Mar 2024 21:58:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711749496; cv=none; b=iy+Sl4ab/ULcb2mL+tdnKE/3aToPrjv2XzHjMFg9XFfpDzECuDmPt3n18naPktLC6yEXbe3IEJKlc5eeqccUxz1IzRz2vyc0kMWZQN5a1ru2Q1cPYTWAH6UOJAvuxvCX3z+HBAmJtoaQqGy2M70B0aIN5UKyZHl36Jh7hyfEHms= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711749496; c=relaxed/simple; bh=fo6R1ox3B34TItwGjFPlY2xFQyHPSRWw04kHSukApSs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=NHJvXsiXr2tYTUWS16Bh/UO1euni2OmQiIQ8Ez5lAqXHfy9P6tUf4Sr4P2g5zpv2+8Et5qc+3N8Gr5sQhWD8hy26N9qS6ZxD2gUXb4rOz5RB/I4ru0q2jGg6mU5n3u4pmSgpk14jZ6Tz8uh+Eq2tqhKihUVfe6CJJIEIVva6vJM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=a26PmS9/; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="a26PmS9/" Received: by smtp.kernel.org (Postfix) with ESMTPSA id F13F5C43609; Fri, 29 Mar 2024 21:58:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1711749495; bh=fo6R1ox3B34TItwGjFPlY2xFQyHPSRWw04kHSukApSs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=a26PmS9/TVldywT2nsiyBUtHX2jE2p7HxJcyfRwmjUOCdaSCY8mR8oBvQpMjMYzUU dbsZsGHbA7hV4BLrgOQfQIE53dn4fcqnfxYmLwQy8cFe+3zIgScjHYIHcH37opllgG OHvyI6R7DqS2Hxt/HyDIc3bz21+XsP8eSDqxFsXuuLVqEeALirzwndRIz8xgCeEjcb rEaFkOLO8FGxpdkucRonkWCi7Y0njGhLr9h8CpRADPacm0S6TsG+ejJ08BGEk02n8R 
b7kLmYYe2X8YELcTSUldPPCkC0SeF4m5Mk/b9h3Njf8If6qItH5Cin4vBGNLzx9cRf OJThQgTwT7AoQ== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org Subject: [PATCH v2 3/5] perf annotate: Split out util/disasm.c Date: Fri, 29 Mar 2024 14:58:10 -0700 Message-ID: <20240329215812.537846-4-namhyung@kernel.org> X-Mailer: git-send-email 2.44.0.478.gd926399ef9-goog In-Reply-To: <20240329215812.537846-1-namhyung@kernel.org> References: <20240329215812.537846-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The util/annotate.c code has both disassembly and sample annotation related code. Factor out the disasm part so that it can be handled more easily. No functional changes intended. Tested-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/util/Build | 1 + tools/perf/util/annotate.c | 1708 ++---------------------------------- tools/perf/util/annotate.h | 60 +- tools/perf/util/disasm.c | 1586 +++++++++++++++++++++++++++++++++ tools/perf/util/disasm.h | 112 +++ 5 files changed, 1757 insertions(+), 1710 deletions(-) create mode 100644 tools/perf/util/disasm.c create mode 100644 tools/perf/util/disasm.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e0a723e24503..aec5a590e349 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -12,6 +12,7 @@ perf-y +=3D config.o perf-y +=3D copyfile.o perf-y +=3D ctype.o perf-y +=3D db-export.o +perf-y +=3D disasm.o perf-y +=3D env.o perf-y +=3D event.o perf-y +=3D evlist.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5d0ca004dcfb..b795f27f2602 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -16,6 +16,7 @@ #include "build-id.h" #include "color.h" #include 
"config.h" +#include "disasm.h" #include "dso.h" #include "env.h" #include "map.h" @@ -64,48 +65,6 @@ /* global annotation options */ struct annotation_options annotate_opts; =20 -static regex_t file_lineno; - -static struct ins_ops *ins__find(struct arch *arch, const char *name); -static void ins__sort(struct arch *arch); -static int disasm_line__parse(char *line, const char **namep, char **rawp); -static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); -static int jump__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); - -struct arch { - const char *name; - struct ins *instructions; - size_t nr_instructions; - size_t nr_instructions_allocated; - struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const ch= ar *name); - bool sorted_instructions; - bool initialized; - const char *insn_suffix; - void *priv; - unsigned int model; - unsigned int family; - int (*init)(struct arch *arch, char *cpuid); - bool (*ins_is_fused)(struct arch *arch, const char *ins1, - const char *ins2); - struct { - char comment_char; - char skip_functions_char; - char register_char; - char memory_ref_char; - char imm_char; - } objdump; -}; - -static struct ins_ops call_ops; -static struct ins_ops dec_ops; -static struct ins_ops jump_ops; -static struct ins_ops mov_ops; -static struct ins_ops nop_ops; -static struct ins_ops lock_ops; -static struct ins_ops ret_ops; - /* Data type collection debug statistics */ struct annotated_data_stat ann_data_stat; LIST_HEAD(ann_insn_stat); @@ -125,759 +84,6 @@ struct annotated_data_type canary_type =3D { }, }; =20 -static int arch__grow_instructions(struct arch *arch) -{ - struct ins *new_instructions; - size_t new_nr_allocated; - - if (arch->nr_instructions_allocated =3D=3D 0 && arch->instructions) - goto grow_from_non_allocated_table; - - new_nr_allocated =3D arch->nr_instructions_allocated + 128; - new_instructions =3D 
realloc(arch->instructions, new_nr_allocated * sizeo= f(struct ins)); - if (new_instructions =3D=3D NULL) - return -1; - -out_update_instructions: - arch->instructions =3D new_instructions; - arch->nr_instructions_allocated =3D new_nr_allocated; - return 0; - -grow_from_non_allocated_table: - new_nr_allocated =3D arch->nr_instructions + 128; - new_instructions =3D calloc(new_nr_allocated, sizeof(struct ins)); - if (new_instructions =3D=3D NULL) - return -1; - - memcpy(new_instructions, arch->instructions, arch->nr_instructions); - goto out_update_instructions; -} - -static int arch__associate_ins_ops(struct arch* arch, const char *name, st= ruct ins_ops *ops) -{ - struct ins *ins; - - if (arch->nr_instructions =3D=3D arch->nr_instructions_allocated && - arch__grow_instructions(arch)) - return -1; - - ins =3D &arch->instructions[arch->nr_instructions]; - ins->name =3D strdup(name); - if (!ins->name) - return -1; - - ins->ops =3D ops; - arch->nr_instructions++; - - ins__sort(arch); - return 0; -} - -#include "arch/arc/annotate/instructions.c" -#include "arch/arm/annotate/instructions.c" -#include "arch/arm64/annotate/instructions.c" -#include "arch/csky/annotate/instructions.c" -#include "arch/loongarch/annotate/instructions.c" -#include "arch/mips/annotate/instructions.c" -#include "arch/x86/annotate/instructions.c" -#include "arch/powerpc/annotate/instructions.c" -#include "arch/riscv64/annotate/instructions.c" -#include "arch/s390/annotate/instructions.c" -#include "arch/sparc/annotate/instructions.c" - -static struct arch architectures[] =3D { - { - .name =3D "arc", - .init =3D arc__annotate_init, - }, - { - .name =3D "arm", - .init =3D arm__annotate_init, - }, - { - .name =3D "arm64", - .init =3D arm64__annotate_init, - }, - { - .name =3D "csky", - .init =3D csky__annotate_init, - }, - { - .name =3D "mips", - .init =3D mips__annotate_init, - .objdump =3D { - .comment_char =3D '#', - }, - }, - { - .name =3D "x86", - .init =3D x86__annotate_init, - .instructions 
=3D x86__instructions, - .nr_instructions =3D ARRAY_SIZE(x86__instructions), - .insn_suffix =3D "bwlq", - .objdump =3D { - .comment_char =3D '#', - .register_char =3D '%', - .memory_ref_char =3D '(', - .imm_char =3D '$', - }, - }, - { - .name =3D "powerpc", - .init =3D powerpc__annotate_init, - }, - { - .name =3D "riscv64", - .init =3D riscv64__annotate_init, - }, - { - .name =3D "s390", - .init =3D s390__annotate_init, - .objdump =3D { - .comment_char =3D '#', - }, - }, - { - .name =3D "sparc", - .init =3D sparc__annotate_init, - .objdump =3D { - .comment_char =3D '#', - }, - }, - { - .name =3D "loongarch", - .init =3D loongarch__annotate_init, - .objdump =3D { - .comment_char =3D '#', - }, - }, -}; - -static void ins__delete(struct ins_operands *ops) -{ - if (ops =3D=3D NULL) - return; - zfree(&ops->source.raw); - zfree(&ops->source.name); - zfree(&ops->target.raw); - zfree(&ops->target.name); -} - -static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); -} - -int ins__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - if (ins->ops->scnprintf) - return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); - - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); -} - -bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) -{ - if (!arch || !arch->ins_is_fused) - return false; - - return arch->ins_is_fused(arch, ins1, ins2); -} - -static int call__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) -{ - char *endptr, *tok, *name; - struct map *map =3D ms->map; - struct addr_map_symbol target =3D { - .ms =3D { .map =3D map, }, - }; - - ops->target.addr =3D strtoull(ops->raw, &endptr, 16); - - name =3D strchr(endptr, '<'); - if (name =3D=3D NULL) - goto indirect_call; - - name++; - - if (arch->objdump.skip_functions_char && - 
strchr(name, arch->objdump.skip_functions_char)) - return -1; - - tok =3D strchr(name, '>'); - if (tok =3D=3D NULL) - return -1; - - *tok =3D '\0'; - ops->target.name =3D strdup(name); - *tok =3D '>'; - - if (ops->target.name =3D=3D NULL) - return -1; -find_target: - target.addr =3D map__objdump_2mem(map, ops->target.addr); - - if (maps__find_ams(ms->maps, &target) =3D=3D 0 && - map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.ad= dr)) =3D=3D ops->target.addr) - ops->target.sym =3D target.ms.sym; - - return 0; - -indirect_call: - tok =3D strchr(endptr, '*'); - if (tok !=3D NULL) { - endptr++; - - /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx= ). - * Do not parse such instruction. */ - if (strstr(endptr, "(%r") =3D=3D NULL) - ops->target.addr =3D strtoull(endptr, NULL, 16); - } - goto find_target; -} - -static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - if (ops->target.sym) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.sym->name); - - if (ops->target.addr =3D=3D 0) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - if (ops->target.name) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.name); - - return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops= ->target.addr); -} - -static struct ins_ops call_ops =3D { - .parse =3D call__parse, - .scnprintf =3D call__scnprintf, -}; - -bool ins__is_call(const struct ins *ins) -{ - return ins->ops =3D=3D &call_ops || ins->ops =3D=3D &s390_call_ops || ins= ->ops =3D=3D &loongarch_call_ops; -} - -/* - * Prevents from matching commas in the comment section, e.g.: - * ffff200008446e70: b.cs ffff2000084470f4 // b.hs, b.nlast - * - * and skip comma as part of function arguments, e.g.: - * 1d8b4ac - */ -static inline const char *validate_comma(const char *c, struct ins_operand= s *ops) -{ - if (ops->jump.raw_comment && c > 
ops->jump.raw_comment) - return NULL; - - if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) - return NULL; - - return c; -} - -static int jump__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) -{ - struct map *map =3D ms->map; - struct symbol *sym =3D ms->sym; - struct addr_map_symbol target =3D { - .ms =3D { .map =3D map, }, - }; - const char *c =3D strchr(ops->raw, ','); - u64 start, end; - - ops->jump.raw_comment =3D strchr(ops->raw, arch->objdump.comment_char); - ops->jump.raw_func_start =3D strchr(ops->raw, '<'); - - c =3D validate_comma(c, ops); - - /* - * Examples of lines to parse for the _cpp_lex_token@@Base - * function: - * - * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> - * 1159e8b: jne c469be - * - * The first is a jump to an offset inside the same function, - * the second is to another function, i.e. that 0xa72 is an - * offset in the cpp_named_operator2name@@base function. - */ - /* - * skip over possible up to 2 operands to get to address, e.g.: - * tbnz w0, #26, ffff0000083cd190 - */ - if (c++ !=3D NULL) { - ops->target.addr =3D strtoull(c, NULL, 16); - if (!ops->target.addr) { - c =3D strchr(c, ','); - c =3D validate_comma(c, ops); - if (c++ !=3D NULL) - ops->target.addr =3D strtoull(c, NULL, 16); - } - } else { - ops->target.addr =3D strtoull(ops->raw, NULL, 16); - } - - target.addr =3D map__objdump_2mem(map, ops->target.addr); - start =3D map__unmap_ip(map, sym->start); - end =3D map__unmap_ip(map, sym->end); - - ops->target.outside =3D target.addr < start || target.addr > end; - - /* - * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): - - cpp_named_operator2name@@Base+0xa72 - - * Point to a place that is after the cpp_named_operator2name - * boundaries, i.e. 
in the ELF symbol table for cc1 - * cpp_named_operator2name is marked as being 32-bytes long, but it in - * fact is much larger than that, so we seem to need a symbols__find() - * routine that looks for >=3D current->start and < next_symbol->start, - * possibly just for C++ objects? - * - * For now lets just make some progress by marking jumps to outside the - * current function as call like. - * - * Actual navigation will come next, with further understanding of how - * the symbol searching and disassembly should be done. - */ - if (maps__find_ams(ms->maps, &target) =3D=3D 0 && - map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.ad= dr)) =3D=3D ops->target.addr) - ops->target.sym =3D target.ms.sym; - - if (!ops->target.outside) { - ops->target.offset =3D target.addr - start; - ops->target.offset_avail =3D true; - } else { - ops->target.offset_avail =3D false; - } - - return 0; -} - -static int jump__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - const char *c; - - if (!ops->target.addr || ops->target.offset < 0) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - if (ops->target.outside && ops->target.sym !=3D NULL) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.sym->name); - - c =3D strchr(ops->raw, ','); - c =3D validate_comma(c, ops); - - if (c !=3D NULL) { - const char *c2 =3D strchr(c + 1, ','); - - c2 =3D validate_comma(c2, ops); - /* check for 3-op insn */ - if (c2 !=3D NULL) - c =3D c2; - c++; - - /* mirror arch objdump's space-after-comma style */ - if (*c =3D=3D ' ') - c++; - } - - return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, - ins->name, c ? c - ops->raw : 0, ops->raw, - ops->target.offset); -} - -static void jump__delete(struct ins_operands *ops __maybe_unused) -{ - /* - * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the - * raw string, don't free them. 
- */ -} - -static struct ins_ops jump_ops =3D { - .free =3D jump__delete, - .parse =3D jump__parse, - .scnprintf =3D jump__scnprintf, -}; - -bool ins__is_jump(const struct ins *ins) -{ - return ins->ops =3D=3D &jump_ops || ins->ops =3D=3D &loongarch_jump_ops; -} - -static int comment__symbol(char *raw, char *comment, u64 *addrp, char **na= mep) -{ - char *endptr, *name, *t; - - if (strstr(raw, "(%rip)") =3D=3D NULL) - return 0; - - *addrp =3D strtoull(comment, &endptr, 16); - if (endptr =3D=3D comment) - return 0; - name =3D strchr(endptr, '<'); - if (name =3D=3D NULL) - return -1; - - name++; - - t =3D strchr(name, '>'); - if (t =3D=3D NULL) - return 0; - - *t =3D '\0'; - *namep =3D strdup(name); - *t =3D '>'; - - return 0; -} - -static int lock__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) -{ - ops->locked.ops =3D zalloc(sizeof(*ops->locked.ops)); - if (ops->locked.ops =3D=3D NULL) - return 0; - - if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops-= >raw) < 0) - goto out_free_ops; - - ops->locked.ins.ops =3D ins__find(arch, ops->locked.ins.name); - - if (ops->locked.ins.ops =3D=3D NULL) - goto out_free_ops; - - if (ops->locked.ins.ops->parse && - ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0) - goto out_free_ops; - - return 0; - -out_free_ops: - zfree(&ops->locked.ops); - return 0; -} - -static int lock__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - int printed; - - if (ops->locked.ins.ops =3D=3D NULL) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - printed =3D scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); - return printed + ins__scnprintf(&ops->locked.ins, bf + printed, - size - printed, ops->locked.ops, max_ins_name); -} - -static void lock__delete(struct ins_operands *ops) -{ - struct ins *ins =3D &ops->locked.ins; - - if (ins->ops && ins->ops->free) - ins->ops->free(ops->locked.ops); - else - 
ins__delete(ops->locked.ops); - - zfree(&ops->locked.ops); - zfree(&ops->target.raw); - zfree(&ops->target.name); -} - -static struct ins_ops lock_ops =3D { - .free =3D lock__delete, - .parse =3D lock__parse, - .scnprintf =3D lock__scnprintf, -}; - -/* - * Check if the operand has more than one registers like x86 SIB addressin= g: - * 0x1234(%rax, %rbx, 8) - * - * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just ch= eck - * the input string after 'memory_ref_char' if exists. - */ -static bool check_multi_regs(struct arch *arch, const char *op) -{ - int count =3D 0; - - if (arch->objdump.register_char =3D=3D 0) - return false; - - if (arch->objdump.memory_ref_char) { - op =3D strchr(op, arch->objdump.memory_ref_char); - if (op =3D=3D NULL) - return false; - } - - while ((op =3D strchr(op, arch->objdump.register_char)) !=3D NULL) { - count++; - op++; - } - - return count > 1; -} - -static int mov__parse(struct arch *arch, struct ins_operands *ops, struct = map_symbol *ms __maybe_unused) -{ - char *s =3D strchr(ops->raw, ','), *target, *comment, prev; - - if (s =3D=3D NULL) - return -1; - - *s =3D '\0'; - - /* - * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) - * then it needs to have the closing parenthesis. 
- */ - if (strchr(ops->raw, '(')) { - *s =3D ','; - s =3D strchr(ops->raw, ')'); - if (s =3D=3D NULL || s[1] !=3D ',') - return -1; - *++s =3D '\0'; - } - - ops->source.raw =3D strdup(ops->raw); - *s =3D ','; - - if (ops->source.raw =3D=3D NULL) - return -1; - - ops->source.multi_regs =3D check_multi_regs(arch, ops->source.raw); - - target =3D skip_spaces(++s); - comment =3D strchr(s, arch->objdump.comment_char); - - if (comment !=3D NULL) - s =3D comment - 1; - else - s =3D strchr(s, '\0') - 1; - - while (s > target && isspace(s[0])) - --s; - s++; - prev =3D *s; - *s =3D '\0'; - - ops->target.raw =3D strdup(target); - *s =3D prev; - - if (ops->target.raw =3D=3D NULL) - goto out_free_source; - - ops->target.multi_regs =3D check_multi_regs(arch, ops->target.raw); - - if (comment =3D=3D NULL) - return 0; - - comment =3D skip_spaces(comment); - comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->so= urce.name); - comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->ta= rget.name); - - return 0; - -out_free_source: - zfree(&ops->source.raw); - return -1; -} - -static int mov__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, - ops->source.name ?: ops->source.raw, - ops->target.name ?: ops->target.raw); -} - -static struct ins_ops mov_ops =3D { - .parse =3D mov__parse, - .scnprintf =3D mov__scnprintf, -}; - -static int dec__parse(struct arch *arch __maybe_unused, struct ins_operand= s *ops, struct map_symbol *ms __maybe_unused) -{ - char *target, *comment, *s, prev; - - target =3D s =3D ops->raw; - - while (s[0] !=3D '\0' && !isspace(s[0])) - ++s; - prev =3D *s; - *s =3D '\0'; - - ops->target.raw =3D strdup(target); - *s =3D prev; - - if (ops->target.raw =3D=3D NULL) - return -1; - - comment =3D strchr(s, arch->objdump.comment_char); - if (comment =3D=3D NULL) - return 0; - - comment =3D skip_spaces(comment); - 
comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->ta= rget.name); - - return 0; -} - -static int dec__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, - ops->target.name ?: ops->target.raw); -} - -static struct ins_ops dec_ops =3D { - .parse =3D dec__parse, - .scnprintf =3D dec__scnprintf, -}; - -static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t= size, - struct ins_operands *ops __maybe_unused, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); -} - -static struct ins_ops nop_ops =3D { - .scnprintf =3D nop__scnprintf, -}; - -static struct ins_ops ret_ops =3D { - .scnprintf =3D ins__raw_scnprintf, -}; - -bool ins__is_nop(const struct ins *ins) -{ - return ins->ops =3D=3D &nop_ops; -} - -bool ins__is_ret(const struct ins *ins) -{ - return ins->ops =3D=3D &ret_ops; -} - -bool ins__is_lock(const struct ins *ins) -{ - return ins->ops =3D=3D &lock_ops; -} - -static int ins__key_cmp(const void *name, const void *insp) -{ - const struct ins *ins =3D insp; - - return strcmp(name, ins->name); -} - -static int ins__cmp(const void *a, const void *b) -{ - const struct ins *ia =3D a; - const struct ins *ib =3D b; - - return strcmp(ia->name, ib->name); -} - -static void ins__sort(struct arch *arch) -{ - const int nmemb =3D arch->nr_instructions; - - qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); -} - -static struct ins_ops *__ins__find(struct arch *arch, const char *name) -{ - struct ins *ins; - const int nmemb =3D arch->nr_instructions; - - if (!arch->sorted_instructions) { - ins__sort(arch); - arch->sorted_instructions =3D true; - } - - ins =3D bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins_= _key_cmp); - if (ins) - return ins->ops; - - if (arch->insn_suffix) { - char tmp[32]; - char suffix; - size_t len =3D strlen(name); - - if (len =3D=3D 0 || len >=3D 
sizeof(tmp)) - return NULL; - - suffix =3D name[len - 1]; - if (strchr(arch->insn_suffix, suffix) =3D=3D NULL) - return NULL; - - strcpy(tmp, name); - tmp[len - 1] =3D '\0'; /* remove the suffix and check again */ - - ins =3D bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins_= _key_cmp); - } - return ins ? ins->ops : NULL; -} - -static struct ins_ops *ins__find(struct arch *arch, const char *name) -{ - struct ins_ops *ops =3D __ins__find(arch, name); - - if (!ops && arch->associate_instruction_ops) - ops =3D arch->associate_instruction_ops(arch, name); - - return ops; -} - -static int arch__key_cmp(const void *name, const void *archp) -{ - const struct arch *arch =3D archp; - - return strcmp(name, arch->name); -} - -static int arch__cmp(const void *a, const void *b) -{ - const struct arch *aa =3D a; - const struct arch *ab =3D b; - - return strcmp(aa->name, ab->name); -} - -static void arch__sort(void) -{ - const int nmemb =3D ARRAY_SIZE(architectures); - - qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); -} - -static struct arch *arch__find(const char *name) -{ - const int nmemb =3D ARRAY_SIZE(architectures); - static bool sorted; - - if (!sorted) { - arch__sort(); - sorted =3D true; - } - - return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key= _cmp); -} - -bool arch__is(struct arch *arch, const char *name) -{ - return !strcmp(arch->name, name); -} - /* symbol histogram: key =3D offset << 16 | evsel->core.idx */ static size_t sym_hist_hash(long key, void *ctx __maybe_unused) { @@ -1214,212 +420,76 @@ static void annotation__count_and_fill(struct annot= ation *notes, u64 start, u64 branch->cover_insn +=3D cover_insn; } } -} - -static int annotation__compute_ipc(struct annotation *notes, size_t size) -{ - int err =3D 0; - s64 offset; - - if (!notes->branch || !notes->branch->cycles_hist) - return 0; - - notes->branch->total_insn =3D annotation__count_insn(notes, 0, size - 1); - notes->branch->hit_cycles =3D 0; - 
notes->branch->hit_insn =3D 0; - notes->branch->cover_insn =3D 0; - - annotation__lock(notes); - for (offset =3D size - 1; offset >=3D 0; --offset) { - struct cyc_hist *ch; - - ch =3D ¬es->branch->cycles_hist[offset]; - if (ch && ch->cycles) { - struct annotation_line *al; - - al =3D notes->src->offsets[offset]; - if (al && al->cycles =3D=3D NULL) { - al->cycles =3D zalloc(sizeof(*al->cycles)); - if (al->cycles =3D=3D NULL) { - err =3D ENOMEM; - break; - } - } - if (ch->have_start) - annotation__count_and_fill(notes, ch->start, offset, ch); - if (al && ch->num_aggr) { - al->cycles->avg =3D ch->cycles_aggr / ch->num_aggr; - al->cycles->max =3D ch->cycles_max; - al->cycles->min =3D ch->cycles_min; - } - } - } - - if (err) { - while (++offset < (s64)size) { - struct cyc_hist *ch =3D ¬es->branch->cycles_hist[offset]; - - if (ch && ch->cycles) { - struct annotation_line *al =3D notes->src->offsets[offset]; - if (al) - zfree(&al->cycles); - } - } - } - - annotation__unlock(notes); - return 0; -} - -int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_= sample *sample, - struct evsel *evsel) -{ - return symbol__inc_addr_samples(&ams->ms, evsel, ams->al_addr, sample); -} - -int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample= *sample, - struct evsel *evsel, u64 ip) -{ - return symbol__inc_addr_samples(&he->ms, evsel, ip, sample); -} - -static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arc= h, struct map_symbol *ms) -{ - dl->ins.ops =3D ins__find(arch, dl->ins.name); - - if (!dl->ins.ops) - return; - - if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0) - dl->ins.ops =3D NULL; -} - -static int disasm_line__parse(char *line, const char **namep, char **rawp) -{ - char tmp, *name =3D skip_spaces(line); - - if (name[0] =3D=3D '\0') - return -1; - - *rawp =3D name + 1; - - while ((*rawp)[0] !=3D '\0' && !isspace((*rawp)[0])) - ++*rawp; - - tmp =3D (*rawp)[0]; - (*rawp)[0] =3D '\0'; - *namep =3D 
strdup(name); - - if (*namep =3D=3D NULL) - goto out; - - (*rawp)[0] =3D tmp; - *rawp =3D strim(*rawp); - - return 0; - -out: - return -1; -} - -struct annotate_args { - struct arch *arch; - struct map_symbol ms; - struct evsel *evsel; - struct annotation_options *options; - s64 offset; - char *line; - int line_nr; - char *fileloc; -}; - -static void annotation_line__init(struct annotation_line *al, - struct annotate_args *args, - int nr) -{ - al->offset =3D args->offset; - al->line =3D strdup(args->line); - al->line_nr =3D args->line_nr; - al->fileloc =3D args->fileloc; - al->data_nr =3D nr; -} - -static void annotation_line__exit(struct annotation_line *al) -{ - zfree_srcline(&al->path); - zfree(&al->line); - zfree(&al->cycles); -} - -static size_t disasm_line_size(int nr) -{ - struct annotation_line *al; - - return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); -} - -/* - * Allocating the disasm annotation line data with - * following structure: - * - * ------------------------------------------- - * struct disasm_line | struct annotation_line - * ------------------------------------------- - * - * We have 'struct annotation_line' member as last member - * of 'struct disasm_line' to have an easy access. 
- */ -static struct disasm_line *disasm_line__new(struct annotate_args *args) +} + +static int annotation__compute_ipc(struct annotation *notes, size_t size) { - struct disasm_line *dl =3D NULL; - int nr =3D 1; + int err =3D 0; + s64 offset; =20 - if (evsel__is_group_event(args->evsel)) - nr =3D args->evsel->core.nr_members; + if (!notes->branch || !notes->branch->cycles_hist) + return 0; =20 - dl =3D zalloc(disasm_line_size(nr)); - if (!dl) - return NULL; + notes->branch->total_insn =3D annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles =3D 0; + notes->branch->hit_insn =3D 0; + notes->branch->cover_insn =3D 0; =20 - annotation_line__init(&dl->al, args, nr); - if (dl->al.line =3D=3D NULL) - goto out_delete; + annotation__lock(notes); + for (offset =3D size - 1; offset >=3D 0; --offset) { + struct cyc_hist *ch; =20 - if (args->offset !=3D -1) { - if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) - goto out_free_line; + ch =3D ¬es->branch->cycles_hist[offset]; + if (ch && ch->cycles) { + struct annotation_line *al; =20 - disasm_line__init_ins(dl, args->arch, &args->ms); + al =3D notes->src->offsets[offset]; + if (al && al->cycles =3D=3D NULL) { + al->cycles =3D zalloc(sizeof(*al->cycles)); + if (al->cycles =3D=3D NULL) { + err =3D ENOMEM; + break; + } + } + if (ch->have_start) + annotation__count_and_fill(notes, ch->start, offset, ch); + if (al && ch->num_aggr) { + al->cycles->avg =3D ch->cycles_aggr / ch->num_aggr; + al->cycles->max =3D ch->cycles_max; + al->cycles->min =3D ch->cycles_min; + } + } } =20 - return dl; + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch =3D ¬es->branch->cycles_hist[offset]; =20 -out_free_line: - zfree(&dl->al.line); -out_delete: - free(dl); - return NULL; + if (ch && ch->cycles) { + struct annotation_line *al =3D notes->src->offsets[offset]; + if (al) + zfree(&al->cycles); + } + } + } + + annotation__unlock(notes); + return 0; } =20 -void disasm_line__free(struct disasm_line *dl) 
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_= sample *sample, + struct evsel *evsel) { - if (dl->ins.ops && dl->ins.ops->free) - dl->ins.ops->free(&dl->ops); - else - ins__delete(&dl->ops); - zfree(&dl->ins.name); - annotation_line__exit(&dl->al); - free(dl); + return symbol__inc_addr_samples(&ams->ms, evsel, ams->al_addr, sample); } =20 -int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, = bool raw, int max_ins_name) +int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample= *sample, + struct evsel *evsel, u64 ip) { - if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->op= s.raw); - - return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); + return symbol__inc_addr_samples(&he->ms, evsel, ip, sample); } =20 + void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); @@ -1478,8 +548,7 @@ bool annotation__trylock(struct annotation *notes) return mutex_trylock(mutex); } =20 - -static void annotation_line__add(struct annotation_line *al, struct list_h= ead *head) +void annotation_line__add(struct annotation_line *al, struct list_head *he= ad) { list_add_tail(&al->node, head); } @@ -1689,673 +758,6 @@ annotation_line__print(struct annotation_line *al, s= truct symbol *sym, u64 start return 0; } =20 -/* - * symbol__parse_objdump_line() parses objdump output (with -d --no-show-r= aw) - * which looks like following - * - * 0000000000415500 <_init>: - * 415500: sub $0x8,%rsp - * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0= x2f8> - * 41550b: test %rax,%rax - * 41550e: je 415515 <_init+0x15> - * 415510: callq 416e70 <__gmon_start__@plt> - * 415515: add $0x8,%rsp - * 415519: retq - * - * it will be parsed and saved into struct disasm_line as - * - * - * The offset will be a relative offset from the start of the symbol and -1 - * means that it's not a disassembly line so should be treated differently. 
- * The ops.raw part will be parsed further according to type of the instru= ction. - */ -static int symbol__parse_objdump_line(struct symbol *sym, - struct annotate_args *args, - char *parsed_line, int *line_nr, char **fileloc) -{ - struct map *map =3D args->ms.map; - struct annotation *notes =3D symbol__annotation(sym); - struct disasm_line *dl; - char *tmp; - s64 line_ip, offset =3D -1; - regmatch_t match[2]; - - /* /filename:linenr ? Save line number and ignore. */ - if (regexec(&file_lineno, parsed_line, 2, match, 0) =3D=3D 0) { - *line_nr =3D atoi(parsed_line + match[1].rm_so); - free(*fileloc); - *fileloc =3D strdup(parsed_line); - return 0; - } - - /* Process hex address followed by ':'. */ - line_ip =3D strtoull(parsed_line, &tmp, 16); - if (parsed_line !=3D tmp && tmp[0] =3D=3D ':' && tmp[1] !=3D '\0') { - u64 start =3D map__rip_2objdump(map, sym->start), - end =3D map__rip_2objdump(map, sym->end); - - offset =3D line_ip - start; - if ((u64)line_ip < start || (u64)line_ip >=3D end) - offset =3D -1; - else - parsed_line =3D tmp + 1; - } - - args->offset =3D offset; - args->line =3D parsed_line; - args->line_nr =3D *line_nr; - args->fileloc =3D *fileloc; - args->ms.sym =3D sym; - - dl =3D disasm_line__new(args); - (*line_nr)++; - - if (dl =3D=3D NULL) - return -1; - - if (!disasm_line__has_local_offset(dl)) { - dl->ops.target.offset =3D dl->ops.target.addr - - map__rip_2objdump(map, sym->start); - dl->ops.target.offset_avail =3D true; - } - - /* kcore has no symbols, so add the call target symbol */ - if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { - struct addr_map_symbol target =3D { - .addr =3D dl->ops.target.addr, - .ms =3D { .map =3D map, }, - }; - - if (!maps__find_ams(args->ms.maps, &target) && - target.ms.sym->start =3D=3D target.al_addr) - dl->ops.target.sym =3D target.ms.sym; - } - - annotation_line__add(&dl->al, ¬es->src->source); - return 0; -} - -static __attribute__((constructor)) void symbol__init_regexpr(void) -{ - 
regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); -} - -static void delete_last_nop(struct symbol *sym) -{ - struct annotation *notes =3D symbol__annotation(sym); - struct list_head *list =3D ¬es->src->source; - struct disasm_line *dl; - - while (!list_empty(list)) { - dl =3D list_entry(list->prev, struct disasm_line, al.node); - - if (dl->ins.ops) { - if (!ins__is_nop(&dl->ins)) - return; - } else { - if (!strstr(dl->al.line, " nop ") && - !strstr(dl->al.line, " nopl ") && - !strstr(dl->al.line, " nopw ")) - return; - } - - list_del_init(&dl->al.node); - disasm_line__free(dl); - } -} - -int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *= buf, size_t buflen) -{ - struct dso *dso =3D map__dso(ms->map); - - BUG_ON(buflen =3D=3D 0); - - if (errnum >=3D 0) { - str_error_r(errnum, buf, buflen); - return 0; - } - - switch (errnum) { - case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { - char bf[SBUILD_ID_SIZE + 15] =3D " with build id "; - char *build_id_msg =3D NULL; - - if (dso->has_build_id) { - build_id__sprintf(&dso->bid, bf + 15); - build_id_msg =3D bf; - } - scnprintf(buf, buflen, - "No vmlinux file%s\nwas found in the path.\n\n" - "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capabi= lity.\n\n" - "Please use:\n\n" - " perf buildid-cache -vu vmlinux\n\n" - "or:\n\n" - " --vmlinux vmlinux\n", build_id_msg ?: ""); - } - break; - case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: - scnprintf(buf, buflen, "Please link with binutils's libopcode to enable = BPF annotation"); - break; - case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: - scnprintf(buf, buflen, "Problems with arch specific instruction name reg= ular expressions."); - break; - case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: - scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch spe= cific initialization."); - break; - case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: - scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name); - break; - case 
SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: - scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with= -g or use pahole -J.", - dso->long_name); - break; - default: - scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum= ); - break; - } - - return 0; -} - -static int dso__disassemble_filename(struct dso *dso, char *filename, size= _t filename_size) -{ - char linkname[PATH_MAX]; - char *build_id_filename; - char *build_id_path =3D NULL; - char *pos; - int len; - - if (dso->symtab_type =3D=3D DSO_BINARY_TYPE__KALLSYMS && - !dso__is_kcore(dso)) - return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; - - build_id_filename =3D dso__build_id_filename(dso, NULL, 0, false); - if (build_id_filename) { - __symbol__join_symfs(filename, filename_size, build_id_filename); - free(build_id_filename); - } else { - if (dso->has_build_id) - return ENOMEM; - goto fallback; - } - - build_id_path =3D strdup(filename); - if (!build_id_path) - return ENOMEM; - - /* - * old style build-id cache has name of XX/XXXXXXX.. while - * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. - * extract the build-id part of dirname in the new style only. - */ - pos =3D strrchr(build_id_path, '/'); - if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) - dirname(build_id_path); - - if (dso__is_kcore(dso)) - goto fallback; - - len =3D readlink(build_id_path, linkname, sizeof(linkname) - 1); - if (len < 0) - goto fallback; - - linkname[len] =3D '\0'; - if (strstr(linkname, DSO__NAME_KALLSYMS) || - access(filename, R_OK)) { -fallback: - /* - * If we don't have build-ids or the build-id file isn't in the - * cache, or is just a kallsyms file, well, lets hope that this - * DSO is the same as when 'perf record' ran. 
- */ - if (dso->kernel && dso->long_name[0] =3D=3D '/') - snprintf(filename, filename_size, "%s", dso->long_name); - else - __symbol__join_symfs(filename, filename_size, dso->long_name); - - mutex_lock(&dso->lock); - if (access(filename, R_OK) && errno =3D=3D ENOENT && dso->nsinfo) { - char *new_name =3D dso__filename_with_chroot(dso, filename); - if (new_name) { - strlcpy(filename, new_name, filename_size); - free(new_name); - } - } - mutex_unlock(&dso->lock); - } - - free(build_id_path); - return 0; -} - -#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) -#define PACKAGE "perf" -#include -#include -#include -#include -#include -#include -#include - -static int symbol__disassemble_bpf(struct symbol *sym, - struct annotate_args *args) -{ - struct annotation *notes =3D symbol__annotation(sym); - struct bpf_prog_linfo *prog_linfo =3D NULL; - struct bpf_prog_info_node *info_node; - int len =3D sym->end - sym->start; - disassembler_ftype disassemble; - struct map *map =3D args->ms.map; - struct perf_bpil *info_linear; - struct disassemble_info info; - struct dso *dso =3D map__dso(map); - int pc =3D 0, count, sub_id; - struct btf *btf =3D NULL; - char tpath[PATH_MAX]; - size_t buf_size; - int nr_skip =3D 0; - char *buf; - bfd *bfdf; - int ret; - FILE *s; - - if (dso->binary_type !=3D DSO_BINARY_TYPE__BPF_PROG_INFO) - return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; - - pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func= __, - sym->name, sym->start, sym->end - sym->start); - - memset(tpath, 0, sizeof(tpath)); - perf_exe(tpath, sizeof(tpath)); - - bfdf =3D bfd_openr(tpath, NULL); - if (bfdf =3D=3D NULL) - abort(); - - if (!bfd_check_format(bfdf, bfd_object)) - abort(); - - s =3D open_memstream(&buf, &buf_size); - if (!s) { - ret =3D errno; - goto out; - } - init_disassemble_info_compat(&info, s, - (fprintf_ftype) fprintf, - fprintf_styled); - info.arch =3D bfd_get_arch(bfdf); - info.mach =3D bfd_get_mach(bfdf); - - info_node =3D 
perf_env__find_bpf_prog_info(dso->bpf_prog.env, - dso->bpf_prog.id); - if (!info_node) { - ret =3D SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; - goto out; - } - info_linear =3D info_node->info_linear; - sub_id =3D dso->bpf_prog.sub_id; - - info.buffer =3D (void *)(uintptr_t)(info_linear->info.jited_prog_insns); - info.buffer_length =3D info_linear->info.jited_prog_len; - - if (info_linear->info.nr_line_info) - prog_linfo =3D bpf_prog_linfo__new(&info_linear->info); - - if (info_linear->info.btf_id) { - struct btf_node *node; - - node =3D perf_env__find_btf(dso->bpf_prog.env, - info_linear->info.btf_id); - if (node) - btf =3D btf__new((__u8 *)(node->data), - node->data_size); - } - - disassemble_init_for_target(&info); - -#ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble =3D disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); -#else - disassemble =3D disassembler(bfdf); -#endif - if (disassemble =3D=3D NULL) - abort(); - - fflush(s); - do { - const struct bpf_line_info *linfo =3D NULL; - struct disasm_line *dl; - size_t prev_buf_size; - const char *srcline; - u64 addr; - - addr =3D pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id= ]; - count =3D disassemble(pc, &info); - - if (prog_linfo) - linfo =3D bpf_prog_linfo__lfind_addr_func(prog_linfo, - addr, sub_id, - nr_skip); - - if (linfo && btf) { - srcline =3D btf__name_by_offset(btf, linfo->line_off); - nr_skip++; - } else - srcline =3D NULL; - - fprintf(s, "\n"); - prev_buf_size =3D buf_size; - fflush(s); - - if (!annotate_opts.hide_src_code && srcline) { - args->offset =3D -1; - args->line =3D strdup(srcline); - args->line_nr =3D 0; - args->fileloc =3D NULL; - args->ms.sym =3D sym; - dl =3D disasm_line__new(args); - if (dl) { - annotation_line__add(&dl->al, - ¬es->src->source); - } - } - - args->offset =3D pc; - args->line =3D buf + prev_buf_size; - args->line_nr =3D 0; - args->fileloc =3D NULL; - args->ms.sym =3D sym; - dl =3D disasm_line__new(args); - if (dl) - 
annotation_line__add(&dl->al, ¬es->src->source); - - pc +=3D count; - } while (count > 0 && pc < len); - - ret =3D 0; -out: - free(prog_linfo); - btf__free(btf); - fclose(s); - bfd_close(bfdf); - return ret; -} -#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) -static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, - struct annotate_args *args __maybe_unused) -{ - return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; -} -#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) - -static int -symbol__disassemble_bpf_image(struct symbol *sym, - struct annotate_args *args) -{ - struct annotation *notes =3D symbol__annotation(sym); - struct disasm_line *dl; - - args->offset =3D -1; - args->line =3D strdup("to be implemented"); - args->line_nr =3D 0; - args->fileloc =3D NULL; - dl =3D disasm_line__new(args); - if (dl) - annotation_line__add(&dl->al, ¬es->src->source); - - zfree(&args->line); - return 0; -} - -/* - * Possibly create a new version of line with tabs expanded. Returns the - * existing or new line, storage is updated if a new line is allocated. If - * allocation fails then NULL is returned. - */ -static char *expand_tabs(char *line, char **storage, size_t *storage_len) -{ - size_t i, src, dst, len, new_storage_len, num_tabs; - char *new_line; - size_t line_len =3D strlen(line); - - for (num_tabs =3D 0, i =3D 0; i < line_len; i++) - if (line[i] =3D=3D '\t') - num_tabs++; - - if (num_tabs =3D=3D 0) - return line; - - /* - * Space for the line and '\0', less the leading and trailing - * spaces. Each tab may introduce 7 additional spaces. - */ - new_storage_len =3D line_len + 1 + (num_tabs * 7); - - new_line =3D malloc(new_storage_len); - if (new_line =3D=3D NULL) { - pr_err("Failure allocating memory for tab expansion\n"); - return NULL; - } - - /* - * Copy regions starting at src and expand tabs. If there are two - * adjacent tabs then 'src =3D=3D i', the memcpy is of size 0 and the spa= ces - * are inserted. 
- */ - for (i =3D 0, src =3D 0, dst =3D 0; i < line_len && num_tabs; i++) { - if (line[i] =3D=3D '\t') { - len =3D i - src; - memcpy(&new_line[dst], &line[src], len); - dst +=3D len; - new_line[dst++] =3D ' '; - while (dst % 8 !=3D 0) - new_line[dst++] =3D ' '; - src =3D i + 1; - num_tabs--; - } - } - - /* Expand the last region. */ - len =3D line_len - src; - memcpy(&new_line[dst], &line[src], len); - dst +=3D len; - new_line[dst] =3D '\0'; - - free(*storage); - *storage =3D new_line; - *storage_len =3D new_storage_len; - return new_line; - -} - -static int symbol__disassemble(struct symbol *sym, struct annotate_args *a= rgs) -{ - struct annotation_options *opts =3D &annotate_opts; - struct map *map =3D args->ms.map; - struct dso *dso =3D map__dso(map); - char *command; - FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract =3D false; - bool decomp =3D false; - int lineno =3D 0; - char *fileloc =3D NULL; - int nline; - char *line; - size_t line_len; - const char *objdump_argv[] =3D { - "/bin/sh", - "-c", - NULL, /* Will be the objdump command to run. */ - "--", - NULL, /* Will be the symfs path. 
*/ - NULL, - }; - struct child_process objdump_process; - int err =3D dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_f= ilename)); - - if (err) - return err; - - pr_debug("%s: filename=3D%s, sym=3D%s, start=3D%#" PRIx64 ", end=3D%#" PR= Ix64 "\n", __func__, - symfs_filename, sym->name, map__unmap_ip(map, sym->start), - map__unmap_ip(map, sym->end)); - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso->long_name, sym, sym->name); - - if (dso->binary_type =3D=3D DSO_BINARY_TYPE__BPF_PROG_INFO) { - return symbol__disassemble_bpf(sym, args); - } else if (dso->binary_type =3D=3D DSO_BINARY_TYPE__BPF_IMAGE) { - return symbol__disassemble_bpf_image(sym, args); - } else if (dso__is_kcore(dso)) { - kce.kcore_filename =3D symfs_filename; - kce.addr =3D map__rip_2objdump(map, sym->start); - kce.offs =3D sym->start; - kce.len =3D sym->end - sym->start; - if (!kcore_extract__create(&kce)) { - delete_extract =3D true; - strlcpy(symfs_filename, kce.extract_filename, - sizeof(symfs_filename)); - } - } else if (dso__needs_decompress(dso)) { - char tmp[KMOD_DECOMP_LEN]; - - if (dso__decompress_kmodule_path(dso, symfs_filename, - tmp, sizeof(tmp)) < 0) - return -1; - - decomp =3D true; - strcpy(symfs_filename, tmp); - } - - err =3D asprintf(&command, - "%s %s%s --start-address=3D0x%016" PRIx64 - " --stop-address=3D0x%016" PRIx64 - " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", - opts->objdump_path ?: "objdump", - opts->disassembler_style ? "-M " : "", - opts->disassembler_style ?: "", - map__rip_2objdump(map, sym->start), - map__rip_2objdump(map, sym->end), - opts->show_linenr ? "-l" : "", - opts->show_asm_raw ? "" : "--no-show-raw-insn", - opts->annotate_src ? "-S" : "", - opts->prefix ? "--prefix " : "", - opts->prefix ? '"' : ' ', - opts->prefix ?: "", - opts->prefix ? '"' : ' ', - opts->prefix_strip ? 
"--prefix-strip=3D" : "", - opts->prefix_strip ?: ""); - - if (err < 0) { - pr_err("Failure allocating memory for the command to run\n"); - goto out_remove_tmp; - } - - pr_debug("Executing: %s\n", command); - - objdump_argv[2] =3D command; - objdump_argv[4] =3D symfs_filename; - - /* Create a pipe to read from for stdout */ - memset(&objdump_process, 0, sizeof(objdump_process)); - objdump_process.argv =3D objdump_argv; - objdump_process.out =3D -1; - objdump_process.err =3D -1; - objdump_process.no_stderr =3D 1; - if (start_command(&objdump_process)) { - pr_err("Failure starting to run %s\n", command); - err =3D -1; - goto out_free_command; - } - - file =3D fdopen(objdump_process.out, "r"); - if (!file) { - pr_err("Failure creating FILE stream for %s\n", command); - /* - * If we were using debug info should retry with - * original binary. - */ - err =3D -1; - goto out_close_stdout; - } - - /* Storage for getline. */ - line =3D NULL; - line_len =3D 0; - - nline =3D 0; - while (!feof(file)) { - const char *match; - char *expanded_line; - - if (getline(&line, &line_len, file) < 0 || !line) - break; - - /* Skip lines containing "filename:" */ - match =3D strstr(line, symfs_filename); - if (match && match[strlen(symfs_filename)] =3D=3D ':') - continue; - - expanded_line =3D strim(line); - expanded_line =3D expand_tabs(expanded_line, &line, &line_len); - if (!expanded_line) - break; - - /* - * The source code line number (lineno) needs to be kept in - * across calls to symbol__parse_objdump_line(), so that it - * can associate it with the instructions till the next one. - * See disasm_line__new() and struct disasm_line::line_nr. 
- */ - if (symbol__parse_objdump_line(sym, args, expanded_line, - &lineno, &fileloc) < 0) - break; - nline++; - } - free(line); - free(fileloc); - - err =3D finish_command(&objdump_process); - if (err) - pr_err("Error running %s\n", command); - - if (nline =3D=3D 0) { - err =3D -1; - pr_err("No output from %s\n", command); - } - - /* - * kallsyms does not have symbol sizes so there may a nop at the end. - * Remove it. - */ - if (dso__is_kcore(dso)) - delete_last_nop(sym); - - fclose(file); - -out_close_stdout: - close(objdump_process.out); - -out_free_command: - free(command); - -out_remove_tmp: - if (decomp) - unlink(symfs_filename); - - if (delete_extract) - kcore_extract__delete(&kce); - - return err; -} - static void calc_percent(struct annotation *notes, struct evsel *evsel, struct annotation_data *data, diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 98f556af637c..b3007c9966fd 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -13,10 +13,10 @@ #include "mutex.h" #include "spark.h" #include "hashmap.h" +#include "disasm.h" =20 struct hist_browser_timer; struct hist_entry; -struct ins_ops; struct map; struct map_symbol; struct addr_map_symbol; @@ -26,60 +26,6 @@ struct evsel; struct symbol; struct annotated_data_type; =20 -struct ins { - const char *name; - struct ins_ops *ops; -}; - -struct ins_operands { - char *raw; - struct { - char *raw; - char *name; - struct symbol *sym; - u64 addr; - s64 offset; - bool offset_avail; - bool outside; - bool multi_regs; - } target; - union { - struct { - char *raw; - char *name; - u64 addr; - bool multi_regs; - } source; - struct { - struct ins ins; - struct ins_operands *ops; - } locked; - struct { - char *raw_comment; - char *raw_func_start; - } jump; - }; -}; - -struct arch; - -bool arch__is(struct arch *arch, const char *name); - -struct ins_ops { - void (*free)(struct ins_operands *ops); - int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symb= ol 
*ms); - int (*scnprintf)(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); -}; - -bool ins__is_jump(const struct ins *ins); -bool ins__is_call(const struct ins *ins); -bool ins__is_nop(const struct ins *ins); -bool ins__is_ret(const struct ins *ins); -bool ins__is_lock(const struct ins *ins); -int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_oper= ands *ops, int max_ins_name); -bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); - #define ANNOTATION__IPC_WIDTH 6 #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 @@ -172,6 +118,8 @@ struct disasm_line { struct annotation_line al; }; =20 +void annotation_line__add(struct annotation_line *al, struct list_head *he= ad); + static inline double annotation_data__percent(struct annotation_data *data, unsigned int which) { @@ -213,7 +161,6 @@ static inline bool disasm_line__has_local_offset(const = struct disasm_line *dl) */ bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbo= l *sym); =20 -void disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); =20 @@ -236,7 +183,6 @@ int __annotation__scnprintf_samples_period(struct annot= ation *notes, struct evsel *evsel, bool show_freq); =20 -int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, = bool raw, int max_ins_name); size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); =20 diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c new file mode 100644 index 000000000000..59ac37723990 --- /dev/null +++ b/tools/perf/util/disasm.c @@ -0,0 +1,1586 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "annotate.h" +#include "build-id.h" +#include "debug.h" +#include 
"disasm.h" +#include "dso.h" +#include "evsel.h" +#include "map.h" +#include "maps.h" +#include "srcline.h" +#include "symbol.h" + +static regex_t file_lineno; + +/* These can be referred from the arch-dependent code */ +static struct ins_ops call_ops; +static struct ins_ops dec_ops; +static struct ins_ops jump_ops; +static struct ins_ops mov_ops; +static struct ins_ops nop_ops; +static struct ins_ops lock_ops; +static struct ins_ops ret_ops; + +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); + +static void ins__sort(struct arch *arch); +static int disasm_line__parse(char *line, const char **namep, char **rawp); + +static __attribute__((constructor)) void symbol__init_regexpr(void) +{ + regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); +} + +static int arch__grow_instructions(struct arch *arch) +{ + struct ins *new_instructions; + size_t new_nr_allocated; + + if (arch->nr_instructions_allocated =3D=3D 0 && arch->instructions) + goto grow_from_non_allocated_table; + + new_nr_allocated =3D arch->nr_instructions_allocated + 128; + new_instructions =3D realloc(arch->instructions, new_nr_allocated * sizeo= f(struct ins)); + if (new_instructions =3D=3D NULL) + return -1; + +out_update_instructions: + arch->instructions =3D new_instructions; + arch->nr_instructions_allocated =3D new_nr_allocated; + return 0; + +grow_from_non_allocated_table: + new_nr_allocated =3D arch->nr_instructions + 128; + new_instructions =3D calloc(new_nr_allocated, sizeof(struct ins)); + if (new_instructions =3D=3D NULL) + return -1; + + memcpy(new_instructions, arch->instructions, arch->nr_instructions); + goto out_update_instructions; +} + +static int arch__associate_ins_ops(struct arch* arch, const char *name, st= ruct ins_ops *ops) +{ + struct ins *ins; + + if (arch->nr_instructions =3D=3D 
arch->nr_instructions_allocated && + arch__grow_instructions(arch)) + return -1; + + ins =3D &arch->instructions[arch->nr_instructions]; + ins->name =3D strdup(name); + if (!ins->name) + return -1; + + ins->ops =3D ops; + arch->nr_instructions++; + + ins__sort(arch); + return 0; +} + +#include "arch/arc/annotate/instructions.c" +#include "arch/arm/annotate/instructions.c" +#include "arch/arm64/annotate/instructions.c" +#include "arch/csky/annotate/instructions.c" +#include "arch/loongarch/annotate/instructions.c" +#include "arch/mips/annotate/instructions.c" +#include "arch/x86/annotate/instructions.c" +#include "arch/powerpc/annotate/instructions.c" +#include "arch/riscv64/annotate/instructions.c" +#include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" + +static struct arch architectures[] =3D { + { + .name =3D "arc", + .init =3D arc__annotate_init, + }, + { + .name =3D "arm", + .init =3D arm__annotate_init, + }, + { + .name =3D "arm64", + .init =3D arm64__annotate_init, + }, + { + .name =3D "csky", + .init =3D csky__annotate_init, + }, + { + .name =3D "mips", + .init =3D mips__annotate_init, + .objdump =3D { + .comment_char =3D '#', + }, + }, + { + .name =3D "x86", + .init =3D x86__annotate_init, + .instructions =3D x86__instructions, + .nr_instructions =3D ARRAY_SIZE(x86__instructions), + .insn_suffix =3D "bwlq", + .objdump =3D { + .comment_char =3D '#', + .register_char =3D '%', + .memory_ref_char =3D '(', + .imm_char =3D '$', + }, + }, + { + .name =3D "powerpc", + .init =3D powerpc__annotate_init, + }, + { + .name =3D "riscv64", + .init =3D riscv64__annotate_init, + }, + { + .name =3D "s390", + .init =3D s390__annotate_init, + .objdump =3D { + .comment_char =3D '#', + }, + }, + { + .name =3D "sparc", + .init =3D sparc__annotate_init, + .objdump =3D { + .comment_char =3D '#', + }, + }, + { + .name =3D "loongarch", + .init =3D loongarch__annotate_init, + .objdump =3D { + .comment_char =3D '#', + }, + }, +}; + +static int 
arch__key_cmp(const void *name, const void *archp) +{ + const struct arch *arch =3D archp; + + return strcmp(name, arch->name); +} + +static int arch__cmp(const void *a, const void *b) +{ + const struct arch *aa =3D a; + const struct arch *ab =3D b; + + return strcmp(aa->name, ab->name); +} + +static void arch__sort(void) +{ + const int nmemb =3D ARRAY_SIZE(architectures); + + qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); +} + +struct arch *arch__find(const char *name) +{ + const int nmemb =3D ARRAY_SIZE(architectures); + static bool sorted; + + if (!sorted) { + arch__sort(); + sorted =3D true; + } + + return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key= _cmp); +} + +bool arch__is(struct arch *arch, const char *name) +{ + return !strcmp(arch->name, name); +} + +static void ins_ops__delete(struct ins_operands *ops) +{ + if (ops =3D=3D NULL) + return; + zfree(&ops->source.raw); + zfree(&ops->source.name); + zfree(&ops->target.raw); + zfree(&ops->target.name); +} + +static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); +} + +int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + if (ins->ops->scnprintf) + return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); + + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); +} + +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) +{ + if (!arch || !arch->ins_is_fused) + return false; + + return arch->ins_is_fused(arch, ins1, ins2); +} + +static int call__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) +{ + char *endptr, *tok, *name; + struct map *map =3D ms->map; + struct addr_map_symbol target =3D { + .ms =3D { .map =3D map, }, + }; + + ops->target.addr =3D strtoull(ops->raw, &endptr, 16); + + name =3D strchr(endptr, '<'); + if (name 
=3D=3D NULL) + goto indirect_call; + + name++; + + if (arch->objdump.skip_functions_char && + strchr(name, arch->objdump.skip_functions_char)) + return -1; + + tok =3D strchr(name, '>'); + if (tok =3D=3D NULL) + return -1; + + *tok =3D '\0'; + ops->target.name =3D strdup(name); + *tok =3D '>'; + + if (ops->target.name =3D=3D NULL) + return -1; +find_target: + target.addr =3D map__objdump_2mem(map, ops->target.addr); + + if (maps__find_ams(ms->maps, &target) =3D=3D 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.ad= dr)) =3D=3D ops->target.addr) + ops->target.sym =3D target.ms.sym; + + return 0; + +indirect_call: + tok =3D strchr(endptr, '*'); + if (tok !=3D NULL) { + endptr++; + + /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx= ). + * Do not parse such instruction. */ + if (strstr(endptr, "(%r") =3D=3D NULL) + ops->target.addr =3D strtoull(endptr, NULL, 16); + } + goto find_target; +} + +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + if (ops->target.sym) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.sym->name); + + if (ops->target.addr =3D=3D 0) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + if (ops->target.name) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.name); + + return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops= ->target.addr); +} + +static struct ins_ops call_ops =3D { + .parse =3D call__parse, + .scnprintf =3D call__scnprintf, +}; + +bool ins__is_call(const struct ins *ins) +{ + return ins->ops =3D=3D &call_ops || ins->ops =3D=3D &s390_call_ops || ins= ->ops =3D=3D &loongarch_call_ops; +} + +/* + * Prevents from matching commas in the comment section, e.g.: + * ffff200008446e70: b.cs ffff2000084470f4 // b.hs, b.nlast + * + * and skip comma as part of function arguments, e.g.: + * 1d8b4ac + */ +static inline const char 
*validate_comma(const char *c, struct ins_operand= s *ops) +{ + if (ops->jump.raw_comment && c > ops->jump.raw_comment) + return NULL; + + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) + return NULL; + + return c; +} + +static int jump__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) +{ + struct map *map =3D ms->map; + struct symbol *sym =3D ms->sym; + struct addr_map_symbol target =3D { + .ms =3D { .map =3D map, }, + }; + const char *c =3D strchr(ops->raw, ','); + u64 start, end; + + ops->jump.raw_comment =3D strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start =3D strchr(ops->raw, '<'); + + c =3D validate_comma(c, ops); + + /* + * Examples of lines to parse for the _cpp_lex_token@@Base + * function: + * + * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> + * 1159e8b: jne c469be + * + * The first is a jump to an offset inside the same function, + * the second is to another function, i.e. that 0xa72 is an + * offset in the cpp_named_operator2name@@base function. + */ + /* + * skip over possible up to 2 operands to get to address, e.g.: + * tbnz w0, #26, ffff0000083cd190 + */ + if (c++ !=3D NULL) { + ops->target.addr =3D strtoull(c, NULL, 16); + if (!ops->target.addr) { + c =3D strchr(c, ','); + c =3D validate_comma(c, ops); + if (c++ !=3D NULL) + ops->target.addr =3D strtoull(c, NULL, 16); + } + } else { + ops->target.addr =3D strtoull(ops->raw, NULL, 16); + } + + target.addr =3D map__objdump_2mem(map, ops->target.addr); + start =3D map__unmap_ip(map, sym->start); + end =3D map__unmap_ip(map, sym->end); + + ops->target.outside =3D target.addr < start || target.addr > end; + + /* + * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): + + cpp_named_operator2name@@Base+0xa72 + + * Point to a place that is after the cpp_named_operator2name + * boundaries, i.e. 
in the ELF symbol table for cc1 + * cpp_named_operator2name is marked as being 32-bytes long, but it in + * fact is much larger than that, so we seem to need a symbols__find() + * routine that looks for >=3D current->start and < next_symbol->start, + * possibly just for C++ objects? + * + * For now lets just make some progress by marking jumps to outside the + * current function as call like. + * + * Actual navigation will come next, with further understanding of how + * the symbol searching and disassembly should be done. + */ + if (maps__find_ams(ms->maps, &target) =3D=3D 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.ad= dr)) =3D=3D ops->target.addr) + ops->target.sym =3D target.ms.sym; + + if (!ops->target.outside) { + ops->target.offset =3D target.addr - start; + ops->target.offset_avail =3D true; + } else { + ops->target.offset_avail =3D false; + } + + return 0; +} + +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + const char *c; + + if (!ops->target.addr || ops->target.offset < 0) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + if (ops->target.outside && ops->target.sym !=3D NULL) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->targ= et.sym->name); + + c =3D strchr(ops->raw, ','); + c =3D validate_comma(c, ops); + + if (c !=3D NULL) { + const char *c2 =3D strchr(c + 1, ','); + + c2 =3D validate_comma(c2, ops); + /* check for 3-op insn */ + if (c2 !=3D NULL) + c =3D c2; + c++; + + /* mirror arch objdump's space-after-comma style */ + if (*c =3D=3D ' ') + c++; + } + + return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, + ins->name, c ? c - ops->raw : 0, ops->raw, + ops->target.offset); +} + +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. 
+ */ +} + +static struct ins_ops jump_ops =3D { + .free =3D jump__delete, + .parse =3D jump__parse, + .scnprintf =3D jump__scnprintf, +}; + +bool ins__is_jump(const struct ins *ins) +{ + return ins->ops =3D=3D &jump_ops || ins->ops =3D=3D &loongarch_jump_ops; +} + +static int comment__symbol(char *raw, char *comment, u64 *addrp, char **na= mep) +{ + char *endptr, *name, *t; + + if (strstr(raw, "(%rip)") =3D=3D NULL) + return 0; + + *addrp =3D strtoull(comment, &endptr, 16); + if (endptr =3D=3D comment) + return 0; + name =3D strchr(endptr, '<'); + if (name =3D=3D NULL) + return -1; + + name++; + + t =3D strchr(name, '>'); + if (t =3D=3D NULL) + return 0; + + *t =3D '\0'; + *namep =3D strdup(name); + *t =3D '>'; + + return 0; +} + +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct= map_symbol *ms) +{ + ops->locked.ops =3D zalloc(sizeof(*ops->locked.ops)); + if (ops->locked.ops =3D=3D NULL) + return 0; + + if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops-= >raw) < 0) + goto out_free_ops; + + ops->locked.ins.ops =3D ins__find(arch, ops->locked.ins.name); + + if (ops->locked.ins.ops =3D=3D NULL) + goto out_free_ops; + + if (ops->locked.ins.ops->parse && + ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0) + goto out_free_ops; + + return 0; + +out_free_ops: + zfree(&ops->locked.ops); + return 0; +} + +static int lock__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + int printed; + + if (ops->locked.ins.ops =3D=3D NULL) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + printed =3D scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); + return printed + ins__scnprintf(&ops->locked.ins, bf + printed, + size - printed, ops->locked.ops, max_ins_name); +} + +static void lock__delete(struct ins_operands *ops) +{ + struct ins *ins =3D &ops->locked.ins; + + if (ins->ops && ins->ops->free) + ins->ops->free(ops->locked.ops); + else + 
ins_ops__delete(ops->locked.ops); + + zfree(&ops->locked.ops); + zfree(&ops->target.raw); + zfree(&ops->target.name); +} + +static struct ins_ops lock_ops =3D { + .free =3D lock__delete, + .parse =3D lock__parse, + .scnprintf =3D lock__scnprintf, +}; + +/* + * Check if the operand has more than one registers like x86 SIB addressin= g: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just ch= eck + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count =3D 0; + + if (arch->objdump.register_char =3D=3D 0) + return false; + + if (arch->objdump.memory_ref_char) { + op =3D strchr(op, arch->objdump.memory_ref_char); + if (op =3D=3D NULL) + return false; + } + + while ((op =3D strchr(op, arch->objdump.register_char)) !=3D NULL) { + count++; + op++; + } + + return count > 1; +} + +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct = map_symbol *ms __maybe_unused) +{ + char *s =3D strchr(ops->raw, ','), *target, *comment, prev; + + if (s =3D=3D NULL) + return -1; + + *s =3D '\0'; + + /* + * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) + * then it needs to have the closing parenthesis. 
+ */ + if (strchr(ops->raw, '(')) { + *s =3D ','; + s =3D strchr(ops->raw, ')'); + if (s =3D=3D NULL || s[1] !=3D ',') + return -1; + *++s =3D '\0'; + } + + ops->source.raw =3D strdup(ops->raw); + *s =3D ','; + + if (ops->source.raw =3D=3D NULL) + return -1; + + ops->source.multi_regs =3D check_multi_regs(arch, ops->source.raw); + + target =3D skip_spaces(++s); + comment =3D strchr(s, arch->objdump.comment_char); + + if (comment !=3D NULL) + s =3D comment - 1; + else + s =3D strchr(s, '\0') - 1; + + while (s > target && isspace(s[0])) + --s; + s++; + prev =3D *s; + *s =3D '\0'; + + ops->target.raw =3D strdup(target); + *s =3D prev; + + if (ops->target.raw =3D=3D NULL) + goto out_free_source; + + ops->target.multi_regs =3D check_multi_regs(arch, ops->target.raw); + + if (comment =3D=3D NULL) + return 0; + + comment =3D skip_spaces(comment); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->so= urce.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->ta= rget.name); + + return 0; + +out_free_source: + zfree(&ops->source.raw); + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, + ops->source.name ?: ops->source.raw, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops mov_ops =3D { + .parse =3D mov__parse, + .scnprintf =3D mov__scnprintf, +}; + +static int dec__parse(struct arch *arch __maybe_unused, struct ins_operand= s *ops, struct map_symbol *ms __maybe_unused) +{ + char *target, *comment, *s, prev; + + target =3D s =3D ops->raw; + + while (s[0] !=3D '\0' && !isspace(s[0])) + ++s; + prev =3D *s; + *s =3D '\0'; + + ops->target.raw =3D strdup(target); + *s =3D prev; + + if (ops->target.raw =3D=3D NULL) + return -1; + + comment =3D strchr(s, arch->objdump.comment_char); + if (comment =3D=3D NULL) + return 0; + + comment =3D skip_spaces(comment); + 
comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->ta= rget.name); + + return 0; +} + +static int dec__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops dec_ops =3D { + .parse =3D dec__parse, + .scnprintf =3D dec__scnprintf, +}; + +static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t= size, + struct ins_operands *ops __maybe_unused, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); +} + +static struct ins_ops nop_ops =3D { + .scnprintf =3D nop__scnprintf, +}; + +static struct ins_ops ret_ops =3D { + .scnprintf =3D ins__raw_scnprintf, +}; + +bool ins__is_nop(const struct ins *ins) +{ + return ins->ops =3D=3D &nop_ops; +} + +bool ins__is_ret(const struct ins *ins) +{ + return ins->ops =3D=3D &ret_ops; +} + +bool ins__is_lock(const struct ins *ins) +{ + return ins->ops =3D=3D &lock_ops; +} + +static int ins__key_cmp(const void *name, const void *insp) +{ + const struct ins *ins =3D insp; + + return strcmp(name, ins->name); +} + +static int ins__cmp(const void *a, const void *b) +{ + const struct ins *ia =3D a; + const struct ins *ib =3D b; + + return strcmp(ia->name, ib->name); +} + +static void ins__sort(struct arch *arch) +{ + const int nmemb =3D arch->nr_instructions; + + qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); +} + +static struct ins_ops *__ins__find(struct arch *arch, const char *name) +{ + struct ins *ins; + const int nmemb =3D arch->nr_instructions; + + if (!arch->sorted_instructions) { + ins__sort(arch); + arch->sorted_instructions =3D true; + } + + ins =3D bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins_= _key_cmp); + if (ins) + return ins->ops; + + if (arch->insn_suffix) { + char tmp[32]; + char suffix; + size_t len =3D strlen(name); + + if (len =3D=3D 0 || len >=3D 
sizeof(tmp)) + return NULL; + + suffix =3D name[len - 1]; + if (strchr(arch->insn_suffix, suffix) =3D=3D NULL) + return NULL; + + strcpy(tmp, name); + tmp[len - 1] =3D '\0'; /* remove the suffix and check again */ + + ins =3D bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins_= _key_cmp); + } + return ins ? ins->ops : NULL; +} + +struct ins_ops *ins__find(struct arch *arch, const char *name) +{ + struct ins_ops *ops =3D __ins__find(arch, name); + + if (!ops && arch->associate_instruction_ops) + ops =3D arch->associate_instruction_ops(arch, name); + + return ops; +} + +static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arc= h, struct map_symbol *ms) +{ + dl->ins.ops =3D ins__find(arch, dl->ins.name); + + if (!dl->ins.ops) + return; + + if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0) + dl->ins.ops =3D NULL; +} + +static int disasm_line__parse(char *line, const char **namep, char **rawp) +{ + char tmp, *name =3D skip_spaces(line); + + if (name[0] =3D=3D '\0') + return -1; + + *rawp =3D name + 1; + + while ((*rawp)[0] !=3D '\0' && !isspace((*rawp)[0])) + ++*rawp; + + tmp =3D (*rawp)[0]; + (*rawp)[0] =3D '\0'; + *namep =3D strdup(name); + + if (*namep =3D=3D NULL) + goto out; + + (*rawp)[0] =3D tmp; + *rawp =3D strim(*rawp); + + return 0; + +out: + return -1; +} + +static void annotation_line__init(struct annotation_line *al, + struct annotate_args *args, + int nr) +{ + al->offset =3D args->offset; + al->line =3D strdup(args->line); + al->line_nr =3D args->line_nr; + al->fileloc =3D args->fileloc; + al->data_nr =3D nr; +} + +static void annotation_line__exit(struct annotation_line *al) +{ + zfree_srcline(&al->path); + zfree(&al->line); + zfree(&al->cycles); +} + +static size_t disasm_line_size(int nr) +{ + struct annotation_line *al; + + return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * 
------------------------------------------- + * struct disasm_line | struct annotation_line + * ------------------------------------------- + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. + */ +struct disasm_line *disasm_line__new(struct annotate_args *args) +{ + struct disasm_line *dl =3D NULL; + int nr =3D 1; + + if (evsel__is_group_event(args->evsel)) + nr =3D args->evsel->core.nr_members; + + dl =3D zalloc(disasm_line_size(nr)); + if (!dl) + return NULL; + + annotation_line__init(&dl->al, args, nr); + if (dl->al.line =3D=3D NULL) + goto out_delete; + + if (args->offset !=3D -1) { + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) + goto out_free_line; + + disasm_line__init_ins(dl, args->arch, &args->ms); + } + + return dl; + +out_free_line: + zfree(&dl->al.line); +out_delete: + free(dl); + return NULL; +} + +void disasm_line__free(struct disasm_line *dl) +{ + if (dl->ins.ops && dl->ins.ops->free) + dl->ins.ops->free(&dl->ops); + else + ins_ops__delete(&dl->ops); + zfree(&dl->ins.name); + annotation_line__exit(&dl->al); + free(dl); +} + +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, = bool raw, int max_ins_name) +{ + if (raw || !dl->ins.ops) + return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->op= s.raw); + + return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); +} + +/* + * symbol__parse_objdump_line() parses objdump output (with -d --no-show-r= aw) + * which looks like following + * + * 0000000000415500 <_init>: + * 415500: sub $0x8,%rsp + * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0= x2f8> + * 41550b: test %rax,%rax + * 41550e: je 415515 <_init+0x15> + * 415510: callq 416e70 <__gmon_start__@plt> + * 415515: add $0x8,%rsp + * 415519: retq + * + * it will be parsed and saved into struct disasm_line as + * + * + * The offset will be a relative offset from the start of the symbol and -1 + * means that it's 
not a disassembly line so should be treated differently. + * The ops.raw part will be parsed further according to type of the instru= ction. + */ +static int symbol__parse_objdump_line(struct symbol *sym, + struct annotate_args *args, + char *parsed_line, int *line_nr, char **fileloc) +{ + struct map *map =3D args->ms.map; + struct annotation *notes =3D symbol__annotation(sym); + struct disasm_line *dl; + char *tmp; + s64 line_ip, offset =3D -1; + regmatch_t match[2]; + + /* /filename:linenr ? Save line number and ignore. */ + if (regexec(&file_lineno, parsed_line, 2, match, 0) =3D=3D 0) { + *line_nr =3D atoi(parsed_line + match[1].rm_so); + free(*fileloc); + *fileloc =3D strdup(parsed_line); + return 0; + } + + /* Process hex address followed by ':'. */ + line_ip =3D strtoull(parsed_line, &tmp, 16); + if (parsed_line !=3D tmp && tmp[0] =3D=3D ':' && tmp[1] !=3D '\0') { + u64 start =3D map__rip_2objdump(map, sym->start), + end =3D map__rip_2objdump(map, sym->end); + + offset =3D line_ip - start; + if ((u64)line_ip < start || (u64)line_ip >=3D end) + offset =3D -1; + else + parsed_line =3D tmp + 1; + } + + args->offset =3D offset; + args->line =3D parsed_line; + args->line_nr =3D *line_nr; + args->fileloc =3D *fileloc; + args->ms.sym =3D sym; + + dl =3D disasm_line__new(args); + (*line_nr)++; + + if (dl =3D=3D NULL) + return -1; + + if (!disasm_line__has_local_offset(dl)) { + dl->ops.target.offset =3D dl->ops.target.addr - + map__rip_2objdump(map, sym->start); + dl->ops.target.offset_avail =3D true; + } + + /* kcore has no symbols, so add the call target symbol */ + if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { + struct addr_map_symbol target =3D { + .addr =3D dl->ops.target.addr, + .ms =3D { .map =3D map, }, + }; + + if (!maps__find_ams(args->ms.maps, &target) && + target.ms.sym->start =3D=3D target.al_addr) + dl->ops.target.sym =3D target.ms.sym; + } + + annotation_line__add(&dl->al, ¬es->src->source); + return 0; +} + +static void 
delete_last_nop(struct symbol *sym) +{ + struct annotation *notes =3D symbol__annotation(sym); + struct list_head *list =3D ¬es->src->source; + struct disasm_line *dl; + + while (!list_empty(list)) { + dl =3D list_entry(list->prev, struct disasm_line, al.node); + + if (dl->ins.ops) { + if (!ins__is_nop(&dl->ins)) + return; + } else { + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) + return; + } + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } +} + +int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *= buf, size_t buflen) +{ + struct dso *dso =3D map__dso(ms->map); + + BUG_ON(buflen =3D=3D 0); + + if (errnum >=3D 0) { + str_error_r(errnum, buf, buflen); + return 0; + } + + switch (errnum) { + case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { + char bf[SBUILD_ID_SIZE + 15] =3D " with build id "; + char *build_id_msg =3D NULL; + + if (dso->has_build_id) { + build_id__sprintf(&dso->bid, bf + 15); + build_id_msg =3D bf; + } + scnprintf(buf, buflen, + "No vmlinux file%s\nwas found in the path.\n\n" + "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capabi= lity.\n\n" + "Please use:\n\n" + " perf buildid-cache -vu vmlinux\n\n" + "or:\n\n" + " --vmlinux vmlinux\n", build_id_msg ?: ""); + } + break; + case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: + scnprintf(buf, buflen, "Please link with binutils's libopcode to enable = BPF annotation"); + break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: + scnprintf(buf, buflen, "Problems with arch specific instruction name reg= ular expressions."); + break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: + scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch spe= cific initialization."); + break; + case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: + scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name); + break; + case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: + scnprintf(buf, buflen, "The %s BPF file 
has no BTF section, compile with= -g or use pahole -J.", + dso->long_name); + break; + default: + scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum= ); + break; + } + + return 0; +} + +static int dso__disassemble_filename(struct dso *dso, char *filename, size= _t filename_size) +{ + char linkname[PATH_MAX]; + char *build_id_filename; + char *build_id_path =3D NULL; + char *pos; + int len; + + if (dso->symtab_type =3D=3D DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(dso)) + return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; + + build_id_filename =3D dso__build_id_filename(dso, NULL, 0, false); + if (build_id_filename) { + __symbol__join_symfs(filename, filename_size, build_id_filename); + free(build_id_filename); + } else { + if (dso->has_build_id) + return ENOMEM; + goto fallback; + } + + build_id_path =3D strdup(filename); + if (!build_id_path) + return ENOMEM; + + /* + * old style build-id cache has name of XX/XXXXXXX.. while + * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. + * extract the build-id part of dirname in the new style only. + */ + pos =3D strrchr(build_id_path, '/'); + if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) + dirname(build_id_path); + + if (dso__is_kcore(dso)) + goto fallback; + + len =3D readlink(build_id_path, linkname, sizeof(linkname) - 1); + if (len < 0) + goto fallback; + + linkname[len] =3D '\0'; + if (strstr(linkname, DSO__NAME_KALLSYMS) || + access(filename, R_OK)) { +fallback: + /* + * If we don't have build-ids or the build-id file isn't in the + * cache, or is just a kallsyms file, well, lets hope that this + * DSO is the same as when 'perf record' ran. 
+ */ + if (dso->kernel && dso->long_name[0] =3D=3D '/') + snprintf(filename, filename_size, "%s", dso->long_name); + else + __symbol__join_symfs(filename, filename_size, dso->long_name); + + mutex_lock(&dso->lock); + if (access(filename, R_OK) && errno =3D=3D ENOENT && dso->nsinfo) { + char *new_name =3D dso__filename_with_chroot(dso, filename); + if (new_name) { + strlcpy(filename, new_name, filename_size); + free(new_name); + } + } + mutex_unlock(&dso->lock); + } + + free(build_id_path); + return 0; +} + +#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +#define PACKAGE "perf" +#include +#include +#include +#include +#include +#include +#include + +static int symbol__disassemble_bpf(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes =3D symbol__annotation(sym); + struct bpf_prog_linfo *prog_linfo =3D NULL; + struct bpf_prog_info_node *info_node; + int len =3D sym->end - sym->start; + disassembler_ftype disassemble; + struct map *map =3D args->ms.map; + struct perf_bpil *info_linear; + struct disassemble_info info; + struct dso *dso =3D map__dso(map); + int pc =3D 0, count, sub_id; + struct btf *btf =3D NULL; + char tpath[PATH_MAX]; + size_t buf_size; + int nr_skip =3D 0; + char *buf; + bfd *bfdf; + int ret; + FILE *s; + + if (dso->binary_type !=3D DSO_BINARY_TYPE__BPF_PROG_INFO) + return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; + + pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func= __, + sym->name, sym->start, sym->end - sym->start); + + memset(tpath, 0, sizeof(tpath)); + perf_exe(tpath, sizeof(tpath)); + + bfdf =3D bfd_openr(tpath, NULL); + if (bfdf =3D=3D NULL) + abort(); + + if (!bfd_check_format(bfdf, bfd_object)) + abort(); + + s =3D open_memstream(&buf, &buf_size); + if (!s) { + ret =3D errno; + goto out; + } + init_disassemble_info_compat(&info, s, + (fprintf_ftype) fprintf, + fprintf_styled); + info.arch =3D bfd_get_arch(bfdf); + info.mach =3D bfd_get_mach(bfdf); + + info_node =3D 
perf_env__find_bpf_prog_info(dso->bpf_prog.env, + dso->bpf_prog.id); + if (!info_node) { + ret =3D SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + goto out; + } + info_linear =3D info_node->info_linear; + sub_id =3D dso->bpf_prog.sub_id; + + info.buffer =3D (void *)(uintptr_t)(info_linear->info.jited_prog_insns); + info.buffer_length =3D info_linear->info.jited_prog_len; + + if (info_linear->info.nr_line_info) + prog_linfo =3D bpf_prog_linfo__new(&info_linear->info); + + if (info_linear->info.btf_id) { + struct btf_node *node; + + node =3D perf_env__find_btf(dso->bpf_prog.env, + info_linear->info.btf_id); + if (node) + btf =3D btf__new((__u8 *)(node->data), + node->data_size); + } + + disassemble_init_for_target(&info); + +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble =3D disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else + disassemble =3D disassembler(bfdf); +#endif + if (disassemble =3D=3D NULL) + abort(); + + fflush(s); + do { + const struct bpf_line_info *linfo =3D NULL; + struct disasm_line *dl; + size_t prev_buf_size; + const char *srcline; + u64 addr; + + addr =3D pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id= ]; + count =3D disassemble(pc, &info); + + if (prog_linfo) + linfo =3D bpf_prog_linfo__lfind_addr_func(prog_linfo, + addr, sub_id, + nr_skip); + + if (linfo && btf) { + srcline =3D btf__name_by_offset(btf, linfo->line_off); + nr_skip++; + } else + srcline =3D NULL; + + fprintf(s, "\n"); + prev_buf_size =3D buf_size; + fflush(s); + + if (!annotate_opts.hide_src_code && srcline) { + args->offset =3D -1; + args->line =3D strdup(srcline); + args->line_nr =3D 0; + args->fileloc =3D NULL; + args->ms.sym =3D sym; + dl =3D disasm_line__new(args); + if (dl) { + annotation_line__add(&dl->al, + ¬es->src->source); + } + } + + args->offset =3D pc; + args->line =3D buf + prev_buf_size; + args->line_nr =3D 0; + args->fileloc =3D NULL; + args->ms.sym =3D sym; + dl =3D disasm_line__new(args); + if (dl) + 
annotation_line__add(&dl->al, ¬es->src->source); + + pc +=3D count; + } while (count > 0 && pc < len); + + ret =3D 0; +out: + free(prog_linfo); + btf__free(btf); + fclose(s); + bfd_close(bfdf); + return ret; +} +#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +} +#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) + +static int +symbol__disassemble_bpf_image(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes =3D symbol__annotation(sym); + struct disasm_line *dl; + + args->offset =3D -1; + args->line =3D strdup("to be implemented"); + args->line_nr =3D 0; + args->fileloc =3D NULL; + dl =3D disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + zfree(&args->line); + return 0; +} + +/* + * Possibly create a new version of line with tabs expanded. Returns the + * existing or new line, storage is updated if a new line is allocated. If + * allocation fails then NULL is returned. + */ +static char *expand_tabs(char *line, char **storage, size_t *storage_len) +{ + size_t i, src, dst, len, new_storage_len, num_tabs; + char *new_line; + size_t line_len =3D strlen(line); + + for (num_tabs =3D 0, i =3D 0; i < line_len; i++) + if (line[i] =3D=3D '\t') + num_tabs++; + + if (num_tabs =3D=3D 0) + return line; + + /* + * Space for the line and '\0', less the leading and trailing + * spaces. Each tab may introduce 7 additional spaces. + */ + new_storage_len =3D line_len + 1 + (num_tabs * 7); + + new_line =3D malloc(new_storage_len); + if (new_line =3D=3D NULL) { + pr_err("Failure allocating memory for tab expansion\n"); + return NULL; + } + + /* + * Copy regions starting at src and expand tabs. If there are two + * adjacent tabs then 'src =3D=3D i', the memcpy is of size 0 and the spa= ces + * are inserted. 
+ */ + for (i =3D 0, src =3D 0, dst =3D 0; i < line_len && num_tabs; i++) { + if (line[i] =3D=3D '\t') { + len =3D i - src; + memcpy(&new_line[dst], &line[src], len); + dst +=3D len; + new_line[dst++] =3D ' '; + while (dst % 8 !=3D 0) + new_line[dst++] =3D ' '; + src =3D i + 1; + num_tabs--; + } + } + + /* Expand the last region. */ + len =3D line_len - src; + memcpy(&new_line[dst], &line[src], len); + dst +=3D len; + new_line[dst] =3D '\0'; + + free(*storage); + *storage =3D new_line; + *storage_len =3D new_storage_len; + return new_line; +} + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +{ + struct annotation_options *opts =3D &annotate_opts; + struct map *map =3D args->ms.map; + struct dso *dso =3D map__dso(map); + char *command; + FILE *file; + char symfs_filename[PATH_MAX]; + struct kcore_extract kce; + bool delete_extract =3D false; + bool decomp =3D false; + int lineno =3D 0; + char *fileloc =3D NULL; + int nline; + char *line; + size_t line_len; + const char *objdump_argv[] =3D { + "/bin/sh", + "-c", + NULL, /* Will be the objdump command to run. */ + "--", + NULL, /* Will be the symfs path. 
*/ + NULL, + }; + struct child_process objdump_process; + int err =3D dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_f= ilename)); + + if (err) + return err; + + pr_debug("%s: filename=3D%s, sym=3D%s, start=3D%#" PRIx64 ", end=3D%#" PR= Ix64 "\n", __func__, + symfs_filename, sym->name, map__unmap_ip(map, sym->start), + map__unmap_ip(map, sym->end)); + + pr_debug("annotating [%p] %30s : [%p] %30s\n", + dso, dso->long_name, sym, sym->name); + + if (dso->binary_type =3D=3D DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso->binary_type =3D=3D DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); + } else if (dso__is_kcore(dso)) { + kce.kcore_filename =3D symfs_filename; + kce.addr =3D map__rip_2objdump(map, sym->start); + kce.offs =3D sym->start; + kce.len =3D sym->end - sym->start; + if (!kcore_extract__create(&kce)) { + delete_extract =3D true; + strlcpy(symfs_filename, kce.extract_filename, + sizeof(symfs_filename)); + } + } else if (dso__needs_decompress(dso)) { + char tmp[KMOD_DECOMP_LEN]; + + if (dso__decompress_kmodule_path(dso, symfs_filename, + tmp, sizeof(tmp)) < 0) + return -1; + + decomp =3D true; + strcpy(symfs_filename, tmp); + } + + err =3D asprintf(&command, + "%s %s%s --start-address=3D0x%016" PRIx64 + " --stop-address=3D0x%016" PRIx64 + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", + opts->objdump_path ?: "objdump", + opts->disassembler_style ? "-M " : "", + opts->disassembler_style ?: "", + map__rip_2objdump(map, sym->start), + map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", + opts->show_asm_raw ? "" : "--no-show-raw-insn", + opts->annotate_src ? "-S" : "", + opts->prefix ? "--prefix " : "", + opts->prefix ? '"' : ' ', + opts->prefix ?: "", + opts->prefix ? '"' : ' ', + opts->prefix_strip ? 
"--prefix-strip=3D" : "", + opts->prefix_strip ?: ""); + + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + goto out_remove_tmp; + } + + pr_debug("Executing: %s\n", command); + + objdump_argv[2] =3D command; + objdump_argv[4] =3D symfs_filename; + + /* Create a pipe to read from for stdout */ + memset(&objdump_process, 0, sizeof(objdump_process)); + objdump_process.argv =3D objdump_argv; + objdump_process.out =3D -1; + objdump_process.err =3D -1; + objdump_process.no_stderr =3D 1; + if (start_command(&objdump_process)) { + pr_err("Failure starting to run %s\n", command); + err =3D -1; + goto out_free_command; + } + + file =3D fdopen(objdump_process.out, "r"); + if (!file) { + pr_err("Failure creating FILE stream for %s\n", command); + /* + * If we were using debug info should retry with + * original binary. + */ + err =3D -1; + goto out_close_stdout; + } + + /* Storage for getline. */ + line =3D NULL; + line_len =3D 0; + + nline =3D 0; + while (!feof(file)) { + const char *match; + char *expanded_line; + + if (getline(&line, &line_len, file) < 0 || !line) + break; + + /* Skip lines containing "filename:" */ + match =3D strstr(line, symfs_filename); + if (match && match[strlen(symfs_filename)] =3D=3D ':') + continue; + + expanded_line =3D strim(line); + expanded_line =3D expand_tabs(expanded_line, &line, &line_len); + if (!expanded_line) + break; + + /* + * The source code line number (lineno) needs to be kept in + * across calls to symbol__parse_objdump_line(), so that it + * can associate it with the instructions till the next one. + * See disasm_line__new() and struct disasm_line::line_nr. 
+ */ + if (symbol__parse_objdump_line(sym, args, expanded_line, + &lineno, &fileloc) < 0) + break; + nline++; + } + free(line); + free(fileloc); + + err =3D finish_command(&objdump_process); + if (err) + pr_err("Error running %s\n", command); + + if (nline =3D=3D 0) { + err =3D -1; + pr_err("No output from %s\n", command); + } + + /* + * kallsyms does not have symbol sizes so there may a nop at the end. + * Remove it. + */ + if (dso__is_kcore(dso)) + delete_last_nop(sym); + + fclose(file); + +out_close_stdout: + close(objdump_process.out); + +out_free_command: + free(command); + +out_remove_tmp: + if (decomp) + unlink(symfs_filename); + + if (delete_extract) + kcore_extract__delete(&kce); + + return err; +} diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h new file mode 100644 index 000000000000..3d381a043520 --- /dev/null +++ b/tools/perf/util/disasm.h @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_UTIL_DISASM_H +#define __PERF_UTIL_DISASM_H + +#include "map_symbol.h" + +struct annotation_options; +struct disasm_line; +struct ins; +struct evsel; +struct symbol; + +struct arch { + const char *name; + struct ins *instructions; + size_t nr_instructions; + size_t nr_instructions_allocated; + struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const ch= ar *name); + bool sorted_instructions; + bool initialized; + const char *insn_suffix; + void *priv; + unsigned int model; + unsigned int family; + int (*init)(struct arch *arch, char *cpuid); + bool (*ins_is_fused)(struct arch *arch, const char *ins1, + const char *ins2); + struct { + char comment_char; + char skip_functions_char; + char register_char; + char memory_ref_char; + char imm_char; + } objdump; +}; + +struct ins { + const char *name; + struct ins_ops *ops; +}; + +struct ins_operands { + char *raw; + struct { + char *raw; + char *name; + struct symbol *sym; + u64 addr; + s64 offset; + bool offset_avail; + bool outside; + bool multi_regs; + } target; + union { 
+ struct { + char *raw; + char *name; + u64 addr; + bool multi_regs; + } source; + struct { + struct ins ins; + struct ins_operands *ops; + } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; + }; +}; + +struct ins_ops { + void (*free)(struct ins_operands *ops); + int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symb= ol *ms); + int (*scnprintf)(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +}; + +struct annotate_args { + struct arch *arch; + struct map_symbol ms; + struct evsel *evsel; + struct annotation_options *options; + s64 offset; + char *line; + int line_nr; + char *fileloc; +}; + +struct arch *arch__find(const char *name); +bool arch__is(struct arch *arch, const char *name); + +struct ins_ops *ins__find(struct arch *arch, const char *name); +int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); + +bool ins__is_call(const struct ins *ins); +bool ins__is_jump(const struct ins *ins); +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); +bool ins__is_nop(const struct ins *ins); +bool ins__is_ret(const struct ins *ins); +bool ins__is_lock(const struct ins *ins); + +struct disasm_line *disasm_line__new(struct annotate_args *args); +void disasm_line__free(struct disasm_line *dl); + +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, + bool raw, int max_ins_name); + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args); + +#endif /* __PERF_UTIL_DISASM_H */ --=20 2.44.0.478.gd926399ef9-goog