[PATCH v1 12/23] perf dwarf-regs: Add get_dwarf_regnum_for_perf_regnum and use for x86 unwinding

Ian Rogers posted 23 patches 3 weeks, 1 day ago
[PATCH v1 12/23] perf dwarf-regs: Add get_dwarf_regnum_for_perf_regnum and use for x86 unwinding
Posted by Ian Rogers 3 weeks, 1 day ago
Add a utility to map a perf register number to a dwarf register number
for a particular ELF machine type.

Create a generic unwind-libdw routine for setting the initial registers
that uses this function and thereby avoids arch-specific
initialization. The unwind-libdw code:
1) computes the maximum dwarf register from the set of sampled user registers,
2) allocates a set of dwarf registers,
3) copies the sample registers into the appropriate dwarf registers.

This generic solution is initially implemented for use with x86 as
only get_dwarf_regnum_for_perf_regnum support for x86 is currently present.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 .../util/dwarf-regs-arch/dwarf-regs-x86.c     | 95 +++++++++++++++++++
 tools/perf/util/dwarf-regs.c                  | 55 +++++++++++
 tools/perf/util/include/dwarf-regs.h          |  8 ++
 tools/perf/util/unwind-libdw-arch/Build       |  1 -
 .../util/unwind-libdw-arch/unwind-libdw-x86.c | 54 -----------
 tools/perf/util/unwind-libdw.c                | 70 ++++++++++++--
 tools/perf/util/unwind-libdw.h                |  2 +-
 7 files changed, 222 insertions(+), 63 deletions(-)
 delete mode 100644 tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c

diff --git a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
index f0c42e4d7423..cadef120aeb4 100644
--- a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
+++ b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
@@ -10,6 +10,7 @@
 #include <string.h> /* for strcmp */
 #include <linux/kernel.h> /* for ARRAY_SIZE */
 #include <dwarf-regs.h>
+#include "../../../arch/x86/include/uapi/asm/perf_regs.h"
 
 struct dwarf_regs_idx {
 	const char *name;
@@ -163,3 +164,97 @@ int __get_dwarf_regnum_x86_64(const char *name)
 {
 	return get_regnum(x86_64_regidx_table, ARRAY_SIZE(x86_64_regidx_table), name);
 }
+
+/* Map an x86 perf register number to its i386 DWARF register number, or -ENOENT. */
+int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum)
+{
+	static const int dwarf_i386_regnums[] = {
+		[PERF_REG_X86_AX] = 0,
+		[PERF_REG_X86_BX] = 3,
+		[PERF_REG_X86_CX] = 1,
+		[PERF_REG_X86_DX] = 2,
+		[PERF_REG_X86_SI] = 6,
+		[PERF_REG_X86_DI] = 7,
+		[PERF_REG_X86_BP] = 5,
+		[PERF_REG_X86_SP] = 4,
+		[PERF_REG_X86_IP] = 8,
+		[PERF_REG_X86_FLAGS] = 9,
+		[PERF_REG_X86_CS] = 41,
+		[PERF_REG_X86_SS] = 42,
+		[PERF_REG_X86_DS] = 43,
+		[PERF_REG_X86_ES] = 40,
+		[PERF_REG_X86_FS] = 44,
+		[PERF_REG_X86_GS] = 45,
+		[PERF_REG_X86_XMM0] = 21,
+		[PERF_REG_X86_XMM1] = 22,
+		[PERF_REG_X86_XMM2] = 23,
+		[PERF_REG_X86_XMM3] = 24,
+		[PERF_REG_X86_XMM4] = 25,
+		[PERF_REG_X86_XMM5] = 26,
+		[PERF_REG_X86_XMM6] = 27,
+		[PERF_REG_X86_XMM7] = 28,
+	};
+
+	/* >= rather than >: index ARRAY_SIZE() is already one past the end of the table. */
+	if (perf_regnum < 0 || perf_regnum >= (int)ARRAY_SIZE(dwarf_i386_regnums))
+		return -ENOENT;
+	/* A zero entry marks an unmapped register, except perf register 0 which maps to 0. */
+	if (perf_regnum != 0 && dwarf_i386_regnums[perf_regnum] == 0)
+		return -ENOENT;
+	return dwarf_i386_regnums[perf_regnum];
+}
+
+/* Map an x86 perf register number to its x86_64 DWARF register number, or -ENOENT. */
+int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
+{
+	static const int dwarf_x86_64_regnums[] = {
+		[PERF_REG_X86_AX] = 0,
+		[PERF_REG_X86_BX] = 3,
+		[PERF_REG_X86_CX] = 2,
+		[PERF_REG_X86_DX] = 1,
+		[PERF_REG_X86_SI] = 4,
+		[PERF_REG_X86_DI] = 5,
+		[PERF_REG_X86_BP] = 6,
+		[PERF_REG_X86_SP] = 7,
+		[PERF_REG_X86_IP] = 16,
+		[PERF_REG_X86_FLAGS] = 49,
+		[PERF_REG_X86_CS] = 51,
+		[PERF_REG_X86_SS] = 52,
+		[PERF_REG_X86_DS] = 53,
+		[PERF_REG_X86_ES] = 50,
+		[PERF_REG_X86_FS] = 54,
+		[PERF_REG_X86_GS] = 55,
+		[PERF_REG_X86_R8] = 8,
+		[PERF_REG_X86_R9] = 9,
+		[PERF_REG_X86_R10] = 10,
+		[PERF_REG_X86_R11] = 11,
+		[PERF_REG_X86_R12] = 12,
+		[PERF_REG_X86_R13] = 13,
+		[PERF_REG_X86_R14] = 14,
+		[PERF_REG_X86_R15] = 15,
+		[PERF_REG_X86_XMM0] = 17,
+		[PERF_REG_X86_XMM1] = 18,
+		[PERF_REG_X86_XMM2] = 19,
+		[PERF_REG_X86_XMM3] = 20,
+		[PERF_REG_X86_XMM4] = 21,
+		[PERF_REG_X86_XMM5] = 22,
+		[PERF_REG_X86_XMM6] = 23,
+		[PERF_REG_X86_XMM7] = 24,
+		[PERF_REG_X86_XMM8] = 25,
+		[PERF_REG_X86_XMM9] = 26,
+		[PERF_REG_X86_XMM10] = 27,
+		[PERF_REG_X86_XMM11] = 28,
+		[PERF_REG_X86_XMM12] = 29,
+		[PERF_REG_X86_XMM13] = 30,
+		[PERF_REG_X86_XMM14] = 31,
+		[PERF_REG_X86_XMM15] = 32,
+	};
+
+	/* >= rather than >: index ARRAY_SIZE() is already one past the end of the table. */
+	if (perf_regnum < 0 || perf_regnum >= (int)ARRAY_SIZE(dwarf_x86_64_regnums))
+		return -ENOENT;
+	/* A zero entry marks an unmapped register, except perf register 0 which maps to 0. */
+	if (perf_regnum != 0 && dwarf_x86_64_regnums[perf_regnum] == 0)
+		return -ENOENT;
+	return dwarf_x86_64_regnums[perf_regnum];
+}
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
index ef249dd589e3..1f7d892612df 100644
--- a/tools/perf/util/dwarf-regs.c
+++ b/tools/perf/util/dwarf-regs.c
@@ -103,3 +103,58 @@ int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags
 	free(regname);
 	return reg;
 }
+
+/*
+ * Per ELF machine count of DWARF registers accepted when only_libdw_supported is set.
+ * NOTE(review): counts presumably match libdw's backend frame registers - confirm.
+ */
+static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __maybe_unused)
+{
+	if (machine == EM_X86_64)
+		return 17;
+	if (machine == EM_386)
+		return 9;
+	if (machine == EM_ARM)
+		return 16;
+	if (machine == EM_AARCH64)
+		return 97;
+	if (machine == EM_CSKY)
+		return 38;
+	if (machine == EM_S390)
+		return 32;
+	if (machine == EM_PPC || machine == EM_PPC64)
+		return 145;
+	if (machine == EM_RISCV)
+		return 66;
+	if (machine == EM_SPARC || machine == EM_SPARCV9)
+		return 103;
+	if (machine == EM_LOONGARCH)
+		return 74;
+	/* Machines not listed above report zero registers. */
+	return 0;
+}
+
+/* Map a perf register number to the DWARF register number for @machine, or -ENOENT. */
+int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
+				     unsigned int flags, bool only_libdw_supported)
+{
+	int dwarf_regnum;
+
+	if (machine == EM_X86_64) {
+		dwarf_regnum = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum);
+	} else if (machine == EM_386) {
+		dwarf_regnum = __get_dwarf_regnum_for_perf_regnum_i386(perf_regnum);
+	} else {
+		pr_err("ELF MACHINE %x is not supported.\n", machine);
+		return -ENOENT;
+	}
+
+	/* Errors, and lookups not restricted to the libdw register set, pass through. */
+	if (dwarf_regnum < 0 || !only_libdw_supported)
+		return dwarf_regnum;
+
+	/* Registers at or beyond the libdw frame register count are reported as unmapped. */
+	if (dwarf_regnum >= get_libdw_frame_nregs(machine, flags))
+		return -ENOENT;
+	return dwarf_regnum;
+}
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index bb5413b0fee4..00881f1d45d6 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -101,6 +101,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int
 
 int __get_dwarf_regnum_i386(const char *name);
 int __get_dwarf_regnum_x86_64(const char *name);
+int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum);
+int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum);
 
 /*
  * get_dwarf_regnum - Returns DWARF regnum from register name
@@ -109,6 +111,12 @@ int __get_dwarf_regnum_x86_64(const char *name);
  */
 int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags);
 
+/*
+ * get_dwarf_regnum_for_perf_regnum - Returns DWARF regnum for a perf regnum, or -ENOENT.
+ */
+int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine, unsigned int flags,
+				     bool only_libdw_supported);
+
 void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
 
 #else /* HAVE_LIBDW_SUPPORT */
diff --git a/tools/perf/util/unwind-libdw-arch/Build b/tools/perf/util/unwind-libdw-arch/Build
index ef17a83a7813..5b5682029953 100644
--- a/tools/perf/util/unwind-libdw-arch/Build
+++ b/tools/perf/util/unwind-libdw-arch/Build
@@ -1,4 +1,3 @@
-perf-util-y += unwind-libdw-x86.o
 perf-util-y += unwind-libdw-arm.o
 perf-util-y += unwind-libdw-arm64.o
 perf-util-y += unwind-libdw-csky.o
diff --git a/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c b/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c
deleted file mode 100644
index dd27545a4a68..000000000000
--- a/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <elfutils/libdwfl.h>
-#include "../arch/x86/include/uapi/asm/perf_regs.h"
-#include "util/unwind-libdw.h"
-#include "util/perf_regs.h"
-#include "util/sample.h"
-
-bool libdw_set_initial_registers_x86(Dwfl_Thread *thread, void *arg)
-{
-	struct unwind_info *ui = arg;
-	struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
-	Dwarf_Word dwarf_regs[17];
-	unsigned nregs;
-
-#define REG(r) ({						\
-	Dwarf_Word val = 0;					\
-	perf_reg_value(&val, user_regs, PERF_REG_X86_##r);	\
-	val;							\
-})
-
-	if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
-		dwarf_regs[0] = REG(AX);
-		dwarf_regs[1] = REG(CX);
-		dwarf_regs[2] = REG(DX);
-		dwarf_regs[3] = REG(BX);
-		dwarf_regs[4] = REG(SP);
-		dwarf_regs[5] = REG(BP);
-		dwarf_regs[6] = REG(SI);
-		dwarf_regs[7] = REG(DI);
-		dwarf_regs[8] = REG(IP);
-		nregs = 9;
-	} else {
-		dwarf_regs[0]  = REG(AX);
-		dwarf_regs[1]  = REG(DX);
-		dwarf_regs[2]  = REG(CX);
-		dwarf_regs[3]  = REG(BX);
-		dwarf_regs[4]  = REG(SI);
-		dwarf_regs[5]  = REG(DI);
-		dwarf_regs[6]  = REG(BP);
-		dwarf_regs[7]  = REG(SP);
-		dwarf_regs[8]  = REG(R8);
-		dwarf_regs[9]  = REG(R9);
-		dwarf_regs[10] = REG(R10);
-		dwarf_regs[11] = REG(R11);
-		dwarf_regs[12] = REG(R12);
-		dwarf_regs[13] = REG(R13);
-		dwarf_regs[14] = REG(R14);
-		dwarf_regs[15] = REG(R15);
-		dwarf_regs[16] = REG(IP);
-		nregs = 17;
-	}
-
-	return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
-}
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index c25190cdceb4..055dab921442 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -6,6 +6,7 @@
 #include <errno.h>
 #include "debug.h"
 #include "dso.h"
+#include <dwarf-regs.h>
 #include "unwind.h"
 #include "unwind-libdw.h"
 #include "machine.h"
@@ -225,6 +226,59 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
 	return true;
 }
 
+/*
+ * set_initial_registers callback that seeds libdw with the sampled user registers,
+ * mapping each perf register in the sample mask to its DWARF number for ui->e_machine.
+ */
+static bool libdw_set_initial_registers_generic(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
+	uint16_t e_machine = ui->e_machine;
+	uint64_t ip_perf_reg = perf_arch_reg_ip(e_machine);
+	Dwarf_Word *dwarf_regs;
+	Dwarf_Word val = 0;
+	int e_flags = 0, highest = 0;
+	int dwarf_reg;
+	bool ok;
+
+	/* Pass 1: find the highest DWARF register that any sampled perf register maps to. */
+	for (int perf_reg = 0; perf_reg < 64; perf_reg++) {
+		if (!(user_regs->mask & (1ULL << perf_reg)))
+			continue;
+		dwarf_reg = get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine, e_flags,
+							     /*only_libdw_supported=*/true);
+		if (dwarf_reg > highest)
+			highest = dwarf_reg;
+	}
+
+	/* Zero-filled, so DWARF registers without a sampled value are passed on as 0. */
+	dwarf_regs = calloc(highest + 1, sizeof(*dwarf_regs));
+	if (!dwarf_regs)
+		return false;
+
+	/* Pass 2: copy every sampled register that has a DWARF mapping into its slot. */
+	for (int perf_reg = 0; perf_reg < 64; perf_reg++) {
+		if (!(user_regs->mask & (1ULL << perf_reg)))
+			continue;
+		dwarf_reg = get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine, e_flags,
+							     /*only_libdw_supported=*/true);
+		if (dwarf_reg < 0)
+			continue;
+		val = 0;
+		if (perf_reg_value(&val, user_regs, perf_reg) == 0)
+			dwarf_regs[dwarf_reg] = val;
+	}
+
+	/* Register the sampled instruction pointer as the PC when it can be read. */
+	if (perf_reg_value(&val, user_regs, ip_perf_reg) == 0)
+		dwfl_thread_state_register_pc(thread, val);
+
+	ok = dwfl_thread_state_registers(thread, 0, highest + 1, dwarf_regs);
+	free(dwarf_regs);
+	return ok;
+}
+
 #define DEFINE_DWFL_THREAD_CALLBACKS(arch)                           \
 static const Dwfl_Thread_Callbacks callbacks_##arch = {              \
 	.next_thread           = next_thread,                        \
@@ -232,7 +286,12 @@ static const Dwfl_Thread_Callbacks callbacks_##arch = {              \
 	.set_initial_registers = libdw_set_initial_registers_##arch, \
 }
 
-DEFINE_DWFL_THREAD_CALLBACKS(x86);
+static const Dwfl_Thread_Callbacks callbacks_generic = {
+	.next_thread           = next_thread,
+	.memory_read           = memory_read,
+	.set_initial_registers = libdw_set_initial_registers_generic,
+};
+
 DEFINE_DWFL_THREAD_CALLBACKS(arm);
 DEFINE_DWFL_THREAD_CALLBACKS(arm64);
 DEFINE_DWFL_THREAD_CALLBACKS(csky);
@@ -257,12 +316,8 @@ static const Dwfl_Thread_Callbacks *get_thread_callbacks(const char *arch)
 		return &callbacks_riscv;
 	else if (!strcmp(arch, "s390"))
 		return &callbacks_s390;
-	else if (!strcmp(arch, "x86"))
-		return &callbacks_x86;
 
-	pr_err("Fail to get thread callbacks for arch %s, returns NULL\n",
-	       arch);
-	return NULL;
+	return &callbacks_generic;
 }
 
 static int
@@ -301,6 +356,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 			bool best_effort)
 {
 	struct machine *machine = maps__machine(thread__maps(thread));
+	uint16_t e_machine = thread__e_machine(thread, machine);
 	struct unwind_info *ui, ui_buf = {
 		.sample		= data,
 		.thread		= thread,
@@ -308,9 +364,9 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 		.cb		= cb,
 		.arg		= arg,
 		.max_stack	= max_stack,
+		.e_machine	= e_machine,
 		.best_effort    = best_effort
 	};
-	uint16_t e_machine = thread__e_machine(thread, machine);
 	const char *arch = perf_env__arch(machine->env);
 	Dwarf_Word ip;
 	int err = -EINVAL, i;
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
index 574b29848cce..496e5898e7ef 100644
--- a/tools/perf/util/unwind-libdw.h
+++ b/tools/perf/util/unwind-libdw.h
@@ -9,7 +9,6 @@ struct machine;
 struct perf_sample;
 struct thread;
 
-bool libdw_set_initial_registers_x86(Dwfl_Thread *thread, void *arg);
 bool libdw_set_initial_registers_arm(Dwfl_Thread *thread, void *arg);
 bool libdw_set_initial_registers_arm64(Dwfl_Thread *thread, void *arg);
 bool libdw_set_initial_registers_csky(Dwfl_Thread *thread, void *arg);
@@ -28,6 +27,7 @@ struct unwind_info {
 	void			*arg;
 	int			max_stack;
 	int			idx;
+	uint16_t		e_machine;
 	bool			best_effort;
 	struct unwind_entry	entries[];
 };
-- 
2.52.0.457.g6b5491de43-goog
Re: [PATCH v1 12/23] perf dwarf-regs: Add get_dwarf_regnum_for_perf_regnum and use for x86 unwinding
Posted by Ian Rogers 3 weeks, 1 day ago
On Fri, Jan 16, 2026 at 9:29 PM Ian Rogers <irogers@google.com> wrote:
>
> Add a utility to map a perf register number to a dwarf register number
> for a particular ELF machine type.
>
> Create a generic unwind-libdw initial register initialization routine
> that uses this function and thereby avoids arch specific
> initialization. The unwind-libdw code does:
> 1) compute the maximum dwarf register from the set of sampled user registers,
> 2) allocates a set of dwarf registers,
> 3) copies the sample registers into the appropriate dwarf registers.
>
> This generic solution is initially implemented for use with x86 as
> only get_dwarf_regnum_for_perf_regnum support for x86 is currently present.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  .../util/dwarf-regs-arch/dwarf-regs-x86.c     | 95 +++++++++++++++++++
>  tools/perf/util/dwarf-regs.c                  | 55 +++++++++++
>  tools/perf/util/include/dwarf-regs.h          |  8 ++
>  tools/perf/util/unwind-libdw-arch/Build       |  1 -
>  .../util/unwind-libdw-arch/unwind-libdw-x86.c | 54 -----------
>  tools/perf/util/unwind-libdw.c                | 70 ++++++++++++--
>  tools/perf/util/unwind-libdw.h                |  2 +-
>  7 files changed, 222 insertions(+), 63 deletions(-)
>  delete mode 100644 tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c
>
> diff --git a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> index f0c42e4d7423..cadef120aeb4 100644
> --- a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> +++ b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
> @@ -10,6 +10,7 @@
>  #include <string.h> /* for strcmp */
>  #include <linux/kernel.h> /* for ARRAY_SIZE */
>  #include <dwarf-regs.h>
> +#include "../../../arch/x86/include/uapi/asm/perf_regs.h"
>
>  struct dwarf_regs_idx {
>         const char *name;
> @@ -163,3 +164,97 @@ int __get_dwarf_regnum_x86_64(const char *name)
>  {
>         return get_regnum(x86_64_regidx_table, ARRAY_SIZE(x86_64_regidx_table), name);
>  }
> +
> +int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum)
> +{
> +       static const int dwarf_i386_regnums[] = {
> +               [PERF_REG_X86_AX] = 0,
> +               [PERF_REG_X86_BX] = 3,
> +               [PERF_REG_X86_CX] = 1,
> +               [PERF_REG_X86_DX] = 2,
> +               [PERF_REG_X86_SI] = 6,
> +               [PERF_REG_X86_DI] = 7,
> +               [PERF_REG_X86_BP] = 5,
> +               [PERF_REG_X86_SP] = 4,
> +               [PERF_REG_X86_IP] = 8,
> +               [PERF_REG_X86_FLAGS] = 9,
> +               [PERF_REG_X86_CS] = 41,
> +               [PERF_REG_X86_SS] = 42,
> +               [PERF_REG_X86_DS] = 43,
> +               [PERF_REG_X86_ES] = 40,
> +               [PERF_REG_X86_FS] = 44,
> +               [PERF_REG_X86_GS] = 45,
> +               [PERF_REG_X86_XMM0] = 21,
> +               [PERF_REG_X86_XMM1] = 22,
> +               [PERF_REG_X86_XMM2] = 23,
> +               [PERF_REG_X86_XMM3] = 24,
> +               [PERF_REG_X86_XMM4] = 25,
> +               [PERF_REG_X86_XMM5] = 26,
> +               [PERF_REG_X86_XMM6] = 27,
> +               [PERF_REG_X86_XMM7] = 28,
> +       };
> +
> +       if (perf_regnum == 0)
> +               return 0;
> +
> +       if (perf_regnum <  0 || perf_regnum > (int)ARRAY_SIZE(dwarf_i386_regnums) ||
> +           dwarf_i386_regnums[perf_regnum] == 0)
> +               return -ENOENT;
> +
> +       return dwarf_i386_regnums[perf_regnum];
> +}
> +
> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
> +{
> +       static const int dwarf_x86_64_regnums[] = {
> +               [PERF_REG_X86_AX] = 0,
> +               [PERF_REG_X86_BX] = 3,
> +               [PERF_REG_X86_CX] = 2,
> +               [PERF_REG_X86_DX] = 1,
> +               [PERF_REG_X86_SI] = 4,
> +               [PERF_REG_X86_DI] = 5,
> +               [PERF_REG_X86_BP] = 6,
> +               [PERF_REG_X86_SP] = 7,
> +               [PERF_REG_X86_IP] = 16,
> +               [PERF_REG_X86_FLAGS] = 49,
> +               [PERF_REG_X86_CS] = 51,
> +               [PERF_REG_X86_SS] = 52,
> +               [PERF_REG_X86_DS] = 53,
> +               [PERF_REG_X86_ES] = 50,
> +               [PERF_REG_X86_FS] = 54,
> +               [PERF_REG_X86_GS] = 55,
> +               [PERF_REG_X86_R8] = 8,
> +               [PERF_REG_X86_R9] = 9,
> +               [PERF_REG_X86_R10] = 10,
> +               [PERF_REG_X86_R11] = 11,
> +               [PERF_REG_X86_R12] = 12,
> +               [PERF_REG_X86_R13] = 13,
> +               [PERF_REG_X86_R14] = 14,
> +               [PERF_REG_X86_R15] = 15,
> +               [PERF_REG_X86_XMM0] = 17,
> +               [PERF_REG_X86_XMM1] = 18,
> +               [PERF_REG_X86_XMM2] = 19,
> +               [PERF_REG_X86_XMM3] = 20,
> +               [PERF_REG_X86_XMM4] = 21,
> +               [PERF_REG_X86_XMM5] = 22,
> +               [PERF_REG_X86_XMM6] = 23,
> +               [PERF_REG_X86_XMM7] = 24,
> +               [PERF_REG_X86_XMM8] = 25,
> +               [PERF_REG_X86_XMM9] = 26,
> +               [PERF_REG_X86_XMM10] = 27,
> +               [PERF_REG_X86_XMM11] = 28,
> +               [PERF_REG_X86_XMM12] = 29,
> +               [PERF_REG_X86_XMM13] = 30,
> +               [PERF_REG_X86_XMM14] = 31,
> +               [PERF_REG_X86_XMM15] = 32,
> +       };
> +
> +       if (perf_regnum == 0)
> +               return 0;
> +
> +       if (perf_regnum <  0 || perf_regnum > (int)ARRAY_SIZE(dwarf_x86_64_regnums) ||
> +           dwarf_x86_64_regnums[perf_regnum] == 0)
> +               return -ENOENT;
> +
> +       return dwarf_x86_64_regnums[perf_regnum];
> +}
> diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
> index ef249dd589e3..1f7d892612df 100644
> --- a/tools/perf/util/dwarf-regs.c
> +++ b/tools/perf/util/dwarf-regs.c
> @@ -103,3 +103,58 @@ int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags
>         free(regname);
>         return reg;
>  }
> +
> +static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __maybe_unused)
> +{
> +       switch (machine) {
> +       case EM_X86_64:
> +               return 17;
> +       case EM_386:
> +               return 9;
> +       case EM_ARM:
> +               return 16;
> +       case EM_AARCH64:
> +               return 97;
> +       case EM_CSKY:
> +               return 38;
> +       case EM_S390:
> +               return 32;
> +       case EM_PPC:
> +       case EM_PPC64:
> +               return 145;
> +       case EM_RISCV:
> +               return 66;
> +       case EM_SPARC:
> +       case EM_SPARCV9:
> +               return 103;
> +       case EM_LOONGARCH:
> +               return 74;
> +       default:
> +               return 0;
> +       }
> +}
> +
> +int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
> +                                    unsigned int flags, bool only_libdw_supported)
> +{
> +       int reg;
> +
> +       switch (machine) {
> +       case EM_X86_64:
> +               reg = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum);
> +               break;
> +       case EM_386:
> +               reg = __get_dwarf_regnum_for_perf_regnum_i386(perf_regnum);
> +               break;
> +       default:
> +               pr_err("ELF MACHINE %x is not supported.\n", machine);
> +               return -ENOENT;
> +       }
> +       if (reg >= 0 && only_libdw_supported) {
> +               int nregs = get_libdw_frame_nregs(machine, flags);
> +
> +               if (reg >= nregs)
> +                       reg = -ENOENT;
> +       }
> +       return reg;
> +}
> diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
> index bb5413b0fee4..00881f1d45d6 100644
> --- a/tools/perf/util/include/dwarf-regs.h
> +++ b/tools/perf/util/include/dwarf-regs.h
> @@ -101,6 +101,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int
>
>  int __get_dwarf_regnum_i386(const char *name);
>  int __get_dwarf_regnum_x86_64(const char *name);
> +int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum);
> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum);
>
>  /*
>   * get_dwarf_regnum - Returns DWARF regnum from register name
> @@ -109,6 +111,12 @@ int __get_dwarf_regnum_x86_64(const char *name);
>   */
>  int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags);
>
> +/*
> + * get_dwarf_regnum - Returns DWARF regnum from perf register number.
> + */
> +int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine, unsigned int flags,
> +                                    bool only_libdw_supported);
> +
>  void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
>
>  #else /* HAVE_LIBDW_SUPPORT */
> diff --git a/tools/perf/util/unwind-libdw-arch/Build b/tools/perf/util/unwind-libdw-arch/Build
> index ef17a83a7813..5b5682029953 100644
> --- a/tools/perf/util/unwind-libdw-arch/Build
> +++ b/tools/perf/util/unwind-libdw-arch/Build
> @@ -1,4 +1,3 @@
> -perf-util-y += unwind-libdw-x86.o
>  perf-util-y += unwind-libdw-arm.o
>  perf-util-y += unwind-libdw-arm64.o
>  perf-util-y += unwind-libdw-csky.o
> diff --git a/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c b/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c
> deleted file mode 100644
> index dd27545a4a68..000000000000
> --- a/tools/perf/util/unwind-libdw-arch/unwind-libdw-x86.c
> +++ /dev/null
> @@ -1,54 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -#include <elfutils/libdwfl.h>
> -#include "../arch/x86/include/uapi/asm/perf_regs.h"
> -#include "util/unwind-libdw.h"
> -#include "util/perf_regs.h"
> -#include "util/sample.h"
> -
> -bool libdw_set_initial_registers_x86(Dwfl_Thread *thread, void *arg)
> -{
> -       struct unwind_info *ui = arg;
> -       struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
> -       Dwarf_Word dwarf_regs[17];
> -       unsigned nregs;
> -
> -#define REG(r) ({                                              \
> -       Dwarf_Word val = 0;                                     \
> -       perf_reg_value(&val, user_regs, PERF_REG_X86_##r);      \
> -       val;                                                    \
> -})
> -
> -       if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
> -               dwarf_regs[0] = REG(AX);
> -               dwarf_regs[1] = REG(CX);
> -               dwarf_regs[2] = REG(DX);
> -               dwarf_regs[3] = REG(BX);
> -               dwarf_regs[4] = REG(SP);
> -               dwarf_regs[5] = REG(BP);
> -               dwarf_regs[6] = REG(SI);
> -               dwarf_regs[7] = REG(DI);
> -               dwarf_regs[8] = REG(IP);
> -               nregs = 9;
> -       } else {
> -               dwarf_regs[0]  = REG(AX);
> -               dwarf_regs[1]  = REG(DX);
> -               dwarf_regs[2]  = REG(CX);
> -               dwarf_regs[3]  = REG(BX);
> -               dwarf_regs[4]  = REG(SI);
> -               dwarf_regs[5]  = REG(DI);
> -               dwarf_regs[6]  = REG(BP);
> -               dwarf_regs[7]  = REG(SP);
> -               dwarf_regs[8]  = REG(R8);
> -               dwarf_regs[9]  = REG(R9);
> -               dwarf_regs[10] = REG(R10);
> -               dwarf_regs[11] = REG(R11);
> -               dwarf_regs[12] = REG(R12);
> -               dwarf_regs[13] = REG(R13);
> -               dwarf_regs[14] = REG(R14);
> -               dwarf_regs[15] = REG(R15);
> -               dwarf_regs[16] = REG(IP);
> -               nregs = 17;
> -       }
> -
> -       return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
> -}
> diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
> index c25190cdceb4..055dab921442 100644
> --- a/tools/perf/util/unwind-libdw.c
> +++ b/tools/perf/util/unwind-libdw.c
> @@ -6,6 +6,7 @@
>  #include <errno.h>
>  #include "debug.h"
>  #include "dso.h"
> +#include <dwarf-regs.h>
>  #include "unwind.h"
>  #include "unwind-libdw.h"
>  #include "machine.h"
> @@ -225,6 +226,59 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
>         return true;
>  }
>
> +static bool libdw_set_initial_registers_generic(Dwfl_Thread *thread, void *arg)
> +{
> +       struct unwind_info *ui = arg;
> +       struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
> +       Dwarf_Word *dwarf_regs;
> +       int max_dwarf_reg = 0;
> +       bool ret;
> +       uint16_t e_machine = ui->e_machine;
> +       int e_flags = 0;
> +       uint64_t ip_perf_reg = perf_arch_reg_ip(e_machine);
> +       Dwarf_Word val = 0;
> +
> +
> +       /*
> +        * For every possible perf register in the bitmap determine the dwarf
> +        * register and use to compute the max.
> +        */
> +       for (int perf_reg = 0; perf_reg < 64; perf_reg++) {
> +               if (user_regs->mask & (1ULL << perf_reg)) {
> +                       int dwarf_reg =
> +                               get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
> +                                                                e_flags,
> +                                                                /*only_libdw_supported=*/true);
> +                       if (dwarf_reg > max_dwarf_reg)
> +                               max_dwarf_reg = dwarf_reg;
> +               }
> +       }
> +
> +       dwarf_regs = calloc(max_dwarf_reg + 1, sizeof(*dwarf_regs));
> +       if (!dwarf_regs)
> +               return false;
> +
> +       for (int perf_reg = 0; perf_reg < 64; perf_reg++) {
> +               if (user_regs->mask & (1ULL << perf_reg)) {
> +                       int dwarf_reg =
> +                               get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
> +                                                                e_flags,
> +                                                                /*only_libdw_supported=*/true);
> +                       if (dwarf_reg >= 0) {
> +                               val = 0;
> +                               if (perf_reg_value(&val, user_regs, perf_reg) == 0)
> +                                       dwarf_regs[dwarf_reg] = val;
> +                       }
> +               }
> +       }
> +       if (perf_reg_value(&val, user_regs, ip_perf_reg) == 0)
> +               dwfl_thread_state_register_pc(thread, val);
> +
> +       ret = dwfl_thread_state_registers(thread, 0, max_dwarf_reg + 1, dwarf_regs);

I always forget something. There's possibly a TODO/alternative here
where rather than computing the max dwarf register, copying the
registers and calling dwfl_thread_state_registers, the code could read
the register then call dwfl_thread_state_registers just for that 1
register. The code as it is here is closest to the code as it was
before the change, but removing a memory allocation/free may have some
benefit.

Thanks,
Ian

> +       free(dwarf_regs);
> +       return ret;
> +}
> +
>  #define DEFINE_DWFL_THREAD_CALLBACKS(arch)                           \
>  static const Dwfl_Thread_Callbacks callbacks_##arch = {              \
>         .next_thread           = next_thread,                        \
> @@ -232,7 +286,12 @@ static const Dwfl_Thread_Callbacks callbacks_##arch = {              \
>         .set_initial_registers = libdw_set_initial_registers_##arch, \
>  }
>
> -DEFINE_DWFL_THREAD_CALLBACKS(x86);
> +static const Dwfl_Thread_Callbacks callbacks_generic = {
> +       .next_thread           = next_thread,
> +       .memory_read           = memory_read,
> +       .set_initial_registers = libdw_set_initial_registers_generic,
> +};
> +
>  DEFINE_DWFL_THREAD_CALLBACKS(arm);
>  DEFINE_DWFL_THREAD_CALLBACKS(arm64);
>  DEFINE_DWFL_THREAD_CALLBACKS(csky);
> @@ -257,12 +316,8 @@ static const Dwfl_Thread_Callbacks *get_thread_callbacks(const char *arch)
>                 return &callbacks_riscv;
>         else if (!strcmp(arch, "s390"))
>                 return &callbacks_s390;
> -       else if (!strcmp(arch, "x86"))
> -               return &callbacks_x86;
>
> -       pr_err("Fail to get thread callbacks for arch %s, returns NULL\n",
> -              arch);
> -       return NULL;
> +       return &callbacks_generic;
>  }
>
>  static int
> @@ -301,6 +356,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
>                         bool best_effort)
>  {
>         struct machine *machine = maps__machine(thread__maps(thread));
> +       uint16_t e_machine = thread__e_machine(thread, machine);
>         struct unwind_info *ui, ui_buf = {
>                 .sample         = data,
>                 .thread         = thread,
> @@ -308,9 +364,9 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
>                 .cb             = cb,
>                 .arg            = arg,
>                 .max_stack      = max_stack,
> +               .e_machine      = e_machine,
>                 .best_effort    = best_effort
>         };
> -       uint16_t e_machine = thread__e_machine(thread, machine);
>         const char *arch = perf_env__arch(machine->env);
>         Dwarf_Word ip;
>         int err = -EINVAL, i;
> diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
> index 574b29848cce..496e5898e7ef 100644
> --- a/tools/perf/util/unwind-libdw.h
> +++ b/tools/perf/util/unwind-libdw.h
> @@ -9,7 +9,6 @@ struct machine;
>  struct perf_sample;
>  struct thread;
>
> -bool libdw_set_initial_registers_x86(Dwfl_Thread *thread, void *arg);
>  bool libdw_set_initial_registers_arm(Dwfl_Thread *thread, void *arg);
>  bool libdw_set_initial_registers_arm64(Dwfl_Thread *thread, void *arg);
>  bool libdw_set_initial_registers_csky(Dwfl_Thread *thread, void *arg);
> @@ -28,6 +27,7 @@ struct unwind_info {
>         void                    *arg;
>         int                     max_stack;
>         int                     idx;
> +       uint16_t                e_machine;
>         bool                    best_effort;
>         struct unwind_entry     entries[];
>  };
> --
> 2.52.0.457.g6b5491de43-goog
>