1
The following changes since commit fb7e7990342e59cf67dbd895c1a1e3fb1741df7a:
1
Version 3: Rebase and fix a minor patch conflict.
2
2
3
tests/qtest/qom-test: Do not print tested properties by default (2023-01-16 15:00:57 +0000)
3
4
r~
5
6
7
The following changes since commit c6f5e042d89e79206cd1ce5525d3df219f13c3cc:
8
9
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210913-3' into staging (2021-09-13 21:06:15 +0100)
4
10
5
are available in the Git repository at:
11
are available in the Git repository at:
6
12
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230116
13
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210914
8
14
9
for you to fetch changes up to 61710a7e23a63546da0071ea32adb96476fa5d07:
15
for you to fetch changes up to a5b759b6dca7daf87fa5007a7f5784bf22f3830f:
10
16
11
accel/tcg: Split out cpu_exec_{setjmp,loop} (2023-01-16 10:14:12 -1000)
17
tcg/arm: More use of the TCGReg enum (2021-09-14 07:59:43 -0700)
12
18
13
----------------------------------------------------------------
19
----------------------------------------------------------------
14
- Reorg cpu_tb_exec around setjmp.
20
Fix translation race condition for user-only.
15
- Use __attribute__((target)) for buffer_is_zero.
21
Fix tcg/i386 encoding for VPSLLVQ, VPSRLVQ.
16
- Add perfmap and jitdump for perf support.
22
Fix tcg/arm tcg_out_vec_op signature.
23
Fix tcg/ppc (32bit) build with clang.
24
Remove dupluate TCG_KICK_PERIOD definition.
25
Remove unused tcg_global_reg_new.
26
Restrict cpu_exec_interrupt and its callees to sysemu.
27
Cleanups for tcg/arm.
17
28
18
----------------------------------------------------------------
29
----------------------------------------------------------------
30
Bin Meng (1):
31
tcg: Remove tcg_global_reg_new defines
32
19
Ilya Leoshkevich (3):
33
Ilya Leoshkevich (3):
20
linux-user: Clean up when exiting due to a signal
34
accel/tcg: Add DisasContextBase argument to translator_ld*
21
accel/tcg: Add debuginfo support
35
accel/tcg: Clear PAGE_WRITE before translation
22
tcg: add perfmap and jitdump
36
accel/tcg/user-exec: Fix read-modify-write of code on s390 hosts
23
37
24
Richard Henderson (2):
38
Jose R. Ziviani (1):
25
util/bufferiszero: Use __attribute__((target)) for avx2/avx512
39
tcg/arm: Fix tcg_out_vec_op function signature
26
accel/tcg: Split out cpu_exec_{setjmp,loop}
27
40
28
docs/devel/tcg.rst | 23 +++
41
Luc Michel (1):
29
meson.build | 16 +-
42
accel/tcg: remove redundant TCG_KICK_PERIOD define
30
accel/tcg/debuginfo.h | 77 ++++++++++
43
31
accel/tcg/perf.h | 49 ++++++
44
Philippe Mathieu-Daudé (25):
32
accel/tcg/cpu-exec.c | 111 +++++++-------
45
target/avr: Remove pointless use of CONFIG_USER_ONLY definition
33
accel/tcg/debuginfo.c | 96 ++++++++++++
46
target/i386: Restrict sysemu-only fpu_helper helpers
34
accel/tcg/perf.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++
47
target/i386: Simplify TARGET_X86_64 #ifdef'ry
35
accel/tcg/translate-all.c | 7 +
48
target/xtensa: Restrict do_transaction_failed() to sysemu
36
hw/core/loader.c | 5 +
49
accel/tcg: Rename user-mode do_interrupt hack as fake_user_interrupt
37
linux-user/elfload.c | 3 +
50
target/alpha: Restrict cpu_exec_interrupt() handler to sysemu
38
linux-user/exit.c | 2 +
51
target/arm: Restrict cpu_exec_interrupt() handler to sysemu
39
linux-user/main.c | 15 ++
52
target/cris: Restrict cpu_exec_interrupt() handler to sysemu
40
linux-user/signal.c | 8 +-
53
target/hppa: Restrict cpu_exec_interrupt() handler to sysemu
41
softmmu/vl.c | 11 ++
54
target/i386: Restrict cpu_exec_interrupt() handler to sysemu
42
tcg/tcg.c | 2 +
55
target/i386: Move x86_cpu_exec_interrupt() under sysemu/ folder
43
util/bufferiszero.c | 41 +----
56
target/m68k: Restrict cpu_exec_interrupt() handler to sysemu
44
accel/tcg/meson.build | 2 +
57
target/microblaze: Restrict cpu_exec_interrupt() handler to sysemu
45
linux-user/meson.build | 1 +
58
target/mips: Restrict cpu_exec_interrupt() handler to sysemu
46
qemu-options.hx | 20 +++
59
target/nios2: Restrict cpu_exec_interrupt() handler to sysemu
47
19 files changed, 763 insertions(+), 101 deletions(-)
60
target/openrisc: Restrict cpu_exec_interrupt() handler to sysemu
48
create mode 100644 accel/tcg/debuginfo.h
61
target/ppc: Restrict cpu_exec_interrupt() handler to sysemu
49
create mode 100644 accel/tcg/perf.h
62
target/riscv: Restrict cpu_exec_interrupt() handler to sysemu
50
create mode 100644 accel/tcg/debuginfo.c
63
target/sh4: Restrict cpu_exec_interrupt() handler to sysemu
51
create mode 100644 accel/tcg/perf.c
64
target/sparc: Restrict cpu_exec_interrupt() handler to sysemu
65
target/rx: Restrict cpu_exec_interrupt() handler to sysemu
66
target/xtensa: Restrict cpu_exec_interrupt() handler to sysemu
67
accel/tcg: Restrict TCGCPUOps::cpu_exec_interrupt() to sysemu
68
user: Remove cpu_get_pic_interrupt() stubs
69
user: Mark cpu_loop() with noreturn attribute
70
71
Richard Henderson (13):
72
tcg/i386: Split P_VEXW from P_REXW
73
tcg/ppc: Replace TCG_TARGET_CALL_DARWIN with _CALL_DARWIN
74
tcg/ppc: Ensure _CALL_SYSV is set for 32-bit ELF
75
tcg/arm: Remove fallback definition of __ARM_ARCH
76
tcg/arm: Standardize on tcg_out_<branch>_{reg,imm}
77
tcg/arm: Simplify use_armv5t_instructions
78
tcg/arm: Support armv4t in tcg_out_goto and tcg_out_call
79
tcg/arm: Split out tcg_out_ldstm
80
tcg/arm: Simplify usage of encode_imm
81
tcg/arm: Drop inline markers
82
tcg/arm: Give enum arm_cond_code_e a typedef and use it
83
tcg/arm: More use of the ARMInsn enum
84
tcg/arm: More use of the TCGReg enum
85
86
bsd-user/qemu.h | 2 +-
87
include/exec/translate-all.h | 1 +
88
include/exec/translator.h | 44 +--
89
include/hw/core/tcg-cpu-ops.h | 26 +-
90
include/tcg/tcg-op.h | 2 -
91
linux-user/qemu.h | 2 +-
92
target/alpha/cpu.h | 2 +-
93
target/arm/arm_ldst.h | 12 +-
94
target/arm/cpu.h | 3 +-
95
target/cris/cpu.h | 2 +-
96
target/hppa/cpu.h | 4 +-
97
target/i386/cpu.h | 3 +
98
target/i386/tcg/helper-tcg.h | 2 +
99
target/m68k/cpu.h | 2 +
100
target/microblaze/cpu.h | 2 +
101
target/mips/tcg/tcg-internal.h | 5 +-
102
target/openrisc/cpu.h | 5 +-
103
target/ppc/cpu.h | 4 +-
104
target/riscv/cpu.h | 2 +-
105
target/rx/cpu.h | 2 +
106
target/sh4/cpu.h | 4 +-
107
target/xtensa/cpu.h | 2 +
108
tcg/arm/tcg-target.h | 27 +-
109
accel/tcg/cpu-exec.c | 14 +-
110
accel/tcg/tcg-accel-ops-rr.c | 2 -
111
accel/tcg/translate-all.c | 59 ++--
112
accel/tcg/translator.c | 39 +++
113
accel/tcg/user-exec.c | 48 ++-
114
bsd-user/i386/target_arch_cpu.c | 5 -
115
bsd-user/x86_64/target_arch_cpu.c | 5 -
116
linux-user/main.c | 7 -
117
target/alpha/cpu.c | 2 +-
118
target/alpha/helper.c | 5 +-
119
target/alpha/translate.c | 2 +-
120
target/arm/cpu.c | 7 +-
121
target/arm/cpu_tcg.c | 6 +-
122
target/arm/translate-a64.c | 2 +-
123
target/arm/translate.c | 9 +-
124
target/avr/cpu.c | 3 -
125
target/cris/cpu.c | 4 +-
126
target/cris/helper.c | 17 +-
127
target/hexagon/translate.c | 3 +-
128
target/hppa/cpu.c | 2 +-
129
target/hppa/int_helper.c | 7 +-
130
target/hppa/translate.c | 5 +-
131
target/i386/tcg/seg_helper.c | 74 +----
132
target/i386/tcg/sysemu/seg_helper.c | 62 ++++
133
target/i386/tcg/tcg-cpu.c | 8 +-
134
target/i386/tcg/translate.c | 10 +-
135
target/m68k/cpu.c | 2 +-
136
target/m68k/op_helper.c | 16 +-
137
target/m68k/translate.c | 2 +-
138
target/microblaze/cpu.c | 2 +-
139
target/microblaze/helper.c | 13 +-
140
target/mips/cpu.c | 2 +-
141
target/mips/tcg/exception.c | 18 --
142
target/mips/tcg/sysemu/tlb_helper.c | 18 ++
143
target/mips/tcg/translate.c | 8 +-
144
target/mips/tcg/user/tlb_helper.c | 5 -
145
target/nios2/cpu.c | 5 +-
146
target/openrisc/cpu.c | 2 +-
147
target/openrisc/interrupt.c | 2 -
148
target/openrisc/translate.c | 2 +-
149
target/ppc/cpu_init.c | 2 +-
150
target/ppc/excp_helper.c | 21 +-
151
target/ppc/translate.c | 5 +-
152
target/riscv/cpu.c | 2 +-
153
target/riscv/cpu_helper.c | 5 -
154
target/riscv/translate.c | 5 +-
155
target/rx/cpu.c | 2 +-
156
target/rx/helper.c | 4 +
157
target/s390x/tcg/translate.c | 16 +-
158
target/sh4/cpu.c | 2 +-
159
target/sh4/helper.c | 9 +-
160
target/sh4/translate.c | 4 +-
161
target/sparc/cpu.c | 4 +-
162
target/sparc/translate.c | 2 +-
163
target/xtensa/cpu.c | 2 +-
164
target/xtensa/exc_helper.c | 7 +-
165
target/xtensa/translate.c | 5 +-
166
target/mips/tcg/micromips_translate.c.inc | 2 +-
167
target/mips/tcg/mips16e_translate.c.inc | 4 +-
168
target/mips/tcg/nanomips_translate.c.inc | 4 +-
169
tcg/arm/tcg-target.c.inc | 517 ++++++++++++++++--------------
170
tcg/i386/tcg-target.c.inc | 13 +-
171
tcg/ppc/tcg-target.c.inc | 25 +-
172
target/openrisc/meson.build | 6 +-
173
87 files changed, 702 insertions(+), 630 deletions(-)
174
diff view generated by jsdifflib
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
When exiting due to an exit() syscall, qemu-user calls
4
preexit_cleanup(), but this is currently not the case when exiting due
5
to a signal. This leads to various buffers not being flushed (e.g.,
6
for gprof, for gcov, and for the upcoming perf support).
7
8
Add the missing call.
9
10
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
3
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
[rth: Split out of a larger patch.]
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-Id: <20230112152013.125680-2-iii@linux.ibm.com>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
6
---
16
linux-user/signal.c | 8 +++++---
7
include/exec/translator.h | 9 +++++----
17
1 file changed, 5 insertions(+), 3 deletions(-)
8
target/arm/arm_ldst.h | 12 ++++++------
9
target/alpha/translate.c | 2 +-
10
target/arm/translate-a64.c | 2 +-
11
target/arm/translate.c | 9 +++++----
12
target/hexagon/translate.c | 3 ++-
13
target/hppa/translate.c | 2 +-
14
target/i386/tcg/translate.c | 10 +++++-----
15
target/m68k/translate.c | 2 +-
16
target/mips/tcg/translate.c | 8 ++++----
17
target/openrisc/translate.c | 2 +-
18
target/ppc/translate.c | 5 +++--
19
target/riscv/translate.c | 5 +++--
20
target/s390x/tcg/translate.c | 16 +++++++++-------
21
target/sh4/translate.c | 4 ++--
22
target/sparc/translate.c | 2 +-
23
target/xtensa/translate.c | 5 +++--
24
target/mips/tcg/micromips_translate.c.inc | 2 +-
25
target/mips/tcg/mips16e_translate.c.inc | 4 ++--
26
target/mips/tcg/nanomips_translate.c.inc | 4 ++--
27
20 files changed, 58 insertions(+), 50 deletions(-)
18
28
19
diff --git a/linux-user/signal.c b/linux-user/signal.c
29
diff --git a/include/exec/translator.h b/include/exec/translator.h
20
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
21
--- a/linux-user/signal.c
31
--- a/include/exec/translator.h
22
+++ b/linux-user/signal.c
32
+++ b/include/exec/translator.h
23
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
33
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
24
34
25
/* abort execution with signal */
35
#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \
26
static G_NORETURN
36
static inline type \
27
-void dump_core_and_abort(int target_sig)
37
- fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap) \
28
+void dump_core_and_abort(CPUArchState *cpu_env, int target_sig)
38
+ fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
29
{
39
+ abi_ptr pc, bool do_swap) \
30
CPUState *cpu = thread_cpu;
40
{ \
31
CPUArchState *env = cpu->env_ptr;
41
type ret = load_fn(env, pc); \
32
@@ -XXX,XX +XXX,XX @@ void dump_core_and_abort(int target_sig)
42
if (do_swap) { \
33
target_sig, strsignal(host_sig), "core dumped" );
43
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
34
}
44
plugin_insn_append(&ret, sizeof(ret)); \
35
45
return ret; \
36
+ preexit_cleanup(cpu_env, 128 + target_sig);
46
} \
37
+
47
- \
38
/* The proper exit code for dying from an uncaught signal is
48
- static inline type fullname(CPUArchState *env, abi_ptr pc) \
39
* -<signal>. The kernel doesn't allow exit() or _exit() to pass
49
+ static inline type fullname(CPUArchState *env, \
40
* a negative value. To get the proper exit code we need to
50
+ DisasContextBase *dcbase, abi_ptr pc) \
41
@@ -XXX,XX +XXX,XX @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig,
51
{ \
42
sig != TARGET_SIGURG &&
52
- return fullname ## _swap(env, pc, false); \
43
sig != TARGET_SIGWINCH &&
53
+ return fullname ## _swap(env, dcbase, pc, false); \
44
sig != TARGET_SIGCONT) {
54
}
45
- dump_core_and_abort(sig);
55
46
+ dump_core_and_abort(cpu_env, sig);
56
GEN_TRANSLATOR_LD(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)
47
}
57
diff --git a/target/arm/arm_ldst.h b/target/arm/arm_ldst.h
48
} else if (handler == TARGET_SIG_IGN) {
58
index XXXXXXX..XXXXXXX 100644
49
/* ignore sig */
59
--- a/target/arm/arm_ldst.h
50
} else if (handler == TARGET_SIG_ERR) {
60
+++ b/target/arm/arm_ldst.h
51
- dump_core_and_abort(sig);
61
@@ -XXX,XX +XXX,XX @@
52
+ dump_core_and_abort(cpu_env, sig);
62
#include "qemu/bswap.h"
63
64
/* Load an instruction and return it in the standard little-endian order */
65
-static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
66
- bool sctlr_b)
67
+static inline uint32_t arm_ldl_code(CPUARMState *env, DisasContextBase *s,
68
+ target_ulong addr, bool sctlr_b)
69
{
70
- return translator_ldl_swap(env, addr, bswap_code(sctlr_b));
71
+ return translator_ldl_swap(env, s, addr, bswap_code(sctlr_b));
72
}
73
74
/* Ditto, for a halfword (Thumb) instruction */
75
-static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
76
- bool sctlr_b)
77
+static inline uint16_t arm_lduw_code(CPUARMState *env, DisasContextBase* s,
78
+ target_ulong addr, bool sctlr_b)
79
{
80
#ifndef CONFIG_USER_ONLY
81
/* In big-endian (BE32) mode, adjacent Thumb instructions have been swapped
82
@@ -XXX,XX +XXX,XX @@ static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
83
addr ^= 2;
84
}
85
#endif
86
- return translator_lduw_swap(env, addr, bswap_code(sctlr_b));
87
+ return translator_lduw_swap(env, s, addr, bswap_code(sctlr_b));
88
}
89
90
#endif
91
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/alpha/translate.c
94
+++ b/target/alpha/translate.c
95
@@ -XXX,XX +XXX,XX @@ static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
96
{
97
DisasContext *ctx = container_of(dcbase, DisasContext, base);
98
CPUAlphaState *env = cpu->env_ptr;
99
- uint32_t insn = translator_ldl(env, ctx->base.pc_next);
100
+ uint32_t insn = translator_ldl(env, &ctx->base, ctx->base.pc_next);
101
102
ctx->base.pc_next += 4;
103
ctx->base.is_jmp = translate_one(ctx, insn);
104
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/translate-a64.c
107
+++ b/target/arm/translate-a64.c
108
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
109
}
110
111
s->pc_curr = s->base.pc_next;
112
- insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
113
+ insn = arm_ldl_code(env, &s->base, s->base.pc_next, s->sctlr_b);
114
s->insn = insn;
115
s->base.pc_next += 4;
116
117
diff --git a/target/arm/translate.c b/target/arm/translate.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/target/arm/translate.c
120
+++ b/target/arm/translate.c
121
@@ -XXX,XX +XXX,XX @@ static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
122
* boundary, so we cross the page if the first 16 bits indicate
123
* that this is a 32 bit insn.
124
*/
125
- uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
126
+ uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
127
128
return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
129
}
130
@@ -XXX,XX +XXX,XX @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
131
}
132
133
dc->pc_curr = dc->base.pc_next;
134
- insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
135
+ insn = arm_ldl_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
136
dc->insn = insn;
137
dc->base.pc_next += 4;
138
disas_arm_insn(dc, insn);
139
@@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
140
}
141
142
dc->pc_curr = dc->base.pc_next;
143
- insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
144
+ insn = arm_lduw_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
145
is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
146
dc->base.pc_next += 2;
147
if (!is_16bit) {
148
- uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
149
+ uint32_t insn2 = arm_lduw_code(env, &dc->base, dc->base.pc_next,
150
+ dc->sctlr_b);
151
152
insn = insn << 16 | insn2;
153
dc->base.pc_next += 2;
154
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
155
index XXXXXXX..XXXXXXX 100644
156
--- a/target/hexagon/translate.c
157
+++ b/target/hexagon/translate.c
158
@@ -XXX,XX +XXX,XX @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
159
memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
160
for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
161
words[nwords] =
162
- translator_ldl(env, ctx->base.pc_next + nwords * sizeof(uint32_t));
163
+ translator_ldl(env, &ctx->base,
164
+ ctx->base.pc_next + nwords * sizeof(uint32_t));
165
found_end = is_packet_end(words[nwords]);
166
}
167
if (!found_end) {
168
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
169
index XXXXXXX..XXXXXXX 100644
170
--- a/target/hppa/translate.c
171
+++ b/target/hppa/translate.c
172
@@ -XXX,XX +XXX,XX @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
173
{
174
/* Always fetch the insn, even if nullified, so that we check
175
the page permissions for execute. */
176
- uint32_t insn = translator_ldl(env, ctx->base.pc_next);
177
+ uint32_t insn = translator_ldl(env, &ctx->base, ctx->base.pc_next);
178
179
/* Set up the IA queue for the next insn.
180
This will be overwritten by a branch. */
181
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/target/i386/tcg/translate.c
184
+++ b/target/i386/tcg/translate.c
185
@@ -XXX,XX +XXX,XX @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
186
187
static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
188
{
189
- return translator_ldub(env, advance_pc(env, s, 1));
190
+ return translator_ldub(env, &s->base, advance_pc(env, s, 1));
191
}
192
193
static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
194
{
195
- return translator_ldsw(env, advance_pc(env, s, 2));
196
+ return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
197
}
198
199
static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
200
{
201
- return translator_lduw(env, advance_pc(env, s, 2));
202
+ return translator_lduw(env, &s->base, advance_pc(env, s, 2));
203
}
204
205
static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
206
{
207
- return translator_ldl(env, advance_pc(env, s, 4));
208
+ return translator_ldl(env, &s->base, advance_pc(env, s, 4));
209
}
210
211
#ifdef TARGET_X86_64
212
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
213
{
214
- return translator_ldq(env, advance_pc(env, s, 8));
215
+ return translator_ldq(env, &s->base, advance_pc(env, s, 8));
216
}
217
#endif
218
219
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/target/m68k/translate.c
222
+++ b/target/m68k/translate.c
223
@@ -XXX,XX +XXX,XX @@ static TCGv gen_ldst(DisasContext *s, int opsize, TCGv addr, TCGv val,
224
static inline uint16_t read_im16(CPUM68KState *env, DisasContext *s)
225
{
226
uint16_t im;
227
- im = translator_lduw(env, s->pc);
228
+ im = translator_lduw(env, &s->base, s->pc);
229
s->pc += 2;
230
return im;
231
}
232
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
233
index XXXXXXX..XXXXXXX 100644
234
--- a/target/mips/tcg/translate.c
235
+++ b/target/mips/tcg/translate.c
236
@@ -XXX,XX +XXX,XX @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
237
238
is_slot = ctx->hflags & MIPS_HFLAG_BMASK;
239
if (ctx->insn_flags & ISA_NANOMIPS32) {
240
- ctx->opcode = translator_lduw(env, ctx->base.pc_next);
241
+ ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
242
insn_bytes = decode_isa_nanomips(env, ctx);
243
} else if (!(ctx->hflags & MIPS_HFLAG_M16)) {
244
- ctx->opcode = translator_ldl(env, ctx->base.pc_next);
245
+ ctx->opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
246
insn_bytes = 4;
247
decode_opc(env, ctx);
248
} else if (ctx->insn_flags & ASE_MICROMIPS) {
249
- ctx->opcode = translator_lduw(env, ctx->base.pc_next);
250
+ ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
251
insn_bytes = decode_isa_micromips(env, ctx);
252
} else if (ctx->insn_flags & ASE_MIPS16) {
253
- ctx->opcode = translator_lduw(env, ctx->base.pc_next);
254
+ ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
255
insn_bytes = decode_ase_mips16e(env, ctx);
53
} else {
256
} else {
54
/* compute the blocked signals during the handler execution */
257
gen_reserved_instruction(ctx);
55
sigset_t *blocked_set;
258
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
259
index XXXXXXX..XXXXXXX 100644
260
--- a/target/openrisc/translate.c
261
+++ b/target/openrisc/translate.c
262
@@ -XXX,XX +XXX,XX @@ static void openrisc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
263
{
264
DisasContext *dc = container_of(dcbase, DisasContext, base);
265
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
266
- uint32_t insn = translator_ldl(&cpu->env, dc->base.pc_next);
267
+ uint32_t insn = translator_ldl(&cpu->env, &dc->base, dc->base.pc_next);
268
269
if (!decode(dc, insn)) {
270
gen_illegal_exception(dc);
271
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
272
index XXXXXXX..XXXXXXX 100644
273
--- a/target/ppc/translate.c
274
+++ b/target/ppc/translate.c
275
@@ -XXX,XX +XXX,XX @@ static void ppc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
276
ctx->base.pc_next, ctx->mem_idx, (int)msr_ir);
277
278
ctx->cia = pc = ctx->base.pc_next;
279
- insn = translator_ldl_swap(env, pc, need_byteswap(ctx));
280
+ insn = translator_ldl_swap(env, dcbase, pc, need_byteswap(ctx));
281
ctx->base.pc_next = pc += 4;
282
283
if (!is_prefix_insn(ctx, insn)) {
284
@@ -XXX,XX +XXX,XX @@ static void ppc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
285
gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_INSN);
286
ok = true;
287
} else {
288
- uint32_t insn2 = translator_ldl_swap(env, pc, need_byteswap(ctx));
289
+ uint32_t insn2 = translator_ldl_swap(env, dcbase, pc,
290
+ need_byteswap(ctx));
291
ctx->base.pc_next = pc += 4;
292
ok = decode_insn64(ctx, deposit64(insn2, 32, 32, insn));
293
}
294
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
295
index XXXXXXX..XXXXXXX 100644
296
--- a/target/riscv/translate.c
297
+++ b/target/riscv/translate.c
298
@@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
299
} else {
300
uint32_t opcode32 = opcode;
301
opcode32 = deposit32(opcode32, 16, 16,
302
- translator_lduw(env, ctx->base.pc_next + 2));
303
+ translator_lduw(env, &ctx->base,
304
+ ctx->base.pc_next + 2));
305
ctx->pc_succ_insn = ctx->base.pc_next + 4;
306
if (!decode_insn32(ctx, opcode32)) {
307
gen_exception_illegal(ctx);
308
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
309
{
310
DisasContext *ctx = container_of(dcbase, DisasContext, base);
311
CPURISCVState *env = cpu->env_ptr;
312
- uint16_t opcode16 = translator_lduw(env, ctx->base.pc_next);
313
+ uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
314
315
decode_opc(env, ctx, opcode16);
316
ctx->base.pc_next = ctx->pc_succ_insn;
317
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
318
index XXXXXXX..XXXXXXX 100644
319
--- a/target/s390x/tcg/translate.c
320
+++ b/target/s390x/tcg/translate.c
321
@@ -XXX,XX +XXX,XX @@ static void update_cc_op(DisasContext *s)
322
}
323
}
324
325
-static inline uint64_t ld_code2(CPUS390XState *env, uint64_t pc)
326
+static inline uint64_t ld_code2(CPUS390XState *env, DisasContext *s,
327
+ uint64_t pc)
328
{
329
- return (uint64_t)cpu_lduw_code(env, pc);
330
+ return (uint64_t)translator_lduw(env, &s->base, pc);
331
}
332
333
-static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc)
334
+static inline uint64_t ld_code4(CPUS390XState *env, DisasContext *s,
335
+ uint64_t pc)
336
{
337
- return (uint64_t)(uint32_t)cpu_ldl_code(env, pc);
338
+ return (uint64_t)(uint32_t)translator_ldl(env, &s->base, pc);
339
}
340
341
static int get_mem_index(DisasContext *s)
342
@@ -XXX,XX +XXX,XX @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s)
343
ilen = s->ex_value & 0xf;
344
op = insn >> 56;
345
} else {
346
- insn = ld_code2(env, pc);
347
+ insn = ld_code2(env, s, pc);
348
op = (insn >> 8) & 0xff;
349
ilen = get_ilen(op);
350
switch (ilen) {
351
@@ -XXX,XX +XXX,XX @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s)
352
insn = insn << 48;
353
break;
354
case 4:
355
- insn = ld_code4(env, pc) << 32;
356
+ insn = ld_code4(env, s, pc) << 32;
357
break;
358
case 6:
359
- insn = (insn << 48) | (ld_code4(env, pc + 2) << 16);
360
+ insn = (insn << 48) | (ld_code4(env, s, pc + 2) << 16);
361
break;
362
default:
363
g_assert_not_reached();
364
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
365
index XXXXXXX..XXXXXXX 100644
366
--- a/target/sh4/translate.c
367
+++ b/target/sh4/translate.c
368
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
369
370
/* Read all of the insns for the region. */
371
for (i = 0; i < max_insns; ++i) {
372
- insns[i] = translator_lduw(env, pc + i * 2);
373
+ insns[i] = translator_lduw(env, &ctx->base, pc + i * 2);
374
}
375
376
ld_adr = ld_dst = ld_mop = -1;
377
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
378
}
379
#endif
380
381
- ctx->opcode = translator_lduw(env, ctx->base.pc_next);
382
+ ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
383
decode_opc(ctx);
384
ctx->base.pc_next += 2;
385
}
386
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
387
index XXXXXXX..XXXXXXX 100644
388
--- a/target/sparc/translate.c
389
+++ b/target/sparc/translate.c
390
@@ -XXX,XX +XXX,XX @@ static void sparc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
391
CPUSPARCState *env = cs->env_ptr;
392
unsigned int insn;
393
394
- insn = translator_ldl(env, dc->pc);
395
+ insn = translator_ldl(env, &dc->base, dc->pc);
396
dc->base.pc_next += 4;
397
disas_sparc_insn(dc, insn);
398
399
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
400
index XXXXXXX..XXXXXXX 100644
401
--- a/target/xtensa/translate.c
402
+++ b/target/xtensa/translate.c
403
@@ -XXX,XX +XXX,XX @@ static int arg_copy_compare(const void *a, const void *b)
404
static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
405
{
406
xtensa_isa isa = dc->config->isa;
407
- unsigned char b[MAX_INSN_LENGTH] = {translator_ldub(env, dc->pc)};
408
+ unsigned char b[MAX_INSN_LENGTH] = {translator_ldub(env, &dc->base,
409
+ dc->pc)};
410
unsigned len = xtensa_op0_insn_len(dc, b[0]);
411
xtensa_format fmt;
412
int slot, slots;
413
@@ -XXX,XX +XXX,XX @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
414
415
dc->base.pc_next = dc->pc + len;
416
for (i = 1; i < len; ++i) {
417
- b[i] = translator_ldub(env, dc->pc + i);
418
+ b[i] = translator_ldub(env, &dc->base, dc->pc + i);
419
}
420
xtensa_insnbuf_from_chars(isa, dc->insnbuf, b, len);
421
fmt = xtensa_format_decode(isa, dc->insnbuf);
422
diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc
423
index XXXXXXX..XXXXXXX 100644
424
--- a/target/mips/tcg/micromips_translate.c.inc
425
+++ b/target/mips/tcg/micromips_translate.c.inc
426
@@ -XXX,XX +XXX,XX @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx)
427
uint32_t op, minor, minor2, mips32_op;
428
uint32_t cond, fmt, cc;
429
430
- insn = translator_lduw(env, ctx->base.pc_next + 2);
431
+ insn = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2);
432
ctx->opcode = (ctx->opcode << 16) | insn;
433
434
rt = (ctx->opcode >> 21) & 0x1f;
435
diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc
436
index XXXXXXX..XXXXXXX 100644
437
--- a/target/mips/tcg/mips16e_translate.c.inc
438
+++ b/target/mips/tcg/mips16e_translate.c.inc
439
@@ -XXX,XX +XXX,XX @@ static void decode_i64_mips16(DisasContext *ctx,
440
441
static int decode_extended_mips16_opc(CPUMIPSState *env, DisasContext *ctx)
442
{
443
- int extend = translator_lduw(env, ctx->base.pc_next + 2);
444
+ int extend = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2);
445
int op, rx, ry, funct, sa;
446
int16_t imm, offset;
447
448
@@ -XXX,XX +XXX,XX @@ static int decode_ase_mips16e(CPUMIPSState *env, DisasContext *ctx)
449
/* No delay slot, so just process as a normal instruction */
450
break;
451
case M16_OPC_JAL:
452
- offset = translator_lduw(env, ctx->base.pc_next + 2);
453
+ offset = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2);
454
offset = (((ctx->opcode & 0x1f) << 21)
455
| ((ctx->opcode >> 5) & 0x1f) << 16
456
| offset) << 2;
457
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
458
index XXXXXXX..XXXXXXX 100644
459
--- a/target/mips/tcg/nanomips_translate.c.inc
460
+++ b/target/mips/tcg/nanomips_translate.c.inc
461
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
462
int offset;
463
int imm;
464
465
- insn = translator_lduw(env, ctx->base.pc_next + 2);
466
+ insn = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2);
467
ctx->opcode = (ctx->opcode << 16) | insn;
468
469
rt = extract32(ctx->opcode, 21, 5);
470
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
471
break;
472
case NM_P48I:
473
{
474
- insn = translator_lduw(env, ctx->base.pc_next + 4);
475
+ insn = translator_lduw(env, &ctx->base, ctx->base.pc_next + 4);
476
target_long addr_off = extract32(ctx->opcode, 0, 16) | insn << 16;
477
switch (extract32(ctx->opcode, 16, 5)) {
478
case NM_LI48:
56
--
479
--
57
2.34.1
480
2.25.1
58
481
59
482
diff view generated by jsdifflib
Deleted patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
1
3
Add libdw-based functions for loading and querying debuginfo. Load
4
debuginfo from the system and the linux-user loaders.
5
6
This is useful for the upcoming perf support, which can then put
7
human-readable guest symbols instead of raw guest PCs into perfmap and
8
jitdump files.
9
10
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
11
Message-Id: <20230112152013.125680-3-iii@linux.ibm.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
meson.build | 8 ++++
15
accel/tcg/debuginfo.h | 77 +++++++++++++++++++++++++++++++++
16
accel/tcg/debuginfo.c | 96 ++++++++++++++++++++++++++++++++++++++++++
17
hw/core/loader.c | 5 +++
18
linux-user/elfload.c | 3 ++
19
accel/tcg/meson.build | 1 +
20
linux-user/meson.build | 1 +
21
7 files changed, 191 insertions(+)
22
create mode 100644 accel/tcg/debuginfo.h
23
create mode 100644 accel/tcg/debuginfo.c
24
25
diff --git a/meson.build b/meson.build
26
index XXXXXXX..XXXXXXX 100644
27
--- a/meson.build
28
+++ b/meson.build
29
@@ -XXX,XX +XXX,XX @@ if libbpf.found() and not cc.links('''
30
endif
31
endif
32
33
+# libdw
34
+libdw = dependency('libdw',
35
+ method: 'pkg-config',
36
+ kwargs: static_kwargs,
37
+ required: false)
38
+
39
#################
40
# config-host.h #
41
#################
42
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_DBUS_DISPLAY', dbus_display)
43
config_host_data.set('CONFIG_CFI', get_option('cfi'))
44
config_host_data.set('CONFIG_SELINUX', selinux.found())
45
config_host_data.set('CONFIG_XEN_BACKEND', xen.found())
46
+config_host_data.set('CONFIG_LIBDW', libdw.found())
47
if xen.found()
48
# protect from xen.version() having less than three components
49
xen_version = xen.version().split('.') + ['0', '0']
50
@@ -XXX,XX +XXX,XX @@ summary_info += {'libudev': libudev}
51
# Dummy dependency, keep .found()
52
summary_info += {'FUSE lseek': fuse_lseek.found()}
53
summary_info += {'selinux': selinux}
54
+summary_info += {'libdw': libdw}
55
summary(summary_info, bool_yn: true, section: 'Dependencies')
56
57
if not supported_cpus.contains(cpu)
58
diff --git a/accel/tcg/debuginfo.h b/accel/tcg/debuginfo.h
59
new file mode 100644
60
index XXXXXXX..XXXXXXX
61
--- /dev/null
62
+++ b/accel/tcg/debuginfo.h
63
@@ -XXX,XX +XXX,XX @@
64
+/*
65
+ * Debug information support.
66
+ *
67
+ * SPDX-License-Identifier: GPL-2.0-or-later
68
+ */
69
+
70
+#ifndef ACCEL_TCG_DEBUGINFO_H
71
+#define ACCEL_TCG_DEBUGINFO_H
72
+
73
+/*
74
+ * Debuginfo describing a certain address.
75
+ */
76
+struct debuginfo_query {
77
+ uint64_t address; /* Input: address. */
78
+ int flags; /* Input: debuginfo subset. */
79
+ const char *symbol; /* Symbol that the address is part of. */
80
+ uint64_t offset; /* Offset from the symbol. */
81
+ const char *file; /* Source file associated with the address. */
82
+ int line; /* Line number in the source file. */
83
+};
84
+
85
+/*
86
+ * Debuginfo subsets.
87
+ */
88
+#define DEBUGINFO_SYMBOL BIT(1)
89
+#define DEBUGINFO_LINE BIT(2)
90
+
91
+#if defined(CONFIG_TCG) && defined(CONFIG_LIBDW)
92
+/*
93
+ * Load debuginfo for the specified guest ELF image.
94
+ * Return true on success, false on failure.
95
+ */
96
+void debuginfo_report_elf(const char *name, int fd, uint64_t bias);
97
+
98
+/*
99
+ * Take the debuginfo lock.
100
+ */
101
+void debuginfo_lock(void);
102
+
103
+/*
104
+ * Fill each on N Qs with the debuginfo about Q->ADDRESS as specified by
105
+ * Q->FLAGS:
106
+ *
107
+ * - DEBUGINFO_SYMBOL: update Q->SYMBOL and Q->OFFSET. If symbol debuginfo is
108
+ * missing, then leave them as is.
109
+ * - DEBUINFO_LINE: update Q->FILE and Q->LINE. If line debuginfo is missing,
110
+ * then leave them as is.
111
+ *
112
+ * This function must be called under the debuginfo lock. The results can be
113
+ * accessed only until the debuginfo lock is released.
114
+ */
115
+void debuginfo_query(struct debuginfo_query *q, size_t n);
116
+
117
+/*
118
+ * Release the debuginfo lock.
119
+ */
120
+void debuginfo_unlock(void);
121
+#else
122
+static inline void debuginfo_report_elf(const char *image_name, int image_fd,
123
+ uint64_t load_bias)
124
+{
125
+}
126
+
127
+static inline void debuginfo_lock(void)
128
+{
129
+}
130
+
131
+static inline void debuginfo_query(struct debuginfo_query *q, size_t n)
132
+{
133
+}
134
+
135
+static inline void debuginfo_unlock(void)
136
+{
137
+}
138
+#endif
139
+
140
+#endif
141
diff --git a/accel/tcg/debuginfo.c b/accel/tcg/debuginfo.c
142
new file mode 100644
143
index XXXXXXX..XXXXXXX
144
--- /dev/null
145
+++ b/accel/tcg/debuginfo.c
146
@@ -XXX,XX +XXX,XX @@
147
+/*
148
+ * Debug information support.
149
+ *
150
+ * SPDX-License-Identifier: GPL-2.0-or-later
151
+ */
152
+
153
+#include "qemu/osdep.h"
154
+#include "qemu/lockable.h"
155
+
156
+#include <elfutils/libdwfl.h>
157
+
158
+#include "debuginfo.h"
159
+
160
+static QemuMutex lock;
161
+static Dwfl *dwfl;
162
+static const Dwfl_Callbacks dwfl_callbacks = {
163
+ .find_elf = NULL,
164
+ .find_debuginfo = dwfl_standard_find_debuginfo,
165
+ .section_address = NULL,
166
+ .debuginfo_path = NULL,
167
+};
168
+
169
+__attribute__((constructor))
170
+static void debuginfo_init(void)
171
+{
172
+ qemu_mutex_init(&lock);
173
+}
174
+
175
+void debuginfo_report_elf(const char *name, int fd, uint64_t bias)
176
+{
177
+ QEMU_LOCK_GUARD(&lock);
178
+
179
+ if (dwfl) {
180
+ dwfl_report_begin_add(dwfl);
181
+ } else {
182
+ dwfl = dwfl_begin(&dwfl_callbacks);
183
+ }
184
+
185
+ if (dwfl) {
186
+ dwfl_report_elf(dwfl, name, name, fd, bias, true);
187
+ dwfl_report_end(dwfl, NULL, NULL);
188
+ }
189
+}
190
+
191
+void debuginfo_lock(void)
192
+{
193
+ qemu_mutex_lock(&lock);
194
+}
195
+
196
+void debuginfo_query(struct debuginfo_query *q, size_t n)
197
+{
198
+ const char *symbol, *file;
199
+ Dwfl_Module *dwfl_module;
200
+ Dwfl_Line *dwfl_line;
201
+ GElf_Off dwfl_offset;
202
+ GElf_Sym dwfl_sym;
203
+ size_t i;
204
+ int line;
205
+
206
+ if (!dwfl) {
207
+ return;
208
+ }
209
+
210
+ for (i = 0; i < n; i++) {
211
+ dwfl_module = dwfl_addrmodule(dwfl, q[i].address);
212
+ if (!dwfl_module) {
213
+ continue;
214
+ }
215
+
216
+ if (q[i].flags & DEBUGINFO_SYMBOL) {
217
+ symbol = dwfl_module_addrinfo(dwfl_module, q[i].address,
218
+ &dwfl_offset, &dwfl_sym,
219
+ NULL, NULL, NULL);
220
+ if (symbol) {
221
+ q[i].symbol = symbol;
222
+ q[i].offset = dwfl_offset;
223
+ }
224
+ }
225
+
226
+ if (q[i].flags & DEBUGINFO_LINE) {
227
+ dwfl_line = dwfl_module_getsrc(dwfl_module, q[i].address);
228
+ if (dwfl_line) {
229
+ file = dwfl_lineinfo(dwfl_line, NULL, &line, 0, NULL, NULL);
230
+ if (file) {
231
+ q[i].file = file;
232
+ q[i].line = line;
233
+ }
234
+ }
235
+ }
236
+ }
237
+}
238
+
239
+void debuginfo_unlock(void)
240
+{
241
+ qemu_mutex_unlock(&lock);
242
+}
243
diff --git a/hw/core/loader.c b/hw/core/loader.c
244
index XXXXXXX..XXXXXXX 100644
245
--- a/hw/core/loader.c
246
+++ b/hw/core/loader.c
247
@@ -XXX,XX +XXX,XX @@
248
#include "hw/boards.h"
249
#include "qemu/cutils.h"
250
#include "sysemu/runstate.h"
251
+#include "accel/tcg/debuginfo.h"
252
253
#include <zlib.h>
254
255
@@ -XXX,XX +XXX,XX @@ ssize_t load_elf_ram_sym(const char *filename,
256
clear_lsb, data_swab, as, load_rom, sym_cb);
257
}
258
259
+ if (ret != ELF_LOAD_FAILED) {
260
+ debuginfo_report_elf(filename, fd, 0);
261
+ }
262
+
263
fail:
264
close(fd);
265
return ret;
266
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/linux-user/elfload.c
269
+++ b/linux-user/elfload.c
270
@@ -XXX,XX +XXX,XX @@
271
#include "qemu/selfmap.h"
272
#include "qapi/error.h"
273
#include "target_signal.h"
274
+#include "accel/tcg/debuginfo.h"
275
276
#ifdef _ARCH_PPC64
277
#undef ARCH_DLINFO
278
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
279
load_symbols(ehdr, image_fd, load_bias);
280
}
281
282
+ debuginfo_report_elf(image_name, image_fd, load_bias);
283
+
284
mmap_unlock();
285
286
close(image_fd);
287
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
288
index XXXXXXX..XXXXXXX 100644
289
--- a/accel/tcg/meson.build
290
+++ b/accel/tcg/meson.build
291
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
292
tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
293
tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
294
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
295
+tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
296
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
297
298
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
299
diff --git a/linux-user/meson.build b/linux-user/meson.build
300
index XXXXXXX..XXXXXXX 100644
301
--- a/linux-user/meson.build
302
+++ b/linux-user/meson.build
303
@@ -XXX,XX +XXX,XX @@ linux_user_ss.add(files(
304
'uname.c',
305
))
306
linux_user_ss.add(rt)
307
+linux_user_ss.add(libdw)
308
309
linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c'))
310
linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c'))
311
--
312
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
1
3
Add ability to dump /tmp/perf-<pid>.map and jit-<pid>.dump.
4
The first one allows the perf tool to map samples to each individual
5
translation block. The second one adds the ability to resolve symbol
6
names, line numbers and inspect JITed code.
7
8
Example of use:
9
10
perf record qemu-x86_64 -perfmap ./a.out
11
perf report
12
13
or
14
15
perf record -k 1 qemu-x86_64 -jitdump ./a.out
16
DEBUGINFOD_URLS= perf inject -j -i perf.data -o perf.data.jitted
17
perf report -i perf.data.jitted
18
19
Co-developed-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
20
Co-developed-by: Alex Bennée <alex.bennee@linaro.org>
21
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
22
Message-Id: <20230112152013.125680-4-iii@linux.ibm.com>
23
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
24
---
25
docs/devel/tcg.rst | 23 +++
26
accel/tcg/perf.h | 49 +++++
27
accel/tcg/perf.c | 375 ++++++++++++++++++++++++++++++++++++++
28
accel/tcg/translate-all.c | 7 +
29
linux-user/exit.c | 2 +
30
linux-user/main.c | 15 ++
31
softmmu/vl.c | 11 ++
32
tcg/tcg.c | 2 +
33
accel/tcg/meson.build | 1 +
34
qemu-options.hx | 20 ++
35
10 files changed, 505 insertions(+)
36
create mode 100644 accel/tcg/perf.h
37
create mode 100644 accel/tcg/perf.c
38
39
diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
40
index XXXXXXX..XXXXXXX 100644
41
--- a/docs/devel/tcg.rst
42
+++ b/docs/devel/tcg.rst
43
@@ -XXX,XX +XXX,XX @@ memory areas instead calls out to C code for device emulation.
44
Finally, the MMU helps tracking dirty pages and pages pointed to by
45
translation blocks.
46
47
+Profiling JITted code
48
+---------------------
49
+
50
+The Linux ``perf`` tool will treat all JITted code as a single block as
51
+unlike the main code it can't use debug information to link individual
52
+program counter samples with larger functions. To overcome this
53
+limitation you can use the ``-perfmap`` or the ``-jitdump`` option to generate
54
+map files. ``-perfmap`` is lightweight and produces only guest-host mappings.
55
+``-jitdump`` additionally saves JITed code and guest debug information (if
56
+available); its output needs to be integrated with the ``perf.data`` file
57
+before the final report can be viewed.
58
+
59
+.. code::
60
+
61
+ perf record $QEMU -perfmap $REMAINING_ARGS
62
+ perf report
63
+
64
+ perf record -k 1 $QEMU -jitdump $REMAINING_ARGS
65
+ DEBUGINFOD_URLS= perf inject -j -i perf.data -o perf.data.jitted
66
+ perf report -i perf.data.jitted
67
+
68
+Note that qemu-system generates mappings only for ``-kernel`` files in ELF
69
+format.
70
diff --git a/accel/tcg/perf.h b/accel/tcg/perf.h
71
new file mode 100644
72
index XXXXXXX..XXXXXXX
73
--- /dev/null
74
+++ b/accel/tcg/perf.h
75
@@ -XXX,XX +XXX,XX @@
76
+/*
77
+ * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
78
+ *
79
+ * SPDX-License-Identifier: GPL-2.0-or-later
80
+ */
81
+
82
+#ifndef ACCEL_TCG_PERF_H
83
+#define ACCEL_TCG_PERF_H
84
+
85
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
86
+/* Start writing perf-<pid>.map. */
87
+void perf_enable_perfmap(void);
88
+
89
+/* Start writing jit-<pid>.dump. */
90
+void perf_enable_jitdump(void);
91
+
92
+/* Add information about TCG prologue to profiler maps. */
93
+void perf_report_prologue(const void *start, size_t size);
94
+
95
+/* Add information about JITted guest code to profiler maps. */
96
+void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
97
+ const void *start);
98
+
99
+/* Stop writing perf-<pid>.map and/or jit-<pid>.dump. */
100
+void perf_exit(void);
101
+#else
102
+static inline void perf_enable_perfmap(void)
103
+{
104
+}
105
+
106
+static inline void perf_enable_jitdump(void)
107
+{
108
+}
109
+
110
+static inline void perf_report_prologue(const void *start, size_t size)
111
+{
112
+}
113
+
114
+static inline void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
115
+ const void *start)
116
+{
117
+}
118
+
119
+static inline void perf_exit(void)
120
+{
121
+}
122
+#endif
123
+
124
+#endif
125
diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c
126
new file mode 100644
127
index XXXXXXX..XXXXXXX
128
--- /dev/null
129
+++ b/accel/tcg/perf.c
130
@@ -XXX,XX +XXX,XX @@
131
+/*
132
+ * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
133
+ *
134
+ * The jitdump spec can be found at [1].
135
+ *
136
+ * [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
137
+ *
138
+ * SPDX-License-Identifier: GPL-2.0-or-later
139
+ */
140
+
141
+#include "qemu/osdep.h"
142
+#include "elf.h"
143
+#include "exec/exec-all.h"
144
+#include "qemu/timer.h"
145
+#include "tcg/tcg.h"
146
+
147
+#include "debuginfo.h"
148
+#include "perf.h"
149
+
150
+static FILE *safe_fopen_w(const char *path)
151
+{
152
+ int saved_errno;
153
+ FILE *f;
154
+ int fd;
155
+
156
+ /* Delete the old file, if any. */
157
+ unlink(path);
158
+
159
+ /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
160
+ fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
161
+ if (fd == -1) {
162
+ return NULL;
163
+ }
164
+
165
+ /* Convert fd to FILE*. */
166
+ f = fdopen(fd, "w");
167
+ if (f == NULL) {
168
+ saved_errno = errno;
169
+ close(fd);
170
+ errno = saved_errno;
171
+ return NULL;
172
+ }
173
+
174
+ return f;
175
+}
176
+
177
+static FILE *perfmap;
178
+
179
+void perf_enable_perfmap(void)
180
+{
181
+ char map_file[32];
182
+
183
+ snprintf(map_file, sizeof(map_file), "/tmp/perf-%d.map", getpid());
184
+ perfmap = safe_fopen_w(map_file);
185
+ if (perfmap == NULL) {
186
+ warn_report("Could not open %s: %s, proceeding without perfmap",
187
+ map_file, strerror(errno));
188
+ }
189
+}
190
+
191
+/* Get PC and size of code JITed for guest instruction #INSN. */
192
+static void get_host_pc_size(uintptr_t *host_pc, uint16_t *host_size,
193
+ const void *start, size_t insn)
194
+{
195
+ uint16_t start_off = insn ? tcg_ctx->gen_insn_end_off[insn - 1] : 0;
196
+
197
+ if (host_pc) {
198
+ *host_pc = (uintptr_t)start + start_off;
199
+ }
200
+ if (host_size) {
201
+ *host_size = tcg_ctx->gen_insn_end_off[insn] - start_off;
202
+ }
203
+}
204
+
205
+static const char *pretty_symbol(const struct debuginfo_query *q, size_t *len)
206
+{
207
+ static __thread char buf[64];
208
+ int tmp;
209
+
210
+ if (!q->symbol) {
211
+ tmp = snprintf(buf, sizeof(buf), "guest-0x%"PRIx64, q->address);
212
+ if (len) {
213
+ *len = MIN(tmp + 1, sizeof(buf));
214
+ }
215
+ return buf;
216
+ }
217
+
218
+ if (!q->offset) {
219
+ if (len) {
220
+ *len = strlen(q->symbol) + 1;
221
+ }
222
+ return q->symbol;
223
+ }
224
+
225
+ tmp = snprintf(buf, sizeof(buf), "%s+0x%"PRIx64, q->symbol, q->offset);
226
+ if (len) {
227
+ *len = MIN(tmp + 1, sizeof(buf));
228
+ }
229
+ return buf;
230
+}
231
+
232
+static void write_perfmap_entry(const void *start, size_t insn,
233
+ const struct debuginfo_query *q)
234
+{
235
+ uint16_t host_size;
236
+ uintptr_t host_pc;
237
+
238
+ get_host_pc_size(&host_pc, &host_size, start, insn);
239
+ fprintf(perfmap, "%"PRIxPTR" %"PRIx16" %s\n",
240
+ host_pc, host_size, pretty_symbol(q, NULL));
241
+}
242
+
243
+static FILE *jitdump;
244
+
245
+#define JITHEADER_MAGIC 0x4A695444
246
+#define JITHEADER_VERSION 1
247
+
248
+struct jitheader {
249
+ uint32_t magic;
250
+ uint32_t version;
251
+ uint32_t total_size;
252
+ uint32_t elf_mach;
253
+ uint32_t pad1;
254
+ uint32_t pid;
255
+ uint64_t timestamp;
256
+ uint64_t flags;
257
+};
258
+
259
+enum jit_record_type {
260
+ JIT_CODE_LOAD = 0,
261
+ JIT_CODE_DEBUG_INFO = 2,
262
+};
263
+
264
+struct jr_prefix {
265
+ uint32_t id;
266
+ uint32_t total_size;
267
+ uint64_t timestamp;
268
+};
269
+
270
+struct jr_code_load {
271
+ struct jr_prefix p;
272
+
273
+ uint32_t pid;
274
+ uint32_t tid;
275
+ uint64_t vma;
276
+ uint64_t code_addr;
277
+ uint64_t code_size;
278
+ uint64_t code_index;
279
+};
280
+
281
+struct debug_entry {
282
+ uint64_t addr;
283
+ int lineno;
284
+ int discrim;
285
+ const char name[];
286
+};
287
+
288
+struct jr_code_debug_info {
289
+ struct jr_prefix p;
290
+
291
+ uint64_t code_addr;
292
+ uint64_t nr_entry;
293
+ struct debug_entry entries[];
294
+};
295
+
296
+static uint32_t get_e_machine(void)
297
+{
298
+ Elf64_Ehdr elf_header;
299
+ FILE *exe;
300
+ size_t n;
301
+
302
+ QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) !=
303
+ offsetof(Elf64_Ehdr, e_machine));
304
+
305
+ exe = fopen("/proc/self/exe", "r");
306
+ if (exe == NULL) {
307
+ return EM_NONE;
308
+ }
309
+
310
+ n = fread(&elf_header, sizeof(elf_header), 1, exe);
311
+ fclose(exe);
312
+ if (n != 1) {
313
+ return EM_NONE;
314
+ }
315
+
316
+ return elf_header.e_machine;
317
+}
318
+
319
+void perf_enable_jitdump(void)
320
+{
321
+ struct jitheader header;
322
+ char jitdump_file[32];
323
+ void *perf_marker;
324
+
325
+ if (!use_rt_clock) {
326
+ warn_report("CLOCK_MONOTONIC is not available, proceeding without jitdump");
327
+ return;
328
+ }
329
+
330
+ snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid());
331
+ jitdump = safe_fopen_w(jitdump_file);
332
+ if (jitdump == NULL) {
333
+ warn_report("Could not open %s: %s, proceeding without jitdump",
334
+ jitdump_file, strerror(errno));
335
+ return;
336
+ }
337
+
338
+ /*
339
+ * `perf inject` will see that the mapped file name in the corresponding
340
+ * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
341
+ * and will process it as a jitdump file.
342
+ */
343
+ perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_EXEC,
344
+ MAP_PRIVATE, fileno(jitdump), 0);
345
+ if (perf_marker == MAP_FAILED) {
346
+ warn_report("Could not map %s: %s, proceeding without jitdump",
347
+ jitdump_file, strerror(errno));
348
+ fclose(jitdump);
349
+ jitdump = NULL;
350
+ return;
351
+ }
352
+
353
+ header.magic = JITHEADER_MAGIC;
354
+ header.version = JITHEADER_VERSION;
355
+ header.total_size = sizeof(header);
356
+ header.elf_mach = get_e_machine();
357
+ header.pad1 = 0;
358
+ header.pid = getpid();
359
+ header.timestamp = get_clock();
360
+ header.flags = 0;
361
+ fwrite(&header, sizeof(header), 1, jitdump);
362
+}
363
+
364
+void perf_report_prologue(const void *start, size_t size)
365
+{
366
+ if (perfmap) {
367
+ fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n",
368
+ (uintptr_t)start, size);
369
+ }
370
+}
371
+
372
+/* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
373
+static void write_jr_code_debug_info(const void *start,
374
+ const struct debuginfo_query *q,
375
+ size_t icount)
376
+{
377
+ struct jr_code_debug_info rec;
378
+ struct debug_entry ent;
379
+ uintptr_t host_pc;
380
+ int insn;
381
+
382
+ /* Write the header. */
383
+ rec.p.id = JIT_CODE_DEBUG_INFO;
384
+ rec.p.total_size = sizeof(rec) + sizeof(ent) + 1;
385
+ rec.p.timestamp = get_clock();
386
+ rec.code_addr = (uintptr_t)start;
387
+ rec.nr_entry = 1;
388
+ for (insn = 0; insn < icount; insn++) {
389
+ if (q[insn].file) {
390
+ rec.p.total_size += sizeof(ent) + strlen(q[insn].file) + 1;
391
+ rec.nr_entry++;
392
+ }
393
+ }
394
+ fwrite(&rec, sizeof(rec), 1, jitdump);
395
+
396
+ /* Write the main debug entries. */
397
+ for (insn = 0; insn < icount; insn++) {
398
+ if (q[insn].file) {
399
+ get_host_pc_size(&host_pc, NULL, start, insn);
400
+ ent.addr = host_pc;
401
+ ent.lineno = q[insn].line;
402
+ ent.discrim = 0;
403
+ fwrite(&ent, sizeof(ent), 1, jitdump);
404
+ fwrite(q[insn].file, strlen(q[insn].file) + 1, 1, jitdump);
405
+ }
406
+ }
407
+
408
+ /* Write the trailing debug_entry. */
409
+ ent.addr = (uintptr_t)start + tcg_ctx->gen_insn_end_off[icount - 1];
410
+ ent.lineno = 0;
411
+ ent.discrim = 0;
412
+ fwrite(&ent, sizeof(ent), 1, jitdump);
413
+ fwrite("", 1, 1, jitdump);
414
+}
415
+
416
+/* Write a JIT_CODE_LOAD jitdump entry. */
417
+static void write_jr_code_load(const void *start, uint16_t host_size,
418
+ const struct debuginfo_query *q)
419
+{
420
+ static uint64_t code_index;
421
+ struct jr_code_load rec;
422
+ const char *symbol;
423
+ size_t symbol_size;
424
+
425
+ symbol = pretty_symbol(q, &symbol_size);
426
+ rec.p.id = JIT_CODE_LOAD;
427
+ rec.p.total_size = sizeof(rec) + symbol_size + host_size;
428
+ rec.p.timestamp = get_clock();
429
+ rec.pid = getpid();
430
+ rec.tid = qemu_get_thread_id();
431
+ rec.vma = (uintptr_t)start;
432
+ rec.code_addr = (uintptr_t)start;
433
+ rec.code_size = host_size;
434
+ rec.code_index = code_index++;
435
+ fwrite(&rec, sizeof(rec), 1, jitdump);
436
+ fwrite(symbol, symbol_size, 1, jitdump);
437
+ fwrite(start, host_size, 1, jitdump);
438
+}
439
+
440
+void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
441
+ const void *start)
442
+{
443
+ struct debuginfo_query *q;
444
+ size_t insn;
445
+
446
+ if (!perfmap && !jitdump) {
447
+ return;
448
+ }
449
+
450
+ q = g_try_malloc0_n(tb->icount, sizeof(*q));
451
+ if (!q) {
452
+ return;
453
+ }
454
+
455
+ debuginfo_lock();
456
+
457
+ /* Query debuginfo for each guest instruction. */
458
+ for (insn = 0; insn < tb->icount; insn++) {
459
+ /* FIXME: This replicates the restore_state_to_opc() logic. */
460
+ q[insn].address = tcg_ctx->gen_insn_data[insn][0];
461
+ if (TARGET_TB_PCREL) {
462
+ q[insn].address |= (guest_pc & TARGET_PAGE_MASK);
463
+ } else {
464
+#if defined(TARGET_I386)
465
+ q[insn].address -= tb->cs_base;
466
+#endif
467
+ }
468
+ q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0);
469
+ }
470
+ debuginfo_query(q, tb->icount);
471
+
472
+ /* Emit perfmap entries if needed. */
473
+ if (perfmap) {
474
+ flockfile(perfmap);
475
+ for (insn = 0; insn < tb->icount; insn++) {
476
+ write_perfmap_entry(start, insn, &q[insn]);
477
+ }
478
+ funlockfile(perfmap);
479
+ }
480
+
481
+ /* Emit jitdump entries if needed. */
482
+ if (jitdump) {
483
+ flockfile(jitdump);
484
+ write_jr_code_debug_info(start, q, tb->icount);
485
+ write_jr_code_load(start, tcg_ctx->gen_insn_end_off[tb->icount - 1],
486
+ q);
487
+ funlockfile(jitdump);
488
+ }
489
+
490
+ debuginfo_unlock();
491
+ g_free(q);
492
+}
493
+
494
+void perf_exit(void)
495
+{
496
+ if (perfmap) {
497
+ fclose(perfmap);
498
+ perfmap = NULL;
499
+ }
500
+
501
+ if (jitdump) {
502
+ fclose(jitdump);
503
+ jitdump = NULL;
504
+ }
505
+}
506
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
507
index XXXXXXX..XXXXXXX 100644
508
--- a/accel/tcg/translate-all.c
509
+++ b/accel/tcg/translate-all.c
510
@@ -XXX,XX +XXX,XX @@
511
#include "tb-hash.h"
512
#include "tb-context.h"
513
#include "internal.h"
514
+#include "perf.h"
515
516
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
517
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
518
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
519
}
520
tb->tc.size = gen_code_size;
521
522
+ /*
523
+ * For TARGET_TB_PCREL, attribute all executions of the generated
524
+ * code to its first mapping.
525
+ */
526
+ perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
527
+
528
#ifdef CONFIG_PROFILER
529
qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
530
qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
531
diff --git a/linux-user/exit.c b/linux-user/exit.c
532
index XXXXXXX..XXXXXXX 100644
533
--- a/linux-user/exit.c
534
+++ b/linux-user/exit.c
535
@@ -XXX,XX +XXX,XX @@
536
* along with this program; if not, see <http://www.gnu.org/licenses/>.
537
*/
538
#include "qemu/osdep.h"
539
+#include "accel/tcg/perf.h"
540
#include "exec/gdbstub.h"
541
#include "qemu.h"
542
#include "user-internals.h"
543
@@ -XXX,XX +XXX,XX @@ void preexit_cleanup(CPUArchState *env, int code)
544
#endif
545
gdb_exit(code);
546
qemu_plugin_user_exit();
547
+ perf_exit();
548
}
549
diff --git a/linux-user/main.c b/linux-user/main.c
550
index XXXXXXX..XXXXXXX 100644
551
--- a/linux-user/main.c
552
+++ b/linux-user/main.c
553
@@ -XXX,XX +XXX,XX @@
554
#include "signal-common.h"
555
#include "loader.h"
556
#include "user-mmap.h"
557
+#include "accel/tcg/perf.h"
558
559
#ifdef CONFIG_SEMIHOSTING
560
#include "semihosting/semihost.h"
561
@@ -XXX,XX +XXX,XX @@ static void handle_arg_abi_call0(const char *arg)
562
}
563
#endif
564
565
+static void handle_arg_perfmap(const char *arg)
566
+{
567
+ perf_enable_perfmap();
568
+}
569
+
570
+static void handle_arg_jitdump(const char *arg)
571
+{
572
+ perf_enable_jitdump();
573
+}
574
+
575
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
576
577
#ifdef CONFIG_PLUGIN
578
@@ -XXX,XX +XXX,XX @@ static const struct qemu_argument arg_table[] = {
579
{"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false, handle_arg_abi_call0,
580
"", "assume CALL0 Xtensa ABI"},
581
#endif
582
+ {"perfmap", "QEMU_PERFMAP", false, handle_arg_perfmap,
583
+ "", "Generate a /tmp/perf-${pid}.map file for perf"},
584
+ {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
585
+ "", "Generate a jit-${pid}.dump file for perf"},
586
{NULL, NULL, false, NULL, NULL, NULL}
587
};
588
589
diff --git a/softmmu/vl.c b/softmmu/vl.c
590
index XXXXXXX..XXXXXXX 100644
591
--- a/softmmu/vl.c
592
+++ b/softmmu/vl.c
593
@@ -XXX,XX +XXX,XX @@
594
#include "fsdev/qemu-fsdev.h"
595
#endif
596
#include "sysemu/qtest.h"
597
+#ifdef CONFIG_TCG
598
+#include "accel/tcg/perf.h"
599
+#endif
600
601
#include "disas/disas.h"
602
603
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
604
case QEMU_OPTION_DFILTER:
605
qemu_set_dfilter_ranges(optarg, &error_fatal);
606
break;
607
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
608
+ case QEMU_OPTION_perfmap:
609
+ perf_enable_perfmap();
610
+ break;
611
+ case QEMU_OPTION_jitdump:
612
+ perf_enable_jitdump();
613
+ break;
614
+#endif
615
case QEMU_OPTION_seed:
616
qemu_guest_random_seed_main(optarg, &error_fatal);
617
break;
618
diff --git a/tcg/tcg.c b/tcg/tcg.c
619
index XXXXXXX..XXXXXXX 100644
620
--- a/tcg/tcg.c
621
+++ b/tcg/tcg.c
622
@@ -XXX,XX +XXX,XX @@
623
#include "exec/log.h"
624
#include "tcg/tcg-ldst.h"
625
#include "tcg-internal.h"
626
+#include "accel/tcg/perf.h"
627
628
/* Forward declarations for functions declared in tcg-target.c.inc and
629
used here. */
630
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
631
#endif
632
633
prologue_size = tcg_current_code_size(s);
634
+ perf_report_prologue(s->code_gen_ptr, prologue_size);
635
636
#ifndef CONFIG_TCG_INTERPRETER
637
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
638
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
639
index XXXXXXX..XXXXXXX 100644
640
--- a/accel/tcg/meson.build
641
+++ b/accel/tcg/meson.build
642
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
643
tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
644
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
645
tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
646
+tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
647
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
648
649
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
650
diff --git a/qemu-options.hx b/qemu-options.hx
651
index XXXXXXX..XXXXXXX 100644
652
--- a/qemu-options.hx
653
+++ b/qemu-options.hx
654
@@ -XXX,XX +XXX,XX @@ SRST
655
Enable synchronization profiling.
656
ERST
657
658
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
659
+DEF("perfmap", 0, QEMU_OPTION_perfmap,
660
+ "-perfmap generate a /tmp/perf-${pid}.map file for perf\n",
661
+ QEMU_ARCH_ALL)
662
+SRST
663
+``-perfmap``
664
+ Generate a map file for Linux perf tools that will allow basic profiling
665
+ information to be broken down into basic blocks.
666
+ERST
667
+
668
+DEF("jitdump", 0, QEMU_OPTION_jitdump,
669
+ "-jitdump generate a jit-${pid}.dump file for perf\n",
670
+ QEMU_ARCH_ALL)
671
+SRST
672
+``-jitdump``
673
+ Generate a dump file for Linux perf tools that maps basic blocks to symbol
674
+ names, line numbers and JITted code.
675
+ERST
676
+#endif
677
+
678
DEFHEADING()
679
680
DEFHEADING(Generic object creation:)
681
--
682
2.34.1
683
684
diff view generated by jsdifflib
Deleted patch
1
Use the attribute, which is supported by clang, instead of
2
the #pragma, which is not supported and, for some reason,
3
also not detected by the meson probe, so we fail by -Werror.
4
1
5
Include only <immintrin.h> as that is the outermost "official"
6
header for these intrinsics -- emmintrin.h and smmintrin -- are
7
older SSE2 and SSE4 specific headers, while the immintrin.h
8
includes all of the Intel intrinsics.
9
10
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
meson.build | 8 ++------
14
util/bufferiszero.c | 41 ++++++-----------------------------------
15
2 files changed, 8 insertions(+), 41 deletions(-)
16
17
diff --git a/meson.build b/meson.build
18
index XXXXXXX..XXXXXXX 100644
19
--- a/meson.build
20
+++ b/meson.build
21
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
22
config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
23
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
24
.require(cc.links('''
25
- #pragma GCC push_options
26
- #pragma GCC target("avx2")
27
#include <cpuid.h>
28
#include <immintrin.h>
29
- static int bar(void *a) {
30
+ static int __attribute__((target("avx2"))) bar(void *a) {
31
__m256i x = *(__m256i *)a;
32
return _mm256_testz_si256(x, x);
33
}
34
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
35
config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
36
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
37
.require(cc.links('''
38
- #pragma GCC push_options
39
- #pragma GCC target("avx512f")
40
#include <cpuid.h>
41
#include <immintrin.h>
42
- static int bar(void *a) {
43
+ static int __attribute__((target("avx512f"))) bar(void *a) {
44
__m512i x = *(__m512i *)a;
45
return _mm512_test_epi64_mask(x, x);
46
}
47
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/util/bufferiszero.c
50
+++ b/util/bufferiszero.c
51
@@ -XXX,XX +XXX,XX @@ buffer_zero_int(const void *buf, size_t len)
52
}
53
54
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
55
-/* Do not use push_options pragmas unnecessarily, because clang
56
- * does not support them.
57
- */
58
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
59
-#pragma GCC push_options
60
-#pragma GCC target("sse2")
61
-#endif
62
-#include <emmintrin.h>
63
+#include <immintrin.h>
64
65
/* Note that each of these vectorized functions require len >= 64. */
66
67
-static bool
68
+static bool __attribute__((target("sse2")))
69
buffer_zero_sse2(const void *buf, size_t len)
70
{
71
__m128i t = _mm_loadu_si128(buf);
72
@@ -XXX,XX +XXX,XX @@ buffer_zero_sse2(const void *buf, size_t len)
73
74
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
75
}
76
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
77
-#pragma GCC pop_options
78
-#endif
79
80
#ifdef CONFIG_AVX2_OPT
81
-/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
82
- * the includes have to be within the corresponding push_options region, and
83
- * therefore the regions themselves have to be ordered with increasing ISA.
84
- */
85
-#pragma GCC push_options
86
-#pragma GCC target("sse4")
87
-#include <smmintrin.h>
88
-
89
-static bool
90
+static bool __attribute__((target("sse4")))
91
buffer_zero_sse4(const void *buf, size_t len)
92
{
93
__m128i t = _mm_loadu_si128(buf);
94
@@ -XXX,XX +XXX,XX @@ buffer_zero_sse4(const void *buf, size_t len)
95
return _mm_testz_si128(t, t);
96
}
97
98
-#pragma GCC pop_options
99
-#pragma GCC push_options
100
-#pragma GCC target("avx2")
101
-#include <immintrin.h>
102
-
103
-static bool
104
+static bool __attribute__((target("avx2")))
105
buffer_zero_avx2(const void *buf, size_t len)
106
{
107
/* Begin with an unaligned head of 32 bytes. */
108
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx2(const void *buf, size_t len)
109
110
return _mm256_testz_si256(t, t);
111
}
112
-#pragma GCC pop_options
113
#endif /* CONFIG_AVX2_OPT */
114
115
#ifdef CONFIG_AVX512F_OPT
116
-#pragma GCC push_options
117
-#pragma GCC target("avx512f")
118
-#include <immintrin.h>
119
-
120
-static bool
121
+static bool __attribute__((target("avx512f")))
122
buffer_zero_avx512(const void *buf, size_t len)
123
{
124
/* Begin with an unaligned head of 64 bytes. */
125
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx512(const void *buf, size_t len)
126
return !_mm512_test_epi64_mask(t, t);
127
128
}
129
-#pragma GCC pop_options
130
-#endif
131
+#endif /* CONFIG_AVX512F_OPT */
132
133
134
/* Note that for test_buffer_is_zero_next_accel, the most preferred
135
--
136
2.34.1
137
138
diff view generated by jsdifflib
Deleted patch
1
Recently the g_assert(cpu == current_cpu) test has been
2
intermittently failing with gcc. Reorg the code around
3
the setjmp to minimize the lifetime of the cpu variable
4
affected by the setjmp.
5
1
6
This appears to fix the existing issue with clang as well.
7
8
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1147
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
accel/tcg/cpu-exec.c | 111 +++++++++++++++++++++----------------------
13
1 file changed, 54 insertions(+), 57 deletions(-)
14
15
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/accel/tcg/cpu-exec.c
18
+++ b/accel/tcg/cpu-exec.c
19
@@ -XXX,XX +XXX,XX @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
20
21
/* main execution loop */
22
23
-int cpu_exec(CPUState *cpu)
24
+static int __attribute__((noinline))
25
+cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
26
{
27
int ret;
28
- SyncClocks sc = { 0 };
29
-
30
- /* replay_interrupt may need current_cpu */
31
- current_cpu = cpu;
32
-
33
- if (cpu_handle_halt(cpu)) {
34
- return EXCP_HALTED;
35
- }
36
-
37
- rcu_read_lock();
38
-
39
- cpu_exec_enter(cpu);
40
-
41
- /* Calculate difference between guest clock and host clock.
42
- * This delay includes the delay of the last cycle, so
43
- * what we have to do is sleep until it is 0. As for the
44
- * advance/delay we gain here, we try to fix it next time.
45
- */
46
- init_delay_params(&sc, cpu);
47
-
48
- /* prepare setjmp context for exception handling */
49
- if (sigsetjmp(cpu->jmp_env, 0) != 0) {
50
-#if defined(__clang__)
51
- /*
52
- * Some compilers wrongly smash all local variables after
53
- * siglongjmp (the spec requires that only non-volatile locals
54
- * which are changed between the sigsetjmp and siglongjmp are
55
- * permitted to be trashed). There were bug reports for gcc
56
- * 4.5.0 and clang. The bug is fixed in all versions of gcc
57
- * that we support, but is still unfixed in clang:
58
- * https://bugs.llvm.org/show_bug.cgi?id=21183
59
- *
60
- * Reload an essential local variable here for those compilers.
61
- * Newer versions of gcc would complain about this code (-Wclobbered),
62
- * so we only perform the workaround for clang.
63
- */
64
- cpu = current_cpu;
65
-#else
66
- /* Non-buggy compilers preserve this; assert the correct value. */
67
- g_assert(cpu == current_cpu);
68
-#endif
69
-
70
-#ifndef CONFIG_SOFTMMU
71
- clear_helper_retaddr();
72
- if (have_mmap_lock()) {
73
- mmap_unlock();
74
- }
75
-#endif
76
- if (qemu_mutex_iothread_locked()) {
77
- qemu_mutex_unlock_iothread();
78
- }
79
- qemu_plugin_disable_mem_helpers(cpu);
80
-
81
- assert_no_pages_locked();
82
- }
83
84
/* if an exception is pending, we execute it here */
85
while (!cpu_handle_exception(cpu, &ret)) {
86
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
87
88
/* Try to align the host and virtual clocks
89
if the guest is in advance */
90
- align_clocks(&sc, cpu);
91
+ align_clocks(sc, cpu);
92
}
93
}
94
+ return ret;
95
+}
96
+
97
+static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
98
+{
99
+ /* Prepare setjmp context for exception handling. */
100
+ if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
101
+ /* Non-buggy compilers preserve this; assert the correct value. */
102
+ g_assert(cpu == current_cpu);
103
+
104
+#ifndef CONFIG_SOFTMMU
105
+ clear_helper_retaddr();
106
+ if (have_mmap_lock()) {
107
+ mmap_unlock();
108
+ }
109
+#endif
110
+ if (qemu_mutex_iothread_locked()) {
111
+ qemu_mutex_unlock_iothread();
112
+ }
113
+ qemu_plugin_disable_mem_helpers(cpu);
114
+
115
+ assert_no_pages_locked();
116
+ }
117
+
118
+ return cpu_exec_loop(cpu, sc);
119
+}
120
+
121
+int cpu_exec(CPUState *cpu)
122
+{
123
+ int ret;
124
+ SyncClocks sc = { 0 };
125
+
126
+ /* replay_interrupt may need current_cpu */
127
+ current_cpu = cpu;
128
+
129
+ if (cpu_handle_halt(cpu)) {
130
+ return EXCP_HALTED;
131
+ }
132
+
133
+ rcu_read_lock();
134
+ cpu_exec_enter(cpu);
135
+
136
+ /*
137
+ * Calculate difference between guest clock and host clock.
138
+ * This delay includes the delay of the last cycle, so
139
+ * what we have to do is sleep until it is 0. As for the
140
+ * advance/delay we gain here, we try to fix it next time.
141
+ */
142
+ init_delay_params(&sc, cpu);
143
+
144
+ ret = cpu_exec_setjmp(cpu, &sc);
145
146
cpu_exec_exit(cpu);
147
rcu_read_unlock();
148
--
149
2.34.1
150
151
diff view generated by jsdifflib