1
The following changes since commit fb7e7990342e59cf67dbd895c1a1e3fb1741df7a:
1
Pretty small still, but there are two patches that ought
2
to get backported to stable, so no point in delaying.
2
3
3
tests/qtest/qom-test: Do not print tested properties by default (2023-01-16 15:00:57 +0000)
4
r~
5
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230116
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
8
13
9
for you to fetch changes up to 61710a7e23a63546da0071ea32adb96476fa5d07:
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
10
15
11
accel/tcg: Split out cpu_exec_{setjmp,loop} (2023-01-16 10:14:12 -1000)
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
- Reorg cpu_tb_exec around setjmp.
19
tcg: Reset free_temps before tcg_optimize
15
- Use __attribute__((target)) for buffer_is_zero.
20
tcg/riscv: Fix StoreStore barrier generation
16
- Add perfmap and jitdump for perf support.
21
include/exec: Introduce fpst alias in helper-head.h.inc
22
target/sparc: Use memcpy() and remove memcpy32()
17
23
18
----------------------------------------------------------------
24
----------------------------------------------------------------
19
Ilya Leoshkevich (3):
25
Philippe Mathieu-Daudé (1):
20
linux-user: Clean up when exiting due to a signal
26
target/sparc: Use memcpy() and remove memcpy32()
21
accel/tcg: Add debuginfo support
22
tcg: add perfmap and jitdump
23
27
24
Richard Henderson (2):
28
Richard Henderson (2):
25
util/bufferiszero: Use __attribute__((target)) for avx2/avx512
29
tcg: Reset free_temps before tcg_optimize
26
accel/tcg: Split out cpu_exec_{setjmp,loop}
30
include/exec: Introduce fpst alias in helper-head.h.inc
27
31
28
docs/devel/tcg.rst | 23 +++
32
Roman Artemev (1):
29
meson.build | 16 +-
33
tcg/riscv: Fix StoreStore barrier generation
30
accel/tcg/debuginfo.h | 77 ++++++++++
34
31
accel/tcg/perf.h | 49 ++++++
35
include/tcg/tcg-temp-internal.h | 6 ++++++
32
accel/tcg/cpu-exec.c | 111 +++++++-------
36
accel/tcg/plugin-gen.c | 2 +-
33
accel/tcg/debuginfo.c | 96 ++++++++++++
37
target/sparc/win_helper.c | 26 ++++++++------------------
34
accel/tcg/perf.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++
38
tcg/tcg.c | 5 ++++-
35
accel/tcg/translate-all.c | 7 +
39
include/exec/helper-head.h.inc | 3 +++
36
hw/core/loader.c | 5 +
40
tcg/riscv/tcg-target.c.inc | 2 +-
37
linux-user/elfload.c | 3 +
41
6 files changed, 23 insertions(+), 21 deletions(-)
38
linux-user/exit.c | 2 +
42
39
linux-user/main.c | 15 ++
40
linux-user/signal.c | 8 +-
41
softmmu/vl.c | 11 ++
42
tcg/tcg.c | 2 +
43
util/bufferiszero.c | 41 +----
44
accel/tcg/meson.build | 2 +
45
linux-user/meson.build | 1 +
46
qemu-options.hx | 20 +++
47
19 files changed, 763 insertions(+), 101 deletions(-)
48
create mode 100644 accel/tcg/debuginfo.h
49
create mode 100644 accel/tcg/perf.h
50
create mode 100644 accel/tcg/debuginfo.c
51
create mode 100644 accel/tcg/perf.c
diff view generated by jsdifflib
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
When allocating new temps during tcg_optmize, do not re-use
2
any EBB temps that were used within the TB. We do not have
3
any idea what span of the TB in which the temp was live.
2
4
3
Add ability to dump /tmp/perf-<pid>.map and jit-<pid>.dump.
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
4
The first one allows the perf tool to map samples to each individual
6
as well as replacing the equivalent in plugin_gen_inject and
5
translation block. The second one adds the ability to resolve symbol
7
tcg_func_start.
6
names, line numbers and inspect JITed code.
7
8
8
Example of use:
9
Cc: qemu-stable@nongnu.org
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
---
17
include/tcg/tcg-temp-internal.h | 6 ++++++
18
accel/tcg/plugin-gen.c | 2 +-
19
tcg/tcg.c | 5 ++++-
20
3 files changed, 11 insertions(+), 2 deletions(-)
9
21
10
perf record qemu-x86_64 -perfmap ./a.out
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
11
perf report
12
13
or
14
15
perf record -k 1 qemu-x86_64 -jitdump ./a.out
16
DEBUGINFOD_URLS= perf inject -j -i perf.data -o perf.data.jitted
17
perf report -i perf.data.jitted
18
19
Co-developed-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
20
Co-developed-by: Alex Bennée <alex.bennee@linaro.org>
21
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
22
Message-Id: <20230112152013.125680-4-iii@linux.ibm.com>
23
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
24
---
25
docs/devel/tcg.rst | 23 +++
26
accel/tcg/perf.h | 49 +++++
27
accel/tcg/perf.c | 375 ++++++++++++++++++++++++++++++++++++++
28
accel/tcg/translate-all.c | 7 +
29
linux-user/exit.c | 2 +
30
linux-user/main.c | 15 ++
31
softmmu/vl.c | 11 ++
32
tcg/tcg.c | 2 +
33
accel/tcg/meson.build | 1 +
34
qemu-options.hx | 20 ++
35
10 files changed, 505 insertions(+)
36
create mode 100644 accel/tcg/perf.h
37
create mode 100644 accel/tcg/perf.c
38
39
diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
40
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
41
--- a/docs/devel/tcg.rst
24
--- a/include/tcg/tcg-temp-internal.h
42
+++ b/docs/devel/tcg.rst
25
+++ b/include/tcg/tcg-temp-internal.h
43
@@ -XXX,XX +XXX,XX @@ memory areas instead calls out to C code for device emulation.
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
44
Finally, the MMU helps tracking dirty pages and pages pointed to by
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
45
translation blocks.
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
46
29
47
+Profiling JITted code
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
48
+---------------------
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
49
+
50
+The Linux ``perf`` tool will treat all JITted code as a single block as
51
+unlike the main code it can't use debug information to link individual
52
+program counter samples with larger functions. To overcome this
53
+limitation you can use the ``-perfmap`` or the ``-jitdump`` option to generate
54
+map files. ``-perfmap`` is lightweight and produces only guest-host mappings.
55
+``-jitdump`` additionally saves JITed code and guest debug information (if
56
+available); its output needs to be integrated with the ``perf.data`` file
57
+before the final report can be viewed.
58
+
59
+.. code::
60
+
61
+ perf record $QEMU -perfmap $REMAINING_ARGS
62
+ perf report
63
+
64
+ perf record -k 1 $QEMU -jitdump $REMAINING_ARGS
65
+ DEBUGINFOD_URLS= perf inject -j -i perf.data -o perf.data.jitted
66
+ perf report -i perf.data.jitted
67
+
68
+Note that qemu-system generates mappings only for ``-kernel`` files in ELF
69
+format.
70
diff --git a/accel/tcg/perf.h b/accel/tcg/perf.h
71
new file mode 100644
72
index XXXXXXX..XXXXXXX
73
--- /dev/null
74
+++ b/accel/tcg/perf.h
75
@@ -XXX,XX +XXX,XX @@
76
+/*
77
+ * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
78
+ *
79
+ * SPDX-License-Identifier: GPL-2.0-or-later
80
+ */
81
+
82
+#ifndef ACCEL_TCG_PERF_H
83
+#define ACCEL_TCG_PERF_H
84
+
85
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
86
+/* Start writing perf-<pid>.map. */
87
+void perf_enable_perfmap(void);
88
+
89
+/* Start writing jit-<pid>.dump. */
90
+void perf_enable_jitdump(void);
91
+
92
+/* Add information about TCG prologue to profiler maps. */
93
+void perf_report_prologue(const void *start, size_t size);
94
+
95
+/* Add information about JITted guest code to profiler maps. */
96
+void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
97
+ const void *start);
98
+
99
+/* Stop writing perf-<pid>.map and/or jit-<pid>.dump. */
100
+void perf_exit(void);
101
+#else
102
+static inline void perf_enable_perfmap(void)
103
+{
32
+{
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
104
+}
34
+}
105
+
35
+
106
+static inline void perf_enable_jitdump(void)
36
#endif /* TCG_TEMP_FREE_H */
107
+{
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
108
+}
109
+
110
+static inline void perf_report_prologue(const void *start, size_t size)
111
+{
112
+}
113
+
114
+static inline void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
115
+ const void *start)
116
+{
117
+}
118
+
119
+static inline void perf_exit(void)
120
+{
121
+}
122
+#endif
123
+
124
+#endif
125
diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c
126
new file mode 100644
127
index XXXXXXX..XXXXXXX
128
--- /dev/null
129
+++ b/accel/tcg/perf.c
130
@@ -XXX,XX +XXX,XX @@
131
+/*
132
+ * Linux perf perf-<pid>.map and jit-<pid>.dump integration.
133
+ *
134
+ * The jitdump spec can be found at [1].
135
+ *
136
+ * [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
137
+ *
138
+ * SPDX-License-Identifier: GPL-2.0-or-later
139
+ */
140
+
141
+#include "qemu/osdep.h"
142
+#include "elf.h"
143
+#include "exec/exec-all.h"
144
+#include "qemu/timer.h"
145
+#include "tcg/tcg.h"
146
+
147
+#include "debuginfo.h"
148
+#include "perf.h"
149
+
150
+static FILE *safe_fopen_w(const char *path)
151
+{
152
+ int saved_errno;
153
+ FILE *f;
154
+ int fd;
155
+
156
+ /* Delete the old file, if any. */
157
+ unlink(path);
158
+
159
+ /* Avoid symlink attacks by using O_CREAT | O_EXCL. */
160
+ fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
161
+ if (fd == -1) {
162
+ return NULL;
163
+ }
164
+
165
+ /* Convert fd to FILE*. */
166
+ f = fdopen(fd, "w");
167
+ if (f == NULL) {
168
+ saved_errno = errno;
169
+ close(fd);
170
+ errno = saved_errno;
171
+ return NULL;
172
+ }
173
+
174
+ return f;
175
+}
176
+
177
+static FILE *perfmap;
178
+
179
+void perf_enable_perfmap(void)
180
+{
181
+ char map_file[32];
182
+
183
+ snprintf(map_file, sizeof(map_file), "/tmp/perf-%d.map", getpid());
184
+ perfmap = safe_fopen_w(map_file);
185
+ if (perfmap == NULL) {
186
+ warn_report("Could not open %s: %s, proceeding without perfmap",
187
+ map_file, strerror(errno));
188
+ }
189
+}
190
+
191
+/* Get PC and size of code JITed for guest instruction #INSN. */
192
+static void get_host_pc_size(uintptr_t *host_pc, uint16_t *host_size,
193
+ const void *start, size_t insn)
194
+{
195
+ uint16_t start_off = insn ? tcg_ctx->gen_insn_end_off[insn - 1] : 0;
196
+
197
+ if (host_pc) {
198
+ *host_pc = (uintptr_t)start + start_off;
199
+ }
200
+ if (host_size) {
201
+ *host_size = tcg_ctx->gen_insn_end_off[insn] - start_off;
202
+ }
203
+}
204
+
205
+static const char *pretty_symbol(const struct debuginfo_query *q, size_t *len)
206
+{
207
+ static __thread char buf[64];
208
+ int tmp;
209
+
210
+ if (!q->symbol) {
211
+ tmp = snprintf(buf, sizeof(buf), "guest-0x%"PRIx64, q->address);
212
+ if (len) {
213
+ *len = MIN(tmp + 1, sizeof(buf));
214
+ }
215
+ return buf;
216
+ }
217
+
218
+ if (!q->offset) {
219
+ if (len) {
220
+ *len = strlen(q->symbol) + 1;
221
+ }
222
+ return q->symbol;
223
+ }
224
+
225
+ tmp = snprintf(buf, sizeof(buf), "%s+0x%"PRIx64, q->symbol, q->offset);
226
+ if (len) {
227
+ *len = MIN(tmp + 1, sizeof(buf));
228
+ }
229
+ return buf;
230
+}
231
+
232
+static void write_perfmap_entry(const void *start, size_t insn,
233
+ const struct debuginfo_query *q)
234
+{
235
+ uint16_t host_size;
236
+ uintptr_t host_pc;
237
+
238
+ get_host_pc_size(&host_pc, &host_size, start, insn);
239
+ fprintf(perfmap, "%"PRIxPTR" %"PRIx16" %s\n",
240
+ host_pc, host_size, pretty_symbol(q, NULL));
241
+}
242
+
243
+static FILE *jitdump;
244
+
245
+#define JITHEADER_MAGIC 0x4A695444
246
+#define JITHEADER_VERSION 1
247
+
248
+struct jitheader {
249
+ uint32_t magic;
250
+ uint32_t version;
251
+ uint32_t total_size;
252
+ uint32_t elf_mach;
253
+ uint32_t pad1;
254
+ uint32_t pid;
255
+ uint64_t timestamp;
256
+ uint64_t flags;
257
+};
258
+
259
+enum jit_record_type {
260
+ JIT_CODE_LOAD = 0,
261
+ JIT_CODE_DEBUG_INFO = 2,
262
+};
263
+
264
+struct jr_prefix {
265
+ uint32_t id;
266
+ uint32_t total_size;
267
+ uint64_t timestamp;
268
+};
269
+
270
+struct jr_code_load {
271
+ struct jr_prefix p;
272
+
273
+ uint32_t pid;
274
+ uint32_t tid;
275
+ uint64_t vma;
276
+ uint64_t code_addr;
277
+ uint64_t code_size;
278
+ uint64_t code_index;
279
+};
280
+
281
+struct debug_entry {
282
+ uint64_t addr;
283
+ int lineno;
284
+ int discrim;
285
+ const char name[];
286
+};
287
+
288
+struct jr_code_debug_info {
289
+ struct jr_prefix p;
290
+
291
+ uint64_t code_addr;
292
+ uint64_t nr_entry;
293
+ struct debug_entry entries[];
294
+};
295
+
296
+static uint32_t get_e_machine(void)
297
+{
298
+ Elf64_Ehdr elf_header;
299
+ FILE *exe;
300
+ size_t n;
301
+
302
+ QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) !=
303
+ offsetof(Elf64_Ehdr, e_machine));
304
+
305
+ exe = fopen("/proc/self/exe", "r");
306
+ if (exe == NULL) {
307
+ return EM_NONE;
308
+ }
309
+
310
+ n = fread(&elf_header, sizeof(elf_header), 1, exe);
311
+ fclose(exe);
312
+ if (n != 1) {
313
+ return EM_NONE;
314
+ }
315
+
316
+ return elf_header.e_machine;
317
+}
318
+
319
+void perf_enable_jitdump(void)
320
+{
321
+ struct jitheader header;
322
+ char jitdump_file[32];
323
+ void *perf_marker;
324
+
325
+ if (!use_rt_clock) {
326
+ warn_report("CLOCK_MONOTONIC is not available, proceeding without jitdump");
327
+ return;
328
+ }
329
+
330
+ snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid());
331
+ jitdump = safe_fopen_w(jitdump_file);
332
+ if (jitdump == NULL) {
333
+ warn_report("Could not open %s: %s, proceeding without jitdump",
334
+ jitdump_file, strerror(errno));
335
+ return;
336
+ }
337
+
338
+ /*
339
+ * `perf inject` will see that the mapped file name in the corresponding
340
+ * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
341
+ * and will process it as a jitdump file.
342
+ */
343
+ perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_EXEC,
344
+ MAP_PRIVATE, fileno(jitdump), 0);
345
+ if (perf_marker == MAP_FAILED) {
346
+ warn_report("Could not map %s: %s, proceeding without jitdump",
347
+ jitdump_file, strerror(errno));
348
+ fclose(jitdump);
349
+ jitdump = NULL;
350
+ return;
351
+ }
352
+
353
+ header.magic = JITHEADER_MAGIC;
354
+ header.version = JITHEADER_VERSION;
355
+ header.total_size = sizeof(header);
356
+ header.elf_mach = get_e_machine();
357
+ header.pad1 = 0;
358
+ header.pid = getpid();
359
+ header.timestamp = get_clock();
360
+ header.flags = 0;
361
+ fwrite(&header, sizeof(header), 1, jitdump);
362
+}
363
+
364
+void perf_report_prologue(const void *start, size_t size)
365
+{
366
+ if (perfmap) {
367
+ fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n",
368
+ (uintptr_t)start, size);
369
+ }
370
+}
371
+
372
+/* Write a JIT_CODE_DEBUG_INFO jitdump entry. */
373
+static void write_jr_code_debug_info(const void *start,
374
+ const struct debuginfo_query *q,
375
+ size_t icount)
376
+{
377
+ struct jr_code_debug_info rec;
378
+ struct debug_entry ent;
379
+ uintptr_t host_pc;
380
+ int insn;
381
+
382
+ /* Write the header. */
383
+ rec.p.id = JIT_CODE_DEBUG_INFO;
384
+ rec.p.total_size = sizeof(rec) + sizeof(ent) + 1;
385
+ rec.p.timestamp = get_clock();
386
+ rec.code_addr = (uintptr_t)start;
387
+ rec.nr_entry = 1;
388
+ for (insn = 0; insn < icount; insn++) {
389
+ if (q[insn].file) {
390
+ rec.p.total_size += sizeof(ent) + strlen(q[insn].file) + 1;
391
+ rec.nr_entry++;
392
+ }
393
+ }
394
+ fwrite(&rec, sizeof(rec), 1, jitdump);
395
+
396
+ /* Write the main debug entries. */
397
+ for (insn = 0; insn < icount; insn++) {
398
+ if (q[insn].file) {
399
+ get_host_pc_size(&host_pc, NULL, start, insn);
400
+ ent.addr = host_pc;
401
+ ent.lineno = q[insn].line;
402
+ ent.discrim = 0;
403
+ fwrite(&ent, sizeof(ent), 1, jitdump);
404
+ fwrite(q[insn].file, strlen(q[insn].file) + 1, 1, jitdump);
405
+ }
406
+ }
407
+
408
+ /* Write the trailing debug_entry. */
409
+ ent.addr = (uintptr_t)start + tcg_ctx->gen_insn_end_off[icount - 1];
410
+ ent.lineno = 0;
411
+ ent.discrim = 0;
412
+ fwrite(&ent, sizeof(ent), 1, jitdump);
413
+ fwrite("", 1, 1, jitdump);
414
+}
415
+
416
+/* Write a JIT_CODE_LOAD jitdump entry. */
417
+static void write_jr_code_load(const void *start, uint16_t host_size,
418
+ const struct debuginfo_query *q)
419
+{
420
+ static uint64_t code_index;
421
+ struct jr_code_load rec;
422
+ const char *symbol;
423
+ size_t symbol_size;
424
+
425
+ symbol = pretty_symbol(q, &symbol_size);
426
+ rec.p.id = JIT_CODE_LOAD;
427
+ rec.p.total_size = sizeof(rec) + symbol_size + host_size;
428
+ rec.p.timestamp = get_clock();
429
+ rec.pid = getpid();
430
+ rec.tid = qemu_get_thread_id();
431
+ rec.vma = (uintptr_t)start;
432
+ rec.code_addr = (uintptr_t)start;
433
+ rec.code_size = host_size;
434
+ rec.code_index = code_index++;
435
+ fwrite(&rec, sizeof(rec), 1, jitdump);
436
+ fwrite(symbol, symbol_size, 1, jitdump);
437
+ fwrite(start, host_size, 1, jitdump);
438
+}
439
+
440
+void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
441
+ const void *start)
442
+{
443
+ struct debuginfo_query *q;
444
+ size_t insn;
445
+
446
+ if (!perfmap && !jitdump) {
447
+ return;
448
+ }
449
+
450
+ q = g_try_malloc0_n(tb->icount, sizeof(*q));
451
+ if (!q) {
452
+ return;
453
+ }
454
+
455
+ debuginfo_lock();
456
+
457
+ /* Query debuginfo for each guest instruction. */
458
+ for (insn = 0; insn < tb->icount; insn++) {
459
+ /* FIXME: This replicates the restore_state_to_opc() logic. */
460
+ q[insn].address = tcg_ctx->gen_insn_data[insn][0];
461
+ if (TARGET_TB_PCREL) {
462
+ q[insn].address |= (guest_pc & TARGET_PAGE_MASK);
463
+ } else {
464
+#if defined(TARGET_I386)
465
+ q[insn].address -= tb->cs_base;
466
+#endif
467
+ }
468
+ q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0);
469
+ }
470
+ debuginfo_query(q, tb->icount);
471
+
472
+ /* Emit perfmap entries if needed. */
473
+ if (perfmap) {
474
+ flockfile(perfmap);
475
+ for (insn = 0; insn < tb->icount; insn++) {
476
+ write_perfmap_entry(start, insn, &q[insn]);
477
+ }
478
+ funlockfile(perfmap);
479
+ }
480
+
481
+ /* Emit jitdump entries if needed. */
482
+ if (jitdump) {
483
+ flockfile(jitdump);
484
+ write_jr_code_debug_info(start, q, tb->icount);
485
+ write_jr_code_load(start, tcg_ctx->gen_insn_end_off[tb->icount - 1],
486
+ q);
487
+ funlockfile(jitdump);
488
+ }
489
+
490
+ debuginfo_unlock();
491
+ g_free(q);
492
+}
493
+
494
+void perf_exit(void)
495
+{
496
+ if (perfmap) {
497
+ fclose(perfmap);
498
+ perfmap = NULL;
499
+ }
500
+
501
+ if (jitdump) {
502
+ fclose(jitdump);
503
+ jitdump = NULL;
504
+ }
505
+}
506
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
507
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
508
--- a/accel/tcg/translate-all.c
39
--- a/accel/tcg/plugin-gen.c
509
+++ b/accel/tcg/translate-all.c
40
+++ b/accel/tcg/plugin-gen.c
510
@@ -XXX,XX +XXX,XX @@
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
511
#include "tb-hash.h"
42
* that might be live within the existing opcode stream.
512
#include "tb-context.h"
43
* The simplest solution is to release them all and create new.
513
#include "internal.h"
44
*/
514
+#include "perf.h"
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
515
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
516
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
47
517
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
518
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
49
switch (op->opc) {
519
}
520
tb->tc.size = gen_code_size;
521
522
+ /*
523
+ * For TARGET_TB_PCREL, attribute all executions of the generated
524
+ * code to its first mapping.
525
+ */
526
+ perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
527
+
528
#ifdef CONFIG_PROFILER
529
qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
530
qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
531
diff --git a/linux-user/exit.c b/linux-user/exit.c
532
index XXXXXXX..XXXXXXX 100644
533
--- a/linux-user/exit.c
534
+++ b/linux-user/exit.c
535
@@ -XXX,XX +XXX,XX @@
536
* along with this program; if not, see <http://www.gnu.org/licenses/>.
537
*/
538
#include "qemu/osdep.h"
539
+#include "accel/tcg/perf.h"
540
#include "exec/gdbstub.h"
541
#include "qemu.h"
542
#include "user-internals.h"
543
@@ -XXX,XX +XXX,XX @@ void preexit_cleanup(CPUArchState *env, int code)
544
#endif
545
gdb_exit(code);
546
qemu_plugin_user_exit();
547
+ perf_exit();
548
}
549
diff --git a/linux-user/main.c b/linux-user/main.c
550
index XXXXXXX..XXXXXXX 100644
551
--- a/linux-user/main.c
552
+++ b/linux-user/main.c
553
@@ -XXX,XX +XXX,XX @@
554
#include "signal-common.h"
555
#include "loader.h"
556
#include "user-mmap.h"
557
+#include "accel/tcg/perf.h"
558
559
#ifdef CONFIG_SEMIHOSTING
560
#include "semihosting/semihost.h"
561
@@ -XXX,XX +XXX,XX @@ static void handle_arg_abi_call0(const char *arg)
562
}
563
#endif
564
565
+static void handle_arg_perfmap(const char *arg)
566
+{
567
+ perf_enable_perfmap();
568
+}
569
+
570
+static void handle_arg_jitdump(const char *arg)
571
+{
572
+ perf_enable_jitdump();
573
+}
574
+
575
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
576
577
#ifdef CONFIG_PLUGIN
578
@@ -XXX,XX +XXX,XX @@ static const struct qemu_argument arg_table[] = {
579
{"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false, handle_arg_abi_call0,
580
"", "assume CALL0 Xtensa ABI"},
581
#endif
582
+ {"perfmap", "QEMU_PERFMAP", false, handle_arg_perfmap,
583
+ "", "Generate a /tmp/perf-${pid}.map file for perf"},
584
+ {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
585
+ "", "Generate a jit-${pid}.dump file for perf"},
586
{NULL, NULL, false, NULL, NULL, NULL}
587
};
588
589
diff --git a/softmmu/vl.c b/softmmu/vl.c
590
index XXXXXXX..XXXXXXX 100644
591
--- a/softmmu/vl.c
592
+++ b/softmmu/vl.c
593
@@ -XXX,XX +XXX,XX @@
594
#include "fsdev/qemu-fsdev.h"
595
#endif
596
#include "sysemu/qtest.h"
597
+#ifdef CONFIG_TCG
598
+#include "accel/tcg/perf.h"
599
+#endif
600
601
#include "disas/disas.h"
602
603
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
604
case QEMU_OPTION_DFILTER:
605
qemu_set_dfilter_ranges(optarg, &error_fatal);
606
break;
607
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
608
+ case QEMU_OPTION_perfmap:
609
+ perf_enable_perfmap();
610
+ break;
611
+ case QEMU_OPTION_jitdump:
612
+ perf_enable_jitdump();
613
+ break;
614
+#endif
615
case QEMU_OPTION_seed:
616
qemu_guest_random_seed_main(optarg, &error_fatal);
617
break;
618
diff --git a/tcg/tcg.c b/tcg/tcg.c
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
619
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
620
--- a/tcg/tcg.c
52
--- a/tcg/tcg.c
621
+++ b/tcg/tcg.c
53
+++ b/tcg/tcg.c
622
@@ -XXX,XX +XXX,XX @@
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
623
#include "exec/log.h"
55
s->nb_temps = s->nb_globals;
624
#include "tcg/tcg-ldst.h"
56
625
#include "tcg-internal.h"
57
/* No temps have been previously allocated for size or locality. */
626
+#include "accel/tcg/perf.h"
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
627
59
+ tcg_temp_ebb_reset_freed(s);
628
/* Forward declarations for functions declared in tcg-target.c.inc and
60
629
used here. */
61
/* No constant temps have been previously allocated. */
630
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
64
}
631
#endif
65
#endif
632
66
633
prologue_size = tcg_current_code_size(s);
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
634
+ perf_report_prologue(s->code_gen_ptr, prologue_size);
68
+ tcg_temp_ebb_reset_freed(s);
635
636
#ifndef CONFIG_TCG_INTERPRETER
637
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
638
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
639
index XXXXXXX..XXXXXXX 100644
640
--- a/accel/tcg/meson.build
641
+++ b/accel/tcg/meson.build
642
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
643
tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
644
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
645
tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
646
+tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
647
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
648
649
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
650
diff --git a/qemu-options.hx b/qemu-options.hx
651
index XXXXXXX..XXXXXXX 100644
652
--- a/qemu-options.hx
653
+++ b/qemu-options.hx
654
@@ -XXX,XX +XXX,XX @@ SRST
655
Enable synchronization profiling.
656
ERST
657
658
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
659
+DEF("perfmap", 0, QEMU_OPTION_perfmap,
660
+ "-perfmap generate a /tmp/perf-${pid}.map file for perf\n",
661
+ QEMU_ARCH_ALL)
662
+SRST
663
+``-perfmap``
664
+ Generate a map file for Linux perf tools that will allow basic profiling
665
+ information to be broken down into basic blocks.
666
+ERST
667
+
69
+
668
+DEF("jitdump", 0, QEMU_OPTION_jitdump,
70
tcg_optimize(s);
669
+ "-jitdump generate a jit-${pid}.dump file for perf\n",
71
670
+ QEMU_ARCH_ALL)
72
reachable_code_pass(s);
671
+SRST
672
+``-jitdump``
673
+ Generate a dump file for Linux perf tools that maps basic blocks to symbol
674
+ names, line numbers and JITted code.
675
+ERST
676
+#endif
677
+
678
DEFHEADING()
679
680
DEFHEADING(Generic object creation:)
681
--
73
--
682
2.34.1
74
2.43.0
683
75
684
76
diff view generated by jsdifflib
1
Use the attribute, which is supported by clang, instead of
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
the #pragma, which is not supported and, for some reason,
3
also not detected by the meson probe, so we fail by -Werror.
4
2
5
Include only <immintrin.h> as that is the outermost "official"
3
On RISC-V to StoreStore barrier corresponds
6
header for these intrinsics -- emmintrin.h and smmintrin -- are
4
`fence w, w` not `fence r, r`
7
older SSE2 and SSE4 specific headers, while the immintrin.h
8
includes all of the Intel intrinsics.
9
5
10
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
6
Cc: qemu-stable@nongnu.org
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
---
13
meson.build | 8 ++------
14
tcg/riscv/tcg-target.c.inc | 2 +-
14
util/bufferiszero.c | 41 ++++++-----------------------------------
15
1 file changed, 1 insertion(+), 1 deletion(-)
15
2 files changed, 8 insertions(+), 41 deletions(-)
16
16
17
diff --git a/meson.build b/meson.build
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/meson.build
19
--- a/tcg/riscv/tcg-target.c.inc
20
+++ b/meson.build
20
+++ b/tcg/riscv/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
22
config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
22
insn |= 0x02100000;
23
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
24
.require(cc.links('''
25
- #pragma GCC push_options
26
- #pragma GCC target("avx2")
27
#include <cpuid.h>
28
#include <immintrin.h>
29
- static int bar(void *a) {
30
+ static int __attribute__((target("avx2"))) bar(void *a) {
31
__m256i x = *(__m256i *)a;
32
return _mm256_testz_si256(x, x);
33
}
23
}
34
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
24
if (a0 & TCG_MO_ST_ST) {
35
config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
25
- insn |= 0x02200000;
36
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
26
+ insn |= 0x01100000;
37
.require(cc.links('''
38
- #pragma GCC push_options
39
- #pragma GCC target("avx512f")
40
#include <cpuid.h>
41
#include <immintrin.h>
42
- static int bar(void *a) {
43
+ static int __attribute__((target("avx512f"))) bar(void *a) {
44
__m512i x = *(__m512i *)a;
45
return _mm512_test_epi64_mask(x, x);
46
}
27
}
47
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
28
tcg_out32(s, insn);
48
index XXXXXXX..XXXXXXX 100644
49
--- a/util/bufferiszero.c
50
+++ b/util/bufferiszero.c
51
@@ -XXX,XX +XXX,XX @@ buffer_zero_int(const void *buf, size_t len)
52
}
29
}
53
54
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
55
-/* Do not use push_options pragmas unnecessarily, because clang
56
- * does not support them.
57
- */
58
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
59
-#pragma GCC push_options
60
-#pragma GCC target("sse2")
61
-#endif
62
-#include <emmintrin.h>
63
+#include <immintrin.h>
64
65
/* Note that each of these vectorized functions require len >= 64. */
66
67
-static bool
68
+static bool __attribute__((target("sse2")))
69
buffer_zero_sse2(const void *buf, size_t len)
70
{
71
__m128i t = _mm_loadu_si128(buf);
72
@@ -XXX,XX +XXX,XX @@ buffer_zero_sse2(const void *buf, size_t len)
73
74
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
75
}
76
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
77
-#pragma GCC pop_options
78
-#endif
79
80
#ifdef CONFIG_AVX2_OPT
81
-/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
82
- * the includes have to be within the corresponding push_options region, and
83
- * therefore the regions themselves have to be ordered with increasing ISA.
84
- */
85
-#pragma GCC push_options
86
-#pragma GCC target("sse4")
87
-#include <smmintrin.h>
88
-
89
-static bool
90
+static bool __attribute__((target("sse4")))
91
buffer_zero_sse4(const void *buf, size_t len)
92
{
93
__m128i t = _mm_loadu_si128(buf);
94
@@ -XXX,XX +XXX,XX @@ buffer_zero_sse4(const void *buf, size_t len)
95
return _mm_testz_si128(t, t);
96
}
97
98
-#pragma GCC pop_options
99
-#pragma GCC push_options
100
-#pragma GCC target("avx2")
101
-#include <immintrin.h>
102
-
103
-static bool
104
+static bool __attribute__((target("avx2")))
105
buffer_zero_avx2(const void *buf, size_t len)
106
{
107
/* Begin with an unaligned head of 32 bytes. */
108
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx2(const void *buf, size_t len)
109
110
return _mm256_testz_si256(t, t);
111
}
112
-#pragma GCC pop_options
113
#endif /* CONFIG_AVX2_OPT */
114
115
#ifdef CONFIG_AVX512F_OPT
116
-#pragma GCC push_options
117
-#pragma GCC target("avx512f")
118
-#include <immintrin.h>
119
-
120
-static bool
121
+static bool __attribute__((target("avx512f")))
122
buffer_zero_avx512(const void *buf, size_t len)
123
{
124
/* Begin with an unaligned head of 64 bytes. */
125
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx512(const void *buf, size_t len)
126
return !_mm512_test_epi64_mask(t, t);
127
128
}
129
-#pragma GCC pop_options
130
-#endif
131
+#endif /* CONFIG_AVX512F_OPT */
132
133
134
/* Note that for test_buffer_is_zero_next_accel, the most preferred
135
--
30
--
136
2.34.1
31
2.43.0
137
138
diff view generated by jsdifflib
1
Recently the g_assert(cpu == current_cpu) test has been
1
This allows targets to declare that the helper requires a
2
intermittently failing with gcc. Reorg the code around
2
float_status pointer and instead of a generic void pointer.
3
the setjmp to minimize the lifetime of the cpu variable
4
affected by the setjmp.
5
3
6
This appears to fix the existing issue with clang as well.
7
8
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1147
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
6
---
12
accel/tcg/cpu-exec.c | 111 +++++++++++++++++++++----------------------
7
include/exec/helper-head.h.inc | 3 +++
13
1 file changed, 54 insertions(+), 57 deletions(-)
8
1 file changed, 3 insertions(+)
14
9
15
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
16
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
17
--- a/accel/tcg/cpu-exec.c
12
--- a/include/exec/helper-head.h.inc
18
+++ b/accel/tcg/cpu-exec.c
13
+++ b/include/exec/helper-head.h.inc
19
@@ -XXX,XX +XXX,XX @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
14
@@ -XXX,XX +XXX,XX @@
20
15
#define dh_alias_ptr ptr
21
/* main execution loop */
16
#define dh_alias_cptr ptr
22
17
#define dh_alias_env ptr
23
-int cpu_exec(CPUState *cpu)
18
+#define dh_alias_fpst ptr
24
+static int __attribute__((noinline))
19
#define dh_alias_void void
25
+cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
20
#define dh_alias_noreturn noreturn
26
{
21
#define dh_alias(t) glue(dh_alias_, t)
27
int ret;
22
@@ -XXX,XX +XXX,XX @@
28
- SyncClocks sc = { 0 };
23
#define dh_ctype_ptr void *
29
-
24
#define dh_ctype_cptr const void *
30
- /* replay_interrupt may need current_cpu */
25
#define dh_ctype_env CPUArchState *
31
- current_cpu = cpu;
26
+#define dh_ctype_fpst float_status *
32
-
27
#define dh_ctype_void void
33
- if (cpu_handle_halt(cpu)) {
28
#define dh_ctype_noreturn G_NORETURN void
34
- return EXCP_HALTED;
29
#define dh_ctype(t) dh_ctype_##t
35
- }
30
@@ -XXX,XX +XXX,XX @@
36
-
31
#define dh_typecode_f64 dh_typecode_i64
37
- rcu_read_lock();
32
#define dh_typecode_cptr dh_typecode_ptr
38
-
33
#define dh_typecode_env dh_typecode_ptr
39
- cpu_exec_enter(cpu);
34
+#define dh_typecode_fpst dh_typecode_ptr
40
-
35
#define dh_typecode(t) dh_typecode_##t
41
- /* Calculate difference between guest clock and host clock.
36
42
- * This delay includes the delay of the last cycle, so
37
#define dh_callflag_i32 0
43
- * what we have to do is sleep until it is 0. As for the
44
- * advance/delay we gain here, we try to fix it next time.
45
- */
46
- init_delay_params(&sc, cpu);
47
-
48
- /* prepare setjmp context for exception handling */
49
- if (sigsetjmp(cpu->jmp_env, 0) != 0) {
50
-#if defined(__clang__)
51
- /*
52
- * Some compilers wrongly smash all local variables after
53
- * siglongjmp (the spec requires that only non-volatile locals
54
- * which are changed between the sigsetjmp and siglongjmp are
55
- * permitted to be trashed). There were bug reports for gcc
56
- * 4.5.0 and clang. The bug is fixed in all versions of gcc
57
- * that we support, but is still unfixed in clang:
58
- * https://bugs.llvm.org/show_bug.cgi?id=21183
59
- *
60
- * Reload an essential local variable here for those compilers.
61
- * Newer versions of gcc would complain about this code (-Wclobbered),
62
- * so we only perform the workaround for clang.
63
- */
64
- cpu = current_cpu;
65
-#else
66
- /* Non-buggy compilers preserve this; assert the correct value. */
67
- g_assert(cpu == current_cpu);
68
-#endif
69
-
70
-#ifndef CONFIG_SOFTMMU
71
- clear_helper_retaddr();
72
- if (have_mmap_lock()) {
73
- mmap_unlock();
74
- }
75
-#endif
76
- if (qemu_mutex_iothread_locked()) {
77
- qemu_mutex_unlock_iothread();
78
- }
79
- qemu_plugin_disable_mem_helpers(cpu);
80
-
81
- assert_no_pages_locked();
82
- }
83
84
/* if an exception is pending, we execute it here */
85
while (!cpu_handle_exception(cpu, &ret)) {
86
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
87
88
/* Try to align the host and virtual clocks
89
if the guest is in advance */
90
- align_clocks(&sc, cpu);
91
+ align_clocks(sc, cpu);
92
}
93
}
94
+ return ret;
95
+}
96
+
97
+static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
98
+{
99
+ /* Prepare setjmp context for exception handling. */
100
+ if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
101
+ /* Non-buggy compilers preserve this; assert the correct value. */
102
+ g_assert(cpu == current_cpu);
103
+
104
+#ifndef CONFIG_SOFTMMU
105
+ clear_helper_retaddr();
106
+ if (have_mmap_lock()) {
107
+ mmap_unlock();
108
+ }
109
+#endif
110
+ if (qemu_mutex_iothread_locked()) {
111
+ qemu_mutex_unlock_iothread();
112
+ }
113
+ qemu_plugin_disable_mem_helpers(cpu);
114
+
115
+ assert_no_pages_locked();
116
+ }
117
+
118
+ return cpu_exec_loop(cpu, sc);
119
+}
120
+
121
+int cpu_exec(CPUState *cpu)
122
+{
123
+ int ret;
124
+ SyncClocks sc = { 0 };
125
+
126
+ /* replay_interrupt may need current_cpu */
127
+ current_cpu = cpu;
128
+
129
+ if (cpu_handle_halt(cpu)) {
130
+ return EXCP_HALTED;
131
+ }
132
+
133
+ rcu_read_lock();
134
+ cpu_exec_enter(cpu);
135
+
136
+ /*
137
+ * Calculate difference between guest clock and host clock.
138
+ * This delay includes the delay of the last cycle, so
139
+ * what we have to do is sleep until it is 0. As for the
140
+ * advance/delay we gain here, we try to fix it next time.
141
+ */
142
+ init_delay_params(&sc, cpu);
143
+
144
+ ret = cpu_exec_setjmp(cpu, &sc);
145
146
cpu_exec_exit(cpu);
147
rcu_read_unlock();
148
--
38
--
149
2.34.1
39
2.43.0
150
40
151
41
diff view generated by jsdifflib
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
When exiting due to an exit() syscall, qemu-user calls
3
Rather than manually copying each register, use
4
preexit_cleanup(), but this is currently not the case when exiting due
4
the libc memcpy(), which is well optimized nowadays.
5
to a signal. This leads to various buffers not being flushed (e.g.,
6
for gprof, for gcov, and for the upcoming perf support).
7
5
8
Add the missing call.
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
9
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
10
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-Id: <20230112152013.125680-2-iii@linux.ibm.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
12
---
16
linux-user/signal.c | 8 +++++---
13
target/sparc/win_helper.c | 26 ++++++++------------------
17
1 file changed, 5 insertions(+), 3 deletions(-)
14
1 file changed, 8 insertions(+), 18 deletions(-)
18
15
19
diff --git a/linux-user/signal.c b/linux-user/signal.c
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
20
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
21
--- a/linux-user/signal.c
18
--- a/target/sparc/win_helper.c
22
+++ b/linux-user/signal.c
19
+++ b/target/sparc/win_helper.c
23
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
20
@@ -XXX,XX +XXX,XX @@
24
21
#include "exec/helper-proto.h"
25
/* abort execution with signal */
22
#include "trace.h"
26
static G_NORETURN
23
27
-void dump_core_and_abort(int target_sig)
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
28
+void dump_core_and_abort(CPUArchState *cpu_env, int target_sig)
25
-{
26
- dst[0] = src[0];
27
- dst[1] = src[1];
28
- dst[2] = src[2];
29
- dst[3] = src[3];
30
- dst[4] = src[4];
31
- dst[5] = src[5];
32
- dst[6] = src[6];
33
- dst[7] = src[7];
34
-}
35
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
29
{
37
{
30
CPUState *cpu = thread_cpu;
38
/* put the modified wrap registers at their proper location */
31
CPUArchState *env = cpu->env_ptr;
39
if (env->cwp == env->nwindows - 1) {
32
@@ -XXX,XX +XXX,XX @@ void dump_core_and_abort(int target_sig)
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
33
target_sig, strsignal(host_sig), "core dumped" );
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
42
+ sizeof(env->gregs));
34
}
43
}
35
44
env->cwp = new_cwp;
36
+ preexit_cleanup(cpu_env, 128 + target_sig);
45
37
+
46
/* put the wrap registers at their temporary location */
38
/* The proper exit code for dying from an uncaught signal is
47
if (new_cwp == env->nwindows - 1) {
39
* -<signal>. The kernel doesn't allow exit() or _exit() to pass
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
40
* a negative value. To get the proper exit code we need to
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
41
@@ -XXX,XX +XXX,XX @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig,
50
+ sizeof(env->gregs));
42
sig != TARGET_SIGURG &&
51
}
43
sig != TARGET_SIGWINCH &&
52
env->regwptr = env->regbase + (new_cwp * 16);
44
sig != TARGET_SIGCONT) {
53
}
45
- dump_core_and_abort(sig);
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
46
+ dump_core_and_abort(cpu_env, sig);
55
dst = get_gl_gregset(env, env->gl);
47
}
56
48
} else if (handler == TARGET_SIG_IGN) {
57
if (src != dst) {
49
/* ignore sig */
58
- memcpy32(dst, env->gregs);
50
} else if (handler == TARGET_SIG_ERR) {
59
- memcpy32(env->gregs, src);
51
- dump_core_and_abort(sig);
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
52
+ dump_core_and_abort(cpu_env, sig);
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
62
}
63
}
64
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
66
/* Switch global register bank */
67
src = get_gregset(env, new_pstate_regs);
68
dst = get_gregset(env, pstate_regs);
69
- memcpy32(dst, env->gregs);
70
- memcpy32(env->gregs, src);
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
53
} else {
73
} else {
54
/* compute the blocked signals during the handler execution */
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
55
sigset_t *blocked_set;
75
}
56
--
76
--
57
2.34.1
77
2.43.0
58
78
59
79
diff view generated by jsdifflib
Deleted patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
1
3
Add libdw-based functions for loading and querying debuginfo. Load
4
debuginfo from the system and the linux-user loaders.
5
6
This is useful for the upcoming perf support, which can then put
7
human-readable guest symbols instead of raw guest PCs into perfmap and
8
jitdump files.
9
10
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
11
Message-Id: <20230112152013.125680-3-iii@linux.ibm.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
meson.build | 8 ++++
15
accel/tcg/debuginfo.h | 77 +++++++++++++++++++++++++++++++++
16
accel/tcg/debuginfo.c | 96 ++++++++++++++++++++++++++++++++++++++++++
17
hw/core/loader.c | 5 +++
18
linux-user/elfload.c | 3 ++
19
accel/tcg/meson.build | 1 +
20
linux-user/meson.build | 1 +
21
7 files changed, 191 insertions(+)
22
create mode 100644 accel/tcg/debuginfo.h
23
create mode 100644 accel/tcg/debuginfo.c
24
25
diff --git a/meson.build b/meson.build
26
index XXXXXXX..XXXXXXX 100644
27
--- a/meson.build
28
+++ b/meson.build
29
@@ -XXX,XX +XXX,XX @@ if libbpf.found() and not cc.links('''
30
endif
31
endif
32
33
+# libdw
34
+libdw = dependency('libdw',
35
+ method: 'pkg-config',
36
+ kwargs: static_kwargs,
37
+ required: false)
38
+
39
#################
40
# config-host.h #
41
#################
42
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_DBUS_DISPLAY', dbus_display)
43
config_host_data.set('CONFIG_CFI', get_option('cfi'))
44
config_host_data.set('CONFIG_SELINUX', selinux.found())
45
config_host_data.set('CONFIG_XEN_BACKEND', xen.found())
46
+config_host_data.set('CONFIG_LIBDW', libdw.found())
47
if xen.found()
48
# protect from xen.version() having less than three components
49
xen_version = xen.version().split('.') + ['0', '0']
50
@@ -XXX,XX +XXX,XX @@ summary_info += {'libudev': libudev}
51
# Dummy dependency, keep .found()
52
summary_info += {'FUSE lseek': fuse_lseek.found()}
53
summary_info += {'selinux': selinux}
54
+summary_info += {'libdw': libdw}
55
summary(summary_info, bool_yn: true, section: 'Dependencies')
56
57
if not supported_cpus.contains(cpu)
58
diff --git a/accel/tcg/debuginfo.h b/accel/tcg/debuginfo.h
59
new file mode 100644
60
index XXXXXXX..XXXXXXX
61
--- /dev/null
62
+++ b/accel/tcg/debuginfo.h
63
@@ -XXX,XX +XXX,XX @@
64
+/*
65
+ * Debug information support.
66
+ *
67
+ * SPDX-License-Identifier: GPL-2.0-or-later
68
+ */
69
+
70
+#ifndef ACCEL_TCG_DEBUGINFO_H
71
+#define ACCEL_TCG_DEBUGINFO_H
72
+
73
+/*
74
+ * Debuginfo describing a certain address.
75
+ */
76
+struct debuginfo_query {
77
+ uint64_t address; /* Input: address. */
78
+ int flags; /* Input: debuginfo subset. */
79
+ const char *symbol; /* Symbol that the address is part of. */
80
+ uint64_t offset; /* Offset from the symbol. */
81
+ const char *file; /* Source file associated with the address. */
82
+ int line; /* Line number in the source file. */
83
+};
84
+
85
+/*
86
+ * Debuginfo subsets.
87
+ */
88
+#define DEBUGINFO_SYMBOL BIT(1)
89
+#define DEBUGINFO_LINE BIT(2)
90
+
91
+#if defined(CONFIG_TCG) && defined(CONFIG_LIBDW)
92
+/*
93
+ * Load debuginfo for the specified guest ELF image.
94
+ * Return true on success, false on failure.
95
+ */
96
+void debuginfo_report_elf(const char *name, int fd, uint64_t bias);
97
+
98
+/*
99
+ * Take the debuginfo lock.
100
+ */
101
+void debuginfo_lock(void);
102
+
103
+/*
104
+ * Fill each on N Qs with the debuginfo about Q->ADDRESS as specified by
105
+ * Q->FLAGS:
106
+ *
107
+ * - DEBUGINFO_SYMBOL: update Q->SYMBOL and Q->OFFSET. If symbol debuginfo is
108
+ * missing, then leave them as is.
109
+ * - DEBUINFO_LINE: update Q->FILE and Q->LINE. If line debuginfo is missing,
110
+ * then leave them as is.
111
+ *
112
+ * This function must be called under the debuginfo lock. The results can be
113
+ * accessed only until the debuginfo lock is released.
114
+ */
115
+void debuginfo_query(struct debuginfo_query *q, size_t n);
116
+
117
+/*
118
+ * Release the debuginfo lock.
119
+ */
120
+void debuginfo_unlock(void);
121
+#else
122
+static inline void debuginfo_report_elf(const char *image_name, int image_fd,
123
+ uint64_t load_bias)
124
+{
125
+}
126
+
127
+static inline void debuginfo_lock(void)
128
+{
129
+}
130
+
131
+static inline void debuginfo_query(struct debuginfo_query *q, size_t n)
132
+{
133
+}
134
+
135
+static inline void debuginfo_unlock(void)
136
+{
137
+}
138
+#endif
139
+
140
+#endif
141
diff --git a/accel/tcg/debuginfo.c b/accel/tcg/debuginfo.c
142
new file mode 100644
143
index XXXXXXX..XXXXXXX
144
--- /dev/null
145
+++ b/accel/tcg/debuginfo.c
146
@@ -XXX,XX +XXX,XX @@
147
+/*
148
+ * Debug information support.
149
+ *
150
+ * SPDX-License-Identifier: GPL-2.0-or-later
151
+ */
152
+
153
+#include "qemu/osdep.h"
154
+#include "qemu/lockable.h"
155
+
156
+#include <elfutils/libdwfl.h>
157
+
158
+#include "debuginfo.h"
159
+
160
+static QemuMutex lock;
161
+static Dwfl *dwfl;
162
+static const Dwfl_Callbacks dwfl_callbacks = {
163
+ .find_elf = NULL,
164
+ .find_debuginfo = dwfl_standard_find_debuginfo,
165
+ .section_address = NULL,
166
+ .debuginfo_path = NULL,
167
+};
168
+
169
+__attribute__((constructor))
170
+static void debuginfo_init(void)
171
+{
172
+ qemu_mutex_init(&lock);
173
+}
174
+
175
+void debuginfo_report_elf(const char *name, int fd, uint64_t bias)
176
+{
177
+ QEMU_LOCK_GUARD(&lock);
178
+
179
+ if (dwfl) {
180
+ dwfl_report_begin_add(dwfl);
181
+ } else {
182
+ dwfl = dwfl_begin(&dwfl_callbacks);
183
+ }
184
+
185
+ if (dwfl) {
186
+ dwfl_report_elf(dwfl, name, name, fd, bias, true);
187
+ dwfl_report_end(dwfl, NULL, NULL);
188
+ }
189
+}
190
+
191
+void debuginfo_lock(void)
192
+{
193
+ qemu_mutex_lock(&lock);
194
+}
195
+
196
+void debuginfo_query(struct debuginfo_query *q, size_t n)
197
+{
198
+ const char *symbol, *file;
199
+ Dwfl_Module *dwfl_module;
200
+ Dwfl_Line *dwfl_line;
201
+ GElf_Off dwfl_offset;
202
+ GElf_Sym dwfl_sym;
203
+ size_t i;
204
+ int line;
205
+
206
+ if (!dwfl) {
207
+ return;
208
+ }
209
+
210
+ for (i = 0; i < n; i++) {
211
+ dwfl_module = dwfl_addrmodule(dwfl, q[i].address);
212
+ if (!dwfl_module) {
213
+ continue;
214
+ }
215
+
216
+ if (q[i].flags & DEBUGINFO_SYMBOL) {
217
+ symbol = dwfl_module_addrinfo(dwfl_module, q[i].address,
218
+ &dwfl_offset, &dwfl_sym,
219
+ NULL, NULL, NULL);
220
+ if (symbol) {
221
+ q[i].symbol = symbol;
222
+ q[i].offset = dwfl_offset;
223
+ }
224
+ }
225
+
226
+ if (q[i].flags & DEBUGINFO_LINE) {
227
+ dwfl_line = dwfl_module_getsrc(dwfl_module, q[i].address);
228
+ if (dwfl_line) {
229
+ file = dwfl_lineinfo(dwfl_line, NULL, &line, 0, NULL, NULL);
230
+ if (file) {
231
+ q[i].file = file;
232
+ q[i].line = line;
233
+ }
234
+ }
235
+ }
236
+ }
237
+}
238
+
239
+void debuginfo_unlock(void)
240
+{
241
+ qemu_mutex_unlock(&lock);
242
+}
243
diff --git a/hw/core/loader.c b/hw/core/loader.c
244
index XXXXXXX..XXXXXXX 100644
245
--- a/hw/core/loader.c
246
+++ b/hw/core/loader.c
247
@@ -XXX,XX +XXX,XX @@
248
#include "hw/boards.h"
249
#include "qemu/cutils.h"
250
#include "sysemu/runstate.h"
251
+#include "accel/tcg/debuginfo.h"
252
253
#include <zlib.h>
254
255
@@ -XXX,XX +XXX,XX @@ ssize_t load_elf_ram_sym(const char *filename,
256
clear_lsb, data_swab, as, load_rom, sym_cb);
257
}
258
259
+ if (ret != ELF_LOAD_FAILED) {
260
+ debuginfo_report_elf(filename, fd, 0);
261
+ }
262
+
263
fail:
264
close(fd);
265
return ret;
266
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/linux-user/elfload.c
269
+++ b/linux-user/elfload.c
270
@@ -XXX,XX +XXX,XX @@
271
#include "qemu/selfmap.h"
272
#include "qapi/error.h"
273
#include "target_signal.h"
274
+#include "accel/tcg/debuginfo.h"
275
276
#ifdef _ARCH_PPC64
277
#undef ARCH_DLINFO
278
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
279
load_symbols(ehdr, image_fd, load_bias);
280
}
281
282
+ debuginfo_report_elf(image_name, image_fd, load_bias);
283
+
284
mmap_unlock();
285
286
close(image_fd);
287
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
288
index XXXXXXX..XXXXXXX 100644
289
--- a/accel/tcg/meson.build
290
+++ b/accel/tcg/meson.build
291
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
292
tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
293
tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
294
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
295
+tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
296
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
297
298
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
299
diff --git a/linux-user/meson.build b/linux-user/meson.build
300
index XXXXXXX..XXXXXXX 100644
301
--- a/linux-user/meson.build
302
+++ b/linux-user/meson.build
303
@@ -XXX,XX +XXX,XX @@ linux_user_ss.add(files(
304
'uname.c',
305
))
306
linux_user_ss.add(rt)
307
+linux_user_ss.add(libdw)
308
309
linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c'))
310
linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c'))
311
--
312
2.34.1
diff view generated by jsdifflib