Now we no longer have dynamic state affecting things we can remove the
additional fields in cpu.h and simplify the TB hash calculation.
For the benchmark:
hyperfine -w 2 -m 20 \
"./arm-softmmu/qemu-system-arm -cpu cortex-a15 \
-machine type=virt,highmem=off \
-display none -m 2048 \
-serial mon:stdio \
-netdev user,id=unet,hostfwd=tcp::2222-:22 \
-device virtio-net-pci,netdev=unet \
-device virtio-scsi-pci \
-blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf \
-device scsi-hd,drive=hd -smp 4 \
-kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage \
-append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' \
-snapshot"
It has a marginal effect on runtime, before:
Time (mean ± σ): 26.279 s ± 2.438 s [User: 41.113 s, System: 1.843 s]
Range (min … max): 24.420 s … 32.565 s 20 runs
after:
Time (mean ± σ): 24.440 s ± 2.885 s [User: 34.474 s, System: 2.028 s]
Range (min … max): 21.663 s … 29.937 s 20 runs
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1358
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230524133952.3971948-9-alex.bennee@linaro.org>
---
accel/tcg/tb-hash.h | 6 +++---
include/exec/exec-all.h | 3 ---
include/hw/core/cpu.h | 5 -----
accel/tcg/cpu-exec.c | 7 +------
accel/tcg/tb-maint.c | 5 ++---
accel/tcg/translate-all.c | 6 ------
6 files changed, 6 insertions(+), 26 deletions(-)
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..1d19c69caa 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -61,10 +61,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
#endif /* CONFIG_SOFTMMU */
static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
- uint32_t cf_mask, uint32_t trace_vcpu_dstate)
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+ uint32_t flags, uint32_t cf_mask)
{
- return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
+ return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
}
#endif
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d2b151986..3b1b57f6ad 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -545,9 +545,6 @@ struct TranslationBlock {
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24
- /* Per-vCPU dynamic tracing state used to generate this TB */
- uint32_t trace_vcpu_dstate;
-
/*
* Above fields used for comparing
*/
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 39150cf8f8..383456d1b3 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
struct qemu_work_item;
#define CPU_UNSET_NUMA_NODE_ID -1
-#define CPU_TRACE_DSTATE_MAX_EVENTS 32
/**
* CPUState:
@@ -407,10 +406,6 @@ struct CPUState {
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;
- /* Used for events with 'vcpu' and *without* the 'disabled' properties */
- DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
- DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
-
DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
#ifdef CONFIG_PLUGIN
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..4a1dce98ff 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -175,7 +175,6 @@ struct tb_desc {
tb_page_addr_t page_addr0;
uint32_t flags;
uint32_t cflags;
- uint32_t trace_vcpu_dstate;
};
static bool tb_lookup_cmp(const void *p, const void *d)
@@ -187,7 +186,6 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb_page_addr0(tb) == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
- tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
@@ -228,7 +226,6 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
- desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
if (phys_pc == -1) {
@@ -236,7 +233,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
}
desc.page_addr0 = phys_pc;
h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
- flags, cflags, *cpu->trace_dstate);
+ flags, cflags);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@@ -263,7 +260,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
jc->array[hash].pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
- tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
@@ -282,7 +278,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
- tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..bc1961ea55 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -50,7 +50,6 @@ static bool tb_cmp(const void *ap, const void *bp)
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
- a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
tb_page_addr0(a) == tb_page_addr0(b) &&
tb_page_addr1(a) == tb_page_addr1(b));
}
@@ -888,7 +887,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb_page_addr0(tb);
h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
- tb->flags, orig_cflags, tb->trace_vcpu_dstate);
+ tb->flags, orig_cflags);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
@@ -969,7 +968,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
/* add in the hash table */
h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
- tb->flags, tb->cflags, tb->trace_vcpu_dstate);
+ tb->flags, tb->cflags);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c87648b99e..bf814b9e81 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -65,11 +65,6 @@
#include "internal.h"
#include "perf.h"
-/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
-QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
- sizeof_field(TranslationBlock, trace_vcpu_dstate)
- * BITS_PER_BYTE);
-
TBContext tb_ctx;
/*
@@ -352,7 +347,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
- tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
tcg_ctx->gen_tb = tb;
--
2.39.2