We can call tb_htable_lookup even when the tb_jmp_cache
is completely empty. Therefore, un-nest most of the code
that depends on tb != NULL from the read of the cache.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg-runtime.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/tcg-runtime.c b/tcg-runtime.c
index 7fa90ce..a35c725 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -147,30 +147,32 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
{
CPUState *cpu = ENV_GET_CPU(env);
+ void *ret = tcg_ctx.code_gen_epilogue;
TranslationBlock *tb;
target_ulong cs_base, pc;
- uint32_t flags;
-
- tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
- if (likely(tb)) {
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- if (likely(tb->pc == addr && tb->cs_base == cs_base &&
- tb->flags == flags)) {
- goto found;
- }
+ uint32_t flags, addr_hash;
+
+ addr_hash = tb_jmp_cache_hash_func(addr);
+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+ if (unlikely(!(tb
+ && tb->pc == addr
+ && tb->cs_base == cs_base
+ && tb->flags == flags))) {
tb = tb_htable_lookup(cpu, addr, cs_base, flags);
- if (likely(tb)) {
- atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
- goto found;
+ if (!tb) {
+ return ret;
}
+ atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
}
- return tcg_ctx.code_gen_epilogue;
- found:
+
+ ret = tb->tc_ptr;
qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
"Chain %p [%d: " TARGET_FMT_lx "] %s\n",
- tb->tc_ptr, cpu->cpu_index, addr,
+ ret, cpu->cpu_index, addr,
lookup_symbol(addr));
- return tb->tc_ptr;
+ return ret;
}

void HELPER(exit_atomic)(CPUArchState *env)
--
2.9.4
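To see the control-flow change outside of QEMU, here is a minimal
standalone sketch of the same pattern: a direct-mapped cache in front
of a slow lookup, where a miss -- including an empty slot -- falls
through to the slow path and refills the cache. Every name and type
below is a simplified stand-in for TranslationBlock, tb_jmp_cache and
tb_htable_lookup, not the real QEMU definitions:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Simplified stand-ins for TranslationBlock and tb_jmp_cache. */
typedef struct Block {
    uint64_t pc;
    const char *code;
} Block;

#define CACHE_BITS 4
#define CACHE_SIZE (1u << CACHE_BITS)

static Block *cache[CACHE_SIZE];

static unsigned hash_pc(uint64_t pc)
{
    return pc & (CACHE_SIZE - 1);       /* toy hash, not QEMU's */
}

/* Stand-in for tb_htable_lookup: pretend only even pcs have a block. */
static Block *slow_lookup(uint64_t pc)
{
    if (pc & 1) {
        return NULL;
    }
    Block *b = malloc(sizeof(*b));
    b->pc = pc;
    b->code = "translated code";
    return b;
}

/* Same shape as the patched helper: a NULL slot and a stale entry take
 * the same path, so an empty cache no longer skips the slow lookup. */
static const char *lookup(uint64_t pc)
{
    unsigned h = hash_pc(pc);
    Block *b = cache[h];

    if (!(b && b->pc == pc)) {          /* miss: empty or mismatched slot */
        b = slow_lookup(pc);
        if (!b) {
            return "epilogue";          /* like code_gen_epilogue */
        }
        cache[h] = b;                   /* refill the direct-mapped cache */
    }
    return b->code;
}

int main(void)
{
    printf("%s\n", lookup(4));  /* cold cache: slow path, then refill */
    printf("%s\n", lookup(4));  /* warm cache: fast path */
    printf("%s\n", lookup(5));  /* no block anywhere: epilogue */
    return 0;
}

The folded miss test is the point of the patch: before, an empty cache
slot returned to the epilogue without consulting the hash table at all.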
Richard Henderson <rth@twiddle.net> writes:
> We can call tb_htable_lookup even when the tb_jmp_cache
> is completely empty. Therefore, un-nest most of the code
> that depends on tb != NULL from the read of the cache.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> ---
> tcg-runtime.c | 34 ++++++++++++++++++----------------
> 1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 7fa90ce..a35c725 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -147,30 +147,32 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
> void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
> {
> CPUState *cpu = ENV_GET_CPU(env);
> + void *ret = tcg_ctx.code_gen_epilogue;
> TranslationBlock *tb;
> target_ulong cs_base, pc;
> - uint32_t flags;
> -
> - tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> - if (likely(tb)) {
> - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> - if (likely(tb->pc == addr && tb->cs_base == cs_base &&
> - tb->flags == flags)) {
> - goto found;
> - }
> + uint32_t flags, addr_hash;
> +
> + addr_hash = tb_jmp_cache_hash_func(addr);
> + tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
> + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +
> + if (unlikely(!(tb
> + && tb->pc == addr
> + && tb->cs_base == cs_base
> + && tb->flags == flags))) {
> tb = tb_htable_lookup(cpu, addr, cs_base, flags);
> - if (likely(tb)) {
> - atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
> - goto found;
> + if (!tb) {
> + return ret;
> }
> + atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
> }
> - return tcg_ctx.code_gen_epilogue;
> - found:
> +
> + ret = tb->tc_ptr;
> qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
> "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
> - tb->tc_ptr, cpu->cpu_index, addr,
> + ret, cpu->cpu_index, addr,
> lookup_symbol(addr));
> - return tb->tc_ptr;
> + return ret;
> }
>
> void HELPER(exit_atomic)(CPUArchState *env)
--
Alex Bennée
On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
(snip)
> tb = tb_htable_lookup(cpu, addr, cs_base, flags);
> - if (likely(tb)) {
> - atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
> - goto found;
> + if (!tb) {
> + return ret;
While booting debian-arm I'm measuring (see below) a hit rate in the
high 90's for the htable lookups here, so adding an 'unlikely()'
hint would be a good idea.
[ I'm using the patch currently in your tcg-next branch, i.e. 0a2adf4e2 ]
E.
This is what the diff below prints during bootup+shutdown:
hit rate: 94.599500%
hit rate: 95.794100%
hit rate: 96.617900%
hit rate: 96.527275%
hit rate: 96.341320%
hit rate: 96.641883%
diff --git a/tcg-runtime.c b/tcg-runtime.c
index ec3a34e..addc3fa 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -150,6 +150,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags, addr_hash;
+ static unsigned long long accesses, found;

addr_hash = tb_jmp_cache_hash_func(addr);
tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
@@ -159,11 +160,16 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
&& tb->pc == addr
&& tb->cs_base == cs_base
&& tb->flags == flags))) {
+ accesses++;
+ if (!(accesses % 1000000)) {
+ fprintf(stderr, "hit rate: %f%%\n", (double)found * 100 / accesses);
+ }
tb = tb_htable_lookup(cpu, addr, cs_base, flags);
if (!tb) {
return tcg_ctx.code_gen_epilogue;
}
atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
+ found++;
}

qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
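The counting pattern in the diff above also works outside of QEMU.
A minimal standalone version -- only the counter names and the
1000000-access reporting interval are taken from the diff, the rest
is a stand-in harness:

#include <stdio.h>
#include <stdlib.h>

static unsigned long long accesses, found;

/* Call once per slow-path lookup; 'hit' says whether a block was found. */
static void count_lookup(int hit)
{
    accesses++;
    found += !!hit;
    if (!(accesses % 1000000)) {
        fprintf(stderr, "hit rate: %f%%\n", (double)found * 100 / accesses);
    }
}

int main(void)
{
    unsigned long long i;

    /* Fake a ~95% hit rate to exercise the reporting. */
    for (i = 0; i < 3000000; i++) {
        count_lookup(rand() % 100 < 95);
    }
    return 0;
}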
On 06/16/2017 01:19 PM, Emilio G. Cota wrote:
> While booting debian-arm I'm measuring (see below) a hit rate in the
> high 90's for the htable lookups here, so adding an 'unlikely()'
> hint would be a good idea.

GCC already predicts null pointers unlikely. We get identical code
with and without the hint.

r~
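One way to check this claim is to compile the two functions below with
"gcc -O2 -S" and diff the assembly. The unlikely() definition is written
out here as the usual __builtin_expect wrapper; treat it as an
assumption rather than a quote from the QEMU headers:

#define unlikely(x) __builtin_expect(!!(x), 0)

int g(void);
int h(void);

int without_hint(void *p)
{
    if (!p) {               /* GCC's heuristics predict this not-taken */
        return g();
    }
    return h();
}

int with_hint(void *p)
{
    if (unlikely(!p)) {     /* explicit hint; same code either way */
        return g();
    }
    return h();
}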
On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
> We can call tb_htable_lookup even when the tb_jmp_cache
> is completely empty. Therefore, un-nest most of the code
> that depends on tb != NULL from the read of the cache.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
I just wrote this alternative patch, which does the same thing
as yours. I also measured the effect of this change on the
hit rate of lookup_tb_ptr. Feel free to reuse parts of the
patch and/or the commit message!
Thanks,
E.
--- 8< ---
Strangely, we do not look up the tb in the global hash table
when we get NULL from tb_jmp_cache.
Fix it, which improves the hit rate of lookup_tb_ptr; for instance,
when booting and immediately shutting down debian-arm, the hit
rate improves from
93.150742% (before this patch)
to
99.451323% (after).
While at it, use a variable for the tb_jmp_cache hash and get rid
of the gotos.
Suggested-by: Richard Henderson <rth@twiddle.net>
Suggested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
tcg-runtime.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/tcg-runtime.c b/tcg-runtime.c
index 7fa90ce..09324b9 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -149,23 +149,19 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
CPUState *cpu = ENV_GET_CPU(env);
TranslationBlock *tb;
target_ulong cs_base, pc;
+ unsigned int hash = tb_jmp_cache_hash_func(addr);
uint32_t flags;

- tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
- if (likely(tb)) {
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- if (likely(tb->pc == addr && tb->cs_base == cs_base &&
- tb->flags == flags)) {
- goto found;
- }
+ tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ if (unlikely(tb == NULL || tb->pc != addr || tb->cs_base != cs_base ||
+ tb->flags != flags)) {
tb = tb_htable_lookup(cpu, addr, cs_base, flags);
- if (likely(tb)) {
- atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
- goto found;
+ if (unlikely(tb == NULL)) {
+ return tcg_ctx.code_gen_epilogue;
}
+ atomic_set(&cpu->tb_jmp_cache[hash], tb);
}
- return tcg_ctx.code_gen_epilogue;
- found:
qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
"Chain %p [%d: " TARGET_FMT_lx "] %s\n",
tb->tc_ptr, cpu->cpu_index, addr,
--
2.7.4
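The only semantic question when comparing the two patches is whether
their miss conditions agree, and they do, by De Morgan's laws. A
throwaway exhaustive check, with plain ints standing in for the
pointer and field tests:

#include <assert.h>

int main(void)
{
    int t, a, b, c;     /* tb != NULL, pc, cs_base and flags matches */

    for (t = 0; t <= 1; t++) {
        for (a = 0; a <= 1; a++) {
            for (b = 0; b <= 1; b++) {
                for (c = 0; c <= 1; c++) {
                    /* Richard: if (unlikely(!(tb && ...)))       */
                    /* Emilio:  if (unlikely(tb == NULL || ...))  */
                    assert(!(t && a && b && c) == (!t || !a || !b || !c));
                }
            }
        }
    }
    return 0;
}

Short-circuit evaluation also keeps both spellings safe when tb is
NULL, since the field tests are never reached.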
On 06/14/2017 01:27 PM, Emilio G. Cota wrote:
> On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
>> We can call tb_htable_lookup even when the tb_jmp_cache
>> is completely empty. Therefore, un-nest most of the code
>> that depends on tb != NULL from the read of the cache.
>>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>
> I just wrote this alternative patch, which does the same thing
> as yours. I also measured the effect of this change on the
> hit rate of lookup_tb_ptr. Feel free to reuse parts of the
> patch and/or the commit message!

Thanks. I'll adjust the commit.

r~