[Qemu-devel] [PATCH v2 1/5] tcg: Refactor helper_lookup_tb_ptr

Richard Henderson posted 5 patches 8 years, 4 months ago
[Qemu-devel] [PATCH v2 1/5] tcg: Refactor helper_lookup_tb_ptr
Posted by Richard Henderson 8 years, 4 months ago
We can call tb_htable_lookup even when the tb_jmp_cache
is completely empty.  Therefore, un-nest most of the code
dependent on tb != NULL from the read from the cache.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg-runtime.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/tcg-runtime.c b/tcg-runtime.c
index 7fa90ce..a35c725 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -147,30 +147,32 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
 void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
 {
     CPUState *cpu = ENV_GET_CPU(env);
+    void *ret = tcg_ctx.code_gen_epilogue;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
-    uint32_t flags;
-
-    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
-    if (likely(tb)) {
-        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-        if (likely(tb->pc == addr && tb->cs_base == cs_base &&
-                   tb->flags == flags)) {
-            goto found;
-        }
+    uint32_t flags, addr_hash;
+
+    addr_hash = tb_jmp_cache_hash_func(addr);
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    if (unlikely(!(tb
+                   && tb->pc == addr
+                   && tb->cs_base == cs_base
+                   && tb->flags == flags))) {
         tb = tb_htable_lookup(cpu, addr, cs_base, flags);
-        if (likely(tb)) {
-            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
-            goto found;
+        if (!tb) {
+            return ret;
         }
+        atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
     }
-    return tcg_ctx.code_gen_epilogue;
- found:
+
+    ret = tb->tc_ptr;
     qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
                            "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
-                           tb->tc_ptr, cpu->cpu_index, addr,
+                           ret, cpu->cpu_index, addr,
                            lookup_symbol(addr));
-    return tb->tc_ptr;
+    return ret;
 }
 
 void HELPER(exit_atomic)(CPUArchState *env)
-- 
2.9.4


Re: [Qemu-devel] [PATCH v2 1/5] tcg: Refactor helper_lookup_tb_ptr
Posted by Alex Bennée 8 years, 4 months ago
Richard Henderson <rth@twiddle.net> writes:

> We can call tb_htable_lookup even when the tb_jmp_cache
> is completely empty.  Therefore, un-nest most of the code
> dependent on tb != NULL from the read from the cache.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  tcg-runtime.c | 34 ++++++++++++++++++----------------
>  1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 7fa90ce..a35c725 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -147,30 +147,32 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
>  void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
>  {
>      CPUState *cpu = ENV_GET_CPU(env);
> +    void *ret = tcg_ctx.code_gen_epilogue;
>      TranslationBlock *tb;
>      target_ulong cs_base, pc;
> -    uint32_t flags;
> -
> -    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
> -    if (likely(tb)) {
> -        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> -        if (likely(tb->pc == addr && tb->cs_base == cs_base &&
> -                   tb->flags == flags)) {
> -            goto found;
> -        }
> +    uint32_t flags, addr_hash;
> +
> +    addr_hash = tb_jmp_cache_hash_func(addr);
> +    tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
> +    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +
> +    if (unlikely(!(tb
> +                   && tb->pc == addr
> +                   && tb->cs_base == cs_base
> +                   && tb->flags == flags))) {
>          tb = tb_htable_lookup(cpu, addr, cs_base, flags);
> -        if (likely(tb)) {
> -            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
> -            goto found;
> +        if (!tb) {
> +            return ret;
>          }
> +        atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
>      }
> -    return tcg_ctx.code_gen_epilogue;
> - found:
> +
> +    ret = tb->tc_ptr;
>      qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
>                             "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
> -                           tb->tc_ptr, cpu->cpu_index, addr,
> +                           ret, cpu->cpu_index, addr,
>                             lookup_symbol(addr));
> -    return tb->tc_ptr;
> +    return ret;
>  }
>
>  void HELPER(exit_atomic)(CPUArchState *env)


--
Alex Bennée

Re: [Qemu-devel] [PATCH v2 1/5] tcg: Refactor helper_lookup_tb_ptr
Posted by Emilio G. Cota 8 years, 4 months ago
On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
(snip)
>          tb = tb_htable_lookup(cpu, addr, cs_base, flags);
> -        if (likely(tb)) {
> -            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
> -            goto found;
> +        if (!tb) {
> +            return ret;

While booting debian-arm I'm measuring (see below) a hit rate in the
high 90's for the htable lookups here, so adding an 'unlikely()'
hint would be a good idea.

[ I'm using the patch currently in your tcg-next branch, i.e. 0a2adf4e2 ]

		E.

The below prints during bootup+shutdown:

hit rate: 94.599500%
hit rate: 95.794100%
hit rate: 96.617900%
hit rate: 96.527275%
hit rate: 96.341320%
hit rate: 96.641883%


diff --git a/tcg-runtime.c b/tcg-runtime.c
index ec3a34e..addc3fa 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -150,6 +150,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags, addr_hash;
+    static unsigned long long accesses, found;

     addr_hash = tb_jmp_cache_hash_func(addr);
     tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
@@ -159,11 +160,16 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
                    && tb->pc == addr
                    && tb->cs_base == cs_base
                    && tb->flags == flags))) {
+        accesses++;
+        if (!(accesses % 1000000)) {
+            fprintf(stderr, "hit rate: %f%%\n", (double)found * 100 / accesses);
+        }
         tb = tb_htable_lookup(cpu, addr, cs_base, flags);
         if (!tb) {
             return tcg_ctx.code_gen_epilogue;
         }
         atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
+        found++;
     }

     qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,

Re: [Qemu-devel] [PATCH v2 1/5] tcg: Refactor helper_lookup_tb_ptr
Posted by Richard Henderson 8 years, 4 months ago
On 06/16/2017 01:19 PM, Emilio G. Cota wrote:
> While booting debian-arm I'm measuring (see below) a hit rate in the
> high 90's for the htable lookups here, so adding an 'unlikely()'
> hint would be a good idea.

GCC already predicts null pointers unlikely.
We get identical code with and without the hint.


r~

[Qemu-devel] [PATCH] tcg-runtime: increase hit rate of lookup_tb_ptr
Posted by Emilio G. Cota 8 years, 4 months ago
On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
> We can call tb_htable_lookup even when the tb_jmp_cache
> is completely empty.  Therefore, un-nest most of the code
> dependent on tb != NULL from the read from the cache.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>

I just wrote this alternative patch, which does the same thing
as yours. I also measured what the effect of this change
has on the hit rate of lookup_tb_ptr. Feel free to reuse parts
of the patch and/or the commit message!

Thanks,

		E.

--- 8< ---

Strangely, we do not look up the tb in the global hash table
when we get NULL from tb_jmp_cache.

Fix it, which improves the hit rate of lookup_tb_ptr; for instance,
when booting and immediately shutting down debian-arm, the hit
rate improves from
	93.150742% (before this patch)
to
	99.451323 % (after).

While at it, use a variable for the tb_jmp_cache hash and get rid
of the goto's.

Suggested-by: Richard Henderson <rth@twiddle.net>
Suggested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tcg-runtime.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/tcg-runtime.c b/tcg-runtime.c
index 7fa90ce..09324b9 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -149,23 +149,19 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
     CPUState *cpu = ENV_GET_CPU(env);
     TranslationBlock *tb;
     target_ulong cs_base, pc;
+    unsigned int hash = tb_jmp_cache_hash_func(addr);
     uint32_t flags;
 
-    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
-    if (likely(tb)) {
-        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-        if (likely(tb->pc == addr && tb->cs_base == cs_base &&
-                   tb->flags == flags)) {
-            goto found;
-        }
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+    if (unlikely(tb == NULL || tb->pc != addr || tb->cs_base != cs_base ||
+               tb->flags != flags)) {
         tb = tb_htable_lookup(cpu, addr, cs_base, flags);
-        if (likely(tb)) {
-            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
-            goto found;
+        if (unlikely(tb == NULL)) {
+            return tcg_ctx.code_gen_epilogue;
         }
+        atomic_set(&cpu->tb_jmp_cache[hash], tb);
     }
-    return tcg_ctx.code_gen_epilogue;
- found:
     qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
                            "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
                            tb->tc_ptr, cpu->cpu_index, addr,
-- 
2.7.4


Re: [Qemu-devel] [PATCH] tcg-runtime: increase hit rate of lookup_tb_ptr
Posted by Richard Henderson 8 years, 4 months ago
On 06/14/2017 01:27 PM, Emilio G. Cota wrote:
> On Wed, Jun 14, 2017 at 12:48:17 -0700, Richard Henderson wrote:
>> We can call tb_htable_lookup even when the tb_jmp_cache
>> is completely empty.  Therefore, un-nest most of the code
>> dependent on tb != NULL from the read from the cache.
>>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
> 
> I just wrote this alternative patch, which does the same thing
> as yours. I also measured what the effect of this change
> has on the hit rate of lookup_tb_ptr. Feel free to reuse parts
> of the patch and/or the commit message!

Thanks.  I'll adjust the commit.


r~