testing and tcg tweaks

[PATCH v1 10/10] translate-all: include guest address in out_asm output

Posted by Alex Bennée 5 years, 9 months ago

We already have information about where each guest instruction's
representation starts stored in tcg_ctx->gen_insn_data so we can
rectify the PC for faults. We can re-use this information to annotate
the out_asm output with the guest instruction address, which makes it a
bit easier to work out where you are, especially with longer blocks. A
minor wrinkle is that some instructions get optimised away, so we have
to scan forward until we find some actual generated code.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v1
  - better logic for doing chunk at a time
  - use new "note" facility to tag address
  - rewrite the commit log
v2
  - don't terminate gen_insn_end_off, trust your termination
    conditions ;-)
---
 accel/tcg/translate-all.c | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index cdf58bb420e..42ce1dfcff7 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1794,14 +1794,43 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
         qemu_log_in_addr_range(tb->pc)) {
         FILE *logfile = qemu_log_lock();
+        int code_size, data_size = 0;
+        g_autoptr(GString) note = g_string_new("[tb header & initial instruction]");
+        size_t chunk_start = 0;
+        int insn = 0;
         qemu_log("OUT: [size=%d]\n", gen_code_size);
         if (tcg_ctx->data_gen_ptr) {
-            size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
-            size_t data_size = gen_code_size - code_size;
-            size_t i;
+            code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
+            data_size = gen_code_size - code_size;
+        } else {
+            code_size = gen_code_size;
+        }
 
-            log_disas(tb->tc.ptr, code_size, NULL);
+        /* Dump header and the first instruction */
+        chunk_start = tcg_ctx->gen_insn_end_off[insn];
+        log_disas(tb->tc.ptr, chunk_start, note->str);
 
+        /*
+         * Dump each instruction chunk, wrapping up empty chunks into
+         * the next instruction. The whole array is offset so the
+         * first entry is the beginning of the 2nd instruction.
+         */
+        while (insn <= tb->icount && chunk_start < code_size) {
+            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
+            if (chunk_end > chunk_start) {
+                g_string_printf(note, "[guest addr: " TARGET_FMT_lx "]",
+                                tcg_ctx->gen_insn_data[insn][0]);
+                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start,
+                          note->str);
+                chunk_start = chunk_end;
+            }
+            insn++;
+        }
+
+        /* Finally dump any data we may have after the block */
+        if (data_size) {
+            int i;
+            qemu_log("  data: [size=%d]\n", data_size);
             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                 if (sizeof(tcg_target_ulong) == 8) {
                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
@@ -1813,8 +1842,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
                              *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
                 }
             }
-        } else {
-            log_disas(tb->tc.ptr, gen_code_size, NULL);
         }
         qemu_log("\n");
         qemu_log_flush();
-- 
2.20.1

Re: [PATCH v1 10/10] translate-all: include guest address in out_asm output

Posted by Philippe Mathieu-Daudé 5 years, 8 months ago

Hi Alex,

On 5/13/20 7:51 PM, Alex Bennée wrote:
> We already have information about where each guest instructions
> representation starts stored in the tcg_ctx->gen_insn_data so we can
> rectify the PC for faults. We can re-use this information to annotate
> the out_asm output with guest instruction address which makes it a bit
> easier to work out where you are especially with longer blocks. A
> minor wrinkle is that some instructions get optimised away so we have
> to scan forward until we find some actual generated code.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> 
> ---
> v1
>    - better logic for doing chunk at a time
>    - use new "note" facility to tag address
>    - rewrite the commit log
> v2
>    - don't terminate gen_insn_end_off, trust your termination
>      conditions ;-)
> ---
>   accel/tcg/translate-all.c | 39 +++++++++++++++++++++++++++++++++------
>   1 file changed, 33 insertions(+), 6 deletions(-)
> 
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index cdf58bb420e..42ce1dfcff7 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1794,14 +1794,43 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>       if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
>           qemu_log_in_addr_range(tb->pc)) {
>           FILE *logfile = qemu_log_lock();
> +        int code_size, data_size = 0;
> +        g_autoptr(GString) note = g_string_new("[tb header & initial instruction]");
> +        size_t chunk_start = 0;
> +        int insn = 0;
>           qemu_log("OUT: [size=%d]\n", gen_code_size);
>           if (tcg_ctx->data_gen_ptr) {
> -            size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
> -            size_t data_size = gen_code_size - code_size;
> -            size_t i;
> +            code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
> +            data_size = gen_code_size - code_size;
> +        } else {
> +            code_size = gen_code_size;
> +        }
>   
> -            log_disas(tb->tc.ptr, code_size, NULL);
> +        /* Dump header and the first instruction */
> +        chunk_start = tcg_ctx->gen_insn_end_off[insn];
> +        log_disas(tb->tc.ptr, chunk_start, note->str);
>   
> +        /*
> +         * Dump each instruction chunk, wrapping up empty chunks into
> +         * the next instruction. The whole array is offset so the
> +         * first entry is the beginning of the 2nd instruction.
> +         */
> +        while (insn <= tb->icount && chunk_start < code_size) {
> +            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
> +            if (chunk_end > chunk_start) {
> +                g_string_printf(note, "[guest addr: " TARGET_FMT_lx "]",
> +                                tcg_ctx->gen_insn_data[insn][0]);
> +                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start,
> +                          note->str);
> +                chunk_start = chunk_end;
> +            }
> +            insn++;
> +        }
> +
> +        /* Finally dump any data we may have after the block */
> +        if (data_size) {

It seems we can simplify this by checking tcg_ctx->data_gen_ptr here and
declaring data_size in this reduced scope. Doing so as a preliminary
patch would make the rest of this patch easier to review. What do you think?

> +            int i;
> +            qemu_log("  data: [size=%d]\n", data_size);
>               for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
>                   if (sizeof(tcg_target_ulong) == 8) {
>                       qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
> @@ -1813,8 +1842,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>                                *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
>                   }
>               }
> -        } else {
> -            log_disas(tb->tc.ptr, gen_code_size, NULL);
>           }
>           qemu_log("\n");
>           qemu_log_flush();
>