[PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c

Richard Henderson posted 42 patches 2 weeks, 1 day ago
[PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c
Posted by Richard Henderson 2 weeks, 1 day ago
Cache the mmu index in DisasContextBase.
Perform the read on host endianness, which lets us
share code with the translator_ld fast path.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/translator.h |  1 +
 accel/tcg/translator.c    | 58 ++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index d70942a10f..205dd85bba 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -73,6 +73,7 @@ struct DisasContextBase {
     int max_insns;
     bool plugin_enabled;
     bool fake_insn;
+    uint8_t code_mmuidx;
     struct TCGOp *insn_start;
     void *host_addr[2];
 
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 157be33bf6..6fd9237298 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,10 +11,10 @@
 #include "qemu/log.h"
 #include "qemu/error-report.h"
 #include "exec/exec-all.h"
+#include "exec/cpu-ldst-common.h"
+#include "exec/cpu-mmu-index.h"
 #include "exec/translator.h"
-#include "exec/cpu_ldst.h"
 #include "exec/plugin-gen.h"
-#include "exec/cpu_ldst.h"
 #include "exec/tswap.h"
 #include "tcg/tcg-op-common.h"
 #include "internal-target.h"
@@ -142,6 +142,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
     db->host_addr[1] = NULL;
     db->record_start = 0;
     db->record_len = 0;
+    db->code_mmuidx = cpu_mmu_index(cpu, true);
 
     ops->init_disas_context(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
@@ -457,55 +458,50 @@ bool translator_st(const DisasContextBase *db, void *dest,
 
 uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, vaddr pc)
 {
-    uint8_t raw;
+    uint8_t val;
 
-    if (!translator_ld(env, db, &raw, pc, sizeof(raw))) {
-        raw = cpu_ldub_code(env, pc);
-        record_save(db, pc, &raw, sizeof(raw));
+    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+        MemOpIdx oi = make_memop_idx(MO_UB, db->code_mmuidx);
+        val = cpu_ldb_code_mmu(env, pc, oi, 0);
+        record_save(db, pc, &val, sizeof(val));
     }
-    return raw;
+    return val;
 }
 
 uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, vaddr pc)
 {
-    uint16_t raw, tgt;
+    uint16_t val;
 
-    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
-        tgt = tswap16(raw);
-    } else {
-        tgt = cpu_lduw_code(env, pc);
-        raw = tswap16(tgt);
-        record_save(db, pc, &raw, sizeof(raw));
+    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+        MemOpIdx oi = make_memop_idx(MO_UW, db->code_mmuidx);
+        val = cpu_ldw_code_mmu(env, pc, oi, 0);
+        record_save(db, pc, &val, sizeof(val));
     }
-    return tgt;
+    return tswap16(val);
 }
 
 uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, vaddr pc)
 {
-    uint32_t raw, tgt;
+    uint32_t val;
 
-    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
-        tgt = tswap32(raw);
-    } else {
-        tgt = cpu_ldl_code(env, pc);
-        raw = tswap32(tgt);
-        record_save(db, pc, &raw, sizeof(raw));
+    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+        MemOpIdx oi = make_memop_idx(MO_UL, db->code_mmuidx);
+        val = cpu_ldl_code_mmu(env, pc, oi, 0);
+        record_save(db, pc, &val, sizeof(val));
     }
-    return tgt;
+    return tswap32(val);
 }
 
 uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
 {
-    uint64_t raw, tgt;
+    uint64_t val;
 
-    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
-        tgt = tswap64(raw);
-    } else {
-        tgt = cpu_ldq_code(env, pc);
-        raw = tswap64(tgt);
-        record_save(db, pc, &raw, sizeof(raw));
+    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+        MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
+        val = cpu_ldq_code_mmu(env, pc, oi, 0);
+        record_save(db, pc, &val, sizeof(val));
     }
-    return tgt;
+    return tswap64(val);
 }
 
 void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)
-- 
2.43.0
Re: [PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c
Posted by Pierrick Bouvier 2 weeks, 1 day ago
On 3/18/25 14:31, Richard Henderson wrote:
> Cache the mmu index in DisasContextBase.
> Perform the read on host endianness, which lets us
> share code with the translator_ld fast path.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   include/exec/translator.h |  1 +
>   accel/tcg/translator.c    | 58 ++++++++++++++++++---------------------
>   2 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/include/exec/translator.h b/include/exec/translator.h
> index d70942a10f..205dd85bba 100644
> --- a/include/exec/translator.h
> +++ b/include/exec/translator.h
> @@ -73,6 +73,7 @@ struct DisasContextBase {
>       int max_insns;
>       bool plugin_enabled;
>       bool fake_insn;
> +    uint8_t code_mmuidx;
>       struct TCGOp *insn_start;
>       void *host_addr[2];
>   
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 157be33bf6..6fd9237298 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -11,10 +11,10 @@
>   #include "qemu/log.h"
>   #include "qemu/error-report.h"
>   #include "exec/exec-all.h"
> +#include "exec/cpu-ldst-common.h"
> +#include "exec/cpu-mmu-index.h"
>   #include "exec/translator.h"
> -#include "exec/cpu_ldst.h"
>   #include "exec/plugin-gen.h"
> -#include "exec/cpu_ldst.h"
>   #include "exec/tswap.h"
>   #include "tcg/tcg-op-common.h"
>   #include "internal-target.h"
> @@ -142,6 +142,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
>       db->host_addr[1] = NULL;
>       db->record_start = 0;
>       db->record_len = 0;
> +    db->code_mmuidx = cpu_mmu_index(cpu, true);
>   
>       ops->init_disas_context(db, cpu);
>       tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> @@ -457,55 +458,50 @@ bool translator_st(const DisasContextBase *db, void *dest,
>   
>   uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, vaddr pc)
>   {
> -    uint8_t raw;
> +    uint8_t val;
>   
> -    if (!translator_ld(env, db, &raw, pc, sizeof(raw))) {
> -        raw = cpu_ldub_code(env, pc);
> -        record_save(db, pc, &raw, sizeof(raw));
> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> +        MemOpIdx oi = make_memop_idx(MO_UB, db->code_mmuidx);
> +        val = cpu_ldb_code_mmu(env, pc, oi, 0);
> +        record_save(db, pc, &val, sizeof(val));
>       }
> -    return raw;
> +    return val;
>   }
>   
>   uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, vaddr pc)
>   {
> -    uint16_t raw, tgt;
> +    uint16_t val;
>   
> -    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> -        tgt = tswap16(raw);
> -    } else {
> -        tgt = cpu_lduw_code(env, pc);
> -        raw = tswap16(tgt);
> -        record_save(db, pc, &raw, sizeof(raw));
> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> +        MemOpIdx oi = make_memop_idx(MO_UW, db->code_mmuidx);
> +        val = cpu_ldw_code_mmu(env, pc, oi, 0);
> +        record_save(db, pc, &val, sizeof(val));
>       }
> -    return tgt;
> +    return tswap16(val);
>   }
>   
>   uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, vaddr pc)
>   {
> -    uint32_t raw, tgt;
> +    uint32_t val;
>   
> -    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> -        tgt = tswap32(raw);
> -    } else {
> -        tgt = cpu_ldl_code(env, pc);
> -        raw = tswap32(tgt);
> -        record_save(db, pc, &raw, sizeof(raw));
> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> +        MemOpIdx oi = make_memop_idx(MO_UL, db->code_mmuidx);
> +        val = cpu_ldl_code_mmu(env, pc, oi, 0);
> +        record_save(db, pc, &val, sizeof(val));
>       }
> -    return tgt;
> +    return tswap32(val);
>   }
>   
>   uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
>   {
> -    uint64_t raw, tgt;
> +    uint64_t val;
>   
> -    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> -        tgt = tswap64(raw);
> -    } else {
> -        tgt = cpu_ldq_code(env, pc);
> -        raw = tswap64(tgt);
> -        record_save(db, pc, &raw, sizeof(raw));
> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> +        MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
> +        val = cpu_ldq_code_mmu(env, pc, oi, 0);
> +        record_save(db, pc, &val, sizeof(val));
>       }
> -    return tgt;
> +    return tswap64(val);
>   }
>   
>   void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)

If I understand correctly, cpu_ldq_code_mmu performs the tswap call we 
used to before. However, we now call it again when returning the final 
value, leading to tswap(tswap(val)).

Is that expected?
Re: [PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c
Posted by Richard Henderson 1 week, 6 days ago
On 3/18/25 17:23, Pierrick Bouvier wrote:
>>   uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
>>   {
>> -    uint64_t raw, tgt;
>> +    uint64_t val;
>> -    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
>> -        tgt = tswap64(raw);
>> -    } else {
>> -        tgt = cpu_ldq_code(env, pc);
>> -        raw = tswap64(tgt);
>> -        record_save(db, pc, &raw, sizeof(raw));
>> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
>> +        MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
>> +        val = cpu_ldq_code_mmu(env, pc, oi, 0);
>> +        record_save(db, pc, &val, sizeof(val));
>>       }
>> -    return tgt;
>> +    return tswap64(val);
>>   }
>>   void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)
> 
> If I understand correctly, cpu_ldq_code_mmu performs the tswap call we used to before. 

Incorrect: cpu_ldq_code_mmu has no tswap.

It has a conditional bswap, if MO_BSWAP is set, but that's not true for the MO_UQ used 
here.  Therefore both the direct load in translator_ld and the cpu_ld*_code_mmu function 
call both produce host-endian values.

Therefore the tswap at the end correctly swaps host to target-endianness.


r~

Re: [PATCH v2 12/42] accel/tcg: Use cpu_ld*_code_mmu in translator.c
Posted by Pierrick Bouvier 1 week, 5 days ago
On 3/20/25 17:48, Richard Henderson wrote:
> On 3/18/25 17:23, Pierrick Bouvier wrote:
>>>    uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
>>>    {
>>> -    uint64_t raw, tgt;
>>> +    uint64_t val;
>>> -    if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
>>> -        tgt = tswap64(raw);
>>> -    } else {
>>> -        tgt = cpu_ldq_code(env, pc);
>>> -        raw = tswap64(tgt);
>>> -        record_save(db, pc, &raw, sizeof(raw));
>>> +    if (!translator_ld(env, db, &val, pc, sizeof(val))) {
>>> +        MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
>>> +        val = cpu_ldq_code_mmu(env, pc, oi, 0);
>>> +        record_save(db, pc, &val, sizeof(val));
>>>        }
>>> -    return tgt;
>>> +    return tswap64(val);
>>>    }
>>>    void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)
>>
>> If I understand correctly, cpu_ldq_code_mmu performs the tswap call we used to before.
> 
> Incorrect: cpu_ldq_code_mmu has no tswap.
> 
> It has a conditional bswap, if MO_BSWAP is set, but that's not true for the MO_UQ used
> here.  Therefore both the direct load in translator_ld and the cpu_ld*_code_mmu function
> call both produce host-endian values.
> 
> Therefore the tswap at the end correctly swaps host to target-endianness.
> 

Oh right, missed that.

> 
> r~