On 3/18/25 14:31, Richard Henderson wrote:
> Cache the mmu index in DisasContextBase.
> Perform the read on host endianness, which lets us
> share code with the translator_ld fast path.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/translator.h | 1 +
> accel/tcg/translator.c | 58 ++++++++++++++++++---------------------
> 2 files changed, 28 insertions(+), 31 deletions(-)
>
> diff --git a/include/exec/translator.h b/include/exec/translator.h
> index d70942a10f..205dd85bba 100644
> --- a/include/exec/translator.h
> +++ b/include/exec/translator.h
> @@ -73,6 +73,7 @@ struct DisasContextBase {
> int max_insns;
> bool plugin_enabled;
> bool fake_insn;
> + uint8_t code_mmuidx;
> struct TCGOp *insn_start;
> void *host_addr[2];
>
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 157be33bf6..6fd9237298 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -11,10 +11,10 @@
> #include "qemu/log.h"
> #include "qemu/error-report.h"
> #include "exec/exec-all.h"
> +#include "exec/cpu-ldst-common.h"
> +#include "exec/cpu-mmu-index.h"
> #include "exec/translator.h"
> -#include "exec/cpu_ldst.h"
> #include "exec/plugin-gen.h"
> -#include "exec/cpu_ldst.h"
> #include "exec/tswap.h"
> #include "tcg/tcg-op-common.h"
> #include "internal-target.h"
> @@ -142,6 +142,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
> db->host_addr[1] = NULL;
> db->record_start = 0;
> db->record_len = 0;
> + db->code_mmuidx = cpu_mmu_index(cpu, true);
>
> ops->init_disas_context(db, cpu);
> tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
> @@ -457,55 +458,50 @@ bool translator_st(const DisasContextBase *db, void *dest,
>
> uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, vaddr pc)
> {
> - uint8_t raw;
> + uint8_t val;
>
> - if (!translator_ld(env, db, &raw, pc, sizeof(raw))) {
> - raw = cpu_ldub_code(env, pc);
> - record_save(db, pc, &raw, sizeof(raw));
> + if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> + MemOpIdx oi = make_memop_idx(MO_UB, db->code_mmuidx);
> + val = cpu_ldb_code_mmu(env, pc, oi, 0);
> + record_save(db, pc, &val, sizeof(val));
> }
> - return raw;
> + return val;
> }
>
> uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, vaddr pc)
> {
> - uint16_t raw, tgt;
> + uint16_t val;
>
> - if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> - tgt = tswap16(raw);
> - } else {
> - tgt = cpu_lduw_code(env, pc);
> - raw = tswap16(tgt);
> - record_save(db, pc, &raw, sizeof(raw));
> + if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> + MemOpIdx oi = make_memop_idx(MO_UW, db->code_mmuidx);
> + val = cpu_ldw_code_mmu(env, pc, oi, 0);
> + record_save(db, pc, &val, sizeof(val));
> }
> - return tgt;
> + return tswap16(val);
> }
>
> uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, vaddr pc)
> {
> - uint32_t raw, tgt;
> + uint32_t val;
>
> - if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> - tgt = tswap32(raw);
> - } else {
> - tgt = cpu_ldl_code(env, pc);
> - raw = tswap32(tgt);
> - record_save(db, pc, &raw, sizeof(raw));
> + if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> + MemOpIdx oi = make_memop_idx(MO_UL, db->code_mmuidx);
> + val = cpu_ldl_code_mmu(env, pc, oi, 0);
> + record_save(db, pc, &val, sizeof(val));
> }
> - return tgt;
> + return tswap32(val);
> }
>
> uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
> {
> - uint64_t raw, tgt;
> + uint64_t val;
>
> - if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
> - tgt = tswap64(raw);
> - } else {
> - tgt = cpu_ldq_code(env, pc);
> - raw = tswap64(tgt);
> - record_save(db, pc, &raw, sizeof(raw));
> + if (!translator_ld(env, db, &val, pc, sizeof(val))) {
> + MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
> + val = cpu_ldq_code_mmu(env, pc, oi, 0);
> + record_save(db, pc, &val, sizeof(val));
> }
> - return tgt;
> + return tswap64(val);
> }
>
> void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)
If I understand correctly, cpu_ldq_code_mmu already performs the tswap
that we used to do before. However, we now call tswap again when returning
the final value, leading to tswap(tswap(val)).
Is that expected?